#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

from pathlib import Path

import numpy as np
import pandas as pd

import tripleblind as tb


# NOTE(review): presumably makes this script's own directory the working
# directory so the relative path below resolves next to the script -- confirm
# against the tripleblind SDK documentation.
tb.util.set_script_dir_current()
# Folder that receives the generated CSV files. exist_ok=True lets the script
# be re-run without failing when the folder is already present.
data_dir = Path("example_data")
data_dir.mkdir(exist_ok=True)


def generate_table_a(rows: int = 1000) -> pd.DataFrame:
    """Build a synthetic patient table with imperial-unit measurements.

    All columns are drawn independently from NumPy's global random state, so
    call ``np.random.seed`` beforehand if reproducible output is needed.

    Args:
        rows: Number of records to generate. Defaults to 1000.

    Returns:
        A DataFrame with ``rows`` records and four int64 columns:
        ``Patient_Id`` (random value in ``[0, int64 max)``), ``Age`` (18-88),
        ``Height_IN`` (58-77) and ``Weight_LBS`` (130-249).
    """
    # IDs are random draws, not a sequence; uniqueness is not enforced here.
    patient_id = np.random.randint(
        0, np.iinfo(np.int64).max, size=rows, dtype=np.int64
    )

    # uniform() samples the half-open range [low, high) and the int64 cast
    # truncates the fraction, so each upper bound below is never produced.
    age = np.random.uniform(18, 89, size=rows).astype(np.int64)
    height_in = np.random.uniform(58, 78, size=rows).astype(np.int64)
    weight_lbs = np.random.uniform(130, 250, size=rows).astype(np.int64)

    return pd.DataFrame(
        data={
            "Patient_Id": patient_id,
            "Age": age,
            "Height_IN": height_in,
            "Weight_LBS": weight_lbs,
        }
    )


def generate_table_b(rows: int = 5000) -> pd.DataFrame:
    """Build a synthetic patient table with metric-unit measurements.

    All columns are drawn independently from NumPy's global random state, so
    call ``np.random.seed`` beforehand if reproducible output is needed.

    Args:
        rows: Number of records to generate. Defaults to 5000.

    Returns:
        A DataFrame with ``rows`` records and three int64 columns:
        ``pid`` (random value in ``[0, int64 max)``), ``height_cm`` (147-197)
        and ``weight_kg`` (58-112).
    """
    # IDs are random draws, not a sequence; uniqueness is not enforced here.
    pid = np.random.randint(0, np.iinfo(np.int64).max, size=rows, dtype=np.int64)

    # uniform() samples the half-open range [low, high) and the int64 cast
    # truncates the fraction, so each upper bound below is never produced.
    height_cm = np.random.uniform(147, 198, size=rows).astype(np.int64)
    weight_kg = np.random.uniform(58, 113, size=rows).astype(np.int64)

    return pd.DataFrame(
        data={"pid": pid, "height_cm": height_cm, "weight_kg": weight_kg}
    )


# Generate each table and write it to its own CSV file inside data_dir.
# index=False keeps the pandas row index out of the files, so they contain
# only the generated columns.
generate_table_a().to_csv(data_dir / "hipaa_restricted_a.csv", index=False)
generate_table_b().to_csv(data_dir / "hipaa_restricted_b.csv", index=False)
