#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

import csv
import random
import time
from pathlib import Path

import pandas as pd
from faker import Faker
from faker.providers import BaseProvider

import tripleblind as tb


# Lookup values used when fabricating prescription records

# Names of the drugs a prescription can be written for
drug_name_list = [
    "Atorvastatin", "Lisinopril", "Albuterol", "Levothyroxine", "Amlodipine",
    "Gabapentin", "Omeprazole", "Metformin", "Losartan", "Hydrocodone",
]

# Available strengths, expressed as "<milligrams>mg"
drug_dosage_list = [f"{milligrams}mg" for milligrams in (5, 10, 20)]

# How often the drug is to be taken
drug_frequency_list = [
    "Twice Daily",
    "Daily",
    "Every Other Day",
    "Weekly",
]

# Declare Lists for EHR Data


# Age brackets: narrow bands up to 19, decade-wide bands after that
age_list = [
    "<1", "1-4", "5-9", "10-14", "15-19",
    "20-29", "30-39", "40-49", "50-59", "60-69", "70-79",
    ">=80",
]

# Gender codes recorded for a person
gender_list = ["M", "F"]

# Hospital unit that handled the visit
healthcare_unit_list = ["ED", "General Admit"]

# Three real procedures followed by nine single-space placeholders, so that
# an entry drawn uniformly at random is blank 9 times out of 12.
performed_procedure_list = [
    "Treatment of pneumonia or influenza (303140151)",
    "Cardiac catheterization (211020010)",
    "Evaluation for hearing deficiency diagnosis (211070092)",
] + [" "] * 9

# Main diagnoses, each written as "<code> <description>".
#
# The routine-exam entry (Z00.00) is repeated six times at the end so that an
# entry drawn uniformly at random is a routine exam 6 times out of 20.
#
# NOTE: every entry must end with a comma. Without it Python silently joins
# adjacent string literals, which previously fused the K52.2 and K52.89
# diagnoses into one bogus value.
main_diagnosis_list = [
    "E78.4 Other hyperlipidemia",
    "E78.5 Hyperlipidemia, unspecified",
    "E11.9 Type 2 diabetes mellitus without complications",
    "I10 Essential (primary) hypertension",
    "E03.9 Hypothyroidism, unspecified",
    "K52.2 Allergic and dietetic gastroenteritis and colitis",
    "K52.89 Other specified noninfective gastroenteritis and colitis",
    "R19.7 Diarrhea, unspecified",
    "F41.9 Anxiety disorder, unspecified",
    "L03.90 Cellulitis, unspecified",
    "L03.91 Acute lymphangitis, unspecified",
    "R05 Cough",
    "R50.2 Drug induced fever",
    "R50.9 Fever, unspecified",
] + ["Z00.00 Encntr for general adult medical exam w/o abnormal findings"] * 6

# Secondary diagnoses, each written as "<code> <description>".
#
# Twelve single-space placeholders are appended so that an entry drawn
# uniformly at random is blank (no secondary diagnosis) 12 times out of 26.
#
# NOTE: every entry must end with a comma. Without it Python silently joins
# adjacent string literals, which previously fused the K52.2 and K52.89
# diagnoses into one bogus value.
secondary_diagnosis_list = [
    "E78.4 Other hyperlipidemia",
    "E78.5 Hyperlipidemia, unspecified",
    "E11.9 Type 2 diabetes mellitus without complications",
    "I10 Essential (primary) hypertension",
    "E03.9 Hypothyroidism, unspecified",
    "K52.2 Allergic and dietetic gastroenteritis and colitis",
    "K52.89 Other specified noninfective gastroenteritis and colitis",
    "R19.7 Diarrhea, unspecified",
    "F41.9 Anxiety disorder, unspecified",
    "L03.90 Cellulitis, unspecified",
    "L03.91 Acute lymphangitis, unspecified",
    "R05 Cough",
    "R50.2 Drug induced fever",
    "R50.9 Fever, unspecified",
] + [" "] * 12


# Custom Faker provider: one random-pick method per lookup list
class Provider(BaseProvider):
    """Faker provider that draws values from this module's lookup lists.

    Each method hands the matching module-level list to ``random_element``
    and returns the entry it selects.
    """

    # --- prescription fields ---

    def drug_name_provider(self):
        """Return one entry picked from ``drug_name_list``."""
        return self.random_element(elements=drug_name_list)

    def drug_dosage_provider(self):
        """Return one entry picked from ``drug_dosage_list``."""
        return self.random_element(elements=drug_dosage_list)

    def drug_frequency_provider(self):
        """Return one entry picked from ``drug_frequency_list``."""
        return self.random_element(elements=drug_frequency_list)

    # --- person fields ---

    def age_provider(self):
        """Return one entry picked from ``age_list``."""
        return self.random_element(elements=age_list)

    def gender_provider(self):
        """Return one entry picked from ``gender_list``."""
        return self.random_element(elements=gender_list)

    # --- EHR fields ---

    def healthcare_unit_provider(self):
        """Return one entry picked from ``healthcare_unit_list``."""
        return self.random_element(elements=healthcare_unit_list)

    def performed_procedure_provider(self):
        """Return one entry picked from ``performed_procedure_list``."""
        return self.random_element(elements=performed_procedure_list)

    def main_diagnosis_provider(self):
        """Return one entry picked from ``main_diagnosis_list``."""
        return self.random_element(elements=main_diagnosis_list)

    def secondary_diagnosis_provider(self):
        """Return one entry picked from ``secondary_diagnosis_list``."""
        return self.random_element(elements=secondary_diagnosis_list)


# Faker instance extended with the custom Provider defined in this file
fake = Faker()
fake.add_provider(Provider)

# Seed BOTH random sources.  Faker.seed() only seeds Faker's own shared
# generator; the stdlib `random` module, which this script uses to choose the
# person each generated record belongs to, is independent and would otherwise
# produce a different selection on every run.
Faker.seed(0)
random.seed(0)

# NOTE(review): presumably makes the script's own folder the working directory
# so that "example_data" is created next to the script -- confirm against the
# tripleblind documentation.
tb.util.set_script_dir_current()
data_dir = Path("example_data")
data_dir.mkdir(exist_ok=True)

# Create a population of fake individuals.
# Each person is a list: [name, single-line address, age bracket, gender].
num_people = 100
population = [
    [
        fake.name(),
        fake.address().replace("\n", "; "),  # flatten the multi-line address
        fake.age_provider(),
        fake.gender_provider(),
    ]
    for _ in range(num_people)
]


# Fabricate an imaginary pharmacy database of prescriptions.
#
# Every row is one transaction with the columns:
#   date, name, address, drug_name, dosage, frequency

num_prescription_transactions = 500
prescription_customers = 0.5  # fraction (0-1) of the population who shops at this store

# The store's customers are taken to be the leading slice of the population
total_prescription_populations = int(prescription_customers * len(population))
first_customer = 0
last_customer = total_prescription_populations - 1

prescription_data = []
for _ in range(num_prescription_transactions):
    # Timestamp (seconds since the epoch) taken at the moment the row is built
    timestamp = time.time()
    # Pick a customer; index 0 is the name and index 1 the address
    customer = population[random.randint(first_customer, last_customer)]
    prescription_data.append(
        {
            "date": timestamp,
            "name": customer[0],
            "address": customer[1],
            "drug_name": fake.drug_name_provider(),
            "dosage": fake.drug_dosage_provider(),
            "frequency": fake.drug_frequency_provider(),
        }
    )

df2 = pd.DataFrame(prescription_data)
# TODO: Snowflake can be picky about the quoting of data.  Might need to preprocess.
prescription_csv = data_dir / "prescription_data.csv"
df2.to_csv(prescription_csv, index=False, quoting=csv.QUOTE_NONNUMERIC)

# Fabricate an imaginary database of Hope Valley Hospital EHR records

number_patient_records = 1000  # number of EHR Records
patients = 0.5  # fraction (0-1) of the population who are patients with EHR records

# The patients are taken to be a window centred in the population list
total_patient_population = int(patients * len(population))
first_patient = (len(population) - total_patient_population) // 2
last_patient = first_patient + total_patient_population - 1

# Print the population size and the inclusive index range of the patients
for value in (len(population), first_patient, last_patient):
    print(value)

EHR_data = []
for _ in range(number_patient_records):
    name, address, age, gender = population[random.randint(first_patient, last_patient)]
    record = {
        "patient name": name,
        # Munge the address so it differs slightly from the spelling used in
        # the prescription database
        "patient address": address.replace("Apt.", "Apartment").replace("Suite", "Ste"),
        "patient age": age,
        "patient gender": gender,
        # Timestamp (seconds since the epoch) taken when the row is built
        "issue date": time.time(),
        "healthcare unit": fake.healthcare_unit_provider(),
        "performed procedure": fake.performed_procedure_provider(),
        "main diagnosis": fake.main_diagnosis_provider(),
        "secondary diagnosis": fake.secondary_diagnosis_provider(),
    }
    EHR_data.append(record)

df = pd.DataFrame(EHR_data)
ehr_csv = data_dir / "Hope_Valley_Hospital_EHR.csv"
df.to_csv(ehr_csv, index=False, quoting=csv.QUOTE_NONNUMERIC)
