#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

from pathlib import Path
from typing import Tuple

import pandas as pd

import tripleblind as tb


# NOTE(review): presumably switches the working directory to this script's
# folder so the relative paths used in this file resolve next to the script —
# confirm against the tripleblind helper.
tb.util.set_script_dir_current()
# Folder that receives the generated CSV; created on first run, and
# exist_ok=True keeps re-runs from failing when it is already present.
data_dir = Path("example_data")
data_dir.mkdir(exist_ok=True)


def load_file(path: Path) -> Tuple[str, str]:
    """Read a text file and return its name paired with its full contents.

    Args:
        path: Location of the file to read.

    Returns:
        A ``(filename, content)`` tuple where ``filename`` is ``path.name``
        (the final path component) and ``content`` is the entire file as text.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file's bytes are not valid UTF-8.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's locale
    # default, so the same note yields the same text on every machine.
    with open(path, "r", encoding="utf-8") as file:
        data = file.read()
    return path.name, data


# Collect the regular files in notes/ in sorted order: iterdir() yields entries
# in arbitrary order and also returns subdirectories, which cannot be opened
# as text. Sorting keeps the CSV row order reproducible between runs.
note_paths = sorted(entry for entry in Path("notes").iterdir() if entry.is_file())
files = [load_file(path) for path in note_paths]

# One row per note: the file's name and its full text.
df = pd.DataFrame(files, columns=["filename", "content"])

# index=False omits the DataFrame's row index from the written file.
df.to_csv(data_dir / "ehr_notes.csv", index=False)
