#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

from pathlib import Path

import pandas as pd
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split

import tripleblind as tb


# Presumably switches the working directory to this script's folder so the
# relative "example_data" path below resolves next to the script -- confirm
# against the tripleblind util documentation.
tb.util.set_script_dir_current()

# Output folder for every CSV written below; exist_ok=True makes re-running
# the script safe when the folder already exists.
data_dir = Path("example_data")
data_dir.mkdir(exist_ok=True)

##########################################################################
# Make classification datasets
#
# Generates a synthetic classification table (feature columns data_0 ..
# data_119, a label column "y" and a row identifier "ID"), splits it into
# train/test with scikit-learn's default split settings, and writes to
# data_dir:
#   * train_clf.csv / test_clf.csv           -- the full tables
#   * train_clf{0,1,2}.csv / test_clf{0,1,2}.csv -- column-wise partitions,
#     each carrying the "ID" column so the parts can be re-joined.
X, y = make_classification(n_samples=1000, n_features=120)
# Derive the column count and row count from the generated data so they
# cannot drift from the arguments passed to the generator above.
df = pd.DataFrame(X, columns=[f"data_{i}" for i in range(X.shape[1])])
df["y"] = y
df["ID"] = list(range(len(df)))

train, test = train_test_split(df)

train.to_csv(data_dir / "train_clf.csv", index=False)
test.to_csv(data_dir / "test_clf.csv", index=False)

# Set the IDs aside and drop them from the frames so that the positional
# slices below address only the data_* columns followed by "y".
train_ids = train["ID"].copy()
test_ids = test["ID"].copy()

del train["ID"]
del test["ID"]

# Every partition is taken as an explicit copy: assigning a column to a bare
# iloc slice is chained assignment and raises SettingWithCopyWarning on
# pandas versions without copy-on-write.
train0 = train.iloc[:, :40].copy()  # data_0 .. data_39
train0["ID"] = train_ids
train0.to_csv(data_dir / "train_clf0.csv", index=False)

train1 = train.iloc[:, 40:100].copy()  # data_40 .. data_99
train1["ID"] = train_ids
train1.to_csv(data_dir / "train_clf1.csv", index=False)

train2 = train.iloc[:, 100:].copy()  # data_100 .. data_119 plus "y"
train2["ID"] = train_ids
train2.to_csv(data_dir / "train_clf2.csv", index=False)


test0 = test.iloc[:, :40].copy()  # data_0 .. data_39
test0["ID"] = test_ids
test0.to_csv(data_dir / "test_clf0.csv", index=False)

test1 = test.iloc[:, 40:100].copy()  # data_40 .. data_99
test1["ID"] = test_ids
test1.to_csv(data_dir / "test_clf1.csv", index=False)


# The slice stops at column 120, so unlike train2 this partition does not
# contain the "y" label column.
test2 = test.iloc[:, 100:120].copy()  # data_100 .. data_119
test2["ID"] = test_ids
test2.to_csv(data_dir / "test_clf2.csv", index=False)

##########################################################################
# Make regression datasets
#
# Generates a synthetic regression table (feature columns data_0 ..
# data_119, a target column "y" and a row identifier "ID"), splits it into
# train/test with scikit-learn's default split settings, and writes to
# data_dir:
#   * train_reg.csv / test_reg.csv         -- the full tables
#   * train_reg{0,1}.csv / test_reg{0,1}.csv -- column-wise partitions,
#     each carrying the "ID" column so the parts can be re-joined.
X, y = make_regression(n_samples=1000, n_features=120)
# Derive the column count and row count from the generated data so they
# cannot drift from the arguments passed to the generator above.
df = pd.DataFrame(X, columns=[f"data_{i}" for i in range(X.shape[1])])
df["y"] = y
df["ID"] = list(range(len(df)))

train, test = train_test_split(df)

train.to_csv(data_dir / "train_reg.csv", index=False)
test.to_csv(data_dir / "test_reg.csv", index=False)

# Set the IDs aside and drop them from the frames so that the positional
# slices below address only the data_* columns followed by "y".
train_ids = train["ID"].copy()
test_ids = test["ID"].copy()

del train["ID"]
del test["ID"]

# Every partition is taken as an explicit copy: assigning a column to a bare
# iloc slice is chained assignment and raises SettingWithCopyWarning on
# pandas versions without copy-on-write.
train0 = train.iloc[:, :40].copy()  # data_0 .. data_39
train0["ID"] = train_ids
train0.to_csv(data_dir / "train_reg0.csv", index=False)

train1 = train.iloc[:, 40:].copy()  # data_40 .. data_119 plus "y"
train1["ID"] = train_ids
train1.to_csv(data_dir / "train_reg1.csv", index=False)


test0 = test.iloc[:, :40].copy()  # data_0 .. data_39
test0["ID"] = test_ids
test0.to_csv(data_dir / "test_reg0.csv", index=False)

# Open-ended slice: this test partition keeps the "y" target column.
test1 = test.iloc[:, 40:].copy()  # data_40 .. data_119 plus "y"
test1["ID"] = test_ids
test1.to_csv(data_dir / "test_reg1.csv", index=False)
