#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

from pathlib import Path

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

import tripleblind as tb


# Local inference example: load a packaged random-forest model, run it against
# the college-acceptance dataset, save the predictions, and report accuracy.

# NOTE(review): presumably switches the working directory to this script's
# folder so the relative paths below resolve consistently -- confirm against
# the tripleblind SDK documentation.
tb.util.set_script_dir_current()
data_dir = Path("example_data")

# Unpack the trained model together with the preprocessing objects that were
# stored alongside it in the package.
pack = tb.Package.load("forest_model.zip")
rf_model: RandomForestClassifier = pack.model()
data_transforms = pack.get_data_transforms()
# NOTE(review): target_transforms is loaded but never applied in this script;
# predictions are compared directly against the raw "admitted" column below.
# Verify that no inverse target transform is needed.
target_transforms = pack.get_target_transforms()

# Load the original (complete) training dataset
train_data = tb.util.download_tripleblind_resource(
    "college_acceptance.csv",
    save_to_dir=data_dir,
    cache_dir="../../.cache",
)


data = pd.read_csv(train_data)

# Split the target column off the feature frame: pop() returns the column and
# removes it from `data` in a single step.
admitted = data.pop("admitted")

# apply the data transformation that was used on the training data
data_trfrm = data_transforms.transform(data)
data_new = pd.DataFrame(data_trfrm)


# Infer against this dataset
predictions = rf_model.predict(data_new)
print(predictions)
# Write the bare predictions, one per row, with no header line and no index
# column. header/index are documented as booleans, so pass False explicitly
# rather than relying on None being falsy.
pd.DataFrame(predictions).to_csv("rf_local_infer.csv", header=False, index=False)

# Report accuracy against this known truth
acc = accuracy_score(admitted, predictions)
print(f"Accuracy score: {acc}")
