#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

from pathlib import Path

import tripleblind as tb


##############################################################################
#
# Script to create CA housing data.  Two datasets are created and placed
# on the Access Points of example_user1 and example_user2.  An Agreement
# is attached to each dataset so that any team can run Random Forest
# training jobs against it hands-free, without further approval.
#
##############################################################################

# Config entries for the two example accounts that will own the datasets.
# The "token", "login" and "team_id" keys of these entries are read below.
user1 = tb.config.example_user1
user2 = tb.config.example_user2

# Name and description used when positioning the first dataset (user1).
name1 = "EXAMPLE - San Diego Housing Census 1990"
desc1 = """
This dataset is a fictional set of housing data for San Diego residents
collected for the 1990 U.S. census.

The San Diego Housing dataset contains just over 10,000 observations. Obtained
from [Scikit-learn](https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset),
it was originally derived from the 1990 U.S. census. The dataset contains 7
predictive attributes and a target value. The target column contains the median
house value and is expressed in increments of 100K. See the associated Los
Angeles Housing Census 1990 dataset.
"""

# Name and description used when positioning the second dataset (user2).
# NOTE(review): desc1 opens its second paragraph with "The San Diego Housing
# dataset" while this one says "The CA Housing dataset" -- confirm whether
# "The Los Angeles Housing dataset" was intended.
name2 = "EXAMPLE - Los Angeles Housing Census 1990"
desc2 = """
This dataset is a fictional set of housing data for Los Angeles residents
collected for the 1990 U.S. census.

The CA Housing dataset contains just over 10,000 observations. Obtained
from [Scikit-learn](https://scikit-learn.org/stable/datasets/real_world.html#california-housing-dataset),
it was originally derived from the 1990 U.S. census. The dataset contains 7
predictive attributes and a target value. The target column contains the median
house value and is expressed in increments of 100K. See the associated San
Diego Housing Census 1990 dataset.
"""

# Presumably switches the working directory to this script's directory so the
# relative ``example_data`` path below resolves next to it -- TODO confirm.
tb.util.set_script_dir_current()
# Initialize the SDK with user1's token; user2's calls use an explicit Session.
tb.initialize(api_token=user1["token"])
# Directory (relative path) holding the source CSVs and the generated packages.
data_dir = Path("example_data")


#############################################################################
#  Organization one
#############################################################################

# Open the session for user1 *before* the ``try``: it is needed again after
# the ``try`` (for the Agreement), so it must be bound no matter which branch
# below runs.
session1 = tb.Session(api_token=user1["token"], from_default=True)

try:
    # Package the CSV and position it as a discoverable asset for user1.
    package_cali_housing_a = tb.Package.create(
        filename=data_dir / "cali_housing_a.zip",
        record_data=data_dir / "cali_housing_a.csv",
    )
    print(f"Creating dataset '{name1}' on {user1['login']}'s Access Point...")
    dataset1 = tb.Asset.position(
        file_handle=package_cali_housing_a,
        name=name1,
        desc=desc1,
        is_discoverable=True,
        session=session1,
    )
except tb.TripleblindAssetAlreadyExists:
    # Re-running the script is fine: reuse the asset already owned by
    # user1's team instead of failing.
    print(f"   asset '{name1}' already exists, skipping.")
    dataset1 = tb.Asset.find(name1, owned_by=user1["team_id"])

# Attach an Agreement to the dataset to allow anyone to train against
# this dataset without further interaction.
dataset1.add_agreement(
    with_team="ANY", operation=tb.Operation.RANDOM_FOREST_TRAIN, session=session1
)
print("Created Agreement for any to train against this dataset.\n")

#############################################################################
#  Organization two
#############################################################################

# Open the session for user2 *before* the ``try``: it is needed again after
# the ``try`` (for the Agreement), so it must be bound no matter which branch
# below runs.
session2 = tb.Session(api_token=user2["token"], from_default=True)

try:
    # Package the CSV and position it as a discoverable asset for user2.
    package_cali_housing_b = tb.Package.create(
        filename=data_dir / "cali_housing_b.zip",
        record_data=data_dir / "cali_housing_b.csv",
    )
    print(f"Creating dataset '{name2}' on {user2['login']}'s Access Point...")
    dataset2 = tb.Asset.position(
        file_handle=package_cali_housing_b,
        name=name2,
        desc=desc2,
        is_discoverable=True,
        session=session2,
    )
except tb.TripleblindAssetAlreadyExists:
    # Re-running the script is fine: reuse the asset already owned by
    # user2's team instead of failing.
    # NOTE(review): this lookup passes no session, so it presumably runs
    # under the default one set up by tb.initialize() (user1's token) --
    # confirm that is intended.
    print(f"   asset '{name2}' already exists, skipping.")
    dataset2 = tb.Asset.find(name2, owned_by=user2["team_id"])

# Attach an Agreement to the dataset to allow anyone to train against
# this dataset without further interaction.
dataset2.add_agreement(
    with_team="ANY", operation=tb.Operation.RANDOM_FOREST_TRAIN, session=session2
)
print("Created Agreement for any to train against this dataset.\n")

print("Data is in position.")
