#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

from pathlib import Path

import tripleblind as tb


# Presumably makes this script's own folder the working directory so that the
# relative "example_data" path below resolves -- TODO confirm against tb.util.
tb.util.set_script_dir_current()
# Folder containing the CSV files positioned by the rest of this script.
data_dir = Path("example_data")

# Prefix used when building asset names with f-strings further down.
prefix = "EXAMPLE - "

# Organization-one: initialize the SDK with example_user1's token.  Uploads
# below that pass no `session=` are the ones reported as "organization-one".
tb.initialize(api_token=tb.config.example_user1["token"])
# Organization-two and organization-three: separate sessions built from the
# other example users' tokens; they are passed as `session=` when positioning
# those organizations' assets and when creating their agreements.
# NOTE(review): from_default=True presumably inherits the remaining settings
# from the session created by tb.initialize() -- confirm in the SDK reference.
session_org_2 = tb.Session(
    api_token=tb.config.example_user2["token"], from_default=True
)
session_org_3 = tb.Session(
    api_token=tb.config.example_user3["token"], from_default=True
)


# Organization-one holds the first vertical slice (columns 0-40) of the
# classification data.  The train and test uploads differ only in the split
# name, so one loop handles both -- train first, then test.
for split in ("train", "test"):
    name = f"{prefix}Classification PSI Regression vertical {split} 0-40"
    # The continuation lines keep their eight-space indent on purpose: that
    # whitespace is part of the stored description text.
    description = f"""The first part of three datasets which jointly can be used to
        {split} a classification model.  The data has been vertically partitioned, meaning
        each dataset has portions of the data for each complete record.  This dataset
        holds columns 0-40, along with a common record ID used to match the record
        across the three parts."""
    print(f"Uploading '{name}' to organization-one's Access Point...")
    try:
        # No session argument is given for organization-one's uploads.
        asset_0 = tb.Asset.position(
            data_dir / f"{split}_clf0.csv",
            name=name,
            desc=description,
            is_discoverable=True,
        )
    except tb.TripleblindAssetAlreadyExists:
        # An asset with this name is already positioned; report and continue.
        print(f"Asset '{name}' already exists.")


# Organization-two holds the second vertical slice (columns 41-100) of the
# classification training data.
try:
    name = f"{prefix}Classification PSI Regression vertical train 41-100"
    print(f"Uploading '{name}' to organization-two's Access Point...")
    asset_1 = tb.Asset.position(
        data_dir / "train_clf1.csv",
        name=name,
        desc="""The second part of three datasets which jointly can be used to
        train a classification model.  The data has been vertically partitioned, meaning
        each dataset has portions of the data for each complete record.  This dataset
        holds columns 41-100, along with a common record ID used to match the record
        across the three parts.""",
        is_discoverable=True,
        session=session_org_2,
    )

    if tb.config.create_agreements_on_example_input_assets:
        # Honor the same configuration switch that guards the agreement on
        # organization-three's training data in this script.  When enabled,
        # the agreement lets organization-one's team run PSI vertical
        # regression training against this asset; when disabled, no
        # agreement is attached here.
        asset_1.add_agreement(
            with_team=tb.config.example_user1["team_id"],
            operation=tb.Operation.PSI_VERTICAL_REGRESSION_TRAIN,
            session=session_org_2,
        )
        print("Created Agreement for training")
except tb.TripleblindAssetAlreadyExists:
    # Nothing new was positioned, so no agreement is attached on this path.
    print(f"Asset '{name}' already exists.")


# Second vertical slice (columns 41-100) of the classification test data,
# positioned through organization-two's session.
name = f"{prefix}Classification PSI Regression vertical test 41-100"
print(f"Uploading '{name}' to organization-two's Access Point...")
try:
    asset_1 = tb.Asset.position(
        data_dir / "test_clf1.csv",
        session=session_org_2,
        is_discoverable=True,
        name=name,
        desc="""The second part of three datasets which jointly can be used to
        test a classification model.  The data has been vertically partitioned, meaning
        each dataset has portions of the data for each complete record.  This dataset
        holds columns 41-100, along with a common record ID used to match the record
        across the three parts.""",
    )
except tb.TripleblindAssetAlreadyExists:
    # An asset with this name is already positioned; report and continue.
    print(f"Asset '{name}' already exists.")


# Third vertical slice (columns 101-120) of the classification training data,
# positioned through organization-three's session.
name = f"{prefix}Classification PSI Regression vertical train 101-120"
print(f"Uploading '{name}' to organization-three's Access Point...")
try:
    asset_2 = tb.Asset.position(
        data_dir / "train_clf2.csv",
        session=session_org_3,
        is_discoverable=True,
        name=name,
        desc="""The third part of three datasets which jointly can be used to
        train a classification model.  The data has been vertically partitioned, meaning
        each dataset has portions of the data for each complete record.  This dataset
        holds columns 101-120, along with a common record ID used to match the record
        across the three parts.""",
    )

    # Optionally attach an Agreement so organization-one's team can use this
    # dataset for training without the owner granting permission explicitly
    # each time.  Controlled by a flag in the SDK configuration.
    if tb.config.create_agreements_on_example_input_assets:
        asset_2.add_agreement(
            with_team=tb.config.example_user1["team_id"],
            operation=tb.Operation.PSI_VERTICAL_REGRESSION_TRAIN,
            session=session_org_3,
        )
        print("Created Agreement for training")
except tb.TripleblindAssetAlreadyExists:
    # Nothing new was positioned, so no agreement is attached on this path.
    print(f"Asset '{name}' already exists.")


# Third vertical slice (columns 101-120) of the classification test data,
# positioned through organization-three's session.
name = f"{prefix}Classification PSI Regression vertical test 101-120"
print(f"Uploading '{name}' to organization-three's Access Point...")
try:
    asset_2 = tb.Asset.position(
        data_dir / "test_clf2.csv",
        session=session_org_3,
        is_discoverable=True,
        name=name,
        desc="""The third part of three datasets which jointly can be used to
        test a classification model.  The data has been vertically partitioned, meaning
        each dataset has portions of the data for each complete record.  This dataset
        holds columns 101-120, along with a common record ID used to match the record
        across the three parts.""",
    )
except tb.TripleblindAssetAlreadyExists:
    # An asset with this name is already positioned; report and continue.
    print(f"Asset '{name}' already exists.")

# Printed after the last vertically partitioned classification upload.
print("Classification Data is in position.")


################################################################################
# Linear Regression data

# Organization-one holds the first vertical slice (columns 0-40) of the
# linear regression data.  The train and test uploads differ only in the
# split name, so one loop handles both -- train first, then test.
for split in ("train", "test"):
    name = f"{prefix}Linear PSI Regression vertical {split} 0-40"
    # The continuation lines keep their eight-space indent on purpose: that
    # whitespace is part of the stored description text.
    description = f"""The first part of three datasets which jointly can be used to
        {split} a linear regression model.  The data has been vertically partitioned, meaning
        each dataset has portions of the data for each complete record.  This dataset
        holds columns 0-40, along with a common record ID used to match the record
        across the three parts."""
    print(f"Uploading '{name}' to organization-one's Access Point...")
    try:
        # No session argument is given for organization-one's uploads.
        asset_0 = tb.Asset.position(
            data_dir / f"{split}_reg0.csv",
            name=name,
            desc=description,
            is_discoverable=True,
        )
    except tb.TripleblindAssetAlreadyExists:
        # An asset with this name is already positioned; report and continue.
        print(f"Asset '{name}' already exists.")


# Organization-two holds the second vertical slice (columns 41-100) of the
# linear regression training data.
try:
    name = f"{prefix}Linear PSI Regression vertical train 41-100"
    print(f"Uploading '{name}' to organization-two's Access Point...")
    asset_1 = tb.Asset.position(
        data_dir / "train_reg1.csv",
        name=name,
        desc="""The second part of three datasets which jointly can be used to
        train a linear regression model.  The data has been vertically partitioned, meaning
        each dataset has portions of the data for each complete record.  This dataset
        holds columns 41-100, along with a common record ID used to match the record
        across the three parts.""",
        is_discoverable=True,
        session=session_org_2,
    )

    if tb.config.create_agreements_on_example_input_assets:
        # Honor the same configuration switch that guards the agreement on
        # organization-three's training data in this script.  When enabled,
        # the agreement lets organization-one's team run PSI vertical
        # regression training against this asset; when disabled, no
        # agreement is attached here.
        asset_1.add_agreement(
            with_team=tb.config.example_user1["team_id"],
            operation=tb.Operation.PSI_VERTICAL_REGRESSION_TRAIN,
            session=session_org_2,
        )
        print("Created Agreement for training")
except tb.TripleblindAssetAlreadyExists:
    # Nothing new was positioned, so no agreement is attached on this path.
    print(f"Asset '{name}' already exists.")


# Second vertical slice (columns 41-100) of the linear regression test data,
# positioned through organization-two's session.
name = f"{prefix}Linear PSI Regression vertical test 41-100"
print(f"Uploading '{name}' to organization-two's Access Point...")
try:
    asset_1 = tb.Asset.position(
        data_dir / "test_reg1.csv",
        session=session_org_2,
        is_discoverable=True,
        name=name,
        desc="""The second part of three datasets which jointly can be used to
        test a linear regression model.  The data has been vertically partitioned, meaning
        each dataset has portions of the data for each complete record.  This dataset
        holds columns 41-100, along with a common record ID used to match the record
        across the three parts.""",
    )
except tb.TripleblindAssetAlreadyExists:
    # An asset with this name is already positioned; report and continue.
    print(f"Asset '{name}' already exists.")


# Third vertical slice (columns 101-120) of the linear regression training
# data, positioned through organization-three's session.
name = f"{prefix}Linear PSI Regression vertical train 101-120"
print(f"Uploading '{name}' to organization-three's Access Point...")
try:
    asset_2 = tb.Asset.position(
        data_dir / "train_reg2.csv",
        session=session_org_3,
        is_discoverable=True,
        name=name,
        desc="""The third part of three datasets which jointly can be used to
        train a linear regression model.  The data has been vertically partitioned, meaning
        each dataset has portions of the data for each complete record.  This dataset
        holds columns 101-120, along with a common record ID used to match the record
        across the three parts.""",
    )

    # Optionally attach an Agreement so organization-one's team can use this
    # dataset for training without the owner granting permission explicitly
    # each time.  Controlled by a flag in the SDK configuration.
    if tb.config.create_agreements_on_example_input_assets:
        asset_2.add_agreement(
            with_team=tb.config.example_user1["team_id"],
            operation=tb.Operation.PSI_VERTICAL_REGRESSION_TRAIN,
            session=session_org_3,
        )
        print("Created Agreement for training")
except tb.TripleblindAssetAlreadyExists:
    # Nothing new was positioned, so no agreement is attached on this path.
    print(f"Asset '{name}' already exists.")


# Third vertical slice (columns 101-120) of the linear regression test data,
# positioned through organization-three's session.
name = f"{prefix}Linear PSI Regression vertical test 101-120"
print(f"Uploading '{name}' to organization-three's Access Point...")
try:
    asset_2 = tb.Asset.position(
        data_dir / "test_reg2.csv",
        session=session_org_3,
        is_discoverable=True,
        name=name,
        desc="""The third part of three datasets which jointly can be used to
        test a linear regression model.  The data has been vertically partitioned, meaning
        each dataset has portions of the data for each complete record.  This dataset
        holds columns 101-120, along with a common record ID used to match the record
        across the three parts.""",
    )
except tb.TripleblindAssetAlreadyExists:
    # An asset with this name is already positioned; report and continue.
    print(f"Asset '{name}' already exists.")


# Position test and train datasets on org1
# Two test sets (test_clf.csv, test_reg.csv) positioned for organization-one
# and marked as not discoverable.  Classification is uploaded first, then
# linear regression.
for model_label, csv_name, task in (
    ("Classification", "test_clf.csv", "classification"),
    ("Linear", "test_reg.csv", "linear regression"),
):
    name = f"EXAMPLE - {model_label} test data - psi vertical regression"
    print(f"Uploading '{name}' to organization-one's Access Point...")
    try:
        test_asset_0 = tb.Asset.position(
            data_dir / csv_name,
            name=name,
            desc=f"A set of test data used during {task} inference",
            is_discoverable=False,
        )
    except tb.TripleblindAssetAlreadyExists:
        # An asset with this name is already positioned; report and continue.
        print(f"Asset '{name}' already exists.")


# Two train sets (train_clf.csv, train_reg.csv) positioned for
# organization-one and marked as not discoverable; their descriptions say
# they are used for model comparison.  Classification is uploaded first,
# then linear regression.
for model_label, csv_name, task in (
    ("Classification", "train_clf.csv", "classification"),
    ("Linear", "train_reg.csv", "linear regression"),
):
    name = f"EXAMPLE - {model_label} train data - psi vertical regression"
    print(f"Uploading '{name}' to organization-one's Access Point...")
    try:
        train_asset_0 = tb.Asset.position(
            data_dir / csv_name,
            name=name,
            desc=f"A set of train data used during {task} inference for model comparison",
            is_discoverable=False,
        )
    except tb.TripleblindAssetAlreadyExists:
        # An asset with this name is already positioned; report and continue.
        print(f"Asset '{name}' already exists.")


# Final status line once every upload above has been attempted.
print("Data is in position.")
