#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

from pathlib import Path

import tripleblind as tb
from preprocessor.package import Package
from preprocessor.spec import FieldOverride


##############################################################################
#
# Script to create sample financial data.  Three datasets are created and
# placed on the access points for example_user1, 2 and 3.  Permissions are
# set for training jobs to operate hands-free from the organization associated
# with example_user3.
#
##############################################################################

# Configuration records for the three example users, read from the SDK's
# config.  The script below reads their "token", "login" and "team_id" entries.
user1, user2, user3 = (
    tb.config.example_user1,
    tb.config.example_user2,
    tb.config.example_user3,
)

# Markdown paragraph shared verbatim by all three dataset descriptions.
_SHARED_BLURB = """\
Dataset built from the original Kaggle [customer transaction prediction
challenge](https://www.kaggle.com/c/santander-customer-transaction-prediction/).
That dataset has been split into three parts and can be used for a variety of
financial data examples and tests.  For demonstration purposes it has been
"de-anonymized" by renaming the columns to fictional but realistic financial
variable names."""


def _describe(logo_url, bank, siblings):
    """Return the Markdown description for one bank's example dataset.

    Args:
        logo_url: URL of the logo image shown at the top of the description.
        bank: Display name of the (fictional) data owner.
        siblings: Short names of the two companion datasets, e.g. "PNB and JPM".

    Returns:
        The description text, starting and ending with a newline.
    """
    return (
        f"\n![logo]({logo_url})\n"
        f"{bank} customer dataset (fictional)\n"
        "\n"
        f"{_SHARED_BLURB}  See also the associated {siblings} datasets.\n"
    )


# Asset names and descriptions, one pair per simulated company.
name1 = "EXAMPLE - SAN Customer Database"
desc1 = _describe(
    "https://goodlogo.com/images/logos/small/grupo_santander_logo_2870.gif",
    "Santander Bank",  # spelling corrected from "Santandar"
    "PNB and JPM",
)

name2 = "EXAMPLE - JPM Customer Database"
desc2 = _describe(
    "https://goodlogo.com/images/logos/small/jp_morgan_chase_logo_2723.gif",
    "JPMorgan Chase Bank",
    "PNB and SAN",
)

name3 = "EXAMPLE - PNB Customer Database"
desc3 = _describe(
    "https://goodlogo.com/images/logos/small/punjab_national_bank_logo_3901.gif",
    "Punjab National Bank",
    "SAN and JPM",
)


# Relative directory holding the sample data files used below.
data_dir = Path("example_data")

# Switch to the script's directory (per the helper's name) and initialize the
# SDK with example_user1's API token.
tb.util.set_script_dir_current()
tb.initialize(api_token=user1["token"])

##############################################################################
#  Company one - simulating data from Santander Bank
#############################################################################

try:
    session1 = tb.Session(api_token=user1["token"], from_default=True)

    # Build a package from the CSV, overriding the spec so that the "amt" and
    # "reemb" fields are marked as not restricted.
    overrides = [
        FieldOverride(name=field, restricted=False) for field in ("amt", "reemb")
    ]
    pack = Package.create(
        data_dir / "SAN.zip",
        record_data=data_dir / "SAN.csv",
        spec_override=overrides,
    )

    print(f"Creating dataset '{name1}' on {user1['login']}'s Access Point...")
    dataset1 = tb.Asset.position(
        file_handle=pack,
        name=name1,
        desc=desc1,
        is_discoverable=True,
        session=session1,
    )
except tb.TripleblindAssetAlreadyExists:
    # A previous run already positioned this asset; reuse it.
    print(f"   asset '{name1}' already exists, skipping.")
    dataset1 = tb.Asset.find(name1, owned_by=user1["team_id"])

# Attach Agreements so that any team may run these operations against the
# dataset without further interaction.
for operation in (
    tb.Operation.BLIND_LEARNING,
    tb.Operation.XGBOOST_TRAIN,
    tb.Operation.PRIVATE_SET_INTERSECTION,
):
    dataset1.add_agreement(with_team="ANY", operation=operation, session=session1)
print("Created Agreement for any to train against this dataset.\n")


#############################################################################
#  Company two - simulating data from JPMorgan Chase
#############################################################################

# Session acting as example_user2.  Created outside the ``try`` so that only
# the positioning call is guarded by the "already exists" handler.
session2 = tb.Session(api_token=user2["token"], from_default=True)

print(f"Creating dataset '{name2}' on {user2['login']}'s Access Point...")
try:
    dataset2 = tb.Asset.position(
        file_handle=data_dir / "JPM.csv",
        name=name2,
        desc=desc2,
        is_discoverable=True,
        session=session2,
    )
except tb.TripleblindAssetAlreadyExists:
    # A previous run already positioned this asset; reuse it.  Search with the
    # owner's session rather than the default one (initialized for
    # example_user1), matching every other call in this section.
    print(f"   asset '{name2}' already exists, skipping.")
    dataset2 = tb.Asset.find(name2, owned_by=user2["team_id"], session=session2)

# Attach Agreements so that any team may run these operations against the
# dataset without further interaction.
for operation in (
    tb.Operation.BLIND_LEARNING,
    tb.Operation.XGBOOST_TRAIN,
    tb.Operation.PRIVATE_SET_INTERSECTION,
):
    dataset2.add_agreement(with_team="ANY", operation=operation, session=session2)
print("Created Agreement for any to train against this dataset.\n")


#############################################################################
#  Company three - simulating data from Punjab National Bank
#############################################################################

# Session acting as example_user3.  Created outside the ``try`` so that only
# the positioning call is guarded by the "already exists" handler.
session3 = tb.Session(api_token=user3["token"], from_default=True)

# No agreement is created for this third dataset.  This is intentional to
# allow different demonstrations.  If a training is initiated by a login
# associated with the third team, no permission is needed to
# perform the training.  This is useful for examples and tests with no
# GUI interaction.
#
# If the training is initiated by another team, usage of this
# dataset will need to be approved.  This is useful for demonstrations.
print(f"Creating dataset '{name3}' on {user3['login']}'s Access Point...")
try:
    # Named dataset3 for consistency with dataset1/dataset2 (previously
    # ``dataset_train2``); the handle is not used further in this script.
    dataset3 = tb.Asset.position(
        file_handle=data_dir / "PNB.csv",
        name=name3,
        desc=desc3,
        is_discoverable=True,
        session=session3,
    )
except tb.TripleblindAssetAlreadyExists:
    print(f"   asset '{name3}' already exists, skipping.")


print("Data is in position.")
