#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

import sys
from datetime import datetime

import torch

import tripleblind as tb


# NOTE(review): presumably makes this script's folder the working directory,
# so the relative file names used below resolve next to the script -- confirm.
tb.util.set_script_dir_current()

# One unique value is shared by every script in this folder.  To change it,
# edit "run_id.txt".
run_id = tb.util.read_run_id()


##########################################################################
# GET AUTHENTICATION TOKENS AND ESTABLISH CONNECTION TO THE ROUTER
#
# IN THIS INSTANCE WE ARE TREATING ORGANIZATION-ONE AS "SANTANDER"
#
# Set up the connection details for the TripleBlind instance.  This becomes
# the default session: unless explicitly specified otherwise, every operation
# below runs as the user 'organization_one'.
organization_one = tb.config.example_user1
tb.initialize(api_token=organization_one["token"])

#############################################################################
# Validate that the datasets are available

# Look up the three training datasets in the Router index.  Each one is
# searched by its run-specific name and the team that owns it.
san_owner = tb.config.example_user1["team_id"]
dataset_train0 = tb.Asset.find(f"SAN Client Stats {run_id}", owned_by=san_owner)

jpm_owner = tb.config.example_user2["team_id"]
dataset_train1 = tb.Asset.find(f"JPM Client Stats {run_id}", owned_by=jpm_owner)

pnb_owner = tb.config.example_user3["team_id"]
dataset_train2 = tb.Asset.find(f"PNB Client Stats {run_id}", owned_by=pnb_owner)

# Training needs all three; stop with a non-zero status if any lookup came
# back empty.
if not all((dataset_train0, dataset_train1, dataset_train2)):
    print("Datasets not found.")
    print("Must run 1_position_data_on_accesspoint.py first.")
    sys.exit(1)


#############################################################################
# Define the neural network we want to train
#############################################################################

training_model_name = "example-network-santandar-trainer"

# Layer stack, in order:
#   dense 26->120, ReLU, dense 120->160, ReLU, dropout 0.25,
#   dense 160->200, ReLU, split,
#   dense 200->160, ReLU, dense 160->10, ReLU, dense 10->1
builder = tb.NetworkBuilder()

for n_in, n_out in ((26, 120), (120, 160)):
    builder.add_dense_layer(n_in, n_out)
    builder.add_relu()
builder.add_dropout(0.25)
builder.add_dense_layer(160, 200)
builder.add_relu()

builder.add_split()  # required split layer

for n_in, n_out in ((200, 160), (160, 10)):
    builder.add_dense_layer(n_in, n_out)
    builder.add_relu()
# Final layer: a single output value, with no activation added after it.
builder.add_dense_layer(10, 1)

training_model = tb.create_network(training_model_name, builder)

#############################################################################
# Designate the files to use and train the network
#

# The loss function is selected by name; the name must match PyTorch's.
#   See: https://pytorch.org/docs/stable/nn.html#loss-functions
# Currently tested: 'BCEWithLogitsLoss', 'NLLLoss', 'CrossEntropyLoss'
loss_name = "BCEWithLogitsLoss"

# A one-element int32 tensor holding 17, run through tb.TorchEncoder before
# it is handed to the job as the loss's "pos_weight" parameter.
positive_weight_tensor = torch.arange(17, 18, dtype=torch.int32)
pos_weight = tb.TorchEncoder.encode(positive_weight_tensor)

# The optimizer is selected by name as well, again matching PyTorch.
#   See: https://pytorch.org/docs/stable/optim.html
# Currently tested: 'SGD', 'Adam', 'Adadelta'
optimizer_name = "Adam"
optimizer_params = dict(lr=0.001)

# The Preprocessor specifies what data to use for training and which column
# to treat as the classification label.  The first two datasets additionally
# need normalization to match the naming convention and data format defined
# by the PNB dataset.


def _labelled_float32_preprocessor(sql=None):
    """Build a tabular preprocessor that uses "target" as the label column.

    The builder is configured with add_column("target", target=True),
    all_columns(True) and dtype("float32").  When *sql* is given, it is
    applied via sql_transform() just before the dtype step.
    """
    steps = tb.TabularPreprocessor.builder()
    steps = steps.add_column("target", target=True)
    steps = steps.all_columns(True)
    if sql is not None:
        steps = steps.sql_transform(sql)
    return steps.dtype("float32")


# NOTE: a sql_transform query must read from a table named "data".

# First dataset: renames salar to sal, and rescales reemb and svr.
preprocess_0 = _labelled_float32_preprocessor(
    "SELECT target, salar as sal, reemb / 5.6 as reemb, amt, (svr * base) / 2 as svr, recib, tar, ind, ind_larg, ind_med, ind_cort, ind_emit, ind_recib, sal_q1, sal_q2, sal_q3, sal_q4, imp_amort, imp_venta, imp_sal, imp_op, imp_aport, saldo, base, FoR, num_med, mes_med FROM data"
)

# Second dataset: renames SAL to sal, and rescales amt.
preprocess_1 = _labelled_float32_preprocessor(
    "SELECT target, SAL as sal, reemb, amt * 8.2 as amt, svr, recib, tar, ind, ind_larg, ind_med, ind_cort, ind_emit, ind_recib, sal_q1, sal_q2, sal_q3, sal_q4, imp_amort, imp_venta, imp_sal, imp_op, imp_aport, saldo, base, FoR, num_med, mes_med FROM data"
)

# Third (PNB) dataset: used as-is, so no SQL transform.
preprocess_2 = _labelled_float32_preprocessor()

# Stamp the job name with the current local time, with " @ " placed between
# the date and time parts.
started_at = str(datetime.now()).replace(" ", " @ ")

# Training settings handed to the job.
training_params = {
    "epochs": 1,
    "loss_meta": {"name": loss_name, "params": {"pos_weight": pos_weight}},
    "optimizer_meta": {"name": optimizer_name, "params": optimizer_params},
    "data_type": "table",
    "data_shape": [26],  # number of columns of data in table
    "model_output": "binary",  # binary/multiclass/regression
}

# The datasets and preprocessors are listed in the same order.
job = tb.create_job(
    job_name=f"Santandar - {started_at}",
    operation=training_model,
    dataset=[dataset_train0, dataset_train1, dataset_train2],
    preprocessor=[preprocess_0, preprocess_1, preprocess_2],
    params=training_params,
)
print("Training network")

###########################################################################
# Create the network asset and download the trained network to a local file

# Submit the training job.  A submission that is not accepted is handled the
# same way as a failed training run below: report it and exit non-zero, so
# the script never ends silently with a "success" status.
if not job.submit():
    print("Job submission failed")
    sys.exit(1)

print(f"Creating network asset under name: {training_model_name}")
job.wait_for_completion()

# Throw away this network definition (no longer needed)
training_model.archive()

if not job.success:
    print("Training failed")
    sys.exit(1)

# The trained network is the asset attached to the job result.
trained_network = job.result.asset

print()
print("Trained Network Asset ID:")
print("    ===============================================")
print(f"    ===>  {trained_network.uuid} <===")
print("    ===============================================")
print("    Algorithm: Deep Learning Model")
print(f"    Job ID:    {job.job_name}")
print()

# Pull down the model for local validation
local_filename = trained_network.retrieve(save_as="local.zip", overwrite=True)
print("Trained network has been downloaded as:")
print(f"   {local_filename}")

# Save for use in 3a_local_inference.py
with open("local_model_filename.out", "w") as output:
    output.write(str(local_filename))

# Save for use in 3b_fed_inference.py / 3c_smpc_inference.py
with open("model_asset_id.out", "w") as output:
    output.write(str(trained_network.uuid))

print("Ready to run local inference.")
print()

# Create an agreement which allows the other team to use this
# trained model in subsequent steps.
agreement = trained_network.add_agreement(
    with_team=tb.config.example_user2["team_id"], operation=tb.Operation.EXECUTE
)
if agreement:
    print("Created Agreement for use of trained Asset.")
else:
    # Not fatal for this script, but say so rather than staying silent.
    print("WARNING: Could not create Agreement for use of trained Asset.")


############################################################################
# The 'local_filename' variable holds the local filename used when
# downloading the trained PyTorch object locally. It could easily be passed to
# an additional step to run the local inference.
