#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import tripleblind as tb


# NOTE(review): presumably switches the working directory to this script's
# folder so the relative paths below resolve against it -- confirm in the
# tripleblind documentation.
tb.util.set_script_dir_current()

# Local folder that holds the downloaded CSV and the generated packages.
data_dir = Path("example_data")
data_dir.mkdir(exist_ok=True)

# Fetch the raw training CSV into data_dir, going through the shared cache.
tb.util.download_tripleblind_resource(
    "cont_infer_train.csv", save_to_dir=data_dir, cache_dir="../../.cache"
)

# Load Training Data
train_csv = data_dir / "cont_infer_train.csv"
df = pd.read_csv(train_csv)

# Model Parameters
# T: number of previous points of "P" used as one input window. The original
# note describes these as seconds (assumes one sample per second -- TODO confirm).
T = 30

# Horizon: extra offset, in samples, between the end of a window and its
# target. The window power[i : i + T] is paired with power[i + T + Horizon].
Horizon = 3

power = df["P"].to_numpy()

# Create input vectors, T previous power points each
print(f"Generating dataset of sliding window of size = {T}")

# Number of (window, target) pairs whose target index stays inside the series.
n_samples = len(power) - T - Horizon
if n_samples < 1:
    # Fail early with a clear message instead of a negative-dimension error
    # from NumPy or an empty-dataset error later in the split.
    raise ValueError(
        f"Need more than {T + Horizon} samples in column 'P' to build windows "
        f"of size {T} with horizon {Horizon}, got {len(power)}"
    )

# Build every window with a single broadcast index instead of a Python loop:
# row i of window_idx is [i, i + 1, ..., i + T - 1].
window_starts = np.arange(n_samples)
window_idx = window_starts[:, None] + np.arange(T)

# Cast to float64 so the arrays keep the dtype np.empty() used to give them.
data_x = power[window_idx].astype(np.float64)
power_T = power[window_starts + T + Horizon].astype(np.float64).reshape(n_samples, 1)

# X has shape (n_samples, 1, T, 1); y has shape (n_samples, 1).
X = data_x.reshape(n_samples, 1, T, 1)

y = power_T.reshape(n_samples, 1)

# Train test split: 60% of the samples go to the training set, the rest to test.
# NOTE(review): no random_state is passed, so the split differs between runs.
split = train_test_split(X, y, train_size=0.6)
X_train, X_test, y_train, y_test = split

# Write each partition as a TripleBlind numpy package that will be ready to be
# positioned as an asset.
for archive_name, features, targets in (
    ("train.zip", X_train, y_train),
    ("test.zip", X_test, y_test),
):
    tb.Package.from_numpy(data_dir / archive_name, features, targets)
