#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

import tripleblind as tb


# Presumably makes this script's own directory the working directory so the
# relative paths used further down ("example_data", "../../../.cache") resolve
# the same way wherever the script is launched from. This is inferred from the
# helper's name only -- confirm against the tripleblind documentation.
tb.util.set_script_dir_current()

# Names assigned to the columns of the header-less training files: two
# bookkeeping columns, three settings, then the channels s1 .. s21.
COLUMNS = [
    "id",
    "cycle",
    "setting1",
    "setting2",
    "setting3",
    *(f"s{i}" for i in range(1, 22)),
]


def windows(nrows, size, step=2):
    """Yield ``(start, end)`` index pairs for sliding windows over ``nrows`` rows.

    Window starts advance by ``step`` from 0 while ``start < nrows``; ``end``
    is always ``start + size``, so trailing windows may run past ``nrows``.
    Callers that need complete windows must filter those out themselves.

    Args:
        nrows: Number of rows available.
        size: Length of each window.
        step: Distance between consecutive window starts (default 2, the
            stride this script has always used).

    Yields:
        Tuples ``(start, start + size)``.

    Raises:
        ValueError: If ``step`` is not positive. Because this is a generator,
            the error surfaces when iteration begins.
    """
    if step <= 0:
        # A non-positive stride would never reach nrows.
        raise ValueError(f"step must be a positive integer, got {step!r}")
    for start in range(0, nrows, step):
        yield start, start + size


def segment_signal(features, labels, window_size=30, step=2):
    """Cut a row-ordered signal into overlapping, fixed-length windows.

    Windows start at rows ``0, step, 2 * step, ...`` and only windows that fit
    completely inside ``features`` are kept. Each window gets a leading
    channel axis of length 1 and is labelled with the entry of ``labels``
    that belongs to the window's last row.

    Args:
        features: Array-like indexed by row along its first axis.
        labels: Array-like with one entry per row of ``features``.
        window_size: Number of consecutive rows per window.
        step: Distance between consecutive window starts (default 2).

    Returns:
        ``(segments, segment_labels)`` where ``segments`` has shape
        ``(batch, 1, window_size, ...)`` and ``segment_labels`` has one entry
        per window.

    Raises:
        ValueError: If ``window_size`` or ``step`` is not positive, or if
            ``features`` has fewer than ``window_size`` rows, so that no
            complete window exists.
    """
    if window_size < 1 or step < 1:
        raise ValueError("window_size and step must be positive integers")

    features = np.asarray(features)
    labels = np.asarray(labels)
    nrows = len(features)

    # Keep only the starts whose window ends at or before the last row.
    starts = np.arange(0, nrows - window_size + 1, step)
    if starts.size == 0:
        raise ValueError(
            f"need at least {window_size} rows to build one window, got {nrows}"
        )

    # Row indices of every window, shape (batch, window_size); indexing with
    # them gathers all windows in one vectorised step.
    row_index = starts[:, np.newaxis] + np.arange(window_size)
    segments = features[row_index][:, np.newaxis]  # (batch, channel, rows, cols)
    segment_labels = labels[starts + window_size - 1]
    return segments, segment_labels


def load_train_df(train_file):
    """Load a whitespace-delimited training file and attach an ``RUL`` column.

    The file is read without a header row and its columns are named after the
    module-level ``COLUMNS`` list. ``RUL`` is computed per ``id`` as the
    largest ``cycle`` seen for that ``id`` minus the row's own ``cycle``, so
    it counts down to 0 on the last recorded cycle of each ``id``.

    Args:
        train_file: Path (or anything ``pandas.read_csv`` accepts) of the
            training file.

    Returns:
        A DataFrame with the ``COLUMNS`` columns plus an integer ``RUL``.
    """
    # Raw string: "\s" is not a valid escape sequence in a normal literal.
    train_df = pd.read_csv(train_file, sep=r"\s+", header=None)
    train_df.columns = COLUMNS

    # Name the aggregation as a string; passing the builtin ``max`` to
    # transform is deprecated in recent pandas releases.
    last_cycle = train_df.groupby(["id"])["cycle"].transform("max")
    train_df["RUL"] = (last_cycle - train_df["cycle"]).astype(int)
    return train_df


def create_train_numpy_dump_from_dataframe(df, output_filename):
    """Build a windowed, normalized training set and hand it to ``tb.Package``.

    The ``RUL`` column becomes the label. The first two remaining columns are
    discarded and everything else is standardized with ``StandardScaler``
    before being cut into windows by ``segment_signal`` (default window
    settings). The caller's DataFrame is left unmodified.

    Args:
        df: DataFrame containing an ``RUL`` column; after removing ``RUL`` the
            first two columns are treated as non-features.
        output_filename: Name passed straight to ``tb.Package.from_numpy``.

    Returns:
        ``(train_x, train_y)`` as ``float32`` arrays: the windowed features
        and the label of each window.
    """
    # Labels as a column vector so each window's label keeps a trailing axis.
    Y = np.expand_dims(df["RUL"].values, axis=1)

    # Trim a copy instead of deleting columns from the caller's frame.
    feature_frame = df.drop(columns="RUL")
    feature_frame = feature_frame.drop(columns=feature_frame.columns[[0, 1]])

    ####################
    # Normalize the data
    ##
    scaler = StandardScaler()
    X = scaler.fit_transform(feature_frame)

    # NOTE(review): windows are cut over consecutive rows of the whole frame,
    # so a window can span rows that belong to two different ``id`` values.
    train_x, train_y = segment_signal(X, Y)
    train_x = train_x.astype(np.float32)
    train_y = train_y.astype(np.float32)

    # train_x is the training data and train_y the corresponding labels.
    tb.Package.from_numpy(output_filename, train_x, train_y)
    return train_x, train_y


def create_nn_train(df, output_dir=None):
    """Write a flat CSV training set for the neural network example.

    The ``RUL`` column is renamed to ``target`` and moved to the end, the
    first two remaining columns are discarded, and the result is saved as
    ``FD001_nn_train.csv``. The caller's DataFrame is left unmodified.

    Args:
        df: DataFrame containing an ``RUL`` column; after removing ``RUL`` the
            first two columns are treated as non-features.
        output_dir: Directory to write into. Defaults to the module-level
            ``data_dir`` so existing calls behave as before.
    """
    target = df["RUL"].values

    # Build a trimmed copy rather than deleting columns from the caller's frame.
    table = df.drop(columns="RUL")
    table = table.drop(columns=table.columns[[0, 1]])
    table = table.assign(target=target)

    # The global is only looked up when no directory was supplied.
    destination = Path(data_dir if output_dir is None else output_dir)
    table.to_csv(destination / "FD001_nn_train.csv", index=False)


# Download CMAPSS Data
# Requests the CMAPSSData.zip resource with example_data/ as the save
# directory. The train_FD00x.txt files read further down are expected to be
# found in that directory, presumably unpacked there by expand=True (TODO
# confirm against the tripleblind util documentation).
# NOTE(review): the return value is bound to `test_data`, but that name is not
# used anywhere in the visible part of this script.
data_dir = Path("example_data")
test_data = tb.util.download_tripleblind_resource(
    "CMAPSSData.zip", save_to_dir=data_dir, cache_dir="../../../.cache", expand=True
)

#########################################################################
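# Process the files for several reasons:
# 1. To reformat the rows into 2D windows (rows x features) per sample.
# 2. To normalize the data (although this may not be required).
# 3. To calculate the RUL-like dependent variable for training purposes.
# 4. To drop the columns that are not used as features (the first two, "id"
#    and "cycle"). NOTE: features that never change are not currently removed.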

#########################################################################
#  PRE-PROCESS
#
# FD001 is exported twice: as a flat CSV (create_nn_train) and as a windowed
# set handed to tb.Package.from_numpy. The CSV helper receives a copy so the
# frame passed to the windowed dump on the next line is guaranteed to still
# hold every column.
df = load_train_df(data_dir / "train_FD001.txt")
create_nn_train(df.copy())
train_x1, train_y1 = create_train_numpy_dump_from_dataframe(df, "FD001_train.zip")

# FD002 - FD004 only get the windowed export; each file is loaded, scaled and
# segmented independently of the others.
df2 = load_train_df(data_dir / "train_FD002.txt")
train_x2, train_y2 = create_train_numpy_dump_from_dataframe(df2, "FD002_train.zip")

df3 = load_train_df(data_dir / "train_FD003.txt")
train_x3, train_y3 = create_train_numpy_dump_from_dataframe(df3, "FD003_train.zip")

df4 = load_train_df(data_dir / "train_FD004.txt")
train_x4, train_y4 = create_train_numpy_dump_from_dataframe(df4, "FD004_train.zip")
