#!/usr/bin/env Rscript
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

# Install any required package that is not yet available.
# requireNamespace() only checks that the package can be loaded; unlike
# require() it does not attach it as a side effect. Attaching is done by the
# explicit library() calls that follow.
RequiredPackages <- c("reticulate", "stringr")
for (pkg in RequiredPackages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Load packages
library(reticulate)
library(stringr)

# Bind the Python `tripleblind` module through reticulate.
tb <- import("tripleblind")

# In this example the retailer runs the report; the transit company will be
# asked for permission to access its data.
retailer_token <- tb$config$example_user1[["token"]]
tb$initialize(api_token = retailer_token)

# Helper provided by the SDK; called here before any relative paths are used.
tb$util$set_script_dir_current()

# Look up the two datasets positioned by the setup script. The assignments
# inside the tryCatch expression are evaluated in the calling environment, so
# `run_id`, `trips` and `purchases` remain available to the rest of the script.
result <- tryCatch(
  {
    run_id <- tb$util$read_run_id()
    trips <- tb$TableAsset$find(paste0("Transport Data-", run_id))
    purchases <- tb$TableAsset$find(paste0("Shop Transaction-", run_id))

    # NOTE(review): presumably find() yields None (NULL in R) when no asset
    # matches -- confirm against the SDK. Route that case to the same handler
    # instead of failing later with an obscure `$ operator` error.
    if (is.null(trips) || is.null(purchases)) {
      stop("required assets were not found", call. = FALSE)
    }
    purchases
  },
  error = function(err) {
    # conditionMessage() extracts the text; pasting the condition object
    # itself produces a garbled multi-element vector.
    print(paste("An error occurred:", conditionMessage(err)))
    print("ERROR: You must run 1_position_data_on_accesspoint.py first.")
    # Exit with a failure status. Asking whether to save the workspace is only
    # meaningful in an interactive session.
    quit(save = if (interactive()) "ask" else "no", status = 1)
  }
) # end trycatch

# Find common customers between the two databases by matching on address.
# Multi-element R character vectors are converted to Python lists by
# reticulate, so c(...) stands in for the Python list literals.
overlap <- purchases$blind_join(
  intersect_with = trips,
  match_column = c("address", "customer_address"),
  match_fuzziness = 0.3,
  return_columns = c("depart_station", "arrive_station"),
  silent = TRUE,
  # NOTE(review): assumes the SDK exposes JoinType at the top level of the
  # `tripleblind` module -- confirm.
  join_type = tb$JoinType$INNER_PARTITIONED
)

df <- overlap$dataframe

# Count how often each station appears across all returned columns.
# stack() flattens the columns into a single `values` column; table() tallies
# it. Base R is used throughout so no package beyond those already loaded is
# needed.
counts <- as.data.frame(
  table(stack(df)$values),
  responseName = "n",
  stringsAsFactors = FALSE
)

# Sort by descending visit count and keep the first 10 rows (exactly 10; ties
# at the cut-off are not extended).
counts <- counts[order(-counts$n), , drop = FALSE]
top10 <- head(counts, 10)
rownames(top10) <- NULL

colnames(top10)[1] <- "station_id"
colnames(top10)[2] <- "# visits"

cat("\n")
print("Top 10 stations")
print(top10)

# Write the join result to a local CSV for inspection/validation, replacing
# any file left over from a previous run.
output_path <- "out.csv"
overlap$download(output_path, overwrite = TRUE)
