#!/usr/bin/env Rscript
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

# Install any required packages that are not yet available.
# requireNamespace() only checks whether a package can be loaded; unlike
# require() it does not attach it, so attaching is left to the library()
# calls that follow.
RequiredPackages <- c("reticulate", "stringr", "dplyr")
missing_packages <- RequiredPackages[
  !vapply(RequiredPackages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Load packages
library(reticulate)
library(stringr)
library(dplyr)

# Bind the TripleBlind Python SDK through reticulate.
tb <- import("tripleblind")

# NOTE(review): presumably makes paths relative to this script's directory;
# confirm against the SDK documentation.
tb$util$set_script_dir_current()
tb$initialize(api_token = tb$config$example_user1[["token"]])

# Look up the two table assets for the current run. Both asset names are
# suffixed with the run id read from the SDK. Any failure here ends the
# script with a hint to run the positioning step first.
result <- tryCatch(
  {
    run_id <- tb$util$read_run_id()
    trips <- tb$TableAsset$find(paste0("Transport Data-", run_id))
    purchases <- tb$TableAsset$find(paste0("Shop Transaction-", run_id))
    # NOTE(review): assumes find() may return None (NULL in R) when the asset
    # is missing rather than raising; route that case to the handler too.
    if (is.null(trips) || is.null(purchases)) {
      stop("required assets were not found", call. = FALSE)
    }
    purchases # value of the block, stored in `result` as before
  },
  error = function(err) {
    # conditionMessage() extracts just the message text; pasting the whole
    # condition object would produce a vector of message and call.
    print(paste0("An error occurred: ", conditionMessage(err)))
    print("ERROR: You must run 1_position_data_on_accesspoint.py first.")
    # Never prompt to save the workspace, and signal failure to the caller.
    quit(save = "no", status = 1)
  }
) # end trycatch

# Perform a Private Set Intersection between the two databases by "address".
# The column arguments are built with list() because square-bracket list
# literals are Python syntax and do not parse in R. reticulate converts an R
# list to a Python list, including the single-element case, where a
# length-one character vector would become a plain string instead.
overlap <- purchases$blind_join(
  intersect_with = trips,
  match_column = list("address", "customer_address"),
  return_columns = list("customer_address"),
  silent = TRUE
)

# Save the join result locally, replacing any previous download.
overlap$download("overlap_exact.csv", overwrite = TRUE)

# Report how many distinct customer addresses appear in the overlap.
# length() of a data frame is its number of columns, so the count is taken
# on the column values instead.
# NOTE(review): assumes overlap$dataframe is converted by reticulate into an
# R data.frame with a "customer_address" column - confirm.
common_customers <- n_distinct(overlap$dataframe[["customer_address"]])
print(
  paste0("Number of common customers: ", common_customers)
) # n_distinct is from dplyr package
