#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

# Federated Blind Reports allow a consortium of data providers to offer access
# to reports backed by their private data.  All data providers create a set of
# standard data views, mapping their own data to match these agreed-upon common
# views.  Then a FederationGroup ties all of these views into an easily
# accessed consortium.
#
# Reports can now be defined to work against the standard views.  The report
# creator only needs to think about the standard view definition, they don't
# need to be concerned with data provider mappings -- those happen
# automatically.  Reports consist of three basic pieces:
#   * Query against the standard view
#     This is an SQL query performed against the data standard tables and fields
#   * Aggregation
#     Query results from all of the data providers are aggregated in private.
#   * Final reporting
#     The report stage can automatically return the simple aggregated results,
#     or the report writer can perform nearly any kind of post-processing on
#     the results to output a report as data files, HTML reports, plots, etc.
#
# At the time of usage, the consumer of the report has the ability to choose
# which data providers will be included in their report.  The query is executed
# by each of the data providers and aggregation automatically happens before the
# final report is returned to the consumer.


# In this example, we will create all the pieces needed for a consortium of two
# data providers to offer a Federated Blind Report to any consumer while still
# protecting the privacy of their data.

import tripleblind as tb


##########################################################################
# Step 1) Create standard view datasets

# For each data provider, we need to create the data sources.  The
# example_user1 will create the data sources for Hope Valley, and example_user2
# will create the data sources for Black Hill.  The data sets must
# conform to the agreed-upon standard data views, generating the same column
# names and data types.
#
# NOTE: Commonly the definition of the data sources would be performed by
# the data owners independently.  Data owners just need to conform to the
# agreed-upon standard data views.  In this example, we are creating all of the
# data sources in one script for simplicity.
#
# The standard data views for this example are:
#
#    Patient table:
#      health_acc_no: str
#      sex: str
#      race: str
#      language: str
#      height: float
#      weight: float
#      dob: date
#
#    Result table:
#      health_acc_no: str
#      blood_pressure: float
#      glucose: float
#      cholesterol: float
#      hcv_ab: str
#      hcv_rna: str
#
# A standard can also include much more detail, such as the units of measure or
# categories for a field.

# Stop immediately: this file is a reference walkthrough, so none of the
# statements below this line execute when the script is run.
raise SystemExit("This script is for reference only.")

##########################################################################
# Create Hope Valley's standard datasets
# example_user1 registers Hope Valley's two standard views.
tb.initialize(tb.config.example_user1["token"])

# Both views use the same database connection and the same registration
# flags, so those settings are declared once and reused for each asset.
hopevalley_source = dict(
    host="{{DEMO_HOST_1}}",
    port=1433,
    database="{{DEMO_DATABASE_1}}",
    username="{{DEMO_USER_1}}",
    password="{{DEMO_PASSWORD_1}}",
    is_discoverable=True,
    allow_overwrite=True,
)

# Patient view: selects the columns of the standard Patient table.
hopevalley_patients = tb.asset.MSSQLDatabase.create(
    name="EXAMPLE - Hope Valley Patient Database",
    desc="Patient information database #1 from the Federated Report example.",
    query="SELECT health_acc_no, sex, race, language, height, weight, dob FROM HopeValley1",
    **hopevalley_source,
)

# Result view: selects the columns of the standard Result table.
hopevalley_results = tb.asset.MSSQLDatabase.create(
    name="EXAMPLE - Hope Valley Results Database",
    desc="Results for patients of Hope Valley Hospital from the Federated Report example.",
    query="SELECT health_acc_no, blood_pressure, glucose, cholesterol, hcv_ab, hcv_rna FROM HopeValley1",
    **hopevalley_source,
)

##########################################################################
# Create Black Hill's standard datasets
# example_user2 registers Black Hill's two standard views.
tb.initialize(tb.config.example_user2["token"])

# Both views use the same database connection and the same registration
# flags, so those settings are declared once and reused for each asset.
blackhill_source = dict(
    host="{{DEMO_HOST_2}}",
    port=1433,
    database="{{DEMO_DATABASE_2}}",
    username="{{DEMO_USER_2}}",
    password="{{DEMO_PASSWORD_2}}",
    is_discoverable=True,
    allow_overwrite=True,
)

# Patient view: selects the columns of the standard Patient table.
blackhill_patients = tb.asset.MSSQLDatabase.create(
    name="EXAMPLE - Black Hill Patient Database",
    desc="Patient information database #2 from the Federated Report example.",
    query="SELECT health_acc_no, sex, race, language, height, weight, dob FROM BlackHill1",
    **blackhill_source,
)

# Result view: selects the columns of the standard Result table.
blackhill_results = tb.asset.MSSQLDatabase.create(
    name="EXAMPLE - Black Hill Results Database",
    desc="Results for patients of Black Hill Hospital from the Federated Report example.",
    query="SELECT health_acc_no, blood_pressure, glucose, cholesterol, hcv_ab, hcv_rna FROM BlackHill1",
    **blackhill_source,
)

##########################################################################
# Step 2) Create the FederationGroup

# For this example a third organization will act as the data consortium
# aggregator, although it could also be one of the data providers. The
# aggregator is responsible for maintaining the FederationGroup to tie these
# independent datasets into a usable data ecosystem.
# The consortium aggregator (example_user3) maintains the FederationGroup.
session = tb.initialize(tb.config.example_user3["token"])

# A member is described by how it should be displayed plus its assets, keyed
# by the standard view name ("Person" / "Results") used in report queries.
hope_valley_views = {"Person": hopevalley_patients, "Results": hopevalley_results}
hope_valley_member = tb.FederationMember.create(
    name="Hope Valley",
    display="Hope Valley",
    desc="Hope Valley Federation Member",
    assets=hope_valley_views,
)

black_hill_views = {"Person": blackhill_patients, "Results": blackhill_results}
black_hill_member = tb.FederationMember.create(
    name="Black Hill",
    display="Black Hill",
    desc="Black Hill Federation Member",
    assets=black_hill_views,
)

# Tie the members defined above together into the FederationGroup.
federation_members = [hope_valley_member, black_hill_member]
tb.FederationGroup.create(
    name="EXAMPLE - Federation Group 1",
    members=federation_members,
    allow_overwrite=True,
)
# NOTE: The FederationGroup can be redefined at any time, e.g. to add or remove
# a new data provider.  Simply re-run the create command using the same name.
# Also, the FederationGroup is only visible to the aggregator.


##########################################################################
# Step 3) Define the first Federated Blind Report

# Now that the data sources and the FederationGroup are in place, the aggregator
# can create one or more Federated Blind Reports which utilize them.
# Look up the FederationGroup by name; the report is attached to it.
group = tb.FederationGroup.find(name="EXAMPLE - Federation Group 1")

# SQL written against the standard data views.  {{mustache}} placeholders are
# variable parameters: when the report is run, the options selected by the
# user replace them.
query_template = """
   SELECT  p.sex, r.blood_pressure, r.glucose, r.hcv_rna
   FROM Person p
   JOIN Results r ON p.health_acc_no = r.health_acc_no
   WHERE r.hcv_rna = 'positive' AND p.race='{{race}}'
"""
# NOTE: Standard mustache sections such as {{^value}} or {{#value}} can also
# be used to conditionally include simple logic in the query.


# AggregationRules describe how the rows returned by the data providers are
# combined.  Non-aggregated data can never be viewed by either the aggregator
# or the report consumer.
column_aggregates = {"blood_pressure": "mean", "glucose": "mean", "hcv_rna": "count"}
agg_template = tb.report_asset.AggregationRules.create(
    group_by="sex",
    aggregates=column_aggregates,
)

# Optionally, the aggregator can define a post-processing script to generate
# more complex custom reports.  If not defined, the aggregated values are
# returned as a simple CSV table.
#
# Post-processing scripts are written in Python and have access to the
# aggregated values in the form of a pandas DataFrame.
# NOTE: This can also be the path to a file containing the script
#       (e.g. "my_post_process.py"), making it easier to write and test.  If a
#       path is provided, the file will be loaded and embedded into the report
#       template at the time it is created.
# Source of the post-processing step, kept as a string so it can be embedded
# into the report template.  Its postprocess() function receives the
# aggregated values as a pandas DataFrame, rounds the two mean columns to two
# decimal places, renames the aggregated columns to human-readable headings,
# and returns the DataFrame.
#
# The annotations are quoted, so they are not evaluated when the function is
# defined; they name the pandas class, which is spelled "DataFrame".
post_processing_script = """
def postprocess(df: "pd.DataFrame", ctx: "dict") -> "pd.DataFrame":
    df["blood_pressure"] = df["blood_pressure"].round(2)
    df["glucose"] = df["glucose"].round(2)
    df.rename(columns={"blood_pressure": "Mean Blood Pressure", "glucose": "Mean Glucose", "hcv_rna": "HCV RNA Count"}, inplace=True)
    return df
"""
# NOTE: This can also be a path to a file containing the script.  If a path
# is provided, the file will be loaded and embedded into the report template.


# The report is created with the query, aggregation, and post-processing steps
# along with any optional parameters that the report consumer can select.
# The only consumer-selectable parameter: the value chosen for "race" replaces
# the {{race}} placeholder in the query template.
race_choices = ("Asian", "Native Hawaiian", "White", "Black", "American Indian", "Other")
race_parameter = tb.report_asset.ReportParameter.create_string(
    name="race",
    display="Race",
    description="Racial demographic to filter this report on.",
    options=[tb.report_asset.ParameterOption(choice) for choice in race_choices],
)

# Assemble the report from the query, aggregation, and post-processing steps
# together with the optional parameters the report consumer can select.
blind_report = tb.report_asset.DatabaseReport.create(
    name="EXAMPLE - Federated Blind Report (Hep-C Positive Patients)",
    desc="Example of a Federated Blind Report for HCV RNA positive patients, filtered by on selected race.",
    query_template=query_template,
    is_discoverable=True,
    allow_overwrite=True,
    federation_group=group,
    federation_aggregation=agg_template,
    post_processing=post_processing_script,
    params=[race_parameter],
)

print("Federated Blind Report Created")

# Make this report visible and executable by all
blind_report.add_agreement(with_team="ANY", operation=blind_report)

##########################################################################
# Step 4) Data Provider Agreements

# Each data provider must agree to participate in this report.  This provides
# them assurance that they are aware how their data is being offered to data
# consumers.  They each have the ability to revoke this agreement at any time.
# Hope Valley (example_user1) agrees to let any team run the report against
# both of its standard views.
tb.initialize(tb.config.example_user1["token"])
for hopevalley_view in (hopevalley_patients, hopevalley_results):
    hopevalley_view.add_agreement(with_team="ANY", operation=blind_report)

# Black Hill (example_user2) grants the same agreement on its own views.
tb.initialize(tb.config.example_user2["token"])
for blackhill_view in (blackhill_patients, blackhill_results):
    blackhill_view.add_agreement(with_team="ANY", operation=blind_report)
