#!/usr/bin/env python
# Copyright (c) TripleBlind Holdings, Inc. Confidential and Proprietary. All rights reserved.

import tripleblind as tb


# Deliberate guard: this file is a template, so stop immediately if it is run
# as-is.  Nothing below this line executes until the guard is removed, which
# should only happen after the placeholder values further down are filled in.
raise SystemExit("This script is for reference only.")

# SDK start-up call; it runs before the asset is created further down.
tb.initialize()

#############################################################################
# Databricks is a cloud-based data platform that provides a unified analytics
# platform for data science, engineering, and business analytics.  It is
# commonly used to process large amounts of data and is often used in
# conjunction with Apache Spark.
#
# Databricks can be used to create and manage data assets that can be shared
# with other users and applications.  These assets can be used to create
# views of data that can be shared with other users and applications.
#
# This script demonstrates how to create a TripleBlind data asset that is linked
# to a Databricks database.  The asset is created by defining a view of the data
# using an SQL Query.  The view can be as simple or complex as needed.
#
# The asset can be shared with other users and applications, and the data can be
# accessed using the TripleBlind SDK.   The data is protected using TripleBlind's
# privacy-preserving technology, so that the data can be used without exposing
# sensitive information.
#
# This script is for reference only and should be customized to match your
# specific environment and requirements.
#############################################################################

# You can find the connection details for your Databricks cluster in the
# Databricks web interface.  Under Compute in the sidebar, choose your
# target cluster.  On the Configuration tab for that cluster, expand Advanced
# Options and choose the JDBC/ODBC tab, where you will find the needed
# values.  See the Databricks documentation for more details:
# https://docs.databricks.com/en/integrations/compute-details.html
#
# Replace each "{{...}}" placeholder below with the value for your environment.
#
# Address of your Databricks server
# This is the Server Hostname or IP address of your Databricks instance for
# accessing the data.
# e.g. "dbc-90812f4e-f4bf.cloud.databricks.com"
SERVER = "{{MY_DATABRICKS_SERVER}}"

# The HTTP path for this Databricks compute resource
# This is the "HTTP Path" value shown next to the Server Hostname on the
# JDBC/ODBC tab.  It is a path, not a full URL, and it is not the address of
# the Databricks web interface.
# e.g. "sql/protocolv1/o/2724819753459962/0517-143110-4mjk0emz"
HTTP_PATH = "{{MY_DATABRICKS_HTTP_PATH}}"


# The catalog and schema that identify the specific database to use
CATALOG = "samples"  # This is the name of the catalog within Databricks
SCHEMA = "tpch"  # This is the name of the schema within the catalog


# Access token for Databricks.  You can find this in the Databricks web
# interface under User Settings > Developer > Access Tokens > Manage.
# This can be a simple copy of your Databricks access token, but creating
# assets using the TripleBlind Secrets manager is recommended instead.
# Avoid committing a real token to source control.
# e.g. "dapi12345678951c81e344a935123b34bf0b"
ACCESS_TOKEN = "{{MY_DATABRICKS_TOKEN}}"

# SQL statement which generates the view of data you want to connect as an
# asset.  This can be simple or complex with conditions and calculated fields.
# Each selected column is renamed with "as" in the result.  Columns are
# referenced without a table qualifier because the FROM clause declares no
# table alias (a "c." prefix would not resolve and the query would fail).
SQL_VIEW = (
    "SELECT c_age as Age, c_zip as Zip, "
    "c_prim_diag_cd as primary_diagnosis_cd, "
    "panel1_image as Panel1_Image "
    "FROM tpch.customer;"
)

#############################################################################

# Per-column linked-storage settings, keyed by column name.
# NOTE(review): presumably this marks "panel1_image" as holding references to
# image/tiff files kept in Azure storage -- confirm against the TripleBlind
# SDK documentation.  Also confirm whether the key has to match the column
# name as aliased in SQL_VIEW ("Panel1_Image") or the underlying column name.
linked_storage_columns = {
    "panel1_image": {
        "storage_type": "azure",
        "content_type": "image/tiff",
    },
}

# Create the Databricks-backed asset from the settings and query defined above.
asset1 = tb.asset.DatabricksDatabase.create(
    # How the asset is labelled
    name="Databricks Demo DB",
    desc="Databricks Database Asset",
    # Where and how to connect
    server_hostname=SERVER,
    http_path=HTTP_PATH,
    access_token=ACCESS_TOKEN,
    # What data the asset exposes
    catalog=CATALOG,
    schema=SCHEMA,
    query=SQL_VIEW,
    linked_storage_columns=linked_storage_columns,
    # Created with discoverability switched off
    is_discoverable=False,
)
