diff --git a/.gitignore b/.gitignore
index 334911b12..dc10fb96f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,4 @@ remorph_transpile/
 /linter/src/main/antlr4/library/gen/
 .databricks-login.json
 /core/src/main/antlr4/com/databricks/labs/remorph/parsers/*/gen/
+src/databricks/labs/remorph/app/.env
diff --git a/src/databricks/labs/remorph/app/README.md b/src/databricks/labs/remorph/app/README.md
new file mode 100644
index 000000000..4e8e6624a
--- /dev/null
+++ b/src/databricks/labs/remorph/app/README.md
@@ -0,0 +1,42 @@
+# Prerequisites
+
+1. Set up the Databricks CLI.
+2. Clone the Remorph repo.
+3. Check out the feature/reconcile-databricks-app branch.
+4. Open app.yaml in the app module.
+
+![YAML Path](assets/yaml_path.png)
+
+5. Update the REMORPH_METADATA_SCHEMA value with your Remorph reconcile schema (use the same schema you used while installing Remorph).
+6. If you don't have Remorph installed, feel free to use any other schema you have access to.
+
+# Steps to deploy the app
+
+1. Create the app:
+
+>> databricks apps create <app-name>
+
+2. Sync the app directory to the workspace. First, navigate to the app directory:
+
+>> cd src/databricks/labs/remorph/app/
+
+Then upload the app files to the workspace. Do this in a new terminal tab and leave it open so it keeps syncing:
+
+>> databricks sync --watch . /Workspace/Users/user.name@databricks.com/
+
+3. Deploy the app:
+
+>> databricks apps deploy <app-name> --source-code-path /Workspace/Users/user.name@databricks.com/
+
+# Fix permission issues
+(TODO: do this programmatically; an illustrative SQL sketch follows this README.)
+
+1. Copy the service principal ID of your app: go to Compute > Apps > your app > Authorization tab.
+
+![Fix permission](assets/app_service_principle.png)
+
+2. Grant this service principal access to your Remorph schema. Data Editor access should be fine:
+
+![Catalog permission](assets/catalog_permission.png)
+
+Once done, launch the app; you should see a message that the required tables have been created. If so, the app has been deployed successfully.
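Granting the schema privileges manually (step 2 of the permission fix) can also be scripted from a notebook or the SQL editor. A minimal sketch, assuming a Unity Catalog schema; `<sp-application-id>` and `my_catalog.reconcile` are placeholders for your app's service principal ID and your `REMORPH_METADATA_SCHEMA` value:

```sql
-- Roughly equivalent to "Data Editor" on the Remorph metadata schema.
-- <sp-application-id> is the app's service principal ID (placeholder).
GRANT USE CATALOG ON CATALOG my_catalog TO `<sp-application-id>`;
GRANT USE SCHEMA ON SCHEMA my_catalog.reconcile TO `<sp-application-id>`;
-- CREATE TABLE lets the app create its config and job-run-details tables on first launch.
GRANT SELECT, MODIFY, CREATE TABLE ON SCHEMA my_catalog.reconcile TO `<sp-application-id>`;
```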
diff --git a/src/databricks/labs/remorph/app/app.py b/src/databricks/labs/remorph/app/app.py
new file mode 100644
index 000000000..5bd38d936
--- /dev/null
+++ b/src/databricks/labs/remorph/app/app.py
@@ -0,0 +1,21 @@
+from src.resources.web_components.homepage import render_homepage
+from src.services.spark_service import initialize_app
+
+selected_option = render_homepage()
+initialize_app()
+
+# Routing
+if selected_option == "Home":
+    from src.routes.home import main as page_main
+elif selected_option == "Recon Executor":
+    from src.routes.recon_executor import main as page_main
+elif selected_option == "Secret Manager":
+    from src.routes.secret_manager import main as page_main
+elif selected_option == "Config Manager":
+    from src.routes.config_manager import main as page_main
+elif selected_option == "Dashboard":
+    from src.routes.dashboard import main as page_main
+elif selected_option == "About":
+    from src.routes.about import main as page_main
+
+page_main()
diff --git a/src/databricks/labs/remorph/app/app.yaml b/src/databricks/labs/remorph/app/app.yaml
new file mode 100644
index 000000000..0c737b6fb
--- /dev/null
+++ b/src/databricks/labs/remorph/app/app.yaml
@@ -0,0 +1,18 @@
+command: [
+  "streamlit",
+  "run",
+  "app.py"
+]
+
+env:
+  - name: "REMORPH_METADATA_SCHEMA"
+    value: "kushagra_remorph.reconcile"
+  - name: STREAMLIT_BROWSER_GATHER_USAGE_STATS
+    value: "false"
+  - name: DATABRICKS_CLUSTER_ID
+    value: "0709-132523-cnhxf2p6"
+  - name: RECON_CONFIG_TABLE_NAME
+    value: "recon_app_config_table"
+  - name: RECON_JOB_RUN_DETAILS_TABLE_NAME
+    value: "job_run_details"
+
diff --git a/src/databricks/labs/remorph/app/assets/app_service_principle.png b/src/databricks/labs/remorph/app/assets/app_service_principle.png
new file mode 100644
index 000000000..53a3137e2
Binary files /dev/null and b/src/databricks/labs/remorph/app/assets/app_service_principle.png differ
diff --git a/src/databricks/labs/remorph/app/assets/catalog_permission.png b/src/databricks/labs/remorph/app/assets/catalog_permission.png
new file mode 100644
index 000000000..204fc1011
Binary files /dev/null and b/src/databricks/labs/remorph/app/assets/catalog_permission.png differ
diff --git a/src/databricks/labs/remorph/app/assets/yaml_path.png b/src/databricks/labs/remorph/app/assets/yaml_path.png
new file mode 100644
index 000000000..84f5023ea
Binary files /dev/null and b/src/databricks/labs/remorph/app/assets/yaml_path.png differ
diff --git a/src/databricks/labs/remorph/app/requirements.txt b/src/databricks/labs/remorph/app/requirements.txt
new file mode 100644
index 000000000..e32cf0b6d
--- /dev/null
+++ b/src/databricks/labs/remorph/app/requirements.txt
@@ -0,0 +1,10 @@
+streamlit==1.41.0
+databricks-connect==15.4.0
+streamlit-dynamic-filters
+streamlit-aggrid
+altair==5.0.0
+pandas~=2.2.2
+databricks-sdk~=0.41.0
+streamlit_option_menu
+python-dotenv~=1.0.1
+pandas-stubs
diff --git a/src/databricks/labs/remorph/app/src/__init__.py b/src/databricks/labs/remorph/app/src/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/databricks/labs/remorph/app/src/config/__init__.py b/src/databricks/labs/remorph/app/src/config/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/databricks/labs/remorph/app/src/config/settings.py b/src/databricks/labs/remorph/app/src/config/settings.py
new file mode 100644
index 000000000..f4a49846b
--- /dev/null
+++ b/src/databricks/labs/remorph/app/src/config/settings.py
@@ -0,0 +1,17 @@
+import os
+from dotenv import load_dotenv
+from databricks.connect.session import DatabricksSession
+
+
+class Settings:
+    def __init__(self):
+        load_dotenv()
+        self.DATABRICKS_CLUSTER_ID = os.getenv('DATABRICKS_CLUSTER_ID')
+        self.REMORPH_METADATA_SCHEMA = os.getenv('REMORPH_METADATA_SCHEMA')
+        self.RECON_CONFIG_TABLE_NAME = os.getenv('RECON_CONFIG_TABLE_NAME')
+        self.RECON_JOB_RUN_DETAILS_TABLE_NAME = os.getenv('RECON_JOB_RUN_DETAILS_TABLE_NAME')
+        # self.LOG_LEVEL = os.getenv('LOG_LEVEL', 'INFO')
+        self.spark = DatabricksSession.builder.clusterId(self.DATABRICKS_CLUSTER_ID).getOrCreate()
+
+
+settings = Settings()
diff --git a/src/databricks/labs/remorph/app/src/queries/__init__.py b/src/databricks/labs/remorph/app/src/queries/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/databricks/labs/remorph/app/src/queries/ddls/__init__.py b/src/databricks/labs/remorph/app/src/queries/ddls/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/databricks/labs/remorph/app/src/queries/ddls/create_config_table.sql b/src/databricks/labs/remorph/app/src/queries/ddls/create_config_table.sql
new file mode 100644
index 000000000..c8cfe67db
--- /dev/null
+++ b/src/databricks/labs/remorph/app/src/queries/ddls/create_config_table.sql
@@ -0,0 +1,20 @@
+CREATE TABLE IF NOT EXISTS {RECON_CONFIG_TABLE_NAME}
+(
+    config_id INT PRIMARY KEY,
+    source_catalog STRING NOT NULL,
+    source_schema STRING NOT NULL,
+    target_catalog STRING NOT NULL,
+    target_schema STRING NOT NULL,
+    tables ARRAY<STRUCT<
+        source_name: STRING,
+        target_name: STRING,
+        drop_columns: ARRAY<STRING>,
+        join_columns: ARRAY<STRING>,
+        transformations: ARRAY<STRUCT<
+            column_name: STRING,
+            source: STRING,
+            target: STRING
+        >>,
+        jdbc_reader_options: MAP<STRING, STRING>
+    >>
+) USING DELTA
\ No newline at end of file
diff --git a/src/databricks/labs/remorph/app/src/queries/ddls/create_status_table.sql b/src/databricks/labs/remorph/app/src/queries/ddls/create_status_table.sql
new file mode 100644
index 000000000..a849ad1a1
--- /dev/null
+++ b/src/databricks/labs/remorph/app/src/queries/ddls/create_status_table.sql
@@ -0,0 +1,15 @@
+CREATE TABLE IF NOT EXISTS {RECON_JOB_RUN_DETAILS_TABLE_NAME}
+(
+    job_run_id BIGINT PRIMARY KEY,
+    start_time TIMESTAMP,
+    end_time TIMESTAMP,
+    user_name STRING,
+    duration BIGINT, -- Store duration in seconds
+    source_dialect STRING,
+    workspace_id STRING,
+    workspace_name STRING,
+    status STRING,
+    exception_message STRING,
+    created_at TIMESTAMP,
+    updated_at TIMESTAMP
+) USING DELTA;
\ No newline at end of file
diff --git a/src/databricks/labs/remorph/app/src/queries/dmls/__init__.py b/src/databricks/labs/remorph/app/src/queries/dmls/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/databricks/labs/remorph/app/src/queries/dmls/fetch_existing_configs.sql b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_existing_configs.sql
new file mode 100644
index 000000000..a85ba8182
--- /dev/null
+++ b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_existing_configs.sql
@@ -0,0 +1 @@
+select * from {REMORPH_METADATA_SCHEMA}.recon_app_config_table;
\ No newline at end of file
diff --git a/src/databricks/labs/remorph/app/src/queries/dmls/fetch_recon_type.sql b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_recon_type.sql
new file mode 100644
index 000000000..7c580f202
--- /dev/null
+++
b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_recon_type.sql @@ -0,0 +1 @@ +SELECT DISTINCT recon_type FROM {REMORPH_METADATA_SCHEMA}.details \ No newline at end of file diff --git a/src/databricks/labs/remorph/app/src/queries/dmls/fetch_schema_comparison_details.sql b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_schema_comparison_details.sql new file mode 100644 index 000000000..92e0b347f --- /dev/null +++ b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_schema_comparison_details.sql @@ -0,0 +1,38 @@ +WITH tmp AS (SELECT recon_table_id, + inserted_ts, + EXPLODE(data) AS schema_data + FROM {REMORPH_METADATA_SCHEMA}.details +WHERE + recon_type = 'schema' + ) +SELECT main.recon_id, + main.source_table.`catalog` AS source_catalog, + main.source_table.`schema` AS source_schema, + main.source_table.table_name AS source_table_name, + IF( + ISNULL(source_catalog), + CONCAT_WS('.', source_schema, source_table_name), + CONCAT_WS('.', source_catalog, source_schema, source_table_name) + ) AS source_table, + main.target_table.`catalog` AS target_catalog, + main.target_table.`schema` AS target_schema, + main.target_table.table_name AS target_table_name, + CONCAT( + main.target_table.catalog, + '.', + main.target_table.schema, + '.', + main.target_table.table_name + ) AS target_table, + schema_data['source_column'] AS source_column, + schema_data['source_datatype'] AS source_datatype, + schema_data['databricks_column'] AS databricks_column, + schema_data['databricks_datatype'] AS databricks_datatype, + schema_data['is_valid'] AS is_valid +FROM {REMORPH_METADATA_SCHEMA}.main AS main +INNER JOIN tmp +ON main.recon_table_id = tmp.recon_table_id +ORDER BY + tmp.inserted_ts DESC, + main.recon_id, + main.target_table \ No newline at end of file diff --git a/src/databricks/labs/remorph/app/src/queries/dmls/fetch_source_types.sql b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_source_types.sql new file mode 100644 index 000000000..cd1f32e10 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_source_types.sql @@ -0,0 +1 @@ +SELECT DISTINCT source_type FROM {REMORPH_METADATA_SCHEMA}.main \ No newline at end of file diff --git a/src/databricks/labs/remorph/app/src/queries/dmls/fetch_summary.sql b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_summary.sql new file mode 100644 index 000000000..f037b42d0 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_summary.sql @@ -0,0 +1,39 @@ +SELECT main.recon_id, + main.source_type, + main.report_type, + main.source_table.`catalog` AS source_catalog, + main.source_table.`schema` AS source_schema, + main.source_table.table_name AS source_table_name, + IF( + ISNULL(source_catalog), + CONCAT_WS('.', source_schema, source_table_name), + CONCAT_WS('.', source_catalog, source_schema, source_table_name) + ) AS source_table, + main.target_table.`catalog` AS target_catalog, + main.target_table.`schema` AS target_schema, + main.target_table.table_name AS target_table_name, + CONCAT( + main.target_table.catalog, + '.', + main.target_table.schema, + '.', + main.target_table.table_name + ) AS target_table, + metrics.run_metrics.status AS status, + metrics.run_metrics.exception_message AS exception, + metrics.recon_metrics.row_comparison.missing_in_source AS missing_in_source, + metrics.recon_metrics.row_comparison.missing_in_target AS missing_in_target, + metrics.recon_metrics.column_comparison.absolute_mismatch AS absolute_mismatch, + metrics.recon_metrics.column_comparison.threshold_mismatch AS 
threshold_mismatch,
+       metrics.recon_metrics.column_comparison.mismatch_columns AS mismatch_columns,
+       metrics.recon_metrics.schema_comparison AS schema_comparison,
+       metrics.run_metrics.run_by_user AS executed_by,
+       main.start_ts AS start_ts,
+       main.end_ts AS end_ts
+FROM {REMORPH_METADATA_SCHEMA}.main AS main
+    INNER JOIN {REMORPH_METADATA_SCHEMA}.metrics AS metrics
+ON main.recon_table_id = metrics.recon_table_id
+ORDER BY
+    metrics.inserted_ts DESC,
+    main.recon_id,
+    main.target_table.table_name
\ No newline at end of file
diff --git a/src/databricks/labs/remorph/app/src/queries/dmls/fetch_total_failed_runs.sql b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_total_failed_runs.sql
new file mode 100644
index 000000000..d2b6f1b23
--- /dev/null
+++ b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_total_failed_runs.sql
@@ -0,0 +1,7 @@
+SELECT main.recon_id AS rec_id,
+       CAST(main.start_ts AS DATE) AS start_date
+FROM {REMORPH_METADATA_SCHEMA}.main AS main
+INNER JOIN {REMORPH_METADATA_SCHEMA}.metrics AS metrics
+ON main.recon_table_id = metrics.recon_table_id
+WHERE
+    metrics.run_metrics.status = FALSE
\ No newline at end of file
diff --git a/src/databricks/labs/remorph/app/src/queries/dmls/fetch_unique_target_tables_failed.sql b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_unique_target_tables_failed.sql
new file mode 100644
index 000000000..3e3ca3e99
--- /dev/null
+++ b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_unique_target_tables_failed.sql
@@ -0,0 +1,7 @@
+SELECT CONCAT_WS('.', main.target_table.catalog, main.target_table.schema, main.target_table.table_name) AS t_table,
+       CAST(main.start_ts AS DATE) AS start_date
+FROM {REMORPH_METADATA_SCHEMA}.main AS main
+INNER JOIN {REMORPH_METADATA_SCHEMA}.metrics AS metrics
+ON main.recon_table_id = metrics.recon_table_id
+WHERE
+    metrics.run_metrics.status = FALSE
\ No newline at end of file
diff --git a/src/databricks/labs/remorph/app/src/queries/dmls/fetch_unique_target_tables_successful.sql b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_unique_target_tables_successful.sql
new file mode 100644
index 000000000..098035e62
--- /dev/null
+++ b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_unique_target_tables_successful.sql
@@ -0,0 +1,7 @@
+SELECT CONCAT_WS('.', main.target_table.catalog, main.target_table.schema, main.target_table.table_name) AS t_table,
+       CAST(main.start_ts AS DATE) AS start_date
+FROM {REMORPH_METADATA_SCHEMA}.main AS main
+INNER JOIN {REMORPH_METADATA_SCHEMA}.metrics AS metrics
+ON main.recon_table_id = metrics.recon_table_id
+WHERE
+    metrics.run_metrics.status = TRUE
\ No newline at end of file
diff --git a/src/databricks/labs/remorph/app/src/queries/dmls/fetch_users.sql b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_users.sql
new file mode 100644
index 000000000..8054b86c8
--- /dev/null
+++ b/src/databricks/labs/remorph/app/src/queries/dmls/fetch_users.sql
@@ -0,0 +1 @@
+SELECT DISTINCT run_metrics.run_by_user FROM {REMORPH_METADATA_SCHEMA}.metrics
\ No newline at end of file
diff --git a/src/databricks/labs/remorph/app/src/resources/logo/remorph.png b/src/databricks/labs/remorph/app/src/resources/logo/remorph.png
new file mode 100644
index 000000000..b5de4ed67
Binary files /dev/null and b/src/databricks/labs/remorph/app/src/resources/logo/remorph.png differ
diff --git a/src/databricks/labs/remorph/app/src/resources/logo/remorph.svg b/src/databricks/labs/remorph/app/src/resources/logo/remorph.svg
new file mode 100644
index 000000000..5f3c32f82
--- /dev/null
+++ b/src/databricks/labs/remorph/app/src/resources/logo/remorph.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/src/databricks/labs/remorph/app/src/resources/web_components/__init__.py b/src/databricks/labs/remorph/app/src/resources/web_components/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/databricks/labs/remorph/app/src/resources/web_components/homepage.py b/src/databricks/labs/remorph/app/src/resources/web_components/homepage.py new file mode 100644 index 000000000..2918525d5 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/resources/web_components/homepage.py @@ -0,0 +1,73 @@ +import streamlit as st # type: ignore +from pathlib import Path +from streamlit_option_menu import option_menu # type: ignore + +# Set up paths for logo +logo_path_svg = Path("src/resources/logo/remorph.svg") +logo_path_png = Path("src/resources/logo/remorph.png") +st.set_page_config(page_title="Reconcile", page_icon=logo_path_png, layout="wide") + + +def load_svg_inline(svg_path): + with open(svg_path, "r") as file: + return file.read() + + +def render_homepage(): + logo_svg_content = load_svg_inline(logo_path_svg) + + with st.sidebar: + st.markdown( + f""" +
+            <div style="display: flex; align-items: center;">
+                {logo_svg_content}
+                <span style="font-size: 1.5rem; font-weight: bold; margin-left: 8px;">Reconcile</span>
+            </div>
+ """, + unsafe_allow_html=True, + ) + + menu_options = { + "Home": None, + "Recon Executor": "recon_executor", + "Secret Manager": "secret_manager", + "Config Manager": "config_manager", + "Dashboard": "dashboard", + "About": "about", + } + + query_params = st.query_params.get("page", None) + + default_index = list(menu_options.values()).index(query_params) if query_params in menu_options.values() else 0 + selected_option = option_menu( + menu_title=None, + options=list(menu_options.keys()), + icons=["house", "play-circle", "key", "wrench", "bar-chart", "info-circle"], + menu_icon="cast", + default_index=default_index, + styles={ + "container": {"padding": "0", "background-color": "#f8f9fa"}, + "icon": {"color": "blue", "font-size": "20px"}, + "nav-link": { + "font-size": "16px", + "text-align": "left", + "margin": "0px", + "color": "black", + "--hover-color": "#eeeeee", + }, + "nav-link-selected": {"background-color": "#4CAF50", "color": "white"}, + }, + ) + + # Update the URL **only if the selected page has changed** (avoiding unnecessary rerun) + new_page = menu_options[selected_option] + if new_page != query_params: + if new_page is None: + st.query_params.clear() + else: + st.query_params["page"] = new_page + st.rerun() + + return selected_option diff --git a/src/databricks/labs/remorph/app/src/routes/__init__.py b/src/databricks/labs/remorph/app/src/routes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/databricks/labs/remorph/app/src/routes/about.py b/src/databricks/labs/remorph/app/src/routes/about.py new file mode 100644 index 000000000..dfdc77d9c --- /dev/null +++ b/src/databricks/labs/remorph/app/src/routes/about.py @@ -0,0 +1,7 @@ +import streamlit as st # type: ignore + + +def main(): + # Title and Introduction + st.title("Remorph - Reconcile") + st.markdown("### W.I.P") diff --git a/src/databricks/labs/remorph/app/src/routes/config_manager.py b/src/databricks/labs/remorph/app/src/routes/config_manager.py new file mode 100644 index 000000000..ec602c3b3 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/routes/config_manager.py @@ -0,0 +1,160 @@ +import json +import streamlit as st # type: ignore +from ..config.settings import settings +from ..services.spark_service import load_query, fetch_dataframe, save_config_to_delta +from ..utils.pretty_print_configs import create_collapsible_json + + +def main(): + st.title("Config Manager") + tab1, tab2 = st.tabs(["Add New Config", "View Existing Configs"]) + with tab1: + table_form_expander = st.expander("Add a new config") + with table_form_expander: + col1, col2, col3, col4 = st.columns(4) + with col1: + source_catalog = st.text_input("Source Catalog", value="hive_metastore") + with col2: + source_schema = st.text_input("Source Schema", value="labs") + with col3: + target_catalog = st.text_input("Target Catalog", value="sandbox_db") + with col4: + target_schema = st.text_input("Target Schema", value="labs") + + # Initialize tables list + if "tables" not in st.session_state: + st.session_state["tables"] = [] + + col1, col2 = st.columns(2) + with col1: + source_name = st.text_input("Source Table Name") + with col2: + target_name = st.text_input("Target Table Name") + + # Column mapping + st.subheader("Column Mapping") + column_mapping = [] + num_columns = st.number_input("Number of columns to map", min_value=1, step=1, key="num_columns") + for i in range(num_columns): + col1, col2 = st.columns(2) + with col1: + source_column = st.text_input(f"Source Column {i + 1}", key=f"source_column_{i}") + with 
col2: + target_column = st.text_input(f"Target Column {i + 1}", key=f"target_column_{i}") + column_mapping.append({"source_name": source_column, "target_name": target_column}) + + # Join columns + st.subheader("Join Columns") + join_columns = st.text_input("Enter join columns, comma-separated") + + # Column thresholds + st.subheader("Column Thresholds") + column_thresholds = [] + num_thresholds = st.number_input("Number of column thresholds", min_value=0, step=1, key="num_thresholds") + for i in range(num_thresholds): + col1, col2, col3, col4 = st.columns(4) + with col1: + column_name = st.text_input(f"Threshold Column {i + 1}", key=f"threshold_column_{i}") + with col2: + upper_bound = st.text_input(f"Upper Bound for {column_name}", key=f"upper_bound_{i}") + with col3: + lower_bound = st.text_input(f"Lower Bound for {column_name}", key=f"lower_bound_{i}") + with col4: + threshold_type = st.selectbox( + f"Type for {column_name}", ["int", "float", "string"], key=f"threshold_type_{i}" + ) + column_thresholds.append( + { + "column_name": column_name, + "upper_bound": upper_bound, + "lower_bound": lower_bound, + "type": threshold_type, + } + ) + + # Transformations + st.subheader("Transformations") + transformations = [] + num_transformations = st.number_input( + "Number of transformations", min_value=0, step=1, key="num_transformations" + ) + for i in range(num_transformations): + col1, col2, col3 = st.columns(3) + with col1: + column_name = st.text_input(f"Column Name {i + 1}", key=f"column_name_{i}") + with col2: + source = st.text_input(f"Transformation Source {i + 1}", key=f"transformation_source_{i}") + with col3: + target = st.text_input(f"Transformation Target {i + 1}", key=f"transformation_target_{i}") + transformations.append({"column_name": column_name, "source": source, "target": target}) + + # Drop columns + st.subheader("Drop Columns") + drop_columns = st.text_input("Enter columns to drop, comma-separated") + + # Filters + st.subheader("Filters") + col1, col2 = st.columns(2) + with col1: + source_filter = st.text_area("Source Filter") + with col2: + target_filter = st.text_area("Target Filter") + filters = {"source": source_filter, "target": target_filter} + + # JDBC Reader Options + st.subheader("JDBC Reader Options") + jdbc_reader_options = st.text_area("Enter JDBC reader options as JSON") + + # Table thresholds + st.subheader("Table Thresholds") + table_thresholds = st.text_area("Enter table thresholds as JSON") + + table_details = { + "source_catalog": source_catalog, + "source_schema": source_schema, + "target_catalog": target_catalog, + "target_schema": target_schema, + "tables": [ + { + "source_name": source_name, + "target_name": target_name, + "column_mapping": column_mapping, + "drop_columns": drop_columns.split(",") if drop_columns else [], + "filters": filters, + "jdbc_reader_options": json.loads(jdbc_reader_options) if jdbc_reader_options else {}, + "join_columns": join_columns.split(",") if join_columns else [], + "select_columns": None, # This can be updated based on additional user inputs if needed + "column_thresholds": column_thresholds, + "table_thresholds": json.loads(table_thresholds) if table_thresholds else {}, + "transformations": transformations, + } + ], + } + table_details_json = json.dumps(table_details, indent=4) + view_config, save_config = st.columns(2) + with view_config: + if st.button("View Config"): + st.json(table_details_json) + st.button("Close") + with save_config: + if st.button("Save Config"): + # save_json(json_data=table_details_json, 
path=RECON_APP_RESOURCES_DIR) + save_config_to_delta(table_details=table_details) + + with tab2: + st.write("View Existing Configs") + # Fetch existing configs + fetch_configs = load_query( + "dmls", "fetch_existing_configs", REMORPH_METADATA_SCHEMA=settings.REMORPH_METADATA_SCHEMA + ) + configs = fetch_dataframe(fetch_configs) + + # Convert the 'tables' column to a string + configs['tables'] = configs['tables'].apply(create_collapsible_json) + + # Display the DataFrame + st.dataframe(configs) + + +if __name__ == "__main__": + main() diff --git a/src/databricks/labs/remorph/app/src/routes/dashboard.py b/src/databricks/labs/remorph/app/src/routes/dashboard.py new file mode 100644 index 000000000..b0cd0b521 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/routes/dashboard.py @@ -0,0 +1,83 @@ +import streamlit as st # type: ignore +from ..config.settings import settings +from ..services.spark_service import load_query, fetch_dataframe, fetch_list_from_queries + + +def main(): + st.title("Reconciliation Metrics") + + # Fetch all distinct users to populate the dropdown + fetch_users = load_query("dmls", "fetch_users", REMORPH_METADATA_SCHEMA=settings.REMORPH_METADATA_SCHEMA) + users = fetch_list_from_queries(fetch_users) + users = [None] + users if users is not None else [None] + + # Fetch all distinct recon tables to populate the dropdown + fetch_recon_type = load_query("dmls", "fetch_recon_type", REMORPH_METADATA_SCHEMA=settings.REMORPH_METADATA_SCHEMA) + recon_tables = fetch_list_from_queries(fetch_recon_type) + recon_tables = [None] + recon_tables if recon_tables is not None else [None] + + # Fetch source types to populate the dropdown + fetch_source_types = load_query( + "dmls", "fetch_source_types", REMORPH_METADATA_SCHEMA=settings.REMORPH_METADATA_SCHEMA + ) + source_types = fetch_list_from_queries(fetch_source_types) + source_types = [None] + source_types if source_types is not None else [None] + + filter_1, filter_2, filter_3 = st.columns(3) + with filter_1: + user_name_filter = st.selectbox("Select User", users, index=0) + with filter_2: + recon_table_filter = st.selectbox("Select Recon Type", recon_tables, index=0) + with filter_3: + source_type_filter = st.selectbox("Select Source", source_types, index=0) + print(f'User: {user_name_filter}, Recon Table: {recon_table_filter}, Source Type: {source_type_filter}') + + st.divider() + + fetch_total_failed_runs = load_query( + "dmls", 'fetch_total_failed_runs', REMORPH_METADATA_SCHEMA=settings.REMORPH_METADATA_SCHEMA + ) + total_failed_runs = fetch_dataframe(fetch_total_failed_runs) + total_failed_runs = ( + len(total_failed_runs.index) if total_failed_runs is not None and not total_failed_runs.empty else 0 + ) + + fetch_unique_target_tables_failed = load_query( + "dmls", 'fetch_unique_target_tables_failed', REMORPH_METADATA_SCHEMA=settings.REMORPH_METADATA_SCHEMA + ) + unique_target_tables_failed = fetch_dataframe(fetch_unique_target_tables_failed) + unique_target_tables_failed = ( + len(unique_target_tables_failed.index) + if unique_target_tables_failed is not None and not unique_target_tables_failed.empty + else 0 + ) + + fetch_unique_target_tables_successful = load_query( + "dmls", 'fetch_unique_target_tables_successful', REMORPH_METADATA_SCHEMA=settings.REMORPH_METADATA_SCHEMA + ) + unique_target_tables_successful = fetch_dataframe(fetch_unique_target_tables_successful) + unique_target_tables_successful = ( + len(unique_target_tables_successful.index) + if unique_target_tables_successful is not None and not 
unique_target_tables_successful.empty + else 0 + ) + + col1, col2, col3 = st.columns(3) + with col1: + st.metric("Total Failed Runs", total_failed_runs, " - No of failed runs") + with col2: + st.metric("Unique Target Tables Failed", unique_target_tables_failed, " - Unique Failed Tables") + with col3: + st.metric("Unique Target Tables Successful", unique_target_tables_successful, "Unique Successful") + + st.write("Reconciliation Summary") + fetch_summary_sql = load_query("dmls", "fetch_summary", REMORPH_METADATA_SCHEMA=settings.REMORPH_METADATA_SCHEMA) + summary_df = fetch_dataframe(fetch_summary_sql) + st.dataframe(summary_df) + + st.write("Schema Comparison Details") + fetch_schema_comparison_details_sql = load_query( + "dmls", "fetch_schema_comparison_details", REMORPH_METADATA_SCHEMA=settings.REMORPH_METADATA_SCHEMA + ) + df_schema_comparison_details = fetch_dataframe(fetch_schema_comparison_details_sql) + st.dataframe(df_schema_comparison_details) diff --git a/src/databricks/labs/remorph/app/src/routes/home.py b/src/databricks/labs/remorph/app/src/routes/home.py new file mode 100644 index 000000000..7dd0c5c04 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/routes/home.py @@ -0,0 +1,94 @@ +import streamlit as st # type: ignore +import streamlit.components.v1 as components # type: ignore + + +def main(): + # Title and Introduction + st.title("Remorph - Reconcile") + st.markdown("### A powerful tool for **automated data reconciliation** in Databricks.") + + st.header("What is Reconcile?") + st.markdown( + """ + **Reconcile** is an automated **data validation and comparison** tool designed for **verifying the accuracy of migrated data** + between a **source system** and **Databricks**. It helps identify **discrepancies** in data to ensure **seamless and error-free migrations**. + + **Key Capabilities:** + - ✅ **Compare** source data (e.g., Snowflake, Oracle) with target data in Databricks. + - 🔍 **Detect anomalies** such as missing rows, mismatched values, or schema inconsistencies. + - 🚀 **Scale efficiently** to handle large datasets with optimized performance. + """ + ) + + st.header("How Reconcile Works") + st.markdown("Below is a **visual representation** of the reconciliation process:") + + mermaid_html = """ +
+    <div class="mermaid">
+    flowchart TD
+        A(Transpile CLI) --> |Directory| B[Transpile All Files In Directory];
+        A --> |File| C[Transpile Single File];
+        B --> D[List Files];
+        C --> E("Sqlglot(transpile)");
+        D --> E
+        E --> |Parse Error| F(Failed Queries)
+        E --> G{Skip Validations}
+        G --> |Yes| H(Save Output)
+        G --> |No| I{Validate}
+        I --> |Success| H
+        I --> |Fail| J(Flag, Capture)
+        J --> H
+    </div>
+    <script src="https://cdn.jsdelivr.net/npm/mermaid/dist/mermaid.min.js"></script>
+    <script>mermaid.initialize({ startOnLoad: true });</script>
+    """
+
+    components.html(mermaid_html, height=1000)
+
+    st.markdown(
+        """
+    The **Reconcile process** works in the following steps:
+
+    1️⃣ **Extract Data**: Fetches data from both the **source system** (e.g., Snowflake, Oracle) and the **Databricks target table**.
+    2️⃣ **Transform & Normalize**: Applies **data transformations** and aligns schema differences.
+    3️⃣ **Compare Data**: Performs **row-level, column-level, and aggregated** comparisons.
+    4️⃣ **Generate Report**: Identifies **mismatched records**, missing values, and summary statistics.
+
+    🎯 **Result:** A clear report showing how well the migrated data matches the original source.
+    """
+    )
+
+    st.header("Why is Data Reconciliation Important?")
+    options = {
+        "Prevent incorrect reports due to data mismatches": "Reconciliation ensures **accurate insights** by eliminating errors.",
+        "Ensure compliance with industry regulations": "Regulatory standards often require **data integrity checks**.",
+        "Detect unexpected data loss during migration": "Helps catch missing records **before they impact business decisions**.",
+        "Improve trust in the migration process": "Ensures **stakeholders** have confidence in the new system.",
+    }
+
+    selected_reason = st.radio("📌 Select a reason to learn more:", list(options.keys()))
+    st.success(options[selected_reason])
+
+    st.header("Common Data Reconciliation Challenges")
+    st.markdown(
+        """
+    Even with **automated tools** like Reconcile, data validation presents challenges:
+
+    - ⚠️ **Schema Drift**: Unexpected changes in column types or structures.
+    - 🔄 **Data Sync Issues**: Time-lagged data updates leading to inconsistencies.
+    - 📉 **Large Data Volume**: Millions of rows requiring optimized comparison techniques.
+    - 🔍 **Precision vs. Performance**: Balancing speed with deep data validation.
+
+    Reconcile tackles these challenges with **efficient algorithms, distributed processing, and flexible configurations**.
+ """ + ) + + st.header("Explore More") + st.markdown( + "🔗 **Check out the full project on GitHub:** [Databricks Labs Remorph](https://github.com/databrickslabs/remorph)" + ) + + st.markdown("💡 **Need more details?** Reach out to the community and contribute to the project!") diff --git a/src/databricks/labs/remorph/app/src/routes/recon_executor.py b/src/databricks/labs/remorph/app/src/routes/recon_executor.py new file mode 100644 index 000000000..bbba09555 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/routes/recon_executor.py @@ -0,0 +1,31 @@ +import streamlit as st # type: ignore + + +def load_configurations(): + # This function should load available configurations + # For demonstration, we use a static list + return ["Config 1", "Config 2", "Config 3"] + + +def run_reconciliation(selected_config): + # This function should run the reconciliation process based on the selected configuration + st.success(f"Running reconciliation with {selected_config}") + + +def main(): + st.title("Recon Executor") + st.markdown("### W.I.P") + + # Load available configurations + configurations = load_configurations() + + # Select a configuration + selected_config = st.selectbox("Choose a configuration", configurations) + + # Button to run reconciliation + if st.button("Run Reconciliation"): + run_reconciliation(selected_config) + + +if __name__ == "__main__": + main() diff --git a/src/databricks/labs/remorph/app/src/routes/secret_manager.py b/src/databricks/labs/remorph/app/src/routes/secret_manager.py new file mode 100644 index 000000000..bfb0a3339 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/routes/secret_manager.py @@ -0,0 +1,7 @@ +import streamlit as st # type: ignore + + +def main(): + st.title("Secret Manager") + st.write("Manage your secrets securely here.") + st.text_input("Enter your secret key:") diff --git a/src/databricks/labs/remorph/app/src/services/__init__.py b/src/databricks/labs/remorph/app/src/services/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/databricks/labs/remorph/app/src/services/data_service.py b/src/databricks/labs/remorph/app/src/services/data_service.py new file mode 100644 index 000000000..5d7a9e83f --- /dev/null +++ b/src/databricks/labs/remorph/app/src/services/data_service.py @@ -0,0 +1,31 @@ +# import os +# import streamlit as st +# +# +# def save_json(json_data, path: str): +# try: +# import dbutils +# +# is_databricks = True +# except ImportError: +# is_databricks = False +# path = path if path.endswith('/') else f'{path}/' +# full_path_with_extension = f'{path}config.json' +# if is_databricks: +# try: +# dbutils.fs.put(full_path_with_extension, json_data, overwrite=True) +# print(f"JSON file successfully saved to Databricks workspace path: {path}") +# st.success(f"JSON file successfully saved to Databricks workspace path: {path}") +# except Exception as e: +# print(f"Failed to save JSON to Databricks workspace: {e}") +# st.exception(e) +# else: +# # Local environment: Save to local file system +# try: +# os.makedirs(os.path.dirname(path), exist_ok=True) +# with open(full_path_with_extension, "w") as file: +# file.write(json_data) +# print(f"JSON file successfully saved to local path: {full_path_with_extension}") +# st.success(f"JSON file successfully saved to locaL path: {full_path_with_extension}") +# except Exception as e: +# print(f"Failed to save JSON locally: {e}") diff --git a/src/databricks/labs/remorph/app/src/services/spark_service.py b/src/databricks/labs/remorph/app/src/services/spark_service.py new file 
mode 100644 index 000000000..c3f8ef2a2 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/services/spark_service.py @@ -0,0 +1,87 @@ +import streamlit as st # type: ignore +from databricks.connect.session import DatabricksSession +from pyspark.sql import functions as F +from ..utils.schemas.config_schema import config_schema +from ..utils.query_loader import load_query +from ..config.settings import settings + + +def create_spark_session(cluster_id): + spark = DatabricksSession.builder.clusterId(cluster_id).getOrCreate() + return spark + + +def save_config_to_delta(table_details): + existing_df = settings.spark.table(f"{settings.REMORPH_METADATA_SCHEMA}.{settings.RECON_CONFIG_TABLE_NAME}") + max_config_id = existing_df.agg(F.max("config_id")).collect()[0][0] + new_config_id = (max_config_id or 0) + 1 + table_details["config_id"] = new_config_id + + df = settings.spark.createDataFrame([table_details], schema=config_schema) + # df.show(truncate=False) + try: + df.write.format("delta").mode("append").saveAsTable( + f"{settings.REMORPH_METADATA_SCHEMA}." f"{settings.RECON_CONFIG_TABLE_NAME}" + ) + st.success("Config saved successfully") + except Exception as e: + st.error(f"Error saving config: {e}") + + +def run_ddl(ddl, table_name): + if settings.spark.catalog.tableExists(table_name): + pass + else: + settings.spark.sql(ddl) + st.success(f"Table {table_name} created successfully") + + +def initialize_tables(): + with st.spinner("Ensuring config and status tables are present. May take a while if cluster is not up"): + try: + create_config_table_ddl = load_query( + "ddls", + "create_config_table", + RECON_CONFIG_TABLE_NAME=f"{settings.REMORPH_METADATA_SCHEMA}." f"{settings.RECON_CONFIG_TABLE_NAME}", + ) + + create_status_table_ddl = load_query( + "ddls", + "create_status_table", + RECON_JOB_RUN_DETAILS_TABLE_NAME=f"{settings.REMORPH_METADATA_SCHEMA}." + f"{settings.RECON_JOB_RUN_DETAILS_TABLE_NAME}", + ) + + run_ddl( + create_config_table_ddl, f"{settings.REMORPH_METADATA_SCHEMA}." f"{settings.RECON_CONFIG_TABLE_NAME}" + ) + + run_ddl( + create_status_table_ddl, + f"{settings.REMORPH_METADATA_SCHEMA}." f"{settings.RECON_JOB_RUN_DETAILS_TABLE_NAME}", + ) + except Exception as e: + st.error(f'Error creating tables. 
Please check if "{settings.REMORPH_METADATA_SCHEMA}" schema exists') + st.error(f"Error: {e}") # FIXME : Remove this logging, it is for development purpose only + # st.error( + # f"Please check if these " + # f"tables are present: {settings.RECON_CONFIG_TABLE_NAME} and " + # f"{settings.RECON_JOB_RUN_DETAILS_TABLE_NAME}" + # ) + return + st.session_state.tables_initialized = True + + +def initialize_app(): + if "tables_initialized" not in st.session_state: + initialize_tables() + + +def fetch_dataframe(sql): + df = settings.spark.sql(sql).toPandas() + return df + + +def fetch_list_from_queries(sql): + items = [item[0] for item in settings.spark.sql(sql).toPandas().values.tolist()] + return items or None diff --git a/src/databricks/labs/remorph/app/src/utils/__init__.py b/src/databricks/labs/remorph/app/src/utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/databricks/labs/remorph/app/src/utils/pretty_print_configs.py b/src/databricks/labs/remorph/app/src/utils/pretty_print_configs.py new file mode 100644 index 000000000..178fedd13 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/utils/pretty_print_configs.py @@ -0,0 +1,8 @@ +# utils.py +import numpy as np + + +def create_collapsible_json(data): + if isinstance(data, np.ndarray): + data = data.tolist() + return str(data) diff --git a/src/databricks/labs/remorph/app/src/utils/query_loader.py b/src/databricks/labs/remorph/app/src/utils/query_loader.py new file mode 100644 index 000000000..86a2d62ab --- /dev/null +++ b/src/databricks/labs/remorph/app/src/utils/query_loader.py @@ -0,0 +1,34 @@ +import os + + +def load_query(category, query_name, **kwargs): + """ + Loads an SQL query from the 'queries' folder (ddls or dmls) and formats it with provided parameters. + + Args: + category (str): 'ddls' or 'dmls' (subdirectory in 'queries') + query_name (str): The name of the SQL file without extension. + kwargs: Dynamic parameters to format inside the query. + + Returns: + str: The formatted SQL query. + + Raises: + FileNotFoundError: If the SQL file is not found. + ValueError: If an invalid category is provided. + """ + + valid_categories = {"ddls", "dmls"} + + if category not in valid_categories: + raise ValueError(f"Invalid category '{category}'. 
Choose from {valid_categories}") + + query_path = os.path.join(os.path.dirname(__file__), "..", "queries", category, f"{query_name}.sql") + + if not os.path.exists(query_path): + raise FileNotFoundError(f"Query file '{query_name}.sql' not found in '{category}'") + + with open(query_path, "r", encoding="utf-8") as file: + query = file.read() + + return query.format(**kwargs) diff --git a/src/databricks/labs/remorph/app/src/utils/schemas/__init__.py b/src/databricks/labs/remorph/app/src/utils/schemas/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/databricks/labs/remorph/app/src/utils/schemas/config_schema.py b/src/databricks/labs/remorph/app/src/utils/schemas/config_schema.py new file mode 100644 index 000000000..818037728 --- /dev/null +++ b/src/databricks/labs/remorph/app/src/utils/schemas/config_schema.py @@ -0,0 +1,38 @@ +from pyspark.sql.types import StructType, StructField, StringType, ArrayType, IntegerType, MapType + +# Define the schema for the transformations array +transformations_schema = ArrayType( + StructType( + [ + StructField("column_name", StringType(), True), + StructField("source", StringType(), True), + StructField("target", StringType(), True), + ] + ) +) + +# Define the schema for the tables array +tables_schema = ArrayType( + StructType( + [ + StructField("source_name", StringType(), True), + StructField("target_name", StringType(), True), + StructField("drop_columns", ArrayType(StringType()), True), + StructField("join_columns", ArrayType(StringType()), True), + StructField("transformations", transformations_schema, True), + StructField("jdbc_reader_options", MapType(StringType(), StringType()), True), + ] + ) +) + +# Define the overall schema +config_schema = StructType( + [ + StructField("config_id", IntegerType(), False), + StructField("source_catalog", StringType(), False), + StructField("source_schema", StringType(), False), + StructField("target_catalog", StringType(), False), + StructField("target_schema", StringType(), False), + StructField("tables", tables_schema, False), + ] +) diff --git a/src/databricks/labs/remorph/app/src/utils/schemas/job_run_details_schema.py b/src/databricks/labs/remorph/app/src/utils/schemas/job_run_details_schema.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/databricks/labs/remorph/app/tests/__init__.py b/src/databricks/labs/remorph/app/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/src/databricks/labs/remorph/app/tests/test_config/test_settings.py b/src/databricks/labs/remorph/app/tests/test_config/test_settings.py new file mode 100644 index 000000000..9ff6517c7 --- /dev/null +++ b/src/databricks/labs/remorph/app/tests/test_config/test_settings.py @@ -0,0 +1,17 @@ +from ...src.config.settings import settings + + +def test_databricks_cluster_id(): + assert settings.DATABRICKS_CLUSTER_ID is not None + + +def test_remorph_metadata_schema(): + assert settings.REMORPH_METADATA_SCHEMA is not None + + +def test_recon_config_table_name(): + assert settings.RECON_CONFIG_TABLE_NAME is not None + + +# def test_recon_job_run_details_table_name(): +# assert settings.RECON_JOB_RUN_DETAILS_TABLE_NAME is not None diff --git a/tests/resources/lsp_transpiler/test-lsp-server.log b/tests/resources/lsp_transpiler/test-lsp-server.log new file mode 100644 index 000000000..26666c0c4 --- /dev/null +++ b/tests/resources/lsp_transpiler/test-lsp-server.log @@ -0,0 +1,49 @@ +INFO:pygls.feature_manager:Registered builtin feature exit +INFO:pygls.feature_manager:Registered 
builtin feature initialize +INFO:pygls.feature_manager:Registered builtin feature initialized +INFO:pygls.feature_manager:Registered builtin feature notebookDocument/didChange +INFO:pygls.feature_manager:Registered builtin feature notebookDocument/didClose +INFO:pygls.feature_manager:Registered builtin feature notebookDocument/didOpen +INFO:pygls.feature_manager:Registered builtin feature $/setTrace +INFO:pygls.feature_manager:Registered builtin feature shutdown +INFO:pygls.feature_manager:Registered builtin feature textDocument/didChange +INFO:pygls.feature_manager:Registered builtin feature textDocument/didClose +INFO:pygls.feature_manager:Registered builtin feature textDocument/didOpen +INFO:pygls.feature_manager:Registered builtin feature window/workDoneProgress/cancel +INFO:pygls.feature_manager:Registered builtin feature workspace/didChangeWorkspaceFolders +INFO:pygls.feature_manager:Registered builtin feature workspace/executeCommand +INFO:pygls.feature_manager:Registered "initialize" with options "None" +INFO:pygls.feature_manager:Registered "textDocument/didOpen" with options "None" +INFO:pygls.feature_manager:Registered "textDocument/didClose" with options "None" +INFO:pygls.feature_manager:Registered "document/transpileToDatabricks" with options "None" +DEBUG:__main__:SOME_ENV=abc +DEBUG:__main__:sys.args=['lsp_server.py', '--stuff=12'] +INFO:pygls.server:Starting async IO server +DEBUG:asyncio:Using selector: KqueueSelector +DEBUG:pygls.server:Content length: 301 +DEBUG:pygls.protocol.json_rpc:Request message received. +INFO:pygls.protocol.language_server:Language server initialized InitializeParams(capabilities=ClientCapabilities(workspace=None, text_document=None, notebook_document=None, window=None, general=None, experimental=None), process_id=None, client_info=None, locale=None, root_path='.', root_uri=None, initialization_options={'remorph': {'source-dialect': 'snowflake'}, 'custom': {'whatever': 'xyz'}}, trace=None, work_done_token=None, workspace_folders=None) +DEBUG:pygls.protocol.language_server:Server capabilities: {"positionEncoding": "utf-16", "textDocumentSync": {"openClose": true, "change": 2, "save": false}, "executeCommandProvider": {"commands": []}, "workspace": {"workspaceFolders": {"supported": true, "changeNotifications": true}, "fileOperations": {}}} +INFO:pygls.protocol.json_rpc:Sending data: {"id": "d27a418d-1dbf-4279-aa39-0cfb419a208f", "jsonrpc": "2.0", "result": {"capabilities": {"positionEncoding": "utf-16", "textDocumentSync": {"openClose": true, "change": 2, "save": false}, "executeCommandProvider": {"commands": []}, "workspace": {"workspaceFolders": {"supported": true, "changeNotifications": true}, "fileOperations": {}}}, "serverInfo": {"name": "test-lsp-server", "version": "v0.1"}}} +DEBUG:__main__:dialect=snowflake +DEBUG:__main__:whatever=xyz +DEBUG:pygls.protocol.json_rpc:Sending request with id "e9005efb-b04c-43a1-8ebd-db929eb509c8": client/registerCapability RegistrationParams(registrations=[Registration(id='6006fa53-c96f-4d02-80c8-56ed086963a5', method='document/transpileToDatabricks', register_options=None)]) +INFO:pygls.protocol.json_rpc:Sending data: {"id": "e9005efb-b04c-43a1-8ebd-db929eb509c8", "params": {"registrations": [{"id": "6006fa53-c96f-4d02-80c8-56ed086963a5", "method": "document/transpileToDatabricks"}]}, "method": "client/registerCapability", "jsonrpc": "2.0"} +DEBUG:pygls.server:Content length: 274 +DEBUG:pygls.protocol.json_rpc:Notification message received. 
+DEBUG:__main__:open-document-uri=file:///Users/kushagra.parashar/IdeaProjects/remorph/tests/resources/lsp_transpiler/internal.sql +DEBUG:pygls.server:Content length: 80 +DEBUG:pygls.protocol.json_rpc:Response message received. +DEBUG:pygls.protocol.json_rpc:Received result for message "e9005efb-b04c-43a1-8ebd-db929eb509c8": None +DEBUG:pygls.server:Content length: 248 +DEBUG:pygls.protocol.json_rpc:Request message received. +INFO:pygls.protocol.json_rpc:Sending data: {"id": "a2225697-3269-426b-9b00-3ee6da142767", "jsonrpc": "2.0", "result": {"uri": "file:///Users/kushagra.parashar/IdeaProjects/remorph/tests/resources/lsp_transpiler/internal.sql", "changes": [{"range": {"start": {"line": 0, "character": 0}, "end": {"line": 2, "character": 0}}, "newText": "create table stuff(name varchar(12))\n"}], "diagnostics": [{"range": {"start": {"line": 0, "character": 0}, "end": {"line": 2, "character": 0}}, "message": "Something went wrong", "severity": 2, "code": "SOME_ERROR_CODE"}]}} +DEBUG:pygls.server:Content length: 190 +DEBUG:pygls.protocol.json_rpc:Notification message received. +DEBUG:__main__:close-document-uri=file:///Users/kushagra.parashar/IdeaProjects/remorph/tests/resources/lsp_transpiler/internal.sql +DEBUG:pygls.server:Content length: 86 +DEBUG:pygls.protocol.json_rpc:Request message received. +INFO:pygls.protocol.json_rpc:Sending data: {"id": "93f3df4c-1f36-468d-ace6-f0e11371d552", "jsonrpc": "2.0", "result": null} +DEBUG:pygls.server:Content length: 36 +DEBUG:pygls.protocol.json_rpc:Notification message received. +INFO:pygls.server:Shutting down the server diff --git a/tests/resources/lsp_transpiler/transpile_errors_2025_02_10_20_04_33_827971.lst b/tests/resources/lsp_transpiler/transpile_errors_2025_02_10_20_04_33_827971.lst new file mode 100644 index 000000000..ca742cb66 --- /dev/null +++ b/tests/resources/lsp_transpiler/transpile_errors_2025_02_10_20_04_33_827971.lst @@ -0,0 +1 @@ +TranspileError(code=UNSUPPORTED_LCA, kind=ANALYSIS, severity=ERROR, path='/Users/kushagra.parashar/IdeaProjects/remorph/tests/resources/lsp_transpiler/unsupported_lca.sql', message='LCA conversion not supported') diff --git a/tests/resources/lsp_transpiler/transpile_errors_2025_02_10_20_12_00_970479.lst b/tests/resources/lsp_transpiler/transpile_errors_2025_02_10_20_12_00_970479.lst new file mode 100644 index 000000000..ca742cb66 --- /dev/null +++ b/tests/resources/lsp_transpiler/transpile_errors_2025_02_10_20_12_00_970479.lst @@ -0,0 +1 @@ +TranspileError(code=UNSUPPORTED_LCA, kind=ANALYSIS, severity=ERROR, path='/Users/kushagra.parashar/IdeaProjects/remorph/tests/resources/lsp_transpiler/unsupported_lca.sql', message='LCA conversion not supported')