From da1a0afda1b41081da5877fe5637696a823897c8 Mon Sep 17 00:00:00 2001 From: Dan Date: Wed, 3 May 2023 16:55:05 -0600 Subject: [PATCH 1/3] enhance cloud event metadata --- data_diff/cloud/__init__.py | 2 +- data_diff/cloud/datafold_api.py | 13 +++++++++++++ data_diff/dbt.py | 10 +++++++--- data_diff/tracking.py | 6 ++++++ tests/test_dbt.py | 28 ++++++++++++++-------------- 5 files changed, 41 insertions(+), 18 deletions(-) diff --git a/data_diff/cloud/__init__.py b/data_diff/cloud/__init__.py index 5893496d..5b774a7e 100644 --- a/data_diff/cloud/__init__.py +++ b/data_diff/cloud/__init__.py @@ -1,2 +1,2 @@ -from .datafold_api import DatafoldAPI, TCloudApiDataDiff +from .datafold_api import DatafoldAPI, TCloudApiDataDiff, TCloudApiOrgMeta from .data_source import get_or_create_data_source diff --git a/data_diff/cloud/datafold_api.py b/data_diff/cloud/datafold_api.py index 84d813f7..02969caf 100644 --- a/data_diff/cloud/datafold_api.py +++ b/data_diff/cloud/datafold_api.py @@ -105,6 +105,12 @@ class TCloudApiDataDiff(pydantic.BaseModel): filter2: Optional[str] = None +class TCloudApiOrgMeta(pydantic.BaseModel): + org_id: int + org_name: str + user_id: int + + class TSummaryResultPrimaryKeyStats(pydantic.BaseModel): total_rows: Tuple[int, int] nulls: Tuple[int, int] @@ -267,3 +273,10 @@ def check_data_source_test_results(self, job_id: int) -> List[TCloudApiDataSourc ) for item in rv.json()["results"] ] + + def get_org_meta(self) -> TCloudApiOrgMeta: + response = self.make_get_request(f"api/v1/organization/meta") + response_json = response.json() + return TCloudApiOrgMeta( + org_id=response_json["org_id"], org_name=response_json["org_name"], user_id=response_json["user_id"] + ) diff --git a/data_diff/dbt.py b/data_diff/dbt.py index 1f4cd01d..13584d9f 100644 --- a/data_diff/dbt.py +++ b/data_diff/dbt.py @@ -11,7 +11,7 @@ import keyring -from .cloud import DatafoldAPI, TCloudApiDataDiff, get_or_create_data_source +from .cloud import DatafoldAPI, TCloudApiDataDiff, TCloudApiOrgMeta, get_or_create_data_source from .dbt_parser import DbtParser, PROJECT_FILE @@ -79,6 +79,7 @@ def dbt_diff( # exit so the user can set the key if not api: return + org_meta = api.get_org_meta() if datasource_id is None: rich.print("[red]Data source ID not found in dbt_project.yml") @@ -113,7 +114,7 @@ def dbt_diff( if diff_vars.primary_keys: if is_cloud: - diff_thread = run_as_daemon(_cloud_diff, diff_vars, datasource_id, api) + diff_thread = run_as_daemon(_cloud_diff, diff_vars, datasource_id, api, org_meta) diff_threads.append(diff_thread) else: _local_diff(diff_vars) @@ -263,7 +264,7 @@ def _initialize_api() -> Optional[DatafoldAPI]: return DatafoldAPI(api_key=api_key, host=datafold_host) -def _cloud_diff(diff_vars: DiffVars, datasource_id: int, api: DatafoldAPI) -> None: +def _cloud_diff(diff_vars: DiffVars, datasource_id: int, api: DatafoldAPI, org_meta: TCloudApiOrgMeta) -> None: diff_output_str = _diff_output_base(".".join(diff_vars.dev_path), ".".join(diff_vars.prod_path)) payload = TCloudApiDataDiff( data_source1_id=datasource_id, @@ -337,6 +338,9 @@ def _cloud_diff(diff_vars: DiffVars, datasource_id: int, api: DatafoldAPI) -> No error=err_message, diff_id=diff_id, is_cloud=True, + org_id=org_meta.org_id, + org_name=org_meta.org_name, + user_id=org_meta.user_id ) send_event_json(event_json) diff --git a/data_diff/tracking.py b/data_diff/tracking.py index e6ecd0ce..0fb679c8 100644 --- a/data_diff/tracking.py +++ b/data_diff/tracking.py @@ -116,6 +116,9 @@ def create_end_event_json( error: Optional[str], diff_id: Optional[int] = None, is_cloud: bool = False, + org_id: Optional[int] = None, + org_name: Optional[str] = None, + user_id: Optional[int] = None, ): return { "event": "os_diff_run_end", @@ -138,6 +141,9 @@ def create_end_event_json( "dbt_user_id": dbt_user_id, "dbt_version": dbt_version, "dbt_project_id": dbt_project_id, + "org_id": org_id, + "org_name": org_name, + "user_id": user_id, }, } diff --git a/tests/test_dbt.py b/tests/test_dbt.py index e45a1045..6852af8b 100644 --- a/tests/test_dbt.py +++ b/tests/test_dbt.py @@ -2,6 +2,7 @@ from pathlib import Path import yaml +from data_diff.cloud.datafold_api import TCloudApiOrgMeta from data_diff.diff_tables import Algorithm from .test_cli import run_datadiff_cli @@ -482,6 +483,7 @@ def test_local_diff_no_diffs(self, mock_diff_tables): @patch("data_diff.dbt.os.environ") @patch("data_diff.dbt.DatafoldAPI") def test_cloud_diff(self, mock_api, mock_os_environ, mock_print): + org_meta = TCloudApiOrgMeta(org_id=1, org_name="", user_id=1) expected_api_key = "an_api_key" mock_api.create_data_diff.return_value = {"id": 123} mock_os_environ.get.return_value = expected_api_key @@ -493,7 +495,7 @@ def test_cloud_diff(self, mock_api, mock_os_environ, mock_print): threads = None where = "a_string" diff_vars = DiffVars(dev_qualified_list, prod_qualified_list, expected_primary_keys, connection, threads, where) - _cloud_diff(diff_vars, expected_datasource_id, api=mock_api) + _cloud_diff(diff_vars, expected_datasource_id, org_meta=org_meta, api=mock_api) mock_api.create_data_diff.assert_called_once() self.assertEqual(mock_print.call_count, 2) @@ -516,6 +518,7 @@ def test_cloud_diff(self, mock_api, mock_os_environ, mock_print): def test_diff_is_cloud( self, mock_print, mock_dbt_parser, mock_cloud_diff, mock_local_diff, mock_get_diff_vars, mock_initialize_api ): + org_meta = TCloudApiOrgMeta(org_id=1, org_name="", user_id=1) mock_dbt_parser_inst = Mock() mock_model = Mock() expected_dbt_vars_dict = { @@ -523,10 +526,9 @@ def test_diff_is_cloud( "prod_schema": "prod_schema", "datasource_id": 1, } - host = "a_host" - api_key = "a_api_key" - api = DatafoldAPI(api_key=api_key, host=host) - mock_initialize_api.return_value = api + mock_api = Mock() + mock_initialize_api.return_value = mock_api + mock_api.get_org_meta.return_value = org_meta connection = None threads = None where = "a_string" @@ -541,7 +543,7 @@ def test_diff_is_cloud( mock_dbt_parser_inst.set_connection.assert_not_called() mock_initialize_api.assert_called_once() - mock_cloud_diff.assert_called_once_with(expected_diff_vars, 1, api) + mock_cloud_diff.assert_called_once_with(expected_diff_vars, 1, mock_api, org_meta) mock_local_diff.assert_not_called() mock_print.assert_called_once() @@ -555,16 +557,16 @@ def test_diff_is_cloud( def test_diff_is_cloud_no_ds_id( self, _, mock_print, mock_dbt_parser, mock_cloud_diff, mock_local_diff, mock_get_diff_vars, mock_initialize_api ): + org_meta = TCloudApiOrgMeta(org_id=1, org_name="", user_id=1) mock_dbt_parser_inst = Mock() mock_model = Mock() expected_dbt_vars_dict = { "prod_database": "prod_db", "prod_schema": "prod_schema", } - host = "a_host" - api_key = "a_api_key" - api = DatafoldAPI(api_key=api_key, host=host) - mock_initialize_api.return_value = api + mock_api = Mock() + mock_initialize_api.return_value = mock_api + mock_api.get_org_meta.return_value = org_meta connection = None threads = None where = "a_string" @@ -723,10 +725,8 @@ def test_diff_is_cloud_no_pks( "prod_schema": "prod_schema", "datasource_id": 1, } - host = "a_host" - api_key = "a_api_key" - api = DatafoldAPI(api_key=api_key, host=host) - mock_initialize_api.return_value = api + mock_api = Mock() + mock_initialize_api.return_value = mock_api mock_dbt_parser_inst.get_models.return_value = [mock_model] mock_dbt_parser_inst.get_datadiff_variables.return_value = expected_dbt_vars_dict From 9bcab49d65509ac60e05ee1f3912a2c2568c785f Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 15 May 2023 14:34:37 -0600 Subject: [PATCH 2/3] merge fixes --- tests/test_dbt.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/test_dbt.py b/tests/test_dbt.py index 1a8a08f0..402df0c9 100644 --- a/tests/test_dbt.py +++ b/tests/test_dbt.py @@ -631,9 +631,6 @@ def test_diff_is_cloud( mock_api.get_data_source.return_value = TCloudApiDataSource(id=1, type="snowflake", name="snowflake") mock_initialize_api.return_value = mock_api mock_api.get_org_meta.return_value = org_meta - connection = None - threads = None - where = "a_string" mock_dbt_parser.return_value = mock_dbt_parser_inst mock_dbt_parser_inst.get_models.return_value = [mock_model] @@ -675,8 +672,6 @@ def test_diff_is_cloud_no_ds_id( connection = {} threads = None where = "a_string" - host = "a_host" - api_key = "a_api_key" mock_dbt_parser_inst = Mock() mock_model = Mock() expected_dbt_vars_dict = { @@ -686,9 +681,6 @@ def test_diff_is_cloud_no_ds_id( mock_api = Mock() mock_initialize_api.return_value = mock_api mock_api.get_org_meta.return_value = org_meta - connection = None - threads = None - where = "a_string" mock_dbt_parser.return_value = mock_dbt_parser_inst mock_dbt_parser_inst.get_models.return_value = [mock_model] From ad1ad26f1c508c6cb042bb79c58d977742dd71c5 Mon Sep 17 00:00:00 2001 From: Dan Date: Mon, 15 May 2023 14:44:18 -0600 Subject: [PATCH 3/3] format --- data_diff/dbt.py | 2 +- tests/test_dbt.py | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/data_diff/dbt.py b/data_diff/dbt.py index a7559f4e..b9693b89 100644 --- a/data_diff/dbt.py +++ b/data_diff/dbt.py @@ -359,7 +359,7 @@ def _cloud_diff(diff_vars: TDiffVars, datasource_id: int, api: DatafoldAPI, org_ is_cloud=True, org_id=org_meta.org_id, org_name=org_meta.org_name, - user_id=org_meta.user_id + user_id=org_meta.user_id, ) send_event_json(event_json) diff --git a/tests/test_dbt.py b/tests/test_dbt.py index 402df0c9..1bb93a15 100644 --- a/tests/test_dbt.py +++ b/tests/test_dbt.py @@ -1,7 +1,6 @@ import os from pathlib import Path -import yaml from data_diff.cloud.datafold_api import TCloudApiDataSource from data_diff.cloud.datafold_api import TCloudApiOrgMeta from data_diff.diff_tables import Algorithm @@ -615,7 +614,14 @@ def test_cloud_diff(self, mock_api, mock_os_environ, mock_print): @patch("data_diff.dbt.rich.print") @patch("data_diff.dbt.DatafoldAPI") def test_diff_is_cloud( - self, mock_api, mock_print, mock_dbt_parser, mock_cloud_diff, mock_local_diff, mock_get_diff_vars, mock_initialize_api, + self, + mock_api, + mock_print, + mock_dbt_parser, + mock_cloud_diff, + mock_local_diff, + mock_get_diff_vars, + mock_initialize_api, ): org_meta = TCloudApiOrgMeta(org_id=1, org_name="", user_id=1) connection = {} @@ -832,7 +838,14 @@ def test_diff_only_prod_schema( @patch("data_diff.dbt.rich.print") @patch("data_diff.dbt.DatafoldAPI") def test_diff_is_cloud_no_pks( - self, mock_api, mock_print, mock_dbt_parser, mock_cloud_diff, mock_local_diff, mock_get_diff_vars, mock_initialize_api + self, + mock_api, + mock_print, + mock_dbt_parser, + mock_cloud_diff, + mock_local_diff, + mock_get_diff_vars, + mock_initialize_api, ): mock_dbt_parser_inst = Mock() mock_dbt_parser.return_value = mock_dbt_parser_inst