From a3410f0223311ded2d39d9291bb50cb77da42983 Mon Sep 17 00:00:00 2001 From: Valentin Khomutenko Date: Fri, 12 Jan 2024 14:08:32 +0100 Subject: [PATCH] improve error reporting for PK type mismatch --- data_diff/diff_tables.py | 9 +++++++-- data_diff/errors.py | 4 ++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/data_diff/diff_tables.py b/data_diff/diff_tables.py index 0086b878..71fbf2c0 100644 --- a/data_diff/diff_tables.py +++ b/data_diff/diff_tables.py @@ -11,6 +11,7 @@ import attrs +from data_diff.errors import DataDiffMismatchingKeyTypesError from data_diff.info_tree import InfoTree, SegmentInfo from data_diff.utils import dbt_diff_string_template, run_as_daemon, safezip, getLogger, truncate_error, Vector from data_diff.thread_utils import ThreadedYielder @@ -292,9 +293,13 @@ def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, in if not isinstance(kt, IKey): raise NotImplementedError(f"Cannot use a column of type {kt} as a key") - for kt1, kt2 in safezip(key_types1, key_types2): + for i, (kt1, kt2) in enumerate(safezip(key_types1, key_types2)): if kt1.python_type is not kt2.python_type: - raise TypeError(f"Incompatible key types: {kt1} and {kt2}") + k1 = table1.key_columns[i] + k2 = table2.key_columns[i] + raise DataDiffMismatchingKeyTypesError( + f"Key columns {k1} and {k2} can't be compared due to different types." + ) # Query min/max values key_ranges = self._threaded_call_as_completed("query_key_range", [table1, table2]) diff --git a/data_diff/errors.py b/data_diff/errors.py index 3b446b32..b4cb92b8 100644 --- a/data_diff/errors.py +++ b/data_diff/errors.py @@ -68,3 +68,7 @@ class DataDiffCloudDiffTimedOut(Exception): class DataDiffSimpleSelectNotFound(Exception): "Raised when using --select on dbt < 1.5 and a model node is not found in the manifest." + + +class DataDiffMismatchingKeyTypesError(Exception): + "Raised when the key types of two tables do not match, like VARCHAR and INT."