improve error reporting for PK type mismatch

vvkh · vvkh · commit b71db7abbfb9 · 2024-01-12T14:35:11.000+01:00
diff --git a/data_diff/diff_tables.py b/data_diff/diff_tables.py
@@ -11,6 +11,7 @@
 
 import attrs
 
+from data_diff.errors import DataDiffMismatchingKeyTypesError
 from data_diff.info_tree import InfoTree, SegmentInfo
 from data_diff.utils import dbt_diff_string_template, run_as_daemon, safezip, getLogger, truncate_error, Vector
 from data_diff.thread_utils import ThreadedYielder
@@ -285,16 +286,21 @@ def _bisect_and_diff_tables(self, table1: TableSegment, table2: TableSegment, in
         if len(table1.key_columns) != len(table2.key_columns):
             raise ValueError("Tables should have an equivalent number of key columns!")
 
         key_types1 = [table1._schema[i] for i in table1.key_columns]
         key_types2 = [table2._schema[i] for i in table2.key_columns]
 
         for kt in key_types1 + key_types2:
             if not isinstance(kt, IKey):
                 raise NotImplementedError(f"Cannot use a column of type {kt} as a key")
 
-        for kt1, kt2 in safezip(key_types1, key_types2):
+        # Track the position so a mismatch is reported by key column name, not by type object.
+        for i, (kt1, kt2) in enumerate(safezip(key_types1, key_types2)):
             if kt1.python_type is not kt2.python_type:
-                raise TypeError(f"Incompatible key types: {kt1} and {kt2}")
+                k1 = table1.key_columns[i]
+                k2 = table2.key_columns[i]
+                raise DataDiffMismatchingKeyTypesError(
+                    f"Key columns {k1} and {k2} can't be compared due to different types."
+                )
 
         # Query min/max values
         key_ranges = self._threaded_call_as_completed("query_key_range", [table1, table2])
diff --git a/data_diff/errors.py b/data_diff/errors.py
@@ -68,3 +68,7 @@ class DataDiffCloudDiffTimedOut(Exception):
 
 class DataDiffSimpleSelectNotFound(Exception):
     "Raised when using --select on dbt < 1.5 and a model node is not found in the manifest."
+
+
+class DataDiffMismatchingKeyTypesError(TypeError):
+    "Raised when the key types of two tables do not match, like VARCHAR and INT. Still a TypeError, as raised before."