From 5e5d65a2455215282ce6a1bcbb7df9d497434c98 Mon Sep 17 00:00:00 2001
From: Erin Drummond
Date: Sun, 4 May 2025 22:12:04 +0000
Subject: [PATCH] Fix: table_diff - correctly handle nulls in boolean columns when displaying the row diff

---
 sqlmesh/core/console.py       |  6 ++++
 tests/core/test_table_diff.py | 53 ++++++++++++++++++++++++++++++++++-
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/sqlmesh/core/console.py b/sqlmesh/core/console.py
index ffe63a80de..98a49a1e6c 100644
--- a/sqlmesh/core/console.py
+++ b/sqlmesh/core/console.py
@@ -2205,6 +2205,12 @@ def _cells_match(x: t.Any, y: t.Any) -> bool:
 
     # Convert array-like objects to list for consistent comparison
     def _normalize(val: t.Any) -> t.Any:
+        # Convert Pandas null to Python null for the purposes of comparison to prevent errors like the following on boolean fields:
+        # - TypeError: boolean value of NA is ambiguous
+        # Note: pd.isnull() returns an ndarray of bools for array-like input, which cannot be used in an `if`; only a scalar result is a null check
+        is_null = pd.isnull(val)
+        if isinstance(is_null, (bool, np.bool_)) and is_null:
+            val = None
         return list(val) if isinstance(val, (pd.Series, np.ndarray)) else val
 
     return _normalize(x) == _normalize(y)
diff --git a/tests/core/test_table_diff.py b/tests/core/test_table_diff.py
index 9c2b07138a..f01ad4a6d7 100644
--- a/tests/core/test_table_diff.py
+++ b/tests/core/test_table_diff.py
@@ -9,7 +9,7 @@
 from rich.console import Console
 from sqlmesh.core.console import TerminalConsole
 from sqlmesh.core.context import Context
-from sqlmesh.core.config import AutoCategorizationMode, CategorizerConfig
+from sqlmesh.core.config import AutoCategorizationMode, CategorizerConfig, DuckDBConnectionConfig
 from sqlmesh.core.model import SqlModel, load_sql_based_model
 from sqlmesh.core.table_diff import TableDiff
 import numpy as np
@@ -511,3 +511,54 @@ def test_data_diff_array_dict(sushi_context_fixed_date):
     stripped_output = strip_ansi_codes(output)
     stripped_expected = expected_output.strip()
     assert stripped_output == stripped_expected
+
+
+def test_data_diff_nullable_booleans():
+    engine_adapter = DuckDBConnectionConfig().create_engine_adapter()
+
+    columns_to_types = {"key": exp.DataType.build("int"), "value": exp.DataType.build("boolean")}
+
+    engine_adapter.create_table("table_diff_source", columns_to_types)
+    engine_adapter.create_table("table_diff_target", columns_to_types)
+
+    engine_adapter.execute(
+        "insert into table_diff_source (key, value) values (1, true), (2, false), (3, null)"
+    )
+    engine_adapter.execute(
+        "insert into table_diff_target (key, value) values (1, false), (2, null), (3, true)"
+    )
+
+    table_diff = TableDiff(
+        adapter=engine_adapter,
+        source="table_diff_source",
+        target="table_diff_target",
+        source_alias="dev",
+        target_alias="prod",
+        on=["key"],
+    )
+
+    diff = table_diff.row_diff()
+
+    output = capture_console_output("show_row_diff", row_diff=diff)
+
+    expected_output = """
+Row Counts:
+└── PARTIAL MATCH: 3 rows (100.0%)
+
+COMMON ROWS column comparison stats:
+       pct_match
+value        0.0
+
+
+COMMON ROWS sample data differences:
+Column: value
+┏━━━━━┳━━━━━━━┳━━━━━━━┓
+┃ key ┃ DEV   ┃ PROD  ┃
+┡━━━━━╇━━━━━━━╇━━━━━━━┩
+│ 1   │ True  │ False │
+│ 2   │ False │       │
+│ 3   │       │ True  │
+└─────┴───────┴───────┘
+"""
+
+    assert strip_ansi_codes(output) == expected_output.strip()