Skip to content

Fix: table_diff - correctly handle nulls in boolean columns when displaying the row diff #4310

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sqlmesh/core/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -2205,6 +2205,10 @@ def _cells_match(x: t.Any, y: t.Any) -> bool:

# Convert array-like objects to list for consistent comparison
def _normalize(val: t.Any) -> t.Any:
    """Return a form of ``val`` that can be compared safely with ``==``.

    Args:
        val: A single cell value; may be a scalar, a Pandas/NumPy null marker,
            or an array-like (``pd.Series`` / ``np.ndarray``).

    Returns:
        ``None`` for scalar nulls (``None``, ``NaN``, ``pd.NA``, ``pd.NaT``), a
        plain ``list`` of normalized elements for array-likes, and ``val``
        itself otherwise.
    """
    # Array-likes have to be handled before the null check: pd.isnull() returns an
    # element-wise boolean array for them, and using that array in an `if` raises
    # "ValueError: The truth value of an array with more than one element is ambiguous".
    # Elements are normalized too, so nulls inside arrays compare consistently.
    if isinstance(val, (pd.Series, np.ndarray)):
        return [_normalize(item) for item in val]

    # Convert Pandas null to Python null for the purposes of comparison to prevent errors like the following on boolean fields:
    # - TypeError: boolean value of NA is ambiguous
    is_null = pd.isnull(val)

    # Only a scalar answer is a usable null flag; other containers (e.g. plain lists)
    # also produce an element-wise array, which must not be truth-tested.
    if isinstance(is_null, (bool, np.bool_)) and is_null:
        return None
    return val

return _normalize(x) == _normalize(y)
Expand Down
53 changes: 52 additions & 1 deletion tests/core/test_table_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from rich.console import Console
from sqlmesh.core.console import TerminalConsole
from sqlmesh.core.context import Context
from sqlmesh.core.config import AutoCategorizationMode, CategorizerConfig
from sqlmesh.core.config import AutoCategorizationMode, CategorizerConfig, DuckDBConnectionConfig
from sqlmesh.core.model import SqlModel, load_sql_based_model
from sqlmesh.core.table_diff import TableDiff
import numpy as np
Expand Down Expand Up @@ -511,3 +511,54 @@ def test_data_diff_array_dict(sushi_context_fixed_date):
stripped_output = strip_ansi_codes(output)
stripped_expected = expected_output.strip()
assert stripped_output == stripped_expected


def test_data_diff_nullable_booleans():
    """Check that the row diff renders when a boolean column contains NULLs.

    Two DuckDB tables with the same keys are seeded so that every row differs in
    ``value`` and a NULL appears on each side; the text produced by
    ``show_row_diff`` is then compared against the expected report.
    """
    # NOTE(review): the literal below is reproduced exactly as it appears in this
    # view. The cell rows are narrower than the table borders, so the spacing may
    # have been collapsed in transit -- confirm against real console output.
    expected = """
Row Counts:
└── PARTIAL MATCH: 3 rows (100.0%)

COMMON ROWS column comparison stats:
pct_match
value 0.0


COMMON ROWS sample data differences:
Column: value
┏━━━━━┳━━━━━━━┳━━━━━━━┓
┃ key ┃ DEV ┃ PROD ┃
┡━━━━━╇━━━━━━━╇━━━━━━━┩
│ 1 │ True │ False │
│ 2 │ False │ <NA> │
│ 3 │ <NA> │ True │
└─────┴───────┴───────┘
"""

    adapter = DuckDBConnectionConfig().create_engine_adapter()

    # Both tables share one schema: an integer join key and a nullable boolean.
    schema = {
        "key": exp.DataType.build("int"),
        "value": exp.DataType.build("boolean"),
    }
    for table_name in ("table_diff_source", "table_diff_target"):
        adapter.create_table(table_name, schema)

    # Every key is present on both sides, but no `value` agrees between them.
    seed_statements = (
        "insert into table_diff_source (key, value) values (1, true), (2, false), (3, null)",
        "insert into table_diff_target (key, value) values (1, false), (2, null), (3, true)",
    )
    for statement in seed_statements:
        adapter.execute(statement)

    differ = TableDiff(
        adapter=adapter,
        source="table_diff_source",
        target="table_diff_target",
        source_alias="dev",
        target_alias="prod",
        on=["key"],
    )
    rendered = capture_console_output("show_row_diff", row_diff=differ.row_diff())

    assert strip_ansi_codes(rendered) == expected.strip()