diff --git a/data_diff/databases/_connect.py b/data_diff/databases/_connect.py index be55cc2d..1e34ef62 100644 --- a/data_diff/databases/_connect.py +++ b/data_diff/databases/_connect.py @@ -26,7 +26,7 @@ from data_diff.databases.mssql import MsSQL -@attrs.define(frozen=True) +@attrs.frozen class MatchUriPath: database_cls: Type[Database] @@ -98,13 +98,11 @@ class Connect: """Provides methods for connecting to a supported database using a URL or connection dict.""" database_by_scheme: Dict[str, Database] - match_uri_path: Dict[str, MatchUriPath] conn_cache: MutableMapping[Hashable, Database] def __init__(self, database_by_scheme: Dict[str, Database] = DATABASE_BY_SCHEME): super().__init__() self.database_by_scheme = database_by_scheme - self.match_uri_path = {name: MatchUriPath(cls) for name, cls in database_by_scheme.items()} self.conn_cache = weakref.WeakValueDictionary() def for_databases(self, *dbs) -> Self: @@ -157,12 +155,10 @@ def connect_to_uri(self, db_uri: str, thread_count: Optional[int] = 1, **kwargs) return self.connect_with_dict(conn_dict, thread_count, **kwargs) try: - matcher = self.match_uri_path[scheme] + cls = self.database_by_scheme[scheme] except KeyError: raise NotImplementedError(f"Scheme '{scheme}' currently not supported") - cls = matcher.database_cls - if scheme == "databricks": assert not dsn.user kw = {} @@ -175,6 +171,7 @@ def connect_to_uri(self, db_uri: str, thread_count: Optional[int] = 1, **kwargs) kw["filepath"] = dsn.dbname kw["dbname"] = dsn.user else: + matcher = MatchUriPath(cls) kw = matcher.match_path(dsn) if scheme == "bigquery": @@ -198,7 +195,7 @@ def connect_to_uri(self, db_uri: str, thread_count: Optional[int] = 1, **kwargs) kw = {k: v for k, v in kw.items() if v is not None} - if issubclass(cls, ThreadedDatabase): + if isinstance(cls, type) and issubclass(cls, ThreadedDatabase): db = cls(thread_count=thread_count, **kw, **kwargs) else: db = cls(**kw, **kwargs) @@ -209,11 +206,10 @@ def connect_with_dict(self, d, thread_count, **kwargs): d = dict(d) driver = d.pop("driver") try: - matcher = self.match_uri_path[driver] + cls = self.database_by_scheme[driver] except KeyError: raise NotImplementedError(f"Driver '{driver}' currently not supported") - cls = matcher.database_cls if issubclass(cls, ThreadedDatabase): db = cls(thread_count=thread_count, **d, **kwargs) else: diff --git a/data_diff/databases/base.py b/data_diff/databases/base.py index bf165461..059854a5 100644 --- a/data_diff/databases/base.py +++ b/data_diff/databases/base.py @@ -1093,11 +1093,7 @@ def _refine_coltypes( list, log_message=table_path, ) - if not samples_by_row: - raise ValueError(f"Table {table_path} is empty.") - - samples_by_col = list(zip(*samples_by_row)) - + samples_by_col = list(zip(*samples_by_row)) if samples_by_row else [[]] * len(text_columns) for col_name, samples in safezip(text_columns, samples_by_col): uuid_samples = [s for s in samples if s and is_uuid(s)] diff --git a/data_diff/hashdiff_tables.py b/data_diff/hashdiff_tables.py index b77594cc..b9bf1239 100644 --- a/data_diff/hashdiff_tables.py +++ b/data_diff/hashdiff_tables.py @@ -118,14 +118,6 @@ def _validate_and_adjust_columns(self, table1: TableSegment, table2: TableSegmen if lowest.precision != col2.precision: table2._schema[c2] = attrs.evolve(col2, precision=lowest.precision) - elif isinstance(col1, ColType_UUID): - if strict and not isinstance(col2, ColType_UUID): - raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}") - - elif isinstance(col1, StringType): - if strict and not isinstance(col2, StringType): - raise TypeError(f"Incompatible types for column '{c1}': {col1} <-> {col2}") - for t in [table1, table2]: for c in t.relevant_columns: ctype = t._schema[c] diff --git a/tests/test_diff_tables.py b/tests/test_diff_tables.py index 2e48798a..9a975089 100644 --- a/tests/test_diff_tables.py +++ b/tests/test_diff_tables.py @@ -696,10 +696,12 @@ def setUp(self): self.differ = HashDiffer(bisection_factor=2) def test_right_table_empty(self): - self.assertRaises(ValueError, list, self.differ.diff_tables(self.a, self.b)) + # NotImplementedError: Cannot use a column of type Text(_notes=[]) as a key + self.assertRaises(NotImplementedError, list, self.differ.diff_tables(self.a, self.b)) def test_left_table_empty(self): - self.assertRaises(ValueError, list, self.differ.diff_tables(self.a, self.b)) + # NotImplementedError: Cannot use a column of type Text(_notes=[]) as a key + self.assertRaises(NotImplementedError, list, self.differ.diff_tables(self.a, self.b)) class TestInfoTree(DiffTestCase):