23
23
24
24
# Type aliases, just for local readability. TODO: switch to real type declarations later.
25
25
_Op = Literal ["+" , "-" ]
26
- _PK = Any
26
+ _PK = Sequence [ Any ]
27
27
_Row = Tuple [Any ]
28
28
29
29
@@ -34,24 +34,27 @@ def diff_sets(
34
34
json_cols : dict = None ,
35
35
columns1 : Sequence [str ],
36
36
columns2 : Sequence [str ],
37
+ key_columns1 : Sequence [str ],
38
+ key_columns2 : Sequence [str ],
37
39
ignored_columns1 : Collection [str ],
38
40
ignored_columns2 : Collection [str ],
39
41
) -> Iterator :
40
42
# Diff only by the columns of interest (PKs + relevant columns, minus ignored ones), but yield rows with the ignored columns included.
41
43
sa : Set [_Row ] = {tuple (val for col , val in safezip (columns1 , row ) if col not in ignored_columns1 ) for row in a }
42
44
sb : Set [_Row ] = {tuple (val for col , val in safezip (columns2 , row ) if col not in ignored_columns2 ) for row in b }
43
45
44
- # The first item is always the key (see TableDiffer.relevant_columns)
45
- # TODO update when we add compound keys to hashdiff
46
+ # The first items are always the PK (see TableSegment.relevant_columns)
46
47
diffs_by_pks : Dict [_PK , List [Tuple [_Op , _Row ]]] = defaultdict (list )
47
48
for row in a :
49
+ pk : _PK = tuple (val for col , val in zip (key_columns1 , row ))
48
50
cutrow : _Row = tuple (val for col , val in zip (columns1 , row ) if col not in ignored_columns1 )
49
51
if cutrow not in sb :
50
- diffs_by_pks [row [ 0 ] ].append (("-" , row ))
52
+ diffs_by_pks [pk ].append (("-" , row ))
51
53
for row in b :
54
+ pk : _PK = tuple (val for col , val in zip (key_columns2 , row ))
52
55
cutrow : _Row = tuple (val for col , val in zip (columns2 , row ) if col not in ignored_columns2 )
53
56
if cutrow not in sa :
54
- diffs_by_pks [row [ 0 ] ].append (("+" , row ))
57
+ diffs_by_pks [pk ].append (("+" , row ))
55
58
56
59
warned_diff_cols = set ()
57
60
for diffs in (diffs_by_pks [pk ] for pk in sorted (diffs_by_pks )):
@@ -232,6 +235,8 @@ def _bisect_and_diff_segments(
232
235
json_cols = json_cols ,
233
236
columns1 = table1 .relevant_columns ,
234
237
columns2 = table2 .relevant_columns ,
238
+ key_columns1 = table1 .key_columns ,
239
+ key_columns2 = table2 .key_columns ,
235
240
ignored_columns1 = self .ignored_columns1 ,
236
241
ignored_columns2 = self .ignored_columns2 ,
237
242
)
0 commit comments