Skip to content

Commit

Permalink
Merge pull request seperman#454 from sf-tcalhoun/diff_for_lists_fix
Browse files Browse the repository at this point in the history
Fix for bug when diffing two lists with ignore_order and providing compare_func
  • Loading branch information
seperman authored Mar 12, 2024
2 parents a08d550 + d705a4b commit 71fde30
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 1 deletion.
4 changes: 3 additions & 1 deletion deepdiff/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,9 @@ def defaultdict_orderedset():
pre_calced_distances = self._precalculate_numpy_arrays_distance(
hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type)

if hashes_added and hashes_removed and self.iterable_compare_func and len(hashes_added) > 1 and len(hashes_removed) > 1:
if hashes_added and hashes_removed \
and self.iterable_compare_func \
and len(hashes_added) > 0 and len(hashes_removed) > 0:
pre_calced_distances = self._precalculate_distance_by_custom_compare_func(
hashes_added, hashes_removed, t1_hashtable, t2_hashtable, _original_type)

Expand Down
115 changes: 115 additions & 0 deletions tests/test_ignore_order.py
Original file line number Diff line number Diff line change
Expand Up @@ -1136,6 +1136,121 @@ def compare_func(x, y, level=None):
assert expected_with_compare_func == ddiff2
assert ddiff != ddiff2

def test_ignore_order_with_compare_func_with_one_each_hashes_added_hashes_removed(self):
"""
Scenario:
In this example which demonstrates the problem... We have two dictionaries containing lists for
individualNames. Each list contains exactly 2 elements. The effective change is that we are
replacing the 2nd element in the list.
NOTE: This is considered a REPLACEMENT of the second element and not an UPDATE of the element
because we are providing a custom compare_func which will determine matching elements based on
the value of the nameIdentifier field. If the custom compare_func is not used, then
deepdiff.diff will mistakenly treat the difference as being individual field updates for every
field in the second element of the list.
Intent:
Use our custom compare_func, since we have provided it.
We need to fall into self._precalculate_distance_by_custom_compare_func
To do this, we are proposing a change to deepdiff.diff line 1128:
Original:
if hashes_added and hashes_removed and self.iterable_compare_func and len(hashes_added) > 1 and len(hashes_removed) > 1:
Proposed/Updated:
if hashes_added and hashes_removed \
and self.iterable_compare_func \
and len(hashes_added) > 0 and len(hashes_removed) > 0:
NOTE: It is worth mentioning that deepdiff.diff line 1121, might also benefit by changing the length conditions
to evaluate for > 0 (rather than > 1).
"""

t1 = {
"individualNames": [
{
"firstName": "Johnathan",
"lastName": "Doe",
"prefix": "COLONEL",
"middleName": "A",
"primaryIndicator": True,
"professionalDesignation": "PHD",
"suffix": "SR",
"nameIdentifier": "00001"
},
{
"firstName": "John",
"lastName": "Doe",
"prefix": "",
"middleName": "",
"primaryIndicator": False,
"professionalDesignation": "",
"suffix": "SR",
"nameIdentifier": "00002"
}
]
}

t2 = {
"individualNames": [
{
"firstName": "Johnathan",
"lastName": "Doe",
"prefix": "COLONEL",
"middleName": "A",
"primaryIndicator": True,
"professionalDesignation": "PHD",
"suffix": "SR",
"nameIdentifier": "00001"
},
{
"firstName": "Johnny",
"lastName": "Doe",
"prefix": "",
"middleName": "A",
"primaryIndicator": False,
"professionalDesignation": "",
"suffix": "SR",
"nameIdentifier": "00003"
}
]
}
def compare_func(item1, item2, level=None):
print("*** inside compare ***")
it1_keys = item1.keys()

try:

# --- individualNames ---
if 'nameIdentifier' in it1_keys and 'lastName' in it1_keys:
match_result = item1['nameIdentifier'] == item2['nameIdentifier']
print("individualNames - matching result:", match_result)
return match_result
else:
print("Unknown list item...", "matching result:", item1 == item2)
return item1 == item2
except Exception:
raise CannotCompare() from None
# ---------------------------- End of nested function

actual_diff = DeepDiff(t1, t2, report_repetition=True,
ignore_order=True, iterable_compare_func=compare_func, cutoff_intersection_for_pairs=1)

old_invalid_diff = {
'values_changed': {"root['individualNames'][1]['firstName']": {'new_value': 'Johnny', 'old_value': 'John'},
"root['individualNames'][1]['middleName']": {'new_value': 'A', 'old_value': ''},
"root['individualNames'][1]['nameIdentifier']": {'new_value': '00003',
'old_value': '00002'}}}
new_expected_diff = {'iterable_item_added': {
"root['individualNames'][1]": {'firstName': 'Johnny', 'lastName': 'Doe', 'prefix': '', 'middleName': 'A',
'primaryIndicator': False, 'professionalDesignation': '', 'suffix': 'SR',
'nameIdentifier': '00003'}}, 'iterable_item_removed': {
"root['individualNames'][1]": {'firstName': 'John', 'lastName': 'Doe', 'prefix': '', 'middleName': '',
'primaryIndicator': False, 'professionalDesignation': '', 'suffix': 'SR',
'nameIdentifier': '00002'}}}

assert old_invalid_diff != actual_diff
assert new_expected_diff == actual_diff


class TestDynamicIgnoreOrder:
def test_ignore_order_func(self):
Expand Down

0 comments on commit 71fde30

Please # to comment.