diff --git a/pii_check/pii_check_hook.py b/pii_check/pii_check_hook.py index 5f90beb..d9200fc 100644 --- a/pii_check/pii_check_hook.py +++ b/pii_check/pii_check_hook.py @@ -37,7 +37,7 @@ def get_payload(content, enabled_entity_list, blocked_list): def get_flagged_lines(files): flagged = [] for file in files: - if os.path.exists(file): + if os.path.exists(file) and not os.path.isdir(file): with open(file, "r") as fp: lines = fp.readlines() start_flag = False @@ -77,8 +77,8 @@ def locate_pii_in_files(content, files, checked, pii_dict): for number, line in enumerate(lines, 1): if content in line: if ( - pii_dict["stt_idx"], - pii_dict["end_idx"], + pii_dict["location"]["stt_idx"], + pii_dict["location"]["end_idx"], number, file, ) in checked: @@ -115,7 +115,7 @@ def check_for_pii(url, api_key, enabled_entity_list, blocked_list): continue for pii_dict in item["entities"]: line, file = locate_pii_in_files(content, files, checked, pii_dict) - checked.append((pii_dict["stt_idx"], pii_dict["end_idx"], line, file)) + checked.append((pii_dict["location"]["stt_idx"], pii_dict["location"]["end_idx"], line, file)) skip = False for item in flagged: if line > item[0] and line < item[1] and file == item[2]: @@ -123,8 +123,8 @@ def check_for_pii(url, api_key, enabled_entity_list, blocked_list): break if skip == False: msg.append( - f"PII found - type: {pii_dict['best_label']}, line number: {line}, file: {file}, start index: {pii_dict['stt_idx'] + 1}, end " - f"index: {pii_dict['end_idx'] + 1} " + f"PII found - type: {pii_dict['best_label']}, line number: {line}, file: {file}, start index: {pii_dict['location']['stt_idx'] + 1}, end " + f"index: {pii_dict['location']['end_idx'] + 1} " ) if not msg: diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..191320e --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,4 @@ +pytest==7.2.1 +pytest-check==2.1.2 +python-dotenv==0.19.0 +requests==2.28.1 \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_data/dir_with_files/file_with_pii.txt b/tests/test_data/dir_with_files/file_with_pii.txt new file mode 100644 index 0000000..c3fb7da --- /dev/null +++ b/tests/test_data/dir_with_files/file_with_pii.txt @@ -0,0 +1 @@ +Credit card number: 1234 5678 9101 1123 \ No newline at end of file diff --git a/tests/test_data/dir_with_files/file_with_pii_flag b/tests/test_data/dir_with_files/file_with_pii_flag new file mode 100644 index 0000000..f436311 --- /dev/null +++ b/tests/test_data/dir_with_files/file_with_pii_flag @@ -0,0 +1,10 @@ +PII_CHECK:OFF +Some content in between the flags. Ideally this content won't be checked for PII. +Below is a dummy PII to check this +Credit card number: 1234 5678 9101 1123 +CVV: 123 +PII_CHECK:ON + +Some content where the check will be performed. +Credit card number: 1234 5678 9101 1123 +CVV: 123 \ No newline at end of file diff --git a/tests/test_data/dir_with_files/file_without_pii.txt b/tests/test_data/dir_with_files/file_without_pii.txt new file mode 100644 index 0000000..700578d --- /dev/null +++ b/tests/test_data/dir_with_files/file_without_pii.txt @@ -0,0 +1 @@ +Here's some content. \ No newline at end of file diff --git a/tests/test_data/symlink_of_dir_with_files b/tests/test_data/symlink_of_dir_with_files new file mode 120000 index 0000000..5d87a7d --- /dev/null +++ b/tests/test_data/symlink_of_dir_with_files @@ -0,0 +1 @@ +./dir_with_files \ No newline at end of file diff --git a/tests/test_get_flagged_lines.py b/tests/test_get_flagged_lines.py new file mode 100644 index 0000000..9455cd7 --- /dev/null +++ b/tests/test_get_flagged_lines.py @@ -0,0 +1,13 @@ +import pytest_check as check +from pii_check.pii_check_hook import get_flagged_lines + + +def test_get_flagged_lines(): + files = [ + "tests/test_data/dir_with_files/file_with_pii.txt", "tests/test_data/dir_with_files/file_without_pii.txt", + "tests/test_data/dir_with_files/file_with_pii_flag_on", "tests/test_data/dir_with_files/file_with_pii_flag_off", + "tests/test_data/dir_with_files/file_with_pii_flag", "tests/test_data/symlink_of_dir_with_files" + ] + res = get_flagged_lines(files) + check.equal(res, [(1, 6, 'tests/test_data/dir_with_files/file_with_pii_flag')]) +