From 8bf1476432f6dbc5c129f367d447835afe674718 Mon Sep 17 00:00:00 2001 From: Girish Sharma Date: Wed, 8 Feb 2023 19:44:23 +0530 Subject: [PATCH] Merging dev into main (#19) * Updated README to include blocked-list and restructured * Adding minor suggestion by Guy Co-authored-by: Guy Dumais * update the readme with the blocked-list details * update with new REST route * Update README.md Co-authored-by: koryf <101284003+koryf@users.noreply.github.com> * Update README.md Co-authored-by: koryf <101284003+koryf@users.noreply.github.com> * keeping the old REST path until 3.0 is released. * Update pii_dict format according to deid 3.0.0beta3 * Fix failing pre-commit hook fails on directories (#17) * Add test for get flagged lines * Skip PII flag check for directories --------- Co-authored-by: ketakipai Co-authored-by: ketakipai <110412492+ketakipai@users.noreply.github.com> Co-authored-by: Guy Dumais Co-authored-by: Guy Dumais Co-authored-by: koryf <101284003+koryf@users.noreply.github.com> --- pii_check/pii_check_hook.py | 12 ++++++------ requirements-test.txt | 4 ++++ tests/__init__.py | 0 tests/test_data/dir_with_files/file_with_pii.txt | 1 + tests/test_data/dir_with_files/file_with_pii_flag | 10 ++++++++++ tests/test_data/dir_with_files/file_without_pii.txt | 1 + tests/test_data/symlink_of_dir_with_files | 1 + tests/test_get_flagged_lines.py | 13 +++++++++++++ 8 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 requirements-test.txt create mode 100644 tests/__init__.py create mode 100644 tests/test_data/dir_with_files/file_with_pii.txt create mode 100644 tests/test_data/dir_with_files/file_with_pii_flag create mode 100644 tests/test_data/dir_with_files/file_without_pii.txt create mode 120000 tests/test_data/symlink_of_dir_with_files create mode 100644 tests/test_get_flagged_lines.py diff --git a/pii_check/pii_check_hook.py b/pii_check/pii_check_hook.py index 5f90beb..d9200fc 100644 --- a/pii_check/pii_check_hook.py +++ b/pii_check/pii_check_hook.py @@ -37,7 +37,7 @@ def get_payload(content, enabled_entity_list, blocked_list): def get_flagged_lines(files): flagged = [] for file in files: - if os.path.exists(file): + if os.path.exists(file) and not os.path.isdir(file): with open(file, "r") as fp: lines = fp.readlines() start_flag = False @@ -77,8 +77,8 @@ def locate_pii_in_files(content, files, checked, pii_dict): for number, line in enumerate(lines, 1): if content in line: if ( - pii_dict["stt_idx"], - pii_dict["end_idx"], + pii_dict["location"]["stt_idx"], + pii_dict["location"]["end_idx"], number, file, ) in checked: @@ -115,7 +115,7 @@ def check_for_pii(url, api_key, enabled_entity_list, blocked_list): continue for pii_dict in item["entities"]: line, file = locate_pii_in_files(content, files, checked, pii_dict) - checked.append((pii_dict["stt_idx"], pii_dict["end_idx"], line, file)) + checked.append((pii_dict["location"]["stt_idx"], pii_dict["location"]["end_idx"], line, file)) skip = False for item in flagged: if line > item[0] and line < item[1] and file == item[2]: @@ -123,8 +123,8 @@ def check_for_pii(url, api_key, enabled_entity_list, blocked_list): break if skip == False: msg.append( - f"PII found - type: {pii_dict['best_label']}, line number: {line}, file: {file}, start index: {pii_dict['stt_idx'] + 1}, end " - f"index: {pii_dict['end_idx'] + 1} " + f"PII found - type: {pii_dict['best_label']}, line number: {line}, file: {file}, start index: {pii_dict['location']['stt_idx'] + 1}, end " + f"index: {pii_dict['location']['end_idx'] + 1} " ) if not msg: diff --git a/requirements-test.txt b/requirements-test.txt new file mode 100644 index 0000000..191320e --- /dev/null +++ b/requirements-test.txt @@ -0,0 +1,4 @@ +pytest==7.2.1 +pytest-check==2.1.2 +python-dotenv==0.19.0 +requests==2.28.1 \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_data/dir_with_files/file_with_pii.txt b/tests/test_data/dir_with_files/file_with_pii.txt new file mode 100644 index 0000000..c3fb7da --- /dev/null +++ b/tests/test_data/dir_with_files/file_with_pii.txt @@ -0,0 +1 @@ +Credit card number: 1234 5678 9101 1123 \ No newline at end of file diff --git a/tests/test_data/dir_with_files/file_with_pii_flag b/tests/test_data/dir_with_files/file_with_pii_flag new file mode 100644 index 0000000..f436311 --- /dev/null +++ b/tests/test_data/dir_with_files/file_with_pii_flag @@ -0,0 +1,10 @@ +PII_CHECK:OFF +Some content in between the flags. Ideally this content won't be checked for PII. +Below is a dummy PII to check this +Credit card number: 1234 5678 9101 1123 +CVV: 123 +PII_CHECK:ON + +Some content where the check will be performed. +Credit card number: 1234 5678 9101 1123 +CVV: 123 \ No newline at end of file diff --git a/tests/test_data/dir_with_files/file_without_pii.txt b/tests/test_data/dir_with_files/file_without_pii.txt new file mode 100644 index 0000000..700578d --- /dev/null +++ b/tests/test_data/dir_with_files/file_without_pii.txt @@ -0,0 +1 @@ +Here's some content. \ No newline at end of file diff --git a/tests/test_data/symlink_of_dir_with_files b/tests/test_data/symlink_of_dir_with_files new file mode 120000 index 0000000..5d87a7d --- /dev/null +++ b/tests/test_data/symlink_of_dir_with_files @@ -0,0 +1 @@ +./dir_with_files \ No newline at end of file diff --git a/tests/test_get_flagged_lines.py b/tests/test_get_flagged_lines.py new file mode 100644 index 0000000..9455cd7 --- /dev/null +++ b/tests/test_get_flagged_lines.py @@ -0,0 +1,13 @@ +import pytest_check as check +from pii_check.pii_check_hook import get_flagged_lines + + +def test_get_flagged_lines(): + files = [ + "tests/test_data/dir_with_files/file_with_pii.txt", "tests/test_data/dir_with_files/file_without_pii.txt", + "tests/test_data/dir_with_files/file_with_pii_flag_on", "tests/test_data/dir_with_files/file_with_pii_flag_off", + "tests/test_data/dir_with_files/file_with_pii_flag", "tests/test_data/symlink_of_dir_with_files" + ] + res = get_flagged_lines(files) + check.equal(res, [(1, 6, 'tests/test_data/dir_with_files/file_with_pii_flag')]) +