From 6700681a5d975081ecda72940e09109afaab17ee Mon Sep 17 00:00:00 2001
From: John Taylor <jftuga@users.noreply.github.com>
Date: Thu, 2 Jan 2025 21:04:06 -0500
Subject: [PATCH] get_identified_elements() will now always return pronouns

---
 README.md                                      | 9 +++------
 deidentification/deidentification.py           | 6 +++++-
 deidentification/deidentification_constants.py | 2 +-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/README.md b/README.md
index 394bd14..8704ea1 100644
--- a/README.md
+++ b/README.md
@@ -29,11 +29,6 @@ Download the required spaCy model:
 python -m spacy download en_core_web_trf
 ```
 
-For debugging, by setting `config.debug=True`, you will also need [VeryPrettyTable](https://github.com/smeggingsmegger/):
-```bash
-pip install VeryPrettyTable
-```
-
 ## Usage
 
 ### Command Line Interface
@@ -41,6 +36,8 @@ pip install VeryPrettyTable
 The package includes a command-line tool for quick de-identification of text files:
 
 ```bash
+deidentify input_file [options]
+# or:
 python -m deidentification.deidentify input_file [options]
 ```
 
@@ -55,7 +52,7 @@ Options:
 Example:
 ```bash
 # De-identify a text file and save with HTML markup
-python -m deidentification.deidentify input.txt -H -o output.html -r "[REDACTED]"
+deidentify input.txt -H -o output.html -r "[REDACTED]"
 ```
 
 ### Python API Usage
diff --git a/deidentification/deidentification.py b/deidentification/deidentification.py
index d901cbe..6087225 100644
--- a/deidentification/deidentification.py
+++ b/deidentification/deidentification.py
@@ -69,6 +69,9 @@ def __init__(self, config: DeidentificationConfig = DeidentificationConfig()):
         # this combines all self.all_persons lists from multiple passes of self._find_all_persons()
         self.aggregate_persons: list[dict] = []
 
+        # accumulates self.all_pronouns across loop iterations in self.deidentify(), which clears that list on every pass
+        self.aggregate_pronouns: list[dict] = []
+
         self.all_pronouns: list[dict] = []
         self.doc: Optional[Doc] = None
         self.table_class  = None
@@ -139,6 +142,7 @@ def deidentify(self, text: str) -> str:
                 self.__debug_log(f"deidentify(): next iter, persons={len(self.all_persons)}")
             if persons_count == 0:
                 break
+            self.aggregate_pronouns.extend(self.all_pronouns)
             self.all_pronouns = []
             merged = self._merge_metadata()
             replaced_text = self._replace_merged(replaced_text, merged)
@@ -167,7 +171,7 @@ def deidentify_with_wrapped_html(self, text: str, html_begin: str = HTML_BEGIN,
         return buffer.getvalue()
 
     def get_identified_elements(self) -> dict:
-        elements = {"message": self.replaced_text, "entities": self.aggregate_persons, "pronouns": self.all_pronouns}
+        elements = {"message": self.replaced_text, "entities": self.aggregate_persons, "pronouns": self.aggregate_pronouns}
         return elements
 
     def _find_all_persons(self) -> int:
diff --git a/deidentification/deidentification_constants.py b/deidentification/deidentification_constants.py
index 0ca4bb3..5099d0d 100644
--- a/deidentification/deidentification_constants.py
+++ b/deidentification/deidentification_constants.py
@@ -1,6 +1,6 @@
 pgmName = "deidentification"
 pgmUrl = "https://github.com/jftuga/deidentification"
-pgmVersion = "1.1.1"
+pgmVersion = "1.1.2"
 
 GENDER_PRONOUNS = {
     "he": "HE/SHE",