Skip to content

Commit

Permalink
Merge pull request #7 from jftuga/feature-automate-download
Browse files Browse the repository at this point in the history
advise user on model download
  • Loading branch information
jftuga authored Jan 3, 2025
2 parents afb0cd1 + 713437d commit 26c0fa7
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 46 deletions.
38 changes: 19 additions & 19 deletions Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 26 additions & 1 deletion deidentification/deidentification.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,12 @@ def __init__(self, config: DeidentificationConfig = DeidentificationConfig()):
torch.load = self.__safe_load
spacy.prefer_gpu()
if not Deidentification.nlp:
Deidentification.nlp = spacy.load(self.config.spacy_model)
try:
Deidentification.nlp = spacy.load(self.config.spacy_model)
except OSError as err:
self.model_not_found_error(str(err))
except Exception as err:
raise err

def __str__(self) -> str:
"""Return a string representation of the Deidentification instance.
Expand All @@ -107,6 +112,26 @@ def __str__(self) -> str:
]
return "\n".join(program_info) + "\n" + str(self.config)

def model_not_found_error(self, err: str):
    """Report a spaCy model loading failure on stderr and exit.

    The error text is always echoed to stderr. When it contains spaCy's
    "Can't find model" message, the one-time download command for the
    configured model (``self.config.spacy_model``) is printed as well.
    The process then terminates with exit status 1 in every case, so a
    load failure is never silently ignored by the caller.

    Args:
        err: Error message string from the spaCy library.

    Raises:
        SystemExit: Always, with exit code 1.
    """
    # Normalise once; this also tolerates a caller passing the exception
    # object itself instead of its message.
    message = str(err)

    # Leading blank line visually separates the report from prior output.
    lines = ["", message]
    if "Can't find model" in message:
        lines += [
            "",
            "Please manually run the following command one time to download the required model:",
            "",
            f"python -m spacy download {self.config.spacy_model}",
            "",
        ]

    # One batched write; the text is identical to printing each line in turn.
    print("\n".join(lines), file=sys.stderr)
    sys.exit(1)

def deidentify(self, text: str) -> str:
"""De-identify personal information in the input text.
Expand Down
27 changes: 1 addition & 26 deletions deidentification/deidentification_constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pgmName = "deidentification"
pgmUrl = "https://github.com/jftuga/deidentification"
pgmVersion = "1.1.2"
pgmVersion = "1.2.0"

GENDER_PRONOUNS = {
"he": "HE/SHE",
Expand All @@ -15,30 +15,6 @@
"mrs.": "",
"ms.": ""}

# ALL_CONTRACTIONS = (
# "it",
# "that",
# "what",
# "there",
# "here",
# "let",
# "how",
# "where",
# "who",
# "when",
# "one",
# "somebody",
# "someone",
# "something",
# "nobody",
# "everyone",
# "anybody",
# "nothing",
# "why",
# "this",
# "which")


HTML_BEGIN = """<!DOCTYPE html>
<html>
<head>
Expand Down Expand Up @@ -81,4 +57,3 @@ class bcolors:
ENDC = '\033[0m'
BOLD = '\033[1m'
UNDERLINE = '\033[4m'

0 comments on commit 26c0fa7

Please sign in to comment.