diff --git a/Pipfile.lock b/Pipfile.lock index 8ef2099..2853526 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1120,28 +1120,28 @@ }, "ruff": { "hashes": [ - "sha256:0d5f89f254836799af1615798caa5f80b7f935d7a670fad66c5007928e57ace8", - "sha256:13e9ec6d6b55f6da412d59953d65d66e760d583dd3c1c72bf1f26435b5bfdbae", - "sha256:552fb6d861320958ca5e15f28b20a3d071aa83b93caee33a87b471f99a6c0835", - "sha256:58072f0c06080276804c6a4e21a9045a706584a958e644353603d36ca1eb8a60", - "sha256:6ddf5d654ac0d44389f6bf05cee4caeefc3132a64b58ea46738111d687352296", - "sha256:736272574e97157f7edbbb43b1d046125fce9e7d8d583d5d65d0c9bf2c15addf", - "sha256:8ef06f66f4a05c3ddbc9121a8b0cecccd92c5bf3dd43b5472ffe40b8ca10f0f8", - "sha256:9183dd615d8df50defa8b1d9a074053891ba39025cf5ae88e8bcb52edcc4bf08", - "sha256:97d9aefef725348ad77d6db98b726cfdb075a40b936c7984088804dfd38268a7", - "sha256:9f8402b7c4f96463f135e936d9ab77b65711fcd5d72e5d67597b543bbb43cf3f", - "sha256:ab78e33325a6f5374e04c2ab924a3367d69a0da36f8c9cb6b894a62017506111", - "sha256:bf197b98ed86e417412ee3b6c893f44c8864f816451441483253d5ff22c0e81e", - "sha256:c41319b85faa3aadd4d30cb1cffdd9ac6b89704ff79f7664b853785b48eccdf3", - "sha256:e248b1f0fa2749edd3350a2a342b67b43a2627434c059a063418e3d375cfe643", - "sha256:e4e56b3baa9c23d324ead112a4fdf20db9a3f8f29eeabff1355114dd96014604", - "sha256:e5fe710ab6061592521f902fca7ebcb9fabd27bc7c57c764298b1c1f15fff720", - "sha256:f21a1143776f8656d7f364bd264a9d60f01b7f52243fbe90e7670c0dfe0cf65d", - "sha256:ffb60904651c00a1e0b8df594591770018a0f04587f7deeb3838344fe3adabac" + "sha256:03a90200c5dfff49e4c967b405f27fdfa81594cbb7c5ff5609e42d7fe9680da5", + "sha256:1098d36f69831f7ff2a1da3e6407d5fbd6dfa2559e4f74ff2d260c5588900317", + "sha256:134ae019ef13e1b060ab7136e7828a6d83ea727ba123381307eb37c6bd5e01cb", + "sha256:4020d8bf8d3a32325c77af452a9976a9ad6455773bcb94991cf15bd66b347e47", + "sha256:587c5e95007612c26509f30acc506c874dab4c4abbacd0357400bd1aa799931b", + "sha256:5ad11a5e3868a73ca1fa4727fe7e33735ea78b416313f4368c504dbeb69c0f88", + "sha256:622b82bf3429ff0e346835ec213aec0a04d9730480cbffbb6ad9372014e31bbd", + "sha256:7512e8cb038db7f5db6aae0e24735ff9ea03bb0ed6ae2ce534e9baa23c1dc9ea", + "sha256:762f113232acd5b768d6b875d16aad6b00082add40ec91c927f0673a8ec4ede8", + "sha256:7b75ac29715ac60d554a049dbb0ef3b55259076181c3369d79466cb130eb5afd", + "sha256:8710ffd57bdaa6690cbf6ecff19884b8629ec2a2a2a2f783aa94b1cc795139ed", + "sha256:9d99cf80b0429cbebf31cbbf6f24f05a29706f0437c40413d950e67e2d4faca4", + "sha256:b5462d7804558ccff9c08fe8cbf6c14b7efe67404316696a2dde48297b1925bb", + "sha256:c01c048f9c3385e0fd7822ad0fd519afb282af9cf1778f3580e540629df89725", + "sha256:c9d526a62c9eda211b38463528768fd0ada25dad524cb33c0e99fcff1c67b5dc", + "sha256:d56de7220a35607f9fe59f8a6d018e14504f7b71d784d980835e20fc0611cd50", + "sha256:f69ab37771ea7e0715fead8624ec42996d101269a96e31f4d31be6fc33aa19b7", + "sha256:f99be814d77a5dac8a8957104bdd8c359e85c86b0ee0e38dca447cb1095f70fb" ], "index": "pypi", "markers": "python_version >= '3.7'", - "version": "==0.8.4" + "version": "==0.8.5" } } } diff --git a/deidentification/deidentification.py b/deidentification/deidentification.py index e2b7082..380607f 100644 --- a/deidentification/deidentification.py +++ b/deidentification/deidentification.py @@ -89,7 +89,12 @@ def __init__(self, config: DeidentificationConfig = DeidentificationConfig()): torch.load = self.__safe_load spacy.prefer_gpu() if not Deidentification.nlp: - Deidentification.nlp = spacy.load(self.config.spacy_model) + try: + Deidentification.nlp = spacy.load(self.config.spacy_model) + except OSError as err: + self.model_not_found_error(str(err)) + except Exception as err: + raise err def __str__(self) -> str: """Return a string representation of the Deidentification instance. @@ -107,6 +112,26 @@ def __str__(self) -> str: ] return "\n".join(program_info) + "\n" + str(self.config) + def model_not_found_error(self, err: str): + """Handles errors related to missing spaCy models and provides installation instructions. + + This function processes spaCy model errors, specifically handling cases where + a required model cannot be found. If the error indicates a missing model, + it prints installation instructions to stderr and exits the program. + + Args: + err: Error message string from the spaCy library. + """ + print(file=sys.stderr) + print(str(err), file=sys.stderr) + if "Can't find model" in str(err): + print(file=sys.stderr) + print("Please manually run the following command one time to download the required model:", file=sys.stderr) + print(file=sys.stderr) + print(f"python -m spacy download {self.config.spacy_model}", file=sys.stderr) + print(file=sys.stderr) + sys.exit(1) + def deidentify(self, text: str) -> str: """De-identify personal information in the input text. diff --git a/deidentification/deidentification_constants.py b/deidentification/deidentification_constants.py index 5099d0d..515d708 100644 --- a/deidentification/deidentification_constants.py +++ b/deidentification/deidentification_constants.py @@ -1,6 +1,6 @@ pgmName = "deidentification" pgmUrl = "https://github.com/jftuga/deidentification" -pgmVersion = "1.1.2" +pgmVersion = "1.2.0" GENDER_PRONOUNS = { "he": "HE/SHE", @@ -15,30 +15,6 @@ "mrs.": "", "ms.": ""} -# ALL_CONTRACTIONS = ( -# "it", -# "that", -# "what", -# "there", -# "here", -# "let", -# "how", -# "where", -# "who", -# "when", -# "one", -# "somebody", -# "someone", -# "something", -# "nobody", -# "everyone", -# "anybody", -# "nothing", -# "why", -# "this", -# "which") - - HTML_BEGIN = """ @@ -81,4 +57,3 @@ class bcolors: ENDC = '\033[0m' BOLD = '\033[1m' UNDERLINE = '\033[4m' -