Skip to content

Commit 32aeba5

Browse files
authored
fix: fix the stanza_resources download (#217)
1 parent f1747ce commit 32aeba5

File tree

1 file changed

+19
-6
lines changed

1 file changed

+19
-6
lines changed

dialogy/plugins/text/list_search_plugin/__init__.py

+19 -6
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,6 @@ def __init__(
105105
def fuzzy_init(self) -> None:
106106
"""
107107
Initializing the parameters for fuzzy dp search with their values
108-
109108
"""
110109
valid_langs = ["hi", "en"]
111110
for lang_code in self.fuzzy_dp_config.keys():
@@ -115,11 +114,25 @@ def fuzzy_init(self) -> None:
115114
)
116115
self.entity_dict[lang_code] = self.fuzzy_dp_config[lang_code]
117116
self.entity_types[lang_code] = list(self.entity_dict[lang_code].keys())
118-
self.nlp[lang_code] = stanza.Pipeline(
119-
lang=lang_code,
120-
tokenize_pretokenized=True,
121-
download_method=DownloadMethod.REUSE_RESOURCES,
122-
)
117+
try:
118+
# Only load the processors we actually need
119+
self.nlp[lang_code] = stanza.Pipeline(
120+
lang=lang_code,
121+
processors='tokenize', # Only load tokenizer
122+
tokenize_pretokenized=True,
123+
download_method=DownloadMethod.REUSE_RESOURCES,
124+
use_gpu=False,
125+
verbose=False # Reduce logging noise
126+
)
127+
except Exception as e:
128+
logger.warning(f"Failed to initialize stanza pipeline with error: {str(e)}")
129+
# Try alternative initialization with minimal configuration
130+
self.nlp[lang_code] = stanza.Pipeline(
131+
lang=lang_code,
132+
processors='tokenize',
133+
download_method=DownloadMethod.REUSE_RESOURCES,
134+
verbose=False
135+
)
123136

124137
def _search(self, transcripts: List[str], lang: str) -> List[MatchType]:
125138
"""

0 commit comments

Comments
 (0)