@@ -105,7 +105,6 @@ def __init__(
105
105
def fuzzy_init (self ) -> None :
106
106
"""
107
107
Initializing the parameters for fuzzy dp search with their values
108
-
109
108
"""
110
109
valid_langs = ["hi" , "en" ]
111
110
for lang_code in self .fuzzy_dp_config .keys ():
@@ -115,11 +114,25 @@ def fuzzy_init(self) -> None:
115
114
)
116
115
self .entity_dict [lang_code ] = self .fuzzy_dp_config [lang_code ]
117
116
self .entity_types [lang_code ] = list (self .entity_dict [lang_code ].keys ())
118
- self .nlp [lang_code ] = stanza .Pipeline (
119
- lang = lang_code ,
120
- tokenize_pretokenized = True ,
121
- download_method = DownloadMethod .REUSE_RESOURCES ,
122
- )
117
+ try :
118
+ # Only load the processors we actually need
119
+ self .nlp [lang_code ] = stanza .Pipeline (
120
+ lang = lang_code ,
121
+ processors = 'tokenize' , # Only load tokenizer
122
+ tokenize_pretokenized = True ,
123
+ download_method = DownloadMethod .REUSE_RESOURCES ,
124
+ use_gpu = False ,
125
+ verbose = False # Reduce logging noise
126
+ )
127
+ except Exception as e :
128
+ logger .warning (f"Failed to initialize stanza pipeline with error: { str (e )} " )
129
+ # Try alternative initialization with minimal configuration
130
+ self .nlp [lang_code ] = stanza .Pipeline (
131
+ lang = lang_code ,
132
+ processors = 'tokenize' ,
133
+ download_method = DownloadMethod .REUSE_RESOURCES ,
134
+ verbose = False
135
+ )
123
136
124
137
def _search (self , transcripts : List [str ], lang : str ) -> List [MatchType ]:
125
138
"""
0 commit comments