From 2a75bdfa736a049a38c579b58c07e74a7cc60b2e Mon Sep 17 00:00:00 2001
From: root <root@e2e-81-69.ssdcloudindia.net>
Date: Thu, 13 Jan 2022 20:59:13 +0530
Subject: [PATCH] Updating regex compilation, handling of null alternatives

---
 .../text/list_search_plugin/__init__.py       | 80 +++++++++++--------
 1 file changed, 45 insertions(+), 35 deletions(-)

diff --git a/dialogy/plugins/text/list_search_plugin/__init__.py b/dialogy/plugins/text/list_search_plugin/__init__.py
index cae11d22..8d09a495 100644
--- a/dialogy/plugins/text/list_search_plugin/__init__.py
+++ b/dialogy/plugins/text/list_search_plugin/__init__.py
@@ -175,6 +175,12 @@ def _search(self, transcripts: List[str], lang: str) -> List[MatchType]:
         :return: Token matches with the transcript.
         :rtype: List[MatchType]
         """
+        
+        """
+        Drop empty or None transcript alternatives.
+        """
+        transcripts = [x for x in transcripts if x and x!= '' and len(x) > 0]
+        
         logger.debug(f"style: {self.style}")
         logger.debug("transcripts")
         logger.debug(transcripts)
@@ -194,7 +200,7 @@ def search_regex(
         final_match = None
 
         for pattern in entity_patterns:
-            result = re.search(pattern, query)
+            result = re.search(re.compile(r'\b' + pattern + r'\b'), query)
             if result:
                 match_value = match_dict[result.group()]
                 match_len = len(match_value)
@@ -216,40 +222,44 @@ def dp_search(
         match_dict: Dict[Any, Any] = {},
     ) -> Tuple[Text, Label, Value, Span, Score]:
 
-        sentence = nlp(query).sentences[0]
-        value = ""
-        pos_tags = ["PROPN", "NOUN", "ADP"]
-        result_dict = {}
-        for word in sentence.words:
-            if word.upos in pos_tags:
-                if value == "":
-                    span_start = word.start_char
-                span_end = word.end_char
-
-                """
-                joining individual tokens that together are the real entity,
-                Since we are dealing with Multi-Word entities here
-
-                """
-                value = value + str(word.text) + " "
-        if value != "":
-            for pattern in entity_patterns:
-                val = fuzz.ratio(pattern, value) / 100
-                if val > self.fuzzy_threshold:
-                    match_value = match_dict[pattern]
-                    result_dict[match_value] = val
-            if result_dict:
-                match_output = max(result_dict, key=lambda x: result_dict[x])
-                match_score = result_dict[match_output]
-
-                return (
-                    value,
-                    entity_type,
-                    match_output,
-                    (span_start, span_end),
-                    match_score,
-                )
-        return (value, entity_type, "", (0, 0), 0.0)
+        """
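+        Parse only a non-empty query; an empty one fails with 'list index out of range' at sentences[0]. NOTE(review): `value` appears to stay unbound when query is empty, so the final return would raise UnboundLocalError - confirm.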
+        """
+        if not query == '' and len(query) > 0:
+            sentence = nlp(query).sentences[0]
+            value = ""
+            pos_tags = ["PROPN", "NOUN", "ADP"]
+            result_dict = {}
+            for word in sentence.words:
+                if word.upos in pos_tags:
+                    if value == "":
+                        span_start = word.start_char
+                    span_end = word.end_char
+
+                    """
+                    Join the individual tokens that together form the real entity,
+                    since we are dealing with multi-word entities here.
+
+                    """
+                    value = value + str(word.text) + " "
+            if value != "":
+                for pattern in entity_patterns:
+                    val = fuzz.ratio(pattern, value) / 100
+                    if val > self.fuzzy_threshold:
+                        match_value = match_dict[pattern]
+                        result_dict[match_value] = val
+                if result_dict:
+                    match_output = max(result_dict, key=lambda x: result_dict[x])
+                    match_score = result_dict[match_output]
+
+                    return (
+                        value,
+                        entity_type,
+                        match_output,
+                        (span_start, span_end),
+                        match_score,
+                    )
+        return (value, entity_type, "", (0, 0), float(0))
 
     # new method based on experiments done during development of channel parser
     def get_fuzzy_dp_search(self, transcript: str, lang: str = "") -> MatchType: