From 451f3e12b6d99590202bd6e457906ec316813a46 Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Fri, 5 Apr 2019 17:11:09 +0200 Subject: [PATCH 1/2] Remove normalization of confidence scores in intent classification The logreg intent classifier used to apply specific logic when used with an intents filter: the intent classification scores were renormalized to sum to 1.0. This could lead to unexpected behavior, especially when all the intents in the intents filter are associated with very low confidence scores. In such cases, the renormalization would significantly increase the scores. --- .../intent_classifier/log_reg_classifier.py | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/snips_nlu/intent_classifier/log_reg_classifier.py b/snips_nlu/intent_classifier/log_reg_classifier.py index 1e56dbcc8..52835af7a 100644 --- a/snips_nlu/intent_classifier/log_reg_classifier.py +++ b/snips_nlu/intent_classifier/log_reg_classifier.py @@ -153,7 +153,7 @@ def _get_intents(self, text, intents_filter): # pylint: disable=C0103 X = self.featurizer.transform([text_to_utterance(text)]) # pylint: enable=C0103 - proba_vec = self._predict_proba(X, intents_filter=intents_filter) + proba_vec = self._predict_proba(X) logger.debug( "%s", DifferedLoggingMessage(self.log_activation_weights, text, X)) results = [ @@ -163,15 +163,9 @@ def _get_intents(self, text, intents_filter): return sorted(results, key=lambda res: -res[RES_PROBA]) - def _predict_proba(self, X, intents_filter): # pylint: disable=C0103 + def _predict_proba(self, X): # pylint: disable=C0103 self.classifier._check_proba() # pylint: disable=W0212 - filtered_out_indexes = None - if intents_filter is not None: - filtered_out_indexes = [ - i for i, intent in enumerate(self.intent_list) - if intent not in intents_filter and intent is not None] - prob = self.classifier.decision_function(X) prob *= -1 np.exp(prob, prob) @@ -179,14 +173,7 @@ def _predict_proba(self, X, intents_filter): # pylint: 
disable=C0103 np.reciprocal(prob, prob) if prob.ndim == 1: return np.vstack([1 - prob, prob]).T - else: - if filtered_out_indexes: # not None and not empty - prob[:, filtered_out_indexes] = 0. - # OvR normalization, like LibLinear's predict_probability - prob /= prob.sum(axis=1).reshape((prob.shape[0], -1)) - # We do not normalize when there is no intents filter, to keep the - # probabilities calibrated - return prob + return prob @check_persisted_path def persist(self, path): From b086f96440561897d219ebbee843312f6b883136 Mon Sep 17 00:00:00 2001 From: Adrien Ball Date: Fri, 5 Apr 2019 17:15:29 +0200 Subject: [PATCH 2/2] Update Changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 40184b130..fa6f402cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ All notable changes to this project will be documented in this file. - Improved failed linking error message after download of resources [#774](https://github.com/snipsco/snips-nlu/pull/774) - Improve handling of ambiguous utterances in DeterministicIntentParser [#773](https://github.com/snipsco/snips-nlu/pull/773) +### Changed +- Remove normalization of confidence scores in intent classification [#782](https://github.com/snipsco/snips-nlu/pull/782) + ### Fixed - Fixed a crash due to missing resources when refitting the `CRFSlotFiller` [#771](https://github.com/snipsco/snips-nlu/pull/771) - Fixed issue with egg fragments in download cli [#769](https://github.com/snipsco/snips-nlu/pull/769)