From 451f3e12b6d99590202bd6e457906ec316813a46 Mon Sep 17 00:00:00 2001
From: Adrien Ball <adrien.ball@snips.ai>
Date: Fri, 5 Apr 2019 17:11:09 +0200
Subject: [PATCH 1/2] Remove normalization of confidence scores in intent
 classification

The logreg intent classifier used to apply specific logic when given
an intents filter: the scores of the filtered-out intents were set to 0
and the remaining scores were renormalized to sum to 1.0.

This could lead to unexpected behavior, especially when all the intents
in the intents filter are associated with very low confidence scores. In
such cases, the renormalization would significantly inflate the scores.
---
 .../intent_classifier/log_reg_classifier.py   | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/snips_nlu/intent_classifier/log_reg_classifier.py b/snips_nlu/intent_classifier/log_reg_classifier.py
index 1e56dbcc8..52835af7a 100644
--- a/snips_nlu/intent_classifier/log_reg_classifier.py
+++ b/snips_nlu/intent_classifier/log_reg_classifier.py
@@ -153,7 +153,7 @@ def _get_intents(self, text, intents_filter):
         # pylint: disable=C0103
         X = self.featurizer.transform([text_to_utterance(text)])
         # pylint: enable=C0103
-        proba_vec = self._predict_proba(X, intents_filter=intents_filter)
+        proba_vec = self._predict_proba(X)
         logger.debug(
             "%s", DifferedLoggingMessage(self.log_activation_weights, text, X))
         results = [
@@ -163,15 +163,9 @@ def _get_intents(self, text, intents_filter):
 
         return sorted(results, key=lambda res: -res[RES_PROBA])
 
-    def _predict_proba(self, X, intents_filter):  # pylint: disable=C0103
+    def _predict_proba(self, X):  # pylint: disable=C0103
         self.classifier._check_proba()  # pylint: disable=W0212
 
-        filtered_out_indexes = None
-        if intents_filter is not None:
-            filtered_out_indexes = [
-                i for i, intent in enumerate(self.intent_list)
-                if intent not in intents_filter and intent is not None]
-
         prob = self.classifier.decision_function(X)
         prob *= -1
         np.exp(prob, prob)
@@ -179,14 +173,7 @@ def _predict_proba(self, X, intents_filter):  # pylint: disable=C0103
         np.reciprocal(prob, prob)
         if prob.ndim == 1:
             return np.vstack([1 - prob, prob]).T
-        else:
-            if filtered_out_indexes:  # not None and not empty
-                prob[:, filtered_out_indexes] = 0.
-                # OvR normalization, like LibLinear's predict_probability
-                prob /= prob.sum(axis=1).reshape((prob.shape[0], -1))
-            # We do not normalize when there is no intents filter, to keep the
-            # probabilities calibrated
-            return prob
+        return prob
 
     @check_persisted_path
     def persist(self, path):

From b086f96440561897d219ebbee843312f6b883136 Mon Sep 17 00:00:00 2001
From: Adrien Ball <adrien.ball@snips.ai>
Date: Fri, 5 Apr 2019 17:15:29 +0200
Subject: [PATCH 2/2] Update Changelog

---
 CHANGELOG.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 40184b130..fa6f402cf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,9 @@ All notable changes to this project will be documented in this file.
 - Improved failed linking error message after download of resources [#774](https://github.com/snipsco/snips-nlu/pull/774)
 - Improve handling of ambiguous utterances in DeterministicIntentParser [#773](https://github.com/snipsco/snips-nlu/pull/773)
 
+### Changed
+- Remove normalization of confidence scores in intent classification [#782](https://github.com/snipsco/snips-nlu/pull/782)
+
 ### Fixed
 - Fixed a crash due to missing resources when refitting the `CRFSlotFiller` [#771](https://github.com/snipsco/snips-nlu/pull/771)
 - Fixed issue with egg fragments in download cli [#769](https://github.com/snipsco/snips-nlu/pull/769)