tinkoff-ai · Mr-Geekman · Jul 10, 2023 · Jul 10, 2023 · Jul 10, 2023 · Jul 10, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -23,7 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - 
 - 
 - 
-- 
+- Make `mrmr` feature selection work with categorical regressors that can be cast to float ([#1311](https://github.com/tinkoff-ai/etna/pull/1311))
 - 
 
 ### Removed

diff --git a/etna/analysis/feature_selection/mrmr_selection.py b/etna/analysis/feature_selection/mrmr_selection.py
@@ -82,6 +82,14 @@ def mrmr(
     redundancy_table = pd.DataFrame(np.inf, index=all_features, columns=all_features)
     top_k = min(top_k, len(all_features))
 
+    # correlation can't be computed for categorical columns, so cast them to float first
+    cat_cols = regressors.dtypes[regressors.dtypes == "category"].index
+    for cat_col in cat_cols:
+        try:
+            regressors[cat_col] = regressors[cat_col].astype(float)
+        except ValueError:
+            raise ValueError(f"{cat_col} column cannot be cast to float type! Please, use encoders.")
+
     for i in range(top_k):
         score_numerator = relevance.loc[not_selected_features]
         score_denominator = pd.Series(1, index=not_selected_features)

diff --git a/tests/test_analysis/test_feature_selection/test_mrmr.py b/tests/test_analysis/test_feature_selection/test_mrmr.py
@@ -32,6 +32,12 @@ def df_with_regressors() -> Dict[str, pd.DataFrame]:
         regressor = df_regressors_useless[df_regressors_useless["segment"] == segment]["target"].values
         df_exog[f"regressor_useless_{i}"] = regressor
 
+    # useless categorical regressors (constant-valued columns)
+    num_cat_useless = 3
+    for i in range(num_cat_useless):
+        df_exog[f"categorical_regressor_useless_{i}"] = i
+        df_exog[f"categorical_regressor_useless_{i}"] = df_exog[f"categorical_regressor_useless_{i}"].astype("category")
+
     # useful regressors: the same as target but with little noise
     df_regressors_useful = df.copy()
     sampler = RandomState(seed=2).normal
@@ -174,3 +180,10 @@ def test_fast_redundancy_deprecation_warning(df_with_regressors):
     relevance_table = ModelRelevanceTable()(df=df, df_exog=regressors, model=RandomForestRegressor())
     with pytest.warns(DeprecationWarning, match="Option `fast_redundancy=False` was added for backward compatibility"):
         mrmr(relevance_table=relevance_table, regressors=regressors, top_k=2, fast_redundancy=False)
+
+
+@pytest.mark.parametrize("fast_redundancy", [True, False])
+def test_mrmr_with_categorical_regressor(df_with_regressors, fast_redundancy):
+    df, regressors = df_with_regressors["df"], df_with_regressors["regressors"]
+    relevance_table = ModelRelevanceTable()(df=df, df_exog=regressors, model=RandomForestRegressor())
+    mrmr(relevance_table=relevance_table, regressors=regressors, top_k=len(regressors), fast_redundancy=fast_redundancy)
-Original file line number
+Diff line change
@@ Expand Up @@
     -
     -
     -
-    -
+    - Make `mrmr` feature selection work with categorical regressors that can be cast to float ([#1311](https://github.com/tinkoff-ai/etna/pull/1311))
     -
     ### Removed
@@ Expand Down @@