Skip to content

Commit

Permalink
resolve adding shap values to df
Browse files Browse the repository at this point in the history
  • Loading branch information
drhosseinjavedani committed Jan 29, 2024
1 parent 84fc151 commit 4b6e5f2
Show file tree
Hide file tree
Showing 6 changed files with 2,261 additions and 150 deletions.
872 changes: 840 additions & 32 deletions examples/shap/shap_estimator_optimized_by_gridsearchcv.ipynb

Large diffs are not rendered by default.

1,273 changes: 1,273 additions & 0 deletions examples/shap/shap_estimator_optimized_by_gridsearchcv_fastreeshap.ipynb

Large diffs are not rendered by default.

218 changes: 121 additions & 97 deletions examples/shap/shap_estimator_optimized_by_gridsearchcv_gpboost.ipynb

Large diffs are not rendered by default.

15 changes: 14 additions & 1 deletion tests/test_gpboost.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,26 @@
import pytest
import pandas as pd
import numpy as np
import sys
import subprocess

from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import KFold
import time
from zoish.feature_selectors.shap_selectors import ShapFeatureSelector
import gpboost as gpb
#import gpboost as gpb
from sklearn.metrics import f1_score, r2_score

def import_gpboost():
    """Return the ``gpboost`` module, installing it first if it is missing.

    The import is attempted normally. If it raises ``ImportError``, the
    pinned release ``gpboost==1.2.6`` is installed with pip for the running
    interpreter and the import is retried.

    Returns:
        module: The imported ``gpboost`` package.

    Raises:
        subprocess.CalledProcessError: If the pip installation fails.
        ImportError: If ``gpboost`` still cannot be imported after the
            installation step.
    """
    import importlib

    try:
        return importlib.import_module("gpboost")
    except ImportError:
        # Invoke pip through the current interpreter so the package is
        # installed into the environment that is actually running.
        subprocess.check_call(
            [sys.executable, "-m", "pip", "install", "gpboost==1.2.6"]
        )
        # The import finders cache directory contents; without refreshing
        # them, a package installed after interpreter start-up may not be
        # found by the retry below.
        importlib.invalidate_caches()
        return importlib.import_module("gpboost")


# Resolve gpboost once at module import time: the parametrize decorators
# below reference gpb.GPBoostRegressor while the module is being collected.
gpb = import_gpboost()
@pytest.fixture
def binary_classification_dataset_with_random_effects():
# Generate classification data
Expand Down Expand Up @@ -95,6 +107,7 @@ def test_shap_feature_selector_binary_classification_with_random_effects(model_c

@pytest.mark.parametrize("model_class", [gpb.GPBoostRegressor])
def test_shap_feature_selector_regression_with_random_effects(model_class, regression_dataset_with_random_effects):
gpb = import_gpboost()
X, y, groups = regression_dataset_with_random_effects
gp_model = gpb.GPModel(group_data=groups, likelihood="gaussian")
gp_model.set_prediction_data(group_data_pred=groups)
Expand Down
2 changes: 1 addition & 1 deletion tests/test_zoish.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from zoish import __version__
def test_version():
    """Check that the package reports the current release version."""
    # Only the current version is asserted; also asserting the previous
    # "5.0.2" value could never pass at the same time.
    assert __version__ == "5.0.3"
31 changes: 12 additions & 19 deletions zoish/feature_selectors/shap_selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,8 @@
import logging
import warnings

# import sys
# import subprocess

import fasttreeshap


# Plotting libraries
import numpy as np
import pandas as pd
Expand All @@ -23,15 +19,6 @@

logger.info("Shap Feature Selector has started !")


# def import_gpboost():
# try:
# import gpboost as gpb
# except ImportError:
# subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'gpboost==1.2.6'])
# import gpboost as gpb
# return gpb

class ShapPlotFeatures(PlotFeatures):
"""
Initializes the class with a feature selector and additional keyword arguments.
Expand Down Expand Up @@ -696,7 +683,10 @@ def setup_kernel_explainer(X):

# Implement the functions
class_name = self.model.__class__.__name__
is_gpboost_model = "GPBoostClassifier" in class_name or "GPBoostRegressor" in class_name
is_gpboost_model = (
"GPBoostClassifier" in class_name
or "GPBoostRegressor" in class_name
)
if not is_gpboost_model:
setup_kernel_explainer(X)
self.shap_values = self.explainer.shap_values(X)
Expand All @@ -713,6 +703,9 @@ def setup_kernel_explainer(X):
self.model, **self.shap_fast_tree_explainer_kwargs
)
self.shap_values = self.explainer.shap_values(X)
print(
"FastTreeShap TreeExplainer has used !"
)
except Exception as e:
logger.error(f"FastTreeShap TreeExplainer could not be used: {e}")
raise e
Expand Down Expand Up @@ -765,18 +758,18 @@ def setup_kernel_explainer(X):
)
if self.feature_importances_ is not None:
ordered_importances = self.feature_importances_[self.importance_order]
self.importance_df['Values'] = ordered_importances
self.importance_df["Values"] = ordered_importances

else:
raise ValueError(
"feature_importances_ is None."
)
raise ValueError("feature_importances_ is None.")

self.X = X
self.y = y

class_name = self.model.__class__.__name__
is_gpboost_model = "GPBoostClassifier" in class_name or "GPBoostRegressor" in class_name
is_gpboost_model = (
"GPBoostClassifier" in class_name or "GPBoostRegressor" in class_name
)
if is_gpboost_model:
return self
else:
Expand Down

0 comments on commit 4b6e5f2

Please sign in to comment.