
Commit c825327

vivekmig authored and facebook-github-bot committed
KernelSHAP / Lime Improvements (#619)
Summary:
* Adds support for generators as perturb functions for Lime, with corresponding tests.
* Modifies KernelSHAP to sample the number of selected features from a categorical distribution and then sample random binary vectors with that many selected features. This is theoretically equivalent to the previous approach of weighting randomly selected vectors, but it scales better computationally to large numbers of features, since the kernel weights for large numbers of features lead to arithmetic underflow (see the illustration below).

Pull Request resolved: #619

Reviewed By: NarineK

Differential Revision: D26505649

Pulled By: vivekmig

fbshipit-source-id: 596ca849208cadf3165d2c39c9eb7889f78e9b2d
1 parent 7d21f58 commit c825327
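
[Editorial note] As a quick illustration of the underflow motivation, not part of this diff (M and k below are arbitrary example values):

    import math

    M = 2000  # example: a large number of interpretable features
    k = 1000  # example: number of selected features
    # Old approach: weight each sample by the Shapley kernel
    #   k(M, k) = (M - 1) / (k * (M - k) * (M choose k)).
    # math.comb(M, k) has roughly 600 digits here, so the float weight
    # underflows to exactly 0 and the weighted regression degenerates.
    print((M - 1) / (k * (M - k) * math.comb(M, k)))  # 0.0

    # New approach: fold (M choose k) into the sampling itself by drawing k
    # with probability proportional to (M - 1) / (k * (M - k)) and then
    # sampling a random k-hot vector, so each drawn sample gets weight 1.
    probs = [(M - 1) / (k * (M - k)) for k in range(1, M)]
    print(min(probs) > 0.0)  # True: every probability stays representable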

File tree

4 files changed: +174 -67 lines

captum/attr/_core/kernel_shap.py (+79 -40)
@@ -1,50 +1,21 @@
 #!/usr/bin/env python3

-import math
-from typing import Any, Callable, Tuple, Union
+from typing import Any, Callable, Generator, Tuple, Union

 import torch
 from torch import Tensor
+from torch.distributions.categorical import Categorical

 from captum._utils.models.linear_model import SkLearnLinearRegression
 from captum._utils.typing import BaselineType, TargetType, TensorOrTupleOfTensorsGeneric
-from captum.attr._core.lime import Lime
-from captum.attr._utils.common import lime_n_perturb_samples_deprecation_decorator
+from captum.attr._core.lime import Lime, construct_feature_mask
+from captum.attr._utils.common import (
+    _format_input_baseline,
+    lime_n_perturb_samples_deprecation_decorator,
+)
 from captum.log import log_usage


-def combination(n: int, k: int) -> int:
-    try:
-        # math.comb is only available in Python 3.8+
-        return math.comb(n, k)  # type: ignore
-    except AttributeError:
-        return math.factorial(n) // math.factorial(k) // math.factorial(n - k)
-
-
-def kernel_shap_similarity_kernel(
-    _, __, interpretable_sample: Tensor, **kwargs
-) -> Tensor:
-    assert (
-        "num_interp_features" in kwargs
-    ), "Must provide num_interp_features to use default similarity kernel"
-    num_selected_features = int(interpretable_sample.sum(dim=1).item())
-    num_features = kwargs["num_interp_features"]
-    combinations = combination(num_features, num_selected_features)
-    denom = (
-        combinations * num_selected_features * (num_features - num_selected_features)
-    )
-    if denom != 0:
-        similarities = (num_features - 1) / denom
-    else:
-        # The weight should theoretically be infinite when denom = 0,
-        # enforcing that the trained linear model satisfies the
-        # end-point criteria. In practice, it is sufficient to make
-        # this weight substantially larger, so it is set to 100
-        # (all other weights are < 1).
-        similarities = 100.0
-    return torch.tensor([similarities])
-
-
 class KernelShap(Lime):
     r"""
     Kernel SHAP is a method that uses the LIME framework to compute
@@ -68,9 +39,11 @@ def __init__(self, forward_func: Callable) -> None:
         Lime.__init__(
             self,
             forward_func,
-            SkLearnLinearRegression(),
-            kernel_shap_similarity_kernel,
+            interpretable_model=SkLearnLinearRegression(),
+            similarity_func=self.kernel_shap_similarity_kernel,
+            perturb_func=self.kernel_shap_perturb_generator,
         )
+        self.inf_weight = 1000000.0

     @log_usage()
     @lime_n_perturb_samples_deprecation_decorator
@@ -294,8 +267,15 @@ def attribute(  # type: ignore
         >>> # Computes KernelSHAP attributions with feature mask.
         >>> attr = ks.attribute(input, target=1, feature_mask=feature_mask)
         """
-        return Lime.attribute.__wrapped__(
-            self,
+        formatted_inputs, baselines = _format_input_baseline(inputs, baselines)
+        feature_mask, num_interp_features = construct_feature_mask(
+            feature_mask, formatted_inputs
+        )
+        num_features_list = torch.arange(num_interp_features, dtype=torch.float)
+        denom = num_features_list * (num_interp_features - num_features_list)
+        probs = (num_interp_features - 1) / denom
+        probs[0] = 0.0
+        return self._attribute_kwargs(
             inputs=inputs,
             baselines=baselines,
             target=target,
@@ -304,4 +284,63 @@ def attribute(  # type: ignore
             n_samples=n_samples,
             perturbations_per_eval=perturbations_per_eval,
             return_input_shape=return_input_shape,
+            num_select_distribution=Categorical(probs),
+        )
+
+    def kernel_shap_similarity_kernel(
+        self, _, __, interpretable_sample: Tensor, **kwargs
+    ) -> Tensor:
+        assert (
+            "num_interp_features" in kwargs
+        ), "Must provide num_interp_features to use default similarity kernel"
+        num_selected_features = int(interpretable_sample.sum(dim=1).item())
+        num_features = kwargs["num_interp_features"]
+        if num_selected_features == 0 or num_selected_features == num_features:
+            # The weight should theoretically be infinite when
+            # num_selected_features = 0 or num_features, enforcing that
+            # the trained linear model satisfies the end-point criteria.
+            # In practice, it is sufficient to make this weight
+            # substantially larger, so it is set to 1000000
+            # (all other weights are 1).
+            similarities = self.inf_weight
+        else:
+            similarities = 1.0
+        return torch.tensor([similarities])
+
+    def kernel_shap_perturb_generator(
+        self, original_inp: Union[Tensor, Tuple[Tensor, ...]], **kwargs
+    ) -> Generator[Tensor, None, None]:
+        r"""
+        Perturbations are sampled by the following process:
+        - Choose k (number of selected features) based on the distribution
+          p(k) = (M - 1) / (k * (M - k)),
+          where M is the total number of features in the interpretable space.
+        - Randomly select a binary vector with k ones, where each such vector
+          is equally likely. This is done by generating a random vector of
+          normal values and thresholding based on the top k elements.
+
+        Since there are (M choose k) vectors with k ones, this weighted
+        sampling is equivalent to applying the Shapley kernel as the sample
+        weight, defined as:
+        k(M, k) = (M - 1) / (k * (M - k) * (M choose k))
+        """
+        assert (
+            "num_select_distribution" in kwargs and "num_interp_features" in kwargs
+        ), (
+            "num_select_distribution and num_interp_features are necessary"
+            " to use kernel_shap_perturb_func"
        )
+        if isinstance(original_inp, Tensor):
+            device = original_inp.device
+        else:
+            device = original_inp[0].device
+        num_features = kwargs["num_interp_features"]
+        yield torch.ones(1, num_features, device=device, dtype=torch.long)
+        yield torch.zeros(1, num_features, device=device, dtype=torch.long)
+        while True:
+            num_selected_features = kwargs["num_select_distribution"].sample()
+            rand_vals = torch.randn(1, num_features)
+            threshold = torch.kthvalue(
+                rand_vals, num_features - num_selected_features
+            ).values.item()
+            yield (rand_vals > threshold).to(device=device).long()
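
[Editorial note] The sampling process in kernel_shap_perturb_generator can be sketched in isolation as follows (a minimal sketch based on the diff above; M is an arbitrary example size and the names are local to this illustration):

    import torch
    from torch.distributions.categorical import Categorical

    M = 10  # example number of interpretable features
    ks = torch.arange(M, dtype=torch.float)
    probs = (M - 1) / (ks * (M - ks))  # proportional to (M - 1) / (k * (M - k))
    probs[0] = 0.0                     # k = 0 is covered by the fixed all-zeros sample
    dist = Categorical(probs)          # Categorical normalizes the unnormalized weights

    k = int(dist.sample())             # draw the number of selected features
    rand_vals = torch.randn(1, M)
    # keep the k largest normal values: threshold at the (M - k)-th smallest entry
    threshold = torch.kthvalue(rand_vals, M - k).values.item()
    sample = (rand_vals > threshold).long()
    assert int(sample.sum()) == k      # a k-hot vector, uniform over all k-hot vectors

Since every k-hot vector is equally likely given k, drawing k this way and giving each sample a constant regression weight reproduces the Shapley kernel k(M, k) = (M - 1) / (k * (M - k) * (M choose k)) in expectation, which is why the similarity kernel above can return constant weights apart from the end-point samples.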

captum/attr/_core/lime.py (+78 -25)
@@ -1,4 +1,5 @@
 #!/usr/bin/env python3
+import inspect
 import math
 import typing
 import warnings
@@ -138,13 +139,18 @@ def __init__(
                     the original input space (matching type and tensor shapes
                     of original input) or in the interpretable input space,
                     which is a vector containing the interpretable features.
+                    Alternatively, this function can return a generator
+                    yielding samples to train the interpretable surrogate
+                    model, and n_samples perturbations will be sampled
+                    from this generator.

                     The expected signature of this callable is:

                     >>> perturb_func(
                     >>>    original_input: Tensor or tuple of Tensors,
                     >>>    **kwargs: Any
-                    >>> ) -> Tensor or tuple of Tensors
+                    >>> ) -> Tensor or tuple of Tensors or
+                    >>>      generator yielding tensor or tuple of Tensors

                     All kwargs passed to the attribute method are
                     provided as keyword arguments (kwargs) to this callable.
@@ -411,8 +417,22 @@ def attribute(
         curr_model_inputs = []
         expanded_additional_args = None
         expanded_target = None
+        perturb_generator = None
+        if inspect.isgeneratorfunction(self.perturb_func):
+            perturb_generator = self.perturb_func(inputs, **kwargs)
+        batch_count = 0
         for _ in range(n_samples):
-            curr_sample = self.perturb_func(inputs, **kwargs)
+            if perturb_generator:
+                try:
+                    curr_sample = next(perturb_generator)
+                except StopIteration:
+                    warnings.warn(
+                        "Generator completed prior to given n_samples iterations!"
+                    )
+                    break
+            else:
+                curr_sample = self.perturb_func(inputs, **kwargs)
+            batch_count += 1
             if self.perturb_interpretable_space:
                 interpretable_inps.append(curr_sample)
                 curr_model_inputs.append(
@@ -481,7 +501,7 @@
             dataset = TensorDataset(
                 combined_interp_inps, combined_outputs, combined_sim
             )
-            self.interpretable_model.fit(DataLoader(dataset, batch_size=n_samples))
+            self.interpretable_model.fit(DataLoader(dataset, batch_size=batch_count))
         return self.interpretable_model.representation()

     def _evaluate_batch(
@@ -602,6 +622,31 @@ def default_perturb_func(original_inp, **kwargs):
     return torch.bernoulli(probs).to(device=device).long()


+def construct_feature_mask(feature_mask, formatted_inputs):
+    if feature_mask is None:
+        feature_mask, num_interp_features = _construct_default_feature_mask(
+            formatted_inputs
+        )
+    else:
+        feature_mask = _format_input(feature_mask)
+        min_interp_features = int(
+            min(torch.min(single_inp).item() for single_inp in feature_mask)
+        )
+        if min_interp_features != 0:
+            warnings.warn(
+                "Minimum element in feature mask is not 0, shifting indices to"
+                " start at 0."
+            )
+            feature_mask = tuple(
+                single_inp - min_interp_features for single_inp in feature_mask
+            )
+
+        num_interp_features = int(
+            max(torch.max(single_inp).item() for single_inp in feature_mask) + 1
+        )
+    return feature_mask, num_interp_features
+
+
 class Lime(LimeBase):
     r"""
     Lime is an interpretability method that trains an interpretable surrogate model
@@ -713,7 +758,7 @@ def __init__(
                     (integer, determined from feature mask).
         perturb_func (optional, callable): Function which returns a single
                     sampled input, which is a binary vector of length
-                    num_interp_features.
+                    num_interp_features, or a generator of such tensors.

                     This function is optional, the default function returns
                     a binary vector where each element is selected
@@ -726,6 +771,7 @@ def __init__(
                     >>>    original_input: Tensor or tuple of Tensors,
                     >>>    **kwargs: Any
                     >>> ) -> Tensor [Binary 2D Tensor 1 x num_interp_features]
+                    >>>      or generator yielding such tensors

                     kwargs includes baselines, feature_mask, num_interp_features
                     (integer, determined from feature mask).
@@ -975,31 +1021,36 @@ def attribute(  # type: ignore
         >>> # matching input shape.
         >>> attr = lime.attribute(input, target=1, feature_mask=feature_mask)
         """
+        return self._attribute_kwargs(
+            inputs=inputs,
+            baselines=baselines,
+            target=target,
+            additional_forward_args=additional_forward_args,
+            feature_mask=feature_mask,
+            n_samples=n_samples,
+            perturbations_per_eval=perturbations_per_eval,
+            return_input_shape=return_input_shape,
+        )
+
+    def _attribute_kwargs(  # type: ignore
+        self,
+        inputs: TensorOrTupleOfTensorsGeneric,
+        baselines: BaselineType = None,
+        target: TargetType = None,
+        additional_forward_args: Any = None,
+        feature_mask: Union[None, Tensor, Tuple[Tensor, ...]] = None,
+        n_samples: int = 25,
+        perturbations_per_eval: int = 1,
+        return_input_shape: bool = True,
+        **kwargs
+    ) -> TensorOrTupleOfTensorsGeneric:
         is_inputs_tuple = _is_tuple(inputs)
         formatted_inputs, baselines = _format_input_baseline(inputs, baselines)
         bsz = formatted_inputs[0].shape[0]

-        if feature_mask is None:
-            feature_mask, num_interp_features = _construct_default_feature_mask(
-                formatted_inputs
-            )
-        else:
-            feature_mask = _format_input(feature_mask)
-            min_interp_features = int(
-                min(torch.min(single_inp).item() for single_inp in feature_mask)
-            )
-            if min_interp_features != 0:
-                warnings.warn(
-                    "Minimum element in feature mask is not 0, shifting indices to"
-                    " start at 0."
-                )
-                feature_mask = tuple(
-                    single_inp + min_interp_features for single_inp in feature_mask
-                )
-
-            num_interp_features = int(
-                max(torch.max(single_inp).item() for single_inp in feature_mask) + 1
-            )
+        feature_mask, num_interp_features = construct_feature_mask(
+            feature_mask, formatted_inputs
+        )

         if num_interp_features > 10000:
             warnings.warn(
@@ -1051,6 +1102,7 @@ def attribute(  # type: ignore
                     if is_inputs_tuple
                     else curr_feature_mask[0],
                     num_interp_features=num_interp_features,
+                    **kwargs
                 )
                 if return_input_shape:
                     output_list.append(
@@ -1087,6 +1139,7 @@ def attribute(  # type: ignore
                 baselines=baselines if is_inputs_tuple else baselines[0],
                 feature_mask=feature_mask if is_inputs_tuple else feature_mask[0],
                 num_interp_features=num_interp_features,
+                **kwargs
             )
             if return_input_shape:
                 return self._convert_output_shape(
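
[Editorial note] To exercise the new generator support directly, a perturb_func can itself be written as a generator function; LimeBase detects this via inspect.isgeneratorfunction and pulls n_samples values from it. A minimal sketch, assuming a Captum build containing this commit (the toy network and generator below are illustrative only):

    import torch
    import torch.nn as nn
    from captum.attr import Lime

    def perturb_generator(original_inp, **kwargs):
        # yields binary 1 x num_interp_features samples indefinitely; Lime draws
        # n_samples of them via next() and warns if the generator stops early
        num_feats = kwargs["num_interp_features"]
        while True:
            yield torch.bernoulli(0.5 * torch.ones(1, num_feats)).long()

    net = nn.Sequential(nn.Linear(4, 2))  # illustrative model
    lime = Lime(net, perturb_func=perturb_generator)
    attr = lime.attribute(torch.rand(1, 4), target=0, n_samples=50)
    print(attr.shape)  # torch.Size([1, 4])

Because the generator above never terminates, the StopIteration branch is never hit; a finite generator would instead trigger the "Generator completed prior to given n_samples iterations!" warning and fit the surrogate model on the smaller batch_count.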

tests/attr/test_kernel_shap.py (+1 -0)
@@ -197,6 +197,7 @@ def test_multi_input_batch_kernel_shap(self) -> None:
             expected,
             additional_input=(1,),
             feature_mask=(mask1, mask2, mask3),
+            n_perturb_samples=300,
         )
         expected_with_baseline = (
             [[1040, 1040, 1040], [184, 580.0, 184]],
