
Filter gradients of IndexedSlices in tf grad sim backend, fixes #828 #829

Merged — 35 commits, Jan 11, 2023

Commits
f15d7a8
Filter gradients of IndexedSlices in tf grad sim backend, fixes #828
mauicv Nov 30, 2022
1fd9c38
Add functionality to convert sparse gradients to nd.array
mauicv Jan 5, 2023
3dbc6c6
Add warnings if non-trainable layers present in model
mauicv Jan 5, 2023
ab16515
Fix linting and typing errors
mauicv Jan 5, 2023
87d4ca7
Add check for non-trainable layers in gradsim
mauicv Jan 6, 2023
6cad12c
Add better docstrings
mauicv Jan 6, 2023
12937d8
Add better docstrings for tests
mauicv Jan 6, 2023
219a6a0
Fix minor linting error
mauicv Jan 6, 2023
764bc56
Add check for tensor attribute
mauicv Jan 6, 2023
4a89f48
Check for and log list of non trainable layers on GradSim __init__
mauicv Jan 6, 2023
e77f67f
Add error for non-trainable models in GradSim method
mauicv Jan 9, 2023
efde102
Rewrite get_non_trainable method docstrings
mauicv Jan 9, 2023
22dc19a
Remove typos from doctstrings
mauicv Jan 9, 2023
557af3b
Minor warning rephrase and update tests
mauicv Jan 9, 2023
b9c8f61
Fix minor flake8 error
mauicv Jan 9, 2023
6015d50
Improve error message for numpy conversion error
mauicv Jan 9, 2023
a3270be
Update tensorflow _grad_to_numpy type hints
mauicv Jan 9, 2023
188e1d3
Revert minor changes
mauicv Jan 9, 2023
2af3367
Minor spelling fix to test docstring
mauicv Jan 9, 2023
a6e437c
Update test comment to make behavour clearer
mauicv Jan 9, 2023
ab23c05
Fix typeo in test docstring
mauicv Jan 9, 2023
e95aaf1
Add comment to test to explain build method call
mauicv Jan 9, 2023
6704c64
Add note on batch norm layer in GradSim integration tests
mauicv Jan 9, 2023
876ea61
Fix minor linting errors
mauicv Jan 9, 2023
bf7fad3
Add backticks to modules names in docstrings
mauicv Jan 9, 2023
0e0c919
Make get_not_trainable private
mauicv Jan 9, 2023
09a1674
Add questions to the FAQ docs page detailing performance soln and war…
mauicv Jan 9, 2023
f443185
Minor fixes
mauicv Jan 10, 2023
8a99be5
Fix linting error
mauicv Jan 10, 2023
f69227b
Add minor changes
mauicv Jan 10, 2023
42cf904
Fix Requested PR changes
mauicv Jan 10, 2023
57d04b4
Update non-trainable params warning
mauicv Jan 10, 2023
f72d197
Minor fix
mauicv Jan 10, 2023
308b51f
Remove backticks
mauicv Jan 10, 2023
4bd1f20
Remove commented out lines from test
mauicv Jan 10, 2023
46 changes: 41 additions & 5 deletions alibi/explainers/similarity/backends/pytorch/base.py
@@ -50,8 +50,27 @@ def get_grads(
loss = loss_fn(output, Y)
loss.backward()
model.train(initial_model_state)
return np.concatenate([_PytorchBackend.to_numpy(param.grad).reshape(-1) # type: ignore [arg-type] # see #810
for param in model.parameters()])

return np.concatenate([_PytorchBackend._grad_to_numpy(grad=param.grad, name=name)
for name, param in model.named_parameters()
if param.grad is not None])
Comment from the Contributor Author (mauicv):

Note that torch tensors have a `grad` attribute that defaults to `None`, so we don't have to check that it exists first!
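
# A tiny illustration of that point (not part of the diff): `.grad` is `None`
# until `backward()` runs, which is why filtering on `param.grad is not None`
# is sufficient and no `hasattr` check is needed.
import torch

p = torch.nn.Parameter(torch.ones(3))
print(p.grad)                 # None -- the attribute exists but holds no gradient yet
(p * 2).sum().backward()
print(p.grad)                 # tensor([2., 2., 2.])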


@staticmethod
def _grad_to_numpy(grad: torch.Tensor, name: Optional[str] = None) -> np.ndarray:
"""Convert gradient to `np.ndarray`.

Converts gradient tensor to flat `numpy` array. If the gradient is a sparse tensor, it is converted to a dense
tensor first.
"""
if grad.is_sparse:
grad = grad.to_dense()

if not hasattr(grad, 'numpy'):
name = f' for the named tensor: {name}' if name else ''
raise TypeError((f'Could not convert gradient to `numpy` array{name}. To ignore these '
'gradients in the similarity computation set ``requires_grad=False`` on the '
'corresponding parameter.'))
return grad.reshape(-1).cpu().numpy()

@staticmethod
def to_tensor(X: np.ndarray) -> torch.Tensor:
@@ -72,15 +91,32 @@ def set_device(device: Union[str, int, torch.device, None] = None) -> None:
elif isinstance(device, torch.device):
_PytorchBackend.device = device
elif device is not None:
raise TypeError(("`device` must be a None, string, integer or "
f"torch.device object. Got {type(device)} instead."))
raise TypeError(("`device` must be a ``None``, `string`, `integer` or "
f"`torch.device` object. Got {type(device)} instead."))

@staticmethod
def to_numpy(X: torch.Tensor) -> np.ndarray:
"""Maps a `pytorch` tensor to a `numpy` array."""
"""Maps a `pytorch` tensor to `np.ndarray`."""
return X.detach().cpu().numpy()

@staticmethod
def argmax(X: torch.Tensor, dim=-1) -> torch.Tensor:
"""Returns the index of the maximum value in a tensor."""
return torch.argmax(X, dim=dim)

@staticmethod
def _count_non_trainable(model: nn.Module) -> int:
"""Returns number of non trainable parameters.

Returns the number of parameters that are non trainable. If no trainable parameter exists we raise
a `ValueError`.
"""

num_non_trainable_params = len([param for param in model.parameters() if not param.requires_grad])

if num_non_trainable_params == len(list(model.parameters())):
raise ValueError("The model has no trainable parameters. This method requires at least "
"one trainable parameter to compute the gradients for. "
"Try setting ``.requires_grad_(True)`` on the model or one of its parameters.")

return num_non_trainable_params
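
For reference, a minimal sketch (not part of the diff, assuming a recent `torch` release) of the sparse-gradient case `_grad_to_numpy` handles: an `Embedding` layer built with `sparse=True` produces a sparse `.grad`, which is densified before flattening to `numpy`:

import torch

emb = torch.nn.Embedding(10, 4, sparse=True)       # sparse=True makes the weight gradient a sparse tensor
loss = emb(torch.tensor([[1, 2, 3]])).sum()
loss.backward()

grad = emb.weight.grad
assert grad.is_sparse                               # same check `_grad_to_numpy` performs
flat = grad.to_dense().reshape(-1).cpu().numpy()    # densify, flatten, convert to numpy
print(flat.shape)                                   # (40,) == 10 * 4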
44 changes: 39 additions & 5 deletions alibi/explainers/similarity/backends/tensorflow/base.py
@@ -8,7 +8,7 @@

import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow import keras


class _TensorFlowBackend:
@@ -50,9 +50,29 @@ def get_grads(

# compute gradients of the loss w.r.t the weights
grad_X_train = tape.gradient(loss, model.trainable_weights)
grad_X_train = np.concatenate([w.numpy().reshape(-1) for w in grad_X_train])
grad_X_train = np.concatenate([_TensorFlowBackend._grad_to_numpy(w, getattr(w, 'name', None))
for w in grad_X_train])
return grad_X_train

@staticmethod
def _grad_to_numpy(grad: Union[tf.IndexedSlices, tf.Tensor], name: Optional[str] = None) -> np.ndarray:
"""Convert gradient to `np.ndarray`.

Converts gradient tensor to flat `numpy` array. If the gradient is a sparse tensor, it is converted to a dense
tensor first.
"""

if isinstance(grad, tf.IndexedSlices):
# see https://github.com/SeldonIO/alibi/issues/828
grad = tf.convert_to_tensor(grad)

if not hasattr(grad, 'numpy'):
name = f' for the named tensor: {name}' if name else ''
raise TypeError((f'Could not convert gradient to `numpy` array{name}. To ignore these '
'gradients in the similarity computation set ``trainable=False`` on the '
'corresponding parameter.'))
return grad.numpy().reshape(-1)

@staticmethod
def to_tensor(X: np.ndarray) -> tf.Tensor:
"""Converts a `numpy` array to a `tensorflow` tensor."""
@@ -67,15 +87,29 @@ def set_device(device: Union[str, None] = None) -> None:
if device is None or isinstance(device, str):
_TensorFlowBackend.device = device
else:
raise TypeError(f"`device` must be a string or None. Got {type(device)} instead.")
raise TypeError(f"`device` must be a `string` or ``None``. Got {type(device)} instead.")

@staticmethod
def to_numpy(X: tf.Tensor) -> tf.Tensor:
"""Converts a tensor to a `numpy` array."""
def to_numpy(X: tf.Tensor) -> np.ndarray:
"""Converts a tensor to `np.ndarray`."""
return X.numpy()

@staticmethod
def argmax(X: tf.Tensor, dim=-1) -> tf.Tensor:
"""Returns the index of the maximum value in a tensor."""
X = tf.math.argmax(X, axis=dim)
return X

@staticmethod
def _count_non_trainable(model: keras.Model) -> int:
"""Returns number of non trainable parameters.

Returns the number of parameters that are non trainable. If no trainable parameter exists we raise
a `ValueError`.
"""

if len(model.trainable_weights) == 0:
raise ValueError("The model has no trainable weights. This method requires at least "
"one trainable parameter to compute the gradients for. "
"Set ``trainable=True`` on the model or a model weight.")
return len(model.non_trainable_weights)
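
For reference, a minimal sketch (not part of the diff, assuming `tensorflow` 2.x) of the `tf.IndexedSlices` case from #828 that `_grad_to_numpy` now handles: gradients taken with respect to an `Embedding` weight come back as `IndexedSlices` and are densified before flattening:

import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Embedding(10, 4, input_shape=(5,)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1),
])
X = tf.constant([[1, 2, 3, 4, 5]])

with tf.GradientTape() as tape:
    loss = tf.reduce_sum(model(X))
grads = tape.gradient(loss, model.trainable_weights)

print(type(grads[0]))                          # tf.IndexedSlices for the embedding weight
dense = tf.convert_to_tensor(grads[0])         # densify, as done in `_grad_to_numpy`
print(dense.numpy().reshape(-1).shape)         # (40,) == 10 * 4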
19 changes: 15 additions & 4 deletions alibi/explainers/similarity/grad.py
@@ -7,6 +7,7 @@
from typing import TYPE_CHECKING, Callable, Optional, Union, Dict, Tuple
from typing_extensions import Literal
from enum import Enum
import warnings

import numpy as np

@@ -44,12 +45,12 @@ def __init__(self,
device: 'Union[int, str, torch.device, None]' = None,
verbose: bool = False,
):
"""GradientSimilarity explainer.
"""`GradientSimilarity` explainer.

The gradient similarity explainer is used to find examples in the training data that the predictor considers
similar to test instances the user wants to explain. It uses the gradients of the loss between the model output
and the training data labels. These are compared using the similarity function specified by ``sim_fn``. The
GradientSimilarity can be applied to models trained for both classification and regression tasks.
`GradientSimilarity` explainer can be applied to models trained for both classification and regression tasks.


Parameters
@@ -128,13 +129,23 @@ def __init__(self,
task_name=task
)

num_non_trainable = self.backend._count_non_trainable(self.predictor)
if num_non_trainable:
warning_msg = (f"Found {num_non_trainable} non-trainable parameters in the model. These parameters "
"don't have gradients and will not be included in the computation of gradient similarity."
" This might be because your model has layers that track statistics using non-trainable "
"parameters such as batch normalization layers. In this case, you don't need to worry. "
"Otherwise it's because you have set some parameters to be non-trainable and alibi is "
"letting you know.")
warnings.warn(warning_msg)

def fit(self,
X_train: np.ndarray,
Y_train: np.ndarray) -> "Explainer":
"""Fit the explainer.

The GradientSimilarity explainer requires the model gradients over the training data. In the explain method it
compares them to the model gradients for the test instance(s). If ``store_grads`` was set to ``True`` on
The `GradientSimilarity` explainer requires the model gradients over the training data. In the explain method
it compares them to the model gradients for the test instance(s). If ``precompute_grads=True`` on
initialization then the gradients are precomputed here and stored. This will speed up the explain method call
but storing the gradients may not be feasible for large models.
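
As an aside, a minimal sketch (hypothetical model, assuming an `alibi` version that includes this PR) of the new warning emitted on `__init__` when the model carries frozen parameters:

import tensorflow as tf
from alibi.explainers import GradientSimilarity

model = tf.keras.Sequential([
    tf.keras.layers.Dense(8, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(2),
])
model.layers[0].trainable = False        # freeze the first layer: kernel + bias become non-trainable

loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
explainer = GradientSimilarity(model, loss_fn, task='classification')
# UserWarning: Found 2 non-trainable parameters in the model. ...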

7 changes: 4 additions & 3 deletions alibi/explainers/tests/test_simiarlity/conftest.py
@@ -148,9 +148,10 @@ def tf_linear_model(input_shape, output_shape):
Constructs a linear model for `tensorflow`.
"""
return keras.Sequential([
keras.layers.InputLayer(input_shape=input_shape),
keras.layers.Dense(output_shape),
keras.layers.Softmax()
tf.keras.layers.InputLayer(input_shape=input_shape),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(output_shape),
tf.keras.layers.Softmax()
])


92 changes: 92 additions & 0 deletions alibi/explainers/tests/test_simiarlity/test_backends.py
@@ -2,6 +2,7 @@

import torch
import numpy as np
import tensorflow as tf

from alibi.explainers.similarity.backends.tensorflow.base import _TensorFlowBackend
from alibi.explainers.similarity.backends.pytorch.base import _PytorchBackend
@@ -41,3 +42,94 @@ def test_backends(random_cls_dataset, linear_models):
torch_grads = np.sort(torch_grads)
tf_grads = np.sort(tf_grads)
np.testing.assert_allclose(torch_grads, tf_grads, rtol=1e-04)


@pytest.mark.parametrize('trainable_emd, grads_shape', [(True, (61, )), (False, (21, ))])
def test_tf_embedding_similarity(trainable_emd, grads_shape):
"""Test `GradientSimilarity` explainer correctly handles sparsity and non-trainable layers for `tensorflow`.

Test that `tensorflow` embedding layers work as expected and also that layers
marked as non-trainable are not included in the gradients.
See https://github.com/SeldonIO/alibi/issues/828.
"""
model = tf.keras.models.Sequential([
tf.keras.layers.Embedding(10, 4, input_shape=(5,), trainable=trainable_emd),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(1)
])

X = tf.random.uniform(shape=(1, 5), minval=0, maxval=10, dtype=tf.float32)
Y = tf.random.uniform(shape=(1, 1), minval=0, maxval=10, dtype=tf.float32)
loss_fn = tf.keras.losses.MeanSquaredError()
tf_grads = _TensorFlowBackend.get_grads(model, X, Y, loss_fn)
assert tf_grads.shape == grads_shape # (4 * 10) * trainable_emd + (5 * 4) + 1


@pytest.mark.parametrize('trainable_emd, grads_shape', [(True, (61, )), (False, (21, ))])
@pytest.mark.parametrize('sparse', [True, False])
def test_pytorch_embedding_similarity(trainable_emd, grads_shape, sparse):
"""Test GradientSimilarity explainer correctly handles sparsity and non-trainable layers for pytorch.

Tests that the `pytorch` embedding layers work as expected and that layers marked as
non-trainable are not included in the gradients.
"""

model = torch.nn.Sequential(
torch.nn.Embedding(10, 4, 5, sparse=sparse),
torch.nn.Flatten(),
torch.nn.LazyLinear(1)
)

model[0].weight.requires_grad = trainable_emd

X = torch.randint(0, 10, (1, 5))
Y = torch.randint(0, 10, (1, 1), dtype=torch.float32)
loss_fn = torch.nn.MSELoss()
pt_grads = _PytorchBackend.get_grads(model, X, Y, loss_fn)
assert pt_grads.shape == grads_shape # (4 * 10) * trainable_emd + (5 * 4) + 1


def test_non_numpy_grads_pytorch():
"""Test that the `pytorch` backend handles gradients withtout `numpy` methods correctly.

`_PytorchBackend` should throw an error if the gradients cannot be converted to numpy arrays.
"""
class MockTensor():
is_sparse = False

with pytest.raises(TypeError) as err:
_PytorchBackend._grad_to_numpy(MockTensor())

assert ("Could not convert gradient to `numpy` array. To ignore these gradients in the "
"similarity computation set ``requires_grad=False`` on the corresponding parameter.") \
in str(err.value)

with pytest.raises(TypeError) as err:
_PytorchBackend._grad_to_numpy(MockTensor(), 'test')

assert ("Could not convert gradient to `numpy` array for the named tensor: test. "
"To ignore these gradients in the similarity computation set ``requires_grad=False``"
" on the corresponding parameter.") in str(err.value)


def test_non_numpy_grads_tensorflow():
"""Test that the `tensorflow` backend handles gradients without `numpy` methods correctly.

`_TensorFlowBackend` should throw an error if the gradients cannot be converted to `numpy` arrays.
"""
class MockTensor():
is_sparse = False

with pytest.raises(TypeError) as err:
_TensorFlowBackend._grad_to_numpy(MockTensor())

assert ("Could not convert gradient to `numpy` array. To ignore these gradients "
"in the similarity computation set ``trainable=False`` on the corresponding parameter.") \
in str(err.value)

with pytest.raises(TypeError) as err:
_TensorFlowBackend._grad_to_numpy(MockTensor(), 'test')

assert ("Could not convert gradient to `numpy` array for the named tensor: test."
" To ignore these gradients in the similarity computation set "
"``trainable=False`` on the corresponding parameter.") in str(err.value)