Filter gradients of IndexedSlices in tf grad sim backend, fixes #828 (#829)

Merged · 35 commits · Jan 11, 2023

Commits
f15d7a8
Filter gradients of IndexedSlices in tf grad sim backend, fixes #828
mauicv Nov 30, 2022
1fd9c38
Add functionality to convert sparse gradients to nd.array
mauicv Jan 5, 2023
3dbc6c6
Add warnings if non-trainable layers present in model
mauicv Jan 5, 2023
ab16515
Fix linting and typing errors
mauicv Jan 5, 2023
87d4ca7
Add check for non-trainable layers in gradsim
mauicv Jan 6, 2023
6cad12c
Add better docstrings
mauicv Jan 6, 2023
12937d8
Add better docstrings for tests
mauicv Jan 6, 2023
219a6a0
Fix minor linting error
mauicv Jan 6, 2023
764bc56
Add check for tensor attribute
mauicv Jan 6, 2023
4a89f48
Check for and log list of non trainable layers on GradSim __init__
mauicv Jan 6, 2023
e77f67f
Add error for non-trainable models in GradSim method
mauicv Jan 9, 2023
efde102
Rewrite get_non_trainable method docstrings
mauicv Jan 9, 2023
22dc19a
Remove typos from doctstrings
mauicv Jan 9, 2023
557af3b
Minor warning rephrase and update tests
mauicv Jan 9, 2023
b9c8f61
Fix minor flake8 error
mauicv Jan 9, 2023
6015d50
Improve error message for numpy conversion error
mauicv Jan 9, 2023
a3270be
Update tensorflow _grad_to_numpy type hints
mauicv Jan 9, 2023
188e1d3
Revert minor changes
mauicv Jan 9, 2023
2af3367
Minor spelling fix to test docstring
mauicv Jan 9, 2023
a6e437c
Update test comment to make behavour clearer
mauicv Jan 9, 2023
ab23c05
Fix typeo in test docstring
mauicv Jan 9, 2023
e95aaf1
Add comment to test to explain build method call
mauicv Jan 9, 2023
6704c64
Add note on batch norm layer in GradSim integration tests
mauicv Jan 9, 2023
876ea61
Fix minor linting errors
mauicv Jan 9, 2023
bf7fad3
Add backticks to modules names in docstrings
mauicv Jan 9, 2023
0e0c919
Make get_not_trainable private
mauicv Jan 9, 2023
09a1674
Add questions to the FAQ docs page detailing performance soln and war…
mauicv Jan 9, 2023
f443185
Minor fixes
mauicv Jan 10, 2023
8a99be5
Fix linting error
mauicv Jan 10, 2023
f69227b
Add minor changes
mauicv Jan 10, 2023
42cf904
Fix Requested PR changes
mauicv Jan 10, 2023
57d04b4
Update non-trainable params warning
mauicv Jan 10, 2023
f72d197
Minor fix
mauicv Jan 10, 2023
308b51f
Remove backticks
mauicv Jan 10, 2023
4bd1f20
Remove commented out lines from test
mauicv Jan 10, 2023
35 changes: 24 additions & 11 deletions alibi/explainers/similarity/backends/pytorch/base.py
@@ -57,18 +57,19 @@ def get_grads(

@staticmethod
def _grad_to_numpy(grad: torch.Tensor, name: Optional[str] = None) -> torch.Tensor:
Contributor: grad type should probably be a union here since it can be sparse?

Contributor: Ignore this, was thinking of the tensorflow backend.

Contributor Author: No, you're correct, torch also has SparseTensors.

Contributor Author: Ignore me, torch handles sparse tensors by setting a layout attribute on normal tensors!

"""Convert graidient to numpy array.
"""Convert graidient to `np.ndarray`.

Converts gradient tensor to flat numpy array. If the gradient is a sparse tensor, it is converted to a dense \
Converts gradient tensor to flat `numpy` array. If the gradient is a sparse tensor, it is converted to a dense
tensor first.
"""
if grad.is_sparse:
grad = grad.to_dense()

if not hasattr(grad, 'numpy'):
name = f' for the named tensor: {name}' if name else ''
raise TypeError((f'Could not convert gradient to numpy array{name}. To ignore these '
'gradients in the similarity computation use `requires_grad=False`.'))
raise TypeError((f'Could not convert gradient to `numpy` array{name}. To ignore these '
'gradients in the similarity computation set `requires_grad=False` on the '
'corresponding parameter.'))
return grad.reshape(-1).cpu().numpy()

@staticmethod
@@ -90,12 +91,12 @@ def set_device(device: Union[str, int, torch.device, None] = None) -> None:
elif isinstance(device, torch.device):
_PytorchBackend.device = device
elif device is not None:
raise TypeError(("`device` must be a None, string, integer or "
f"torch.device object. Got {type(device)} instead."))
raise TypeError(("`device` must be a `None`, `string`, `integer` or "
f"`torch.device` object. Got {type(device)} instead."))

@staticmethod
def to_numpy(X: torch.Tensor) -> np.ndarray:
"""Maps a `pytorch` tensor to a `numpy` array."""
"""Maps a `pytorch` tensor to `np.ndarray`."""
return X.detach().cpu().numpy()

@staticmethod
@@ -104,7 +105,19 @@ def argmax(X: torch.Tensor, dim=-1) -> torch.Tensor:
return torch.argmax(X, dim=dim)

@staticmethod
def get_non_trainable(model: nn.Module) -> List[Union[int, str]]:
"""Checks that all layers in a model are trainable."""
return [name if name else i for i, (name, param) in enumerate(model.named_parameters())
if not param.requires_grad]
def _get_non_trainable(model: nn.Module) -> List[Optional[str]]:
"""Returns a list of non trainable parameters.

Returns a list of names of parameters that are non trainable. If no trainable parameter exists we raise
a `ValueError`.
"""

params = [name if name else None for name, param in model.named_parameters()
if not param.requires_grad]

if len(params) == len(list(model.parameters())):
raise ValueError('The model has no trainable parameters. This method requires at least'
'one trainable parameter to compute the gradients for. '
"Try setting `.requires_grad_(True)` on the model or one of it's parameters")

return params
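For readers following along, a minimal sketch (not part of this diff) of the two situations the `pytorch` backend now handles: sparse gradients, which `_grad_to_numpy` densifies before flattening, and parameters frozen with `requires_grad=False`, which `_get_non_trainable` reports. The toy model and layer sizes below are hypothetical.

```python
import torch
import torch.nn as nn

# Hypothetical toy model: a sparse embedding followed by a frozen linear head.
model = nn.Sequential(
    nn.Embedding(10, 4, sparse=True),   # gradients of this weight come back sparse
    nn.Flatten(start_dim=1),
    nn.Linear(5 * 4, 1),
)
model[2].weight.requires_grad_(False)   # freeze one parameter

X = torch.randint(0, 10, (1, 5))
model(X).sum().backward()

emb_grad = model[0].weight.grad
print(emb_grad.is_sparse)                             # True (sparse COO layout)
print(emb_grad.to_dense().reshape(-1).numpy().shape)  # (40,), i.e. what _grad_to_numpy returns

# What _get_non_trainable reports: names of parameters with requires_grad=False.
print([name for name, p in model.named_parameters() if not p.requires_grad])  # ['2.weight']
```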
32 changes: 20 additions & 12 deletions alibi/explainers/similarity/backends/tensorflow/base.py
@@ -8,7 +8,7 @@

import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow import keras


class _TensorFlowBackend:
@@ -55,10 +55,10 @@ def get_grads(
return grad_X_train

@staticmethod
def _grad_to_numpy(grad: tf.Tensor, name: Optional[str] = None) -> tf.Tensor:
"""Convert graidient to numpy array.
def _grad_to_numpy(grad: Union[tf.IndexedSlices, tf.Tensor], name: Optional[str] = None) -> tf.Tensor:
"""Convert graidient to `np.ndarray`.

Converts gradient tensor to flat numpy array. If the gradient is a sparse tensor, it is converted to a dense \
Converts gradient tensor to flat `numpy` array. If the gradient is a sparse tensor, it is converted to a dense
tensor first.
"""

@@ -68,8 +68,9 @@ def _grad_to_numpy(grad: tf.Tensor, name: Optional[str] = None) -> tf.Tensor:

if not hasattr(grad, 'numpy'):
name = f' for the named tensor: {name}' if name else ''
raise TypeError((f'Could not convert gradient to numpy array{name}. To ignore these '
'gradients in the similarity computation use `trainable=False`.'))
raise TypeError((f'Could not convert gradient to `numpy` array{name}. To ignore these '
'gradients in the similarity computation set `trainable=False` on the '
'corresponding parameter.'))
return grad.numpy().reshape(-1)

@staticmethod
@@ -86,11 +87,11 @@ def set_device(device: Union[str, None] = None) -> None:
if device is None or isinstance(device, str):
_TensorFlowBackend.device = device
else:
raise TypeError(f"`device` must be a string or None. Got {type(device)} instead.")
raise TypeError(f"`device` must be a `string` or `None`. Got {type(device)} instead.")

@staticmethod
def to_numpy(X: tf.Tensor) -> tf.Tensor:
"""Converts a tensor to a `numpy` array."""
"""Converts a tensor to `np.ndarray`."""
return X.numpy()

@staticmethod
@@ -100,9 +101,16 @@ def argmax(X: tf.Tensor, dim=-1) -> tf.Tensor:
return X

@staticmethod
def get_non_trainable(model: keras.Model) -> List[Union[int, str]]:
"""Checks if all layers in a model are trainable.
def _get_non_trainable(model: keras.Model) -> List[Optional[str]]:
"""Returns a list of non trainable parameters.

Note: batch normalization layers are ignored as they are not trainable by default.
Returns a list of names of parameters that are non trainable. If no trainable parameter exists we raise
a `ValueError`.
"""
return [getattr(layer, 'name', i) for i, layer in enumerate(model.layers) if not layer.trainable]

if len(model.trainable_weights) == 0:
raise ValueError('The model has no trainable weights. This method requires at least'
'one trainable parameter to compute the gradients for. '
'Set `trainable=True` on the model or a model weight')

return [getattr(weight, 'name', None) for weight in model.non_trainable_weights]
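Similarly, a hedged sketch (not part of this diff) of the `tensorflow` behaviour the fix targets: gradients taken through an `Embedding` lookup arrive as `tf.IndexedSlices`, which only gain a `numpy` attribute once densified, while frozen layers surface through `model.non_trainable_weights`. The toy model and sizes are hypothetical.

```python
import tensorflow as tf
from tensorflow import keras

# Hypothetical toy model: embedding lookup followed by a dense head.
model = keras.Sequential([
    keras.layers.Embedding(10, 4),
    keras.layers.Flatten(),
    keras.layers.Dense(1),
])
model(tf.zeros((1, 5), dtype=tf.int32))   # build the model so its weights exist
model.layers[-1].trainable = False        # freeze the dense layer

X = tf.random.uniform((1, 5), minval=0, maxval=10, dtype=tf.int32)
with tf.GradientTape() as tape:
    loss = tf.reduce_sum(model(X))
grads = tape.gradient(loss, model.trainable_weights)

print(isinstance(grads[0], tf.IndexedSlices))   # True: sparse embedding gradient
dense = tf.convert_to_tensor(grads[0])          # densify before calling .numpy()
print(dense.numpy().reshape(-1).shape)          # (40,)

# The weight names the non-trainable warning is based on.
print([w.name for w in model.non_trainable_weights])  # e.g. ['dense/kernel:0', 'dense/bias:0']
```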
17 changes: 9 additions & 8 deletions alibi/explainers/similarity/grad.py
@@ -45,7 +45,7 @@ def __init__(self,
device: 'Union[int, str, torch.device, None]' = None,
verbose: bool = False,
):
"""GradientSimilarity explainer.
"""``GradientSimilarity`` explainer.

The gradient similarity explainer is used to find examples in the training data that the predictor considers
similar to test instances the user wants to explain. It uses the gradients of the loss between the model output
@@ -129,21 +129,22 @@
task_name=task
)

non_trainable_layers = self.backend.get_non_trainable(self.predictor)
non_trainable_layers = self.backend._get_non_trainable(self.predictor)
if non_trainable_layers:
layers_msg = 'The following layers are not trainable: '
layers_msg = 'The following tensors are not trainable: '
layers = ", ".join([f"'{layer}'" for layer in non_trainable_layers if layer is not None])
Contributor: if layer is not None -> are there instances where the returned list contains None elements?

Contributor Author: I think eager tensors don't have names... Although I've forgotten to handle them properly here, as I'm enumerating and returning the layer index instead of the name. I'll need to check how that behaves though!

Contributor Author: I haven't been able to figure out this behaviour. EagerTensors don't have name attributes, so those layer names returned would be None. I'm leaving this in defensively so that if this behaviour does happen then an error won't occur, but the user still receives an error message.

I've also added an error message for the case where no parameter in the model is trainable.

warning_msg = ("Some layers in the model are not trainable. These layer gradients will not be "
f"included in the computation of gradient similarity. {layers_msg}{layers}")
warnings.warn(warning_msg)
warning_msg = ("Some layers in the model are not trainable. These layers don't have gradients "
"and will not be included in the computation of gradient similarity. "
f"{layers_msg}{layers}")
warnings.warn(warning_msg) # todo: scope warning to this location

def fit(self,
X_train: np.ndarray,
Y_train: np.ndarray) -> "Explainer":
"""Fit the explainer.

The GradientSimilarity explainer requires the model gradients over the training data. In the explain method it
compares them to the model gradients for the test instance(s). If ``store_grads`` was set to ``True`` on
The ``GradientSimilarity`` explainer requires the model gradients over the training data. In the explain method
it compares them to the model gradients for the test instance(s). If ``store_grads`` was set to ``True`` on
initialization then the gradients are precomputed here and stored. This will speed up the explain method call
but storing the gradients may not be feasible for large models.

20 changes: 5 additions & 15 deletions alibi/explainers/tests/test_simiarlity/conftest.py
@@ -82,11 +82,10 @@ def linear_cls_model(request):
input_shape = request.param.get('input_shape', (10,))
output_shape = request.param.get('output_shape', 10)
framework = request.param.get('framework', 'tensorflow')
batch_norm = request.param.get('batch_norm', False)

model = {
'tensorflow': lambda i_shape, o_shape: tf_linear_model(i_shape, o_shape, batch_norm),
'pytorch': lambda i_shape, o_shape: torch_linear_model(i_shape, o_shape, batch_norm)
'tensorflow': lambda i_shape, o_shape: tf_linear_model(i_shape, o_shape),
'pytorch': lambda i_shape, o_shape: torch_linear_model(i_shape, o_shape)
}[framework](input_shape, output_shape)

loss_fn = {
@@ -144,26 +143,19 @@ def linear_models(request):
return tf_model, tf_loss, torch_model, torch_loss


def tf_linear_model(input_shape, output_shape, batch_norm=False):
def tf_linear_model(input_shape, output_shape):
"""
Constructs a linear model for `tensorflow`.
"""
layers = [
return keras.Sequential([
tf.keras.layers.InputLayer(input_shape=input_shape),
tf.keras.layers.Flatten(),
]
if batch_norm:
layers.append(tf.keras.layers.BatchNormalization())
layers.extend([
tf.keras.layers.Dense(output_shape),
tf.keras.layers.Dense(output_shape),
tf.keras.layers.Softmax()
])

return keras.Sequential(layers)


def torch_linear_model(input_shape_arg, output_shape_arg, batch_norm=False):
def torch_linear_model(input_shape_arg, output_shape_arg):
"""
Constructs a linear model for `torch`.
"""
@@ -174,9 +166,7 @@ def __init__(self, input_shape, output_shape):
super(Model, self).__init__()
self.linear_stack = nn.Sequential(
nn.Flatten(start_dim=1),
nn.BatchNorm1d(input_shape) if batch_norm else nn.Identity(),
nn.Linear(input_shape, output_shape),
nn.Linear(output_shape, output_shape),
nn.Softmax()
)

30 changes: 17 additions & 13 deletions alibi/explainers/tests/test_simiarlity/test_backends.py
@@ -46,7 +46,7 @@ def test_backends(random_cls_dataset, linear_models):

@pytest.mark.parametrize('trainable_emd, grads_shape', [(True, (61, )), (False, (21, ))])
def test_tf_embedding_similarity(trainable_emd, grads_shape):
"""Test GradSim explainer correctly handles sparcity and non-trainable layers for tensorflow.
"""Test GradSim explainer correctly handles sparsity and non-trainable layers for tensorflow.

Test that `tensorflow` embedding layers work as expected and also that layers
marked as non-trainable are not included in the gradients.
@@ -62,13 +62,13 @@ def test_tf_embedding_similarity(trainable_emd, grads_shape):
Y = tf.random.uniform(shape=(1, 1), minval=0, maxval=10, dtype=tf.float32)
loss_fn = tf.keras.losses.MeanSquaredError()
tf_grads = _TensorFlowBackend.get_grads(model, X, Y, loss_fn)
assert tf_grads.shape == grads_shape # (4 * 10) + (5 * 4) + 1
assert tf_grads.shape == grads_shape # (4 * 10) * trainable_emd + (5 * 4) + 1


@pytest.mark.parametrize('trainable_emd, grads_shape', [(True, (61, )), (False, (21, ))])
@pytest.mark.parametrize('sparse', [True, False])
def test_pytorch_embedding_similarity(trainable_emd, grads_shape, sparse):
"""Test GradSim explainer correctly handles sparcity and non-trainable layers for pytorch.
"""Test GradSim explainer correctly handles sparsity and non-trainable layers for pytorch.

Tests that the `pytorch` embedding layers work as expected and that layers marked as
non-trainable are not included in the gradients.
@@ -85,8 +85,8 @@ def test_pytorch_embedding_similarity(trainable_emd, grads_shape, sparse):
X = torch.randint(0, 10, (1, 5))
Y = torch.randint(0, 10, (1, 1), dtype=torch.float32)
loss_fn = torch.nn.MSELoss()
tf_grads = _PytorchBackend.get_grads(model, X, Y, loss_fn)
assert tf_grads.shape == grads_shape # (4 * 10) + (5 * 4) + 1
pt_grads = _PytorchBackend.get_grads(model, X, Y, loss_fn)
assert pt_grads.shape == grads_shape # (4 * 10) * trainable_emd + (5 * 4) + 1
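To make the `grads_shape` comments above concrete, a small sketch of the parameter counts they encode (assuming, as the tests suggest, a vocabulary of 10, embedding dimension 4, sequence length 5 and a single dense output with bias):

```python
vocab, emb_dim, seq_len, n_out = 10, 4, 5, 1

embedding_params = vocab * emb_dim            # 40, included only when the embedding is trainable
dense_params = seq_len * emb_dim * n_out + 1  # 20 weights + 1 bias = 21

print(embedding_params + dense_params)  # 61 -> grads_shape when trainable_emd=True
print(dense_params)                     # 21 -> grads_shape when trainable_emd=False
```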


def test_non_numpy_grads_pytorch():
@@ -100,14 +100,16 @@ class MockTensor():
with pytest.raises(TypeError) as err:
_PytorchBackend._grad_to_numpy(MockTensor())

assert ("Could not convert gradient to numpy array. To ignore these gradients in"
" the similarity computation use `requires_grad=False`.") in str(err.value)
assert ("Could not convert gradient to `numpy` array. To ignore these gradients in the "
"similarity computation set `requires_grad=False` on the corresponding parameter.") \
in str(err.value)

with pytest.raises(TypeError) as err:
_PytorchBackend._grad_to_numpy(MockTensor(), 'test')

assert ("Could not convert gradient to numpy array for the named tensor: test. "
"To ignore these gradients in the similarity computation use `requires_grad=False`.") in str(err.value)
assert ("Could not convert gradient to `numpy` array for the named tensor: test. "
"To ignore these gradients in the similarity computation set `requires_grad=False`"
" on the corresponding parameter.") in str(err.value)


def test_non_numpy_grads_tensorflow():
@@ -121,11 +123,13 @@ class MockTensor():
with pytest.raises(TypeError) as err:
_TensorFlowBackend._grad_to_numpy(MockTensor())

assert ("Could not convert gradient to numpy array. To ignore these gradients "
"in the similarity computation use `trainable=False`.") in str(err.value)
assert ("Could not convert gradient to `numpy` array. To ignore these gradients "
"in the similarity computation set `trainable=False` on the corresponding parameter.") \
in str(err.value)

with pytest.raises(TypeError) as err:
_TensorFlowBackend._grad_to_numpy(MockTensor(), 'test')

assert ("Could not convert gradient to numpy array for the named tensor: test."
" To ignore these gradients in the similarity computation use `trainable=False`.") in str(err.value)
assert ("Could not convert gradient to `numpy` array for the named tensor: test."
" To ignore these gradients in the similarity computation set "
"`trainable=False` on the corresponding parameter.") in str(err.value)