diff --git a/captum/_utils/gradient.py b/captum/_utils/gradient.py
index fe7f65b9ab..4d885ff749 100644
--- a/captum/_utils/gradient.py
+++ b/captum/_utils/gradient.py
@@ -738,13 +738,13 @@ def _compute_jacobian_wrt_params(
         inputs (tuple[Any, ...]): The minibatch for which the forward pass is computed.
             It is unpacked before passing to `model`, so it must be a tuple. The
             individual elements of `inputs` can be anything.
-        labels (Tensor or None): Labels for input if computing a loss function.
-        loss_fn (torch.nn.Module or Callable or None): The loss function. If a library
+        labels (Tensor, optional): Labels for input if computing a loss function.
+        loss_fn (torch.nn.Module or Callable, optional): The loss function. If a library
             defined loss function is provided, it would be expected to be a
             torch.nn.Module. If a custom loss is provided, it can be either type,
             but must behave as a library loss function would if `reduction='none'`.
-        layer_modules (List[torch.nn.Module]): A list of PyTorch modules w.r.t. which
-            jacobian gradients are computed.
+        layer_modules (List[torch.nn.Module], optional): A list of PyTorch modules
+            w.r.t. which jacobian gradients are computed.
     Returns:
         grads (tuple[Tensor, ...]): Returns the Jacobian for the minibatch as a
             tuple of gradients corresponding to the tuple of trainable parameters
@@ -813,18 +813,19 @@ def _compute_jacobian_wrt_params_with_sample_wise_trick(
         inputs (tuple[Any, ...]): The minibatch for which the forward pass is computed.
             It is unpacked before passing to `model`, so it must be a tuple. The
             individual elements of `inputs` can be anything.
-        labels (Tensor or None): Labels for input if computing a loss function.
-        loss_fn (torch.nn.Module or Callable or None): The loss function. If a library
+        labels (Tensor, optional): Labels for input if computing a loss function.
+        loss_fn (torch.nn.Module or Callable, optional): The loss function. If a library
             defined loss function is provided, it would be expected to be a
             torch.nn.Module. If a custom loss is provided, it can be either type,
             but must behave as a library loss function would if `reduction='sum'`
             or `reduction='mean'`.
-        reduction_type (str): The type of reduction applied. If a loss_fn is passed,
-            this should match `loss_fn.reduction`. Else if gradients are being
-            computed on direct model outputs (scores), then 'sum' should be used.
+        reduction_type (str, optional): The type of reduction applied. If a loss_fn is
+            passed, this should match `loss_fn.reduction`. Else if gradients are
+            being computed on direct model outputs (scores), then 'sum' should be
+            used. Defaults to 'sum'.
-        layer_modules (torch.nn.Module): A list of PyTorch modules w.r.t. which
-            jacobian gradients are computed.
+        layer_modules (torch.nn.Module, optional): A list of PyTorch modules w.r.t.
+            which jacobian gradients are computed.

     Returns:
         grads (tuple[Tensor, ...]): Returns the Jacobian for the minibatch as a
diff --git a/captum/attr/_core/layer/layer_activation.py b/captum/attr/_core/layer/layer_activation.py
index f967f21790..7c53570ac2 100644
--- a/captum/attr/_core/layer/layer_activation.py
+++ b/captum/attr/_core/layer/layer_activation.py
@@ -88,7 +88,7 @@ def attribute(
         Returns:
             *Tensor* or *tuple[Tensor, ...]* or list of **attributions**:
-            - **attributions** (*Tensor* or *tuple[Tensor, ...]* or *list*):
+            - **attributions** (*Tensor*, *tuple[Tensor, ...]*, or *list*):
                         Activation of each neuron in given layer output.
                         Attributions will always be the same size as the
                         output of the given layer.
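The jacobian helpers documented above are private utilities in captum._utils.gradient. The following is a minimal sketch, not part of this diff, of how the documented optional `labels` and `loss_fn` parameters fit together; the toy model, data, and loss below are placeholder assumptions, and only the parameter names shown in the docstring above are relied on.

    # Hedged sketch (not from the PR): per-sample loss with `reduction='none'`,
    # as the `_compute_jacobian_wrt_params` docstring requires. Model/data are toys.
    import torch
    import torch.nn as nn

    from captum._utils.gradient import _compute_jacobian_wrt_params

    model = nn.Linear(3, 2)                          # toy trainable model
    inputs = (torch.randn(4, 3),)                    # minibatch, passed as a tuple
    labels = torch.tensor([0, 1, 0, 1])
    loss_fn = nn.CrossEntropyLoss(reduction="none")  # per-sample losses

    grads = _compute_jacobian_wrt_params(model, inputs, labels=labels, loss_fn=loss_fn)

    # One gradient tensor per trainable parameter, each covering the whole minibatch.
    for g in grads:
        print(g.shape)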
diff --git a/captum/attr/_core/layer/layer_gradient_x_activation.py b/captum/attr/_core/layer/layer_gradient_x_activation.py
index b188760639..0d6a2075cd 100644
--- a/captum/attr/_core/layer/layer_gradient_x_activation.py
+++ b/captum/attr/_core/layer/layer_gradient_x_activation.py
@@ -135,7 +135,7 @@ def attribute(
         Returns:
             *Tensor* or *tuple[Tensor, ...]* or list of **attributions**:
-            - **attributions** (*Tensor* or *tuple[Tensor, ...]* or *list*):
+            - **attributions** (*Tensor*, *tuple[Tensor, ...]*, or *list*):
                         Product of gradient and activation for each
                         neuron in given layer output.
                         Attributions will always be the same size as the
diff --git a/captum/attr/_core/occlusion.py b/captum/attr/_core/occlusion.py
index 6c54b1bf8a..6ca1355944 100644
--- a/captum/attr/_core/occlusion.py
+++ b/captum/attr/_core/occlusion.py
@@ -80,7 +80,7 @@ def attribute(  # type: ignore
                         this must be a tuple containing one tuple for each
                         input tensor defining the dimensions of the patch
                         for that input tensor, as described for the single
                         tensor case.
-            strides (int or tuple or tuple[int] or tuple[tuple], optional):
+            strides (int, tuple, tuple[int], or tuple[tuple], optional):
                         This defines the step by which the occlusion
                         hyperrectangle should be shifted by in each direction
                         for each iteration. For a single tensor input,
                         this can be either a single
diff --git a/captum/influence/_utils/common.py b/captum/influence/_utils/common.py
index f828390f49..b43e3aa553 100644
--- a/captum/influence/_utils/common.py
+++ b/captum/influence/_utils/common.py
@@ -86,7 +86,7 @@ def _jacobian_loss_wrt_inputs(
        batch).

    Args:
-        loss_fn (torch.nn.Module or Callable or None): The loss function. If a library
+        loss_fn (torch.nn.Module, Callable, or None): The loss function. If a library
            defined loss function is provided, it would be expected to be a
            torch.nn.Module. If a custom loss is provided, it can be either type,
            but must behave as a library loss function would if `reduction='sum'`
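For the occlusion.py docstring touched above, a short usage sketch follows; it is not part of the diff, the toy CNN and tensor shapes are placeholders, and it only exercises the public Occlusion API with the tuple form of `strides` described in that docstring.

    # Hedged sketch (not from the PR): tuple[int] `strides` paired with
    # `sliding_window_shapes` on a placeholder model and input.
    import torch
    import torch.nn as nn

    from captum.attr import Occlusion

    model = nn.Sequential(
        nn.Conv2d(3, 8, kernel_size=3, padding=1),
        nn.ReLU(),
        nn.AdaptiveAvgPool2d(1),
        nn.Flatten(),
        nn.Linear(8, 10),
    ).eval()

    images = torch.randn(2, 3, 32, 32)
    occlusion = Occlusion(model)

    attributions = occlusion.attribute(
        images,
        sliding_window_shapes=(3, 8, 8),  # patch size per input dimension (C, H, W)
        strides=(3, 4, 4),                # step per dimension; an int form also exists
        target=0,                         # attribute the score for class index 0
    )

    print(attributions.shape)  # same shape as `images`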