diff --git a/deepspeed/module_inject/layers.py b/deepspeed/module_inject/layers.py
index 722ba413a671..2f884ba4fb09 100644
--- a/deepspeed/module_inject/layers.py
+++ b/deepspeed/module_inject/layers.py
@@ -91,6 +91,18 @@ def forward(self, input):
             output += self.bias
         return output
 
+    def extra_repr(self):
+        """Return a summary of the weight shape, bias presence and weight dtype.
+
+        The weight shape is unpacked as ``(out_features, in_features)``; both
+        sizes and the dtype read ``None`` when ``self.weight`` is ``None``.
+        """
+        weight = self.weight
+        out_features, in_features = weight.shape if weight is not None else (None, None)
+        dtype = weight.dtype if weight is not None else None
+        return "in_features={}, out_features={}, bias={}, dtype={}".format(
+            in_features, out_features, self.bias is not None, dtype)
+
 
 class LmHeadLinearAllreduce(nn.Module):
 
@@ -120,6 +132,18 @@ def forward(self, input):
             output += self.bias
         return output
 
+    def extra_repr(self):
+        """Return a summary of the weight shape, bias presence and weight dtype.
+
+        The weight shape is unpacked as ``(out_features, in_features)``; both
+        sizes and the dtype read ``None`` when ``self.weight`` is ``None``.
+        """
+        weight = self.weight
+        out_features, in_features = weight.shape if weight is not None else (None, None)
+        dtype = weight.dtype if weight is not None else None
+        return "in_features={}, out_features={}, bias={}, dtype={}".format(
+            in_features, out_features, self.bias is not None, dtype)
+
 
 class LinearLayer(nn.Module):
 
@@ -144,5 +168,17 @@ def forward(self, input):
             output += self.bias
         return output
 
+    def extra_repr(self):
+        """Return a summary of the weight shape, bias presence and weight dtype.
+
+        The weight shape is unpacked as ``(out_features, in_features)``; both
+        sizes and the dtype read ``None`` when ``self.weight`` is ``None``.
+        """
+        weight = self.weight
+        out_features, in_features = weight.shape if weight is not None else (None, None)
+        dtype = weight.dtype if weight is not None else None
+        return "in_features={}, out_features={}, bias={}, dtype={}".format(
+            in_features, out_features, self.bias is not None, dtype)
+
 
 class Normalize(nn.Module):