diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py
index 4d35a1498..33116776b 100644
--- a/tensor2tensor/layers/common_attention.py
+++ b/tensor2tensor/layers/common_attention.py
@@ -619,7 +619,7 @@ def add_timing_signal_nd(x, min_timescale=1.0, max_timescale=1.0e4):
   memory inputs to attention.
 
   The use of relative position is possible because sin(a+b) and cos(a+b) can be
-  experessed in terms of b, sin(a) and cos(a).
+  expressed in terms of b, sin(a) and cos(a).
 
   x is a Tensor with n "positional" dimensions, e.g. one dimension for a
   sequence or two dimensions for an image
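
For reference, the identities behind the docstring sentence this patch touches are
sin(a+b) = sin(a)cos(b) + cos(a)sin(b) and cos(a+b) = cos(a)cos(b) - sin(a)sin(b),
so for a fixed offset b the pair (sin(a+b), cos(a+b)) is a fixed linear (rotation)
transform of (sin(a), cos(a)), which is what lets attention recover relative
positions from the timing signal. A minimal NumPy check of that claim, not part
of the patch and assuming nothing beyond standard NumPy:

import numpy as np

# For a fixed offset b, [sin(a+b), cos(a+b)] equals a rotation-like
# matrix (built from b alone) applied to [sin(a), cos(a)].
a, b = 0.7, 1.3
rot = np.array([[np.cos(b), np.sin(b)],
                [-np.sin(b), np.cos(b)]])
lhs = np.array([np.sin(a + b), np.cos(a + b)])
rhs = rot @ np.array([np.sin(a), np.cos(a)])
assert np.allclose(lhs, rhs)  # angle-addition identities hold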