diff --git a/tensor2tensor/layers/common_attention.py b/tensor2tensor/layers/common_attention.py
index 4d35a1498..33116776b 100644
--- a/tensor2tensor/layers/common_attention.py
+++ b/tensor2tensor/layers/common_attention.py
@@ -619,7 +619,7 @@ def add_timing_signal_nd(x, min_timescale=1.0, max_timescale=1.0e4):
   memory inputs to attention.
 
   The use of relative position is possible because sin(a+b) and cos(a+b) can be
-  experessed in terms of b, sin(a) and cos(a).
+  expressed in terms of b, sin(a) and cos(a).
 
   x is a Tensor with n "positional" dimensions, e.g. one dimension for a
   sequence or two dimensions for an image
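
For reference, the identities behind the docstring sentence this patch touches are
sin(a+b) = sin(a)cos(b) + cos(a)sin(b) and cos(a+b) = cos(a)cos(b) - sin(a)sin(b),
so for a fixed offset b the pair (sin(a+b), cos(a+b)) is a fixed linear (rotation)
transform of (sin(a), cos(a)), which is what lets attention recover relative
positions from the timing signal. A minimal NumPy check of that claim, not part
of the patch and assuming nothing beyond standard NumPy:

import numpy as np

# For a fixed offset b, [sin(a+b), cos(a+b)] equals a rotation-like
# matrix (built from b alone) applied to [sin(a), cos(a)].
a, b = 0.7, 1.3
rot = np.array([[np.cos(b), np.sin(b)],
                [-np.sin(b), np.cos(b)]])
lhs = np.array([np.sin(a + b), np.cos(a + b)])
rhs = rot @ np.array([np.sin(a), np.cos(a)])
assert np.allclose(lhs, rhs)  # angle-addition identities hold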