diff --git a/python/flashinfer/page.py b/python/flashinfer/page.py
index 75f2640e..7abfb0ba 100644
--- a/python/flashinfer/page.py
+++ b/python/flashinfer/page.py
@@ -258,7 +258,7 @@ def append_paged_kv_cache(
     >>> kv_append_length = torch.tensor([45, 8, 25, 22], dtype=torch.int32, device="cuda:0")
     >>> kv_append_indptr = torch.cat(
     ...     [torch.zeros(1).int().to(0), torch.cumsum(kv_append_length, dim=0)]
-    ... ).int()
+    ... ).int()  # [0, 45, 53, 78, 100]
     >>> max_num_pages = 1000
     >>> page_size = 16
     >>> paged_kv_cache = torch.randn(max_num_pages, 2, page_size, num_kv_heads, head_dim).half().to(0)
@@ -303,9 +303,6 @@ def append_paged_kv_cache(
 
     Note
     ----
-    Please refer to the :ref:`tutorial <recursive-attention>` for a detailed
-    explanation of the log-sum-exp function and attention states.
-
     The function assumes that the space for the appended k/v has already been allocated,
     which means :attr:`kv_indices`, :attr:`kv_indptr`, and :attr:`kv_last_page_len` have
     already incorporated the appended k/v.