executablebooks · chrisjsewell · Jun 1, 2023 · Jun 1, 2023 · Jun 1, 2023 · Jun 1, 2023
diff --git a/markdown_it/common/utils.py b/markdown_it/common/utils.py
@@ -4,12 +4,12 @@
 
 import html
 import re
-from typing import Any, Match, TypeVar
+from typing import Match, TypeVar
 
 from .entities import entities
 
 
-def charCodeAt(src: str, pos: int) -> Any:
+def charCodeAt(src: str, pos: int) -> int | None:
     """
     Returns the Unicode value of the character at the specified location.
 
@@ -24,6 +24,21 @@ def charCodeAt(src: str, pos: int) -> Any:
         return None
 
 
+def charStrAt(src: str, pos: int) -> str | None:
+    """
+    Returns the character (a one-character string) at the specified location.
+
+    @param - pos The zero-based index of the desired character.
+    If there is no character at the specified index, None is returned.
+
+    This was added for compatibility with python
+    """
+    try:
+        return src[pos]
+    except IndexError:
+        return None
+
+
 _ItemTV = TypeVar("_ItemTV")
 
 
@@ -96,7 +111,7 @@ def replaceEntityPattern(match: str, name: str) -> str:
     if name in entities:
         return entities[name]
 
-    if ord(name[0]) == 0x23 and DIGITAL_ENTITY_TEST_RE.search(name):
+    if name[0] == "#" and DIGITAL_ENTITY_TEST_RE.search(name):
         code = int(name[2:], 16) if name[1].lower() == "x" else int(name[1:], 10)
         if isValidEntityCode(code):
             return fromCodePoint(code)
@@ -178,8 +193,14 @@ def escapeRE(string: str) -> str:
 # //////////////////////////////////////////////////////////////////////////////
 
 
-def isSpace(code: object) -> bool:
-    return code in {0x09, 0x20}
+def isSpace(code: int | None) -> bool:
+    """Check if character code is a tab (0x09) or a space (0x20)."""
+    return code in (0x09, 0x20)
+
+
+def isStrSpace(ch: str | None) -> bool:
+    """Check if character is a tab or a space."""
+    return ch in ("\t", " ")
 
 
 MD_WHITESPACE = {
@@ -188,7 +209,7 @@ def isSpace(code: object) -> bool:
     0x0B,  # \v
     0x0C,  # \f
     0x0D,  # \r
-    0x20,
+    0x20,  # space
     0xA0,
     0x1680,
     0x202F,
@@ -213,6 +234,7 @@ def isWhiteSpace(code: int) -> bool:
 
 # Currently without astral characters support.
 def isPunctChar(ch: str) -> bool:
+    """Check if character is a punctuation character."""
     return UNICODE_PUNCT_RE.search(ch) is not None
 
 

diff --git a/markdown_it/helpers/parse_link_destination.py b/markdown_it/helpers/parse_link_destination.py
@@ -49,7 +49,7 @@ def parseLinkDestination(string: str, pos: int, maximum: int) -> _Result:
     while pos < maximum:
         code = charCodeAt(string, pos)
 
-        if code == 0x20:
+        if code is None or code == 0x20:
             break
 
         # ascii control characters

diff --git a/markdown_it/helpers/parse_link_label.py b/markdown_it/helpers/parse_link_label.py
@@ -17,16 +17,16 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False)
     level = 1
 
     while state.pos < state.posMax:
-        marker = state.srcCharCode[state.pos]
-        if marker == 0x5D:  # /* ] */)
+        marker = state.src[state.pos]
+        if marker == "]":
             level -= 1
             if level == 0:
                 found = True
                 break
 
         prevPos = state.pos
         state.md.inline.skipToken(state)
-        if marker == 0x5B:  # /* [ */)
+        if marker == "[":
             if prevPos == state.pos - 1:
                 # increase level if we find text `[`,
                 # which is not a part of any token

diff --git a/markdown_it/main.py b/markdown_it/main.py
@@ -46,7 +46,7 @@ def __init__(
         """
         # add modules
         self.utils = utils
-        self.helpers: Any = helpers
+        self.helpers = helpers
 
         # initialise classes
         self.inline = ParserInline()

diff --git a/markdown_it/parser_block.py b/markdown_it/parser_block.py
@@ -97,16 +97,11 @@ def tokenize(
                 state.line = line
 
     def parse(
-        self,
-        src: str,
-        md: MarkdownIt,
-        env: EnvType,
-        outTokens: list[Token],
-        ords: tuple[int, ...] | None = None,
+        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
     ) -> list[Token] | None:
         """Process input string and push block tokens into `outTokens`."""
         if not src:
             return None
-        state = StateBlock(src, md, env, outTokens, ords)
+        state = StateBlock(src, md, env, outTokens)
         self.tokenize(state, state.line, state.lineMax)
         return state.tokens
diff --git a/markdown_it/port.yaml b/markdown_it/port.yaml
@@ -23,8 +23,7 @@
       to manipulate `Token.attrs`, which have an identical signature to those upstream.
     - Use python version of `charCodeAt`
     - |
-      Reduce use of charCodeAt() by storing char codes in a srcCharCodes attribute for state
-      objects and sharing those whenever possible
+      Use `str` units instead of `int`s to represent Unicode codepoints.
       This provides a significant performance boost
     - |
       In markdown_it/rules_block/reference.py,

diff --git a/markdown_it/ruler.py b/markdown_it/ruler.py
@@ -20,6 +20,7 @@ class Ruler
 from collections.abc import Callable, Iterable
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, TypedDict
+import warnings
 
 from markdown_it._compat import DATACLASS_KWARGS
 
@@ -30,8 +31,6 @@ class Ruler
 
 
 class StateBase:
-    srcCharCode: tuple[int, ...]  # noqa: N815
-
     def __init__(self, src: str, md: MarkdownIt, env: EnvType):
         self.src = src
         self.env = env
@@ -44,7 +43,18 @@ def src(self) -> str:
     @src.setter
     def src(self, value: str) -> None:
         self._src = value
-        self.srcCharCode = tuple(ord(c) for c in self.src)
+        self._srcCharCode: tuple[int, ...] | None = None
+
+    @property
+    def srcCharCode(self) -> tuple[int, ...]:
+        warnings.warn(
+            "StateBase.srcCharCode is deprecated. Use StateBase.src instead.",
+            DeprecationWarning,
+            stacklevel=2,
+        )
+        if self._srcCharCode is None:
+            self._srcCharCode = tuple(ord(c) for c in self._src)
+        return self._srcCharCode
 
 
 # The first positional arg is always a subtype of `StateBase`. Other

diff --git a/markdown_it/rules_block/blockquote.py b/markdown_it/rules_block/blockquote.py
@@ -3,7 +3,7 @@
 
 import logging
 
-from ..common.utils import isSpace
+from ..common.utils import isStrSpace
 from .state_block import StateBlock
 
 LOGGER = logging.getLogger(__name__)
@@ -23,7 +23,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
 
     # check the block quote marker
     try:
-        if state.srcCharCode[pos] != 0x3E:  # /* > */
+        if state.src[pos] != ">":
             return False
     except IndexError:
         return False
@@ -38,20 +38,20 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
     initial = offset = state.sCount[startLine] + 1
 
     try:
-        second_char_code: int | None = state.srcCharCode[pos]
+        second_char: str | None = state.src[pos]
     except IndexError:
-        second_char_code = None
+        second_char = None
 
     # skip one optional space after '>'
-    if second_char_code == 0x20:  # /* space */
+    if second_char == " ":
         # ' >   test '
         #     ^ -- position start of line here:
         pos += 1
         initial += 1
         offset += 1
         adjustTab = False
         spaceAfterMarker = True
-    elif second_char_code == 0x09:  # /* tab */
+    elif second_char == "\t":
         spaceAfterMarker = True
 
         if (state.bsCount[startLine] + offset) % 4 == 3:
@@ -74,10 +74,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
     state.bMarks[startLine] = pos
 
     while pos < max:
-        ch = state.srcCharCode[pos]
+        ch = state.src[pos]
 
-        if isSpace(ch):
-            if ch == 0x09:  # / tab /
+        if isStrSpace(ch):
+            if ch == "\t":
                 offset += (
                     4
                     - (offset + state.bsCount[startLine] + (1 if adjustTab else 0)) % 4
@@ -147,7 +147,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
             # Case 1: line is not inside the blockquote, and this line is empty.
             break
 
-        evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented  # /* > */
+        evaluatesTrue = state.src[pos] == ">" and not isOutdented
         pos += 1
         if evaluatesTrue:
             # This line is inside the blockquote.
@@ -156,20 +156,20 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
             initial = offset = state.sCount[nextLine] + 1
 
             try:
-                next_char: int | None = state.srcCharCode[pos]
+                next_char: str | None = state.src[pos]
             except IndexError:
                 next_char = None
 
             # skip one optional space after '>'
-            if next_char == 0x20:  # /* space */
+            if next_char == " ":
                 # ' >   test '
                 #     ^ -- position start of line here:
                 pos += 1
                 initial += 1
                 offset += 1
                 adjustTab = False
                 spaceAfterMarker = True
-            elif next_char == 0x09:  # /* tab */
+            elif next_char == "\t":
                 spaceAfterMarker = True
 
                 if (state.bsCount[nextLine] + offset) % 4 == 3:
@@ -192,10 +192,10 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
             state.bMarks[nextLine] = pos
 
             while pos < max:
-                ch = state.srcCharCode[pos]
+                ch = state.src[pos]
 
-                if isSpace(ch):
-                    if ch == 0x09:
+                if isStrSpace(ch):
+                    if ch == "\t":
                         offset += (
                             4
                             - (

diff --git a/markdown_it/rules_block/fence.py b/markdown_it/rules_block/fence.py
@@ -19,15 +19,14 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool
     if pos + 3 > maximum:
         return False
 
-    marker = state.srcCharCode[pos]
+    marker = state.src[pos]
 
-    # /* ~ */  /* ` */
-    if marker != 0x7E and marker != 0x60:
+    if marker not in ("~", "`"):
         return False
 
     # scan marker length
     mem = pos
-    pos = state.skipChars(pos, marker)
+    pos = state.skipCharsStr(pos, marker)
 
     length = pos - mem
 
@@ -37,8 +36,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool
     markup = state.src[mem:pos]
     params = state.src[pos:maximum]
 
-    # /* ` */
-    if marker == 0x60 and chr(marker) in params:
+    if marker == "`" and marker in params:
         return False
 
     # Since start is found, we can report success here in validation mode
@@ -65,15 +63,15 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bool
             break
 
         try:
-            if state.srcCharCode[pos] != marker:
+            if state.src[pos] != marker:
                 continue
         except IndexError:
             break
 
         if state.is_code_block(nextLine):
             continue
 
-        pos = state.skipChars(pos, marker)
+        pos = state.skipCharsStr(pos, marker)
 
         # closing code fence must be at least as long as the opening one
         if pos - mem < length:

diff --git a/markdown_it/rules_block/heading.py b/markdown_it/rules_block/heading.py
@@ -3,7 +3,7 @@
 
 import logging
 
-from ..common.utils import isSpace
+from ..common.utils import isStrSpace
 from .state_block import StateBlock
 
 LOGGER = logging.getLogger(__name__)
@@ -18,29 +18,27 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bo
     if state.is_code_block(startLine):
         return False
 
-    ch: int | None = state.srcCharCode[pos]
+    ch: str | None = state.src[pos]
 
-    # /* # */
-    if ch != 0x23 or pos >= maximum:
+    if ch != "#" or pos >= maximum:
         return False
 
     # count heading level
     level = 1
     pos += 1
     try:
-        ch = state.srcCharCode[pos]
+        ch = state.src[pos]
     except IndexError:
         ch = None
-    # /* # */
-    while ch == 0x23 and pos < maximum and level <= 6:
+    while ch == "#" and pos < maximum and level <= 6:
         level += 1
         pos += 1
         try:
-            ch = state.srcCharCode[pos]
+            ch = state.src[pos]
         except IndexError:
             ch = None
 
-    if level > 6 or (pos < maximum and not isSpace(ch)):
+    if level > 6 or (pos < maximum and not isStrSpace(ch)):
         return False
 
     if silent:
@@ -49,8 +47,8 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool) -> bo
     # Let's cut tails like '    ###  ' from the end of string
 
     maximum = state.skipSpacesBack(maximum, pos)
-    tmp = state.skipCharsBack(maximum, 0x23, pos)  # #
-    if tmp > pos and isSpace(state.srcCharCode[tmp - 1]):
+    tmp = state.skipCharsStrBack(maximum, "#", pos)
+    if tmp > pos and isStrSpace(state.src[tmp - 1]):
         maximum = tmp
 
     state.line = startLine + 1