Commit: Use caches (#50)
* Use 3.11 everywhere (#38)

* v0.6.0 (#42) (#45)

* Bump version and add note to README

Prepare for release tomorrow and move forward to the v0.7.0 and v0.8.0 releases

* Steal pygments regexes (#34)

* test.py

* Move to server functions and set up proper tests

* Reformat

* Reformat

* Change to beartype typing

* Even more formatting

* Remove regex stealer test

Runs differently locally than on the GitHub runner and isn't worth the time or effort.

* Get docstring areas

* Make function work

* Add type annotation

* format

* Add lots of comments but don't remove private type

@leycec I tried using the idea you gave for private types but got Union type errors for some reason I couldn't really understand. Thank you so much for your time and thoughts, and congrats on the bike ride; that's a long stretch!

* Fix a small bug

* Improve highlighting functions significantly

Now it ignores whitespace at the front of the line!

* Stop using private variable

* Format for black and ruff

* Move docstring tokens up

* Update tests

* Fix line number for docstring tokens

Was 1 behind

* Reformat

* Bump version

* Implement token overwriting (#49)

* Display the problem

* Get working test

* Better overlap checking

* Better tests

* Sort and remove duplicates

* Remove old vestige and format

* Move token merging to highlight file

* Format

* Use overwrite_and_merge_tokens

* Cache important functions

* Remove old file

* Format
Moosems authored Jul 7, 2024
1 parent a17f289 commit c6c6299
Showing 7 changed files with 166 additions and 20 deletions.
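
For context, a Token in this codebase is ((line, column), length, token_type), as defined in the misc.py hunk below. A minimal sketch of the new merge/overwrite behaviour follows; the import path matches this repository's layout but is an assumption, since the diff doesn't show the package exports:

    from salve_ipc.server_functions.highlight import (
        merge_tokens,
        overwrite_and_merge_tokens,
    )

    # Adjacent same-type tokens on the same line collapse into one span
    print(merge_tokens([((1, 0), 3, "Name"), ((1, 3), 2, "Name")]))
    # [((1, 0), 5, 'Name')]

    # Where ranges overlap, new tokens replace old ones
    print(overwrite_and_merge_tokens([((1, 0), 4, "Name")], [((1, 0), 4, "String")]))
    # [((1, 0), 4, 'String')]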
2 changes: 1 addition & 1 deletion .github/workflows/pypi.yml
@@ -13,7 +13,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.9
python-version: 3.11.1
- name: Install dependencies
run: |
python -m pip install --upgrade pip
2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
<h1 align="center">Salve v0.6.0</h1>
<h1 align="center">Salve v0.7.0</h1>

# Installation

1 change: 0 additions & 1 deletion salve_ipc/server_functions/find_words.py

This file was deleted.

157 changes: 155 additions & 2 deletions salve_ipc/server_functions/highlight.py
@@ -1,3 +1,4 @@
from functools import cache
from re import MULTILINE, Match, Pattern, compile

from beartype.typing import Callable
@@ -25,6 +26,149 @@
]


def merge_tokens(tokens: list[Token]) -> list[Token]:
output_tokens: list[Token] = []
depth: int = 0
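    # depth mirrors len(output_tokens): it grows when a token is appended, not when one is merged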
for token in tokens:
        # First token: there's nothing before it to merge with yet
if depth == 0:
output_tokens.append(token)
depth += 1
continue

previous_token = output_tokens[-1]

# Get our boolean checks
same_token_type: bool = previous_token[2] == token[2]
same_line: bool = previous_token[0][0] == token[0][0]
neighboring_tokens: bool = (
previous_token[0][1] + previous_token[1] == token[0][1]
)

# Determine if tokens should be merged
if not (same_token_type and same_line and neighboring_tokens):
output_tokens.append(token)
depth += 1
continue

# Replace previous token with new token (we don't increase depth because we are substituting, not adding)
new_token: Token = (
(token[0][0], previous_token[0][1]),
previous_token[1] + token[1],
token[2],
)
output_tokens[-1] = new_token
return output_tokens


def overwrite_tokens(
    old_tokens: list[Token], new_tokens: list[Token]
) -> list[Token]:
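    # New tokens take priority: any old token overlapping a new token's range is dropped or trimmed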
output_tokens: list[Token] = []
dont_add_tokens: list[Token] = []
for new_token in new_tokens:
for old_token in old_tokens:
same_token: bool = old_token == new_token
if same_token:
continue

same_line: bool = old_token[0][0] == new_token[0][0]
can_add_token: bool = old_token not in dont_add_tokens
if not same_line:
if can_add_token:
output_tokens.append(old_token)
continue

            # Check whether the ranges overlap. If the old token is fully inside the
            # new token's range, drop it (and add it to the don't-add list); if it only
            # partially overlaps, keep just the part outside the new token's range

old_token_end: int = old_token[0][1] + old_token[1]
new_token_end: int = new_token[0][1] + new_token[1]

partial_front_overlap: bool = (
new_token[0][1] <= old_token_end
and not old_token_end > new_token_end
)
partial_end_overlap: bool = new_token_end >= old_token[0][1]
fully_contained: bool = (
old_token_end <= new_token_end
and old_token[0][1] >= new_token[0][1]
)

if not (
partial_front_overlap or partial_end_overlap or fully_contained
):
continue

dont_add_tokens.append(old_token)

while old_token in output_tokens:
output_tokens.remove(old_token)

if fully_contained:
continue

            # If we get here, it means it's a partial overlap
if partial_front_overlap:
created_token: Token = (
(new_token[0][0], old_token[0][1]),
new_token[0][1] - old_token[0][1],
old_token[2],
)
while created_token in output_tokens:
output_tokens.remove(created_token)
output_tokens.append(created_token)
dont_add_tokens.append(created_token)
continue

if old_token[0][1] < new_token[0][1]:
created_token_1: Token = (
(new_token[0][0], old_token[0][1]),
new_token[0][1] - old_token[0][1],
old_token[2],
)
created_token_2: Token = (
(new_token[0][0], new_token_end),
old_token_end - new_token_end,
old_token[2],
)
while created_token_1 in output_tokens:
output_tokens.remove(created_token_1)
output_tokens.append(created_token_1)
while created_token_2 in output_tokens:
output_tokens.remove(created_token_2)
output_tokens.append(created_token_2)
dont_add_tokens.append(created_token_1)
dont_add_tokens.append(created_token_2)

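            # Keep the part of the old token that extends past the new token's end
            # (this also runs after the split above; the while loop below keeps it duplicate-free)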
created_token: Token = (
(new_token[0][0], new_token_end),
old_token_end - new_token_end,
old_token[2],
)
while created_token in output_tokens:
output_tokens.remove(created_token)
output_tokens.append(created_token)
dont_add_tokens.append(created_token)

output_tokens.append(new_token)

output_tokens = sorted(set(output_tokens))
return output_tokens


def overwrite_and_merge_tokens(
old_tokens: list[Token], new_tokens: list[Token]
) -> list[Token]:
merged_old_tokens: list[Token] = merge_tokens(sorted(set(old_tokens)))
merged_new_tokens: list[Token] = merge_tokens(sorted(set(new_tokens)))
output_tokens: list[Token] = overwrite_tokens(
merged_old_tokens, merged_new_tokens
)

output_tokens = sorted(set(merge_tokens(output_tokens)))
return output_tokens
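
# A sanity sketch (hypothetical values, not from this repo's tests): a String
# token punched into the middle of a Comment token splits the comment around it:
#     overwrite_and_merge_tokens(
#         [((1, 0), 10, "Comment")], [((1, 4), 3, "String")]
#     )
#     -> [((1, 0), 4, 'Comment'), ((1, 4), 3, 'String'), ((1, 7), 3, 'Comment')]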


@cache
def get_new_token_type(old_token: str) -> str:
"""Turns pygments token types into a generic predefined Token"""
new_type: str = generic_tokens[0]
@@ -158,6 +302,7 @@ def find_hidden_chars(lines: list[str], start_line: int = 1) -> list[Token]:
_LexReturnTokens = list[tuple[_TokenType, str]]


@cache
def get_pygments_comment_regexes(lexer: RegexLexer) -> _TokenTupleReturnType:
"""
    Steals the regexes that pygments uses to give docstring, heredoc, comment, and multiline comment highlights
@@ -306,6 +451,11 @@ def proper_docstring_tokens(lexer: RegexLexer, full_text: str) -> list[Token]:
return new_docstring_tokens


@cache
def lexer_by_name_cached(language: str) -> Lexer:
return get_lexer_by_name(language)
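
# get_lexer_by_name() builds a fresh Lexer object on every call, so caching by
# language name avoids repeating that work on each highlight request.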


def get_highlights(
full_text: str,
language: str = "text",
@@ -314,7 +464,7 @@
"""Gets pygments tokens from text provided in language proved and converts them to Token's"""

# Create some variables used all throughout the function
lexer: Lexer = get_lexer_by_name(language)
lexer: Lexer = lexer_by_name_cached(language)
split_text: _ListOfStrs = full_text.splitlines()
new_tokens: list[Token] = []

@@ -354,11 +504,14 @@ def get_highlights(
# override older tokens that may not be as accurate

if isinstance(lexer, RegexLexer):
new_tokens += proper_docstring_tokens(lexer, full_text)
new_tokens = overwrite_and_merge_tokens(
new_tokens, proper_docstring_tokens(lexer, full_text)
)

new_tokens += get_urls(split_text, text_range[0])
if [char for char in hidden_chars if char in full_text]:
        # if there are no hidden chars, we don't want to needlessly compute this
new_tokens += find_hidden_chars(split_text, text_range[0])

new_tokens = merge_tokens(new_tokens)
return new_tokens
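
A minimal usage sketch of the updated highlighter; the import path follows this repository's layout, and the parameters elided in the hunk above are assumed to have defaults:

    from salve_ipc.server_functions.highlight import get_highlights

    tokens = get_highlights('x = "hi"  # comment', language="python")
    for token in tokens:
        print(token)  # ((line, column), length, token_type)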
2 changes: 2 additions & 0 deletions salve_ipc/server_functions/misc.py
@@ -1,3 +1,4 @@
from functools import cache
from unicodedata import category

Token = tuple[tuple[int, int], int, str]
@@ -21,6 +22,7 @@
]


@cache
def is_unicode_letter(char: str) -> bool:
"""Returns a boolean value of whether a given unicode char is a letter or not (includes "_" for code completion reasons)"""
return char == "_" or category(char).startswith("L")
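
# Hypothetical quick checks (not from the test suite):
#     is_unicode_letter("a") -> True, is_unicode_letter("_") -> True,
#     is_unicode_letter("1") -> False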
4 changes: 2 additions & 2 deletions setup.py
@@ -7,15 +7,15 @@

setup(
name="salve_ipc",
version="0.6.0",
version="0.7.0",
description="Salve is an IPC library that can be used by code editors to easily get autocompletions, replacements, editorconfig suggestions, definitions, and syntax highlighting.",
author="Moosems",
author_email="moosems.j@gmail.com",
url="https://github.com/Moosems/salve",
long_description=long_description,
long_description_content_type="text/markdown",
install_requires=["pygments", "pyeditorconfig", "beartype"],
python_requires=">=3.9",
python_requires=">=3.11",
license="MIT license",
classifiers=[
"Development Status :: 3 - Alpha",
18 changes: 5 additions & 13 deletions tests/test_ipc.py
@@ -91,34 +91,26 @@ def test_IPC():
((3, 11), 7, "Comment"),
((5, 0), 5, "Name"),
((5, 5), 1, "Punctuation"),
((5, 6), 1, "String"),
((5, 7), 1, "String"),
((5, 8), 1, "String"),
((5, 6), 3, "String"),
((5, 9), 1, "Punctuation"),
((5, 12), 16, "Comment"),
((8, 0), 5, "Keyword"),
((8, 6), 3, "Name"),
((8, 9), 1, "Punctuation"),
((8, 10), 3, "Name"),
((8, 13), 1, "Punctuation"),
((8, 14), 1, "Punctuation"),
((8, 13), 2, "Punctuation"),
((9, 4), 3, "String"),
((10, 4), 4, "Name"),
((10, 4), 4, "String"),
((11, 4), 3, "String"),
((13, 4), 3, "Keyword"),
((13, 8), 8, "Name"),
((13, 16), 1, "Punctuation"),
((13, 17), 4, "Name"),
((13, 21), 1, "Punctuation"),
((13, 22), 1, "Punctuation"),
((13, 21), 2, "Punctuation"),
((14, 8), 4, "Keyword"),
((17, 0), 3, "Name"),
((17, 3), 1, "Punctuation"),
((17, 4), 1, "Punctuation"),
((17, 3), 2, "Punctuation"),
((18, 0), 24, "Comment"),
((9, 4), 3, "String"),
((10, 4), 4, "String"),
((11, 4), 3, "String"),
((18, 2), 22, "Link"),
((5, 7), 1, "Hidden_Char"),
],
