Steal pygments regexes #34

Merged · 20 commits · Jul 7, 2024 · Changes from all commits
197 changes: 190 additions & 7 deletions salve_ipc/server_functions/highlight.py
@@ -1,9 +1,11 @@
from re import Match, Pattern, compile
from re import MULTILINE, Match, Pattern, compile

from beartype.typing import Callable
from pygments import lex
from pygments.lexer import Lexer
from pygments.lexer import Lexer, RegexLexer, default
from pygments.lexers import get_lexer_by_name
from pygments.token import _TokenType
from pygments.token import Comment as CommentToken
from pygments.token import String as StringToken

from .misc import Token, generic_tokens

@@ -140,42 +142,223 @@ def find_hidden_chars(lines: list[str], start_line: int = 1) -> list[Token]:
return tok_list


# Instantiate some useful variables/types for the following functions
useful_toks = {
StringToken.Doc,
StringToken.Heredoc,
CommentToken,
CommentToken.Multiline,
}

# Beartype speed optimizations
_TokenType = type(StringToken) # Resolves to pygments.token._TokenType
_TokenTupleInternalType = tuple[_TokenType | Callable, ...]
_TokenTupleReturnType = list[tuple[str, _TokenType]]
_ListOfStrs = list[str]
_LexReturnTokens = list[tuple[_TokenType, str]]
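
For orientation, the aliases above describe shapes like the following. This is an illustrative sketch only (the literal values are made up), not code from the PR:

# Illustrative only: the kind of data the aliases above describe.
example_regexes: _TokenTupleReturnType = [
    (r"(?s)'''.*?'''", StringToken.Doc),  # a regex paired with the pygments token it yields
]
example_lex_output: _LexReturnTokens = [
    (StringToken.Doc, "'''docstring'''"),  # what pygments.lex() hands back per chunk
]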


def get_pygments_comment_regexes(lexer: RegexLexer) -> _TokenTupleReturnType:
"""
Steals the regexes that pgments uses to give docstring, heredoc, comment, and multiline comment highlights
(css comments, though multine, aren't called multiline comments)
"""

regexes: _TokenTupleReturnType = []

for path in lexer.tokens:
# This should have a stricter type definition, but writing out every possibility
# isn't practical, so it waits on beartype to support it
path_tokens: list = lexer.tokens[path]

if isinstance(path_tokens[0], str):
# A leading string means this state simply redirects to another one; every
# state is checked anyway, so skip this one
continue

for token_tuple in path_tokens:
# Skip default() entries, which aren't (regex, token) rule tuples
if isinstance(token_tuple, default):
continue

if token_tuple[1] in useful_toks:
regexes.append((token_tuple[0], token_tuple[1]))
continue

# At this point token_tuple[1] should be a callable (pygments wraps some token types in callbacks)
if not callable(token_tuple[1]):
continue

pygments_func: Callable = token_tuple[1]

if pygments_func.__closure__ is None:
# In practice this is never None; the check only appeases the static type checker
continue

tokens: _TokenTupleInternalType = [
cell.cell_contents for cell in token_tuple[1].__closure__
][0]  # pygments sometimes hides token types inside a callback's closure (e.g. bygroups)

for token in tokens:
if token in useful_toks:
# If it's in useful_toks it must be a token type, even though the static type checker can't tell
regexes.append((token_tuple[0], token)) # type: ignore
continue

return list(set(regexes)) # type: ignore
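
To see why the closure unpacking above works: pygments' bygroups() returns a callback that closes over the token types it was given, so they can be recovered from __closure__. A minimal sketch (not part of the PR) demonstrating that implementation detail:

# Minimal sketch, not part of the PR: recovering token types hidden in a
# bygroups() callback, which is the same trick the function above relies on.
from pygments.lexer import bygroups
from pygments.token import Comment, String

callback = bygroups(String.Doc, Comment.Multiline)
hidden_types = [cell.cell_contents for cell in callback.__closure__][0]
print(hidden_types)  # -> (Token.Literal.String.Doc, Token.Comment.Multiline)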


def proper_docstring_tokens(lexer: RegexLexer, full_text: str) -> list[Token]:
proper_highlight_regexes: _TokenTupleReturnType = (
get_pygments_comment_regexes(lexer)
)

new_docstring_tokens: list[Token] = []
split_text: _ListOfStrs = full_text.splitlines()

for regex, token_type in proper_highlight_regexes:
current_text = full_text
match: Match[str] | None = compile(regex, flags=MULTILINE).search(
full_text
)

if match is None:
# Onwards to the next regex!
continue

start_pos: tuple[int, int] = (1, 0)
simple_token_type: str = get_new_token_type(str(token_type))

while match:
span: tuple[int, int] = match.span()
matched_str: str = current_text[span[0] : span[1]]

# Strip any leading whitespace from the match and update the span accordingly
matched_len_initial: int = len(matched_str)
matched_str = matched_str.lstrip()
matched_len_lstripped: int = len(matched_str)
span = (
(span[0] + matched_len_initial - matched_len_lstripped),
span[1],
)

# A few other values needed below
newline_count: int = matched_str.count("\n")
previous_text: str = current_text[: span[0]]

start_line: int = previous_text.count("\n") + start_pos[0]

# Deal with the easy case first
if not newline_count:
# Prepare token variables
start_col: int = split_text[start_line].find(matched_str)
current_text: str = full_text[span[0] + span[1] - span[0] :]

# Create and add token
token: Token = (
(start_line, start_col),
matched_len_lstripped,
simple_token_type,
)
new_docstring_tokens.append(token)

start_pos = (start_line, start_col + matched_len_lstripped)
current_text = current_text[: span[1]]

# Continue onward!
match = compile(regex, flags=MULTILINE).search(current_text)
continue

# Now handle multi-line matches
split_match: list[str] = matched_str.splitlines()
for i in range(newline_count + 1):
match_str: str = split_match[i]
initial_len: int = len(match_str)
start_col: int = initial_len - len(match_str.lstrip())

if i == 0:
line: str = split_text[start_line - 1]

true_len: int = len(line)
lstripped_len: int = len(line.lstrip())
initial_len = lstripped_len
if lstripped_len != true_len:
# In case the regex doesn't skip whitespace/junk
initial_len = true_len

start_col = line.find(match_str)

# Create and add token
token: Token = (
(start_line + i, start_col),
initial_len - start_col,
simple_token_type,
)
new_docstring_tokens.append(token)

start_pos = (start_line + i, start_col + len(match_str))

# Continue onward!
current_text = current_text[span[1] :]
match = compile(regex, flags=MULTILINE).search(current_text)

return new_docstring_tokens
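
A rough usage sketch for the function above (not in the PR; it assumes the imports already present in this file and that the "python" lexer is a RegexLexer, which it is):

# Rough usage sketch, not part of the PR.
source = 'def f():\n    """Docstring spanning\n    two lines."""\n'
lexer = get_lexer_by_name("python")
if isinstance(lexer, RegexLexer):
    for tok in proper_docstring_tokens(lexer, source):
        # Each Token is ((line, column), length, type); multi-line docstrings
        # yield one Token per line they span.
        print(tok)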


def get_highlights(
full_text: str,
language: str = "text",
text_range: tuple[int, int] = (1, -1),
) -> list[Token]:
"""Gets pygments tokens from text provided in language proved and converts them to Token's"""

# Create some variables used throughout the function
lexer: Lexer = get_lexer_by_name(language)
split_text: list[str] = full_text.splitlines()
split_text: _ListOfStrs = full_text.splitlines()
new_tokens: list[Token] = []

if text_range[1] == -1:
# An end of -1 means the text range should span the whole file
text_range = (text_range[0], len(split_text))

start_index: tuple[int, int] = (text_range[0], 0)
split_text = split_text[text_range[0] - 1 : text_range[1]]
# Keep only the lines inside the text range, since this list is iterated below
split_text: _ListOfStrs = split_text[text_range[0] - 1 : text_range[1]]

for line in split_text:
og_tokens: list[tuple[_TokenType, str]] = list(lex(line, lexer))
og_tokens: _LexReturnTokens = list(lex(line, lexer))
for token in og_tokens:
new_type: str = get_new_token_type(str(token[0]))
token_str: str = token[1]
token_len: int = len(token_str)

if token_str == "\n": # Lexer adds the newline back
if token_str == "\n":
# Lexer adds the newline back as its own token
continue

if not token_str.strip() and new_type == "Text":
# Skip tokens that are empty or plain Text; they carry no useful information
start_index = (start_index[0], start_index[1] + token_len)
continue

# Create and append the Token that will be returned
new_token = (start_index, token_len, new_type)
new_tokens.append(new_token)

start_index = (start_index[0], start_index[1] + token_len)
start_index = (start_index[0] + 1, 0)

# Add extra token types
# NOTE: these are added at the end so that, as the editor applies tokens one by
# one, they override earlier tokens that may be less accurate

if isinstance(lexer, RegexLexer):
new_tokens += proper_docstring_tokens(lexer, full_text)

new_tokens += get_urls(split_text, text_range[0])
if [char for char in hidden_chars if char in full_text]:
# If there are no hidden chars we don't want to compute this needlessly
new_tokens += find_hidden_chars(split_text, text_range[0])

return new_tokens
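
And a quick illustration of the public entry point after this change (illustrative only; the exact tokens depend on the installed pygments version):

# Illustrative only: the docstring should now surface via proper_docstring_tokens()
# on top of the ordinary per-line lex pass.
code = 'class Foo:\n    """doc"""\n    x = 1  # comment\n'
for tok in get_highlights(code, language="python"):
    print(tok)  # ((line, column), length, type) tuples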
2 changes: 1 addition & 1 deletion setup.py
@@ -1,4 +1,4 @@
# pip install -r requirements.txt --break-system-packages; pip uninstall salve_ipc -y --break-system-packages; pip install . --break-system-packages --no-build-isolation; pytest .
# pip install -r requirements.txt --break-system-packages; pip uninstall salve_ipc -y --break-system-packages; pip install . --break-system-packages --no-build-isolation; python3 -m pytest .
from setuptools import setup

with open("README.md", "r") as file:
62 changes: 19 additions & 43 deletions tests/test_ipc.py
@@ -55,7 +55,7 @@ def test_IPC():
"type": "response",
"cancelled": False,
"command": AUTOCOMPLETE,
"result": ["this"],
"result": ["test", "this"],
}

replacements_output: Response | None = context.get_response(REPLACEMENTS)
@@ -102,52 +102,28 @@ def test_IPC():
((8, 10), 3, "Name"),
((8, 13), 1, "Punctuation"),
((8, 14), 1, "Punctuation"),
((9, 4), 3, "Keyword"),
((9, 8), 8, "Name"),
((9, 16), 1, "Punctuation"),
((9, 17), 4, "Name"),
((9, 21), 1, "Punctuation"),
((9, 22), 1, "Punctuation"),
((10, 8), 4, "Keyword"),
((13, 0), 3, "Name"),
((13, 3), 1, "Punctuation"),
((13, 4), 1, "Punctuation"),
((14, 0), 24, "Comment"),
((14, 2), 22, "Link"),
((9, 4), 3, "String"),
((10, 4), 4, "Name"),
((11, 4), 3, "String"),
((13, 4), 3, "Keyword"),
((13, 8), 8, "Name"),
((13, 16), 1, "Punctuation"),
((13, 17), 4, "Name"),
((13, 21), 1, "Punctuation"),
((13, 22), 1, "Punctuation"),
((14, 8), 4, "Keyword"),
((17, 0), 3, "Name"),
((17, 3), 1, "Punctuation"),
((17, 4), 1, "Punctuation"),
((18, 0), 24, "Comment"),
((9, 4), 3, "String"),
((10, 4), 4, "String"),
((11, 4), 3, "String"),
((18, 2), 22, "Link"),
((5, 7), 1, "Hidden_Char"),
],
}

editorconfig_response: Response | None = context.get_response(EDITORCONFIG)
if editorconfig_response is None:
raise AssertionError("Editorconfig output is None")
editorconfig_response["id"] = 0
assert editorconfig_response == {
"id": 0,
"type": "response",
"cancelled": False,
"command": EDITORCONFIG,
"result": {
"end_of_line": "lf",
"insert_final_newline": "true",
"charset": "utf-8",
"indent_style": "space",
"indent_size": "4",
},
}

definition_response: Response | None = context.get_response(DEFINITION)
if definition_response is None:
raise AssertionError("Definition output is None")
definition_response["id"] = 0
assert definition_response == {
"id": 0,
"type": "response",
"cancelled": False,
"command": DEFINITION,
"result": ((3, 0), 3, "Definition"),
}

context.remove_file("test")
context.kill_IPC()

4 changes: 4 additions & 0 deletions tests/testing_file1.py
@@ -6,6 +6,10 @@


class Foo(Bar):
"""
test
"""

def __init__(self):
pass
