Commit: Use caches (#50)
* Use 3.11 everywhere (#38)

* v0.6.0 (#42) (#45)

* Bump version and add note to README

Prepare for release tomorrow and move forward to the v0.7.0 and v0.8.0 releases

* Steal pygments regexes (#34)

* test.py

* Move to server functions and set up proper tests

* Reformat

* Reformat

* Change to beartype typing

* Even more formatting

* Remove regex stealer test

Runs differently locally than on the GitHub runner and isn't worth the time or effort.

* Get docstring areas

* Make function work

* Add type annotation

* format

* Add lots of comments but don't remove private type

@leycec I tried using the idea you gave for private types but got Union type errors for some reason I couldn't really understand. Thank you so much for your time and thoughts, and congrats on the bike ride; that's a long stretch!

* Fix a small bug

* Improve highlighting functions significantly

Now it ignores whitespace at the front of the line!

* Stop using private variable

* Format for black and ruff

* Move docstring tokens up

* Update tests

* Fix line number for docstring tokens

Was 1 behind

* Reformat

* Bump version

* Implement token overwriting (#49)

* Display the problem

* Get working test

* Better overlap checking

* Better tests

* Sort and remove duplicates

* Remove old vestige and format

* Move token merging to highlight file

* Format

* Use overwrite_and_merge_tokens

* Cache important functions

* Remove old file

* Format
Moosems authored Jul 7, 2024
1 parent a17f289 commit c6c6299
Showing 7 changed files with 166 additions and 20 deletions.
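
For context, a Token in this codebase is ((line, column), length, token_type), as defined in the misc.py hunk below. A minimal sketch of the new merge/overwrite behaviour follows; the import path matches this repository's layout but is an assumption, since the diff doesn't show the package exports:

    from salve_ipc.server_functions.highlight import (
        merge_tokens,
        overwrite_and_merge_tokens,
    )

    # Adjacent same-type tokens on the same line collapse into one span
    print(merge_tokens([((1, 0), 3, "Name"), ((1, 3), 2, "Name")]))
    # [((1, 0), 5, 'Name')]

    # Where ranges overlap, new tokens replace old ones
    print(overwrite_and_merge_tokens([((1, 0), 4, "Name")], [((1, 0), 4, "String")]))
    # [((1, 0), 4, 'String')]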
2 changes: 1 addition & 1 deletion .github/workflows/pypi.yml
@@ -13,7 +13,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: 3.9
python-version: 3.11.1
- name: Install dependencies
run: |
python -m pip install --upgrade pip
2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
<h1 align="center">Salve v0.6.0</h1>
<h1 align="center">Salve v0.7.0</h1>

# Installation

1 change: 0 additions & 1 deletion salve_ipc/server_functions/find_words.py

This file was deleted.

157 changes: 155 additions & 2 deletions salve_ipc/server_functions/highlight.py
@@ -1,3 +1,4 @@
from functools import cache
from re import MULTILINE, Match, Pattern, compile

from beartype.typing import Callable
@@ -25,6 +26,149 @@
]


def merge_tokens(tokens: list[Token]) -> list[Token]:
output_tokens: list[Token] = []
depth: int = 0
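    # depth mirrors len(output_tokens): it grows when a token is appended, not when one is merged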
for token in tokens:
        # First token: there's nothing before it to merge with yet
if depth == 0:
output_tokens.append(token)
depth += 1
continue

previous_token = output_tokens[-1]

# Get our boolean checks
same_token_type: bool = previous_token[2] == token[2]
same_line: bool = previous_token[0][0] == token[0][0]
neighboring_tokens: bool = (
previous_token[0][1] + previous_token[1] == token[0][1]
)

# Determine if tokens should be merged
if not (same_token_type and same_line and neighboring_tokens):
output_tokens.append(token)
depth += 1
continue

# Replace previous token with new token (we don't increase depth because we are substituting, not adding)
new_token: Token = (
(token[0][0], previous_token[0][1]),
previous_token[1] + token[1],
token[2],
)
output_tokens[-1] = new_token
return output_tokens


def overwrite_tokens(
    old_tokens: list[Token], new_tokens: list[Token]
) -> list[Token]:
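    # New tokens take priority: any old token overlapping a new token's range is dropped or trimmed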
output_tokens: list[Token] = []
dont_add_tokens: list[Token] = []
for new_token in new_tokens:
for old_token in old_tokens:
same_token: bool = old_token == new_token
if same_token:
continue

same_line: bool = old_token[0][0] == new_token[0][0]
can_add_token: bool = old_token not in dont_add_tokens
if not same_line:
if can_add_token:
output_tokens.append(old_token)
continue

            # Check whether the ranges overlap. If the old token is fully inside the
            # new token's range, drop it (and add it to the don't-add list); if it only
            # partially overlaps, keep just the part outside the new token's range

old_token_end: int = old_token[0][1] + old_token[1]
new_token_end: int = new_token[0][1] + new_token[1]

partial_front_overlap: bool = (
new_token[0][1] <= old_token_end
and not old_token_end > new_token_end
)
partial_end_overlap: bool = new_token_end >= old_token[0][1]
fully_contained: bool = (
old_token_end <= new_token_end
and old_token[0][1] >= new_token[0][1]
)

if not (
partial_front_overlap or partial_end_overlap or fully_contained
):
continue

dont_add_tokens.append(old_token)

while old_token in output_tokens:
output_tokens.remove(old_token)

if fully_contained:
continue

            # If we get here, it means it's a partial overlap
if partial_front_overlap:
created_token: Token = (
(new_token[0][0], old_token[0][1]),
new_token[0][1] - old_token[0][1],
old_token[2],
)
while created_token in output_tokens:
output_tokens.remove(created_token)
output_tokens.append(created_token)
dont_add_tokens.append(created_token)
continue

if old_token[0][1] < new_token[0][1]:
created_token_1: Token = (
(new_token[0][0], old_token[0][1]),
new_token[0][1] - old_token[0][1],
old_token[2],
)
created_token_2: Token = (
(new_token[0][0], new_token_end),
old_token_end - new_token_end,
old_token[2],
)
while created_token_1 in output_tokens:
output_tokens.remove(created_token_1)
output_tokens.append(created_token_1)
while created_token_2 in output_tokens:
output_tokens.remove(created_token_2)
output_tokens.append(created_token_2)
dont_add_tokens.append(created_token_1)
dont_add_tokens.append(created_token_2)

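            # Keep the part of the old token that extends past the new token's end
            # (this also runs after the split above; the while loop below keeps it duplicate-free)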
created_token: Token = (
(new_token[0][0], new_token_end),
old_token_end - new_token_end,
old_token[2],
)
while created_token in output_tokens:
output_tokens.remove(created_token)
output_tokens.append(created_token)
dont_add_tokens.append(created_token)

output_tokens.append(new_token)

output_tokens = sorted(set(output_tokens))
return output_tokens


def overwrite_and_merge_tokens(
old_tokens: list[Token], new_tokens: list[Token]
) -> list[Token]:
merged_old_tokens: list[Token] = merge_tokens(sorted(set(old_tokens)))
merged_new_tokens: list[Token] = merge_tokens(sorted(set(new_tokens)))
output_tokens: list[Token] = overwrite_tokens(
merged_old_tokens, merged_new_tokens
)

output_tokens = sorted(set(merge_tokens(output_tokens)))
return output_tokens
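
# A sanity sketch (hypothetical values, not from this repo's tests): a String
# token punched into the middle of a Comment token splits the comment around it:
#     overwrite_and_merge_tokens(
#         [((1, 0), 10, "Comment")], [((1, 4), 3, "String")]
#     )
#     -> [((1, 0), 4, 'Comment'), ((1, 4), 3, 'String'), ((1, 7), 3, 'Comment')]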


@cache
def get_new_token_type(old_token: str) -> str:
"""Turns pygments token types into a generic predefined Token"""
new_type: str = generic_tokens[0]
@@ -158,6 +302,7 @@ def find_hidden_chars(lines: list[str], start_line: int = 1) -> list[Token]:
_LexReturnTokens = list[tuple[_TokenType, str]]


@cache
def get_pygments_comment_regexes(lexer: RegexLexer) -> _TokenTupleReturnType:
"""
    Steals the regexes that pygments uses to give docstring, heredoc, comment, and multiline comment highlights
@@ -306,6 +451,11 @@ def proper_docstring_tokens(lexer: RegexLexer, full_text: str) -> list[Token]:
return new_docstring_tokens


@cache
def lexer_by_name_cached(language: str) -> Lexer:
return get_lexer_by_name(language)
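
# get_lexer_by_name() builds a fresh Lexer object on every call, so caching by
# language name avoids repeating that work on each highlight request.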


def get_highlights(
full_text: str,
language: str = "text",
@@ -314,7 +464,7 @@
"""Gets pygments tokens from text provided in language proved and converts them to Token's"""

# Create some variables used all throughout the function
lexer: Lexer = get_lexer_by_name(language)
lexer: Lexer = lexer_by_name_cached(language)
split_text: _ListOfStrs = full_text.splitlines()
new_tokens: list[Token] = []

@@ -354,11 +504,14 @@ def get_highlights(
# override older tokens that may not be as accurate

if isinstance(lexer, RegexLexer):
new_tokens += proper_docstring_tokens(lexer, full_text)
new_tokens = overwrite_and_merge_tokens(
new_tokens, proper_docstring_tokens(lexer, full_text)
)

new_tokens += get_urls(split_text, text_range[0])
if [char for char in hidden_chars if char in full_text]:
        # if there are no hidden chars, we don't want to needlessly compute this
new_tokens += find_hidden_chars(split_text, text_range[0])

new_tokens = merge_tokens(new_tokens)
return new_tokens
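
A minimal usage sketch of the updated highlighter; the import path follows this repository's layout, and the parameters elided in the hunk above are assumed to have defaults:

    from salve_ipc.server_functions.highlight import get_highlights

    tokens = get_highlights('x = "hi"  # comment', language="python")
    for token in tokens:
        print(token)  # ((line, column), length, token_type)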
2 changes: 2 additions & 0 deletions salve_ipc/server_functions/misc.py
@@ -1,3 +1,4 @@
from functools import cache
from unicodedata import category

Token = tuple[tuple[int, int], int, str]
@@ -21,6 +22,7 @@
]


@cache
def is_unicode_letter(char: str) -> bool:
"""Returns a boolean value of whether a given unicode char is a letter or not (includes "_" for code completion reasons)"""
return char == "_" or category(char).startswith("L")
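
# Hypothetical quick checks (not from the test suite):
#     is_unicode_letter("a") -> True, is_unicode_letter("_") -> True,
#     is_unicode_letter("1") -> False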
4 changes: 2 additions & 2 deletions setup.py
@@ -7,15 +7,15 @@

setup(
name="salve_ipc",
version="0.6.0",
version="0.7.0",
description="Salve is an IPC library that can be used by code editors to easily get autocompletions, replacements, editorconfig suggestions, definitions, and syntax highlighting.",
author="Moosems",
author_email="moosems.j@gmail.com",
url="https://github.com/Moosems/salve",
long_description=long_description,
long_description_content_type="text/markdown",
install_requires=["pygments", "pyeditorconfig", "beartype"],
python_requires=">=3.9",
python_requires=">=3.11",
license="MIT license",
classifiers=[
"Development Status :: 3 - Alpha",
18 changes: 5 additions & 13 deletions tests/test_ipc.py
@@ -91,34 +91,26 @@ def test_IPC():
((3, 11), 7, "Comment"),
((5, 0), 5, "Name"),
((5, 5), 1, "Punctuation"),
((5, 6), 1, "String"),
((5, 7), 1, "String"),
((5, 8), 1, "String"),
((5, 6), 3, "String"),
((5, 9), 1, "Punctuation"),
((5, 12), 16, "Comment"),
((8, 0), 5, "Keyword"),
((8, 6), 3, "Name"),
((8, 9), 1, "Punctuation"),
((8, 10), 3, "Name"),
((8, 13), 1, "Punctuation"),
((8, 14), 1, "Punctuation"),
((8, 13), 2, "Punctuation"),
((9, 4), 3, "String"),
((10, 4), 4, "Name"),
((10, 4), 4, "String"),
((11, 4), 3, "String"),
((13, 4), 3, "Keyword"),
((13, 8), 8, "Name"),
((13, 16), 1, "Punctuation"),
((13, 17), 4, "Name"),
((13, 21), 1, "Punctuation"),
((13, 22), 1, "Punctuation"),
((13, 21), 2, "Punctuation"),
((14, 8), 4, "Keyword"),
((17, 0), 3, "Name"),
((17, 3), 1, "Punctuation"),
((17, 4), 1, "Punctuation"),
((17, 3), 2, "Punctuation"),
((18, 0), 24, "Comment"),
((9, 4), 3, "String"),
((10, 4), 4, "String"),
((11, 4), 3, "String"),
((18, 2), 22, "Link"),
((5, 7), 1, "Hidden_Char"),
],
