Skip to content

Commit

Permalink
Merge pull request #3 from danixeee/danixeee/fix-escape-regex-chars
Browse files Browse the repository at this point in the history
Escape regex chars
  • Loading branch information
danixeee authored Dec 28, 2019
2 parents 65605d7 + 7a30ae1 commit 84fffe1
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"python.testing.nosetestsEnabled": false,
"python.testing.pytestEnabled": true,
"python.linting.flake8Enabled": false,
"python.linting.pep8Enabled": false,
"python.linting.pycodestyleEnabled": false,
"python.formatting.blackPath": "black",
"python.sortImports.args": [
"-l",
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning][semver].

## [Unreleased]

### Fixed

- Escape characters that are not ASCII letters (e.g. `,`) in keywords when generating TextMate syntax highlighting ([#3])

[#3]: https://github.com/danixeee/textx-gen-coloring/pull/3

## [0.1.1] - 10/03/2019

### Added
Expand Down
5 changes: 5 additions & 0 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
trigger:
branches:
include:
- '*'

pr:
- master

pool:
Expand Down
11 changes: 8 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,16 @@
os.path.join(os.path.dirname(__file__), "README.md"), "r", encoding="utf-8"
).read()

ci_require = ["bandit", "pytest", "pytest-cov", "pytest-azurepipelines"]
ci_require = [
"bandit==1.6.2",
"pytest==5.3.2",
"pytest-cov==2.8.1",
"pytest-azurepipelines==0.8.0",
]

dev_require = ["bandit==1.5.1"]
dev_require = ["bandit==1.6.2"]

tests_require = ["coverage==4.5.3", "pytest==4.3.1", "pytest-cov==2.6.1"]
tests_require = ["coverage==5.0.1", "pytest==5.3.2", "pytest-cov==2.8.1"]

if python_version().startswith("3.6"): # For python 3.6
ci_require.append("black")
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"Robot": {
"name": "Robot",
"keywords": set(
["begin", "end", "initial", "up", "down", "left", "right", ","]
["begin", "end", "initial", "up", "down", "left", "right", "\\,"]
),
"grammar_path": str(DATA_PATH / "robot.tx"),
}
Expand Down
21 changes: 16 additions & 5 deletions tests/test_gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,18 @@ def _textmate_gen_cli(grammar_path, **kwargs):
runner = CliRunner()
result = runner.invoke(textx, cmd)

return result.stdout, result.exception
try:
return json.loads(result.stdout.split("\n", 2)[2]), result.exception
except json.JSONDecodeError:
return result.stdout, result.exception


def _get_keywords_from_textmate(textmate):
    """Collect the ``match`` value of every keyword pattern in a textmate dict.

    The patterns are read from
    ``textmate["repository"]["language_keyword"]["patterns"]`` and the
    returned list keeps them in their original order.
    """
    keyword_patterns = textmate["repository"]["language_keyword"]["patterns"]
    matches = []
    for pattern in keyword_patterns:
        matches.append(pattern["match"])
    return matches


def test_textmate_gen_cli_console(lang):
Expand All @@ -37,8 +48,9 @@ def test_textmate_gen_cli_console(lang):
grammar_path = lang["grammar_path"]

output, _ = _textmate_gen_cli(grammar_path, name=name)
output_kws = _get_keywords_from_textmate(output)
for kw in keywords:
assert kw in output
assert kw in output_kws


def test_textmate_gen_cli_console_bad_args(lang):
Expand Down Expand Up @@ -78,10 +90,9 @@ def test_textmate_gen_cli_file(lang, tmpdir):
assert textmate_json["name"] == name
assert textmate_json["scopeName"] == "source." + name

kw_patterns = textmate_json["repository"]["language_keyword"]["patterns"]
kw_pattern_matches = set(map(lambda x: x["match"], kw_patterns))
kw_patterns = _get_keywords_from_textmate(textmate_json)

assert keywords == kw_pattern_matches
assert set(keywords) == set(kw_patterns)


def test_textmate_gen_cli_file_already_exists(lang, tmpdir):
Expand Down
43 changes: 40 additions & 3 deletions textx_gen_coloring/generators.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import re
import string
from functools import partial

from .metamodels import coloring_mm, textx_mm
from .templates import jinja_env, textmate_template_dir

ASCII_LETTERS = string.ascii_letters


class GrammarInfo:
"""
Expand All @@ -13,6 +16,8 @@ class GrammarInfo:
def __init__(self, name):
self.name = name
self.keywords = []
self.regexes = []
self.comments = []


class _TextmateGen:
Expand Down Expand Up @@ -74,6 +79,28 @@ def _kwd_class(kwd):
]


def _escape_keyword(keyword):
    """
    Escape a grammar keyword so it can be used as a literal regex match.

    Prepend `\\\\` to all chars that are neither ASCII letters nor ASCII
    digits. The backslash is doubled because the result is written into a
    JSON string, where it decodes back to a single regex escape.
    NOTE: `re.escape` does not work the same for 3.6 and 3.7 versions.

    Returns the escaped keyword; an empty keyword yields an empty string.
    """
    # Digits must stay unescaped: a backslash followed by a digit is a
    # backreference / octal escape in regex engines, not a literal digit.
    literal_chars = string.ascii_letters + string.digits
    return "".join(
        char if char in literal_chars else "\\\\{}".format(char)
        for char in keyword
    )


def _get_textx_rule_name(parent_rule):
    """
    Return the name of the nearest enclosing `TextxRule`.

    Starting at `parent_rule`, follow `.parent` links until an object whose
    class is named `TextxRule` is reached, then return its `name`.
    """
    node = parent_rule
    while type(node).__name__ != "TextxRule":
        node = node.parent
    return node.name


def _parse_syntax_spec(syntax_spec):
"""
Parse syntax specification with coloring metamodel.
Expand All @@ -89,14 +116,24 @@ def _parse_grammar(grammar_file, lang_name, skip_keywords=False):
textx_mm.obj_processors = {}
grammar_info = GrammarInfo(lang_name)

# Object processors
def _str_obj_processor(grammar_info, str_match):
"""Get language keywords (all strings in language grammar definition"""
grammar_info.keywords.append(str_match.match)
"""Get language keywords (all strings in language grammar definition)"""
keyword = _escape_keyword(str_match.match)

if keyword not in grammar_info.keywords:
grammar_info.keywords.append(keyword)

def _regex_obj_processor(grammar_info, reg_match):
    """Record a regular expression found in the language grammar.

    A match whose enclosing rule is named ``Comment`` is appended to
    ``grammar_info.comments``; any other match goes to
    ``grammar_info.regexes``.
    """
    in_comment_rule = _get_textx_rule_name(reg_match.parent) == "Comment"
    bucket = grammar_info.comments if in_comment_rule else grammar_info.regexes
    bucket.append(reg_match.match)

proccessors = {}
if not skip_keywords:
proccessors["StrMatch"] = partial(_str_obj_processor, grammar_info)
proccessors["ReMatch"] = partial(_regex_obj_processor, grammar_info)

textx_mm.register_obj_processors(proccessors)
textx_mm.model_from_file(grammar_file)
Expand Down

0 comments on commit 84fffe1

Please sign in to comment.