Merge pull request #3 from danixeee/danixeee/fix-escape-regex-chars

Escape regex chars
danixeee · Dec 28, 2019 · 84fffe1 · 84fffe1
2 parents 65605d7 + 7a30ae1
commit 84fffe1
Show file tree

Hide file tree

Showing 7 changed files with 77 additions and 13 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -7,7 +7,7 @@
     "python.testing.nosetestsEnabled": false,
     "python.testing.pytestEnabled": true,
     "python.linting.flake8Enabled": false,
-    "python.linting.pep8Enabled": false,
+    "python.linting.pycodestyleEnabled": false,
     "python.formatting.blackPath": "black",
     "python.sortImports.args": [
         "-l",

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning][semver].
 
 ## [Unreleased]
 
+### Fixed
+
+- Escape every character that is not an ASCII letter (e.g. `,`) when generating textmate syntax highlighting ([#3])
+
+[#3]: https://github.com/danixeee/textx-gen-coloring/pull/3
+
 ## [0.1.1] - 10/03/2019
 
 ### Added

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -1,4 +1,9 @@
 trigger:
+  branches:
+    include:
+    - '*'
+
+pr:
 - master
 
 pool:

diff --git a/setup.py b/setup.py
@@ -21,11 +21,16 @@
     os.path.join(os.path.dirname(__file__), "README.md"), "r", encoding="utf-8"
 ).read()
 
-ci_require = ["bandit", "pytest", "pytest-cov", "pytest-azurepipelines"]
+ci_require = [
+    "bandit==1.6.2",
+    "pytest==5.3.2",
+    "pytest-cov==2.8.1",
+    "pytest-azurepipelines==0.8.0",
+]
 
-dev_require = ["bandit==1.5.1"]
+dev_require = ["bandit==1.6.2"]
 
-tests_require = ["coverage==4.5.3", "pytest==4.3.1", "pytest-cov==2.6.1"]
+tests_require = ["coverage==5.0.1", "pytest==5.3.2", "pytest-cov==2.8.1"]
 
 if python_version().startswith("3.6"):  # For python 3.6
     ci_require.append("black")

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -8,7 +8,7 @@
     "Robot": {
         "name": "Robot",
         "keywords": set(
-            ["begin", "end", "initial", "up", "down", "left", "right", ","]
+            ["begin", "end", "initial", "up", "down", "left", "right", "\\,"]
         ),
         "grammar_path": str(DATA_PATH / "robot.tx"),
     }

diff --git a/tests/test_gen.py b/tests/test_gen.py
@@ -22,7 +22,18 @@ def _textmate_gen_cli(grammar_path, **kwargs):
     runner = CliRunner()
     result = runner.invoke(textx, cmd)
 
-    return result.stdout, result.exception
+    try:
+        return json.loads(result.stdout.split("\n", 2)[2]), result.exception
+    except json.JSONDecodeError:
+        return result.stdout, result.exception
+
+
+def _get_keywords_from_textmate(textmate):
+    """Return keywords from textmate object.
+    """
+    return [
+        kw["match"] for kw in textmate["repository"]["language_keyword"]["patterns"]
+    ]
 
 
 def test_textmate_gen_cli_console(lang):
@@ -37,8 +48,9 @@ def test_textmate_gen_cli_console(lang):
     grammar_path = lang["grammar_path"]
 
     output, _ = _textmate_gen_cli(grammar_path, name=name)
+    output_kws = _get_keywords_from_textmate(output)
     for kw in keywords:
-        assert kw in output
+        assert kw in output_kws
 
 
 def test_textmate_gen_cli_console_bad_args(lang):
@@ -78,10 +90,9 @@ def test_textmate_gen_cli_file(lang, tmpdir):
     assert textmate_json["name"] == name
     assert textmate_json["scopeName"] == "source." + name
 
-    kw_patterns = textmate_json["repository"]["language_keyword"]["patterns"]
-    kw_pattern_matches = set(map(lambda x: x["match"], kw_patterns))
+    kw_patterns = _get_keywords_from_textmate(textmate_json)
 
-    assert keywords == kw_pattern_matches
+    assert set(keywords) == set(kw_patterns)
 
 
 def test_textmate_gen_cli_file_already_exists(lang, tmpdir):

diff --git a/textx_gen_coloring/generators.py b/textx_gen_coloring/generators.py
@@ -1,9 +1,12 @@
 import re
+import string
 from functools import partial
 
 from .metamodels import coloring_mm, textx_mm
 from .templates import jinja_env, textmate_template_dir
 
+ASCII_LETTERS = string.ascii_letters
+
 
 class GrammarInfo:
     """
@@ -13,6 +16,8 @@ class GrammarInfo:
     def __init__(self, name):
         self.name = name
         self.keywords = []
+        self.regexes = []
+        self.comments = []
 
 
 class _TextmateGen:
@@ -74,6 +79,28 @@ def _kwd_class(kwd):
         ]
 
 
+def _escape_keyword(keyword):
+    """
+    Prepend `\\\\` to every character that is not an ASCII letter.
+    NOTE: `re.escape` is avoided because its output differs between Python 3.6 and 3.7.
+    """
+    return "".join(
+        [
+            letter if letter in ASCII_LETTERS else "\\\\{}".format(letter)
+            for letter in keyword
+        ]
+    )
+
+
+def _get_textx_rule_name(parent_rule):
+    """
+    Walk up the `parent` chain until a `TextxRule` instance is found; return its name.
+    """
+    while not type(parent_rule).__name__ == "TextxRule":
+        parent_rule = parent_rule.parent
+    return parent_rule.name
+
+
 def _parse_syntax_spec(syntax_spec):
     """
     Parse syntax specification with coloring metamodel.
@@ -89,14 +116,24 @@ def _parse_grammar(grammar_file, lang_name, skip_keywords=False):
     textx_mm.obj_processors = {}
     grammar_info = GrammarInfo(lang_name)
 
-    # Object processors
     def _str_obj_processor(grammar_info, str_match):
-        """Get language keywords (all strings in language grammar definition"""
-        grammar_info.keywords.append(str_match.match)
+        """Get language keywords (all strings in language grammar definition)"""
+        keyword = _escape_keyword(str_match.match)
+
+        if keyword not in grammar_info.keywords:
+            grammar_info.keywords.append(keyword)
+
+    def _regex_obj_processor(grammar_info, reg_match):
+        """Get language regular expressions"""
+        if _get_textx_rule_name(reg_match.parent) == "Comment":
+            grammar_info.comments.append(reg_match.match)
+        else:
+            grammar_info.regexes.append(reg_match.match)
 
     proccessors = {}
     if not skip_keywords:
         proccessors["StrMatch"] = partial(_str_obj_processor, grammar_info)
+        proccessors["ReMatch"] = partial(_regex_obj_processor, grammar_info)
 
     textx_mm.register_obj_processors(proccessors)
     textx_mm.model_from_file(grammar_file)