Merge pull request #2 from johnfraney/lint-pycon-blocks

johnfraney · web-flow · commit 048b08238e30 · 2019-06-14T23:28:20.000-04:00
Add support for linting pycon code blocks
diff --git a/README.md b/README.md
@@ -11,6 +11,11 @@ Flake8 Markdown lints [GitHub-style Python code blocks](https://help.github.com/
 
 This package helps improve a Python project's documentation by ensuring that code samples are error-free.
 
+## Features
+
+- Lints code blocks containing regular Python and Python interpreter code ([`pycon`](http://pygments.org/docs/lexers/#pygments.lexers.python.PythonConsoleLexer))
+- [pre-commit](#pre-commit-hook) hook to lint on commit
+
 ## Installation
 
 Flake8 Markdown can be installed from PyPI using `pip` or your package manager of choice:
@@ -46,7 +51,7 @@ To enable this hook in your local repository, add the following `repo` to your `
 # .pre-commit-config.yaml
 repos:
   - repo: https://github.com/johnfraney/flake8-markdown
-    rev: v0.1.1
+    rev: v0.2.0
     hooks:
       - id: flake8-markdown
 ```
@@ -57,6 +62,12 @@ Everyone interacting in the project's codebases, issue trackers, chat rooms, and
 
 ## History
 
+### [0.2.0] - 2019-06-14
+
+#### Added
+
+- [`pycon`](http://pygments.org/docs/lexers/#pygments.lexers.python.PythonConsoleLexer) code block support
+
 ### [0.1.1] - 2019-05-19
 
 #### Changed
diff --git a/flake8_markdown/__init__.py b/flake8_markdown/__init__.py
@@ -5,13 +5,9 @@
 import sys
 from concurrent.futures import ThreadPoolExecutor
 
-from .constants import SUBPROCESS_ARGS
+from flake8_markdown.constants import SUBPROCESS_ARGS
 
-__version__ = '0.1.1'
-
-
-def non_matching_lookbehind(pattern):
-    return r'(?<={})'.format(pattern)
+__version__ = '0.2.0'
 
 
 def non_matching_lookahead(pattern):
@@ -26,13 +22,30 @@ def non_matching_group(pattern):
     return r'(?:{})'.format(pattern)
 
 
+def strip_repl_characters(code):
+    """Removes the first four characters from each REPL-style line.
+
+    >>> strip_repl_characters('>>> "banana"') == '"banana"'
+    True
+    >>> strip_repl_characters('... banana') == 'banana'
+    True
+    """
+    stripped_lines = []
+    for line in code.splitlines():
+        if line.startswith('>>> ') or line.startswith('... '):
+            stripped_lines.append(line[4:])
+        else:
+            stripped_lines.append(line)
+    return '\n'.join(stripped_lines)
+
+
 ONE_OR_MORE_LINES_NOT_GREEDY = r'(?:.*\n)+?'
 
 regex_rule = ''.join([
     # Use non-matching group instead of a lookbehind because the code
     # block may have line highlighting hints. See:
     # https://python-markdown.github.io/extensions/fenced_code_blocks/#emphasized-lines
-    non_matching_group('^```python.*$'),
+    non_matching_group('^```(python|pycon).*$'),
     matching_group(ONE_OR_MORE_LINES_NOT_GREEDY),
     non_matching_lookahead('^```')
 ])
@@ -44,28 +57,50 @@ def lint_markdown_file(markdown_file_path):
     linting_errors = []
     markdown_content = open(markdown_file_path, 'r').read()
     code_block_start_lines = []
-    for line_no, line in enumerate(markdown_content.split('\n'), start=1):
-        if line.startswith('```python'):
+    for line_no, line in enumerate(markdown_content.splitlines(), start=1):
+        # Match python and pycon
+        if line.startswith('```py'):
             code_block_start_lines.append(line_no)
-    matches = regex.findall(markdown_content)
-    for match_number, match in enumerate(matches):
-        match_text = match.lstrip()
+    code_block_matches = regex.findall(markdown_content)
+    for match_number, code_block_match in enumerate(code_block_matches):
+        code_block_type = code_block_match[0]
+        match_text = code_block_match[1]
+        # pycon lines start with ">>> " or "... ", so strip those characters
+        if code_block_type == 'pycon':
+            match_text = strip_repl_characters(match_text)
+        match_text = match_text.lstrip()
         flake8_process = subprocess.run(
             ['flake8', '-'],
             input=match_text,
             **SUBPROCESS_ARGS,
         )
         flake8_output = flake8_process.stdout
-        markdown_line_number = code_block_start_lines[match_number] + 1
+        flake8_output = flake8_output.strip()
+        # Skip empty lines
+        if not flake8_output:
+            continue
+        flake8_output_split = flake8_output.split(':')
+        line_number = int(flake8_output_split[1])
+        column_number = int(flake8_output_split[2])
+        markdown_line_number = (
+            line_number + code_block_start_lines[match_number]
+        )
+        if code_block_type == 'pycon':
+            match_lines = match_text.splitlines()
+            line = match_lines[line_number - 1]
+            if any([
+                    line.startswith('>>> '),
+                    line.startswith('... '),
+            ]):
+                flake8_output_split[2] = column_number + 4
         # Replace reference to stdin line number with file line number
         flake8_output = re.sub(
             r'stdin:[0-9]+',
             '{}:{}'.format(markdown_file_path, markdown_line_number),
             flake8_output
         )
-        stripped_output = flake8_output.strip()
-        if stripped_output:
-            linting_errors.append(stripped_output)
+        linting_errors.append(flake8_output)
+
     if linting_errors:
         linting_error_output = '\n'.join(linting_errors)
         print(linting_error_output)
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "flake8-markdown"
-version = "0.1.1"
+version = "0.2.0"
 description = "Lints Python code blocks in Markdown files using flake8"
 authors = ["John Franey <johnfraney@gmail.com>"]
 repository = "https://github.com/johnfraney/flake8-markdown"
@@ -17,7 +17,7 @@ classifiers = [
     "Topic :: Software Development :: Quality Assurance",
 ]
 include = [
-    "LICENCE",
+    "LICENSE",
 ]
 
 [tool.poetry.dependencies]
diff --git a/tests/samples/pycon.md b/tests/samples/pycon.md
@@ -0,0 +1,35 @@
+# This file contains a Python console block to lint
+
+This contains an unknown variable:
+
+```pycon
+>>> print("Hello")
+'Hello'
+>>> banana = "banana"
+>>> for character in banana:
+...     print(characterr)
+
+```
+
+This contains an EOL error:
+
+```pycon
+>>> 'chocolate
+
+```
+
+This contains an undefined variable as a return:
+
+```pycon
+>>> True
+false
+
+```
+
+This contains a valid code example:
+
+```pycon
+>>> len([1, 2, 3])
+3
+
+```
diff --git a/tests/test_flake8_markdown.py b/tests/test_flake8_markdown.py
@@ -7,6 +7,7 @@
 FILE_WITH_ERRORS = 'tests/samples/basic.md'
 FILE_WITHOUT_ERRORS = 'tests/samples/good.md'
 FILE_WITH_EMPHASIZED_LINES = 'tests/samples/emphasized_lines.md'
+FILE_WITH_PYCON_BLOCKS = 'tests/samples/pycon.md'
 
 
 @pytest.fixture
@@ -75,6 +76,18 @@ def test_run_with_file_containing_emphasized_lines(run_flake8_markdown):
     assert "tests/samples/emphasized_lines.md:6:1: F821 undefined name 'emphasized_imaginary_function'" in output
 
 
+def test_run_with_file_containing_pycon_blocks(run_flake8_markdown):
+    flake8_markdown_process = run_flake8_markdown(FILE_WITH_PYCON_BLOCKS)
+    output = flake8_markdown_process.stdout
+    print(output)
+    assert flake8_markdown_process.returncode == 1
+    error_count = len(output.splitlines())
+    assert error_count == 3
+    assert 'tests/samples/pycon.md:10:11: F821' in output
+    assert 'tests/samples/pycon.md:17:10: E999' in output
+    assert 'tests/samples/pycon.md:25:1: F821' in output
+
+
 def test_run_with_glob(run_flake8_markdown):
     flake8_markdown_process = run_flake8_markdown('tests/samples/*.md')
     assert flake8_markdown_process.returncode == 1