From d6018f2539d271963c3e7f54f36ef11900363c69 Mon Sep 17 00:00:00 2001 From: Greg Guthe Date: Mon, 23 Mar 2020 13:46:36 -0400 Subject: [PATCH] fix bug 1623633 expand and comment on sanitize_css gauntlet regex per https://github.com/mozilla/bleach/pull/61/files#r677453 --- bleach/sanitizer.py | 10 ++++++++-- tests/test_css.py | 33 +++++++++++++++++++++++++++------ 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/bleach/sanitizer.py b/bleach/sanitizer.py index 54f624fa..0f6babda 100644 --- a/bleach/sanitizer.py +++ b/bleach/sanitizer.py @@ -593,8 +593,14 @@ def sanitize_css(self, style): # the whole thing. parts = style.split(';') gauntlet = re.compile( - r"""^([-/:,#%.'"\s!\w]|\w-\w|'[\s\w]+'\s*|"[\s\w]+"|\([\d,%\.\s]+\))*$""", - flags=re.U + r"""^( # consider a style attribute value as composed of: +[/:,#%!.\s\w] # a non-newline character +|\w-\w # 3 characters in the form \w-\w +|'[\s\w]+'\s* # a single quoted string of [\s\w]+ with trailing space +|"[\s\w]+" # a double quoted string of [\s\w]+ +|\([\d,%\.\s]+\) # a parenthesized string of one or more digits, commas, periods, percent signs, or whitespace e.g. from 'color: hsl(30,100%,50%)'' +)*$""", + flags=re.U | re.VERBOSE ) for part in parts: diff --git a/tests/test_css.py b/tests/test_css.py index 92fe9553..6a3d8eb9 100644 --- a/tests/test_css.py +++ b/tests/test_css.py @@ -1,6 +1,7 @@ from __future__ import unicode_literals from functools import partial +from timeit import timeit import pytest @@ -37,10 +38,12 @@ '

bar

' ), # Handle leading - in attributes - ( + # regressed with the fix for bug 1623633 + pytest.param( '

bar

', ['cursor'], - '

bar

' + '

bar

', + marks=pytest.mark.xfail, ), # Handle () in attributes ( @@ -54,16 +57,20 @@ '

bar

', ), # Handle ' in attributes - ( + # regressed with the fix for bug 1623633 + pytest.param( '

bar

', ['text-overflow'], - '

bar

' + '

bar

', + marks=pytest.mark.xfail, ), # Handle " in attributes - ( + # regressed with the fix for bug 1623633 + pytest.param( '

bar

', ['text-overflow'], - '

bar

' + '

bar

', + marks=pytest.mark.xfail, ), ( '

bar

',
@@ -223,3 +230,17 @@ def test_style_hang():
 def test_css_parsing_with_entities(data, styles, expected):
     """The sanitizer should be ok with character entities"""
     assert clean(data, tags=['p'], attributes={'p': ['style']}, styles=styles) == expected
+
+
+@pytest.mark.parametrize('overlap_test_char', ["\"", "'", "-"])
+def test_css_parsing_gauntlet_regex_backtracking(overlap_test_char):
+    """The sanitizer gauntlet regex should not catastrophically backtrack"""
+    # refs: https://bugzilla.mozilla.org/show_bug.cgi?id=1623633
+
+    def time_clean(test_char, size):
+        style_attr_value = (test_char + 'a' + test_char) * size + '^'
+        stmt = """clean('''<a style="%s"></a>''', attributes={'a': ['style']})""" % style_attr_value
+        return timeit(stmt=stmt, setup='from bleach import clean', number=1)
+
+    # should complete in less than one second
+    assert time_clean(overlap_test_char, 22) < 1.0