Skip to content

Commit

Permalink
fix: replace slow regex attribute check with crass parser
Browse files Browse the repository at this point in the history
  • Loading branch information
flavorjones committed Dec 11, 2022
1 parent ea853aa commit a6e0a1a
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 18 deletions.
27 changes: 26 additions & 1 deletion lib/loofah/html5/scrub.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,11 @@ def scrub_attributes(node)
end
end
end

if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value
scrub_attribute_that_allows_local_ref(attr_node)
end

if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m
attr_node.remove
next
Expand Down Expand Up @@ -127,6 +129,29 @@ def scrub_css(style)
Crass::Parser.stringify(sanitized_tree)
end

def scrub_attribute_that_allows_local_ref(attr_node)
return unless attr_node.value

nodes = Crass::Parser.new(attr_node.value).parse_component_values

values = nodes.map do |node|
case node[:node]
when :url
if node[:value].start_with?("#")
node[:raw]
else
nil
end
when :hash, :ident, :string
node[:raw]
else
nil
end
end.compact

attr_node.value = values.join(" ")
end

#
# libxml2 >= 2.9.2 fails to escape comments within some attributes.
#
Expand Down
6 changes: 3 additions & 3 deletions test/assets/testdata_sanitizer_tests1.dat
Original file line number Diff line number Diff line change
Expand Up @@ -463,9 +463,9 @@
{
"name": "absolute_uri_refs_in_svg_attributes",
"input": "<rect fill='url(http://bad.com/) #fff' />",
"rexml": "<rect fill=' #fff'></rect>",
"xhtml": "<rect fill=' #fff'></rect>",
"output": "<rect fill=' #fff'/>"
"rexml": "<rect fill='#fff'></rect>",
"xhtml": "<rect fill='#fff'></rect>",
"output": "<rect fill='#fff'/>"
},

{
Expand Down
18 changes: 4 additions & 14 deletions test/html5/test_sanitizer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -267,15 +267,15 @@ def test_figure_element_is_valid

## added because we don't have any coverage above on SVG_ATTR_VAL_ALLOWS_REF
HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.each do |attr_name|
define_method "test_should_allow_uri_refs_in_svg_attribute_#{attr_name}" do
define_method "test_allow_uri_refs_in_svg_attribute_#{attr_name}" do
input = "<rect fill='url(#foo)' />"
output = "<rect fill='url(#foo)'></rect>"
check_sanitization(input, output, output, output)
end

define_method "test_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
input = "<rect fill='url(http://bad.com/) #fff' />"
output = "<rect fill=' #fff'></rect>"
define_method "test_disallow_absolute_uri_refs_in_svg_attribute_#{attr_name}" do
input = "<rect fill='yellow url(http://bad.com/) #fff \"blue\"' />"
output = "<rect fill='yellow #fff \"blue\"'></rect>"
check_sanitization(input, output, output, output)
end
end
Expand Down Expand Up @@ -480,16 +480,6 @@ def test_css_order
assert_match %r/order:5/, sane.inner_html
end

def test_issue_90_slow_regex
skip("timing tests are hard to make pass and have little regression-testing value")

html = %q{<span style="background: url('data:image/svg&#43;xml;charset=utf-8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20width%3D%2232%22%20height%3D%2232%22%20viewBox%3D%220%200%2032%2032%22%3E%3Cpath%20fill%3D%22%23D4C8AE%22%20d%3D%22M0%200h32v32h-32z%22%2F%3E%3Cpath%20fill%3D%22%2383604B%22%20d%3D%22M0%200h31.99v11.75h-31.99z%22%2F%3E%3Cpath%20fill%3D%22%233D2319%22%20d%3D%22M0%2011.5h32v.5h-32z%22%2F%3E%3Cpath%20fill%3D%22%23F83651%22%20d%3D%22M5%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%23FCD050%22%20d%3D%22M6%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%2371C797%22%20d%3D%22M7%200h1v10.5h-1z%22%2F%3E%3Cpath%20fill%3D%22%23509CF9%22%20d%3D%22M8%200h1v10.5h-1z%22%2F%3E%3ClinearGradient%20id%3D%22a%22%20gradientUnits%3D%22userSpaceOnUse%22%20x1%3D%2224.996%22%20y1%3D%2210.5%22%20x2%3D%2224.996%22%20y2%3D%224.5%22%3E%3Cstop%20offset%3D%220%22%20stop-color%3D%22%23796055%22%2F%3E%3Cstop%20offset%3D%22.434%22%20stop-color%3D%22%23614C43%22%2F%3E%3Cstop%20offset%3D%221%22%20stop-color%3D%22%233D2D28%22%2F%3E%3C%2FlinearGradient%3E%3Cpath%20fill%3D%22url(%23a)%22%20d%3D%22M28%208.5c0%201.1-.9%202-2%202h-2c-1.1%200-2-.9-2-2v-2c0-1.1.9-2%202-2h2c1.1%200%202%20.9%202%202v2z%22%2F%3E%3Cpath%20fill%3D%22%235F402E%22%20d%3D%22M28%208c0%201.1-.9%202-2%202h-2c-1.1%200-2-.9-2-2v-2c0-1.1.9-2%202-2h2c1.1%200%202%20.9%202%202v2z%22%2F%3E%3C');"></span>}

assert_completes_in_reasonable_time {
Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_html)
}
end

def test_upper_case_css_property
html = "<div style=\"COLOR: BLUE; NOTAPROPERTY: RED;\">asdf</div>"
sane = Nokogiri::HTML(Loofah.scrub_fragment(html, :strip).to_xml)
Expand Down

0 comments on commit a6e0a1a

Please # to comment.