Skip to content

Commit

Permalink
refactor: extract scrub_uri_attribute for downstream use
Browse files Browse the repository at this point in the history
  • Loading branch information
flavorjones committed Dec 11, 2022
1 parent 47a835a commit 84ca20c
Showing 1 changed file with 19 additions and 14 deletions.
33 changes: 19 additions & 14 deletions lib/loofah/html5/scrub.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,7 @@ def scrub_attributes(node)
end

if SafeList::ATTR_VAL_IS_URI.include?(attr_name)
# this block lifted nearly verbatim from HTML5 sanitization
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
attr_node.remove
next
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
# permit only allowed data mediatypes
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
mediatype, _ = mediatype.split(";")[0..1] if mediatype
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
attr_node.remove
next
end
end
next if scrub_uri_attribute(attr_node)
end

if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name)
Expand Down Expand Up @@ -152,6 +139,24 @@ def scrub_attribute_that_allows_local_ref(attr_node)
attr_node.value = values.join(" ")
end

def scrub_uri_attribute(attr_node)
# this block lifted nearly verbatim from HTML5 sanitization
val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase
if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0])
attr_node.remove
return true
elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data"
# permit only allowed data mediatypes
mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1]
mediatype, _ = mediatype.split(";")[0..1] if mediatype
if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype)
attr_node.remove
return true
end
end
false
end

#
# libxml2 >= 2.9.2 fails to escape comments within some attributes.
#
Expand Down

0 comments on commit 84ca20c

Please # to comment.