From b5de45fee2354f06829292fae8bb24822017d848 Mon Sep 17 00:00:00 2001 From: Jonathan Hedley Date: Mon, 23 Jan 2023 16:47:52 +1100 Subject: [PATCH] Use Iterator.remove to avoid ConcurrentModificationException Add improved test coverage --- CHANGES | 3 +++ src/main/java/org/jsoup/safety/Safelist.java | 16 +++++++----- .../java/org/jsoup/safety/CleanerTest.java | 26 +++++++++++++++++++ 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/CHANGES b/CHANGES index 59d4b0b123..4e3849af7b 100644 --- a/CHANGES +++ b/CHANGES @@ -44,6 +44,9 @@ Release 1.15.4 [PENDING] body. + * Bugfix: fixed an issue in Safelist.removeAttributes which could throw a ConcurrentModificationException when using + the ":all" pseudo-attribute. + * Change: deprecated the unused Document#normalise() method. Normalization occurs during the HTML tree construction, and no longer as a distinct phase. diff --git a/src/main/java/org/jsoup/safety/Safelist.java b/src/main/java/org/jsoup/safety/Safelist.java index 289d555706..710c070e38 100644 --- a/src/main/java/org/jsoup/safety/Safelist.java +++ b/src/main/java/org/jsoup/safety/Safelist.java @@ -12,6 +12,7 @@ Thank you to Ryan Grove (wonko.com) for the Ruby HTML cleaner http://github.com/ import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.Map; import java.util.Set; @@ -63,6 +64,7 @@ XSS attack examples (that jsoup will safegaurd against the default Cleaner and S

*/ public class Safelist { + private static final String All = ":all"; private final Set tagNames; // tags allowed, lower case. e.g. [p, br, span] private final Map> attributes; // tag -> attribute[]. allowed attributes [href] for a tag. private final Map> enforcedAttributes; // always set these attribute values @@ -342,14 +344,16 @@ public Safelist removeAttributes(String tag, String... attributes) { if(currentSet.isEmpty()) // Remove tag from attribute map if no attributes are allowed for tag this.attributes.remove(tagName); } - if(tag.equals(":all")) // Attribute needs to be removed from all individually set tags - for(TagName name: this.attributes.keySet()) { - Set currentSet = this.attributes.get(name); + if(tag.equals(All)) { // Attribute needs to be removed from all individually set tags + Iterator>> it = this.attributes.entrySet().iterator(); + while (it.hasNext()) { + Map.Entry> entry = it.next(); + Set currentSet = entry.getValue(); currentSet.removeAll(attributeSet); - if(currentSet.isEmpty()) // Remove tag from attribute map if no attributes are allowed for tag - this.attributes.remove(name); + it.remove(); } + } return this; } @@ -555,7 +559,7 @@ protected boolean isSafeAttribute(String tagName, Element el, Attribute attr) { } } // no attributes defined for tag, try :all tag - return !tagName.equals(":all") && isSafeAttribute(":all", el, attr); + return !tagName.equals(All) && isSafeAttribute(All, el, attr); } private boolean testValidProtocol(Element el, Attribute attr, Set protocols) { diff --git a/src/test/java/org/jsoup/safety/CleanerTest.java b/src/test/java/org/jsoup/safety/CleanerTest.java index 8a03ed02dc..d7c6371cbc 100644 --- a/src/test/java/org/jsoup/safety/CleanerTest.java +++ b/src/test/java/org/jsoup/safety/CleanerTest.java @@ -67,6 +67,32 @@ public class CleanerTest { assertEquals("

Nice

Hello
", TextUtil.stripNewlines(cleanHtml)); } + @Test void allAttributes() { + String h = "

Text

Foo"; + Safelist safelist = Safelist.relaxed(); + safelist.addAttributes(":all", "class"); + safelist.addAttributes("div", "data"); + + String clean1 = Jsoup.clean(h, safelist); + assertEquals("

Text

Foo
", TextUtil.stripNewlines(clean1)); + + safelist.removeAttributes(":all", "class", "cite"); + + String clean2 = Jsoup.clean(h, safelist); + assertEquals("

Text

Foo
", TextUtil.stripNewlines(clean2)); + } + + @Test void removeProtocols() { + String h = "Link"; + Safelist safelist = Safelist.relaxed(); + String clean1 = Jsoup.clean(h, safelist); + assertEquals("Link", clean1); + + safelist.removeProtocols("a", "href", "ftp", "http", "https", "mailto"); + String clean2 = Jsoup.clean(h, safelist); // all removed means any will work + assertEquals("Link", clean2); + } + @Test public void testRemoveEnforcedAttributes() { String h = "

Nice

Hello
"; String cleanHtml = Jsoup.clean(h, Safelist.basic().removeEnforcedAttribute("a", "rel"));