From 6109e0183cecf4f8b587d76209716cb1bbcd6bd5 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Wed, 21 Aug 2024 15:23:00 +0900 Subject: [PATCH] Fix a bug that Stream parser doesn't expand the user-defined entity references for "text" (#200) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Why? Pull parser expands character references and predefined entity references, but doesn't expand user-defined entity references. ## Change - text_stream_unnormalize.rb ``` $LOAD_PATH.unshift(File.expand_path("lib")) require 'rexml/document' require 'rexml/parsers/sax2parser' require 'rexml/parsers/pullparser' require 'rexml/parsers/streamparser' require 'rexml/streamlistener' xml = < ]>&la;&lala;<P> <I> <B> Text </B> </I>test™ EOS class StListener include REXML::StreamListener def text(text) puts text end end puts "REXML(DOM)" REXML::Document.new(xml).elements.each("/root/*") {|element| puts element.text} puts "" puts "REXML(Pull)" parser = REXML::Parsers::PullParser.new(xml) while parser.has_next? event = parser.pull case event.event_type when :text puts event[1] end end puts "" puts "REXML(Stream)" parser = REXML::Parsers::StreamParser.new(xml, StListener.new).parse puts "" puts "REXML(SAX)" sax = REXML::Parsers::SAX2Parser.new(xml) sax.listen(:characters) {|x| puts x } sax.parse ``` ## Before (master) ``` $ ruby text_stream_unnormalize.rb REXML(DOM) 1234 --1234--

Text test™ REXML(Pull) 1234 --1234--

Text test™ REXML(Stream) &la; #<= This &lala; #<= This

Text test™ REXML(SAX) 1234 --1234--

Text test™ ``` ## After(This PR) ``` $ ruby text_stream_unnormalize.rb REXML(DOM) 1234 --1234--

Text test™ REXML(Pull) 1234 --1234--

Text test™ REXML(Stream) 1234 --1234--

Text test™ REXML(SAX) 1234 --1234--

Text test™ ``` --- lib/rexml/parsers/streamparser.rb | 8 +- test/test_stream.rb | 141 +++++++++++++++++++++++++++++- 2 files changed, 147 insertions(+), 2 deletions(-) diff --git a/lib/rexml/parsers/streamparser.rb b/lib/rexml/parsers/streamparser.rb index e2da2a7d..7781fe44 100644 --- a/lib/rexml/parsers/streamparser.rb +++ b/lib/rexml/parsers/streamparser.rb @@ -7,12 +7,17 @@ class StreamParser def initialize source, listener @listener = listener @parser = BaseParser.new( source ) + @entities = {} end def add_listener( listener ) @parser.add_listener( listener ) end + def entity_expansion_count + @parser.entity_expansion_count + end + def parse # entity string while true @@ -28,7 +33,7 @@ def parse when :end_element @listener.tag_end( event[1] ) when :text - unnormalized = @parser.unnormalize( event[1] ) + unnormalized = @parser.unnormalize( event[1], @entities ) @listener.text( unnormalized ) when :processing_instruction @listener.instruction( *event[1,2] ) @@ -40,6 +45,7 @@ def parse when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl @listener.send( event[0].to_s, *event[1..-1] ) when :entitydecl, :notationdecl + @entities[ event[1] ] = event[2] if event.size == 3 @listener.send( event[0].to_s, event[1..-1] ) when :externalentity entity_reference = event[1] diff --git a/test/test_stream.rb b/test/test_stream.rb index 615d497f..782066c2 100644 --- a/test/test_stream.rb +++ b/test/test_stream.rb @@ -87,6 +87,42 @@ def entity(content) assert_equal(["ISOLat2"], listener.entities) end + + def test_entity_replacement + source = <<-XML + + + +]>&la;&lala; + XML + + listener = MyListener.new + class << listener + attr_accessor :text_values + def text(text) + @text_values << text + end + end + listener.text_values = [] + REXML::Document.parse_stream(source, listener) + assert_equal(["1234", "--1234--"], listener.text_values) + end + + def test_characters_predefined_entities + source = '<P> <I> <B> Text </B> </I>' + + listener = MyListener.new + class << listener + attr_accessor :text_value + def text(text) + @text_value << text + end + end + listener.text_value = "" + REXML::Document.parse_stream(source, listener) + assert_equal("

Text ", listener.text_value) + end end class EntityExpansionLimitTest < Test::Unit::TestCase @@ -100,6 +136,81 @@ def teardown REXML::Security.entity_expansion_text_limit = @default_entity_expansion_text_limit end + def test_have_value + source = <<-XML + + + + + + +]> + +&a; + + XML + + assert_raise(RuntimeError.new("entity expansion has grown too large")) do + REXML::Document.parse_stream(source, MyListener.new) + end + end + + def test_empty_value + source = <<-XML + + + + + + +]> + +&a; + + XML + + listener = MyListener.new + REXML::Security.entity_expansion_limit = 100000 + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.parse + assert_equal(11111, parser.entity_expansion_count) + + REXML::Security.entity_expansion_limit = @default_entity_expansion_limit + parser = REXML::Parsers::StreamParser.new( source, listener ) + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + parser.parse + end + assert do + parser.entity_expansion_count > @default_entity_expansion_limit + end + end + + def test_with_default_entity + source = <<-XML + + + +]> + +&a; +&a2; +< + + XML + + listener = MyListener.new + REXML::Security.entity_expansion_limit = 4 + REXML::Document.parse_stream(source, listener) + + REXML::Security.entity_expansion_limit = 3 + assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do + REXML::Document.parse_stream(source, listener) + end + end + def test_with_only_default_entities member_value = "<p>#{'A' * @default_entity_expansion_text_limit}</p>" source = <<-XML @@ -117,14 +228,42 @@ def text(text) end end listener.text_value = "" - REXML::Document.parse_stream(source, listener) + parser = REXML::Parsers::StreamParser.new( source, listener ) + parser.parse expected_value = "

#{'A' * @default_entity_expansion_text_limit}

" assert_equal(expected_value, listener.text_value.strip) + assert_equal(0, parser.entity_expansion_count) assert do listener.text_value.bytesize > @default_entity_expansion_text_limit end end + + def test_entity_expansion_text_limit + source = <<-XML + + + + + +]> +&a; + XML + + listener = MyListener.new + class << listener + attr_accessor :text_value + def text(text) + @text_value << text + end + end + listener.text_value = "" + REXML::Security.entity_expansion_text_limit = 90 + REXML::Document.parse_stream(source, listener) + + assert_equal(90, listener.text_value.size) + end end # For test_listener