From 910e5a2b487cb5a30989884a39f9cad2cc499cfc Mon Sep 17 00:00:00 2001 From: Watson Date: Tue, 16 Jul 2024 11:36:05 +0900 Subject: [PATCH] Fix performance issue caused by using repeated `>` characters inside `` (#177) A `<` is treated as a string delimiter. In certain cases, if `<` is used in succession, read and match are repeated, which slows down the process. Therefore, the following is used to read ahead to a specific part of the string in advance. --- lib/rexml/parsers/baseparser.rb | 2 +- test/parse/test_comment.rb | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 47380f0d..5688c773 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -430,7 +430,7 @@ def pull_event #STDERR.puts "SOURCE BUFFER = #{source.buffer}, #{source.buffer.size}" raise REXML::ParseException.new("Malformed node", @source) unless md if md[0][0] == ?- - md = @source.match(/--(.*?)-->/um, true) + md = @source.match(/--(.*?)-->/um, true, term: Private::COMMENT_TERM) if md.nil? || /--|-\z/.match?(md[1]) raise REXML::ParseException.new("Malformed comment", @source) diff --git a/test/parse/test_comment.rb b/test/parse/test_comment.rb index 543d9ad8..50c765f5 100644 --- a/test/parse/test_comment.rb +++ b/test/parse/test_comment.rb @@ -128,5 +128,12 @@ def test_gt_linear_performance REXML::Document.new('') end end + + def test_gt_linear_performance_in_element + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new('') + end + end end end