From ab4962c7dd5ce30be7220d52d71fddf73f9205e1 Mon Sep 17 00:00:00 2001 From: TSUYUSATO Kitsune Date: Thu, 30 May 2024 21:45:25 +0900 Subject: [PATCH] Optimize implementation --- .../css_selector/adapters/rexml_adapter.rb | 10 ++++++---- lib/rexml/css_selector/base_adapter.rb | 8 ++++++-- tool/bench.rb | 18 +++++++++++------- 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/lib/rexml/css_selector/adapters/rexml_adapter.rb b/lib/rexml/css_selector/adapters/rexml_adapter.rb index 2c47de8..6376f4e 100644 --- a/lib/rexml/css_selector/adapters/rexml_adapter.rb +++ b/lib/rexml/css_selector/adapters/rexml_adapter.rb @@ -35,7 +35,10 @@ def get_attribute(element, name, namespace = nil, attribute_name_case = :sensiti case attribute_name_case in :sensitive - element.attribute(name, namespace)&.value + attrs = element.attributes + # NOTE: `REXML::Element.attribute` is too slow to use. + # Therefore, we call `REXML::Attributes#[]` instead. + namespace.nil? ? attrs[name] : attrs.get_attribute_ns(namespace, name)&.value in :insensitive name = name.downcase(:ascii) target_attr = nil @@ -65,9 +68,8 @@ def each_child_element(element, &) element.each_child { yield _1 if element?(_1) } end - def each_recursive_element(element, &) - element.each_recursive(&) - end + # NOTE: `REXML::Element#each_recursive` is too slow. + # Therefore, we use our default implementation instead. # INSTANCE is the default instance. INSTANCE = new diff --git a/lib/rexml/css_selector/base_adapter.rb b/lib/rexml/css_selector/base_adapter.rb index 3def7dd..3f1cdb7 100644 --- a/lib/rexml/css_selector/base_adapter.rb +++ b/lib/rexml/css_selector/base_adapter.rb @@ -78,9 +78,13 @@ def get_id(element) # Enumerates the elements in +element+ def each_recursive_element(element, &) - each_child_element(element) do |child| + stack = [] + each_child_element(element) { stack.unshift _1 } + until stack.empty? + child = stack.pop yield child - each_recursive_element(child, &) + n = stack.size + each_child_element(child) { stack.insert n, _1 } end end end diff --git a/tool/bench.rb b/tool/bench.rb index 0abc2eb..165d5eb 100644 --- a/tool/bench.rb +++ b/tool/bench.rb @@ -10,6 +10,7 @@ filepath = Fixture.filepath("sizzle.html") selector = "h2, #qunit-fixture p" +bench_rexml_xpath = false n = 1000 opt = OptionParser.new @@ -17,30 +18,33 @@ opt.on("-f ") { filepath = _1 } opt.on("-s ") { selector = _1 } opt.on("-n ") { n = _1.to_i } -opt.on("-o ") { _1 } +opt.on("--bench-rexml-xpath") { bench_rexml_xpath = true } puts "==> Parse command-line options" opt.parse!(ARGV) puts <<~HERE - filepath: #{filepath.inspect} - selector: #{selector.inspect} - n: #{n} + filepath: #{filepath.inspect} + selector: #{selector.inspect} + n: #{n} + bench_rexml_xpath: #{bench_rexml_xpath} HERE puts "==> Load and parse a XML file" content = File.read(filepath) nokogiri_doc = Nokogiri.HTML(content) -selector_xpath = Nokogiri::CSS.xpath_for(selector).join(" | ") rexml_doc = REXML::Document.new(content) -puts " XPath: #{selector_xpath}" +if bench_rexml_xpath + selector_xpath = Nokogiri::CSS.xpath_for(selector).join(" | ") + puts " XPath: #{selector_xpath}" +end puts "==> Start a benchmark" Benchmark.bm do |x| x.report("Nokogiri ") { n.times { nokogiri_doc.css(selector) } } - x.report("REXML (XPath) ") { n.times { rexml_doc.get_elements(selector_xpath) } } + x.report("REXML (XPath) ") { n.times { rexml_doc.get_elements(selector_xpath) } } if bench_rexml_xpath x.report("REXML::CSSSelector") { n.times { REXML::CSSSelector.select_all(rexml_doc, selector) } } end