From 3a0c307b7338f65e7b7c043ba0e3663e6b9b111b Mon Sep 17 00:00:00 2001 From: Stephen Checkoway Date: Thu, 23 Aug 2018 03:58:31 -0400 Subject: [PATCH] Rename :max_parse_errors to :max_errors --- CHANGELOG.md | 3 +++ README.md | 4 ++-- ext/nokogumbo/nokogumbo.c | 4 ++-- lib/nokogumbo.rb | 6 ++++-- test/test_encoding.rb | 2 +- test/test_nokogumbo.rb | 28 +++++++++++++++++----------- test/test_null.rb | 4 ++-- test/test_tree-construction.rb | 12 +----------- 8 files changed, 32 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d68ee31..e0f52c2d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,8 +12,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Changed - Integrated [Gumbo parser](https://github.com/google/gumbo-parser) into Nokogumbo. A system version will not be used. +- The undocumented (but publicly mentioned) `:max_parse_errors` renamed to `:max_errors`; + `:max_parse_errors` is deprecated and will go away ### Deprecated +- `:max_parse_errors`; use `:max_errors` ### Removed diff --git a/README.md b/README.md index 17060b3f..00aab3af 100644 --- a/README.md +++ b/README.md @@ -33,11 +33,11 @@ doc = Nokogiri::HTML5.get(uri) ## Error reporting Nokogumbo contains an experimental parse error reporting facility. By default, no parse errors are reported but this can be configured by passing the -`:max_parse_errors` option to `::parse` or `::fragment`. +`:max_errors` option to `::parse` or `::fragment`. ```ruby require 'nokogumbo' -doc = Nokogiri::HTML5.parse('Hi there!', max_parse_errors: 10) +doc = Nokogiri::HTML5.parse('Hi there!', max_errors: 10) doc.errors.each do |err| puts err end diff --git a/ext/nokogumbo/nokogumbo.c b/ext/nokogumbo/nokogumbo.c index 51ac4f3a..93cda617 100644 --- a/ext/nokogumbo/nokogumbo.c +++ b/ext/nokogumbo/nokogumbo.c @@ -242,9 +242,9 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node) { } // Parse a string using gumbo_parse into a Nokogiri document -static VALUE parse(VALUE self, VALUE string, VALUE max_parse_errors) { +static VALUE parse(VALUE self, VALUE string, VALUE max_errors) { GumboOptions options = kGumboDefaultOptions; - options.max_errors = NUM2INT(max_parse_errors); + options.max_errors = NUM2INT(max_errors); const char *input = RSTRING_PTR(string); size_t input_len = RSTRING_LEN(string); diff --git a/lib/nokogumbo.rb b/lib/nokogumbo.rb index a88e4460..1ab492ad 100644 --- a/lib/nokogumbo.rb +++ b/lib/nokogumbo.rb @@ -14,7 +14,8 @@ module HTML5 # may also be an IO-like object. Returns a +Nokogiri::HTML::Document+. def self.parse(string, options={}) string = read_and_encode(string) - document = Nokogumbo.parse(string.to_s, options[:max_parse_errors] || 0) + max_errors = options[:max_errors] || options[:max_parse_errors] || 0 + document = Nokogumbo.parse(string.to_s, max_errors) document.encoding = 'UTF-8' document end @@ -95,7 +96,8 @@ def self.fragment(tags, options = {}) else path = "/html/body/node()" end - temp_doc = Nokogumbo.parse("#{tags}", options[:max_parse_errors] || 0) + max_errors = options[:max_errors] || options[:max_parse_errors] || 0 + temp_doc = Nokogumbo.parse("#{tags}", max_errors) temp_doc.xpath(path).each { |child| child.parent = frag } frag.errors = temp_doc.errors frag diff --git a/test/test_encoding.rb b/test/test_encoding.rb index 13fd565a..b6555372 100644 --- a/test/test_encoding.rb +++ b/test/test_encoding.rb @@ -44,7 +44,7 @@ def test_charset_sniff_to_html EOF - doc = Nokogiri::HTML5(html, max_parse_errors: 10) + doc = Nokogiri::HTML5(html, max_errors: 10) assert_equal 0, doc.errors.length refute_equal '', doc.to_html end diff --git a/test/test_nokogumbo.rb b/test/test_nokogumbo.rb index cd8038b5..d7e29a13 100644 --- a/test/test_nokogumbo.rb +++ b/test/test_nokogumbo.rb @@ -51,7 +51,13 @@ def test_html5_doctype end def test_fragment_no_errors - doc = Nokogiri::HTML5.fragment("no missing DOCTYPE errors", max_parse_errors: 10) + doc = Nokogiri::HTML5.fragment("no missing DOCTYPE errors", max_errors: 10) + assert_equal 0, doc.errors.length + end + + # This should be deleted when `:max_parse_errors` is removed. + def test_fragment_max_parse_errors + doc = Nokogiri::HTML5.fragment("testing deprecated :max_parse_errors", max_parse_errors: 10) assert_equal 0, doc.errors.length end @@ -99,28 +105,28 @@ def test_root_comments end def test_parse_errors - doc = Nokogiri::HTML5("", max_parse_errors: 10) + doc = Nokogiri::HTML5("", max_errors: 10) assert_equal doc.errors.length, 2 - doc = Nokogiri::HTML5("", max_parse_errors: 10) + doc = Nokogiri::HTML5("", max_errors: 10) assert_empty doc.errors end - def test_max_parse_errors + def test_max_errors # This document contains 2 parse errors, but we force limit to 1. - doc = Nokogiri::HTML5("", max_parse_errors: 1) + doc = Nokogiri::HTML5("", max_errors: 1) assert_equal 1, doc.errors.length - doc = Nokogiri::HTML5("", max_parse_errors: 1) + doc = Nokogiri::HTML5("", max_errors: 1) assert_empty doc.errors end - def test_default_max_parse_errors + def test_default_max_errors # This document contains 200 parse errors, but default limit is 0. doc = Nokogiri::HTML5("" + "

" * 200) assert_equal 0, doc.errors.length end def test_parse_fragment_errors - doc = Nokogiri::HTML5.fragment("<\r\n", max_parse_errors: 10) + doc = Nokogiri::HTML5.fragment("<\r\n", max_errors: 10) refute_empty doc.errors end @@ -140,13 +146,13 @@ def test_document_encoding assert_equal "Кирилические символы", doc.at('body').text.gsub(/\n\s+/,'') end - def test_fragment_max_parse_errors + def test_fragment_max_errors # This fragment contains 3 parse errors, but we force limit to 1. - doc = Nokogiri::HTML5.fragment("", max_parse_errors: 1) + doc = Nokogiri::HTML5.fragment("", max_errors: 1) assert_equal 1, doc.errors.length end - def test_fragment_default_max_parse_errors + def test_fragment_default_max_errors # This fragment contains 201 parse errors, but default limit is 0. doc = Nokogiri::HTML5.fragment("

" * 200) assert_equal 0, doc.errors.length diff --git a/test/test_null.rb b/test/test_null.rb index 0c9f4c01..bd60986e 100644 --- a/test/test_null.rb +++ b/test/test_null.rb @@ -4,7 +4,7 @@ class TestNull < Minitest::Test def fragment(s) - Nokogiri::HTML5.fragment(s, max_parse_errors: 10) + Nokogiri::HTML5.fragment(s, max_errors: 10) end def test_null_char_ref @@ -83,7 +83,7 @@ def test_comment_state def test_doctype_name_states # There are two missing here for double quoted PUBLIC and SYSTEM values. doc = Nokogiri::HTML5.parse("", - max_parse_errors: 10) + max_errors: 10) # 12.2.5.54 Before DOCTYPE name state: unexpected-null-character parse # error # 12.2.5.55 DOCTYPE name state: unexpected-null-character parse error diff --git a/test/test_tree-construction.rb b/test/test_tree-construction.rb index a89f99bf..0468173d 100644 --- a/test/test_tree-construction.rb +++ b/test/test_tree-construction.rb @@ -2,16 +2,6 @@ require 'nokogumbo' require 'minitest/autorun' -# class TestTreeConstructionBase < Minitest::Test -# def fragment(s) -# Nokogiri::HTML5.fragment(s, context, max_parse_errors: 100) -# end -# -# def parse(s) -# Nokogiri::HTML5.parse(s, max_parse_errors: 100) -# end -# end - def parse_test(test_data) test = { script: :both } #index = test_data.start_with?("#errors\n") ? 0 : test_data.index("\n#errors\n") @@ -185,7 +175,7 @@ def compare_nodes(node, ng_node) def run_test skip "Scripting tests not supported" if @test[:script] == :on skip "Fragment tests not supported" unless @test[:context].nil? - doc = Nokogiri::HTML5.parse(@test[:data], max_parse_errors: @test[:errors].length + 1) + doc = Nokogiri::HTML5.parse(@test[:data], max_errors: @test[:errors].length + 1) # assert_equal doc.errors.length, @test[:errors].length # Walk the tree.