From 8d2725c0387d22d64ad5e86fdaa50958cf9e9079 Mon Sep 17 00:00:00 2001 From: Trinity Quirk Date: Sat, 6 Jan 2024 11:03:49 -0800 Subject: [PATCH 1/3] Split document H1 sniffing into its own method We may want to add more methods to sniff out various parts of the document. Stuffing them all inline is going to make the `sniff` method into a pretty big mess, so we'll split out the existing stuff into its own method. We can add more methods to do other kinds of content sniffing as needed. --- lib/ronn/document.rb | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/ronn/document.rb b/lib/ronn/document.rb index b554c0a..25ff1fe 100644 --- a/lib/ronn/document.rb +++ b/lib/ronn/document.rb @@ -214,8 +214,14 @@ def sniff html = Kramdown::Document.new(data[0, 512], auto_ids: false, smart_quotes: ['apos', 'apos', 'quot', 'quot'], typographic_symbols: { hellip: '...', ndash: '--', mdash: '--' }).to_html + sniff_h1_heading(html) or [nil, nil, nil] + end + + # If the document has a top-level '# ' type heading, see + # what kind of metadata we can sniff out of it. + def sniff_h1_heading(html) heading, html = html.split("\n", 2) - return [nil, nil, nil] if html.nil? + return if html.nil? case heading when /([\w_.\[\]~+=@:-]+)\s*\((\d\w*)\)\s*-+\s*(.*)/ From 1c085db8dc16da02c28f1dcd384831c2b633cdcc Mon Sep 17 00:00:00 2001 From: Trinity Quirk Date: Sat, 6 Jan 2024 11:06:36 -0800 Subject: [PATCH 2/3] Sniff H2 sections when ingesting documents Documents may already have NAME sections which already have name and tagline, so we should try that before looking at the H1 header. The sections may use the hyphenated form, which we can split up as name and tagline, or they may just have the name of the page. We can handle both. 
--- lib/ronn/document.rb | 20 +++++++++++++++++++- test/test_ronn_document.rb | 12 ++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/lib/ronn/document.rb b/lib/ronn/document.rb index 25ff1fe..b549128 100644 --- a/lib/ronn/document.rb +++ b/lib/ronn/document.rb @@ -214,7 +214,25 @@ def sniff html = Kramdown::Document.new(data[0, 512], auto_ids: false, smart_quotes: ['apos', 'apos', 'quot', 'quot'], typographic_symbols: { hellip: '...', ndash: '--', mdash: '--' }).to_html - sniff_h1_heading(html) or [nil, nil, nil] + sniff_h2_headings(html) or sniff_h1_heading(html) or [nil, nil, nil] + end + + # If the document has a '## NAME' heading, see if we can sniff out + # some of the document metadata. + def sniff_h2_headings(html) + html.split('<h2>').each do |section| + case section + when /^NAME<\/h2>\s*<p>([\w_.\/\[\]~+=@:<>-]+)\s+-+\s+([\w_.\/\[\]~+=@: -]*)<\/p>/m + # name -- description + description = $2 + name = $1.gsub(/<[^>]+>/, '') + return [name, nil, description] + when /^NAME<\/h2>\s*<p>([\w_.\/\[\]~+=@:<>-]+)<\/p>/m + # name + return [$1.gsub(/<[^>]+>/, ''), nil, nil] + end + end + nil end # If the document has a top-level '# ' type heading, see diff --git a/test/test_ronn_document.rb b/test/test_ronn_document.rb index cd573fe..56a8b85 100644 --- a/test/test_ronn_document.rb +++ b/test/test_ronn_document.rb @@ -74,6 +74,18 @@ def canonicalize(text) assert_equal '5', doc.section assert_equal 'wootderitis', doc.tagline end + + test "new with NAME heading with #{i} dashes and description" do + doc = Ronn::Document.new { "# whatever\n\n## NAME\n\n`foo` #{dashes} bar" } + assert_equal 'foo', doc.name + assert_equal 'bar', doc.tagline + end + end + + test 'new with NAME heading without description' do + doc = Ronn::Document.new { "# whatever\n\n## NAME\n\n`foo`" } + assert_equal 'foo', doc.name + assert_equal nil, doc.tagline end context 'simple conventionally named document' do From 239744980a9a919c2dc348d74c31edca7f15f090 Mon Sep 17 00:00:00 2001 From: Trinity Quirk Date: Sat, 6 Jan 2024 13:58:49 -0800 Subject: [PATCH 3/3] Ensure there is only one NAME section Some documents may include a NAME section in the original text. We'll make sure to check before we add one of our own; if there's already one, we can skip adding a new one. --- lib/ronn/document.rb | 2 +- test/existing_name_section.ronn | 9 +++++++++ test/test_ronn_document.rb | 5 +++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 test/existing_name_section.ronn diff --git a/lib/ronn/document.rb b/lib/ronn/document.rb index b549128..7da173d 100644 --- a/lib/ronn/document.rb +++ b/lib/ronn/document.rb @@ -460,7 +460,7 @@ def html_filter_inject_name_section markup = if title? "

#{title}

" - elsif name + elsif name && !@html.css('h2').map(&:text).include?('NAME') "

NAME

\n" \ "

\n #{name}" + (tagline ? " - #{tagline}\n" : "\n") + diff --git a/test/existing_name_section.ronn b/test/existing_name_section.ronn new file mode 100644 index 0000000..f100833 --- /dev/null +++ b/test/existing_name_section.ronn @@ -0,0 +1,9 @@ +# Test # + +## NAME ## + +test - the test manpage + +## DESCRIPTION ## + +Testing items. w00t! diff --git a/test/test_ronn_document.rb b/test/test_ronn_document.rb index 56a8b85..57cf9d2 100644 --- a/test/test_ronn_document.rb +++ b/test/test_ronn_document.rb @@ -200,4 +200,9 @@ def canonicalize(text) @doc = Ronn::Document.new('hello.1.ronn', styles: %w[test boom test]) { '' } assert_equal %w[man test boom], @doc.styles end + + test 'NAME section is not duplicated' do + html = Ronn::Document.new(File.expand_path('existing_name_section.ronn', __dir__)).to_html + assert html.scan(/<h2[^>]*>NAME<\/h2>/).length == 1 + end end