From 8d2725c0387d22d64ad5e86fdaa50958cf9e9079 Mon Sep 17 00:00:00 2001
From: Trinity Quirk <tquirk@ymb.net>
Date: Sat, 6 Jan 2024 11:03:49 -0800
Subject: [PATCH 1/3] Split document H1 sniffing into its own method

We may want to add more methods to sniff out various parts of the
document.  Stuffing them all inline is going to make the `sniff`
method into a pretty big mess, so we'll split out the existing stuff
into its own method.  We can add more methods to do other kinds of
content sniffing as needed.
---
 lib/ronn/document.rb | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/lib/ronn/document.rb b/lib/ronn/document.rb
index b554c0a..25ff1fe 100644
--- a/lib/ronn/document.rb
+++ b/lib/ronn/document.rb
@@ -214,8 +214,14 @@ def sniff
       html = Kramdown::Document.new(data[0, 512], auto_ids: false,
         smart_quotes: ['apos', 'apos', 'quot', 'quot'],
         typographic_symbols: { hellip: '...', ndash: '--', mdash: '--' }).to_html
+      sniff_h1_heading(html) or [nil, nil, nil]
+    end
+
+    # If the document has a top-level '# <data>' type heading, see
+    # what kind of metadata we can sniff out of it.
+    def sniff_h1_heading(html)
       heading, html = html.split("</h1>\n", 2)
-      return [nil, nil, nil] if html.nil?
+      return if html.nil?
 
       case heading
       when /([\w_.\[\]~+=@:-]+)\s*\((\d\w*)\)\s*-+\s*(.*)/

From 1c085db8dc16da02c28f1dcd384831c2b633cdcc Mon Sep 17 00:00:00 2001
From: Trinity Quirk <tquirk@ymb.net>
Date: Sat, 6 Jan 2024 11:06:36 -0800
Subject: [PATCH 2/3] Sniff H2 sections when ingesting documents

Documents may already have a NAME section that supplies the name and
tagline, so we should try that before looking at the H1 heading.

The NAME section may use the hyphenated `name - tagline` form, which
we can split into name and tagline, or it may contain just the name of
the page.  We handle both.
---
 lib/ronn/document.rb       | 20 +++++++++++++++++++-
 test/test_ronn_document.rb | 12 ++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/lib/ronn/document.rb b/lib/ronn/document.rb
index 25ff1fe..b549128 100644
--- a/lib/ronn/document.rb
+++ b/lib/ronn/document.rb
@@ -214,7 +214,25 @@ def sniff
       html = Kramdown::Document.new(data[0, 512], auto_ids: false,
         smart_quotes: ['apos', 'apos', 'quot', 'quot'],
         typographic_symbols: { hellip: '...', ndash: '--', mdash: '--' }).to_html
-      sniff_h1_heading(html) or [nil, nil, nil]
+      sniff_h2_headings(html) or sniff_h1_heading(html) or [nil, nil, nil]
+    end
+
+    # If the document has a '## NAME' heading, see if we can sniff out
+    # some of the document metadata.
+    def sniff_h2_headings(html)
+      html.split('<h2>').each do |section|
+        case section
+        when /^NAME<\/h2>\s*<p>([\w_.\/\[\]~+=@:<>-]+)\s+-+\s+([\w_.\/\[\]~+=@: -]*)<\/p>/m
+          # name -- description
+          description = $2
+          name = $1.gsub(/<[^>]+>/, '')
+          return [name, nil, description]
+        when /^NAME<\/h2>\s*<p>([\w_.\/\[\]~+=@:<>-]+)<\/p>/m
+          # name
+          return [$1.gsub(/<[^>]+>/, ''), nil, nil]
+        end
+      end
+      nil
     end
 
     # If the document has a top-level '# <data>' type heading, see
diff --git a/test/test_ronn_document.rb b/test/test_ronn_document.rb
index cd573fe..56a8b85 100644
--- a/test/test_ronn_document.rb
+++ b/test/test_ronn_document.rb
@@ -74,6 +74,18 @@ def canonicalize(text)
       assert_equal '5', doc.section
       assert_equal 'wootderitis', doc.tagline
     end
+
+    test "new with NAME heading with #{i} dashes and description" do
+      doc = Ronn::Document.new { "# whatever\n\n## NAME\n\n`foo` #{dashes} bar" }
+      assert_equal 'foo', doc.name
+      assert_equal 'bar', doc.tagline
+    end
+  end
+
+  test 'new with NAME heading without description' do
+    doc = Ronn::Document.new { "# whatever\n\n## NAME\n\n`foo`" }
+    assert_equal 'foo', doc.name
+    assert_equal nil, doc.tagline
   end
 
   context 'simple conventionally named document' do

From 239744980a9a919c2dc348d74c31edca7f15f090 Mon Sep 17 00:00:00 2001
From: Trinity Quirk <tquirk@ymb.net>
Date: Sat, 6 Jan 2024 13:58:49 -0800
Subject: [PATCH 3/3] Ensure there is only one NAME section

Some documents may include a NAME section in the original text.  We'll
make sure to check before we add one of our own; if there's already
one, we can skip adding a new one.
---
 lib/ronn/document.rb            | 2 +-
 test/existing_name_section.ronn | 9 +++++++++
 test/test_ronn_document.rb      | 5 +++++
 3 files changed, 15 insertions(+), 1 deletion(-)
 create mode 100644 test/existing_name_section.ronn

diff --git a/lib/ronn/document.rb b/lib/ronn/document.rb
index b549128..7da173d 100644
--- a/lib/ronn/document.rb
+++ b/lib/ronn/document.rb
@@ -460,7 +460,7 @@ def html_filter_inject_name_section
       markup =
         if title?
           "<h1>#{title}</h1>"
-        elsif name
+        elsif name && !@html.css('h2').map(&:text).include?('NAME')
           "<h2>NAME</h2>\n" \
           "<p class='man-name'>\n  <code>#{name}</code>" +
             (tagline ? " - <span class='man-whatis'>#{tagline}</span>\n" : "\n") +
diff --git a/test/existing_name_section.ronn b/test/existing_name_section.ronn
new file mode 100644
index 0000000..f100833
--- /dev/null
+++ b/test/existing_name_section.ronn
@@ -0,0 +1,9 @@
+# Test #
+
+## NAME ##
+
+test - the test manpage
+
+## DESCRIPTION ##
+
+Testing items.  w00t!
diff --git a/test/test_ronn_document.rb b/test/test_ronn_document.rb
index 56a8b85..57cf9d2 100644
--- a/test/test_ronn_document.rb
+++ b/test/test_ronn_document.rb
@@ -200,4 +200,9 @@ def canonicalize(text)
     @doc = Ronn::Document.new('hello.1.ronn', styles: %w[test boom test]) { '' }
     assert_equal %w[man test boom], @doc.styles
   end
+
+  test 'NAME section is not duplicated' do
+    html = Ronn::Document.new(File.expand_path('existing_name_section.ronn', __dir__)).to_html
+    assert html.scan(/<h2[^>]*>NAME<\/h2>/).length == 1
+  end
 end