Skip to content

Commit

Permalink
improvement: Use existing match highlighter for search matches.
Browse files Browse the repository at this point in the history
  • Loading branch information
jimsynz committed Jan 30, 2025
1 parent f71fcd6 commit 5ff3096
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 58 deletions.
27 changes: 2 additions & 25 deletions assets/js/autocomplete/suggestions.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { getSidebarNodes } from '../globals'
import { escapeRegexModifiers, escapeHtmlEntities, isBlank } from '../helpers'
import { isBlank } from '../helpers'
import { highlightMatches } from '../highlighter'

/**
* @typedef Suggestion
Expand Down Expand Up @@ -285,27 +286,3 @@ function startsWith (text, subtext) {
/**
 * Splits the trimmed query string on runs of whitespace.
 */
function tokenize (query) {
  const trimmed = query.trim()
  return trimmed.split(/\s+/)
}

/**
* Returns an HTML string highlighting the individual tokens from the query string.
*/
/**
 * Returns an HTML string highlighting the individual tokens from the query string.
 */
function highlightMatches (text, query) {
  // Longest terms first, so that the longest terms are highlighted first.
  const byLengthDesc = (term1, term2) => term2.length - term1.length
  return highlightTerms(text, tokenize(query).sort(byLengthDesc))
}

/**
 * Wraps each occurrence of the given terms in `text` with <em> tags,
 * HTML-escaping the matched fragments. Matching is case-insensitive.
 */
function highlightTerms (text, terms) {
  if (terms.length === 0) return text

  const [term, ...remaining] = terms
  const pattern = new RegExp(`(.*)(${escapeRegexModifiers(term)})(.*)`, 'i')
  const match = text.match(pattern)

  if (!match) {
    // This term is absent; carry on with the shorter ones.
    return highlightTerms(text, remaining)
  }

  const [, before, hit, after] = match
  // Exponential in the number of terms, but queries only have a few tokens.
  return highlightTerms(before, terms) +
    '<em>' + escapeHtmlEntities(hit) + '</em>' +
    highlightTerms(after, terms)
}
34 changes: 34 additions & 0 deletions assets/js/highlighter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { escapeRegexModifiers, escapeHtmlEntities } from './helpers'

/**
* Returns an HTML string highlighting the individual tokens from the query string.
*/
/**
 * Returns an HTML string highlighting the individual tokens from the query.
 *
 * @param {String} text - the text to highlight matches in.
 * @param {String|String[]} query - a whitespace-separated query string, or an
 *   already-tokenized array of terms.
 * @param {Object} opts - set `opts.multiline` to let matches span newlines.
 * @returns {String} HTML with each matched term wrapped in <em> tags.
 */
export function highlightMatches (text, query, opts = {}) {
  // Normalize the query to an array of terms. Trim before splitting and drop
  // empty tokens: an empty term would compile to a regex that matches the
  // empty string, sending highlightTerms into infinite recursion.
  const tokens = typeof query === 'string' ? query.trim().split(/\s+/) : query
  // Sort terms by length, so that the longest are highlighted first.
  // filter() returns a fresh array, so a caller-supplied array is not
  // mutated by the in-place sort.
  const terms = tokens
    .filter(term => term !== '')
    .sort((term1, term2) => term2.length - term1.length)
  return highlightTerms(text, terms, opts)
}

/**
 * Wraps each occurrence of the given terms in `text` with <em> tags,
 * HTML-escaping the matched fragments. Matching is case-insensitive;
 * with `opts.multiline` a single match may span newlines ('s' flag).
 */
function highlightTerms (text, terms, opts) {
  if (terms.length === 0) return text

  const flags = opts.multiline ? 'is' : 'i'
  const [term, ...remaining] = terms
  const pattern = new RegExp(`(.*)(${escapeRegexModifiers(term)})(.*)`, flags)
  // NOTE(review): without the 's' flag, `.*` stops at newlines, so for a
  // multi-line `text` any lines outside the matched line are dropped from
  // the output — confirm callers pass multiline text only with opts.multiline.
  const match = text.match(pattern)

  if (!match) {
    // This term is absent; carry on with the shorter ones.
    return highlightTerms(text, remaining, opts)
  }

  const [, before, hit, after] = match
  // Exponential in the number of terms, but queries only have a few tokens.
  return highlightTerms(before, terms, opts) +
    '<em>' + escapeHtmlEntities(hit) + '</em>' +
    highlightTerms(after, terms, opts)
}
47 changes: 24 additions & 23 deletions assets/js/search-page.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { qs, escapeHtmlEntities, isBlank, getQueryParamByName, getProjectNameAnd
import { setSearchInputValue } from './search-bar'
import searchResultsTemplate from './handlebars/templates/search-results.handlebars'
import { getSearchNodes } from './globals'
import { highlightMatches } from './highlighter'

const EXCERPT_RADIUS = 80
const SEARCH_CONTAINER_SELECTOR = '#search'
Expand All @@ -24,7 +25,7 @@ lunr.Pipeline.registerFunction(docTrimmerFunction, 'docTrimmer')
window.addEventListener('swup:page:view', initialize)
initialize()

function initialize () {
function initialize() {
const pathname = window.location.pathname
if (pathname.endsWith('/search.html') || pathname.endsWith('/search')) {
const query = getQueryParamByName('q')
Expand All @@ -33,7 +34,7 @@ function initialize () {
}
}

async function search (value, queryType) {
async function search(value, queryType) {
if (isBlank(value)) {
renderResults({ value })
} else {
Expand All @@ -56,7 +57,7 @@ async function search (value, queryType) {
}
}

async function localSearch (value) {
async function localSearch(value) {
const index = await getIndex()

// We cannot match on atoms :foo because that would be considered
Expand All @@ -65,7 +66,7 @@ async function localSearch (value) {
return searchResultsToDecoratedSearchItems(index.search(fixedValue))
}

async function remoteSearch (value, queryType, searchNodes) {
async function remoteSearch(value, queryType, searchNodes) {
let filterNodes = searchNodes

if (queryType === 'latest') {
Expand All @@ -86,7 +87,7 @@ async function remoteSearch (value, queryType, searchNodes) {
return payload.hits.map(result => {
const [packageName, packageVersion] = result.document.package.split('-')

const doc = result.document.doc
const doc = highlightMatches(result.document.doc, value, { multiline: true })
const excerpts = [doc]
const metadata = {}
const ref = `https://hexdocs.pm/${packageName}/${packageVersion}/${result.document.ref}`
Expand All @@ -107,13 +108,13 @@ async function remoteSearch (value, queryType, searchNodes) {
}
}

function renderResults ({ value, results, errorMessage }) {
function renderResults({ value, results, errorMessage }) {
const searchContainer = qs(SEARCH_CONTAINER_SELECTOR)
const resultsHtml = searchResultsTemplate({ value, results, errorMessage })
searchContainer.innerHTML = resultsHtml
}

async function getIndex () {
async function getIndex() {
const cachedIndex = await loadIndex()
if (cachedIndex) { return cachedIndex }

Expand All @@ -122,7 +123,7 @@ async function getIndex () {
return index
}

async function loadIndex () {
async function loadIndex() {
try {
const serializedIndex = sessionStorage.getItem(indexStorageKey())
if (serializedIndex) {
Expand All @@ -137,7 +138,7 @@ async function loadIndex () {
}
}

async function saveIndex (index) {
async function saveIndex(index) {
try {
const serializedIndex = await compress(index)
sessionStorage.setItem(indexStorageKey(), serializedIndex)
Expand All @@ -146,7 +147,7 @@ async function saveIndex (index) {
}
}

async function compress (index) {
async function compress(index) {
const stream = new Blob([JSON.stringify(index)], {
type: 'application/json'
}).stream().pipeThrough(new window.CompressionStream('gzip'))
Expand All @@ -156,7 +157,7 @@ async function compress (index) {
return b64encode(buffer)
}

async function decompress (index) {
async function decompress(index) {
const stream = new Blob([b64decode(index)], {
type: 'application/json'
}).stream().pipeThrough(new window.DecompressionStream('gzip'))
Expand All @@ -165,7 +166,7 @@ async function decompress (index) {
return JSON.parse(blob)
}

function b64encode (buffer) {
function b64encode(buffer) {
let binary = ''
const bytes = new Uint8Array(buffer)
const len = bytes.byteLength
Expand All @@ -175,7 +176,7 @@ function b64encode (buffer) {
return window.btoa(binary)
}

function b64decode (str) {
function b64decode(str) {
const binaryString = window.atob(str)
const len = binaryString.length
const bytes = new Uint8Array(new ArrayBuffer(len))
Expand All @@ -185,11 +186,11 @@ function b64decode (str) {
return bytes
}

function indexStorageKey () {
function indexStorageKey() {
return `idv5:${getProjectNameAndVersion()}`
}

function createIndex () {
function createIndex() {
return lunr(function () {
this.ref('ref')
this.field('title', { boost: 3 })
Expand All @@ -207,11 +208,11 @@ function createIndex () {
})
}

function docTokenSplitter (builder) {
function docTokenSplitter(builder) {
builder.pipeline.before(lunr.stemmer, docTokenFunction)
}

function docTokenFunction (token) {
function docTokenFunction(token) {
// If we have something with an arity, we split on : . to make partial
// matches easier. We split only when tokenizing, not when searching.
// Below we use ExDoc.Markdown.to_ast/2 as an example.
Expand Down Expand Up @@ -275,11 +276,11 @@ function docTokenFunction (token) {
return tokens
}

function docTrimmer (builder) {
function docTrimmer(builder) {
builder.pipeline.before(lunr.stemmer, docTrimmerFunction)
}

function docTrimmerFunction (token) {
function docTrimmerFunction(token) {
// Preserve @ and : at the beginning of tokens,
// and ? and ! at the end of tokens. It needs to
// be done before stemming, otherwise search and
Expand All @@ -289,7 +290,7 @@ function docTrimmerFunction (token) {
})
}

function searchResultsToDecoratedSearchItems (results) {
function searchResultsToDecoratedSearchItems(results) {
return results
// If the docs are regenerated without changing its version,
// a reference may have been doc'ed false in the code but
Expand All @@ -306,11 +307,11 @@ function searchResultsToDecoratedSearchItems (results) {
})
}

function getSearchItemByRef (ref) {
function getSearchItemByRef(ref) {
return searchData.items.find(searchItem => searchItem.ref === ref) || null
}

function getExcerpts (searchItem, metadata) {
function getExcerpts(searchItem, metadata) {
const { doc } = searchItem
const searchTerms = Object.keys(metadata)

Expand All @@ -331,7 +332,7 @@ function getExcerpts (searchItem, metadata) {
return excerpts.slice(0, 1)
}

function excerpt (doc, sliceStart, sliceLength) {
function excerpt(doc, sliceStart, sliceLength) {
const startPos = Math.max(sliceStart - EXCERPT_RADIUS, 0)
const endPos = Math.min(sliceStart + sliceLength + EXCERPT_RADIUS, doc.length)
return [
Expand Down
Loading

0 comments on commit 5ff3096

Please sign in to comment.