Skip to content

Commit

Permalink
improvement: Use existing match highlighter for search matches.
Browse files Browse the repository at this point in the history
  • Loading branch information
jimsynz committed Jan 30, 2025
1 parent f71fcd6 commit 5ff3096
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 58 deletions.
27 changes: 2 additions & 25 deletions assets/js/autocomplete/suggestions.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { getSidebarNodes } from '../globals'
import { escapeRegexModifiers, escapeHtmlEntities, isBlank } from '../helpers'
import { isBlank } from '../helpers'
import { highlightMatches } from '../highlighter'

/**
* @typedef Suggestion
Expand Down Expand Up @@ -285,27 +286,3 @@ function startsWith (text, subtext) {
/**
 * Splits the trimmed query string on runs of whitespace.
 */
function tokenize (query) {
  const trimmed = query.trim()
  return trimmed.split(/\s+/)
}

/**
* Returns an HTML string highlighting the individual tokens from the query string.
*/
/**
 * Returns an HTML string highlighting the individual tokens from the query string.
 */
function highlightMatches (text, query) {
  // Longest terms first, so that the longest terms are highlighted first.
  const byLengthDesc = (term1, term2) => term2.length - term1.length
  return highlightTerms(text, tokenize(query).sort(byLengthDesc))
}

/**
 * Wraps each occurrence of the given terms in `text` with <em> tags,
 * HTML-escaping the matched fragments. Matching is case-insensitive.
 */
function highlightTerms (text, terms) {
  if (terms.length === 0) return text

  const [term, ...remaining] = terms
  const pattern = new RegExp(`(.*)(${escapeRegexModifiers(term)})(.*)`, 'i')
  const match = text.match(pattern)

  if (!match) {
    // This term is absent; carry on with the shorter ones.
    return highlightTerms(text, remaining)
  }

  const [, before, hit, after] = match
  // Exponential in the number of terms, but queries only have a few tokens.
  return highlightTerms(before, terms) +
    '<em>' + escapeHtmlEntities(hit) + '</em>' +
    highlightTerms(after, terms)
}
34 changes: 34 additions & 0 deletions assets/js/highlighter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import { escapeRegexModifiers, escapeHtmlEntities } from './helpers'

/**
* Returns an HTML string highlighting the individual tokens from the query string.
*/
/**
 * Returns an HTML string highlighting the individual tokens from the query.
 *
 * @param {String} text - the text to highlight matches in.
 * @param {String|String[]} query - a whitespace-separated query string, or an
 *   already-tokenized array of terms.
 * @param {Object} opts - set `opts.multiline` to let matches span newlines.
 * @returns {String} HTML with each matched term wrapped in <em> tags.
 */
export function highlightMatches (text, query, opts = {}) {
  // Normalize the query to an array of terms. Trim before splitting and drop
  // empty tokens: an empty term would compile to a regex that matches the
  // empty string, sending highlightTerms into infinite recursion.
  const tokens = typeof query === 'string' ? query.trim().split(/\s+/) : query
  // Sort terms by length, so that the longest are highlighted first.
  // filter() returns a fresh array, so a caller-supplied array is not
  // mutated by the in-place sort.
  const terms = tokens
    .filter(term => term !== '')
    .sort((term1, term2) => term2.length - term1.length)
  return highlightTerms(text, terms, opts)
}

/**
 * Wraps each occurrence of the given terms in `text` with <em> tags,
 * HTML-escaping the matched fragments. Matching is case-insensitive;
 * with `opts.multiline` a single match may span newlines ('s' flag).
 */
function highlightTerms (text, terms, opts) {
  if (terms.length === 0) return text

  const flags = opts.multiline ? 'is' : 'i'
  const [term, ...remaining] = terms
  const pattern = new RegExp(`(.*)(${escapeRegexModifiers(term)})(.*)`, flags)
  // NOTE(review): without the 's' flag, `.*` stops at newlines, so for a
  // multi-line `text` any lines outside the matched line are dropped from
  // the output — confirm callers pass multiline text only with opts.multiline.
  const match = text.match(pattern)

  if (!match) {
    // This term is absent; carry on with the shorter ones.
    return highlightTerms(text, remaining, opts)
  }

  const [, before, hit, after] = match
  // Exponential in the number of terms, but queries only have a few tokens.
  return highlightTerms(before, terms, opts) +
    '<em>' + escapeHtmlEntities(hit) + '</em>' +
    highlightTerms(after, terms, opts)
}
47 changes: 24 additions & 23 deletions assets/js/search-page.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import { qs, escapeHtmlEntities, isBlank, getQueryParamByName, getProjectNameAnd
import { setSearchInputValue } from './search-bar'
import searchResultsTemplate from './handlebars/templates/search-results.handlebars'
import { getSearchNodes } from './globals'
import { highlightMatches } from './highlighter'

const EXCERPT_RADIUS = 80
const SEARCH_CONTAINER_SELECTOR = '#search'
Expand All @@ -24,7 +25,7 @@ lunr.Pipeline.registerFunction(docTrimmerFunction, 'docTrimmer')
window.addEventListener('swup:page:view', initialize)
initialize()

function initialize () {
function initialize() {
const pathname = window.location.pathname
if (pathname.endsWith('/search.html') || pathname.endsWith('/search')) {
const query = getQueryParamByName('q')
Expand All @@ -33,7 +34,7 @@ function initialize () {
}
}

async function search (value, queryType) {
async function search(value, queryType) {
if (isBlank(value)) {
renderResults({ value })
} else {
Expand All @@ -56,7 +57,7 @@ async function search (value, queryType) {
}
}

async function localSearch (value) {
async function localSearch(value) {
const index = await getIndex()

// We cannot match on atoms :foo because that would be considered
Expand All @@ -65,7 +66,7 @@ async function localSearch (value) {
return searchResultsToDecoratedSearchItems(index.search(fixedValue))
}

async function remoteSearch (value, queryType, searchNodes) {
async function remoteSearch(value, queryType, searchNodes) {
let filterNodes = searchNodes

if (queryType === 'latest') {
Expand All @@ -86,7 +87,7 @@ async function remoteSearch (value, queryType, searchNodes) {
return payload.hits.map(result => {
const [packageName, packageVersion] = result.document.package.split('-')

const doc = result.document.doc
const doc = highlightMatches(result.document.doc, value, { multiline: true })
const excerpts = [doc]
const metadata = {}
const ref = `https://hexdocs.pm/${packageName}/${packageVersion}/${result.document.ref}`
Expand All @@ -107,13 +108,13 @@ async function remoteSearch (value, queryType, searchNodes) {
}
}

function renderResults ({ value, results, errorMessage }) {
function renderResults({ value, results, errorMessage }) {
const searchContainer = qs(SEARCH_CONTAINER_SELECTOR)
const resultsHtml = searchResultsTemplate({ value, results, errorMessage })
searchContainer.innerHTML = resultsHtml
}

async function getIndex () {
async function getIndex() {
const cachedIndex = await loadIndex()
if (cachedIndex) { return cachedIndex }

Expand All @@ -122,7 +123,7 @@ async function getIndex () {
return index
}

async function loadIndex () {
async function loadIndex() {
try {
const serializedIndex = sessionStorage.getItem(indexStorageKey())
if (serializedIndex) {
Expand All @@ -137,7 +138,7 @@ async function loadIndex () {
}
}

async function saveIndex (index) {
async function saveIndex(index) {
try {
const serializedIndex = await compress(index)
sessionStorage.setItem(indexStorageKey(), serializedIndex)
Expand All @@ -146,7 +147,7 @@ async function saveIndex (index) {
}
}

async function compress (index) {
async function compress(index) {
const stream = new Blob([JSON.stringify(index)], {
type: 'application/json'
}).stream().pipeThrough(new window.CompressionStream('gzip'))
Expand All @@ -156,7 +157,7 @@ async function compress (index) {
return b64encode(buffer)
}

async function decompress (index) {
async function decompress(index) {
const stream = new Blob([b64decode(index)], {
type: 'application/json'
}).stream().pipeThrough(new window.DecompressionStream('gzip'))
Expand All @@ -165,7 +166,7 @@ async function decompress (index) {
return JSON.parse(blob)
}

function b64encode (buffer) {
function b64encode(buffer) {
let binary = ''
const bytes = new Uint8Array(buffer)
const len = bytes.byteLength
Expand All @@ -175,7 +176,7 @@ function b64encode (buffer) {
return window.btoa(binary)
}

function b64decode (str) {
function b64decode(str) {
const binaryString = window.atob(str)
const len = binaryString.length
const bytes = new Uint8Array(new ArrayBuffer(len))
Expand All @@ -185,11 +186,11 @@ function b64decode (str) {
return bytes
}

function indexStorageKey () {
function indexStorageKey() {
return `idv5:${getProjectNameAndVersion()}`
}

function createIndex () {
function createIndex() {
return lunr(function () {
this.ref('ref')
this.field('title', { boost: 3 })
Expand All @@ -207,11 +208,11 @@ function createIndex () {
})
}

function docTokenSplitter (builder) {
function docTokenSplitter(builder) {
builder.pipeline.before(lunr.stemmer, docTokenFunction)
}

function docTokenFunction (token) {
function docTokenFunction(token) {
// If we have something with an arity, we split on : . to make partial
// matches easier. We split only when tokenizing, not when searching.
// Below we use ExDoc.Markdown.to_ast/2 as an example.
Expand Down Expand Up @@ -275,11 +276,11 @@ function docTokenFunction (token) {
return tokens
}

function docTrimmer (builder) {
function docTrimmer(builder) {
builder.pipeline.before(lunr.stemmer, docTrimmerFunction)
}

function docTrimmerFunction (token) {
function docTrimmerFunction(token) {
// Preserve @ and : at the beginning of tokens,
// and ? and ! at the end of tokens. It needs to
// be done before stemming, otherwise search and
Expand All @@ -289,7 +290,7 @@ function docTrimmerFunction (token) {
})
}

function searchResultsToDecoratedSearchItems (results) {
function searchResultsToDecoratedSearchItems(results) {
return results
// If the docs are regenerated without changing its version,
// a reference may have been doc'ed false in the code but
Expand All @@ -306,11 +307,11 @@ function searchResultsToDecoratedSearchItems (results) {
})
}

function getSearchItemByRef (ref) {
function getSearchItemByRef(ref) {
return searchData.items.find(searchItem => searchItem.ref === ref) || null
}

function getExcerpts (searchItem, metadata) {
function getExcerpts(searchItem, metadata) {
const { doc } = searchItem
const searchTerms = Object.keys(metadata)

Expand All @@ -331,7 +332,7 @@ function getExcerpts (searchItem, metadata) {
return excerpts.slice(0, 1)
}

function excerpt (doc, sliceStart, sliceLength) {
function excerpt(doc, sliceStart, sliceLength) {
const startPos = Math.max(sliceStart - EXCERPT_RADIUS, 0)
const endPos = Math.min(sliceStart + sliceLength + EXCERPT_RADIUS, doc.length)
return [
Expand Down
Loading

0 comments on commit 5ff3096

Please sign in to comment.