From d938565839e5c59576b45ccb6e8caa40baa7129d Mon Sep 17 00:00:00 2001 From: inv-hareesh Date: Fri, 7 Feb 2025 08:59:36 +0530 Subject: [PATCH 1/4] =?UTF-8?q?Fix=20search=20issue=20for=20words=20inside?= =?UTF-8?q?=20Guillemets=20(=C2=AB=20=C2=BB)=20without=20spaces?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/Search/SearchIndex.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php index c7d9d6502e2..e10219e2d2f 100644 --- a/app/Search/SearchIndex.php +++ b/app/Search/SearchIndex.php @@ -16,7 +16,7 @@ class SearchIndex /** * A list of delimiter characters used to break-up parsed content into terms for indexing. */ - public static string $delimiters = " \n\t.,!?:;()[]{}<>`'\""; + public static string $delimiters = " \n\t.,!?:;()[]{}<>`'\"«»"; public function __construct( protected EntityProvider $entityProvider From 45a15b479294349638bbe334f8701f077aa899c4 Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Fri, 14 Feb 2025 13:24:39 +0000 Subject: [PATCH 2/4] Searching: Split out search tests into their own dir --- tests/{Entity => Search}/EntitySearchTest.php | 181 +----------------- tests/Search/SearchIndexingTest.php | 77 ++++++++ .../{Entity => Search}/SearchOptionsTest.php | 2 +- tests/Search/SiblingSearchTest.php | 117 +++++++++++ 4 files changed, 196 insertions(+), 181 deletions(-) rename tests/{Entity => Search}/EntitySearchTest.php (74%) create mode 100644 tests/Search/SearchIndexingTest.php rename tests/{Entity => Search}/SearchOptionsTest.php (99%) create mode 100644 tests/Search/SiblingSearchTest.php diff --git a/tests/Entity/EntitySearchTest.php b/tests/Search/EntitySearchTest.php similarity index 74% rename from tests/Entity/EntitySearchTest.php rename to tests/Search/EntitySearchTest.php index 5ace70e3ab2..9c76d0f7136 100644 --- a/tests/Entity/EntitySearchTest.php +++ b/tests/Search/EntitySearchTest.php @@ -1,12 +1,9 @@ 
assertDontSee($templatePage->name); } - public function test_sibling_search_for_pages() - { - $chapter = $this->entities->chapterHasPages(); - $this->assertGreaterThan(2, count($chapter->pages), 'Ensure we\'re testing with at least 1 sibling'); - $page = $chapter->pages->first(); - - $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$page->id}&entity_type=page"); - $search->assertSuccessful(); - foreach ($chapter->pages as $page) { - $search->assertSee($page->name); - } - - $search->assertDontSee($chapter->name); - } - - public function test_sibling_search_for_pages_without_chapter() - { - $page = $this->entities->pageNotWithinChapter(); - $bookChildren = $page->book->getDirectVisibleChildren(); - $this->assertGreaterThan(2, count($bookChildren), 'Ensure we\'re testing with at least 1 sibling'); - - $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$page->id}&entity_type=page"); - $search->assertSuccessful(); - foreach ($bookChildren as $child) { - $search->assertSee($child->name); - } - - $search->assertDontSee($page->book->name); - } - - public function test_sibling_search_for_chapters() - { - $chapter = $this->entities->chapter(); - $bookChildren = $chapter->book->getDirectVisibleChildren(); - $this->assertGreaterThan(2, count($bookChildren), 'Ensure we\'re testing with at least 1 sibling'); - - $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$chapter->id}&entity_type=chapter"); - $search->assertSuccessful(); - foreach ($bookChildren as $child) { - $search->assertSee($child->name); - } - - $search->assertDontSee($chapter->book->name); - } - - public function test_sibling_search_for_books() - { - $books = Book::query()->take(10)->get(); - $book = $books->first(); - $this->assertGreaterThan(2, count($books), 'Ensure we\'re testing with at least 1 sibling'); - - $search = 
$this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$book->id}&entity_type=book"); - $search->assertSuccessful(); - foreach ($books as $expectedBook) { - $search->assertSee($expectedBook->name); - } - } - - public function test_sibling_search_for_shelves() - { - $shelves = Bookshelf::query()->take(10)->get(); - $shelf = $shelves->first(); - $this->assertGreaterThan(2, count($shelves), 'Ensure we\'re testing with at least 1 sibling'); - - $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$shelf->id}&entity_type=bookshelf"); - $search->assertSuccessful(); - foreach ($shelves as $expectedShelf) { - $search->assertSee($expectedShelf->name); - } - } - - public function test_sibling_search_for_books_provides_results_in_alphabetical_order() - { - $contextBook = $this->entities->book(); - $searchBook = $this->entities->book(); - - $searchBook->name = 'Zebras'; - $searchBook->save(); - - $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$contextBook->id}&entity_type=book"); - $this->withHtml($search)->assertElementNotContains('a:first-child', 'Zebras'); - - $searchBook->name = '1AAAAAAArdvarks'; - $searchBook->save(); - - $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$contextBook->id}&entity_type=book"); - $this->withHtml($search)->assertElementContains('a:first-child', '1AAAAAAArdvarks'); - } - - public function test_sibling_search_for_shelves_provides_results_in_alphabetical_order() - { - $contextShelf = $this->entities->shelf(); - $searchShelf = $this->entities->shelf(); - - $searchShelf->name = 'Zebras'; - $searchShelf->save(); - - $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$contextShelf->id}&entity_type=bookshelf"); - $this->withHtml($search)->assertElementNotContains('a:first-child', 'Zebras'); - - $searchShelf->name = '1AAAAAAArdvarks'; - $searchShelf->save(); - - 
$search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$contextShelf->id}&entity_type=bookshelf"); - $this->withHtml($search)->assertElementContains('a:first-child', '1AAAAAAArdvarks'); - } - public function test_search_works_on_updated_page_content() { $page = $this->entities->page(); @@ -453,75 +343,6 @@ public function test_search_ranks_common_words_lower() $this->withHtml($search)->assertElementContains('.entity-list > .page:nth-child(2)', 'Test page A'); } - public function test_terms_in_headers_have_an_adjusted_index_score() - { - $page = $this->entities->newPage(['name' => 'Test page A', 'html' => ' -

TermA

-

TermB TermNested

-

TermC

-

TermD

-

TermE

-
TermF
-
TermG
- ']); - - $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); - - $this->assertEquals(1, $scoreByTerm->get('TermA')); - $this->assertEquals(10, $scoreByTerm->get('TermB')); - $this->assertEquals(10, $scoreByTerm->get('TermNested')); - $this->assertEquals(5, $scoreByTerm->get('TermC')); - $this->assertEquals(4, $scoreByTerm->get('TermD')); - $this->assertEquals(3, $scoreByTerm->get('TermE')); - $this->assertEquals(2, $scoreByTerm->get('TermF')); - // Is 1.5 but stored as integer, rounding up - $this->assertEquals(2, $scoreByTerm->get('TermG')); - } - - public function test_indexing_works_as_expected_for_page_with_lots_of_terms() - { - $this->markTestSkipped('Time consuming test'); - - $count = 100000; - $text = ''; - $chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_#'; - for ($i = 0; $i < $count; $i++) { - $text .= substr(str_shuffle($chars), 0, 5) . ' '; - } - - $page = $this->entities->newPage(['name' => 'Test page A', 'html' => '

' . $text . '

']); - - $termCount = $page->searchTerms()->count(); - - // Expect at least 90% unique rate - $this->assertGreaterThan($count * 0.9, $termCount); - } - - public function test_name_and_content_terms_are_merged_to_single_score() - { - $page = $this->entities->newPage(['name' => 'TermA', 'html' => ' -

TermA

- ']); - - $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); - - // Scores 40 for being in the name then 1 for being in the content - $this->assertEquals(41, $scoreByTerm->get('TermA')); - } - - public function test_tag_names_and_values_are_indexed_for_search() - { - $page = $this->entities->newPage(['name' => 'PageA', 'html' => '

content

', 'tags' => [ - ['name' => 'Animal', 'value' => 'MeowieCat'], - ['name' => 'SuperImportant'], - ]]); - - $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); - $this->assertEquals(5, $scoreByTerm->get('MeowieCat')); - $this->assertEquals(3, $scoreByTerm->get('Animal')); - $this->assertEquals(3, $scoreByTerm->get('SuperImportant')); - } - public function test_matching_terms_in_search_results_are_highlighted() { $this->entities->newPage(['name' => 'My Meowie Cat', 'html' => '

A superimportant page about meowieable animals

', 'tags' => [ diff --git a/tests/Search/SearchIndexingTest.php b/tests/Search/SearchIndexingTest.php new file mode 100644 index 00000000000..43219a4ed98 --- /dev/null +++ b/tests/Search/SearchIndexingTest.php @@ -0,0 +1,77 @@ +entities->newPage(['name' => 'Test page A', 'html' => ' +

TermA

+

TermB TermNested

+

TermC

+

TermD

+

TermE

+
TermF
+
TermG
+ ']); + + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); + + $this->assertEquals(1, $scoreByTerm->get('TermA')); + $this->assertEquals(10, $scoreByTerm->get('TermB')); + $this->assertEquals(10, $scoreByTerm->get('TermNested')); + $this->assertEquals(5, $scoreByTerm->get('TermC')); + $this->assertEquals(4, $scoreByTerm->get('TermD')); + $this->assertEquals(3, $scoreByTerm->get('TermE')); + $this->assertEquals(2, $scoreByTerm->get('TermF')); + // Is 1.5 but stored as integer, rounding up + $this->assertEquals(2, $scoreByTerm->get('TermG')); + } + + public function test_indexing_works_as_expected_for_page_with_lots_of_terms() + { + $this->markTestSkipped('Time consuming test'); + + $count = 100000; + $text = ''; + $chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_#'; + for ($i = 0; $i < $count; $i++) { + $text .= substr(str_shuffle($chars), 0, 5) . ' '; + } + + $page = $this->entities->newPage(['name' => 'Test page A', 'html' => '

' . $text . '

']); + + $termCount = $page->searchTerms()->count(); + + // Expect at least 90% unique rate + $this->assertGreaterThan($count * 0.9, $termCount); + } + + public function test_name_and_content_terms_are_merged_to_single_score() + { + $page = $this->entities->newPage(['name' => 'TermA', 'html' => ' +

TermA

+ ']); + + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); + + // Scores 40 for being in the name then 1 for being in the content + $this->assertEquals(41, $scoreByTerm->get('TermA')); + } + + public function test_tag_names_and_values_are_indexed_for_search() + { + $page = $this->entities->newPage(['name' => 'PageA', 'html' => '

content

', 'tags' => [ + ['name' => 'Animal', 'value' => 'MeowieCat'], + ['name' => 'SuperImportant'], + ]]); + + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); + $this->assertEquals(5, $scoreByTerm->get('MeowieCat')); + $this->assertEquals(3, $scoreByTerm->get('Animal')); + $this->assertEquals(3, $scoreByTerm->get('SuperImportant')); + } +} diff --git a/tests/Entity/SearchOptionsTest.php b/tests/Search/SearchOptionsTest.php similarity index 99% rename from tests/Entity/SearchOptionsTest.php rename to tests/Search/SearchOptionsTest.php index 0c2ad271c58..39c20c19591 100644 --- a/tests/Entity/SearchOptionsTest.php +++ b/tests/Search/SearchOptionsTest.php @@ -1,6 +1,6 @@ entities->chapterHasPages(); + $this->assertGreaterThan(2, count($chapter->pages), 'Ensure we\'re testing with at least 1 sibling'); + $page = $chapter->pages->first(); + + $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$page->id}&entity_type=page"); + $search->assertSuccessful(); + foreach ($chapter->pages as $page) { + $search->assertSee($page->name); + } + + $search->assertDontSee($chapter->name); + } + + public function test_sibling_search_for_pages_without_chapter() + { + $page = $this->entities->pageNotWithinChapter(); + $bookChildren = $page->book->getDirectVisibleChildren(); + $this->assertGreaterThan(2, count($bookChildren), 'Ensure we\'re testing with at least 1 sibling'); + + $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$page->id}&entity_type=page"); + $search->assertSuccessful(); + foreach ($bookChildren as $child) { + $search->assertSee($child->name); + } + + $search->assertDontSee($page->book->name); + } + + public function test_sibling_search_for_chapters() + { + $chapter = $this->entities->chapter(); + $bookChildren = $chapter->book->getDirectVisibleChildren(); + $this->assertGreaterThan(2, count($bookChildren), 'Ensure we\'re testing with at least 1 sibling'); + + $search = 
$this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$chapter->id}&entity_type=chapter"); + $search->assertSuccessful(); + foreach ($bookChildren as $child) { + $search->assertSee($child->name); + } + + $search->assertDontSee($chapter->book->name); + } + + public function test_sibling_search_for_books() + { + $books = Book::query()->take(10)->get(); + $book = $books->first(); + $this->assertGreaterThan(2, count($books), 'Ensure we\'re testing with at least 1 sibling'); + + $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$book->id}&entity_type=book"); + $search->assertSuccessful(); + foreach ($books as $expectedBook) { + $search->assertSee($expectedBook->name); + } + } + + public function test_sibling_search_for_shelves() + { + $shelves = Bookshelf::query()->take(10)->get(); + $shelf = $shelves->first(); + $this->assertGreaterThan(2, count($shelves), 'Ensure we\'re testing with at least 1 sibling'); + + $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$shelf->id}&entity_type=bookshelf"); + $search->assertSuccessful(); + foreach ($shelves as $expectedShelf) { + $search->assertSee($expectedShelf->name); + } + } + + public function test_sibling_search_for_books_provides_results_in_alphabetical_order() + { + $contextBook = $this->entities->book(); + $searchBook = $this->entities->book(); + + $searchBook->name = 'Zebras'; + $searchBook->save(); + + $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$contextBook->id}&entity_type=book"); + $this->withHtml($search)->assertElementNotContains('a:first-child', 'Zebras'); + + $searchBook->name = '1AAAAAAArdvarks'; + $searchBook->save(); + + $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$contextBook->id}&entity_type=book"); + $this->withHtml($search)->assertElementContains('a:first-child', '1AAAAAAArdvarks'); + } + + public function 
test_sibling_search_for_shelves_provides_results_in_alphabetical_order() + { + $contextShelf = $this->entities->shelf(); + $searchShelf = $this->entities->shelf(); + + $searchShelf->name = 'Zebras'; + $searchShelf->save(); + + $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$contextShelf->id}&entity_type=bookshelf"); + $this->withHtml($search)->assertElementNotContains('a:first-child', 'Zebras'); + + $searchShelf->name = '1AAAAAAArdvarks'; + $searchShelf->save(); + + $search = $this->actingAs($this->users->viewer())->get("/search/entity/siblings?entity_id={$contextShelf->id}&entity_type=bookshelf"); + $this->withHtml($search)->assertElementContains('a:first-child', '1AAAAAAArdvarks'); + } +} From f4449928f83748d015a633cdc1cef50fe822648c Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Fri, 14 Feb 2025 19:01:51 +0000 Subject: [PATCH 3/4] Searching: Added custom tokenizer that considers soft delimiters. This changes indexing so that a.b now indexes as "a", "b" AND "a.b" instead of just the first two, for periods and hyphens, so terms containing those characters can be searched within. Adds hyphens as a delimiter - #2095 --- app/Search/SearchIndex.php | 41 ++++++++++++++--- app/Search/SearchOptions.php | 2 +- app/Search/SearchTextTokenizer.php | 70 +++++++++++++++++++++++++++++ tests/Search/SearchIndexingTest.php | 16 +++++++ 4 files changed, 121 insertions(+), 8 deletions(-) create mode 100644 app/Search/SearchTextTokenizer.php diff --git a/app/Search/SearchIndex.php b/app/Search/SearchIndex.php index c7d9d6502e2..a8bd2c4b285 100644 --- a/app/Search/SearchIndex.php +++ b/app/Search/SearchIndex.php @@ -16,7 +16,13 @@ class SearchIndex /** * A list of delimiter characters used to break-up parsed content into terms for indexing. 
*/ - public static string $delimiters = " \n\t.,!?:;()[]{}<>`'\""; + public static string $delimiters = " \n\t.-,!?:;()[]{}<>`'\""; + + /** + * A list of delimiters which could be commonly used within a single term and also indicate a break between terms. + * The indexer will index the full term with these delimiters, plus the terms split via these delimiters. + */ + public static string $softDelimiters = ".-"; public function __construct( protected EntityProvider $entityProvider @@ -196,15 +202,36 @@ protected function generateTermScoreMapFromTags(array $tags): array protected function textToTermCountMap(string $text): array { $tokenMap = []; // {TextToken => OccurrenceCount} - $splitChars = static::$delimiters; - $token = strtok($text, $splitChars); + $softDelims = static::$softDelimiters; + $tokenizer = new SearchTextTokenizer($text, static::$delimiters); + $extendedToken = ''; + $extendedLen = 0; + + $token = $tokenizer->next(); while ($token !== false) { - if (!isset($tokenMap[$token])) { - $tokenMap[$token] = 0; + $delim = $tokenizer->previousDelimiter(); + + if ($delim && str_contains($softDelims, $delim) && $token !== '') { + $extendedToken .= $delim . $token; + $extendedLen++; + } else { + if ($extendedLen > 1) { + $tokenMap[$extendedToken] = ($tokenMap[$extendedToken] ?? 0) + 1; + } + $extendedToken = $token; + $extendedLen = 1; } - $tokenMap[$token]++; - $token = strtok($splitChars); + + if ($token !== '') { + $tokenMap[$token] = ($tokenMap[$token] ?? 0) + 1; + } + + $token = $tokenizer->next(); + } + + if ($extendedLen > 1) { + $tokenMap[$extendedToken] = ($tokenMap[$extendedToken] ?? 
0) + 1; } return $tokenMap; diff --git a/app/Search/SearchOptions.php b/app/Search/SearchOptions.php index a6f82029920..bf527d9c305 100644 --- a/app/Search/SearchOptions.php +++ b/app/Search/SearchOptions.php @@ -181,7 +181,7 @@ protected static function decodeEscapes(string $input): string protected static function parseStandardTermString(string $termString): array { $terms = explode(' ', $termString); - $indexDelimiters = SearchIndex::$delimiters; + $indexDelimiters = implode('', array_diff(str_split(SearchIndex::$delimiters), str_split(SearchIndex::$softDelimiters))); $parsed = [ 'terms' => [], 'exacts' => [], diff --git a/app/Search/SearchTextTokenizer.php b/app/Search/SearchTextTokenizer.php new file mode 100644 index 00000000000..f43fd56f113 --- /dev/null +++ b/app/Search/SearchTextTokenizer.php @@ -0,0 +1,70 @@ +length = strlen($this->text); + } + + /** + * Get the current delimiter to be found. + */ + public function currentDelimiter(): string + { + return $this->currentDelimiter; + } + + /** + * Get the previous delimiter found. + */ + public function previousDelimiter(): string + { + return $this->previousDelimiter; + } + + /** + * Get the next token between delimiters (an empty string when two delimiters are adjacent). + * Returns false if there are no further tokens. 
+ */ + public function next(): string|false + { + $token = ''; + + for ($i = $this->currentIndex; $i < $this->length; $i++) { + $char = $this->text[$i]; + if (str_contains($this->delimiters, $char)) { + $this->previousDelimiter = $this->currentDelimiter; + $this->currentDelimiter = $char; + $this->currentIndex = $i + 1; + return $token; + } + + $token .= $char; + } + + if ($token !== '') { + $this->currentIndex = $this->length; + $this->previousDelimiter = $this->currentDelimiter; + $this->currentDelimiter = ''; + return $token; + } + + return false; + } +} diff --git a/tests/Search/SearchIndexingTest.php b/tests/Search/SearchIndexingTest.php index 43219a4ed98..6933813b608 100644 --- a/tests/Search/SearchIndexingTest.php +++ b/tests/Search/SearchIndexingTest.php @@ -74,4 +74,20 @@ public function test_tag_names_and_values_are_indexed_for_search() $this->assertEquals(3, $scoreByTerm->get('Animal')); $this->assertEquals(3, $scoreByTerm->get('SuperImportant')); } + + public function test_terms_containing_punctuation_within_retain_original_form_and_split_form_in_index() + { + $page = $this->entities->newPage(['html' => '

super.duper awesome-beans big- barry cheese.

biscuits

a-bs

']); + + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); + $expected = ['super', 'duper', 'super.duper', 'awesome-beans', 'awesome', 'beans', 'big', 'barry', 'cheese', 'biscuits', 'a-bs', 'a', 'bs']; + foreach ($expected as $term) { + $this->assertNotNull($scoreByTerm->get($term), "Failed asserting that \"$term\" is indexed"); + } + + $nonExpected = ['big-', 'big-barry', 'cheese.', 'cheese.biscuits']; + foreach ($nonExpected as $term) { + $this->assertNull($scoreByTerm->get($term), "Failed asserting that \"$term\" is not indexed"); + } + } } From f1b8e857bf68eb83ecc2db40da53f5d4626a16de Mon Sep 17 00:00:00 2001 From: Dan Brown Date: Fri, 14 Feb 2025 19:30:25 +0000 Subject: [PATCH 4/4] Searching: Added test for guillemets To cover #5475 --- tests/Search/SearchIndexingTest.php | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/tests/Search/SearchIndexingTest.php b/tests/Search/SearchIndexingTest.php index 6933813b608..57cf412e1b7 100644 --- a/tests/Search/SearchIndexingTest.php +++ b/tests/Search/SearchIndexingTest.php @@ -75,6 +75,22 @@ public function test_tag_names_and_values_are_indexed_for_search() $this->assertEquals(3, $scoreByTerm->get('SuperImportant')); } + public function test_terms_containing_guillemets_handled() + { + $page = $this->entities->newPage(['html' => '

«Hello there» and « there »

']); + + $scoreByTerm = $page->searchTerms()->pluck('score', 'term'); + $expected = ['Hello', 'there', 'and']; + foreach ($expected as $term) { + $this->assertNotNull($scoreByTerm->get($term), "Failed asserting that \"$term\" is indexed"); + } + + $nonExpected = ['«', '»']; + foreach ($nonExpected as $term) { + $this->assertNull($scoreByTerm->get($term), "Failed asserting that \"$term\" is not indexed"); + } + } + public function test_terms_containing_punctuation_within_retain_original_form_and_split_form_in_index() { $page = $this->entities->newPage(['html' => '

super.duper awesome-beans big- barry cheese.

biscuits

a-bs

']);