From 3ef1fba7de257f78eda005bbd30186597b754562 Mon Sep 17 00:00:00 2001 From: Sam Wilson Date: Tue, 19 Mar 2024 09:09:38 +0800 Subject: [PATCH] Fix issue with cleaning transcluded reference links This fixes two issues with the PageParser::cleanReferenceLinks() method: the first occurs when there are no reference links found, and the second is the actual bug linked below. The `typeof` attribute can contain other values in addition to 'mw:Extension/ref' (e.g. when a ref is also transcluded), but we were only matching elements whose attribute was exactly 'mw:Extension/ref'. Switching to the XPath contains() function fixes this. Bug: T358965 --- src/PageParser.php | 5 ++++- tests/Book/PageParserTest.php | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/PageParser.php b/src/PageParser.php index 8592e9a3..8641bf8a 100644 --- a/src/PageParser.php +++ b/src/PageParser.php @@ -442,8 +442,11 @@ private function cleanRedLinks() { private function cleanReferenceLinks() { $links = $this->xPath->query( - '//*[@typeof="mw:Extension/ref"]/a | //a[@rel="mw:referencedBy"]' + '//*[contains(@typeof,"mw:Extension/ref")]/a | //a[@rel="mw:referencedBy"]' ); + if ( !$links ) { + return; + } foreach ( $links as $link ) { $href = $link->getAttribute( 'href' ); $pos = strpos( $href, '#' ); diff --git a/tests/Book/PageParserTest.php b/tests/Book/PageParserTest.php index 957e2c30..1a9c561b 100644 --- a/tests/Book/PageParserTest.php +++ b/tests/Book/PageParserTest.php @@ -233,4 +233,25 @@ public function provideGetPicturesList(): array { ], ]; } + + /** + * @dataProvider provideCleanReferenceLinks + */ + public function testCleanReferenceLinks( string $html, string $expected ) { + $pageParser1 = new PageParser( Util::buildDOMDocumentFromHtml( $html ) ); + $this->assertStringContainsString( $expected, $pageParser1->getContent( false )->saveXML() ); + } + + public function provideCleanReferenceLinks() { + return [ + 'no links to clean' => [ + '[1]', + '', + ], + 'ref that is also transcluded' => [ + '[1]', + '', + ], + ]; + } }