Skip to content

Commit

Permalink
Improve parsing of link relationships
Browse files Browse the repository at this point in the history
* Parse the rel attribute in accordance with the WHATWG spec:
  https://infra.spec.whatwg.org/#split-on-ascii-whitespace
* Only list unique rel values in the rel-urls output, fixes microformats#159:
  microformats/microformats2-parsing#30
* Sort the unique rel values alphabetically:
  microformats/microformats2-parsing#29
* Correctly merge attribute values into the resulting object.
  • Loading branch information
Zegnat committed Mar 24, 2018
1 parent c10b53a commit 2bfa856
Showing 1 changed file with 24 additions and 11 deletions.
35 changes: 24 additions & 11 deletions Mf2/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -1250,16 +1250,15 @@ public function parseRelsAndAlternates() {

// Iterate through all a, area and link elements with rel attributes
foreach ($this->xpath->query('//a[@rel and @href] | //link[@rel and @href] | //area[@rel and @href]') as $hyperlink) {
if ($hyperlink->getAttribute('rel') == '') {
// Parse the set of rels for the current link
$linkRels = array_unique(array_filter(preg_split('/[\t\n\f\r ]/', $hyperlink->getAttribute('rel'))));
if (count($linkRels) === 0) {
continue;
}

// Resolve the href
$href = $this->resolveUrl($hyperlink->getAttribute('href'));

// Split up the rel into space-separated values
$linkRels = array_filter(explode(' ', $hyperlink->getAttribute('rel')));

$rel_attributes = array();

if ($hyperlink->hasAttribute('media')) {
Expand Down Expand Up @@ -1299,13 +1298,27 @@ public function parseRelsAndAlternates() {
$rels[$rel][] = $href;
}

if (!in_array($href, $rel_urls)) {
$rel_urls[$href] = array_merge(
$rel_attributes,
array('rels' => $linkRels)
);
if (!array_key_exists($href, $rel_urls)) {
$rel_urls[$href] = array('rels' => array());
}

// Add the attributes collected only if they were not already set
$rel_urls[$href] = array_merge(
$rel_attributes,
$rel_urls[$href]
);

// Merge current rels with those already set
$rel_urls[$href]['rels'] = array_merge(
$rel_urls[$href]['rels'],
$linkRels
);
}

// Alphabetically sort the rels arrays after removing duplicates
foreach ($rel_urls as $href => $object) {
$rel_urls[$href]['rels'] = array_unique($rel_urls[$href]['rels']);
sort($rel_urls[$href]['rels']);
}

if (empty($rels) and $this->jsonMode) {
Expand All @@ -1314,8 +1327,8 @@ public function parseRelsAndAlternates() {

if (empty($rel_urls) and $this->jsonMode) {
$rel_urls = new stdClass();
}
}

return array($rels, $rel_urls, $alternates);
}

Expand Down

0 comments on commit 2bfa856

Please sign in to comment.