Skip to content

Commit 44f118a

Browse files
fix: replace filter_var with userland code for uri and uri-reference to be RFC 3986 compliant (#800)
This pull request introduces new validators for URI and relative references and integrates them into the `FormatConstraint` class. The changes also include corresponding unit tests to ensure the validators work as expected. ### New Validators: * [`src/JsonSchema/Tool/Validator/UriValidator.php`](diffhunk://#diff-2019f410f3ef4cdf8478ffa71444226beb8a118d60b3337c40eaaec8d3aef7a3R1-R65): Added a new class `UriValidator` to validate URIs according to RFC 3986 and RFC 5322 for `mailto:` URIs. * [`src/JsonSchema/Tool/Validator/RelativeReferenceValidator.php`](diffhunk://#diff-0bfeeb9c38593a2d65cc2e8c49fe873c78765eac81c00cf0a398bd754ca9c7a8R1-R53): Added a new class `RelativeReferenceValidator` to validate relative references according to RFC 3986. ### Integration into `FormatConstraint`: * [`src/JsonSchema/Constraints/FormatConstraint.php`](diffhunk://#diff-44020f0c0690a2a4c1c446e97185986c31b19374b4a99f4b0970c5df36279067L104-R114): Integrated the new `UriValidator` and `RelativeReferenceValidator` into the `check` method to replace the previous inline validation logic for `uri` and `uri-reference` formats. ### Unit Tests: * [`tests/Tool/Validator/UriValidatorTest.php`](diffhunk://#diff-6b107cb8679795fb59c070ba500d0646f6d357e4c03a585f4a0c67181e4101fcR1-R49): Added unit tests for `UriValidator` to ensure it correctly validates valid and invalid URIs. * [`tests/Tool/Validator/RelativeReferenceValidatorTest.php`](diffhunk://#diff-97a7acc2a16f7653d307a16e356c7545b0a0bc26156ce60f7ca13332a6656729R1-R40): Added unit tests for `RelativeReferenceValidator` to ensure it correctly validates valid and invalid relative references. # Closing keywords fixes #685
1 parent a8c0088 commit 44f118a

File tree

5 files changed

+212
-22
lines changed

5 files changed

+212
-22
lines changed

src/JsonSchema/Constraints/FormatConstraint.php

+5-22
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
use JsonSchema\ConstraintError;
1515
use JsonSchema\Entity\JsonPointer;
1616
use JsonSchema\Rfc3339;
17+
use JsonSchema\Tool\Validator\RelativeReferenceValidator;
18+
use JsonSchema\Tool\Validator\UriValidator;
1719

1820
/**
1921
* Validates against the "format" property
@@ -101,34 +103,15 @@ public function check(&$element, $schema = null, ?JsonPointer $path = null, $i =
101103
break;
102104

103105
case 'uri':
104-
if (is_string($element) && null === filter_var($element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE)) {
106+
if (is_string($element) && !UriValidator::isValid($element)) {
105107
$this->addError(ConstraintError::FORMAT_URL(), $path, ['format' => $schema->format]);
106108
}
107109
break;
108110

109111
case 'uriref':
110112
case 'uri-reference':
111-
if (is_string($element) && null === filter_var($element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE)) {
112-
// FILTER_VALIDATE_URL does not conform to RFC-3986, and cannot handle relative URLs, but
113-
// the json-schema spec uses RFC-3986, so need a bit of hackery to properly validate them.
114-
// See https://tools.ietf.org/html/rfc3986#section-4.2 for additional information.
115-
if (substr($element, 0, 2) === '//') { // network-path reference
116-
$validURL = filter_var('scheme:' . $element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE);
117-
} elseif (substr($element, 0, 1) === '/') { // absolute-path reference
118-
$validURL = filter_var('scheme://host' . $element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE);
119-
} elseif (strlen($element)) { // relative-path reference
120-
$pathParts = explode('/', $element, 2);
121-
if (strpos($pathParts[0], ':') !== false) {
122-
$validURL = null;
123-
} else {
124-
$validURL = filter_var('scheme://host/' . $element, FILTER_VALIDATE_URL, FILTER_NULL_ON_FAILURE);
125-
}
126-
} else {
127-
$validURL = null;
128-
}
129-
if ($validURL === null) {
130-
$this->addError(ConstraintError::FORMAT_URL_REF(), $path, ['format' => $schema->format]);
131-
}
113+
if (is_string($element) && !(UriValidator::isValid($element) || RelativeReferenceValidator::isValid($element))) {
114+
$this->addError(ConstraintError::FORMAT_URL(), $path, ['format' => $schema->format]);
132115
}
133116
break;
134117

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace JsonSchema\Tool\Validator;
6+
7+
/**
 * Decides whether a string is acceptable as a relative reference.
 *
 * The decision is made in two stages: a permissive structural match that
 * splits the input into scheme / authority / path / query / fragment
 * components, followed by a deny-list of shapes that are never accepted.
 */
class RelativeReferenceValidator
{
    /**
     * @param string $ref Candidate relative reference
     *
     * @return bool True when the structural match succeeds and no deny-list rule applies
     */
    public static function isValid(string $ref): bool
    {
        // Generic component-splitting expression (modelled on the one published with RFC 3986).
        $componentsPattern = '/^(([^\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$/';
        if (preg_match($componentsPattern, $ref) !== 1) {
            return false;
        }

        // A lone fragment or query delimiter carries no path at all.
        if ($ref === '#' || $ref === '?') {
            return false;
        }

        // Every entry describes one shape that must be refused.
        $rejectedShapes = [
            '/^(http|https):\/\//', // absolute http(s) URI
            '/^:\/\//',             // authority introduced without a scheme name
            '/^:\//',               // scheme separator with nothing in front of it
            '/^\/\/$/',             // authority marker with an empty authority
            '/^\/\/\/[^\/]/',       // three leading slashes
            '/\s/',                 // whitespace anywhere in the reference
            '/^\?#|^#$/',           // query and fragment (or bare fragment) without a path
        ];

        foreach ($rejectedShapes as $shape) {
            if (preg_match($shape, $ref)) {
                return false;
            }
        }

        return true;
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace JsonSchema\Tool\Validator;
6+
7+
/**
 * Validates absolute URIs.
 *
 * Two families are recognised:
 *  - hierarchical URIs of the form scheme://[userinfo@]host[:port][/path][?query][#fragment]
 *  - the non-hierarchical schemes `mailto:`, `data:` and `urn:`
 */
class UriValidator
{
    /**
     * @param string $uri Candidate URI
     *
     * @return bool True when $uri matches one of the supported URI shapes
     */
    public static function isValid(string $uri): bool
    {
        // Raw whitespace (including a trailing newline) is never legal in a URI;
        // it has to be percent-encoded. Rejecting it up front also keeps this
        // validator in line with RelativeReferenceValidator.
        if (preg_match('/\s/', $uri) === 1) {
            return false;
        }

        // RFC 3986: Hierarchical URIs (http, https, ftp, etc.)
        // The D modifier makes "$" match only at the very end of the subject.
        // The path is a single optional group: the character class already
        // contains the slash, so repeating the group would only add ambiguous
        // ways to split the same text and invite exponential backtracking.
        $hierarchicalPattern = '/^
            ([a-z][a-z0-9+\-.]*):\/\/                 # Scheme (http, https, ftp, etc.)
            (?:([^:@\/?#]+)(?::([^@\/?#]*))?@)?       # Optional userinfo (user:pass@)
            ([a-z0-9.-]+|\[[a-f0-9:.]+\])             # Hostname or IPv6 in brackets
            (?::(\d{1,5}))?                           # Optional port
            (\/[a-zA-Z0-9._~!$&\'()*+,;=:@\/%-]*)?    # Optional path (valid characters only)
            (\?([^#]*))?                              # Optional query
            (\#(.*))?                                 # Optional fragment
        $/ixD';

        // RFC 3986: Non-Hierarchical URIs (mailto, data, urn)
        $nonHierarchicalPattern = '/^
            (mailto|data|urn):    # Only allow known non-hierarchical schemes
            (.+)                  # Must contain at least one character after scheme
        $/ixD';

        // Simplified addr-spec check for `mailto:` URIs (local@domain.tld)
        $emailPattern = '/^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$/D';

        if (preg_match($hierarchicalPattern, $uri, $matches) === 1) {
            // Validate domain name (no double dots like example..com)
            if (strpos($matches[4], '..') !== false) {
                return false;
            }

            // Validate port (must be between 1 and 65535 if specified).
            // Trailing unmatched groups are absent from $matches, and "0" is a
            // value that must be range-checked, so empty() cannot be used here.
            $port = $matches[5] ?? '';
            if ($port !== '' && ((int) $port < 1 || (int) $port > 65535)) {
                return false;
            }

            // No separate path check is needed: the path character class above
            // already excludes < > { } | \ ^ and the backtick.
            return true;
        }

        if (preg_match($nonHierarchicalPattern, $uri, $matches) === 1) {
            // Special case: `mailto:` must contain a valid email address
            if (strtolower($matches[1]) === 'mailto') {
                return preg_match($emailPattern, $matches[2]) === 1;
            }

            return true; // Valid non-hierarchical URI
        }

        return false;
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Tool\Validator;
6+
7+
use JsonSchema\Tool\Validator\RelativeReferenceValidator;
8+
use PHPUnit\Framework\TestCase;
9+
10+
/**
 * Exercises RelativeReferenceValidator::isValid() with references that must
 * be accepted and references that must be rejected.
 */
class RelativeReferenceValidatorTest extends TestCase
{
    /** @dataProvider validRelativeReferenceDataProvider */
    public function testValidRelativeReferencesAreValidatedAsSuch(string $ref): void
    {
        self::assertTrue(RelativeReferenceValidator::isValid($ref));
    }

    /** @dataProvider invalidRelativeReferenceDataProvider */
    public function testInvalidRelativeReferencesAreValidatedAsSuch(string $ref): void
    {
        self::assertFalse(RelativeReferenceValidator::isValid($ref));
    }

    /**
     * References the validator is expected to accept.
     *
     * Declared static: the provider uses no instance state, and newer PHPUnit
     * releases deprecate non-static data providers.
     */
    public static function validRelativeReferenceDataProvider(): \Generator
    {
        yield 'Relative path from root' => ['ref' => '/relative/path'];
        yield 'Relative path up one level' => ['ref' => '../up-one-level'];
        yield 'Relative path from current' => ['ref' => 'foo/bar'];
    }

    /**
     * References the validator is expected to reject.
     *
     * Declared static for the same reason as the valid-reference provider.
     */
    public static function invalidRelativeReferenceDataProvider(): \Generator
    {
        yield 'Absolute URI' => ['ref' => 'http://example.com'];
        yield 'Three slashes' => ['ref' => '///three/slashes'];
        yield 'Path with spaces' => ['ref' => '/path with spaces'];
        yield 'No path having query and fragment' => ['ref' => '?#invalid'];
        yield 'Missing path having fragment' => ['ref' => '#'];
        yield 'Missing path having query' => ['ref' => '?'];
    }
}
+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Tool\Validator;
6+
7+
use JsonSchema\Tool\Validator\UriValidator;
8+
use PHPUnit\Framework\TestCase;
9+
10+
/**
 * Exercises UriValidator::isValid() with URIs that must be accepted and URIs
 * that must be rejected.
 */
class UriValidatorTest extends TestCase
{
    /** @dataProvider validUriDataProvider */
    public function testValidUrisAreValidatedAsSuch(string $uri): void
    {
        self::assertTrue(UriValidator::isValid($uri));
    }

    /** @dataProvider invalidUriDataProvider */
    public function testInvalidUrisAreValidatedAsSuch(string $uri): void
    {
        self::assertFalse(UriValidator::isValid($uri));
    }

    /**
     * URIs the validator is expected to accept.
     *
     * Declared static: the provider uses no instance state, and newer PHPUnit
     * releases deprecate non-static data providers.
     */
    public static function validUriDataProvider(): \Generator
    {
        yield 'Simple HTTP URI' => ['uri' => 'https://example.com'];
        yield 'Subdomain HTTP URI' => ['uri' => 'https://sub.domain.example.com'];
        yield 'Full HTTP URI' => ['uri' => 'https://example.com:8080/path/to/resource?query=string#fragment'];
        yield 'Full FTP URI' => ['uri' => 'ftp://user:pass@ftp.example.com:21/path'];
        yield 'IPV6 HTTP URI' => ['uri' => 'http://[2001:db8::ff00:42:8329]'];
        yield 'Mailto URI' => ['uri' => 'mailto:user@example.com'];
        yield 'Data URI' => ['uri' => 'data:text/plain;charset=utf-8,Hello%20World!'];
        yield 'ISBN URN URI' => ['uri' => 'urn:isbn:0451450523'];
        yield 'OASIS URN URI' => ['uri' => 'urn:oasis:names:specification:docbook:dtd:xml:4.1.2'];
    }

    /**
     * URIs the validator is expected to reject.
     *
     * Declared static for the same reason as the valid-URI provider.
     */
    public static function invalidUriDataProvider(): \Generator
    {
        yield 'Invalid schema' => ['uri' => 'ht!tp://example.com'];
        yield 'Missing schema' => ['uri' => '://example.com'];
        yield 'Double dot in domain' => ['uri' => 'https://example..com'];
        yield 'To high of a port number' => ['uri' => 'https://example.com:65536'];
        yield 'Invalid path characters with "<>"' => ['uri' => 'http://example.com/<>'];
        yield 'Invalid path characters with "{}"' => ['uri' => 'http://example.com/{bad}'];
        yield 'Invalid path characters with "^"' => ['uri' => 'http://example.com/^invalid'];
        yield 'Only mailto:' => ['uri' => 'mailto:'];
        yield 'Invalid email used in mailto:' => ['uri' => 'mailto:user@.com'];
    }
}

0 commit comments

Comments
 (0)