diff --git a/CHANGELOG.md b/CHANGELOG.md index 11aa4f80ed..f5930d0f0f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org). ## TBD - 3.0.0 +### Security Fix + +- Prevent XXE when loading files + ### Added - Nothing diff --git a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php index 979325a44c..3946ed00e9 100644 --- a/src/PhpSpreadsheet/Reader/Security/XmlScanner.php +++ b/src/PhpSpreadsheet/Reader/Security/XmlScanner.php @@ -35,15 +35,11 @@ private static function forceString(mixed $arg): string private function toUtf8(string $xml): string { - $pattern = '/encoding="(.*?)"/'; - $result = preg_match($pattern, $xml, $matches); - $charset = strtoupper($result ? $matches[1] : 'UTF-8'); - + $charset = $this->findCharSet($xml); if ($charset !== 'UTF-8') { $xml = self::forceString(mb_convert_encoding($xml, 'UTF-8', $charset)); - $result = preg_match($pattern, $xml, $matches); - $charset = strtoupper($result ? 
/**
 * Find the character set named by the first `encoding` pseudo-attribute in the XML text.
 *
 * The XML grammar permits optional whitespace on either side of `=` and allows
 * the value to be wrapped in either double or single quotes, so every one of
 * those spellings is recognised here. Matching only the literal `encoding="`
 * would report a declaration such as `encoding = "UTF-7"` as UTF-8, and
 * toUtf8() would then skip its conversion step.
 *
 * The first occurrence in document order wins, regardless of quote style.
 *
 * @param string $xml raw XML text to inspect
 *
 * @return string upper-cased charset name, or 'UTF-8' when no encoding is declared
 *
 * @throws Reader\Exception if the regular-expression engine reports an error
 */
private function findCharSet(string $xml): string
{
    // Branch-reset group (?|...) puts the captured value in group 1 whichever
    // quote style matched. Negated character classes keep the scan linear.
    $pattern = '/encoding\s*=\s*(?|"([^"]*)"|\'([^\']*)\')/';

    $found = preg_match($pattern, $xml, $matches);
    if ($found === false) {
        // Fail closed: an engine error must not be mistaken for "no encoding declared".
        throw new Reader\Exception('Unable to determine XML encoding, spreadsheet file load() aborted to prevent XXE/XEE attacks');
    }

    return $found === 1 ? strtoupper($matches[1]) : 'UTF-8';
}