Skip to content

Commit

Permalink
Merge pull request #4119 from PHPOffice/powerkiki
Browse files Browse the repository at this point in the history
Security: prevent XXE (XML External Entity) when loading files
  • Loading branch information
PowerKiKi authored Jul 29, 2024
2 parents b43947f + bea2d4b commit ea97c17
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 6 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org).

## TBD - 3.0.0

### Security Fix

- Prevent XXE when loading files

### Added

- Nothing
Expand Down
24 changes: 18 additions & 6 deletions src/PhpSpreadsheet/Reader/Security/XmlScanner.php
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,11 @@ private static function forceString(mixed $arg): string

private function toUtf8(string $xml): string
{
$pattern = '/encoding="(.*?)"/';
$result = preg_match($pattern, $xml, $matches);
$charset = strtoupper($result ? $matches[1] : 'UTF-8');

$charset = $this->findCharSet($xml);
if ($charset !== 'UTF-8') {
$xml = self::forceString(mb_convert_encoding($xml, 'UTF-8', $charset));

$result = preg_match($pattern, $xml, $matches);
$charset = strtoupper($result ? $matches[1] : 'UTF-8');
$charset = $this->findCharSet($xml);
if ($charset !== 'UTF-8') {
throw new Reader\Exception('Suspicious Double-encoded XML, spreadsheet file load() aborted to prevent XXE/XEE attacks');
}
Expand All @@ -52,6 +48,22 @@ private function toUtf8(string $xml): string
return $xml;
}

private function findCharSet(string $xml): string
{
$patterns = [
'/encoding="([^"]*]?)"/',
"/encoding='([^']*?)'/",
];

foreach ($patterns as $pattern) {
if (preg_match($pattern, $xml, $matches)) {
return strtoupper($matches[1]);
}
}

return 'UTF-8';
}

/**
* Scan the XML for use of <!ENTITY to prevent XXE/XEE attacks.
*
Expand Down
2 changes: 2 additions & 0 deletions tests/data/Reader/Xml/XEETestInvalidUTF-7-single-quote.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<?xml version="1.0" encoding='UTF-7' standalone="yes"?>
+ADw-+ACE-DOCTYPE+ACA-foo+ACA-+AFs-+ADw-+ACE-ENTITY+ACA-toreplace+ACA-+ACI-xxe+AF8-test+ACI-+AD4-+ACA-+AF0-+AD4-+AAo-+ADw-sst+ACA-xmlns+AD0-+ACI-http://schemas.openxmlformats.org/spreadsheetml/2006/main+ACI-+ACA-count+AD0-+ACI-2+ACI-+ACA-uniqueCount+AD0-+ACI-1+ACI-+AD4-+ADw-si+AD4-+ADw-t+AD4-+ACY-toreplace+ADs-+ADw-/t+AD4-+ADw-/si+AD4-+ADw-/sst+AD4-
4 changes: 4 additions & 0 deletions tests/data/Reader/Xml/XEETestValidUTF-8-single-quote.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
<?xml version='1.0' encoding='UTF-8' standalone='yes'?>
<root>
test: Valid
</root>

0 comments on commit ea97c17

Please # to comment.