Skip to content

Commit c4710f0

Browse files
committed
Merge commit from fork
1 parent 6ea0c81 commit c4710f0

File tree

5 files changed

+124
-1
lines changed

5 files changed

+124
-1
lines changed

docs/changes/0.3.1.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# 0.3.1
2+
3+
## Enhancements
4+
5+
- N/A
6+
7+
## Bug fixes
8+
9+
- N/A
10+
11+
## Miscellaneous
12+
13+
- N/A
14+
15+
## Security fixes
16+
17+
- Fixed XXE when processing an XML file in the MathML format by [@012git012](https://github.com/012git012) & [@Progi1984](https://github.com/Progi1984) in [GHSA-42hm-pq2f-3r7m](https://github.com/PHPOffice/Math/security/advisories/GHSA-42hm-pq2f-3r7m)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<?php
2+
3+
namespace PhpOffice\Math\Exception;
4+
5+
/**
 * Exception type raised when input is refused for security reasons.
 *
 * Carries no state or behaviour of its own; it exists so callers can catch
 * security rejections separately from other MathException failures.
 */
class SecurityException extends MathException
{
}

src/Math/Reader/MathML.php

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
use PhpOffice\Math\Exception\InvalidInputException;
1111
use PhpOffice\Math\Exception\NotImplementedException;
1212
use PhpOffice\Math\Math;
13+
use PhpOffice\Math\Reader\Security\XmlScanner;
1314

1415
class MathML implements ReaderInterface
1516
{
@@ -22,8 +23,17 @@ class MathML implements ReaderInterface
2223
/** @var DOMXPath */
2324
private $xpath;
2425

26+
/** @var XmlScanner */
27+
private $xmlScanner;
28+
29+
/**
 * Prepare the reader by obtaining the XML scanner that read() runs the
 * raw content through before it is handed to the DOM parser.
 */
public function __construct()
{
    $this->xmlScanner = XmlScanner::getInstance();
}
33+
2534
public function read(string $content): ?Math
2635
{
36+
$content = $this->xmlScanner->scan($content);
2737
$content = str_replace(
2838
[
2939
'&InvisibleTimes;',
@@ -35,7 +45,7 @@ public function read(string $content): ?Math
3545
);
3646

3747
$this->dom = new DOMDocument();
38-
$this->dom->loadXML($content, LIBXML_DTDLOAD);
48+
$this->dom->loadXML($content);
3949

4050
$this->math = new Math();
4151
$this->parseNode(null, $this->math);
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
<?php
2+
3+
namespace PhpOffice\Math\Reader\Security;
4+
5+
use PhpOffice\Math\Exception\SecurityException;
6+
7+
/**
 * Pre-parse guard that inspects raw XML text for DOCTYPE declarations and
 * refuses anything other than a `math` DOCTYPE.
 */
class XmlScanner
{
    /**
     * Create a scanner.
     *
     * Despite the name, no instance is cached: every call returns a new object.
     */
    public static function getInstance(): self
    {
        return new self();
    }

    /**
     * Scan the XML for DOCTYPE declarations to prevent XXE/XEE attacks.
     *
     * Every `<!DOCTYPE` occurrence must be directly followed by ` math`;
     * otherwise loading is aborted. Checking each occurrence (rather than
     * merely requiring that `<!DOCTYPE math` appears somewhere) prevents a
     * foreign DOCTYPE from being waved through just because the payload also
     * embeds the text `<!DOCTYPE math`, e.g. inside a comment.
     *
     * NUL bytes are tolerated between the characters so the check also
     * matches text in which ASCII characters are interleaved with NULs
     * (as happens with UTF-16 encoded input).
     *
     * @param string $xml Raw XML content
     *
     * @return string The unmodified input when it is considered safe
     *
     * @throws SecurityException When a DOCTYPE other than `math` is found
     */
    public function scan(string $xml): string
    {
        // Don't rely purely on libxml_disable_entity_loader()
        $doctype = self::nulTolerantPattern('<!DOCTYPE');
        $mathName = self::nulTolerantPattern(' math');

        // The negative lookahead starts right after the final "E" (no
        // quantifier in between), so backtracking cannot be used to make an
        // allowed "<!DOCTYPE math" look like a foreign DOCTYPE.
        $foreignDoctype = '/' . $doctype . '(?!\0*' . $mathName . ')/';

        if (1 === preg_match($foreignDoctype, $xml)) {
            // Message kept verbatim: callers and tests match on it.
            throw new SecurityException('Detected use of ENTITY in XML, loading aborted to prevent XXE/XEE attacks');
        }

        return $xml;
    }

    /**
     * Build a regex fragment matching $literal with optional NUL bytes
     * between consecutive characters.
     *
     * Only called with fixed ASCII literals that contain no regex
     * metacharacters, so the characters are not quoted.
     */
    private static function nulTolerantPattern(string $literal): string
    {
        $chars = static::mb_str_split($literal, 1, 'UTF-8');
        if (!is_array($chars)) {
            // Never degrade to an empty (match-everything) pattern: for the
            // ASCII literals used here a byte-wise split is equivalent.
            $chars = str_split($literal);
        }

        return implode('\0*', $chars);
    }

    /**
     * Split a string into chunks of $split_length characters.
     *
     * Delegates to the native mb_str_split() when it is available and
     * otherwise falls back to a userland implementation.
     *
     * @param string $string
     * @param int<1, max> $split_length
     * @param string|null $encoding
     *
     * @return array<string>|bool|null
     */
    public static function mb_str_split(string $string, int $split_length = 1, ?string $encoding = null)
    {
        if (extension_loaded('mbstring') && function_exists('mb_str_split')) {
            return mb_str_split($string, $split_length, $encoding);
        }

        if ($split_length < 1) {
            trigger_error('The length of each segment must be greater than zero', \E_USER_WARNING);

            return false;
        }

        if (null === $encoding) {
            // This path exists for environments without the native function,
            // so do not assume mb_internal_encoding() is callable either.
            $encoding = function_exists('mb_internal_encoding') ? (string) mb_internal_encoding() : 'UTF-8';
        }

        if ('UTF-8' === $encoding || \in_array(strtoupper($encoding), ['UTF-8', 'UTF8'], true)) {
            return preg_split("/(.{{$split_length}})/u", $string, -1, \PREG_SPLIT_DELIM_CAPTURE | \PREG_SPLIT_NO_EMPTY);
        }

        // Other encodings: walk the string chunk by chunk.
        $result = [];
        $length = mb_strlen($string, $encoding);

        for ($i = 0; $i < $length; $i += $split_length) {
            $result[] = mb_substr($string, $i, $split_length, $encoding);
        }

        return $result;
    }
}

tests/Math/Reader/MathMLTest.php

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
use PhpOffice\Math\Element;
88
use PhpOffice\Math\Exception\InvalidInputException;
99
use PhpOffice\Math\Exception\NotImplementedException;
10+
use PhpOffice\Math\Exception\SecurityException;
1011
use PhpOffice\Math\Math;
1112
use PhpOffice\Math\Reader\MathML;
1213
use PHPUnit\Framework\TestCase;
@@ -294,4 +295,15 @@ public function testReadNotImplemented(): void
294295
$reader = new MathML();
295296
$math = $reader->read($content);
296297
}
298+
299+
/**
 * A document declaring a non-math DOCTYPE (here pointing at a php://filter
 * stream) must be refused with a SecurityException before it is parsed.
 */
public function testReadSecurity(): void
{
    $payload = '<?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE x SYSTEM "php://filter/convert.base64-decode/zlib.inflate/resource=data:,7Ztdb9owFIbv%2bRVZJ9armNjOZ2k7QUaL%2bRYO2nqFUnBFNQaMptP272cnNFuTsBbSskg1iATZzvGxn/ccX3A4fdfoecS7UsrK1A98hV5Rr9FVjlaz1UmlcnM7D9i6MlkufrB1AK79O2bqKltMllMWt96KL6ADwci7sJ4Yu0vr9/tlwKbqan27CPzrOXvevFGrbRvOGIseaCa7TAxok1x44xahXzQEcdKPKZPevap3RZw920I0VscWGLlU1efPsy0c5cbV1AoI7ZuOMCZW12nkcP9Q2%2bQObBNmL6ajg8s6xJqmJTrq5NIArX6zVk8Zcwwt4fPuLvHnbeBSvpdIQ6g93MvUv3CHqKNrmtEW4EYmCr5gDT5QzyNWE4x6xO1/aqQmgMhGYgaVDFUnScKltbFnaJoKHRuHK0L1pIkuaYselMe9cPUqRmm5C51u00kkhy1S3aBougkl7e4d6RGaTYeSehdCjAG/O/p%2bYfKyQsoLmgdlmsFYQFDjh6GWJyGE0ZfMX08EZtwNTdAYud7nLcksnwppA2UnqpCzgyDo1QadAU3vLOQZ82EHMxAi0KVcq7rzas5xD6AQoeqkYkgk02abukkJ/z%2bNvkj%2bjUy16Ba5d/S8anhBLwt44EgGkoFkIBlIBpKBZCAZSAaSgWQgGUgGkoFkIBlIBpKBZCAZSAaSgWQgGUgGxWOwW2nF7kt%2by7/Kb3ag2GUTUgBvXAAxiKxt4Is3sB4WniVrOvhwzB0CXerg5GN9esGRQv7RgQdMmMO9sIwtc/sIJUOCsY4ee7f7FIWu2Si4euKan8wg58nFsEIXxYGntgZqMog3Z2FrgPhgyzIOlsmijowqwb0jyMqMoGEbarqdOpP/iqFISMkSVFG1Z5p8f3OK%2bxAZ7gClpgUPg70rq0T2RIkcup/0newQ7NbcUXv/DPl4LL/N7hdfn2dp07pmd8v79YSdVVgwqcyWd8HC/8aOzkunf6r%2b2c8bpSxK/6uPmlf%2br/nSnyrHcduH99iqKiz7HwLxTLMgEM0QWUDjb3ji8NdHPslZmV%2bqR%2bfH56Xyxni1VGbV0m8=" []><foo></foo>M';

    // Expectations must be registered before the call that throws.
    $this->expectException(SecurityException::class);
    $this->expectExceptionMessage('Detected use of ENTITY in XML, loading aborted to prevent XXE/XEE attacks');

    (new MathML())->read($payload);
}
297309
}

0 commit comments

Comments
 (0)