Skip to content

Commit fafdfc0

Browse files
author
Jaco Labuschagne
committed
xml value and sax parser
1 parent d68f27d commit fafdfc0

14 files changed

+5154
-0
lines changed

Diff for: src/SaxParser.php

+342
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,342 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Zodimo\Xml;
6+
7+
use Zodimo\BaseReturn\IOMonad;
8+
use Zodimo\BaseReturn\Option;
9+
use Zodimo\Xml\Value\XmlValue;
10+
use Zodimo\Xml\Value\XmlValueBuilder;
11+
12+
/**
 * Streaming (SAX style) XML reader that collects selected element subtrees
 * and hands each completed subtree to a registered callback.
 *
 * Element paths are written as "/root/child/grandchild". A callback registered
 * for a path receives one XmlValue for every element found at that path; the
 * value contains the element itself, its text and all of its descendants.
 *
 * An instance wraps a single ext-xml parser and is meant for parsing one
 * document, fed either in chunks through parseString() or from a file through
 * parseFile().
 */
class SaxParser
{
    /**
     * Underlying ext-xml parser (a resource before PHP 8.0, an \XMLParser object since).
     *
     * @var resource|\XMLParser
     *
     * @phpstan-ignore class.notFound
     */
    private $parser;

    /**
     * Number of bytes parseFile() reads from the file per iteration.
     *
     * @var int<1,max>
     */
    private int $readBuffer = 8192;

    /**
     * Names of the currently open elements, preceded by an empty sentinel so
     * that the joined path starts with "/" (['', 'root', 'item'] => "/root/item").
     *
     * @var array<string>
     */
    private array $path;

    /**
     * Builders of the collected elements that are still open. Index 0 is the
     * element collection started from, the last entry is the innermost open
     * element. Empty while nothing is being collected.
     *
     * @var array<int,XmlValueBuilder>
     */
    private array $openBuilders;

    /**
     * Callbacks keyed by the element path they were registered for.
     *
     * @var array<string,callable>
     */
    private array $callbacks;

    /**
     * Configures the given ext-xml parser and attaches this object's handlers to it.
     *
     * @param resource|\XMLParser $parser
     *
     * @phpstan-ignore class.notFound
     */
    public function __construct($parser)
    {
        $this->parser = $parser;
        $this->path = [''];
        $this->openBuilders = [];
        $this->callbacks = [];

        // Keep element names exactly as written in the document.
        // @phpstan-ignore argument.type
        xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, 0);
        // @phpstan-ignore argument.type
        xml_parser_set_option($this->parser, XML_OPTION_SKIP_WHITE, 1);

        // The handlers are complete [object, method] callables, so the
        // xml_set_object() call (deprecated as of PHP 8.4) is not required.
        // @phpstan-ignore argument.type
        xml_set_element_handler($this->parser, [$this, 'startTag'], [$this, 'endTag']);
        // @phpstan-ignore argument.type
        xml_set_character_data_handler($this->parser, [$this, 'tagData']);
    }

    /**
     * Creates a SaxParser backed by a new UTF-8 ext-xml parser.
     */
    public static function create(): SaxParser
    {
        return new self(xml_parser_create('UTF-8'));
    }

    /**
     * Returns the number of bytes read from a file per iteration.
     *
     * @return int
     */
    public function getReadBuffer()
    {
        return $this->readBuffer;
    }

    /**
     * Element start handler, invoked by ext-xml as
     * start_element_handler(XMLParser $parser, string $name, array $attributes): void.
     *
     * Opens a builder for the element when it lies inside a subtree that is
     * already being collected, or when a callback is registered for its path
     * (which starts a new collection).
     *
     * @param mixed               $_          the parser that triggered the call (unused)
     * @param array<string,mixed> $attributes
     */
    public function startTag($_, string $name, array $attributes): void
    {
        $this->addNodeToPath($name);

        if ($this->isCollecting() || $this->hasHandler($this->pathAsString())) {
            $this->openBuilders[] = XmlValueBuilder::create($name, $attributes);
        }
    }

    /**
     * Element end handler, invoked by ext-xml as
     * end_element_handler(XMLParser $parser, string $name): void.
     *
     * A finished nested element is attached to its parent builder. When the
     * element that started the collection closes, the subtree is built and
     * passed to the registered callback, and collection stops so that later
     * elements are matched against the registered paths again.
     *
     * @param mixed $_ the parser that triggered the call (unused)
     *
     * @throws \Throwable        the error raised by the callback, if any
     * @throws \RuntimeException when the callback failed with a non-throwable error
     */
    public function endTag($_, string $name): void
    {
        // $name is always the last path segment: the parser only reports
        // matching end tags, so the path alone is enough to leave the element.
        $path = $this->pathAsString();
        if (count($this->path) > 1) {
            // never drop the leading '' sentinel
            array_pop($this->path);
        }

        $finished = array_pop($this->openBuilders);
        if (null === $finished) {
            // Not collecting: nobody is interested in this element.
            return;
        }

        if ($this->isCollecting()) {
            // A nested element closed: hand it to its (still open) parent.
            $parentIndex = count($this->openBuilders) - 1;
            $this->openBuilders[$parentIndex] = self::applyToBuilder(
                $this->openBuilders[$parentIndex],
                static fn (XmlValueBuilder $parent) => $parent->addChild($finished)
            );

            return;
        }

        // The element collection started from has closed; the builder stack is
        // already empty at this point, so the state is clean even if the
        // callback fails below.
        $result = $this->callHandlerWithData($path, $finished->build());
        if ($result->isFailure()) {
            $error = $result->unwrapFailure(fn ($_) => new \RuntimeException('BUG, false positive on callback failure'));
            if ($error instanceof \Throwable) {
                throw $error;
            }

            // @phpstan-ignore deadCode.unreachable
            throw new \RuntimeException((string) $error);
        }
    }

    /**
     * Character data handler, invoked by ext-xml as
     * handler(XMLParser $parser, string $data): void.
     *
     * @param mixed $_ the parser that triggered the call (unused)
     */
    public function tagData($_, string $data): void
    {
        $this->addData($data);
    }

    /**
     * Appends text to the innermost element that is currently being collected.
     * Text outside of a collected subtree is ignored.
     */
    public function addData(string $data): void
    {
        if (!$this->isCollecting()) {
            return;
        }

        $current = count($this->openBuilders) - 1;
        $this->openBuilders[$current] = self::applyToBuilder(
            $this->openBuilders[$current],
            static fn (XmlValueBuilder $builder) => $builder->addValue($data)
        );
    }

    /**
     * Feeds one chunk of the document to the parser.
     *
     * Fails without parsing when no callback is registered, because nothing
     * could observe the result. Errors raised by a callback are thrown from
     * this method (see endTag()).
     *
     * @param string $data    the next chunk of XML
     * @param bool   $isFinal true when $data is the last chunk of the document
     *
     * @return IOMonad<void,mixed> success, or a failure holding a description of the parse error
     */
    public function parseString(string $data, bool $isFinal): IOMonad
    {
        if ([] === $this->callbacks) {
            // we dont need to do anything, nobody is there to observe
            // you probably think you are observing, so this should be an error state
            return IOMonad::fail('No callbacks registered');
        }

        // @phpstan-ignore argument.type
        $parsed = 1 === xml_parse($this->parser, $data, $isFinal);

        // The error details have to be read while the parser still exists.
        $failure = $parsed ? null : $this->describeParseError();

        // Only the pre PHP 8 resource needs freeing; for the \XMLParser object
        // xml_parser_free() has no effect.
        if ($isFinal && is_resource($this->parser)) {
            xml_parser_free($this->parser);
        }

        if ($parsed) {
            // @phpstan-ignore return.type
            return IOMonad::pure(null);
        }

        return IOMonad::fail($failure);
    }

    /**
     * Parses the given file in chunks of getReadBuffer() bytes. Files with a
     * "gz" extension are read through the compress.zlib:// stream wrapper.
     *
     * @return IOMonad<void,mixed> success, or a failure describing why the file
     *                             could not be opened, read or parsed
     */
    public function parseFile(string $file)
    {
        $handle = fopen(self::wrapGzip($file), 'r');
        if (false === $handle) {
            return IOMonad::fail(new \Exception('Unable to open file.'));
        }

        $result = IOMonad::pure(null);

        try {
            while (!feof($handle) && $result->isSuccess()) {
                $data = fread($handle, $this->readBuffer);
                if (false === $data) {
                    // A failed read must not be reported as a successful parse.
                    $result = IOMonad::fail(new \Exception('Unable to read file.'));

                    break;
                }
                $result = $this->parseString($data, feof($handle));
            }
        } finally {
            // Also runs when a callback error is thrown out of parseString().
            fclose($handle);
        }

        // @phpstan-ignore return.type
        return $result;
    }

    /**
     * Registers the callback that receives every element found at $path
     * (for example "/root/item"). "/" stands for the document element,
     * whatever its name. A later registration for the same path replaces
     * the earlier one.
     */
    public function registerCallback(string $path, callable $callback): void
    {
        $this->callbacks[$path] = $callback;
    }

    /**
     * Prefixes the uri with the zlib stream wrapper when it has a "gz" extension.
     *
     * @todo: do not be so naive
     */
    private static function wrapGzip(string $uri): string
    {
        if ('gz' === strtolower(pathinfo($uri, PATHINFO_EXTENSION))) {
            return "compress.zlib://{$uri}";
        }

        return $uri;
    }

    /**
     * Runs an addChild()/addValue() style operation and returns the builder
     * that carries the change.
     *
     * XmlValueBuilder is defined outside of this file, so it is not visible
     * here whether those methods modify the builder in place or return a new
     * one; both are supported by preferring a returned builder and otherwise
     * keeping the original instance.
     */
    private static function applyToBuilder(XmlValueBuilder $builder, callable $operation): XmlValueBuilder
    {
        $result = $operation($builder);

        return $result instanceof XmlValueBuilder ? $result : $builder;
    }

    /**
     * Describes the parser's current error, including its position in the input.
     */
    private function describeParseError(): string
    {
        // @phpstan-ignore argument.type
        $code = (int) xml_get_error_code($this->parser);
        // @phpstan-ignore argument.type
        $line = (int) xml_get_current_line_number($this->parser);
        // @phpstan-ignore argument.type
        $column = (int) xml_get_current_column_number($this->parser);

        return sprintf(
            'XML parse error: %s (code %d) at line %d, column %d',
            xml_error_string($code) ?? 'unknown error',
            $code,
            $line,
            $column
        );
    }

    /**
     * True while inside an element subtree that is being collected.
     */
    private function isCollecting(): bool
    {
        return [] !== $this->openBuilders;
    }

    /**
     * Tells whether a callback applies to the given element path.
     */
    private function hasHandler(string $path): bool
    {
        return null !== $this->findHandlerPath($path);
    }

    /**
     * Finds the registered path whose callback applies to the given element path.
     *
     * An exact match wins. Otherwise the longest registered path that is an
     * ancestor of $path is used, which is what lets "/" match the document
     * element. Ancestors are compared on whole path segments, so "/root/item"
     * does not apply to "/root/items".
     *
     * @return null|string the registered path, or null when no callback applies
     */
    private function findHandlerPath(string $path): ?string
    {
        $best = null;
        foreach (array_keys($this->callbacks) as $key) {
            // Integer-like keys are stored as ints by PHP; compare as strings.
            $registered = (string) $key;
            if ($registered === $path) {
                return $registered;
            }

            $ancestor = rtrim($registered, '/').'/';
            if (0 === strpos($path, $ancestor) && (null === $best || strlen($registered) > strlen($best))) {
                $best = $registered;
            }
        }

        return $best;
    }

    /**
     * Joins the open element names into a path such as "/root/item".
     */
    private function pathAsString(): string
    {
        return implode('/', $this->path);
    }

    /**
     * Enters the element with the given name.
     */
    private function addNodeToPath(string $name): void
    {
        $this->path[] = $name;
    }

    /**
     * Returns the callback that applies to the given element path, if any.
     *
     * @return \Zodimo\BaseReturn\Option<callable>
     */
    private function getHandlerForPath(string $path): Option
    {
        $registered = $this->findHandlerPath($path);
        if (null === $registered) {
            return Option::none();
        }

        return Option::some($this->callbacks[$registered]);
    }

    /**
     * Passes a completed subtree to the callback that applies to $path.
     * Succeeds without doing anything when no callback applies.
     *
     * @return IOMonad<void,\Throwable> failure holds the error thrown by the callback
     */
    private function callHandlerWithData(string $path, XmlValue $data): IOMonad
    {
        // @phpstan-ignore return.type
        return $this->getHandlerForPath($path)->match(
            // the return value of the callback is ignored
            fn ($handler) => IOMonad::try(fn () => call_user_func($handler, $data))->fmap(fn ($_) => null),
            fn () => IOMonad::pure(null)
        );
    }
}

0 commit comments

Comments
 (0)