diff --git a/docs/dom.md b/docs/dom.md index c489d165..79889e60 100644 --- a/docs/dom.md +++ b/docs/dom.md @@ -647,6 +647,18 @@ $document = Document::fromLoader($loader, ...$configurators); ## Loaders +#### xml_document_loader + +Loads an XML document from an external `Dom\XMLDocument`. +It copies the content of the external document into a new `Dom\XMLDocument` and re-applies e.g. LIBXML flags. + +```php +use VeeWee\Xml\Dom\Document; +use VeeWee\Xml\Dom\Loader\xml_document_loader; + +$doc = Document::fromLoader(xml_document_loader($originalDocument, options: LIBXML_NOCDATA, override_encoding: 'UTF-8')); +``` + #### xml_file_loader Loads an XML document from a file. diff --git a/src/Xml/Dom/Configurator/canonicalize.php b/src/Xml/Dom/Configurator/canonicalize.php index 51a6883b..a0fae431 100644 --- a/src/Xml/Dom/Configurator/canonicalize.php +++ b/src/Xml/Dom/Configurator/canonicalize.php @@ -15,13 +15,18 @@ */ function canonicalize(): Closure { - return static fn (XMLDocument $document): XMLDocument - => Document::fromLoader( + return static function (XMLDocument $document): XMLDocument { + if (!$document->documentElement) { + return $document; + } + + return Document::fromLoader( xml_string_loader( non_empty_string()->assert($document->C14N()), LIBXML_NSCLEAN + LIBXML_NOCDATA ), pretty_print(), - normalize() + normalize(), )->toUnsafeDocument(); + }; } diff --git a/src/Xml/Dom/Configurator/pretty_print.php b/src/Xml/Dom/Configurator/pretty_print.php index 88f2ec93..aaff15f7 100644 --- a/src/Xml/Dom/Configurator/pretty_print.php +++ b/src/Xml/Dom/Configurator/pretty_print.php @@ -7,7 +7,7 @@ use Closure; use Dom\XMLDocument; use VeeWee\Xml\Dom\Document; -use function VeeWee\Xml\Dom\Loader\xml_string_loader; +use function VeeWee\Xml\Dom\Loader\xml_document_loader; /** * @return Closure(XMLDocument): XMLDocument @@ -15,15 +15,12 @@ function pretty_print(): Closure { return static function (XMLDocument $document): XMLDocument { - $trimmed = Document::fromLoader( - xml_string_loader( - Document::fromUnsafeDocument($document)->toXmlString(), - LIBXML_NOBLANKS - ) + $prettyPrinted = Document::fromLoader( + xml_document_loader($document, LIBXML_NOBLANKS) )->toUnsafeDocument(); - $trimmed->formatOutput = true; + $prettyPrinted->formatOutput = true; - return $trimmed; + return $prettyPrinted; }; } diff --git a/src/Xml/Dom/Configurator/trim_spaces.php b/src/Xml/Dom/Configurator/trim_spaces.php index 0518055d..54516267 100644 --- a/src/Xml/Dom/Configurator/trim_spaces.php +++ b/src/Xml/Dom/Configurator/trim_spaces.php @@ -7,7 +7,7 @@ use Closure; use Dom\XMLDocument; use VeeWee\Xml\Dom\Document; -use function VeeWee\Xml\Dom\Loader\xml_string_loader; +use function VeeWee\Xml\Dom\Loader\xml_document_loader; /** * @return Closure(XMLDocument): XMLDocument @@ -16,10 +16,7 @@ function trim_spaces(): Closure { return static function (XMLDocument $document): XMLDocument { $trimmed = Document::fromLoader( - xml_string_loader( - Document::fromUnsafeDocument($document)->toXmlString(), - LIBXML_NOBLANKS - ) + xml_document_loader($document, LIBXML_NOBLANKS) )->toUnsafeDocument(); $trimmed->formatOutput = false; diff --git a/src/Xml/Dom/Loader/xml_document_loader.php b/src/Xml/Dom/Loader/xml_document_loader.php new file mode 100644 index 00000000..5bdce09b --- /dev/null +++ b/src/Xml/Dom/Loader/xml_document_loader.php @@ -0,0 +1,35 @@ + disallow_issues(static function () use ($importedDocument, $options, $override_encoding): XMLDocument { + + if ($importedDocument->documentElement === null) { + return XMLDocument::createEmpty($importedDocument->xmlVersion, $importedDocument->xmlEncoding); + } + + return XMLDocument::createFromString( + non_empty_string()->assert($importedDocument->saveXml()), + $options, + $override_encoding + ); + }); +} diff --git a/src/bootstrap.php b/src/bootstrap.php index 1341e878..2a459941 100644 --- a/src/bootstrap.php +++ b/src/bootstrap.php @@ -32,6 +32,7 @@ 'Xml\Dom\Configurator\trim_spaces' => __DIR__.'/Xml/Dom/Configurator/trim_spaces.php', 'Xml\Dom\Configurator\utf8' => __DIR__.'/Xml/Dom/Configurator/utf8.php', 'Xml\Dom\Configurator\validator' => __DIR__.'/Xml/Dom/Configurator/validator.php', + 'Xml\Dom\Loader\xml_document_loader' => __DIR__.'/Xml/Dom/Loader/xml_document_loader.php', 'Xml\Dom\Loader\xml_file_loader' => __DIR__.'/Xml/Dom/Loader/xml_file_loader.php', 'Xml\Dom\Loader\xml_node_loader' => __DIR__.'/Xml/Dom/Loader/xml_node_loader.php', 'Xml\Dom\Loader\xml_string_loader' => __DIR__.'/Xml/Dom/Loader/xml_string_loader.php', diff --git a/tests/Xml/Dom/Configurator/CanonicalizeTest.php b/tests/Xml/Dom/Configurator/CanonicalizeTest.php index 75930147..9ee34aec 100644 --- a/tests/Xml/Dom/Configurator/CanonicalizeTest.php +++ b/tests/Xml/Dom/Configurator/CanonicalizeTest.php @@ -23,6 +23,16 @@ public function test_it_can_canonicalize(string $input, string $expected): void static::assertSame($expected, $actual); } + public function test_it_can_canonicalize_empty_xml(): void + { + $configurator = canonicalize(); + + $doc = Document::empty()->toUnsafeDocument(); + $result = $configurator($doc); + + static::assertSame($doc, $result); + } + public static function provideXmls() { yield 'no-action' => [ diff --git a/tests/Xml/Dom/Configurator/PrettyPrintTest.php b/tests/Xml/Dom/Configurator/PrettyPrintTest.php index a5d98ee3..eeef64cd 100644 --- a/tests/Xml/Dom/Configurator/PrettyPrintTest.php +++ b/tests/Xml/Dom/Configurator/PrettyPrintTest.php @@ -11,7 +11,7 @@ final class PrettyPrintTest extends TestCase { - public function test_it_can_trim_contents(): void + public function test_it_can_pretty_print_contents(): void { $configurator = pretty_print(); @@ -28,4 +28,27 @@ public function test_it_can_trim_contents(): void static::assertTrue($result->formatOutput); static::assertSame($expected, xml_string()($result->documentElement)); } + + public function test_it_can_pretty_print_empty_xml(): void + { + $configurator = pretty_print(); + + $doc = Document::empty()->toUnsafeDocument(); + $result = $configurator($doc); + + $result->append( + $hello = $result->createElement('hello') + ); + $hello->append($result->createElement('world')); + + $expected = << + + + EOXML; + + static::assertNotSame($doc, $result); + static::assertTrue($result->formatOutput); + static::assertSame($expected, xml_string()($result->documentElement)); + } } diff --git a/tests/Xml/Dom/Configurator/TrimSpacesTest.php b/tests/Xml/Dom/Configurator/TrimSpacesTest.php index db48ec97..a908f985 100644 --- a/tests/Xml/Dom/Configurator/TrimSpacesTest.php +++ b/tests/Xml/Dom/Configurator/TrimSpacesTest.php @@ -21,4 +21,25 @@ public function test_it_can_trim_contents(): void static::assertFalse($result->formatOutput); static::assertSame('', xml_string()($result->documentElement)); } + + public function test_it_can_trim_spaces_on_empty_xml(): void + { + $configurator = trim_spaces(); + + $doc = Document::empty()->toUnsafeDocument(); + $result = $configurator($doc); + + $result->append( + $hello = $result->createElement('hello') + ); + $hello->append($result->createElement('world')); + + $expected = << + EOXML; + + static::assertNotSame($doc, $result); + static::assertFalse($result->formatOutput); + static::assertSame($expected, xml_string()($result->documentElement)); + } } diff --git a/tests/Xml/Dom/Loader/XmlDocumentLoaderTest.php b/tests/Xml/Dom/Loader/XmlDocumentLoaderTest.php new file mode 100644 index 00000000..3cf82aa7 --- /dev/null +++ b/tests/Xml/Dom/Loader/XmlDocumentLoaderTest.php @@ -0,0 +1,54 @@ +'); + $loader = xml_document_loader($initialDoc); + $doc = $loader(); + + static::assertXmlStringEqualsXmlString($initialDoc->saveXml(), $doc->saveXML()); + } + + public function test_it_can_load_xml_string_with_different_charset(): void + { + $initialDoc = XMLDocument::createFromString('héllo'); + $loader = xml_document_loader($initialDoc, override_encoding: 'Windows-1252'); + $doc = $loader(); + + static::assertNotSame($initialDoc, $doc); + static::assertSame('héllo', $doc->documentElement->textContent); + static::assertSame('Windows-1252', $doc->xmlEncoding); + } + + public function test_it_can_load_with_options(): void + { + $initialDoc = XMLDocument::createFromString(''); + $loader = xml_document_loader($initialDoc, options: LIBXML_NOCDATA); + $doc = $loader(); + + static::assertNotSame($initialDoc, $doc); + static::assertSame('HELLO', $doc->saveXML($doc->documentElement)); + } + + public function test_it_can_load_empty_xml_string(): void + { + $initialDoc = XMLDocument::createEmpty(version: '1.1', encoding: 'ASCII'); + $loader = xml_document_loader($initialDoc); + $doc = $loader(); + + static::assertSame($initialDoc->saveXml(), $doc->saveXML()); + static::assertSame('ASCII', $doc->xmlEncoding); + static::assertSame('1.1', $doc->xmlVersion); + } + +}