From 9e11ec147dbf3459cbb5622e67c8edc824c41b99 Mon Sep 17 00:00:00 2001 From: Toon Verwerft Date: Wed, 1 Nov 2023 19:06:01 +0100 Subject: [PATCH] Introduce the nested() XML Reader matcher --- docs/reader.md | 69 +++++- src/Xml/Reader/Matcher/nested.php | 63 ++++++ src/Xml/Reader/Matcher/not.php | 2 +- src/Xml/Reader/Matcher/sequence.php | 9 +- src/Xml/Reader/Node/NodeSequence.php | 33 ++- src/bootstrap.php | 1 + tests/Xml/Reader/Matcher/NestedTest.php | 243 +++++++++++++++++++++ tests/Xml/Reader/Node/NodeSequenceTest.php | 68 +++++- 8 files changed, 476 insertions(+), 12 deletions(-) create mode 100644 src/Xml/Reader/Matcher/nested.php create mode 100644 tests/Xml/Reader/Matcher/NestedTest.php diff --git a/docs/reader.md b/docs/reader.md index ae72ec64..2e6a3584 100644 --- a/docs/reader.md +++ b/docs/reader.md @@ -304,6 +304,67 @@ use \VeeWee\Xml\Reader\Matcher; Matcher\namespaced_element('https://some', 'item'); ``` +#### nested + +Provide nested matchers that represent parts of an XML tree. +It can be used similar to the `//user` xpath operator to search on any matching node at any level in the XML. + +Given: + +```xml + + + Jos + Bos + Mos + + +``` + +This matcher will grab the `user` element with `locale="nl"` + +```php +use \VeeWee\Xml\Reader\Matcher; + +Matcher\nested( + // Breakpoint 1: + Matcher\document_element(), + // Breakpoint 2: Jos + // Searches for all elements that match `` and attribute `locale="nl"` in the `` document. + // Note that you can skip matching on `` here : it's not an exact matcher + Matcher\all( + Matcher\element_name('user'), + Matcher\attribute_value('locale', 'nl') + ) +); +``` + +Every provided matcher acts as a breakpoint in the `NodeSequence` for the next matcher, +making it composable with the exact XML tree [sequence](#sequence) matcher as well.
+ +```php +use \VeeWee\Xml\Reader\Matcher; + +Matcher\nested( + // Breakpoint 1: + Matcher\document_element(), + // Breakpoint 2: + // The nested matcher will provide the NodeSequence starting from the element after previous match. + // The sequence will basically receive: 'users > user' + Matcher\sequence( + // Level 0: The element inside at level 0 must exactly match + Matcher\element_name('users'), + // Level 1: The element inside at level 1 must exactly match + Matcher\element_name('user'), + ), + // Breakpoint 3: + // After matching a sequence, you can still continue matching deeper or adding even more sequences: + Matcher\element_name('email') +); +``` + +If you want every level of the XML to match exactly, you might use the [sequence](#sequence) matcher instead. + #### not Inverses a matcher's result. @@ -318,8 +379,10 @@ Matcher\not( #### sequence -Provide a sequence of matchers that represents the XML tree. -Only the items that are described by the sequence will match. +Provide a sequence of matchers that represents the exact XML tree. +Every provided matcher step must result in an exact match with the matcher on the same index. +It can be used similar to the `/root/users/user` xpath operator to search on an exact node match at every level in the XML. +Only the items that are described by the sequence will match: Given: @@ -352,6 +415,8 @@ Matcher\sequence( ); ``` +If you don't want every level of XML to match exactly, you might use the [nested](#nested) matcher instead. + #### Writing your own matcher diff --git a/src/Xml/Reader/Matcher/nested.php b/src/Xml/Reader/Matcher/nested.php new file mode 100644 index 00000000..a9be299b --- /dev/null +++ b/src/Xml/Reader/Matcher/nested.php @@ -0,0 +1,63 @@ + $matchers + * + * @return \Closure(NodeSequence): bool + */ +function nested(callable ... 
$matchers): Closure +{ + return static function (NodeSequence $sequence) use ($matchers) : bool { + $lastMatchedAtIndex = -1; + $currentMatcher = array_shift($matchers); + if (!$currentMatcher) { + return false; + } + + $stepCount = $sequence->count(); + foreach ($sequence->replay() as $index => $step) { + // Slice the step NodeSequence based on previous "match" breakpoint + // and see if it matches on current matcher: + $step = $step->slice($lastMatchedAtIndex + 1); + if (!$currentMatcher($step)) { + continue; + } + + // If there was a match, select the next matcher and store the last matched NodeSequence index. + $currentMatcher = array_shift($matchers); + $lastMatchedAtIndex = $index; + + // If the list of matchers is empty + // The function will return true if the element is the last step in the complete sequence. + // Otherwise, the nested match has an even deeper element on which we don't wish to match. + if (!$currentMatcher) { + $isLastStep = $index === $stepCount - 1; + + return $isLastStep; + } + } + + return false; + }; +} diff --git a/src/Xml/Reader/Matcher/not.php b/src/Xml/Reader/Matcher/not.php index e0673051..c268c18a 100644 --- a/src/Xml/Reader/Matcher/not.php +++ b/src/Xml/Reader/Matcher/not.php @@ -8,7 +8,7 @@ use VeeWee\Xml\Reader\Node\NodeSequence; /** - * @param callable(NodeSequence) $matcher + * @param callable(NodeSequence): bool $matcher * * @return \Closure(NodeSequence): bool */ diff --git a/src/Xml/Reader/Matcher/sequence.php b/src/Xml/Reader/Matcher/sequence.php index 4fe6e3d2..79284db0 100644 --- a/src/Xml/Reader/Matcher/sequence.php +++ b/src/Xml/Reader/Matcher/sequence.php @@ -22,15 +22,12 @@ function sequence(callable ... 
$matcherSequence): Closure { return static function (NodeSequence $sequence) use ($matcherSequence) : bool { - $nodeSequence = $sequence->sequence(); - if (count($matcherSequence) !== count($nodeSequence)) { + if (count($matcherSequence) !== $sequence->count()) { return false; } - $currentSequence = new NodeSequence(); - foreach ($nodeSequence as $i => $node) { - $currentSequence = $currentSequence->append($node); - $matcher = $matcherSequence[$i]; + foreach ($sequence->replay() as $index => $currentSequence) { + $matcher = $matcherSequence[$index]; if (!$matcher($currentSequence)) { return false; } diff --git a/src/Xml/Reader/Node/NodeSequence.php b/src/Xml/Reader/Node/NodeSequence.php index 3d2cb01f..924bd7ee 100644 --- a/src/Xml/Reader/Node/NodeSequence.php +++ b/src/Xml/Reader/Node/NodeSequence.php @@ -4,10 +4,13 @@ namespace VeeWee\Xml\Reader\Node; +use Countable; +use Generator; use InvalidArgumentException; use Webmozart\Assert\Assert; +use function Psl\Vec\slice; -final class NodeSequence +final class NodeSequence implements Countable { /** * @var list @@ -64,6 +67,34 @@ public function sequence(): array return $this->elementNodes; } + public function count(): int + { + return \count($this->elementNodes); + } + + /** + * @param non-negative-int $start + * @param non-negative-int|null $length + */ + public function slice(int $start, ?int $length = null): self + { + return new self(...slice($this->elementNodes, $start, $length)); + } + + /** + * Replays every step in the sequence + * + * @return Generator + */ + public function replay(): Generator + { + $step = new self(); + foreach ($this->elementNodes as $index => $node) { + $step = $step->append($node); + yield $index => $step; + } + } + /** * @throws InvalidArgumentException */ diff --git a/src/bootstrap.php b/src/bootstrap.php index 202424b7..6e9d71fd 100644 --- a/src/bootstrap.php +++ b/src/bootstrap.php @@ -136,6 +136,7 @@ require_once __DIR__.'/Xml/Reader/Matcher/namespaced_attribute.php'; 
require_once __DIR__.'/Xml/Reader/Matcher/namespaced_attribute_value.php'; require_once __DIR__.'/Xml/Reader/Matcher/namespaced_element.php'; +require_once __DIR__.'/Xml/Reader/Matcher/nested.php'; require_once __DIR__.'/Xml/Reader/Matcher/node_attribute.php'; require_once __DIR__.'/Xml/Reader/Matcher/node_name.php'; require_once __DIR__.'/Xml/Reader/Matcher/not.php'; diff --git a/tests/Xml/Reader/Matcher/NestedTest.php b/tests/Xml/Reader/Matcher/NestedTest.php new file mode 100644 index 00000000..5f08800f --- /dev/null +++ b/tests/Xml/Reader/Matcher/NestedTest.php @@ -0,0 +1,243 @@ + [ + nested( + document_element(), + element_name('users'), + all( + element_name('user'), + attribute_value('locale', 'nl') + ) + ), + <<<'EOXML' + + + Jos + Bos + Mos + + + EOXML, + [ + 'Jos', + ] + ]; + yield 'skipped-items' => [ + nested( + element_name('root'), + element_name('user'), + ), + <<<'EOXML' + + + Jos + Bos + Mos + + + EOXML, + [ + 'Jos', + 'Bos', + 'Mos', + ] + ]; + yield 'similar-paths' => [ + nested( + element_name('users'), + element_name('user'), + ), + <<<'EOXML' + + + Jos + Bos + + + Mos + + + EOXML, + [ + 'Jos', + 'Bos', + ] + ]; + yield 'combined-with-sequence' => [ + nested( + document_element(), + sequence( + element_name('users'), + element_name('user'), + ), + ), + <<<'EOXML' + + + Jos + Bos + + + EOXML, + [ + 'Jos', + 'Bos', + ] + ]; + yield 'multi-parent-items' => [ + nested( + element_name('root'), + element_name('user'), + ), + <<<'EOXML' + + + Jos + Bos + + + + Mos + + + EOXML, + [ + 'Jos', + 'Bos', + 'Mos', + ] + ]; + yield 'in-between-match' => [ + nested( + element_name('root'), + element_name('users'), + ), + <<<'EOXML' + + + Jos + Bos + + + Mos + + + EOXML, + [ + <<<'EOXML' + + Jos + Bos + + EOXML, + ] + ]; + yield 'deeply-nested-sequence' => [ + nested( + document_element(), + sequence(element_name('users')), + sequence(element_name('user')), + ), + <<<'EOXML' + + + Jos + Bos + + + EOXML, + [ + 'Jos', + 'Bos', + ] + ]; + } + + public static 
function provideMatcherCases(): Generator + { + yield 'it_returns_false_if_no_matcher' => [ + nested(), + new NodeSequence( + new ElementNode(1, 'root', 'root', '', '', []), + ), + false + ]; + + yield 'it_returns_false_if_no_sequence' => [ + nested(document_element()), + new NodeSequence(), + false + ]; + + yield 'it_returns_false_if_there_are_no_matchers_left_before_the_end_of_the_node_sequence' => [ + nested(document_element()), + new NodeSequence( + new ElementNode(1, 'root', 'root', '', '', []), + new ElementNode(1, 'users', 'users', '', '', []), + ), + false + ]; + + yield 'it_returns_false_if_there_are_still_matchers_left_at_the_end_of_the_node_sequence' => [ + nested(element_name('notfound')), + new NodeSequence( + new ElementNode(1, 'root', 'root', '', '', []), + new ElementNode(1, 'users', 'users', '', '', []), + ), + false + ]; + + yield 'it_returns_true_if_the_last_matcher_hits_the_end_of_the_node_sequence' => [ + nested(element_name('root'), element_name('users')), + new NodeSequence( + new ElementNode(1, 'root', 'root', '', '', []), + new ElementNode(1, 'users', 'users', '', '', []), + ), + true + ]; + + yield 'it_can_skip_nodes_looking_for_the_next_one' => [ + nested(element_name('root'), element_name('user')), + new NodeSequence( + new ElementNode(1, 'root', 'root', '', '', []), + new ElementNode(1, 'users', 'users', '', '', []), + new ElementNode(1, 'user', 'user', '', '', []), + ), + true + ]; + + yield 'it_uses_a_sliced_node_sequence_breakpoint_on_match' => [ + nested( + element_name('root'), + sequence( + element_name('users'), + element_name('user') + ) + ), + new NodeSequence( + new ElementNode(1, 'root', 'root', '', '', []), + new ElementNode(1, 'users', 'users', '', '', []), + new ElementNode(1, 'user', 'user', '', '', []), + ), + true + ]; + } +} diff --git a/tests/Xml/Reader/Node/NodeSequenceTest.php b/tests/Xml/Reader/Node/NodeSequenceTest.php index 463ce3a5..3b5ad38e 100644 --- a/tests/Xml/Reader/Node/NodeSequenceTest.php +++ 
b/tests/Xml/Reader/Node/NodeSequenceTest.php @@ -4,6 +4,7 @@ namespace VeeWee\Tests\Xml\Reader\Node; +use Countable; use InvalidArgumentException; use PHPUnit\Framework\TestCase; use VeeWee\Xml\Reader\Node\ElementNode; @@ -22,7 +23,7 @@ public function test_it_can_be_empty(): void $sequence->current(); } - + public function test_it_can_not_pop_empty_sequence(): void { $sequence = new NodeSequence(); @@ -30,7 +31,7 @@ public function test_it_can_not_pop_empty_sequence(): void $this->expectException(InvalidArgumentException::class); $sequence->pop(); } - + public function test_it_can_remember_sequences_in_an_immutable_way(): void { $sequence = new NodeSequence( @@ -61,4 +62,67 @@ public function test_it_can_remember_sequences_in_an_immutable_way(): void static::assertNull($emptySequence->parent()); static::assertSame([], $emptySequence->sequence()); } + + + public function test_it_can_count_a_sequence(): void + { + $sequence = new NodeSequence( + new ElementNode(1, 'item1', 'item1', '', '', []), + ); + + static::assertInstanceOf(Countable::class, $sequence); + static::assertCount(1, $sequence); + } + + + public function test_it_can_replay_sequence(): void + { + $sequence = new NodeSequence( + $element1 = new ElementNode(1, 'item1', 'item1', '', '', []), + $element2 = new ElementNode(1, 'item2', 'item2', '', '', []), + ); + + $replayed = [...$sequence->replay()]; + static::assertCount(2, $replayed); + static::assertEquals(new NodeSequence($element1), $replayed[0]); + static::assertEquals(new NodeSequence($element1, $element2), $replayed[1]); + } + + /** + * Added to keep both infections 'YieldValue' and psalm's non-negative-int happy. + * Yet it adds little value since it is not allowed to use the sequence like this in psalm. + * Meh ... 
:) + * + * + */ + public function test_it_keeps_index_during_yielding(): void + { + $sequence = new NodeSequence( + el1: $element1 = new ElementNode(1, 'item1', 'item1', '', '', []), + el2: $element2 = new ElementNode(1, 'item2', 'item2', '', '', []), + ); + + $replayed = [...$sequence->replay()]; + static::assertCount(2, $replayed); + static::assertEquals(new NodeSequence($element1), $replayed['el1']); + static::assertEquals(new NodeSequence($element1, $element2), $replayed['el2']); + } + + + public function test_it_can_slice_node_sequence(): void + { + $emptySequence = new NodeSequence(); + static::assertEquals($emptySequence, $emptySequence->slice(0, 100)); + + $sequence = new NodeSequence( + $element1 = new ElementNode(1, 'item1', 'item1', '', '', []), + $element2 = new ElementNode(1, 'item2', 'item2', '', '', []), + ); + + static::assertEquals($sequence, $sequence->slice(-1)); + static::assertEquals(new NodeSequence($element1), $sequence->slice(-1, 1)); + static::assertEquals(new NodeSequence($element1), $sequence->slice(0, 1)); + static::assertEquals(new NodeSequence($element1, $element2), $sequence->slice(0)); + static::assertEquals(new NodeSequence($element2), $sequence->slice(1, 1)); + } }