Skip to content

Commit

Permalink
Merge pull request #305 from oat-sa/fix/TR-3373/extended_text_or_inli…
Browse files Browse the repository at this point in the history
…ne_text_with_pattern

fix: remove checking caret & dollar, add test
  • Loading branch information
ivan-timchur-oat authored Mar 1, 2022
2 parents 0927bd2 + db4d9a4 commit 7795402
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 27 deletions.
64 changes: 38 additions & 26 deletions src/qtism/runtime/expressions/operators/Utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ public static function lcm($a, $b)
* Compute the arithmetic mean of $sample.
*
* @param array $sample An array of numeric values.
* @return false|number The arithmetic mean of $sample or false if any of the values of $sample is not numeric or if $sample is empty.
* @return false|number The arithmetic mean of $sample or false if any of the values of $sample is not numeric or
* if $sample is empty.
*/
public static function mean(array $sample)
{
Expand Down Expand Up @@ -117,7 +118,7 @@ public static function mean(array $sample)
* returns false.
*
* @param array $sample An array of numeric values.
* @param bool $correction (optional) Apply the Bessel's correction on the computed variance.
* @param bool $correction (optional) Apply the Bessel's correction on the computed variance.
* @return false|number The variance of $sample or false if $sample is empty or contains non-numeric values.
* @link http://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance
*/
Expand Down Expand Up @@ -154,16 +155,18 @@ public static function variance(array $sample, $correction = true)
/**
* Compute the standard deviation of $sample.
*
* * To compute the population standard deviation: $sample is considered as a population if $correction equals false.
* * To compute the population standard deviation: $sample is considered as a population if $correction equals
* false.
* * To compute the sample standard deviation: $sample is considered as sample if $correction equals true.
*
* IMPORTANT:
* If $correction is true, $sample must contain more than 1 value, otherwise this method
* returns false.
*
* @param array $sample An array of numeric values.
* @param bool $correction (optional) Whether to apply Bessel's correction.
* @return false|number The standard deviation of $sample or false if $sample is empty or contains non-numeric values.
* @param bool $correction (optional) Whether to apply Bessel's correction.
* @return false|number The standard deviation of $sample or false if $sample is empty or contains non-numeric
* values.
* @link http://en.wikipedia.org/wiki/Variance#Population_variance_and_sample_variance
*/
public static function standardDeviation(array $sample, $correction = true)
Expand All @@ -186,7 +189,7 @@ public static function standardDeviation(array $sample, $correction = true)
* @param string $string
* @return string|bool The delimited string or false if no appropriate delimiters can be found.
*/
public static function pregAddDelimiter($string)
public static function pregAddDelimiter(string $string)
{
    // The forward slash is used as the delimiter, so every slash of the
    // pattern that is not escaped yet has to be escaped first.
    $body = static::escapeSymbols($string, '/');

    return '/' . $body . '/';
}
Expand All @@ -195,7 +198,7 @@ public static function pregAddDelimiter($string)
* Get the amount of backslash (\) characters in $string that precede $offset.
*
* @param string $string
* @param int $offset
* @param int $offset
* @return int
*/
public static function getPrecedingBackslashesCount($string, $offset)
Expand All @@ -218,11 +221,11 @@ public static function getPrecedingBackslashesCount($string, $offset)
/**
* Escape with a backslash (\) the $symbols in $string.
*
* @param string $string
* @param string $string
* @param array|string $symbols An array of symbols or a single symbol.
* @return string The escaped string.
*/
public static function escapeSymbols($string, $symbols)
public static function escapeSymbols(string $string, $symbols): string
{
if (!is_array($symbols)) {
$symbols = [$symbols];
Expand All @@ -238,9 +241,9 @@ public static function escapeSymbols($string, $symbols)
// If the amount of preceding backslashes is even, it is not escaped.
// If a caret is preceded by a left bracket, don't escape it
if ((in_array($char, $symbols)) && static::getPrecedingBackslashesCount($string, $i) % 2 === 0
&& ($i === 0 || $char !== '^' || $string[$i-1] !== '[')
&& ($i === 0 || $char !== '^' || $string[$i - 1] !== '[')
) {
// It is not escaped, so ecape it.
// It is not escaped, so escape it.
$returnValue .= '\\';
}

Expand All @@ -255,7 +258,8 @@ public static function escapeSymbols($string, $symbols)
* fully qualified class name e.g. 'org\qtism\custom\Explode'.
*
* @param string $class A custom operator class name where namespace separator is '.' (dot).
* @return bool|string A fully qualified PHP class name corresponding to $class or false if the transformation failed.
* @return bool|string A fully qualified PHP class name corresponding to $class or false if the transformation
* failed.
*/
public static function customOperatorClassToPhpClass($class)
{
Expand Down Expand Up @@ -329,18 +333,26 @@ public static function lastPregErrorMessage()
* @param string $pattern
* @return string
*/
public static function prepareXsdPatternForPcre($pattern)
{
// XML Schema implicitly anchors the entire regular expression, so neither
// caret (^) nor dollar ($) has a special meaning there: both are ordinary
// characters and get escaped before the pattern is handed over to PCRE.
// see http://www.regular-expressions.info/xml.html
$pattern = self::escapeSymbols($pattern, ['$', '^']);
// Make the implicit anchoring explicit, then wrap the expression in delimiters.
$pattern = self::pregAddDelimiter('^' . $pattern . '$');

// XSD regexps are always case-sensitive (nothing to do) and their dot matches white-spaces (use PCRE_DOTALL).
$pattern .= 's';

return $pattern;
}
public static function prepareXsdPatternForPcre(string $pattern): string
{
    // An XSD pattern is implicitly anchored on both sides, so carets written at
    // its very beginning and dollar signs written at its very end are dropped first.
    $unanchored = self::withoutStringAnchors($pattern);

    // Elsewhere in an XSD pattern, caret (^) and dollar ($) have no special meaning
    // and are considered as normal characters: escape them for PCRE.
    // see http://www.regular-expressions.info/xml.html
    $escaped = self::escapeSymbols($unanchored, ['$', '^']);

    // Make the implicit anchoring explicit and delimit the expression. XSD regexps
    // are always case-sensitive (nothing to do) and their dot matches white-spaces,
    // hence the PCRE_DOTALL (s) modifier appended after the closing delimiter.
    return self::pregAddDelimiter('^' . $escaped . '$') . 's';
}

/**
 * Remove the explicit string anchors surrounding $pattern.
 *
 * Every leading caret (^) is removed. Trailing dollar signs ($) are removed
 * only while they are not escaped: a dollar preceded by an odd number of
 * backslashes (e.g. 'abc\$') is a literal character. Stripping it would leave
 * a dangling backslash at the end of the pattern, which would then escape any
 * '$' anchor appended afterwards.
 *
 * @param string $pattern
 * @return string $pattern without its leading and trailing anchors.
 */
private static function withoutStringAnchors(string $pattern): string
{
    $pattern = ltrim($pattern, '^');
    $end = strlen($pattern);

    while ($end > 0 && $pattern[$end - 1] === '$') {
        // Count the backslashes located right before the candidate dollar sign.
        $backslashes = 0;
        for ($i = $end - 2; $i >= 0 && $pattern[$i] === '\\'; $i--) {
            $backslashes++;
        }

        if ($backslashes % 2 !== 0) {
            // Escaped dollar: a literal character, not an anchor. Keep it.
            break;
        }

        $end--;
    }

    return (string)substr($pattern, 0, $end);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,14 @@ public function testLastPregErrorMessage($pcre, $subject, $message, $recursionLi
$this::assertEquals($message, OperatorsUtils::lastPregErrorMessage());
}

/**
 * Check that an XSD pattern is converted into the expected PCRE pattern.
 *
 * @dataProvider patternForPcreProvider
 * @param string $pattern The XSD pattern to convert.
 * @param string $expected The PCRE pattern the conversion must produce.
 */
public function testPrepareXsdPatternForPcre($pattern, $expected)
{
    // Both values are strings: compare them strictly (type and value).
    $this::assertSame($expected, OperatorsUtils::prepareXsdPatternForPcre($pattern));
}

/**
* @return array
*/
Expand Down Expand Up @@ -333,8 +341,32 @@ public function lastPregErrorMessageProvider()
return [
['', 'foobar', 'PCRE Engine internal error'],
['/***', 'foobar', 'PCRE Engine internal error'],
['/(?:\D+|<\d+>)*[!?]/', 'foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar', 'PCRE Engine backtrack limit exceeded'],
[
'/(?:\D+|<\d+>)*[!?]/',
'foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar',
'PCRE Engine backtrack limit exceeded',
],
['/abc/u', "\xa0\xa1", 'PCRE Engine malformed UTF-8 error'],
];
}

/**
 * Data sets for the XSD to PCRE pattern conversion test.
 *
 * Each XSD pattern is provided twice, once bare and once surrounded by explicit
 * '^' and '$' anchors. Both variants are paired with the same expected PCRE pattern.
 *
 * @return array Named data sets shaped as [XSD pattern, expected PCRE pattern].
 */
public function patternForPcreProvider(): array
{
    // [name of the bare data set, name of the anchored data set, bare XSD pattern]
    $samples = [
        [
            'max chars string pattern without anchors',
            'max chars string pattern with anchors',
            '[\s\S]{0,5}',
        ],
        [
            'digits only pattern',
            'digits only pattern with anchors',
            '[0,1]+',
        ],
        [
            'string prefix without anchors',
            'string prefix with anchors',
            'test(.*)',
        ],
        [
            'max words pattern without anchors',
            'max words pattern with anchors',
            '(?:(?:[^\s\:\!\?\;\…\€]+)[\s\:\!\?\;\…\€]*){0,5}',
        ],
    ];

    $dataSets = [];

    foreach ($samples as list($bareName, $anchoredName, $xsdPattern)) {
        $anchoredPattern = '^' . $xsdPattern . '$';
        $expectedPcre = '/' . $anchoredPattern . '/s';

        $dataSets[$bareName] = [$xsdPattern, $expectedPcre];
        $dataSets[$anchoredName] = [$anchoredPattern, $expectedPcre];
    }

    return $dataSets;
}
}

0 comments on commit 7795402

Please sign in to comment.