Skip to content

Commit

Permalink
Enhancement: improve data validation for the give_clean() function (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
glaubersilva authored Dec 24, 2024
1 parent aa24c85 commit 29eaaf5
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 10 deletions.
5 changes: 3 additions & 2 deletions includes/formatting.php
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,7 @@ function give_get_cache_key($action, $query_args)
* Clean variables using sanitize_text_field. Arrays are cleaned recursively.
* Non-scalar values are ignored.
*
* @unreleased Don't unserialize data by default and return an empty string when data is serialized and $allow_serialized_data is false
* @since 3.17.2 Safely unserialize data by default
* @since 1.8
*
Expand All @@ -748,8 +749,8 @@ function give_clean($var, $allow_serialized_data = false)
return array_map('give_clean', $var);
}

if ( ! $allow_serialized_data) {
$var = Utils::safeUnserialize($var);
if ( Utils::isSerialized($var)) {
$var = $allow_serialized_data ? Utils::safeUnserialize($var) : '';
}

return is_scalar($var) ? sanitize_text_field(wp_unslash($var)) : $var;
Expand Down
14 changes: 11 additions & 3 deletions src/Helpers/Utils.php
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ public static function removeBackslashes($data)
}

/**
* The regular expression attempts to capture the basic structure of a serialized array
* or object. This is more robust than the is_serialized() function but still not perfect.
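* The regular expression attempts to capture the basic structure of the common serialized PHP value types
* (arrays, objects, strings, integers, booleans, floats and NULL). Note that the integer and float patterns
* only accept unsigned decimal digits, so negative values such as i:-42; are not matched.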
*
* @unreleased Support all types of serialized data instead of only objects and arrays
* @since 3.17.2
*/
public static function containsSerializedDataRegex($data): bool
Expand All @@ -141,7 +141,15 @@ public static function containsSerializedDataRegex($data): bool
return false;
}

$pattern = '/(a:\d+:\{.*\})|(O:\d+:"[^"]+":\{.*\})/';
$pattern = '/
(a:\d+:\{.*\}) | # Matches arrays (e.g: a:2:{i:0;s:5:"hello";i:1;i:42;})
(O:\d+:"[^"]+":\{.*\}) | # Matches objects (e.g: O:8:"stdClass":1:{s:4:"name";s:5:"James";})
(s:\d+:"[^"]*";) | # Matches strings (e.g: s:5:"hello";)
(i:\d+;) | # Matches integers (e.g: i:42;)
(b:[01];) | # Matches booleans (e.g: b:1; or b:0;)
(d:\d+(\.\d+)?;) | # Matches floats (e.g: d:3.14;)
(N;) # Matches NULL (e.g: N;)
/x';

return preg_match($pattern, $data) === 1;
}
Expand Down
14 changes: 9 additions & 5 deletions tests/Unit/Helpers/UtilsTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ public function testMaybeSafeUnserialize($data, bool $expected)
}

/**
* @unreleased Test all types of serialized data
* @since 3.17.2
*/
public function serializedDataProvider(): array
Expand All @@ -90,15 +91,18 @@ public function serializedDataProvider(): array
[serialize('bar'), true],
['\\' . serialize('backslash-bypass'), true],
['\\\\' . serialize('double-backslash-bypass'), true],
[
// String with serialized data hidden in the middle of the content
'Lorem ipsum dolor sit amet, {a:2:{i:0;s:5:\"hello\";i:1;s:5:\"world\";}} consectetur adipiscing elit.',
true,
],
['foo', false],
[serialize('qux'), true],
['bar', false],
['foo bar', false],
// Strings with serialized data hidden in the middle of the content
['Lorem ipsum a:2:{i:0;s:5:"hello";i:1;i:42;} dolor sit amet', true], // array
['Lorem ipsum O:8:"stdClass":1:{s:4:"name";s:5:"James";} dolor sit amet', true], // object
['Lorem ipsum s:5:"hello"; dolor sit amet', true], // string
['Lorem ipsum i:42; dolor sit amet', true], // integer
['Lorem ipsum b:1; dolor sit amet', true], // boolean
['Lorem ipsum d:3.14; dolor sit amet', true], // float
['Lorem ipsum N; dolor sit amet', true], // NULL
];
}
}

0 comments on commit 29eaaf5

Please sign in to comment.