diff --git a/core/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java b/core/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java
index b903cbe9..a66a7064 100644
--- a/core/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java
+++ b/core/src/main/java/com/schibsted/spt/data/jslt/impl/BuiltinFunctions.java
@@ -104,6 +104,7 @@ public class BuiltinFunctions {
     functions.put("from-json", new BuiltinFunctions.FromJson());
     functions.put("to-json", new BuiltinFunctions.ToJson());
     functions.put("replace", new BuiltinFunctions.Replace());
+    functions.put("replace-regexp", new BuiltinFunctions.ReplaceRegexp());
     functions.put("trim", new BuiltinFunctions.Trim());
     functions.put("uuid", new BuiltinFunctions.Uuid());
 
@@ -961,6 +962,47 @@ else if (pos < string.length())
     }
   }
 
+  // ===== REPLACE-REGEXP
+
+  /**
+   * Implements {@code replace-regexp(value, regexp, out)}: replaces every
+   * substring of the value that matches the regexp with the replacement.
+   * The replacement is handed to {@link String#replaceAll}, so {@code $1}
+   * and {@code ${name}} in it refer to capture groups of the regexp.
+   */
+  public static class ReplaceRegexp extends AbstractRegexpFunction {
+
+    public ReplaceRegexp() {
+      super("replace-regexp", 3, 3);
+    }
+
+    /**
+     * @param input the context node; not used by this function
+     * @param arguments the value, the regexp and the replacement, in that order
+     * @return a null node when the value converts to a null string, otherwise
+     *         a text node holding the value with every match replaced
+     * @throws JsltException if the number of arguments is not exactly three
+     */
+    public JsonNode call(JsonNode input, JsonNode[] arguments) {
+      int args = null == arguments ? 0 : arguments.length;
+      if (args != 3) {
+        throw new JsltException("replace-regexp requires exactly 3 arguments, " + args + " provided");
+      }
+
+      String string = NodeUtils.toString(arguments[0], true);
+      if (string == null)
+        return NullNode.instance;
+
+      String regexp = NodeUtils.toString(arguments[1], false);
+      String replacement = NodeUtils.toString(arguments[2], false);
+
+      // replaceAll returns the string unchanged when nothing matches, so no
+      // String.matches() guard is needed: that guard is only true when the
+      // WHOLE string matches, which made partial matches silently ignored.
+      return new TextNode(string.replaceAll(regexp, replacement));
+    }
+  }
+
   // ===== TRIM
 
   public static class Trim extends AbstractFunction {
diff --git a/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java b/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java
index f5acc006..bc4ab161 100644
--- a/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java
+++ b/core/src/main/java/com/schibsted/spt/data/jslt/parser/ParserImpl.java
@@ -503,7 +503,13 @@ private static String makeString(ParseContext ctx, Token literal) {
         result[pos++] = ch;
       else {
         ch = string.charAt(++ix);
-
+        // special Regexp characters, s. https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html,
+        // "Predefined character classes".
+        if ("dDhHsSvVwW".contains(String.valueOf(ch))) {
+          result[pos++] = '\\';
+          result[pos++] = ch;
+          continue;
+        }
         switch (ch) {
         case '\\': result[pos++] = ch; break;
         case '"': result[pos++] = ch; break;
diff --git a/core/src/test/resources/function-tests.json b/core/src/test/resources/function-tests.json
index bea7f7b3..3408c03f 100644
--- a/core/src/test/resources/function-tests.json
+++ b/core/src/test/resources/function-tests.json
@@ -1127,6 +1127,26 @@
       "input" : "\"some text\"",
       "output": "\"\""
     },
+    {
+      "query": "replace-regexp(., \"(\\d{4})-(\\d{2})-(\\d{2})\", \"$2/$3/$1\")",
+      "input" : "\"2019-12-31\"",
+      "output": "\"12/31/2019\""
+    },
+    {
+      "query": "replace-regexp(., \"(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})\", \"${day}.${month}.${year}\")",
+      "input" : "\"2019-12-31\"",
+      "output": "\"31.12.2019\""
+    },
+    {
+      "query": "replace-regexp(., \"([a-z]+)\", \"$1\")",
+      "input" : "\"2019-12-31\"",
+      "output" : "\"2019-12-31\""
+    },
+    {
+      "query": "replace-regexp(., \"(\\d{2})-(\\d{2})\", \"$2/$1\")",
+      "input" : "\"on 12-31 and 01-02\"",
+      "output": "\"on 31/12 and 02/01\""
+    },
     {
       "query": "trim(.)",
       "input" : "\"some text\"",
diff --git a/core/src/test/resources/json-parse-error-tests.json b/core/src/test/resources/json-parse-error-tests.json
index 9a98f998..487245fb 100644
--- a/core/src/test/resources/json-parse-error-tests.json
+++ 
b/core/src/test/resources/json-parse-error-tests.json @@ -1,7 +1,6 @@ { "description" : "Tests that should cause the JSLT parser to declare JSON syntax error.", "tests" : [ - "\" \\d \"", "\"\\u\"", "\"\\u0\"", "\"\\u00\"", diff --git a/functions.md b/functions.md index 3f3accc4..8fbc219b 100644 --- a/functions.md +++ b/functions.md @@ -510,6 +510,27 @@ replace("abc def ghi", "[a-z]", "x") => "xxx xxx xxx" replace("abc def ghi", "[a-z]+", "x") => "x x x" ``` +### _replace-regexp(value, regexp, out) -> string_ + +Replaces the string in `value` that matches `regexp` with `out`. +If `value` is not a string, it's converted to a string, except +if it is `null`. `regexp` and `out` must be strings. + +It is an error for `regexp` ever to match an empty string. + +If `regexp` does not match the input, the result is `value` unchanged. + +Examples: + +``` +replace-regexp("2019-12-31", "(\\d{4})-(\\d{2})-(\\d{2})", "$2/$3/$1") + => "12/31/2019" +replace-regexp("2019-12-31", "(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})", "${day}.${month}.${year}") + => "31.12.2019" +replace-regexp("2019-12-31", "([a-z]+)", "$1") + => "2019-12-31" +``` + ### _trim(string) -> string_ Removes leading and trailing whitespace in the input string. If the