Skip to content

Commit

Permalink
Jdk19 regexp fix (#10972)
Browse files Browse the repository at this point in the history
* fix for \b regexp with JDK>=19

* [ro] fix \b regex for JDK>=19

* [pt] fix \b regex for JDK>=19

* [fr] fix \b regex for JDK>=19

* [es] fix \b regex for JDK>=19

* [ca] fix \b regex for JDK>=19

* [de] fix for \b regexp with JDK>=19

* use new segment parameter for \b fix

* [de] fix for \b regexp with JDK>=19

* update commandline tests

* [uk] fix for a type in regex

---------

Co-authored-by: Stefan Viol <[email protected]>
  • Loading branch information
arysin and SteVio89 authored Nov 11, 2024
1 parent 01bd51e commit f755fb3
Show file tree
Hide file tree
Showing 20 changed files with 113 additions and 186 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,7 @@
import org.junit.Before;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.io.*;
import java.nio.charset.StandardCharsets;

import static org.hamcrest.CoreMatchers.is;
Expand All @@ -42,7 +34,7 @@
*
* @author Marcin Miłkowski
*/
public class MainTest extends AbstractSecurityTestCase {
public class MainTest {

private final File enTestFile;
private final File xxRuleFile;
Expand Down Expand Up @@ -113,8 +105,7 @@ public MainTest() throws IOException {
}

@Before
public void setUp() throws Exception {
super.setUp();
public void setUp() {
this.stdout = System.out;
this.stderr = System.err;
this.out = new ByteArrayOutputStream();
Expand All @@ -124,38 +115,33 @@ public void setUp() throws Exception {
}

@After
public void tearDown() throws Exception {
public void tearDown() {
System.setOut(this.stdout);
System.setErr(this.stderr);
super.tearDown();
}

@Test
public void testUsageMessage() throws Exception {
try {
String[] args = {"-h"};
Main.main(args);
fail("LT should have exited with status 0!");
} catch (ExitException e) {
String output = new String(this.out.toByteArray());
assertTrue(output.contains("Usage: java -jar languagetool-commandline.jar"));
assertEquals("Exit status", 1, e.status);
}
Process process = new ProcessBuilder(
"java", "-cp", System.getProperty("java.class.path"), "org.languagetool.commandline.Main", "-h"
).start();
int exitCode = process.waitFor();
String output = readProcessOutput(process);
assertTrue(output.contains("Usage: java -jar languagetool-commandline.jar"));
assertEquals("Exit status", 1, exitCode);
}

@Test
public void testPrintLanguages() throws Exception {
try {
String[] args = {"--list"};
Main.main(args);
fail("LT should have exited with status 0!");
} catch (ExitException e) {
String output = new String(this.out.toByteArray());
assertTrue(output.contains("German"));
assertTrue(output.contains("de-DE"));
assertTrue(output.contains("English"));
assertEquals("Exit status", 0, e.status);
}
Process process = new ProcessBuilder(
"java", "-cp", System.getProperty("java.class.path"), "org.languagetool.commandline.Main", "--list"
).start();
int exitCode = process.waitFor();
String output = readProcessOutput(process);
assertTrue(output.contains("German"));
assertTrue(output.contains("de-DE"));
assertTrue(output.contains("English"));
assertEquals("Exit status", 0, exitCode);
}

@Test
Expand Down Expand Up @@ -670,4 +656,14 @@ private String getExternalFalseFriends() {
return xxFalseFriendFile.getAbsolutePath();
}

private String readProcessOutput(Process process) throws IOException {
try (BufferedReader reader = new BufferedReader(new InputStreamReader(process.getInputStream()))) {
StringBuilder output = new StringBuilder();
String line;
while ((line = reader.readLine()) != null) {
output.append(line).append(System.lineSeparator());
}
return output.toString();
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public abstract class AbstractUnitConversionRule extends Rule {
protected static final String NUMBER_REGEX = "(-?[0-9]{1,32}[0-9,.]{0,32})";
protected static final String NUMBER_REGEX_WITH_BOUNDARY = "(-?\\b[0-9]{1,32}[0-9,.]{0,32})";

protected final Pattern numberRangePart = Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "$");
protected final Pattern numberRangePart = Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "$", Pattern.UNICODE_CHARACTER_CLASS);

private static final double DELTA = 1e-2;
private static final double ROUNDING_DELTA = 0.05;
Expand Down Expand Up @@ -196,7 +196,7 @@ protected String formatRounded(String s) {
*/
protected void addUnit(String pattern, Unit base, String symbol, double factor, boolean metric) {
Unit unit = base.multiply(factor);
unitPatterns.put(Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "[\\s\u00A0]{0," + WHITESPACE_LIMIT + "}" + pattern + "\\b"), unit);
unitPatterns.put(Pattern.compile(NUMBER_REGEX_WITH_BOUNDARY + "[\\s\u00A0]{0," + WHITESPACE_LIMIT + "}" + pattern + "\\b", Pattern.UNICODE_CHARACTER_CLASS), unit);
unitSymbols.putIfAbsent(unit, new ArrayList<>());
unitSymbols.get(unit).add(symbol);
if (metric && !metricUnits.contains(unit)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,8 @@ private void createRules(List<PatternToken> elemList,
rule.setDistanceTokens(distanceTokens);
rule.setXmlLineNumber(xmlLineNumber);
} else if (regex.length() > 0) {
int flags = regexCaseSensitive ? 0 : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE;
// int flags = regexCaseSensitive ? 0 : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE;
int flags = regexCaseSensitive ? Pattern.UNICODE_CHARACTER_CLASS : Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CHARACTER_CLASS;
String regexStr = regex.toString();
if (regexMode == RegexpMode.SMART) {
// Note: it's not that easy to add \b because the regex might look like '(foo)' or '\d' so we cannot just look at the last character
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public RuleMatch acceptRuleMatch(RuleMatch match, Map<String, String> arguments,
}
String[] antiPatterns = antiPatternStr.split("\\|");
for (String antiPattern : antiPatterns) {
Pattern p = Pattern.compile(antiPattern);
Pattern p = Pattern.compile(antiPattern, Pattern.UNICODE_CHARACTER_CLASS);
Matcher matcher = p.matcher(sentenceObj.getText());
while (matcher.find()) {
// partial overlap is enough to filter out a match:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,18 @@
*/
package org.languagetool.rules.patterns;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.ResourceBundle;
import java.util.function.Function;

import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.tuple.Triple;
import org.jetbrains.annotations.Nullable;
import org.languagetool.Language;
import org.languagetool.ResourceBundleTools;
import org.languagetool.chunking.ChunkTag;
import org.languagetool.rules.CorrectExample;
import org.languagetool.rules.ErrorTriggeringExample;
Expand All @@ -35,9 +42,6 @@
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

import java.util.*;
import java.util.function.Function;

/**
* XML rule handler that loads rules from XML and throws
* exceptions on errors and warnings.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/**
* Tools for loading an SRX tokenizer file.
Expand Down Expand Up @@ -59,7 +60,8 @@ static SrxDocument createSrxDocument(String path) {

static List<String> tokenize(String text, SrxDocument srxDocument, String code) {
List<String> segments = new ArrayList<>();
TextIterator textIterator = new SrxTextIterator(srxDocument, code, text);
Map<String, Object> parserParameters = Map.of(SrxTextIterator.DEFAULT_PATTERN_FLAGS_PARAMETER, Pattern.UNICODE_CHARACTER_CLASS);
TextIterator textIterator = new SrxTextIterator(srxDocument, code, text, parserParameters);
while (textIterator.hasNext()) {
segments.add(textIterator.next());
}
Expand Down
Loading

0 comments on commit f755fb3

Please sign in to comment.