From 46b7efc3fd590111213b73ffe43a6edd9068d87a Mon Sep 17 00:00:00 2001 From: Tatu Saloranta Date: Thu, 6 Jun 2024 17:47:50 -0700 Subject: [PATCH] Fix #91: handle split-buffer surrogate for comments as well --- release-notes/VERSION | 4 ++- .../fasterxml/aalto/out/ByteXmlWriter.java | 19 ++++++++---- .../fasterxml/aalto/sax/TestSaxWriter.java | 29 +++++++++++++++---- 3 files changed, 40 insertions(+), 12 deletions(-) diff --git a/release-notes/VERSION b/release-notes/VERSION index fefec29..d1a9893 100644 --- a/release-notes/VERSION +++ b/release-notes/VERSION @@ -6,10 +6,12 @@ Project: aalto-xml 1.3.3 (not yet released) -#86: Multi-byte characters are split in writeCData() if first byte sits +#86: Multi-byte characters are split in `writeCData()` if first byte sits right at the end of the buffer (reported, fix contributed by @tatsel) #90: Update stax2-api dep to 4.2.2 (from 4.2) +#91: Multi-byte characters are split in `writeComment()` if first byte sits + right at the end of the buffer 1.3.2 (25-Apr-2022) diff --git a/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java b/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java index 64517a4..b0491d7 100644 --- a/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java +++ b/src/main/java/com/fasterxml/aalto/out/ByteXmlWriter.java @@ -799,6 +799,7 @@ protected int writeCDataContents(char[] cbuf, int offset, int len) ++offset; --len; } + // Unlike with writeCharacters() and fastWriteName(), let's not // worry about split buffers here: this is unlikely to become // performance bottleneck. This allows keeping it simple; and @@ -1197,12 +1198,18 @@ public int writeComment(String data) throws IOException, XMLStreamException protected int writeCommentContents(char[] cbuf, int offset, int len) throws IOException, XMLStreamException { - /* Unlike with writeCharacters() and fastWriteName(), let's not - * worry about split buffers here: this is unlikely to become - * performance bottleneck. 
This allows keeping it simple; and - * should it matter, we could start doing fast version here - * as well. - */ + if (_surrogate != 0) { + outputSurrogates(_surrogate, cbuf[offset]); + // reset the temporary surrogate storage + _surrogate = 0; + ++offset; + --len; + } + + // Unlike with writeCharacters() and fastWriteName(), let's not + // worry about split buffers here: this is unlikely to become + // a performance bottleneck. This allows keeping it simple; and + // should it matter, we could start doing a fast version here as well. len += offset; // now marks the end main_loop: diff --git a/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java b/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java index 58c9b9f..31ccc10 100644 --- a/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java +++ b/src/test/java/com/fasterxml/aalto/sax/TestSaxWriter.java @@ -60,11 +60,7 @@ public void testSplitSurrogateWithAttributeValue2() throws Exception public void testSplitSurrogateWithCData() throws Exception { - // This test aims to produce the - // javax.xml.stream.XMLStreamException: Incomplete surrogate pair in content: first char 0xdfce, second 0x78 - // error message. The issue was similar to the one described in testSurrogateMemory1(), except it happened in - // ByteXmlWriter#writeCDataContents(), where check for existing _surrogate was missing prior to the fix, - // as opposed to ByteXmlWriter#writeCharacters(). 
+ // Modification of "testSplitSurrogateWithAttributeValue()" but for CDATA StringBuilder testText = new StringBuilder(); for (int i = 0; i < 511; i++) { testText.append('x'); @@ -83,4 +79,27 @@ public void testSplitSurrogateWithCData() throws Exception writer.writeEndTag(writer.constructName("testelement")); writer.close(false); } + + + public void testSplitSurrogateWithComment() throws Exception + { + // Modification of "testSplitSurrogateWithAttributeValue()" but for Comment + StringBuilder testText = new StringBuilder(); + for (int i = 0; i < 511; i++) { + testText.append('x'); + } + testText.append("\uD835\uDFCE"); + for (int i = 0; i < 512; i++) { + testText.append('x'); + } + + WriterConfig writerConfig = new WriterConfig(); + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + Utf8XmlWriter writer = new Utf8XmlWriter(writerConfig, byteArrayOutputStream); + writer.writeStartTagStart(writer.constructName("testelement")); + writer.writeComment(testText.toString()); + writer.writeStartTagEnd(); + writer.writeEndTag(writer.constructName("testelement")); + writer.close(false); + } }