diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b44440..3291f93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,11 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- XML characters outside the [Unicode Basic Multilingual Plane](https://en.wikipedia.org/wiki/Plane_(Unicode)) + (i.e. 0x10000-0x10FFFF) are now supported and escaped - [Dependency analysis Gradle plugin](https://github.com/autonomousapps/dependency-analysis-gradle-plugin) -- The `check` task now depends on the `buildHealth` task and will fail the build on health violations +- The `check` task now depends on the `buildHealth` task and will fail the build on health violations such as + unused dependencies ### Changed +- Numeric character entities are now written in hexadecimal (e.g. `&#xA9;`) rather than decimal +- Invalid XML characters are now written in hexadecimal (e.g. `ctrl-0xFFFE`) rather than decimal - Changed JSR-305 dependency from `implementation` to `api` ## [2.0.1] - 2023-12-23 diff --git a/build.gradle.kts b/build.gradle.kts index ccf67aa..ac7b5c1 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -18,7 +18,7 @@ plugins { alias(libs.plugins.versions) } -val baseVersion = "2.0.2" +val baseVersion = "2.1.0" val isSnapshot = true val isCIServer = System.getenv("CTHING_CI") != null diff --git a/dev/checkstyle/checkstyle.xml b/dev/checkstyle/checkstyle.xml index c3dad2c..6db0a94 100644 --- a/dev/checkstyle/checkstyle.xml +++ b/dev/checkstyle/checkstyle.xml @@ -259,13 +259,6 @@ - - - - - - - diff --git a/src/main/java/org/cthing/xmlwriter/XmlWriter.java b/src/main/java/org/cthing/xmlwriter/XmlWriter.java index e2392fa..1073e11 100755 --- a/src/main/java/org/cthing/xmlwriter/XmlWriter.java +++ b/src/main/java/org/cthing/xmlwriter/XmlWriter.java @@ -353,7 +353,6 @@ private enum Event { private static final String DEF_INDENT = " "; private static final String DEF_OFFSET = ""; private static final String SYNTH_NS_PREFIX = "__NS"; - private static 
final String NEWLINE = System.lineSeparator(); private static final AttributesImpl EMPTY_ATTRS = new AttributesImpl(); /** All output is written to this writer. */ @@ -2514,7 +2513,7 @@ private int writeNSDecls() throws SAXException { */ private void writeNewline() throws SAXException { try { - this.out.write(NEWLINE); + this.out.write(System.lineSeparator()); } catch (final IOException ex) { throw new SAXException(ex); } @@ -2613,7 +2612,8 @@ void writeQuoted(final char[] carr, final int start, final int length) throws SA * @param length Number of characters to test * @return {@code true} if the specified character array requires escaping. */ - private static boolean needsEscaping(final char[] carr, final int start, final int length) { + @AccessForTesting + static boolean needsEscaping(final char[] carr, final int start, final int length) { int end = start + length; while (--end >= start) { final char c = carr[end]; @@ -2643,8 +2643,11 @@ private static boolean needsEscaping(final char[] carr, final int start, final i void writeEscaped(final char[] carr, final int start, final int length) throws SAXException { if (this.escaping && needsEscaping(carr, start, length)) { final int end = start + length; - for (int i = start; i < end; i++) { - writeEscaped(carr[i]); + int i = start; + while (i < end) { + final int codePoint = Character.codePointAt(carr, i); + writeEscaped(codePoint); + i += Character.charCount(codePoint); } } else { writeRaw(carr, start, length); @@ -2654,29 +2657,32 @@ void writeEscaped(final char[] carr, final int start, final int length) throws S /** * Writes the specified character to the output escaping the '&', '<', and '>' characters using the * standard XML escape sequences. Control characters and characters outside the ASCII range are escaped using a - * numeric character reference. Invalid XML control characters are written as "ctrl-nnn". + * numeric character reference. 
Invalid XML control characters are written as {@code ctrl-0xN} where {@code N} + * is the hexadecimal value of the invalid character. * * @param c Character to write * @throws SAXException If there is an error writing the character. The SAXException wraps an IOException. */ @AccessForTesting - void writeEscaped(final char c) throws SAXException { + void writeEscaped(final int c) throws SAXException { switch (c) { case '&' -> writeRaw("&"); case '<' -> writeRaw("<"); case '>' -> writeRaw(">"); case '\n' -> writeNewline(); - case '\t', '\r' -> writeRaw(c); + case '\t', '\r' -> writeRaw((char)c); default -> { if (c > '\u001F' && c < '\u007F') { - writeRaw(c); - } else if ((c >= '\u007F' && c <= '\uD7FF') || (c >= '\uE000' && c <= '\uFFFD')) { - writeRaw("&#"); - writeRaw(Integer.toString(c)); + writeRaw((char)c); + } else if ((c >= '\u007F' && c <= '\uD7FF') + || (c >= '\uE000' && c <= '\uFFFD') + || (c >= 0x10000 && c <= 0x10FFFF)) { + writeRaw("&#x"); + writeRaw(Integer.toHexString(c).toUpperCase()); writeRaw(';'); } else { - writeRaw("ctrl-"); - writeRaw(Integer.toString(c)); + writeRaw("ctrl-0x"); + writeRaw(Integer.toHexString(c).toUpperCase()); } } } diff --git a/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java b/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java index 5454248..1082a6f 100755 --- a/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java +++ b/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java @@ -41,14 +41,12 @@ import org.xml.sax.helpers.AttributesImpl; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.params.provider.Arguments.arguments; @SuppressWarnings({ "HttpUrlsUsage", "UnnecessaryUnicodeEscape" }) class XmlWriterTest { - /** Specifies the newline character sequence to use. 
*/ - private static final String NEWLINE = System.getProperty("line.separator"); - private StringWriter stringWriter; private XmlWriter xmlWriter; @@ -89,23 +87,69 @@ void testWriteRawChar() throws Exception { assertThat(this.stringWriter).hasToString(testString); } + public static Stream needsEscapingProvider() { + return Stream.of( + arguments("", false), + arguments("abc", false), + arguments("", true), + arguments("a&bc", true), + arguments("a\nbc", true), + arguments("a\tbc", false), + arguments("a\rbc", false), + arguments("a\u008Abc", true), + arguments("a\uE08Abc", true), + arguments("a\uD83D\uDE03bc", true) + ); + } + + @ParameterizedTest + @MethodSource("needsEscapingProvider") + @DisplayName("Determine whether an array requires escaping") + void testNeedsEscaping(final String str, final boolean needsEscaping) { + assertThat(XmlWriter.needsEscaping(str.toCharArray(), 0, str.length())).isEqualTo(needsEscaping); + } + @Test @DisplayName("Write an array with escaping") void testWriteEscapedArray() throws Exception { - final String testStringIn = "\" World\u00A9\u001A\t\n"; - final String testStringOut = "<Hello &<>\" World©ctrl-26\t\n"; + final String testStringIn = "\" World\u00A9\u001A\uFFFE\uD83D\uDE03\t\n"; + final String testStringOut = "<Hello &<>\" World©ctrl-0x1Actrl-0xFFFE😃\t\n"; this.xmlWriter.writeEscaped(testStringIn.toCharArray(), 0, testStringIn.length()); assertThat(this.stringWriter).hasToString(testStringOut); } - @Test - @DisplayName("Write a character with escaping") - void testWriteEscapedChar() throws Exception { - this.xmlWriter.writeEscaped('\u00A9'); + public static Stream writeEscapedProvider() { + return Stream.of( + arguments(' ', " "), + arguments('a', "a"), + arguments('Z', "Z"), + arguments('~', "~"), + arguments('&', "&"), + arguments('<', "<"), + arguments('>', ">"), + arguments('\n', "\n"), + arguments('\t', "\t"), + arguments('\r', "\r"), + arguments(0x0, "ctrl-0x0"), + arguments(0x1F, "ctrl-0x1F"), + arguments(0xFFFF, 
"ctrl-0xFFFF"), + arguments(0x7F, ""), + arguments(0xD7FF, "퟿"), + arguments(0xE000, ""), + arguments(0xFFFD, "�"), + arguments(0x1F603, "😃") + ); + } - assertThat(this.stringWriter).hasToString("©"); + @ParameterizedTest + @MethodSource("writeEscapedProvider") + @DisplayName("Write a character with escaping") + void testWriteEscapedChar(final int ch, final String expected) throws Exception { + this.xmlWriter.writeEscaped(ch); + assertThat(this.stringWriter).hasToString(expected); } @Test @@ -123,7 +167,7 @@ void testWriteEscapedDisabled() throws Exception { @DisplayName("Write a string adding quotes") void testWriteQuotedString() throws Exception { final String testStringIn = "Hello &<>\"' World\u00A9"; - final String testStringOut = "\"Hello &<>"' World©\""; + final String testStringOut = "\"Hello &<>"' World©\""; this.xmlWriter.writeQuoted(testStringIn); @@ -134,7 +178,7 @@ void testWriteQuotedString() throws Exception { @DisplayName("Write an array adding quotes") void testWriteQuotedArray() throws Exception { final String testStringIn = "Hello &<>\"' World\u00A9"; - final String testStringOut = "\"Hello &<>"' World©\""; + final String testStringOut = "\"Hello &<>"' World©\""; this.xmlWriter.writeQuoted(testStringIn.toCharArray(), 0, testStringIn.length()); @@ -193,7 +237,7 @@ private static Stream minimalDocumentProvider() { """), - Arguments.of("UTF-8", false, true, NEWLINE) + Arguments.of("UTF-8", false, true, System.lineSeparator()) ); }