diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4b44440..3291f93 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,11 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
+- XML characters outside the [Unicode Basic Multilingual Plane](https://en.wikipedia.org/wiki/Plane_(Unicode))
+ (i.e. 0x10000-0x10FFFF) are now supported and escaped
- [Dependency analysis Gradle plugin](https://github.com/autonomousapps/dependency-analysis-gradle-plugin)
-- The `check` task now depends on the `buildHealth` task and will fail the build on health violations
+- The `check` task now depends on the `buildHealth` task and will fail the build on health violations such as
+ unused dependencies
### Changed
+- Numeric character entities are now written in hexadecimal (e.g. `&#xA9;`) rather than decimal
+- Invalid XML characters are now written in hexadecimal (e.g. `ctrl-0xFFFE`) rather than decimal
- Changed JSR-305 dependency from `implementation` to `api`
## [2.0.1] - 2023-12-23
diff --git a/build.gradle.kts b/build.gradle.kts
index ccf67aa..ac7b5c1 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -18,7 +18,7 @@ plugins {
alias(libs.plugins.versions)
}
-val baseVersion = "2.0.2"
+val baseVersion = "2.1.0"
val isSnapshot = true
val isCIServer = System.getenv("CTHING_CI") != null
diff --git a/dev/checkstyle/checkstyle.xml b/dev/checkstyle/checkstyle.xml
index c3dad2c..6db0a94 100644
--- a/dev/checkstyle/checkstyle.xml
+++ b/dev/checkstyle/checkstyle.xml
@@ -259,13 +259,6 @@
-
-
-
-
-
-
-
diff --git a/src/main/java/org/cthing/xmlwriter/XmlWriter.java b/src/main/java/org/cthing/xmlwriter/XmlWriter.java
index e2392fa..1073e11 100755
--- a/src/main/java/org/cthing/xmlwriter/XmlWriter.java
+++ b/src/main/java/org/cthing/xmlwriter/XmlWriter.java
@@ -353,7 +353,6 @@ private enum Event {
private static final String DEF_INDENT = " ";
private static final String DEF_OFFSET = "";
private static final String SYNTH_NS_PREFIX = "__NS";
- private static final String NEWLINE = System.lineSeparator();
private static final AttributesImpl EMPTY_ATTRS = new AttributesImpl();
/** All output is written to this writer. */
@@ -2514,7 +2513,7 @@ private int writeNSDecls() throws SAXException {
*/
private void writeNewline() throws SAXException {
try {
- this.out.write(NEWLINE);
+ this.out.write(System.lineSeparator());
} catch (final IOException ex) {
throw new SAXException(ex);
}
@@ -2613,7 +2612,8 @@ void writeQuoted(final char[] carr, final int start, final int length) throws SA
* @param length Number of characters to test
* @return {@code true} if the specified character array requires escaping.
*/
- private static boolean needsEscaping(final char[] carr, final int start, final int length) {
+ @AccessForTesting
+ static boolean needsEscaping(final char[] carr, final int start, final int length) {
int end = start + length;
while (--end >= start) {
final char c = carr[end];
@@ -2643,8 +2643,11 @@ private static boolean needsEscaping(final char[] carr, final int start, final i
void writeEscaped(final char[] carr, final int start, final int length) throws SAXException {
if (this.escaping && needsEscaping(carr, start, length)) {
final int end = start + length;
- for (int i = start; i < end; i++) {
- writeEscaped(carr[i]);
+ int i = start;
+ while (i < end) {
+ final int codePoint = Character.codePointAt(carr, i);
+ writeEscaped(codePoint);
+ i += Character.charCount(codePoint);
}
} else {
writeRaw(carr, start, length);
@@ -2654,29 +2657,32 @@ void writeEscaped(final char[] carr, final int start, final int length) throws S
/**
* Writes the specified character to the output escaping the '&', '<', and '>' characters using the
* standard XML escape sequences. Control characters and characters outside the ASCII range are escaped using a
- * numeric character reference. Invalid XML control characters are written as "ctrl-nnn".
+ * numeric character reference. Invalid XML control characters are written as {@code ctrl-0xN} where {@code N}
+ * is the hexadecimal value of the invalid character.
*
* @param c Character to write
* @throws SAXException If there is an error writing the character. The SAXException wraps an IOException.
*/
@AccessForTesting
- void writeEscaped(final char c) throws SAXException {
+ void writeEscaped(final int c) throws SAXException {
switch (c) {
case '&' -> writeRaw("&");
case '<' -> writeRaw("<");
case '>' -> writeRaw(">");
case '\n' -> writeNewline();
- case '\t', '\r' -> writeRaw(c);
+ case '\t', '\r' -> writeRaw((char)c);
default -> {
if (c > '\u001F' && c < '\u007F') {
- writeRaw(c);
- } else if ((c >= '\u007F' && c <= '\uD7FF') || (c >= '\uE000' && c <= '\uFFFD')) {
- writeRaw("");
- writeRaw(Integer.toString(c));
+ writeRaw((char)c);
+ } else if ((c >= '\u007F' && c <= '\uD7FF')
+ || (c >= '\uE000' && c <= '\uFFFD')
+ || (c >= 0x10000 && c <= 0x10FFFF)) {
+ writeRaw("");
+ writeRaw(Integer.toHexString(c).toUpperCase());
writeRaw(';');
} else {
- writeRaw("ctrl-");
- writeRaw(Integer.toString(c));
+ writeRaw("ctrl-0x");
+ writeRaw(Integer.toHexString(c).toUpperCase());
}
}
}
diff --git a/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java b/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java
index 5454248..1082a6f 100755
--- a/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java
+++ b/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java
@@ -41,14 +41,12 @@
import org.xml.sax.helpers.AttributesImpl;
import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.jupiter.params.provider.Arguments.arguments;
@SuppressWarnings({ "HttpUrlsUsage", "UnnecessaryUnicodeEscape" })
class XmlWriterTest {
- /** Specifies the newline character sequence to use. */
- private static final String NEWLINE = System.getProperty("line.separator");
-
private StringWriter stringWriter;
private XmlWriter xmlWriter;
@@ -89,23 +87,69 @@ void testWriteRawChar() throws Exception {
assertThat(this.stringWriter).hasToString(testString);
}
+ public static Stream needsEscapingProvider() {
+ return Stream.of(
+ arguments("", false),
+ arguments("abc", false),
+ arguments("", true),
+ arguments("a&bc", true),
+ arguments("a\nbc", true),
+ arguments("a\tbc", false),
+ arguments("a\rbc", false),
+ arguments("a\u008Abc", true),
+ arguments("a\uE08Abc", true),
+ arguments("a\uD83D\uDE03bc", true)
+ );
+ }
+
+ @ParameterizedTest
+ @MethodSource("needsEscapingProvider")
+ @DisplayName("Determine whether an array requires escaping")
+ void testNeedsEscaping(final String str, final boolean needsEscaping) {
+ assertThat(XmlWriter.needsEscaping(str.toCharArray(), 0, str.length())).isEqualTo(needsEscaping);
+ }
+
@Test
@DisplayName("Write an array with escaping")
void testWriteEscapedArray() throws Exception {
- final String testStringIn = "\" World\u00A9\u001A\t\n";
- final String testStringOut = "<Hello &<>\" World©ctrl-26\t\n";
+ final String testStringIn = "\" World\u00A9\u001A\uFFFE\uD83D\uDE03\t\n";
+ final String testStringOut = "<Hello &<>\" World©ctrl-0x1Actrl-0xFFFE😃\t\n";
this.xmlWriter.writeEscaped(testStringIn.toCharArray(), 0, testStringIn.length());
assertThat(this.stringWriter).hasToString(testStringOut);
}
- @Test
- @DisplayName("Write a character with escaping")
- void testWriteEscapedChar() throws Exception {
- this.xmlWriter.writeEscaped('\u00A9');
+ public static Stream writeEscapedProvider() {
+ return Stream.of(
+ arguments(' ', " "),
+ arguments('a', "a"),
+ arguments('Z', "Z"),
+ arguments('~', "~"),
+ arguments('&', "&"),
+ arguments('<', "<"),
+ arguments('>', ">"),
+ arguments('\n', "\n"),
+ arguments('\t', "\t"),
+ arguments('\r', "\r"),
+ arguments(0x0, "ctrl-0x0"),
+ arguments(0x1F, "ctrl-0x1F"),
+ arguments(0xFFFF, "ctrl-0xFFFF"),
+ arguments(0x7F, ""),
+ arguments(0xD7FF, ""),
+ arguments(0xE000, ""),
+ arguments(0xFFFD, "�"),
+ arguments(0x1F603, "😃")
+ );
+ }
- assertThat(this.stringWriter).hasToString("©");
+ @ParameterizedTest
+ @MethodSource("writeEscapedProvider")
+ @DisplayName("Write a character with escaping")
+ void testWriteEscapedChar(final int ch, final String expected) throws Exception {
+ this.xmlWriter.writeEscaped(ch);
+ assertThat(this.stringWriter).hasToString(expected);
}
@Test
@@ -123,7 +167,7 @@ void testWriteEscapedDisabled() throws Exception {
@DisplayName("Write a string adding quotes")
void testWriteQuotedString() throws Exception {
final String testStringIn = "Hello &<>\"' World\u00A9";
- final String testStringOut = "\"Hello &<>"' World©\"";
+ final String testStringOut = "\"Hello &<>"' World©\"";
this.xmlWriter.writeQuoted(testStringIn);
@@ -134,7 +178,7 @@ void testWriteQuotedString() throws Exception {
@DisplayName("Write an array adding quotes")
void testWriteQuotedArray() throws Exception {
final String testStringIn = "Hello &<>\"' World\u00A9";
- final String testStringOut = "\"Hello &<>"' World©\"";
+ final String testStringOut = "\"Hello &<>"' World©\"";
this.xmlWriter.writeQuoted(testStringIn.toCharArray(), 0, testStringIn.length());
@@ -193,7 +237,7 @@ private static Stream minimalDocumentProvider() {
"""),
- Arguments.of("UTF-8", false, true, NEWLINE)
+ Arguments.of("UTF-8", false, true, System.lineSeparator())
);
}