From ed3ec776101ba8c9849fdf26abb038c2f7682cc6 Mon Sep 17 00:00:00 2001 From: Baron Roberts Date: Sun, 14 Apr 2024 15:51:31 -0700 Subject: [PATCH] Build release candidate. --- CHANGELOG.md | 11 +- build.gradle.kts | 5 +- gradle/libs.versions.toml | 1 + .../java/org/cthing/xmlwriter/XmlWriter.java | 116 +++++++----------- .../org/cthing/xmlwriter/XmlWriterTest.java | 66 +++------- 5 files changed, 79 insertions(+), 120 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf57452..8b3e767 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,17 +11,26 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - XML characters outside the [Unicode Basic Multilingual Plane](https://en.wikipedia.org/wiki/Plane_(Unicode)) (i.e. 0x10000-0x10FFFF) are now supported and escaped +- The `setEscapeNonAscii` and `setUseDecimal` methods have been added to control escaping behavior - [Dependency analysis Gradle plugin](https://github.com/autonomousapps/dependency-analysis-gradle-plugin) - The `check` task now depends on the `buildHealth` task and will fail the build on health violations such as unused dependencies +- New dependency on the [escapers](https://central.sonatype.com/artifact/org.cthing/escapers) library ### Changed -- Numeric character entities are now written in hexidecimal (e.g. `&#xA9;`) rather than decimal +- The escape behavior has changed. By default, characters outside the ASCII range are no longer escaped. To + escape these characters, call `setEscapeNonAscii(true)`. +- By default, numeric character entities are now written in hexadecimal (e.g. `&#xA9;`) rather than decimal. + To write numeric entities in decimal, call `setUseDecimal(true)`. - Invalid XML characters are no longer written. In previous versions, they were written in decimal with the prefix "ctrl-". - Changed JSR-305 dependency from `implementation` to `api` +### Removed + +- The `setEscaping` method has been removed. Use the `setEscapeNonAscii` and `setUseDecimal` methods. 
+ ## [2.0.1] - 2023-12-23 ### Added diff --git a/build.gradle.kts b/build.gradle.kts index ac7b5c1..2c76c6f 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -18,8 +18,8 @@ plugins { alias(libs.plugins.versions) } -val baseVersion = "2.1.0" -val isSnapshot = true +val baseVersion = "3.0.0" +val isSnapshot = false val isCIServer = System.getenv("CTHING_CI") != null val buildNumber = if (isCIServer) System.currentTimeMillis().toString() else "0" @@ -37,6 +37,7 @@ dependencies { api(libs.jsr305) implementation(libs.cthingAnnots) + implementation(libs.escapers) testImplementation(libs.junitApi) testImplementation(libs.junitParams) diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index 10c6ba6..6fd7d43 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -14,6 +14,7 @@ versions = { id = "com.github.ben-manes.versions", version = "0.51.0" } apiGuardian = "org.apiguardian:apiguardian-api:1.1.2" assertJ = "org.assertj:assertj-core:3.25.3" cthingAnnots = "org.cthing:cthing-annotations:1.0.0" +escapers = "org.cthing:escapers:1.0.0" jsr305 = "com.google.code.findbugs:jsr305:3.0.2" junitApi = { module = "org.junit.jupiter:junit-jupiter-api", version.ref = "junit" } junitEngine = { module = "org.junit.jupiter:junit-jupiter-engine", version.ref = "junit" } diff --git a/src/main/java/org/cthing/xmlwriter/XmlWriter.java b/src/main/java/org/cthing/xmlwriter/XmlWriter.java index 37c5971..6f66033 100755 --- a/src/main/java/org/cthing/xmlwriter/XmlWriter.java +++ b/src/main/java/org/cthing/xmlwriter/XmlWriter.java @@ -24,6 +24,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Deque; +import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -35,6 +36,7 @@ import javax.xml.XMLConstants; import org.cthing.annotations.AccessForTesting; +import org.cthing.escapers.XmlEscaper; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import 
org.xml.sax.SAXException; @@ -361,8 +363,8 @@ private enum Event { /** Should output be formatted. */ private boolean prettyPrint; - /** Should characters be escaped. */ - private boolean escaping; + /** Options controlling the escaping behavior. */ + private final Set escapeOptions; /** Indent string. */ private String indentStr; @@ -563,7 +565,7 @@ public XmlWriter(@Nullable @WillNotClose final XMLReader reader, @Nullable @Will this.nsDeclMap = new HashMap<>(); this.nsRootDeclSet = new HashSet<>(); this.prettyPrint = false; - this.escaping = true; + this.escapeOptions = EnumSet.noneOf(XmlEscaper.Option.class); this.minimize = true; this.indentStr = DEF_INDENT; this.offsetStr = DEF_OFFSET; @@ -720,26 +722,50 @@ public boolean getPrettyPrint() { } /** - * Enables or disables XML escaping of attribute values and character data. In addition, enables or disables - * escaping of embedded quotes in attribute values. By default, escaping is enabled. + * Escape characters above the ASCII range (i.e. ch > 0x7F). By default, only ASCII control characters + * and markup-significant ASCII characters are escaped. Specifying this option causes all ISO Latin-1, + * Unicode BMP and surrogate pair characters to be escaped. * - *

WARNING: Under normal operating conditions, escaping should always be enabled. If the - * character data to be written is known to not require escaping, disabling escaping will improve performance. - * Use this feature at your own risk. + * @param enable {@code true} to escape characters outside the ASCII range using numerical entity references + */ + public void setEscapeNonAscii(final boolean enable) { + if (enable) { + this.escapeOptions.add(XmlEscaper.Option.ESCAPE_NON_ASCII); + } else { + this.escapeOptions.remove(XmlEscaper.Option.ESCAPE_NON_ASCII); + } + } + + /** + * Indicates whether characters above the ASCII range (i.e. ch > 0x7F) are escaped. * - * @param enable {@code true} to enable escaping (the default). + * @return {@code true} if characters outside the ASCII range are being escaped. */ - public void setEscaping(final boolean enable) { - this.escaping = enable; + public boolean getEscapeNonAscii() { + return this.escapeOptions.contains(XmlEscaper.Option.ESCAPE_NON_ASCII); } /** - * Indicates whether XML escaping is enabled. + * Use decimal for numerical character entities (i.e. &#DDDD;). By default, this library uses hexadecimal + * (i.e. &#xHHH;) for numerical character entities. * - * @return Whether XML escaping is enabled or disabled. + * @param enable {@code true} to use decimal rather than hexadecimal for numerical character entities */ - public boolean getEscaping() { - return this.escaping; + public void setUseDecimal(final boolean enable) { + if (enable) { + this.escapeOptions.add(XmlEscaper.Option.USE_DECIMAL); + } else { + this.escapeOptions.remove(XmlEscaper.Option.USE_DECIMAL); + } + } + + /** + * Indicates whether decimal is being used for numerical character entities rather than hexadecimal. + * + * @return {@code true} if decimal is being used for numerical character entities. 
+ */ + public boolean getUseDecimal() { + return this.escapeOptions.contains(XmlEscaper.Option.USE_DECIMAL); } /** @@ -2586,21 +2612,7 @@ void writeQuoted(final String s) throws SAXException { @AccessForTesting void writeQuoted(final char[] carr, final int start, final int length) throws SAXException { writeRaw('"'); - if (this.escaping && containsQuotes(carr, start, length)) { - final int end = start + length; - for (int i = start; i < end; i++) { - final char c = carr[i]; - if (c == '"') { - writeRaw("""); - } else if (c == '\'') { - writeRaw("'"); - } else { - writeEscaped(c); - } - } - } else { - writeEscaped(carr, start, length); - } + writeEscaped(carr, start, length); writeRaw('"'); } @@ -2616,46 +2628,10 @@ void writeQuoted(final char[] carr, final int start, final int length) throws SA */ @AccessForTesting void writeEscaped(final char[] carr, final int start, final int length) throws SAXException { - if (this.escaping) { - final int end = start + length; - int i = start; - while (i < end) { - final int codePoint = Character.codePointAt(carr, i); - writeEscaped(codePoint); - i += Character.charCount(codePoint); - } - } else { - writeRaw(carr, start, length); - } - } - - /** - * Writes the specified character to the output escaping the '&', '<', and '>' characters using the - * standard XML escape sequences. Control characters and characters outside the ASCII range are escaped using a - * numeric character reference. Invalid XML characters are not written. - * - * @param c Character to write - * @throws SAXException If there is an error writing the character. The SAXException wraps an IOException. 
- */ - @AccessForTesting - void writeEscaped(final int c) throws SAXException { - switch (c) { - case '&' -> writeRaw("&"); - case '<' -> writeRaw("<"); - case '>' -> writeRaw(">"); - case '\n' -> writeNewline(); - case '\t', '\r' -> writeRaw((char)c); - default -> { - if (c > 0x001F && c < 0x007F) { - writeRaw((char)c); - } else if ((c >= 0x007F && c <= 0xD7FF) - || (c >= 0xE000 && c <= 0xFFFD) - || (c >= 0x10000 && c <= 0x10FFFF)) { - writeRaw("&#x"); - writeRaw(Integer.toHexString(c).toUpperCase()); - writeRaw(';'); - } - } + try { + XmlEscaper.escape(carr, start, length, this.out, this.escapeOptions); + } catch (final IOException ex) { + throw new SAXException(ex); } } diff --git a/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java b/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java index 6e6c384..706244c 100755 --- a/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java +++ b/src/test/java/org/cthing/xmlwriter/XmlWriterTest.java @@ -41,7 +41,6 @@ import org.xml.sax.helpers.AttributesImpl; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.jupiter.params.provider.Arguments.arguments; @SuppressWarnings({ "HttpUrlsUsage", "UnnecessaryUnicodeEscape" }) @@ -91,60 +90,43 @@ void testWriteRawChar() throws Exception { @DisplayName("Write an array with escaping") void testWriteEscapedArray() throws Exception { final String testStringIn = "\" World\u00A9\u001A\uFFFE\uD83D\uDE03\t\n"; - final String testStringOut = "<Hello &<>\" World©😃\t\n"; + final String testStringOut = "<Hello &<>" World\u00A9\uD83D\uDE03\t\n"; this.xmlWriter.writeEscaped(testStringIn.toCharArray(), 0, testStringIn.length()); assertThat(this.stringWriter).hasToString(testStringOut); } - public static Stream writeEscapedProvider() { - return Stream.of( - arguments(' ', " "), - arguments('a', "a"), - arguments('Z', "Z"), - arguments('~', "~"), - arguments('&', "&"), - arguments('<', "<"), - arguments('>', ">"), - arguments('\n', "\n"), - arguments('\t', "\t"), - 
arguments('\r', "\r"), - arguments(0x0, ""), - arguments(0x1F, ""), - arguments(0xFFFF, ""), - arguments(0x7F, ""), - arguments(0xD7FF, "퟿"), - arguments(0xE000, ""), - arguments(0xFFFD, "�"), - arguments(0x1F603, "😃") - ); - } + @Test + @DisplayName("Write an array with escaping") + void testWriteEscapedArrayNonAscii() throws Exception { + final String testStringIn = "\" World\u00A9\u001A\uFFFE\uD83D\uDE03\t\n"; + final String testStringOut = "<Hello &<>" World©😃\t\n"; - @ParameterizedTest - @MethodSource("writeEscapedProvider") - @DisplayName("Write a character with escaping") - void testWriteEscapedChar(final int ch, final String expected) throws Exception { - this.xmlWriter.writeEscaped(ch); - assertThat(this.stringWriter).hasToString(expected); + this.xmlWriter.setEscapeNonAscii(true); + this.xmlWriter.writeEscaped(testStringIn.toCharArray(), 0, testStringIn.length()); + + assertThat(this.stringWriter).hasToString(testStringOut); } @Test - @DisplayName("Write a string with escaping disabled") - void testWriteEscapedDisabled() throws Exception { - final String testStringIn = "\" World\u00A9\u001A\t\n"; - this.xmlWriter.setEscaping(false); + @DisplayName("Write an array with escaping") + void testWriteEscapedArrayNonAsciiDecimal() throws Exception { + final String testStringIn = "\" World\u00A9\u001A\uFFFE\uD83D\uDE03\t\n"; + final String testStringOut = "<Hello &<>" World©😃\t\n"; + this.xmlWriter.setEscapeNonAscii(true); + this.xmlWriter.setUseDecimal(true); this.xmlWriter.writeEscaped(testStringIn.toCharArray(), 0, testStringIn.length()); - assertThat(this.stringWriter).hasToString(testStringIn); + assertThat(this.stringWriter).hasToString(testStringOut); } @Test @DisplayName("Write a string adding quotes") void testWriteQuotedString() throws Exception { final String testStringIn = "Hello &<>\"' World\u00A9"; - final String testStringOut = "\"Hello &<>"' World©\""; + final String testStringOut = "\"Hello &<>"' World\u00A9\""; 
this.xmlWriter.writeQuoted(testStringIn); @@ -157,22 +139,12 @@ void testWriteQuotedArray() throws Exception { final String testStringIn = "Hello &<>\"' World\u00A9"; final String testStringOut = "\"Hello &<>"' World©\""; + this.xmlWriter.setEscapeNonAscii(true); this.xmlWriter.writeQuoted(testStringIn.toCharArray(), 0, testStringIn.length()); assertThat(this.stringWriter).hasToString(testStringOut); } - @Test - @DisplayName("Write string adding quotes with escaping disabled") - void testWriteQuotedNoEscaping() throws Exception { - final String testStringIn = "Hello &<>\"' World\u00A9"; - this.xmlWriter.setEscaping(false); - - this.xmlWriter.writeQuoted(testStringIn); - - assertThat(this.stringWriter).hasToString("\"" + testStringIn + "\""); - } - @ParameterizedTest @MethodSource("minimalDocumentProvider") @DisplayName("Write a minimal XML document")