Skip to content

Commit

Permalink
Support valid XML characters outside the Unicode BMP. Write numeric c…
Browse files Browse the repository at this point in the history
…haracter entities in hex. Bump version number to 2.1.0.
  • Loading branch information
baron1405 committed Mar 23, 2024
1 parent 46357f1 commit d33dd8c
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 36 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- XML characters outside the [Unicode Basic Multilingual Plane](https://en.wikipedia.org/wiki/Plane_(Unicode))
(i.e. 0x10000-0x10FFFF) are now supported and escaped
- [Dependency analysis Gradle plugin](https://github.com/autonomousapps/dependency-analysis-gradle-plugin)
- The `check` task now depends on the `buildHealth` task and will fail the build on health violations
- The `check` task now depends on the `buildHealth` task and will fail the build on health violations such as
unused dependencies

### Changed

- Numeric character entities are now written in hexadecimal (e.g. `&#xA9;`) rather than decimal
- Invalid XML characters are now written in hexadecimal (e.g. `ctrl-0xFFFE`) rather than decimal
- Changed JSR-305 dependency from `implementation` to `api`

## [2.0.1] - 2023-12-23
Expand Down
2 changes: 1 addition & 1 deletion build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ plugins {
alias(libs.plugins.versions)
}

val baseVersion = "2.0.2"
val baseVersion = "2.1.0"
val isSnapshot = true

val isCIServer = System.getenv("CTHING_CI") != null
Expand Down
7 changes: 0 additions & 7 deletions dev/checkstyle/checkstyle.xml
Original file line number Diff line number Diff line change
Expand Up @@ -259,13 +259,6 @@
<!-- Local variables must be camel case starting with a lowercase letter -->
<module name="LocalVariableName"/>

<!-- Numbers other than -1, 0, 1, and 2 must be defined as a constant except in hashCode methods and annotations -->
<module name="MagicNumber">
<property name="ignoreHashCodeMethod" value="true"/>
<property name="ignoreAnnotation" value="true"/>
<message key="magic.number" value="Replace magic number ''{0}'' with a semantically meaningful constant"/>
</module>

<!-- Non-static fields are camel case starting with a lowercase letter -->
<module name="MemberName"/>

Expand Down
34 changes: 20 additions & 14 deletions src/main/java/org/cthing/xmlwriter/XmlWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,6 @@ private enum Event {
private static final String DEF_INDENT = " ";
private static final String DEF_OFFSET = "";
private static final String SYNTH_NS_PREFIX = "__NS";
private static final String NEWLINE = System.lineSeparator();
private static final AttributesImpl EMPTY_ATTRS = new AttributesImpl();

/** All output is written to this writer. */
Expand Down Expand Up @@ -2514,7 +2513,7 @@ private int writeNSDecls() throws SAXException {
*/
private void writeNewline() throws SAXException {
try {
this.out.write(NEWLINE);
this.out.write(System.lineSeparator());
} catch (final IOException ex) {
throw new SAXException(ex);
}
Expand Down Expand Up @@ -2613,7 +2612,8 @@ void writeQuoted(final char[] carr, final int start, final int length) throws SA
* @param length Number of characters to test
* @return {@code true} if the specified character array requires escaping.
*/
private static boolean needsEscaping(final char[] carr, final int start, final int length) {
@AccessForTesting
static boolean needsEscaping(final char[] carr, final int start, final int length) {
int end = start + length;
while (--end >= start) {
final char c = carr[end];
Expand Down Expand Up @@ -2643,8 +2643,11 @@ private static boolean needsEscaping(final char[] carr, final int start, final i
void writeEscaped(final char[] carr, final int start, final int length) throws SAXException {
if (this.escaping && needsEscaping(carr, start, length)) {
final int end = start + length;
for (int i = start; i < end; i++) {
writeEscaped(carr[i]);
int i = start;
while (i < end) {
final int codePoint = Character.codePointAt(carr, i);
writeEscaped(codePoint);
i += Character.charCount(codePoint);
}
} else {
writeRaw(carr, start, length);
Expand All @@ -2654,29 +2657,32 @@ void writeEscaped(final char[] carr, final int start, final int length) throws S
/**
* Writes the specified character to the output escaping the '&amp;', '&lt;', and '&gt;' characters using the
* standard XML escape sequences. Control characters and characters outside the ASCII range are escaped using a
* numeric character reference. Invalid XML control characters are written as "ctrl-nnn".
* numeric character reference. Invalid XML control characters are written as {@code ctrl-0xN} where {@code N}
* is the hexadecimal value of the invalid character.
*
* @param c Character to write
* @throws SAXException If there is an error writing the character. The SAXException wraps an IOException.
*/
@AccessForTesting
void writeEscaped(final char c) throws SAXException {
void writeEscaped(final int c) throws SAXException {
switch (c) {
case '&' -> writeRaw("&amp;");
case '<' -> writeRaw("&lt;");
case '>' -> writeRaw("&gt;");
case '\n' -> writeNewline();
case '\t', '\r' -> writeRaw(c);
case '\t', '\r' -> writeRaw((char)c);
default -> {
if (c > '\u001F' && c < '\u007F') {
writeRaw(c);
} else if ((c >= '\u007F' && c <= '\uD7FF') || (c >= '\uE000' && c <= '\uFFFD')) {
writeRaw("&#");
writeRaw(Integer.toString(c));
writeRaw((char)c);
} else if ((c >= '\u007F' && c <= '\uD7FF')
|| (c >= '\uE000' && c <= '\uFFFD')
|| (c >= 0x10000 && c <= 0x10FFFF)) {
writeRaw("&#x");
writeRaw(Integer.toHexString(c).toUpperCase());
writeRaw(';');
} else {
writeRaw("ctrl-");
writeRaw(Integer.toString(c));
writeRaw("ctrl-0x");
writeRaw(Integer.toHexString(c).toUpperCase());
}
}
}
Expand Down
70 changes: 57 additions & 13 deletions src/test/java/org/cthing/xmlwriter/XmlWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,12 @@
import org.xml.sax.helpers.AttributesImpl;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.params.provider.Arguments.arguments;


@SuppressWarnings({ "HttpUrlsUsage", "UnnecessaryUnicodeEscape" })
class XmlWriterTest {

/** Specifies the newline character sequence to use. */
private static final String NEWLINE = System.getProperty("line.separator");

private StringWriter stringWriter;
private XmlWriter xmlWriter;

Expand Down Expand Up @@ -89,23 +87,69 @@ void testWriteRawChar() throws Exception {
assertThat(this.stringWriter).hasToString(testString);
}

/**
 * Supplies the cases for {@code testNeedsEscaping}: each entry pairs an input string with the
 * result {@code XmlWriter.needsEscaping} is expected to return for it.
 *
 * @return Stream of (input string, expected result) argument pairs.
 */
public static Stream<Arguments> needsEscapingProvider() {
    return Stream.of(
            // Empty and plain ASCII input is expected to need no escaping.
            Arguments.of("", false),
            Arguments.of("abc", false),
            // Markup characters at the start, end and middle of the input.
            Arguments.of("<abc", true),
            Arguments.of("abc>", true),
            Arguments.of("a&bc", true),
            // Newline is expected to require escaping; tab and carriage return are not.
            Arguments.of("a\nbc", true),
            Arguments.of("a\tbc", false),
            Arguments.of("a\rbc", false),
            // Characters beyond ASCII, including a surrogate pair (U+1F603).
            Arguments.of("a\u008Abc", true),
            Arguments.of("a\uE08Abc", true),
            Arguments.of("a\uD83D\uDE03bc", true)
    );
}

/**
 * Verifies that {@code XmlWriter.needsEscaping} reports the expected result when given the
 * entire character content of the supplied string.
 *
 * @param str String whose characters are examined
 * @param needsEscaping Result the method under test is expected to return
 */
@ParameterizedTest
@MethodSource("needsEscapingProvider")
@DisplayName("Determine whether an array requires escaping")
void testNeedsEscaping(final String str, final boolean needsEscaping) {
    final char[] chars = str.toCharArray();
    // The array length equals str.length(), so the whole string is examined.
    final boolean actual = XmlWriter.needsEscaping(chars, 0, chars.length);
    assertThat(actual).isEqualTo(needsEscaping);
}

@Test
@DisplayName("Write an array with escaping")
void testWriteEscapedArray() throws Exception {
final String testStringIn = "<Hello &<>\" World\u00A9\u001A\t\n";
final String testStringOut = "&lt;Hello &amp;&lt;&gt;\" World&#169;ctrl-26\t\n";
final String testStringIn = "<Hello &<>\" World\u00A9\u001A\uFFFE\uD83D\uDE03\t\n";
final String testStringOut = "&lt;Hello &amp;&lt;&gt;\" World&#xA9;ctrl-0x1Actrl-0xFFFE&#x1F603;\t\n";

this.xmlWriter.writeEscaped(testStringIn.toCharArray(), 0, testStringIn.length());

assertThat(this.stringWriter).hasToString(testStringOut);
}

@Test
@DisplayName("Write a character with escaping")
void testWriteEscapedChar() throws Exception {
this.xmlWriter.writeEscaped('\u00A9');
/**
 * Supplies the cases for {@code testWriteEscapedChar}: each entry pairs a character (or code point)
 * with the exact text the writer is expected to emit for it.
 *
 * @return Stream of (character, expected output) argument pairs.
 */
public static Stream<Arguments> writeEscapedProvider() {
    return Stream.of(
            // Printable ASCII is expected to be written unchanged.
            Arguments.of(' ', " "),
            Arguments.of('a', "a"),
            Arguments.of('Z', "Z"),
            Arguments.of('~', "~"),
            // Markup characters are expected as named entities.
            Arguments.of('&', "&amp;"),
            Arguments.of('<', "&lt;"),
            Arguments.of('>', "&gt;"),
            // Whitespace control characters.
            Arguments.of('\n', "\n"),
            Arguments.of('\t', "\t"),
            Arguments.of('\r', "\r"),
            // Values expected in the "ctrl-0x" form.
            Arguments.of(0x0, "ctrl-0x0"),
            Arguments.of(0x1F, "ctrl-0x1F"),
            Arguments.of(0xFFFF, "ctrl-0xFFFF"),
            // Values expected as hexadecimal numeric character references, including one above U+FFFF.
            Arguments.of(0x7F, "&#x7F;"),
            Arguments.of(0xD7FF, "&#xD7FF;"),
            Arguments.of(0xE000, "&#xE000;"),
            Arguments.of(0xFFFD, "&#xFFFD;"),
            Arguments.of(0x1F603, "&#x1F603;")
    );
}

assertThat(this.stringWriter).hasToString("&#169;");
/**
 * Writes a single character through {@code writeEscaped} and checks the text captured by the
 * string writer against the expected output.
 *
 * @param ch Character or code point to write
 * @param expected Text expected in the output after the write
 * @throws Exception If the writer reports an error
 */
@ParameterizedTest
@MethodSource("writeEscapedProvider")
@DisplayName("Write a character with escaping")
void testWriteEscapedChar(final int ch, final String expected) throws Exception {
    this.xmlWriter.writeEscaped(ch);

    final String written = this.stringWriter.toString();
    assertThat(written).isEqualTo(expected);
}

@Test
Expand All @@ -123,7 +167,7 @@ void testWriteEscapedDisabled() throws Exception {
@DisplayName("Write a string adding quotes")
void testWriteQuotedString() throws Exception {
final String testStringIn = "Hello &<>\"' World\u00A9";
final String testStringOut = "\"Hello &amp;&lt;&gt;&quot;&apos; World&#169;\"";
final String testStringOut = "\"Hello &amp;&lt;&gt;&quot;&apos; World&#xA9;\"";

this.xmlWriter.writeQuoted(testStringIn);

Expand All @@ -134,7 +178,7 @@ void testWriteQuotedString() throws Exception {
@DisplayName("Write an array adding quotes")
void testWriteQuotedArray() throws Exception {
final String testStringIn = "Hello &<>\"' World\u00A9";
final String testStringOut = "\"Hello &amp;&lt;&gt;&quot;&apos; World&#169;\"";
final String testStringOut = "\"Hello &amp;&lt;&gt;&quot;&apos; World&#xA9;\"";

this.xmlWriter.writeQuoted(testStringIn.toCharArray(), 0, testStringIn.length());

Expand Down Expand Up @@ -193,7 +237,7 @@ private static Stream<Arguments> minimalDocumentProvider() {
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
"""),
Arguments.of("UTF-8", false, true, NEWLINE)
Arguments.of("UTF-8", false, true, System.lineSeparator())
);
}

Expand Down

0 comments on commit d33dd8c

Please sign in to comment.