Skip to content

Commit d66f823

Browse files
committed
Make trimming more consistent with regular reader api
1 parent 4820820 commit d66f823

File tree

3 files changed

+35
-33
lines changed

3 files changed

+35
-33
lines changed

src/de/mod.rs

Lines changed: 22 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2168,31 +2168,6 @@ struct XmlReader<'i, R: XmlRead<'i>, E: EntityResolver = PredefinedEntityResolve
21682168
entity_resolver: E,
21692169
}
21702170

2171-
fn trim_cow<'a, F>(value: Cow<'a, str>, trim: F) -> Cow<'a, str>
2172-
where
2173-
F: FnOnce(&str) -> &str,
2174-
{
2175-
match value {
2176-
Cow::Borrowed(bytes) => Cow::Borrowed(trim(bytes)),
2177-
Cow::Owned(mut bytes) => {
2178-
let trimmed = trim(&bytes);
2179-
if trimmed.len() != bytes.len() {
2180-
bytes = trimmed.to_string();
2181-
}
2182-
Cow::Owned(bytes)
2183-
}
2184-
}
2185-
}
2186-
2187-
/// Removes trailing XML whitespace bytes from text content.
2188-
///
2189-
/// Returns `true` if content is empty after that
2190-
fn inplace_trim_end(mut s: &mut Cow<str>) -> bool {
2191-
let c: Cow<str> = replace(&mut s, Cow::Borrowed(""));
2192-
*s = trim_cow(c, str::trim_end);
2193-
s.is_empty()
2194-
}
2195-
21962171
impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
21972172
fn new(mut reader: R, entity_resolver: E) -> Self {
21982173
// Lookahead by one event immediately, so we do not need to check in the
@@ -2369,6 +2344,16 @@ where
23692344
T::deserialize(&mut de)
23702345
}
23712346

2347+
/// Deserialize from a custom reader.
2348+
pub fn from_custom_reader<R, T>(reader: Reader<R>) -> Result<T, DeError>
2349+
where
2350+
R: BufRead,
2351+
T: DeserializeOwned,
2352+
{
2353+
let mut de = Deserializer::from_custom_reader(reader);
2354+
T::deserialize(&mut de)
2355+
}
2356+
23722357
// TODO: According to the https://www.w3.org/TR/xmlschema11-2/#boolean,
23732358
// valid boolean representations are only "true", "false", "1", and "0"
23742359
fn str2bool<'de, V>(value: &str, visitor: V) -> Result<V::Value, DeError>
@@ -2875,8 +2860,6 @@ where
28752860
pub fn from_str_with_resolver(source: &'de str, entity_resolver: E) -> Self {
28762861
let mut reader = Reader::from_str(source);
28772862
let config = reader.config_mut();
2878-
config.trim_text_start = true;
2879-
config.trim_text_end = true;
28802863
config.expand_empty_elements = true;
28812864

28822865
Self::new(
@@ -3129,7 +3112,7 @@ impl StartTrimmer {
31293112
/// Converts raw reader's event into a payload event.
31303113
/// Returns `None`, if event should be skipped.
31313114
#[inline(always)]
3132-
fn trim<'a>(&mut self, event: Event<'a>) -> Option<PayloadEvent<'a>> {
3115+
fn trim<'a>(&mut self, event: Event<'a>, trim_text_start: bool) -> Option<PayloadEvent<'a>> {
31333116
let (event, trim_next_event) = match event {
31343117
Event::DocType(e) => (PayloadEvent::DocType(e), true),
31353118
Event::Start(e) => (PayloadEvent::Start(e), true),
@@ -3140,7 +3123,10 @@ impl StartTrimmer {
31403123
Event::CData(e) => (PayloadEvent::CData(e), false),
31413124
Event::Text(mut e) => {
31423125
// If event is empty after trimming, skip it
3143-
if self.trim_start && e.inplace_trim_start() {
3126+
// Also skip the event if it consists solely of whitespace, regardless of the trimming settings
3127+
if (trim_text_start && self.trim_start && e.inplace_trim_start())
3128+
|| e.is_all_whitespace()
3129+
{
31443130
return None;
31453131
}
31463132
(PayloadEvent::Text(e), false)
@@ -3233,8 +3219,9 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
32333219
loop {
32343220
self.buf.clear();
32353221

3222+
let trim_text_start = self.reader.config().trim_text_start;
32363223
let event = self.reader.read_event_into(&mut self.buf)?;
3237-
if let Some(event) = self.start_trimmer.trim(event) {
3224+
if let Some(event) = self.start_trimmer.trim(event, trim_text_start) {
32383225
return Ok(event.into_owned());
32393226
}
32403227
}
@@ -3303,7 +3290,10 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
33033290
fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
33043291
loop {
33053292
let event = self.reader.read_event()?;
3306-
if let Some(event) = self.start_trimmer.trim(event) {
3293+
if let Some(event) = self
3294+
.start_trimmer
3295+
.trim(event, self.config().trim_text_start)
3296+
{
33073297
return Ok(event);
33083298
}
33093299
}
@@ -4481,7 +4471,7 @@ mod tests {
44814471
fn start() {
44824472
let mut de = make_de(" text <tag1><tag2>");
44834473
// Text is not trimmed: leading and trailing whitespace is preserved
4484-
assert_eq!(de.next().unwrap(), DeEvent::Text("text".into()));
4474+
assert_eq!(de.next().unwrap(), DeEvent::Text(" text ".into()));
44854475
assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag1")));
44864476
assert_eq!(de.next().unwrap(), DeEvent::Start(BytesStart::new("tag2")));
44874477
assert_eq!(de.next().unwrap(), DeEvent::Eof);

src/events/mod.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ use crate::escape::{
5353
use crate::name::{LocalName, QName};
5454
#[cfg(feature = "serialize")]
5555
use crate::utils::CowRef;
56-
use crate::utils::{name_len, trim_xml_end, trim_xml_start, write_cow_string};
56+
use crate::utils::{is_whitespace, name_len, trim_xml_end, trim_xml_start, write_cow_string};
5757
use attributes::{Attribute, Attributes};
5858

5959
/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
@@ -622,6 +622,11 @@ impl<'a> BytesText<'a> {
622622
self.content = trim_cow(replace(&mut self.content, Cow::Borrowed(b"")), trim_xml_end);
623623
self.content.is_empty()
624624
}
625+
626+
/// Returns `true` if every byte of the content is whitespace according to `is_whitespace` (this is also `true` for empty content).
627+
pub fn is_all_whitespace(&mut self) -> bool {
628+
self.content.iter().all(|&x| is_whitespace(x))
629+
}
625630
}
626631

627632
impl<'a> Debug for BytesText<'a> {

tests/reader.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ small_buffers_tests!(
1515
read_event_into: std::io::BufReader<_>
1616
);
1717

18+
#[test]
19+
fn test_text() {
20+
let mut r = Reader::from_str(" text ");
21+
22+
assert_eq!(r.read_event().unwrap(), Text(BytesText::new(" text ")));
23+
}
24+
1825
#[test]
1926
fn test_start_end() {
2027
let mut r = Reader::from_str("<a></a>");

0 commit comments

Comments
 (0)