From 9e09d8b08250aa3edff9d80ac4f3d356e8c23537 Mon Sep 17 00:00:00 2001 From: Simon Gene Gottlieb Date: Thu, 22 Aug 2024 10:29:29 +0200 Subject: [PATCH] fix: parse files with '\r' symbols as line ending correctly --- src/stream.cpp | 19 ++++++++++++++++++- src/stream.h | 1 + test/integration/load_node_test.cpp | 16 ++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/stream.cpp b/src/stream.cpp index b1aa092f6..72f0ec0ae 100644 --- a/src/stream.cpp +++ b/src/stream.cpp @@ -262,7 +262,24 @@ char Stream::get() { AdvanceCurrent(); m_mark.column++; - if (ch == '\n') { + // if line ending symbol is unknown, set it to the first + // encountered line ending. + // if line ending is '\r', set ending symbol to '\r' + // otherwise set it to '\n' + if (!m_lineEndingSymbol) { + if (ch == '\n') { // line ending is '\n' + m_lineEndingSymbol = '\n'; + } else if (ch == '\r') { + auto ch2 = peek(); + if (ch2 == '\n') { // line ending is '\r\n' + m_lineEndingSymbol = '\n'; + } else { // line ending is '\r' + m_lineEndingSymbol = '\r'; + } + } + } + + if (ch == m_lineEndingSymbol) { m_mark.column = 0; m_mark.line++; } diff --git a/src/stream.h b/src/stream.h index 2bc7a1521..214104ade 100644 --- a/src/stream.h +++ b/src/stream.h @@ -53,6 +53,7 @@ class Stream { Mark m_mark; CharacterSet m_charSet; + char m_lineEndingSymbol{}; // 0 means it is not determined yet, otherwise '\n' or '\r' mutable std::deque m_readahead; unsigned char* const m_pPrefetched; mutable size_t m_nPrefetchedAvailable; diff --git a/test/integration/load_node_test.cpp b/test/integration/load_node_test.cpp index 9d0c790fd..1cc84a45a 100644 --- a/test/integration/load_node_test.cpp +++ b/test/integration/load_node_test.cpp @@ -360,5 +360,21 @@ TEST(LoadNodeTest, BlockCRNLEncoded) { EXPECT_EQ(1, node["followup"].as()); } +TEST(LoadNodeTest, BlockCREncoded) { + Node node = Load( + "blockText: |\r" + " some arbitrary text \r" + " spanning some \r" + " lines, that are split \r" + " by CR and NL\r" 
+ "followup: 1"); + EXPECT_EQ( + "some arbitrary text \nspanning some \nlines, that are split \nby CR and " + "NL\n", + node["blockText"].as()); + EXPECT_EQ(1, node["followup"].as()); +} + + } // namespace } // namespace YAML