Skip to content

Commit 8de3465

Browse files
authored
Adding currency csv test (#1446)
* Adding currency csv test * Handling quote logic within CSV * proper quote handling for CSV * Using visit * Don't double skip comma * Update csv_test.cpp * Commenting out rowwise currency test
1 parent 5775bb4 commit 8de3465

File tree

5 files changed

+618
-83
lines changed

5 files changed

+618
-83
lines changed

include/glaze/csv/read.hpp

+99-83
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,9 @@ namespace glz
117117
}
118118
}
119119
};
120+
121+
// CSV spec: https://www.ietf.org/rfc/rfc4180.txt
122+
// Quotes are escaped via double quotes
120123

121124
template <string_t T>
122125
struct from<CSV, T>
@@ -129,35 +132,51 @@ namespace glz
129132
}
130133

131134
value.clear();
132-
auto start = it;
133-
while (it < end) {
134-
switch (*it) {
135-
case ',':
136-
case '\n': {
137-
value.append(start, static_cast<size_t>(it - start));
138-
return;
135+
136+
if (it == end) {
137+
return;
138+
}
139+
140+
if (*it == '"') {
141+
// Quoted field
142+
++it; // Skip the opening quote
143+
while (it != end) {
144+
if (*it == '"') {
145+
++it; // Skip the quote
146+
if (it == end) {
147+
// End of input after closing quote
148+
break;
149+
}
150+
if (*it == '"') {
151+
// Escaped quote
152+
value.push_back('"');
153+
++it;
154+
} else {
155+
// Closing quote
156+
break;
157+
}
158+
} else {
159+
value.push_back(*it);
160+
++it;
161+
}
139162
}
140-
case '\\':
141-
case '\b':
142-
case '\f':
143-
case '\r':
144-
case '\t': {
163+
// After the closing quote only a field separator (','), a newline, or end of
// input may follow; any other character means the quoted field is malformed.
164+
if (it != end && *it != ',' && *it != '\n') {
165+
// Invalid character after closing quote
145166
ctx.error = error_code::syntax_error;
146167
return;
147168
}
148-
case '\0': {
149-
ctx.error = error_code::unexpected_end;
150-
return;
151-
}
152-
default:
169+
} else {
170+
// Unquoted field
171+
while (it != end && *it != ',' && *it != '\n') {
172+
value.push_back(*it);
153173
++it;
154174
}
155175
}
156-
157-
value.append(start, static_cast<size_t>(it - start));
158176
}
159177
};
160178

179+
161180
template <bool_t T>
162181
struct from<CSV, T>
163182
{
@@ -439,89 +458,87 @@ namespace glz
439458
decode_hash_with_size<CSV, T, HashInfo, HashInfo.type>::op(key.data(), end, key.size());
440459

441460
if (index < N) [[likely]] {
442-
jump_table<N>(
443-
[&]<size_t I>() {
444-
decltype(auto) member = [&]() -> decltype(auto) {
445-
if constexpr (reflectable<T>) {
446-
return get_member(value, get<I>(to_tuple(value)));
461+
visit<N>([&]<size_t I>() {
462+
decltype(auto) member = [&]() -> decltype(auto) {
463+
if constexpr (reflectable<T>) {
464+
return get_member(value, get<I>(to_tuple(value)));
465+
}
466+
else {
467+
return get_member(value, get<I>(reflect<T>::values));
468+
}
469+
}();
470+
471+
using M = std::decay_t<decltype(member)>;
472+
if constexpr (fixed_array_value_t<M> && emplace_backable<M>) {
473+
size_t col = 0;
474+
while (it != end) {
475+
if (col < member.size()) [[likely]] {
476+
read<CSV>::op<Opts>(member[col][csv_index], ctx, it, end);
447477
}
448-
else {
449-
return get_member(value, get<I>(reflect<T>::values));
478+
else [[unlikely]] {
479+
read<CSV>::op<Opts>(member.emplace_back()[csv_index], ctx, it, end);
450480
}
451-
}();
452-
453-
using M = std::decay_t<decltype(member)>;
454-
if constexpr (fixed_array_value_t<M> && emplace_backable<M>) {
455-
size_t col = 0;
456-
while (it != end) {
457-
if (col < member.size()) [[likely]] {
458-
read<CSV>::op<Opts>(member[col][csv_index], ctx, it, end);
459-
}
460-
else [[unlikely]] {
461-
read<CSV>::op<Opts>(member.emplace_back()[csv_index], ctx, it, end);
462-
}
463481

464-
if (*it == '\r') {
465-
++it;
466-
if (*it == '\n') {
467-
++it;
468-
break;
469-
}
470-
else [[unlikely]] {
471-
ctx.error = error_code::syntax_error;
472-
return;
473-
}
474-
}
475-
else if (*it == '\n') {
482+
if (*it == '\r') {
483+
++it;
484+
if (*it == '\n') {
476485
++it;
477486
break;
478487
}
479-
else if (it == end) {
480-
return;
481-
}
482-
483-
if (*it == ',') [[likely]] {
484-
++it;
485-
}
486488
else [[unlikely]] {
487489
ctx.error = error_code::syntax_error;
488490
return;
489491
}
492+
}
493+
else if (*it == '\n') {
494+
++it;
495+
break;
496+
}
497+
else if (it == end) {
498+
return;
499+
}
490500

491-
++col;
501+
if (*it == ',') [[likely]] {
502+
++it;
492503
}
504+
else [[unlikely]] {
505+
ctx.error = error_code::syntax_error;
506+
return;
507+
}
508+
509+
++col;
493510
}
494-
else {
495-
while (it != end) {
496-
read<CSV>::op<Opts>(member, ctx, it, end);
511+
}
512+
else {
513+
while (it != end) {
514+
read<CSV>::op<Opts>(member, ctx, it, end);
497515

498-
if (*it == '\r') {
499-
++it;
500-
if (*it == '\n') {
501-
++it;
502-
break;
503-
}
504-
else [[unlikely]] {
505-
ctx.error = error_code::syntax_error;
506-
return;
507-
}
508-
}
509-
else if (*it == '\n') {
516+
if (*it == '\r') {
517+
++it;
518+
if (*it == '\n') {
510519
++it;
511520
break;
512521
}
513-
514-
if (*it == ',') [[likely]] {
515-
++it;
516-
}
517522
else [[unlikely]] {
518523
ctx.error = error_code::syntax_error;
519524
return;
520525
}
521526
}
527+
else if (*it == '\n') {
528+
++it;
529+
break;
530+
}
531+
532+
if (*it == ',') [[likely]] {
533+
++it;
534+
}
535+
else [[unlikely]] {
536+
ctx.error = error_code::syntax_error;
537+
return;
538+
}
522539
}
523-
},
524-
index);
540+
}
541+
}, index);
525542

526543
if (bool(ctx.error)) [[unlikely]] {
527544
return;
@@ -556,7 +573,7 @@ namespace glz
556573
key.data(), key.data() + key.size(), key.size());
557574

558575
if (index < N) [[likely]] {
559-
jump_table<N>(
576+
visit<N>(
560577
[&]<size_t I>() {
561578
decltype(auto) member = [&]() -> decltype(auto) {
562579
if constexpr (reflectable<T>) {
@@ -629,13 +646,12 @@ namespace glz
629646
return value;
630647
}
631648

632-
template <uint32_t layout = rowwise, read_csv_supported T>
633-
[[nodiscard]] inline error_ctx read_file_csv(T& value, const sv file_name)
649+
template <uint32_t layout = rowwise, read_csv_supported T, is_buffer Buffer>
650+
[[nodiscard]] inline error_ctx read_file_csv(T& value, const sv file_name, Buffer&& buffer)
634651
{
635652
context ctx{};
636653
ctx.current_file = file_name;
637654

638-
std::string buffer;
639655
const auto ec = file_to_buffer(buffer, ctx.current_file);
640656

641657
if (bool(ec)) {

include/glaze/util/for_each.hpp

+12
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@ namespace glz
5050
}
5151
}
5252

53+
namespace glz
54+
{
55+
// Runtime-to-compile-time dispatch: calls `lambda.template operator()<I>()`
// for the I that equals the runtime `index`.
//   N      - number of candidate indices; pointers are generated for I in [0, N).
//   lambda - callable whose call operator is templated on a size_t and takes no arguments.
//   index  - selects which instantiation to invoke. It is not range-checked
//            in this function, so the caller must ensure index < N.
template <size_t N>
56+
inline constexpr void visit(auto&& lambda, const size_t index) {
57+
// Table of pointers to the lambda's call operator, one entry per I in [0, N).
static constexpr auto mem_ptrs = []<size_t... I>(std::index_sequence<I...>) constexpr {
58+
return std::array{ &std::decay_t<decltype(lambda)>::template operator()<I>... };
59+
}(std::make_index_sequence<N>{});
60+
61+
// Invoke the instantiation selected at runtime through its member pointer.
(lambda.*mem_ptrs[index])();
62+
}
63+
}
64+
5365
#define GLZ_PARENS ()
5466

5567
// binary expansion is much more compile time efficient than quaternary expansion

tests/csv_test/CMakeLists.txt

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ if (MINGW)
88
target_compile_options(${PROJECT_NAME} PRIVATE "-Wa,-mbig-obj")
99
endif ()
1010

11+
target_compile_definitions(${PROJECT_NAME} PRIVATE
12+
GLZ_TEST_DIRECTORY="${CMAKE_CURRENT_SOURCE_DIR}"
13+
)
14+
1115
add_test(NAME ${PROJECT_NAME} COMMAND ${PROJECT_NAME})
1216

1317
target_code_coverage(${PROJECT_NAME} AUTO ALL)

tests/csv_test/csv_test.cpp

+57
Original file line numberDiff line numberDiff line change
@@ -589,4 +589,61 @@ suite fish_record = [] {
589589
};
590590
};
591591

592+
// Target object for the currency.csv test below: one vector of strings per
// field, each read as raw text (numeric codes are kept as strings too).
// NOTE(review): presumably the member names must match the CSV header names
// for the reader to map columns to members - confirm against currency.csv.
struct CurrencyCSV {
593+
std::vector<std::string> Entity;
594+
std::vector<std::string> Currency;
595+
std::vector<std::string> AlphabeticCode;
596+
std::vector<std::string> NumericCode;
597+
std::vector<std::string> MinorUnit;
598+
std::vector<std::string> WithdrawalDate;
599+
};
600+
601+
suite currency_csv_test = [] {
602+
"currency_col"_test = [] {
603+
CurrencyCSV obj{};
604+
std::string buffer{};
605+
auto ec = glz::read_file_csv<glz::colwise>(obj, GLZ_TEST_DIRECTORY "/currency.csv", buffer);
606+
expect(not ec) << glz::format_error(ec, buffer) << '\n';
607+
608+
constexpr auto kExpectedSize = 445;
609+
610+
expect(obj.Entity.size() == kExpectedSize);
611+
expect(obj.Currency.size() == kExpectedSize);
612+
expect(obj.AlphabeticCode.size() == kExpectedSize);
613+
expect(obj.NumericCode.size() == kExpectedSize);
614+
expect(obj.MinorUnit.size() == kExpectedSize);
615+
expect(obj.WithdrawalDate.size() == kExpectedSize);
616+
617+
expect(obj.Entity[0] == "AFGHANISTAN");
618+
expect(obj.Currency[0] == "Afghani");
619+
expect(obj.AlphabeticCode[0] == "AFN");
620+
expect(obj.NumericCode[0] == "971");
621+
expect(obj.MinorUnit[0] == "2");
622+
expect(obj.WithdrawalDate[0] == "");
623+
624+
expect(obj.Entity[29] == "BONAIRE, SINT EUSTATIUS AND SABA");
625+
expect(obj.Currency[29] == "US Dollar");
626+
expect(obj.AlphabeticCode[29] == "USD");
627+
expect(obj.NumericCode[29] == "840");
628+
expect(obj.MinorUnit[29] == "2");
629+
expect(obj.WithdrawalDate[29] == "");
630+
631+
expect(obj.Entity[324] == "EUROPEAN MONETARY CO-OPERATION FUND (EMCF)");
632+
expect(obj.Currency[324] == "European Currency Unit (E.C.U)");
633+
expect(obj.AlphabeticCode[324] == "XEU");
634+
expect(obj.NumericCode[324] == "954");
635+
expect(obj.MinorUnit[324] == "");
636+
expect(obj.WithdrawalDate[324] == "1999-01");
637+
};
638+
"currency_row"_test = [] {
639+
/*CurrencyCSV obj{};
640+
std::string buffer{};
641+
auto ec = glz::read_file_csv<glz::colwise>(obj, GLZ_TEST_DIRECTORY "/currency.csv", buffer);
642+
ec = glz::write_file_csv(obj, "currency_rowwise.csv", std::string{});
643+
expect(not ec);
644+
ec = glz::read_file_csv(obj, "currency_rowwise.csv", buffer);
645+
expect(not ec) << glz::format_error(ec, buffer) << '\n';*/
646+
};
647+
};
648+
592649
int main() { return 0; }

0 commit comments

Comments
 (0)