Skip to content

Commit d901d56

Browse files
authored
Faster string skipping with swar (#1499)
1 parent 111bf41 commit d901d56

File tree

1 file changed

+91
-0
lines changed

1 file changed

+91
-0
lines changed

include/glaze/util/parse.hpp

+91
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,97 @@ namespace glz::detail
779779
}
780780

781781
template <opts Opts>
782+
requires(has_is_padded(Opts))
783+
GLZ_ALWAYS_INLINE void skip_string(is_context auto&& ctx, auto&& it, auto&& end) noexcept
784+
{
785+
if constexpr (!has_opening_handled(Opts)) {
786+
++it;
787+
}
788+
789+
if constexpr (Opts.validate_skipped) {
790+
while (true) {
791+
uint64_t swar{};
792+
std::memcpy(&swar, it, 8);
793+
794+
constexpr uint64_t lo7_mask = repeat_byte8(0b01111111);
795+
const uint64_t lo7 = swar & lo7_mask;
796+
const uint64_t backslash = (lo7 ^ repeat_byte8('\\')) + lo7_mask;
797+
const uint64_t quote = (lo7 ^ repeat_byte8('"')) + lo7_mask;
798+
const uint64_t less_32 = (swar & repeat_byte8(0b01100000)) + lo7_mask;
799+
uint64_t next = ~((backslash & quote & less_32) | swar);
800+
next &= repeat_byte8(0b10000000);
801+
802+
if (next == 0) {
803+
// No special characters in this chunk
804+
it += 8;
805+
continue;
806+
}
807+
808+
// Find the first occurrence
809+
size_t offset = (countr_zero(next) >> 3);
810+
it += offset;
811+
812+
const auto c = *it;
813+
if ((c & 0b11100000) == 0) [[unlikely]] {
814+
// Invalid control character (<0x20)
815+
ctx.error = error_code::syntax_error;
816+
return;
817+
}
818+
else if (c == '"') {
819+
// Check if this quote is escaped
820+
const auto* p = it - 1;
821+
int backslash_count{};
822+
// We don't have to worry about rewinding too far because we started with a quote
823+
while (*p == '\\') {
824+
++backslash_count;
825+
--p;
826+
}
827+
if ((backslash_count & 1) == 0) {
828+
// Even number of backslashes => not escaped => closing quote found
829+
++it;
830+
return;
831+
}
832+
else {
833+
// Odd number of backslashes => escaped quote
834+
++it;
835+
continue;
836+
}
837+
}
838+
else if (c == '\\') {
839+
// Handle escape sequence
840+
++it;
841+
842+
if (*it == 'u') {
843+
++it;
844+
if (not skip_unicode_code_point(it, end)) [[unlikely]] {
845+
ctx.error = error_code::unicode_escape_conversion_failure;
846+
return;
847+
}
848+
}
849+
else {
850+
if (not char_unescape_table[uint8_t(*it)]) [[unlikely]] {
851+
ctx.error = error_code::invalid_escape;
852+
return;
853+
}
854+
++it;
855+
}
856+
}
857+
}
858+
859+
// If we exit here, we never found a closing quote
860+
ctx.error = error_code::unexpected_end;
861+
}
862+
else {
863+
skip_string_view<Opts>(ctx, it, end);
864+
if (bool(ctx.error)) [[unlikely]] {
865+
return;
866+
}
867+
++it; // skip the quote
868+
}
869+
}
870+
871+
template <opts Opts>
872+
requires(not has_is_padded(Opts))
782873
GLZ_ALWAYS_INLINE void skip_string(is_context auto&& ctx, auto&& it, auto&& end) noexcept
783874
{
784875
if constexpr (!has_opening_handled(Opts)) {

0 commit comments

Comments
 (0)