Skip to content

Commit a659700

Browse files
committed
♻️ Rewrite the section-spec, header-list, etc parser
1 parent 0ea6aae commit a659700

File tree

1 file changed

+42
-37
lines changed

1 file changed

+42
-37
lines changed

lib/net/imap/response_parser.rb

Lines changed: 42 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,7 +1034,12 @@ def body_extension
10341034
end
10351035

10361036
# section = "[" [section-spec] "]"
1037-
#
1037+
def section
1038+
str = +lbra
1039+
str << section_spec unless peek_rbra?
1040+
str << rbra
1041+
end
1042+
10381043
# section-spec = section-msgtext / (section-part ["." section-text])
10391044
# section-msgtext = "HEADER" /
10401045
# "HEADER.FIELDS" [".NOT"] SP header-list /
@@ -1048,58 +1053,58 @@ def body_extension
10481053
# ; text other than actual body part (headers,
10491054
# ; etc.)
10501055
#
1056+
# N.B.: we could "cheat" here and just grab all text inside the brackets,
1057+
# but literals would need special treatment.
1058+
def section_spec
1059+
str = "".b
1060+
str << atom # grabs everything up to "SP header-list" or "]"
1061+
str << " " << header_list if SP?
1062+
str
1063+
end
1064+
10511065
# header-list = "(" header-fld-name *(SP header-fld-name) ")"
1052-
#
1053-
def section
1054-
str = String.new
1055-
token = match(T_LBRA)
1056-
str.concat(token.value)
1057-
token = match(T_ATOM, T_NUMBER, T_RBRA)
1058-
if token.symbol == T_RBRA
1059-
str.concat(token.value)
1060-
return str
1061-
end
1062-
str.concat(token.value)
1063-
token = lookahead
1064-
if token.symbol == T_SPACE
1065-
shift_token
1066-
str.concat(token.value)
1067-
token = match(T_LPAR)
1068-
str.concat(token.value)
1069-
while true
1070-
token = lookahead
1071-
case token.symbol
1072-
when T_RPAR
1073-
str.concat(token.value)
1074-
shift_token
1075-
break
1076-
when T_SPACE
1077-
shift_token
1078-
str.concat(token.value)
1079-
end
1080-
str.concat(format_string(astring))
1081-
end
1082-
end
1083-
token = match(T_RBRA)
1084-
str.concat(token.value)
1085-
return str
1066+
def header_list
1067+
str = +""
1068+
str << lpar << header_fld_name
1069+
str << " " << header_fld_name while SP?
1070+
str << rpar
10861071
end
10871072

1073+
# RFC3501 & RFC9051:
10881074
# header-fld-name = astring
10891075
#
1076+
# Although RFC3501 allows any astring, RFC5322-valid header names are one
1077+
# or more of the printable US-ASCII characters, except SP and colon. So
1078+
# empty string isn't valid, and literals aren't needed and should not be
1079+
# used. This syntax is unchanged by [I18N-HDRS] (RFC6532).
1080+
#
10901081
# RFC5322:
10911082
# optional-field = field-name ":" unstructured CRLF
10921083
# field-name = 1*ftext
10931084
# ftext = %d33-57 / ; Printable US-ASCII
10941085
# %d59-126 ; characters not including
10951086
# ; ":".
1096-
def format_string(str)
1097-
case str
1087+
#
1088+
# Atom and quoted should be sufficient.
1089+
#
1090+
# TODO: Use original source string, rather than decode and re-encode.
1091+
# TODO: or at least, DRY up this code with the send_command formatting.
1092+
def header_fld_name
1093+
case (str = astring)
10981094
when ""
1095+
warn '%s header-fld-name is an invalid RFC5322 field-name: ""' %
1096+
[self.class]
10991097
return '""'
11001098
when /[\x80-\xff\r\n]/n
1099+
warn "%s header-fld-name %p has invalid RFC5322 field-name char: %p" %
1100+
[self.class, str, $&]
11011101
# literal
11021102
return "{" + str.bytesize.to_s + "}" + CRLF + str
1103+
when /[^\x21-\x39\x3b-\xfe]/n
1104+
warn "%s header-fld-name %p has invalid RFC5322 field-name char: %p" %
1105+
[self.class, str, $&]
1106+
# invalid quoted string
1107+
return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"'
11031108
when /[(){ \x00-\x1f\x7f%*"\\]/n
11041109
# quoted string
11051110
return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"'

0 commit comments

Comments
 (0)