@@ -1034,7 +1034,12 @@ def body_extension
1034
1034
end
1035
1035
1036
1036
# section = "[" [section-spec] "]"
1037
- #
1037
# Parses a bracketed section and returns it re-assembled as a String:
# the opening bracket, the section-spec (only when the next token is not
# already the closing bracket), then the closing bracket.
def section
  # Unary plus yields an unfrozen string, so appending below never
  # mutates (or fails on) whatever object +lbra+ returned.
  str = +lbra
  str << section_spec unless peek_rbra?
  str << rbra
end
1042
+
1038
1043
# section-spec = section-msgtext / (section-part ["." section-text])
1039
1044
# section-msgtext = "HEADER" /
1040
1045
# "HEADER.FIELDS" [".NOT"] SP header-list /
@@ -1048,58 +1053,58 @@ def body_extension
1048
1053
# ; text other than actual body part (headers,
1049
1054
# ; etc.)
1050
1055
#
1056
+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1057
+ # but literals would need special treatment.
1058
# Parses the text between the section brackets and returns it as a
# binary-encoded String: one atom, followed by a single space and the
# header-list when a space follows the atom.
def section_spec
  str = "".b
  str << atom # grabs everything up to "SP header-list" or "]"
  str << " " << header_list if SP?
  str
end
1064
+
1051
1065
# header-list = "(" header-fld-name *(SP header-fld-name) ")"
1052
- #
1053
- def section
1054
- str = String . new
1055
- token = match ( T_LBRA )
1056
- str . concat ( token . value )
1057
- token = match ( T_ATOM , T_NUMBER , T_RBRA )
1058
- if token . symbol == T_RBRA
1059
- str . concat ( token . value )
1060
- return str
1061
- end
1062
- str . concat ( token . value )
1063
- token = lookahead
1064
- if token . symbol == T_SPACE
1065
- shift_token
1066
- str . concat ( token . value )
1067
- token = match ( T_LPAR )
1068
- str . concat ( token . value )
1069
- while true
1070
- token = lookahead
1071
- case token . symbol
1072
- when T_RPAR
1073
- str . concat ( token . value )
1074
- shift_token
1075
- break
1076
- when T_SPACE
1077
- shift_token
1078
- str . concat ( token . value )
1079
- end
1080
- str . concat ( format_string ( astring ) )
1081
- end
1082
- end
1083
- token = match ( T_RBRA )
1084
- str . concat ( token . value )
1085
- return str
1066
# Parses a parenthesized header-list and returns it re-assembled as a
# String: "(" + first header-fld-name, then " " + header-fld-name for as
# long as a space follows, then ")".
def header_list
  str = +""
  str << lpar << header_fld_name
  str << " " << header_fld_name while SP?
  str << rpar
end
1087
1072
1073
+ # RFC3501 & RFC9051:
1088
1074
# header-fld-name = astring
1089
1075
#
1076
+ # Although RFC3501 allows any astring, RFC5322-valid header names are one
1077
+ # or more of the printable US-ASCII characters, except SP and colon. So
1078
+ # empty string isn't valid, and literals aren't needed and should not be
1079
+ # used. This syntax is unchanged by [I18N-HDRS] (RFC6532).
1080
+ #
1090
1081
# RFC5322:
1091
1082
# optional-field = field-name ":" unstructured CRLF
1092
1083
# field-name = 1*ftext
1093
1084
# ftext = %d33-57 / ; Printable US-ASCII
1094
1085
# %d59-126 ; characters not including
1095
1086
# ; ":".
1096
- def format_string ( str )
1097
- case str
1087
+ #
1088
+ # Atom and quoted should be sufficient.
1089
+ #
1090
+ # TODO: Use original source string, rather than decode and re-encode.
1091
+ # TODO: or at least, DRY up this code with the send_command formatting.
1092
+ def header_fld_name
1093
+ case ( str = astring )
1098
1094
when ""
1095
+ warn '%s header-fld-name is an invalid RFC5322 field-name: ""' %
1096
+ [ self . class ]
1099
1097
return '""'
1100
1098
when /[\x80 -\xff \r \n ]/n
1099
+ warn "%s header-fld-name %p has invalid RFC5322 field-name char: %p" %
1100
+ [ self . class , str , $&]
1101
1101
# literal
1102
1102
return "{" + str . bytesize . to_s + "}" + CRLF + str
1103
+ when /[^\x21 -\x39 \x3b -\xfe ]/n
1104
+ warn "%s header-fld-name %p has invalid RFC5322 field-name char: %p" %
1105
+ [ self . class , str , $&]
1106
+ # invalid quoted string
1107
+ return '"' + str . gsub ( /["\\ ]/n , "\\ \\ \\ &" ) + '"'
1103
1108
when /[(){ \x00 -\x1f \x7f %*"\\ ]/n
1104
1109
# quoted string
1105
1110
return '"' + str . gsub ( /["\\ ]/n , "\\ \\ \\ &" ) + '"'
0 commit comments