@@ -427,6 +427,17 @@ def case_insensitive__nstring
427
427
alias nz_number number
428
428
alias nz_number? number?
429
429
430
+ # valid number ranges are not enforced by parser
431
+ # nz-number64 = digit-nz *DIGIT
432
+ # ; Unsigned 63-bit integer
433
+ # ; (0 < n <= 9,223,372,036,854,775,807)
434
+ alias nz_number64 nz_number
435
+
436
+ # valid number ranges are not enforced by parser
437
+ # uniqueid = nz-number
438
+ # ; Strictly ascending
439
+ alias uniqueid nz_number
440
+
430
441
# [RFC3501 & RFC9051:]
431
442
# response = *(continue-req / response-data) response-done
432
443
#
@@ -607,49 +618,93 @@ def response_data__simple_numeric
607
618
alias mailbox_data__exists response_data__simple_numeric
608
619
alias mailbox_data__recent response_data__simple_numeric
609
620
621
+ # RFC3501 & RFC9051:
622
+ # msg-att = "(" (msg-att-dynamic / msg-att-static)
623
+ # *(SP (msg-att-dynamic / msg-att-static)) ")"
624
+ #
625
+ # msg-att-dynamic = "FLAGS" SP "(" [flag-fetch *(SP flag-fetch)] ")"
626
+ # RFC5257 (ANNOTATE extension):
627
+ # msg-att-dynamic =/ "ANNOTATION" SP
628
+ # ( "(" entry-att *(SP entry-att) ")" /
629
+ # "(" entry *(SP entry) ")" )
630
+ # RFC7162 (CONDSTORE extension):
631
+ # msg-att-dynamic =/ fetch-mod-resp
632
+ # fetch-mod-resp = "MODSEQ" SP "(" permsg-modsequence ")"
633
+ # RFC8970 (PREVIEW extension):
634
+ # msg-att-dynamic =/ "PREVIEW" SP nstring
635
+ #
636
+ # RFC3501:
637
+ # msg-att-static = "ENVELOPE" SP envelope /
638
+ # "INTERNALDATE" SP date-time /
639
+ # "RFC822" [".HEADER" / ".TEXT"] SP nstring /
640
+ # "RFC822.SIZE" SP number /
641
+ # "BODY" ["STRUCTURE"] SP body /
642
+ # "BODY" section ["<" number ">"] SP nstring /
643
+ # "UID" SP uniqueid
644
+ # RFC3516 (BINARY extension):
645
+ # msg-att-static =/ "BINARY" section-binary SP (nstring / literal8)
646
+ # / "BINARY.SIZE" section-binary SP number
647
+ # RFC8514 (SAVEDATE extension):
648
+ # msg-att-static =/ "SAVEDATE" SP (date-time / nil)
649
+ # RFC8474 (OBJECTID extension):
650
+ # msg-att-static =/ fetch-emailid-resp / fetch-threadid-resp
651
+ # fetch-emailid-resp = "EMAILID" SP "(" objectid ")"
652
+ # fetch-threadid-resp = "THREADID" SP ( "(" objectid ")" / nil )
653
+ # RFC9051:
654
+ # msg-att-static = "ENVELOPE" SP envelope /
655
+ # "INTERNALDATE" SP date-time /
656
+ # "RFC822.SIZE" SP number64 /
657
+ # "BODY" ["STRUCTURE"] SP body /
658
+ # "BODY" section ["<" number ">"] SP nstring /
659
+ # "BINARY" section-binary SP (nstring / literal8) /
660
+ # "BINARY.SIZE" section-binary SP number /
661
+ # "UID" SP uniqueid
662
+ #
663
+ # Re https://www.rfc-editor.org/errata/eid7246, I'm adding "offset" to the
664
+ # official "BINARY" ABNF, like so:
665
+ #
666
+ # msg-att-static =/ "BINARY" section-binary ["<" number ">"] SP
667
+ # (nstring / literal8)
610
668
def msg_att ( n )
611
- match ( T_LPAR )
669
+ lpar
612
670
attr = { }
613
671
while true
614
- token = lookahead
615
- case token . symbol
616
- when T_RPAR
617
- shift_token
618
- break
619
- when T_SPACE
620
- shift_token
621
- next
622
- end
623
- case token . value
624
- when /\A (?:ENVELOPE)\z /ni
625
- name , val = envelope_data
626
- when /\A (?:FLAGS)\z /ni
627
- name , val = flags_data
628
- when /\A (?:INTERNALDATE)\z /ni
629
- name , val = internaldate_data
630
- when /\A (?:RFC822(?:\. HEADER|\. TEXT)?)\z /ni
631
- name , val = rfc822_text
632
- when /\A (?:RFC822\. SIZE)\z /ni
633
- name , val = rfc822_size
634
- when /\A (?:BODY(?:STRUCTURE)?)\z /ni
635
- name , val = body_data
636
- when /\A (?:UID)\z /ni
637
- name , val = uid_data
638
- when /\A (?:MODSEQ)\z /ni
639
- name , val = modseq_data
640
- else
641
- parse_error ( "unknown attribute `%s' for {%d}" , token . value , n )
642
- end
672
+ name = msg_att__label ; SP!
673
+ val =
674
+ case name
675
+ when "UID" then uniqueid
676
+ when "FLAGS" then flag_list
677
+ when "BODY" then body
678
+ when /\A BODY\[ /ni then nstring
679
+ when "BODYSTRUCTURE" then body
680
+ when "ENVELOPE" then envelope
681
+ when "INTERNALDATE" then date_time
682
+ when "RFC822.SIZE" then number64
683
+ when "RFC822" then nstring # not in rev2
684
+ when "RFC822.HEADER" then nstring # not in rev2
685
+ when "RFC822.TEXT" then nstring # not in rev2
686
+ when "MODSEQ" then parens__modseq # CONDSTORE
687
+ else parse_error ( "unknown attribute `%s' for {%d}" , name , n )
688
+ end
643
689
attr [ name ] = val
690
+ break unless SP?
691
+ break if lookahead_rpar?
644
692
end
645
- return attr
646
- end
647
-
648
- def envelope_data
649
- token = match ( T_ATOM )
650
- name = token . value . upcase
651
- match ( T_SPACE )
652
- return name , envelope
693
+ rpar
694
+ attr
695
+ end
696
+
697
+ # appends "[section]" and "<partial>" to the base label
698
+ def msg_att__label
699
+ case ( name = tagged_ext_label )
700
+ when /\A (?:RFC822(?:\. HEADER|\. TEXT)?)\z /ni
701
+ # ignoring "[]" fixes https://bugs.ruby-lang.org/issues/5620
702
+ lbra? and rbra
703
+ when "BODY"
704
+ peek_lbra? and name << section and
705
+ peek_str? ( "<" ) and name << atom # partial
706
+ end
707
+ name
653
708
end
654
709
655
710
def envelope
@@ -687,58 +742,10 @@ def envelope
687
742
return result
688
743
end
689
744
690
- def flags_data
691
- token = match ( T_ATOM )
692
- name = token . value . upcase
693
- match ( T_SPACE )
694
- return name , flag_list
695
- end
696
-
697
- def internaldate_data
698
- token = match ( T_ATOM )
699
- name = token . value . upcase
700
- match ( T_SPACE )
701
- token = match ( T_QUOTED )
702
- return name , token . value
703
- end
704
-
705
- def rfc822_text
706
- token = match ( T_ATOM )
707
- name = token . value . upcase
708
- token = lookahead
709
- if token . symbol == T_LBRA
710
- shift_token
711
- match ( T_RBRA )
712
- end
713
- match ( T_SPACE )
714
- return name , nstring
715
- end
716
-
717
- def rfc822_size
718
- token = match ( T_ATOM )
719
- name = token . value . upcase
720
- match ( T_SPACE )
721
- return name , number
722
- end
723
-
724
- def body_data
725
- token = match ( T_ATOM )
726
- name = token . value . upcase
727
- token = lookahead
728
- if token . symbol == T_SPACE
729
- shift_token
730
- return name , body
731
- end
732
- name . concat ( section )
733
- token = lookahead
734
- if token . symbol == T_ATOM
735
- name . concat ( token . value )
736
- shift_token
737
- end
738
- match ( T_SPACE )
739
- data = nstring
740
- return name , data
741
- end
745
+ # date-time = DQUOTE date-day-fixed "-" date-month "-" date-year
746
+ # SP time SP zone DQUOTE
747
+ alias date_time quoted
748
+ alias ndatetime nquoted
742
749
743
750
# RFC-3501 & RFC-9051:
744
751
# body = "(" (body-type-1part / body-type-mpart) ")"
@@ -996,48 +1003,78 @@ def body_extension
996
1003
end
997
1004
end
998
1005
1006
+ # section = "[" [section-spec] "]"
999
1007
def section
1000
- str = String . new
1001
- token = match ( T_LBRA )
1002
- str . concat ( token . value )
1003
- token = match ( T_ATOM , T_NUMBER , T_RBRA )
1004
- if token . symbol == T_RBRA
1005
- str . concat ( token . value )
1006
- return str
1007
- end
1008
- str . concat ( token . value )
1009
- token = lookahead
1010
- if token . symbol == T_SPACE
1011
- shift_token
1012
- str . concat ( token . value )
1013
- token = match ( T_LPAR )
1014
- str . concat ( token . value )
1015
- while true
1016
- token = lookahead
1017
- case token . symbol
1018
- when T_RPAR
1019
- str . concat ( token . value )
1020
- shift_token
1021
- break
1022
- when T_SPACE
1023
- shift_token
1024
- str . concat ( token . value )
1025
- end
1026
- str . concat ( format_string ( astring ) )
1027
- end
1028
- end
1029
- token = match ( T_RBRA )
1030
- str . concat ( token . value )
1031
- return str
1008
+ str = +lbra
1009
+ str << section_spec unless peek_rbra?
1010
+ str << rbra
1011
+ end
1012
+
1013
+ # section-spec = section-msgtext / (section-part ["." section-text])
1014
+ # section-msgtext = "HEADER" /
1015
+ # "HEADER.FIELDS" [".NOT"] SP header-list /
1016
+ # "TEXT"
1017
+ # ; top-level or MESSAGE/RFC822 or
1018
+ # ; MESSAGE/GLOBAL part
1019
+ # section-part = nz-number *("." nz-number)
1020
+ # ; body part reference.
1021
+ # ; Allows for accessing nested body parts.
1022
+ # section-text = section-msgtext / "MIME"
1023
+ # ; text other than actual body part (headers,
1024
+ # ; etc.)
1025
+ #
1026
+ # n.b: we could "cheat" here and just grab all text inside the brackets,
1027
+ # but literals would need special treatment.
1028
+ def section_spec
1029
+ str = "" . b
1030
+ str << atom # grabs everything up to "SP header-list" or "]"
1031
+ str << " " << header_list if SP?
1032
+ str
1032
1033
end
1033
1034
1034
- def format_string ( str )
1035
- case str
1035
+ # header-list = "(" header-fld-name *(SP header-fld-name) ")"
1036
+ def header_list
1037
+ str = +""
1038
+ str << lpar << header_fld_name
1039
+ str << " " << header_fld_name while SP?
1040
+ str << rpar
1041
+ end
1042
+
1043
+ # RFC3501 & RFC9051:
1044
+ # header-fld-name = astring
1045
+ #
1046
+ # Although RFC3501 allows any astring, RFC5322-valid header names are one
1047
+ # or more of the printable US-ASCII characters, except SP and colon. So
1048
+ # empty string isn't valid, and literals aren't needed and should not be
1049
+ # used. This syntax is unchanged by [I18N-HDRS] (RFC6532).
1050
+ #
1051
+ # RFC5322:
1052
+ # optional-field = field-name ":" unstructured CRLF
1053
+ # field-name = 1*ftext
1054
+ # ftext = %d33-57 / ; Printable US-ASCII
1055
+ # %d59-126 ; characters not including
1056
+ # ; ":".
1057
+ #
1058
+ # Atom and quoted should be sufficient.
1059
+ #
1060
+ # TODO: Use original source string, rather than decode and re-encode.
1061
+ # TODO: or at least, DRY up this code with the send_command formatting.
1062
+ def header_fld_name
1063
+ case ( str = astring )
1036
1064
when ""
1065
+ warn '%s header-fld-name is an invalid RFC5322 field-name: ""' %
1066
+ [ self . class ]
1037
1067
return '""'
1038
1068
when /[\x80 -\xff \r \n ]/n
1069
+ warn "%s header-fld-name %p has invalid RFC5322 field-name char: %p" %
1070
+ [ self . class , str , $&]
1039
1071
# literal
1040
1072
return "{" + str . bytesize . to_s + "}" + CRLF + str
1073
+ when /[^\x21 -\x39 \x3b -\xfe ]/n
1074
+ warn "%s header-fld-name %p has invalid RFC5322 field-name char: %p" %
1075
+ [ self . class , str , $&]
1076
+ # invalid quoted string
1077
+ return '"' + str . gsub ( /["\\ ]/n , "\\ \\ \\ &" ) + '"'
1041
1078
when /[(){ \x00 -\x1f \x7f %*"\\ ]/n
1042
1079
# quoted string
1043
1080
return '"' + str . gsub ( /["\\ ]/n , "\\ \\ \\ &" ) + '"'
@@ -1047,23 +1084,6 @@ def format_string(str)
1047
1084
end
1048
1085
end
1049
1086
1050
- def uid_data
1051
- token = match ( T_ATOM )
1052
- name = token . value . upcase
1053
- match ( T_SPACE )
1054
- return name , number
1055
- end
1056
-
1057
- def modseq_data
1058
- token = match ( T_ATOM )
1059
- name = token . value . upcase
1060
- match ( T_SPACE )
1061
- match ( T_LPAR )
1062
- modseq = number
1063
- match ( T_RPAR )
1064
- return name , modseq
1065
- end
1066
-
1067
1087
def mailbox_data__flags
1068
1088
token = match ( T_ATOM )
1069
1089
name = token . value . upcase
@@ -1631,6 +1651,20 @@ def charset
1631
1651
end
1632
1652
end
1633
1653
1654
+ # RFC7162:
1655
+ # mod-sequence-value = 1*DIGIT
1656
+ # ;; Positive unsigned 63-bit integer
1657
+ # ;; (mod-sequence)
1658
+ # ;; (1 <= n <= 9,223,372,036,854,775,807).
1659
+ alias mod_sequence_value nz_number64
1660
+
1661
+ # RFC7162:
1662
+ # permsg-modsequence = mod-sequence-value
1663
+ # ;; Per-message mod-sequence.
1664
+ alias permsg_modsequence mod_sequence_value
1665
+
1666
+ def parens__modseq ; lpar ; _ = permsg_modsequence ; rpar ; _ end
1667
+
1634
1668
# RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2):
1635
1669
# uid-set = (uniqueid / uid-range) *("," uid-set)
1636
1670
# uid-range = (uniqueid ":" uniqueid)
0 commit comments