Skip to content

Commit dcbdb21

Browse files
authored
🔀 Merge pull request #225 from nevans/parser/better-faster-cleaner-status
⚡ Better Faster Cleaner `STATUS` parsing
2 parents 8a60524 + 8070925 commit dcbdb21

File tree

4 files changed

+314
-26
lines changed

4 files changed

+314
-26
lines changed

‎lib/net/imap/response_data.rb

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@
22

33
module Net
44
class IMAP < Protocol
5-
autoload :FetchData, File.expand_path("fetch_data", __dir__)
5+
autoload :FetchData, "#{__dir__}/fetch_data"
6+
autoload :SequenceSet, "#{__dir__}/sequence_set"
67

78
# Net::IMAP::ContinuationRequest represents command continuation requests.
89
#
@@ -71,7 +72,7 @@ class IgnoredResponse < UntaggedResponse
7172
# unknown extensions to response types without a well-defined extension
7273
# grammar.
7374
#
74-
# See also: UnparsedNumericResponseData
75+
# See also: UnparsedNumericResponseData, ExtensionData, IgnoredResponse
7576
class UnparsedData < Struct.new(:unparsed_data)
7677
##
7778
# method: unparsed_data
@@ -87,7 +88,7 @@ class UnparsedData < Struct.new(:unparsed_data)
8788
# Net::IMAP::UnparsedNumericResponseData represents data for unhandled
8889
# response types with a numeric prefix. See the documentation for #number.
8990
#
90-
# See also: UnparsedData
91+
# See also: UnparsedData, ExtensionData, IgnoredResponse
9192
class UnparsedNumericResponseData < Struct.new(:number, :unparsed_data)
9293
##
9394
# method: number
@@ -106,6 +107,23 @@ class UnparsedNumericResponseData < Struct.new(:number, :unparsed_data)
106107
# The unparsed data, not including #number or UntaggedResponse#name.
107108
end
108109

110+
# **Note:** This represents an intentionally _unstable_ API. Where
111+
# instances of this class are returned, future releases may return a
112+
# different (incompatible) object <em>without deprecation or warning</em>.
113+
#
114+
# Net::IMAP::ExtensionData represents data that is parsable according to the
115+
# forward-compatible extension syntax in RFC3501, RFC4466, or RFC9051, but
116+
# isn't directly known or understood by Net::IMAP yet.
117+
#
118+
# See also: UnparsedData, UnparsedNumericResponseData, IgnoredResponse
119+
class ExtensionData < Struct.new(:data)
120+
##
121+
# method: data
122+
# :call-seq: data -> object
123+
#
124+
# The parsed extension data.
#
# As built by the response parser's +status_att_val+, this holds the result
# of parsing +tagged-ext-val+: an Array (possibly empty, possibly nested) for
# a parenthesized value, otherwise whatever +tagged-ext-simple+ yields (a
# number or a SequenceSet). It is not a plain string.
125+
end
126+
109127
# Net::IMAP::TaggedResponse represents tagged responses.
110128
#
111129
# The server completion result response indicates the success or

‎lib/net/imap/response_parser.rb

Lines changed: 193 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,56 @@ module RFC3629
267267
# ; Is a valid RFC 3501 "atom".
268268
TAGGED_EXT_LABEL = /#{TAGGED_LABEL_FCHAR}#{TAGGED_LABEL_CHAR}*/n
269269

270+
# nz-number = digit-nz *DIGIT
271+
# ; Non-zero unsigned 32-bit integer
272+
# ; (0 < n < 4,294,967,296)
273+
NZ_NUMBER = /[1-9]\d*/n
274+
275+
# seq-number = nz-number / "*"
276+
# ; message sequence number (COPY, FETCH, STORE
277+
# ; commands) or unique identifier (UID COPY,
278+
# ; UID FETCH, UID STORE commands).
279+
# ; * represents the largest number in use. In
280+
# ; the case of message sequence numbers, it is
281+
# ; the number of messages in a non-empty mailbox.
282+
# ; In the case of unique identifiers, it is the
283+
# ; unique identifier of the last message in the
284+
# ; mailbox or, if the mailbox is empty, the
285+
# ; mailbox's current UIDNEXT value.
286+
# ; The server should respond with a tagged BAD
287+
# ; response to a command that uses a message
288+
# ; sequence number greater than the number of
289+
# ; messages in the selected mailbox. This
290+
# ; includes "*" if the selected mailbox is empty.
291+
SEQ_NUMBER = /#{NZ_NUMBER}|\*/n
292+
293+
# seq-range = seq-number ":" seq-number
294+
# ; two seq-number values and all values between
295+
# ; these two regardless of order.
296+
# ; Example: 2:4 and 4:2 are equivalent and
297+
# ; indicate values 2, 3, and 4.
298+
# ; Example: a unique identifier sequence range of
299+
# ; 3291:* includes the UID of the last message in
300+
# ; the mailbox, even if that value is less than
301+
# ; 3291.
302+
SEQ_RANGE = /#{SEQ_NUMBER}:#{SEQ_NUMBER}/n
303+
304+
# sequence-set = (seq-number / seq-range) ["," sequence-set]
305+
# ; set of seq-number values, regardless of order.
306+
# ; Servers MAY coalesce overlaps and/or execute
307+
# ; the sequence in any order.
308+
# ; Example: a message sequence number set of
309+
# ; 2,4:7,9,12:* for a mailbox with 15 messages is
310+
# ; equivalent to 2,4,5,6,7,9,12,13,14,15
311+
# ; Example: a message sequence number set of
312+
# ; *:4,5:7 for a mailbox with 10 messages is
313+
# ; equivalent to 10,9,8,7,6,5,4,5,6,7 and MAY
314+
# ; be reordered and overlap coalesced to be
315+
# ; 4,5,6,7,8,9,10.
316+
SEQUENCE_SET_ITEM = /#{SEQ_NUMBER}|#{SEQ_RANGE}/n
317+
SEQUENCE_SET = /#{SEQUENCE_SET_ITEM}(?:,#{SEQUENCE_SET_ITEM})*/n
318+
SEQUENCE_SET_STR = /\A#{SEQUENCE_SET}\z/n
319+
270320
# RFC3501:
271321
# literal = "{" number "}" CRLF *CHAR8
272322
# ; Number represents the number of CHAR8s
@@ -405,6 +455,24 @@ def unescape_quoted(quoted)
405455
# ATOM-CHAR = <any CHAR except atom-specials>
406456
ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS]
407457

458+
SEQUENCE_SET_TOKENS = [T_ATOM, T_NUMBER, T_STAR]
459+
460+
# sequence-set = (seq-number / seq-range) ["," sequence-set]
461+
# sequence-set =/ seq-last-command
462+
# ; Allow for "result of the last command"
463+
# ; indicator.
464+
# seq-last-command = "$"
465+
#
466+
# *note*: doesn't match seq-last-command
467+
# Reads the adjacent atom/number/star tokens as one string and wraps it in a
# SequenceSet when it matches the sequence-set grammar; otherwise reports a
# parse error. Does not accept seq-last-command ("$").
def sequence_set
  candidate = combine_adjacent(*SEQUENCE_SET_TOKENS)
  return SequenceSet.new(candidate) if Patterns::SEQUENCE_SET_STR.match?(candidate)

  parse_error("unexpected atom %p, expected sequence-set", candidate)
end
475+
408476
# ASTRING-CHAR = ATOM-CHAR / resp-specials
409477
# resp-specials = "]"
410478
ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze
@@ -488,6 +556,60 @@ def case_insensitive__nstring
488556
NIL? ? nil : case_insensitive__string
489557
end
490558

559+
# tagged-ext-comp = astring /
560+
# tagged-ext-comp *(SP tagged-ext-comp) /
561+
# "(" tagged-ext-comp ")"
562+
# ; Extensions that follow this general
563+
# ; syntax should use nstring instead of
564+
# ; astring when appropriate in the context
565+
# ; of the extension.
566+
# ; Note that a message set or a "number"
567+
# ; can always be represented as an "atom".
568+
# ; A URL should be represented as
569+
# ; a "quoted" string.
570+
# Parses a run of space-separated tagged-ext-comp values and returns them as
# an Array. A parenthesized component is parsed recursively and appears as a
# nested Array; numbers are parsed with +number+, everything else as astring.
def tagged_ext_comp
  components = []
  while true
    components <<
      case lookahead!(*ASTRING_TOKENS, T_LPAR).symbol
      when T_LPAR
        lpar
        nested = tagged_ext_comp
        rpar
        nested
      when T_NUMBER
        number
      else
        astring
      end
    # another component follows only when separated by a space
    break unless SP?
  end
  components
end
582+
583+
# tagged-ext-simple is a subset of atom
584+
# TODO: recognize sequence-set in the lexer
585+
#
586+
# tagged-ext-simple = sequence-set / number / number64
587+
# Tries to parse a number first; when that yields nothing, parses a
# sequence-set instead.
def tagged_ext_simple
  simple = number?
  simple || sequence_set
end
590+
591+
# tagged-ext-val = tagged-ext-simple /
592+
# "(" [tagged-ext-comp] ")"
593+
# Parses a tagged-ext-val: either a parenthesized (possibly empty) list of
# tagged-ext-comp values, returned as an Array, or a tagged-ext-simple value.
def tagged_ext_val
  return tagged_ext_simple unless lpar?

  # "()" is allowed and yields an empty list
  components = peek_rpar? ? [] : tagged_ext_comp
  rpar
  components
end
602+
603+
# mailbox = "INBOX" / astring
604+
# ; INBOX is case-insensitive. All case variants of
605+
# ; INBOX (e.g., "iNbOx") MUST be interpreted as INBOX
606+
# ; not as an astring. An astring which consists of
607+
# ; the case-insensitive sequence "I" "N" "B" "O" "X"
608+
# ; is considered to be INBOX and not an astring.
609+
# ; Refer to section 5.1 for further
610+
# ; semantic details of mailbox names.
611+
alias mailbox astring
612+
491613
# valid number ranges are not enforced by parser
492614
# number64 = 1*DIGIT
493615
# ; Unsigned 63-bit integer
@@ -1396,31 +1518,79 @@ def thread_branch(token)
13961518
return rootmember
13971519
end
13981520

1521+
# mailbox-data =/ "STATUS" SP mailbox SP "(" [status-att-list] ")"
13991522
def mailbox_data__status
# Parses an untagged STATUS response: the "STATUS" label, a mailbox name, and
# a parenthesized status-att-list, and returns an UntaggedResponse wrapping
# StatusData.new(mailbox, attrs).
# In this diff view, lines whose gutter ends in "-" belong to the previous
# implementation removed by this commit; lines ending in "+" replace it.
# NOTE(review): the grammar comment above makes status-att-list optional, but
# status_att_list is called unconditionally between lpar and rpar -- verify
# that an empty "()" is still accepted.
1400-
token = match(T_ATOM)
1401-
name = token.value.upcase
1402-
match(T_SPACE)
1403-
mailbox = astring
1404-
match(T_SPACE)
1405-
match(T_LPAR)
1406-
attr = {}
1407-
while true
1408-
token = lookahead
1409-
case token.symbol
1410-
when T_RPAR
1411-
shift_token
1412-
break
1413-
when T_SPACE
1414-
shift_token
1523+
resp_name = label("STATUS"); SP!
1524+
mbox_name = mailbox; SP!
1525+
lpar; attr = status_att_list; rpar
1526+
UntaggedResponse.new(resp_name, StatusData.new(mbox_name, attr), @str)
1527+
end
1528+
1529+
# RFC3501
1530+
# status-att-list = status-att SP number *(SP status-att SP number)
1531+
# RFC4466, RFC9051, and RFC3501 Errata
1532+
# status-att-list = status-att-val *(SP status-att-val)
1533+
# Parses one or more space-separated status-att-val pairs and returns them
# as a Hash (later duplicates of a key overwrite earlier ones via to_h).
def status_att_list
  pairs = []
  pairs << status_att_val
  pairs << status_att_val while SP?
  pairs.to_h
end
1538+
1539+
# RFC3501 Errata:
1540+
# status-att-val = ("MESSAGES" SP number) / ("RECENT" SP number) /
1541+
# ("UIDNEXT" SP nz-number) / ("UIDVALIDITY" SP nz-number) /
1542+
# ("UNSEEN" SP number)
1543+
# RFC4466:
1544+
# status-att-val = ("MESSAGES" SP number) /
1545+
# ("RECENT" SP number) /
1546+
# ("UIDNEXT" SP nz-number) /
1547+
# ("UIDVALIDITY" SP nz-number) /
1548+
# ("UNSEEN" SP number)
1549+
# ;; Extensions to the STATUS responses
1550+
# ;; should extend this production.
1551+
# ;; Extensions should use the generic
1552+
# ;; syntax defined by tagged-ext.
1553+
# RFC9051:
1554+
# status-att-val = ("MESSAGES" SP number) /
1555+
# ("UIDNEXT" SP nz-number) /
1556+
# ("UIDVALIDITY" SP nz-number) /
1557+
# ("UNSEEN" SP number) /
1558+
# ("DELETED" SP number) /
1559+
# ("SIZE" SP number64)
1560+
# ; Extensions to the STATUS responses
1561+
# ; should extend this production.
1562+
# ; Extensions should use the generic
1563+
# ; syntax defined by tagged-ext.
1564+
# RFC7162:
1565+
# status-att-val =/ "HIGHESTMODSEQ" SP mod-sequence-valzer
1566+
# ;; Extends non-terminal defined in [RFC4466].
1567+
# ;; Value 0 denotes that the mailbox doesn't
1568+
# ;; support persistent mod-sequences
1569+
# ;; as described in Section 3.1.2.2.
1570+
# RFC7889:
1571+
# status-att-val =/ "APPENDLIMIT" SP (number / nil)
1572+
# ;; status-att-val is defined in RFC 4466
1573+
# RFC8438:
1574+
# status-att-val =/ "SIZE" SP number64
1575+
# RFC8474:
1576+
# status-att-val =/ "MAILBOXID" SP "(" objectid ")"
1577+
# ; follows tagged-ext production from [RFC4466]
1578+
def status_att_val
# Parses a single status attribute and returns it as a [key, val] pair.
# The label is read with tagged_ext_label; known labels use a specific number
# parser, and any other label falls back to a bare number or, failing that,
# an ExtensionData wrapping the generic tagged-ext-val.
# In this diff view, gutter lines ending in "-" are the tail of the previous
# mailbox_data__status implementation removed by this commit.
# NOTE(review): the removed code upcased the key (token.value.upcase); the
# string comparisons below are case-sensitive, so this presumably relies on
# tagged_ext_label normalizing case -- confirm.
# NOTE(review): the RFC7889 grammar above allows APPENDLIMIT to be nil;
# verify that the fallback branch accepts NIL.
1579+
key = tagged_ext_label
1580+
SP!
1581+
val =
1582+
case key
1583+
when "MESSAGES" then number # RFC3501, RFC9051
1584+
when "UNSEEN" then number # RFC3501, RFC9051
1585+
when "DELETED" then number # RFC9051
1586+
when "UIDNEXT" then nz_number # RFC3501, RFC9051
1587+
when "UIDVALIDITY" then nz_number # RFC3501, RFC9051
1588+
when "RECENT" then number # RFC3501 (obsolete)
1589+
when "SIZE" then number64 # RFC8438, RFC9051
1590+
else
1591+
number? || ExtensionData.new(tagged_ext_val)
14151592
end
1416-
token = match(T_ATOM)
1417-
key = token.value.upcase
1418-
match(T_SPACE)
1419-
val = number
1420-
attr[key] = val
1421-
end
1422-
data = StatusData.new(mailbox, attr)
1423-
return UntaggedResponse.new(name, data, @str)
1593+
[key, val]
14241594
end
14251595

14261596
# The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here.

‎lib/net/imap/sequence_set.rb

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
# frozen_string_literal: true
2+
3+
module Net
4+
class IMAP
5+
6+
##
7+
# An IMAP {sequence
8+
# set}[https://www.rfc-editor.org/rfc/rfc9051.html#section-4.1.1],
9+
# is a set of message sequence numbers or unique identifier numbers
10+
# ("UIDs"). It contains numbers and ranges of numbers. The numbers are all
11+
# non-zero unsigned 32-bit integers and one special value, <tt>*</tt>, that
12+
# represents the largest value in the mailbox.
13+
#
14+
# *NOTE:* This SequenceSet class is currently a placeholder for unhandled
15+
# extension data. All it does now is validate. It will be expanded to a
16+
# full API in a future release.
17+
class SequenceSet

  # Builds a frozen SequenceSet from +str+.
  #
  # Raises ArgumentError when +str+ is not a valid sequence-set string.
  def self.[](str)
    new(str).freeze
  end

  # Creates a SequenceSet from +input+, which must be a String (or respond
  # to +to_str+) matching the IMAP +sequence-set+ grammar.
  #
  # Raises ArgumentError when +input+ cannot be converted to a String or
  # does not match the grammar.
  def initialize(input)
    # try_convert returns nil for non-strings; report that as invalid input
    # rather than failing with NoMethodError on nil.
    str = String.try_convert(input) or
      raise ArgumentError, "invalid sequence-set: %p" % [input]
    @atom = -str
    validate
  end

  # Returns the IMAP string representation.  In the IMAP grammar,
  # +sequence-set+ is a subset of +atom+ which is a subset of +astring+.
  attr_accessor :atom

  # Returns #atom.  In the IMAP grammar, +atom+ is a subset of +astring+.
  alias astring atom

  # Returns the value of #atom
  alias to_s atom

  # Hash equality requires the same encoded #atom representation.
  #
  #   Net::IMAP::SequenceSet["1:3"]  .eql? Net::IMAP::SequenceSet["1:3"] # => true
  #   Net::IMAP::SequenceSet["1,2,3"].eql? Net::IMAP::SequenceSet["1:3"] # => false
  #   Net::IMAP::SequenceSet["1,3"]  .eql? Net::IMAP::SequenceSet["3,1"] # => false
  #   Net::IMAP::SequenceSet["9,1:*"].eql? Net::IMAP::SequenceSet["1:*"] # => false
  #
  def eql?(other)
    self.class == other.class && atom == other.atom
  end
  alias == eql?

  # See #eql?
  #
  # Combines the class and #atom, so that objects which are #eql? hash alike.
  def hash
    [self.class, atom].hash
  end

  # Frozen sets render as <tt>Class["atom"]</tt>, unfrozen ones as
  # <tt>#<Class "atom"></tt>.
  def inspect
    (frozen? ? "%s[%p]" : "#<%s %p>") % [self.class, to_s]
  end

  # Unstable API, for internal use only (Net::IMAP#validate_data)
  #
  # Returns true when #atom matches the sequence-set grammar; raises
  # ArgumentError otherwise.
  def validate # :nodoc:
    ResponseParser::Patterns::SEQUENCE_SET_STR.match?(@atom) or
      raise ArgumentError, "invalid sequence-set: %p" % [@atom]
    true
  end

  # Unstable API, for internal use only (Net::IMAP#send_data)
  def send_data(imap, tag) # :nodoc:
    imap.__send__(:put_string, atom)
  end

end
66+
end
67+
end

0 commit comments

Comments
 (0)