Skip to content

Commit 76fffde

Browse files
committed
♻️ Refactor stringprep table generation
Tables are now generated into one table per file. By using autoload, this avoids needing to load *all* of the tables to access only one. Also, PROHIBIT regexps combining all of the prohibited tables have been compiled for the "SASLprep", "nameprep", and "trace" profiles (previously, only "SASLprep" had its own combined PROHIBIT regexp).
1 parent bd6bd14 commit 76fffde

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+1097
-602
lines changed

lib/net/imap/sasl/anonymous_authenticator.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class AnonymousAuthenticator
3636
# Any other keyword arguments are silently ignored.
3737
def initialize(anon_msg = nil, anonymous_message: nil, **)
3838
message = (anonymous_message || anon_msg || "").to_str
39-
@anonymous_message = StringPrep::Trace.stringprep_trace message
39+
@anonymous_message = IMAP::StringPrep::Trace.stringprep_trace message
4040
if (size = @anonymous_message&.length)&.> 255
4141
raise ArgumentError,
4242
"anonymous_message is too long. (%d codepoints)" % [size]

lib/net/imap/stringprep.rb

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -74,34 +74,35 @@ def self.[](table)
7474
# The above steps MUST be performed in the order given to comply with
7575
# this specification.
7676
#
77-
def stringprep(string,
78-
maps:,
79-
normalization:,
80-
prohibited:,
81-
**opts)
77+
def stringprep(string, maps:, normalization:, prohibited:, **opts)
8278
string = string.encode("UTF-8") # also dups (and raises invalid encoding)
83-
map_tables!(string, *maps) if maps
79+
map!(string, maps) if maps&.any? # mapping is skipped when maps is nil or has no entries
8480
string.unicode_normalize!(normalization) if normalization
8581
check_prohibited!(string, *prohibited, **opts) if prohibited # remaining keywords are forwarded unchanged
8682
string
8783
end
8884

89-
def map_tables!(string, *tables)
90-
tables.each do |table|
91-
regexp, replacements = Tables::MAPPINGS.fetch(table)
92-
string.gsub!(regexp, replacements)
85+
# Applies each entry of +mappings+ to +string+ in place, in iteration
# order, and returns +string+.
#
# A String entry is treated as a table name and looked up in
# Tables::MAPPINGS (raising KeyError when it is missing). Any other entry
# is splatted directly into String#gsub! as its arguments.
def map!(string, mappings)
86+
mappings.each do |mapping|
87+
mapping = Tables::MAPPINGS.fetch(mapping) if mapping.is_a?(String)
88+
string.gsub!(*mapping)
9389
end
9490
string
9591
end
9692

93+
def map_tables!(string, *tables)
94+
warn "map_tables! is deprecated. Use map! instead."
95+
map!(string, tables.map { Tables::MAPPINGS.fetch(table) })
96+
end
97+
9798
# Checks +string+ for any codepoint in +tables+. Raises a
9899
# ProhibitedCodepoint describing the first matching table.
99100
#
100-
# Also checks bidirectional characters, when <tt>bidi: true</tt>, which may
101-
# raise a BidiStringError.
101+
# Also checks bidirectional characters, when <tt>bidi: true</tt>, which
102+
# may raise a BidiStringError.
102103
#
103-
# +profile+ is an optional string which will be added to any exception that
104-
# is raised (it does not affect behavior).
104+
# +profile+ is an optional string which will be added to any exception
105+
# that is raised (it does not affect behavior).
105106
def check_prohibited!(string,
106107
*tables,
107108
bidi: false,

lib/net/imap/stringprep/saslprep.rb

Lines changed: 95 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,60 +4,126 @@ module Net
44
class IMAP
55
module StringPrep
66

7-
# SASLprep#saslprep can be used to prepare a string according to [RFC4013].
7+
# SASLprep#saslprep can be used to prepare a string according to
8+
# RFC4013[https://tools.ietf.org/html/rfc4013].
89
#
910
# \SASLprep maps characters three ways: to nothing, to space, and Unicode
1011
# normalization form KC. \SASLprep prohibits codepoints from nearly all
11-
# standard StringPrep tables (RFC3454, Appendix "C"), and uses
12+
# standard StringPrep tables
13+
# (RFC3454[https://tools.ietf.org/html/rfc3454], Appendix "C"), and uses
1214
# \StringPrep's standard bidirectional characters requirements (Appendix
1315
# "D"). \SASLprep also uses \StringPrep's definition of "Unassigned"
1416
# codepoints (Appendix "A").
1517
module SASLprep
18+
# Avoid loading these tables unless they are needed (for non-ASCII).
19+
autoload :PROHIBITED_OUTPUT, "#{__dir__}/saslprep/prohibited.rb"
20+
autoload :PROHIBITED_OUTPUT_STORED, "#{__dir__}/saslprep/prohibited.rb"
21+
autoload :PROHIBITED, "#{__dir__}/saslprep/prohibited.rb"
22+
autoload :PROHIBITED_STORED, "#{__dir__}/saslprep/prohibited.rb"
23+
24+
# Defined in RFC4013[https://tools.ietf.org/html/rfc4013].
25+
STRINGPREP_PROFILE = "SASLprep"
1626

1727
# Used to short-circuit strings that don't need preparation.
1828
ASCII_NO_CTRLS = /\A[\x20-\x7e]*\z/u.freeze
1929

20-
# Avoid loading these tables unless they are needed (they are only
21-
# needed for non-ASCII).
22-
saslprep_tables = File.expand_path("saslprep_tables", __dir__)
23-
autoload :MAP_TO_NOTHING, saslprep_tables
24-
autoload :MAP_TO_SPACE, saslprep_tables
25-
autoload :PROHIBITED, saslprep_tables
26-
autoload :PROHIBITED_STORED, saslprep_tables
27-
autoload :TABLES_PROHIBITED, saslprep_tables
28-
autoload :TABLES_PROHIBITED_STORED, saslprep_tables
30+
# Regexp for RFC4013[https://tools.ietf.org/html/rfc4013] §2.1 Mapping -
31+
# mapped to space
32+
MAP_TO_SPACE = Tables::IN_C_1_2
33+
34+
# Regexp for RFC4013[https://tools.ietf.org/html/rfc4013] §2.1 Mapping -
35+
# mapped to nothing
36+
MAP_TO_NOTHING = Tables::IN_B_1
37+
38+
# RFC4013[https://tools.ietf.org/html/rfc4013] §2.1 Mapping
39+
# >>>
40+
# This profile specifies:
41+
# - non-ASCII space characters (\StringPrep\[\"C.1.2\"]) that can
42+
# be mapped to SPACE (U+0020)
43+
# - the "commonly mapped to nothing" characters
44+
# (\StringPrep\[\"B.1\"]) that can be mapped to nothing.
45+
MAPPINGS = {
46+
MAP_TO_SPACE => " ",
47+
MAP_TO_NOTHING => "",
48+
}.freeze
49+
50+
# RFC4013[https://tools.ietf.org/html/rfc4013] §2.2 Normalization
51+
# >>>
52+
# This profile specifies using Unicode normalization form KC, as
53+
# described in Section 4 of [StringPrep].
54+
NORMALIZATION = :nfkc
55+
56+
# RFC4013[https://tools.ietf.org/html/rfc4013] §2.3 Prohibited Output
57+
# >>>
58+
# * Non-ASCII space characters — \StringPrep\[\"C.1.2\"]
59+
# * ASCII control characters — \StringPrep\[\"C.2.1\"]
60+
# * Non-ASCII control characters — \StringPrep\[\"C.2.2\"]
61+
# * Private Use characters — \StringPrep\[\"C.3\"]
62+
# * Non-character code points — \StringPrep\[\"C.4\"]
63+
# * Surrogate code points — \StringPrep\[\"C.5\"]
64+
# * Inappropriate for plain text characters — \StringPrep\[\"C.6\"]
65+
# * Inappropriate for canonical representation characters — \StringPrep\[\"C.7\"]
66+
# * Change display properties or deprecated characters — \StringPrep\[\"C.8\"]
67+
# * Tagging characters — \StringPrep\[\"C.9\"]
68+
PROHIBITED_TABLES = %w[C.1.2 C.2.1 C.2.2 C.3 C.4 C.5 C.6 C.7 C.8 C.9]
69+
.freeze
70+
71+
# RFC4013[https://tools.ietf.org/html/rfc4013] §2.4 Bidirectional
72+
# Characters
73+
# >>>
74+
# This profile specifies checking bidirectional strings as described
75+
# in [StringPrep, Section 6].
76+
CHECK_BIDI = true
77+
78+
# RFC4013[https://tools.ietf.org/html/rfc4013] §2.5 Unassigned Code
79+
# Points
80+
# >>>
81+
# This profile specifies the \StringPrep\[\"A.1\"] table as its
82+
# list of unassigned code points.
83+
UNASSIGNED_TABLE = "A.1"
84+
85+
# :nodoc:
86+
UNASSIGNED = Tables::IN_A_1
87+
deprecate_constant :UNASSIGNED
2988

3089
module_function
3190

3291
# Prepares a UTF-8 +string+ for comparison, using the \SASLprep profile
33-
# RFC4013 of the StringPrep algorithm RFC3454.
92+
# {[RFC4013]}[https://tools.ietf.org/html/rfc4013] of the StringPrep
93+
# algorithm {[RFC3454]}[https://tools.ietf.org/html/rfc3454].
3494
#
3595
# By default, prohibited strings will return +nil+. When +exception+ is
3696
# +true+, a StringPrepError describing the violation will be raised.
3797
#
3898
# When +stored+ is +true+, "unassigned" codepoints will be prohibited.
3999
# For \StringPrep and the \SASLprep profile, "unassigned" refers to
40-
# Unicode 3.2, and not later versions. See RFC3454 §7 for more
100+
# Unicode 3.2, and not later versions. See RFC3454[https://tools.ietf.org/html/rfc3454] §7 for more
41101
# information.
42-
def saslprep(str, stored: false, exception: false)
43-
return str if ASCII_NO_CTRLS.match?(str) # incompatible encoding raises
44-
str = str.encode("UTF-8") # also dups (and raises for invalid encoding)
45-
str.gsub!(MAP_TO_SPACE, " ")
46-
str.gsub!(MAP_TO_NOTHING, "")
47-
str.unicode_normalize!(:nfkc)
48-
# These regexps combine the prohibited and bidirectional checks
49-
return str unless str.match?(stored ? PROHIBITED_STORED : PROHIBITED)
50-
return nil unless exception
51-
# raise helpful errors to indicate *why* it failed:
52-
tables = stored ? TABLES_PROHIBITED_STORED : TABLES_PROHIBITED
53-
StringPrep.check_prohibited! str, *tables, bidi: true, profile: "SASLprep"
54-
raise InvalidStringError.new(
55-
"unknown error", string: string, profile: "SASLprep"
56-
)
102+
def saslprep(original, stored: false, exception: false)
103+
return original if ASCII_NO_CTRLS.match?(original) # incompatible encoding raises
104+
if exception
105+
StringPrep.stringprep(
106+
original,
107+
unassigned: UNASSIGNED_TABLE,
108+
maps: MAPPINGS,
109+
prohibited: PROHIBITED_TABLES,
110+
normalization: NORMALIZATION,
111+
bidi: CHECK_BIDI,
112+
stored: stored,
113+
profile: STRINGPREP_PROFILE,
114+
)
115+
else
116+
str = original.encode("UTF-8") # also dups (and raises for invalid encoding)
117+
str.gsub!(MAP_TO_SPACE, " ")
118+
str.gsub!(MAP_TO_NOTHING, "")
119+
str.unicode_normalize!(:nfkc)
120+
str unless str.match?(stored ? PROHIBITED_STORED : PROHIBITED)
121+
end
57122
rescue ArgumentError, Encoding::CompatibilityError => ex
58123
if /invalid byte sequence|incompatible encoding/.match? ex.message
59124
return nil unless exception
60-
raise StringPrepError.new(ex.message, string: str, profile: "saslprep")
125+
raise StringPrepError.new(ex.message, string: str,
126+
profile: STRINGPREP_PROFILE)
61127
end
62128
raise ex
63129
end
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# frozen_string_literal: true
2+
3+
module Net::IMAP::StringPrep
4+
5+
module SASLprep
6+
7+
# :nodoc:
8+
PROHIBITED_OUTPUT = Tables::SASLPREP_PROHIBIT
9+
10+
# :nodoc:
11+
PROHIBITED_OUTPUT_STORED = Tables::SASLPREP_PROHIBIT_STORED
12+
13+
# :nodoc:
14+
PROHIBITED = Regexp.union(PROHIBITED_OUTPUT, Tables::BIDI_FAILURE)
15+
16+
# :nodoc:
17+
PROHIBITED_STORED = Regexp.union(
18+
PROHIBITED_OUTPUT_STORED, Tables::BIDI_FAILURE,
19+
)
20+
21+
end
22+
end

lib/net/imap/stringprep/tables.rb

Lines changed: 36 additions & 135 deletions
Large diffs are not rendered by default.
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# frozen_string_literal: true
2+
3+
#--
4+
# This file is generated by `rake stringprep:tables`. Don't edit directly.
5+
#++
6+
7+
module Net::IMAP::StringPrep
8+
module Tables
9+
10+
BIDI_DESC_REQ2 = "A string with RandALCat characters must not contain LCat characters."
11+
12+
end
13+
end
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# frozen_string_literal: true
2+
3+
#--
4+
# This file is generated by `rake stringprep:tables`. Don't edit directly.
5+
#++
6+
7+
module Net::IMAP::StringPrep
8+
module Tables
9+
10+
BIDI_DESC_REQ3 = "A string with RandALCat characters must start and end with RandALCat characters."
11+
12+
end
13+
end

0 commit comments

Comments
 (0)