@@ -4,60 +4,126 @@ module Net
4
4
class IMAP
5
5
module StringPrep
6
6
7
- # SASLprep#saslprep can be used to prepare a string according to [RFC4013].
7
+ # SASLprep#saslprep can be used to prepare a string according to
8
+ # RFC4013[https://tools.ietf.org/html/rfc4013].
8
9
#
9
10
# \SASLprep maps characters three ways: to nothing, to space, and Unicode
10
11
# normalization form KC. \SASLprep prohibits codepoints from nearly all
11
- # standard StringPrep tables (RFC3454, Appendix "C"), and uses
12
+ # standard StringPrep tables
13
+ # (RFC3454[https://tools.ietf.org/html/rfc3454], Appendix "C"), and uses
12
14
# \StringPrep's standard bidirectional characters requirements (Appendix
13
15
# "D"). \SASLprep also uses \StringPrep's definition of "Unassigned"
14
16
# codepoints (Appendix "A").
15
17
module SASLprep
18
# Avoid loading these tables unless they are needed (for non-ASCII).
# All four constants are registered against the same file.
autoload :PROHIBITED_OUTPUT,        "#{__dir__}/saslprep/prohibited.rb"
autoload :PROHIBITED_OUTPUT_STORED, "#{__dir__}/saslprep/prohibited.rb"
autoload :PROHIBITED,               "#{__dir__}/saslprep/prohibited.rb"
autoload :PROHIBITED_STORED,        "#{__dir__}/saslprep/prohibited.rb"

# Defined in RFC4013[https://tools.ietf.org/html/rfc4013].
STRINGPREP_PROFILE = "SASLprep"

# Used to short-circuit strings that don't need preparation: matches only
# strings made up entirely of U+0020..U+007E (including the empty string).
# The +u+ flag fixes the regexp's encoding to UTF-8.
ASCII_NO_CTRLS = /\A[\x20-\x7e]*\z/u.freeze
19
29
20
# Regexp for RFC4013[https://tools.ietf.org/html/rfc4013] §2.1 Mapping -
# mapped to space
MAP_TO_SPACE = Tables::IN_C_1_2

# Regexp for RFC4013[https://tools.ietf.org/html/rfc4013] §2.1 Mapping -
# mapped to nothing
MAP_TO_NOTHING = Tables::IN_B_1

# RFC4013[https://tools.ietf.org/html/rfc4013] §2.1 Mapping
# >>>
#   This profile specifies:
#   - non-ASCII space characters (\StringPrep\[\"C.1.2\"]) that can
#     be mapped to SPACE (U+0020)
#   - the "commonly mapped to nothing" characters
#     (\StringPrep\[\"B.1\"]) that can be mapped to nothing.
#
# Keys are the mapping regexps; values are their replacement strings.
MAPPINGS = {
  MAP_TO_SPACE   => " ",
  MAP_TO_NOTHING => "",
}.freeze

# RFC4013[https://tools.ietf.org/html/rfc4013] §2.2 Normalization
# >>>
#   This profile specifies using Unicode normalization form KC, as
#   described in Section 4 of [StringPrep].
NORMALIZATION = :nfkc

# RFC4013[https://tools.ietf.org/html/rfc4013] §2.3 Prohibited Output
# >>>
# * Non-ASCII space characters — \StringPrep\[\"C.1.2\"]
# * ASCII control characters — \StringPrep\[\"C.2.1\"]
# * Non-ASCII control characters — \StringPrep\[\"C.2.2\"]
# * Private Use characters — \StringPrep\[\"C.3\"]
# * Non-character code points — \StringPrep\[\"C.4\"]
# * Surrogate code points — \StringPrep\[\"C.5\"]
# * Inappropriate for plain text characters — \StringPrep\[\"C.6\"]
# * Inappropriate for canonical representation characters — \StringPrep\[\"C.7\"]
# * Change display properties or deprecated characters — \StringPrep\[\"C.8\"]
# * Tagging characters — \StringPrep\[\"C.9\"]
PROHIBITED_TABLES = %w[C.1.2 C.2.1 C.2.2 C.3 C.4 C.5 C.6 C.7 C.8 C.9].freeze

# RFC4013[https://tools.ietf.org/html/rfc4013] §2.4 Bidirectional
# Characters
# >>>
#   This profile specifies checking bidirectional strings as described
#   in [StringPrep, Section 6].
CHECK_BIDI = true

# RFC4013[https://tools.ietf.org/html/rfc4013] §2.5 Unassigned Code
# Points
# >>>
#   This profile specifies the \StringPrep\[\"A.1\"] table as its
#   list of unassigned code points.
UNASSIGNED_TABLE = "A.1"

# Deprecated alias for the "A.1" table regexp; referencing it warns.
UNASSIGNED = Tables::IN_A_1 # :nodoc:
deprecate_constant :UNASSIGNED
29
88
30
89
module_function
31
90
32
91
# Prepares a UTF-8 +string+ for comparison, using the \SASLprep profile
# {[RFC4013]}[https://tools.ietf.org/html/rfc4013] of the StringPrep
# algorithm {[RFC3454]}[https://tools.ietf.org/html/rfc3454].
#
# By default, prohibited strings will return +nil+.  When +exception+ is
# +true+, a StringPrepError describing the violation will be raised.
#
# When +stored+ is +true+, "unassigned" codepoints will be prohibited.
# For \StringPrep and the \SASLprep profile, "unassigned" refers to
# Unicode 3.2, and not later versions.  See
# RFC3454[https://tools.ietf.org/html/rfc3454] §7 for more information.
#
# Strings with an invalid byte sequence or an incompatible encoding are
# handled like prohibited strings: +nil+ by default, or a StringPrepError
# carrying the original string when +exception+ is +true+.  Strings that
# consist only of printable ASCII are returned as-is (the same object).
def saslprep(original, stored: false, exception: false)
  # Short-circuit; an incompatible or invalid encoding raises here and is
  # translated by the rescue clause below.
  return original if ASCII_NO_CTRLS.match?(original)

  if exception
    # Delegate to the generic implementation, configured with this
    # profile's tables; its result (or error) is passed through.
    StringPrep.stringprep(
      original,
      unassigned:    UNASSIGNED_TABLE,
      maps:          MAPPINGS,
      prohibited:    PROHIBITED_TABLES,
      normalization: NORMALIZATION,
      bidi:          CHECK_BIDI,
      stored:        stored,
      profile:       STRINGPREP_PROFILE
    )
  else
    str = original.encode("UTF-8") # also dups (and raises for invalid encoding)
    str.gsub!(MAP_TO_SPACE, " ")
    str.gsub!(MAP_TO_NOTHING, "")
    str.unicode_normalize!(:nfkc)
    # nil when the prepared string hits the prohibited regexp
    str unless str.match?(stored ? PROHIBITED_STORED : PROHIBITED)
  end
rescue ArgumentError, Encoding::CompatibilityError => ex
  # Only encoding failures are translated; anything else is re-raised as-is.
  raise ex unless /invalid byte sequence|incompatible encoding/.match?(ex.message)
  return nil unless exception

  # Report the caller's input.  The local +str+ is only assigned on the
  # non-raising path, so it is always nil when this line is reached.
  raise StringPrepError.new(ex.message, string: original,
                            profile: STRINGPREP_PROFILE)
end
0 commit comments