-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathquoted.rb
163 lines (152 loc) · 4.55 KB
/
quoted.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
module Tokens
class Quoted
CSI = 92.chr
ESC = 27.chr
TAB = 9.chr
LF = 10.chr
PCT="%"
LC=?a .. ?z
UC=?A .. ?Z
DIGITS=?0..?9
DQ='#'[0]
def self.escaped(s,q = DQ)
return nil if s.peek == q
e = s.get
if e == CSI
raised "Unexpected EOF" if !s.peek
e = s.get
case e
when 'e'
return ESC
when 't'
return TAB
when 'n'
return LF
else
return e
end
end
return e
end
HASH = "#"
def self.expect_dquoted(s,q='"')
ret = nil
buf = ""
while (e = escaped(s,q[0]));
if e == "#" && s.peek == ?{
# Uh-oh. String interpolation
#
# We'll need to do something dirty here:
#
# We will call back into the main parser... UGLY.
#
# We need to do this because you need to do a full
# parse to actually know when the string interpolation
# ends, since it can be recursive (!)
#
# This has an ugly impact: We need to get the parser
# object from somewhere. We'll pass that as a block.
#
# We will also need to return something other than a plain
# string. We'll return [:concat, string, fragments, one, by, one]
# where the fragments can be strings or expressions.
#
# NOTE: There's a semi-obvious optimization here that is
# NOT universally safe: Any seeming constant expression
# could result in the concatenation done at compile time.
# For 99.99% of apps this would be safe, but in Ruby some
# moron *could* overload methods and make the seemingly
# constant expression have side effects. We'll likely have
# an option to do this optimization (with some safety checks,
# but for correctness we also need to be able to turn the
# :concat into [:callm, original-string, :concat] or similar.
#
if !block_given?
STDERR.puts "WARNING: String interpolation requires passing block to Quoted.expect"
else
if !ret
ret = [:concat]
end
#ret ||= [:concat]
ret << buf
buf = ""
s.get
ret << yield
s.expect_str("}")
end
else
buf << e
end
end
raise "Unterminated string" if !s.expect(q)
if ret
ret << buf if buf != ""
return ret
else
return buf
end
end
def self.expect_squoted(s,q = "'" )
buf = ""
while (e = s.get) && e != q
if e == "\\" && (s.peek == ?' || s.peek == "\\"[0])
buf << "\\" + s.get
else
buf << e
end
end
raise "Unterminated string" if e != "'"
return buf
end
def self.expect(s,&block)
q = s.expect('"') || s.expect("'") || s.expect("%") or return nil
# Handle "special" quoted syntaxes. Currently we only handle generalized quoted
# strings, no backticks, regexps etc.. Examples:
# %() - double quote
# %q/string/ - generalized single quote
# %Q|String| - generalized double quote
# (etc. - the opening/close characters can be more or less anything. If (,[,{
# the closing char is the counterpart - ),],}.)
#
# Note that we're explicitly *NOT* handling "%" followed by a whitespace character.
# Not sure I ever want to, even though it's valid Ruby. Never seen it in the wild,
# and it's horrendously evil: "1 % 2" is an expression. "% 2 " is the string "2".
# "'1' + % 2 " is the string "12".
#
# For now we're doing % followed by a non-alphanumeric, non-whitespace
# character.
words = false
dquoted = true
if q == "'"
dquoted = false
elsif q == PCT
c = s.peek
case c
when ?q
dquoted = false
s.get
when ?Q
s.get
when ?w
words = true
s.get
when LC,UC
s.unget(c)
return nil
# FIXME: Separating this from above due to compiler bug
when DIGITS
s.unget(c)
return nil
end
q = s.get
if q == "(" then q = ")"
elsif q == "{" then q = "}"
elsif q == "[" then q = "]"
end
end
r = dquoted ? expect_dquoted(s,q,&block) : expect_squoted(s,q)
r = [:array].concat(r.split(" ")) if words
r
end
end
end