@@ -13,17 +13,17 @@ use crate::syntax_error::SyntaxError;
13
13
/// It is modelled after a Pratt Parser. For a gentle introduction to Pratt Parsing, see https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
14
14
pub struct Parser {
15
15
/// The ranges of the statements
16
- ranges : Vec < TextRange > ,
16
+ ranges : Vec < ( usize , usize ) > ,
17
17
/// The syntax errors accumulated during parsing
18
18
errors : Vec < SyntaxError > ,
19
19
/// The start of the current statement, if any
20
- current_stmt_start : Option < TextSize > ,
20
+ current_stmt_start : Option < usize > ,
21
21
/// The tokens to parse
22
22
pub tokens : Vec < Token > ,
23
23
24
24
eof_token : Token ,
25
25
26
- last_token_end : Option < TextSize > ,
26
+ next_pos : usize ,
27
27
}
28
28
29
29
/// Result of Building
@@ -46,66 +46,96 @@ impl Parser {
46
46
return !WHITESPACE_TOKENS . contains ( & t. kind )
47
47
|| ( t. kind == SyntaxKind :: Newline && t. text . chars ( ) . count ( ) > 1 ) ;
48
48
} )
49
- . rev ( )
50
49
. cloned ( )
51
50
. collect :: < Vec < _ > > ( ) ;
52
51
52
+ let eof_token = Token :: eof ( usize:: from (
53
+ tokens
54
+ . last ( )
55
+ . map ( |t| t. span . start ( ) )
56
+ . unwrap_or ( TextSize :: from ( 0 ) ) ,
57
+ ) ) ;
58
+
59
+ // next_pos should already be initialised to the index of the first relevant token
60
+ let mut next_pos = 0 ;
61
+ loop {
62
+ let token = tokens. get ( next_pos) . unwrap_or ( & eof_token) ;
63
+
64
+ if is_irrelevant_token ( token) {
65
+ next_pos += 1 ;
66
+ } else {
67
+ break ;
68
+ }
69
+ }
70
+
53
71
Self {
54
72
ranges : Vec :: new ( ) ,
55
- eof_token : Token :: eof ( usize:: from (
56
- tokens
57
- . first ( )
58
- . map ( |t| t. span . start ( ) )
59
- . unwrap_or ( TextSize :: from ( 0 ) ) ,
60
- ) ) ,
73
+ eof_token,
61
74
errors : Vec :: new ( ) ,
62
75
current_stmt_start : None ,
63
76
tokens,
64
- last_token_end : None ,
77
+ next_pos ,
65
78
}
66
79
}
67
80
68
81
pub fn finish ( self ) -> Parse {
69
82
Parse {
70
- ranges : self . ranges ,
83
+ ranges : self
84
+ . ranges
85
+ . iter ( )
86
+ . map ( |( start, end) | {
87
+ println ! ( "{} {}" , start, end) ;
88
+ let from = self . tokens . get ( * start) ;
89
+ let to = self . tokens . get ( * end) . unwrap_or ( & self . eof_token ) ;
90
+
91
+ TextRange :: new ( from. unwrap ( ) . span . start ( ) , to. span . end ( ) )
92
+ } )
93
+ . collect ( ) ,
71
94
errors : self . errors ,
72
95
}
73
96
}
74
97
75
98
/// Start statement
76
- pub fn start_stmt ( & mut self ) -> Token {
99
+ pub fn start_stmt ( & mut self ) {
77
100
assert ! ( self . current_stmt_start. is_none( ) ) ;
78
-
79
- let token = self . peek ( ) ;
80
-
81
- self . current_stmt_start = Some ( token. span . start ( ) ) ;
82
-
83
- token
101
+ self . current_stmt_start = Some ( self . next_pos ) ;
84
102
}
85
103
86
104
/// Close statement
87
105
pub fn close_stmt ( & mut self ) {
88
- self . ranges . push ( TextRange :: new (
106
+ assert ! ( self . next_pos > 0 ) ;
107
+
108
+ self . ranges . push ( (
89
109
self . current_stmt_start . expect ( "Expected active statement" ) ,
90
- self . last_token_end . expect ( "Expected last token end" ) ,
110
+ self . next_pos - 1 ,
91
111
) ) ;
92
112
93
113
self . current_stmt_start = None ;
94
114
}
95
115
96
- fn advance ( & mut self ) -> Token {
97
- let token = self . tokens . pop ( ) . unwrap_or ( self . eof_token . clone ( ) ) ;
98
-
99
- self . last_token_end = Some ( token. span . end ( ) ) ;
100
-
101
- token
116
+ fn advance ( & mut self ) -> & Token {
117
+ let mut first_relevant_token = None ;
118
+ loop {
119
+ let token = self . tokens . get ( self . next_pos ) . unwrap_or ( & self . eof_token ) ;
120
+
121
+ // we need to continue with next_pos until the next relevant token after we already
122
+ // found the first one
123
+ if !is_irrelevant_token ( token) {
124
+ if let Some ( t) = first_relevant_token {
125
+ return t;
126
+ }
127
+ first_relevant_token = Some ( token) ;
128
+ }
129
+
130
+ self . next_pos += 1 ;
131
+ }
102
132
}
103
133
104
- fn peek ( & mut self ) -> Token {
105
- self . tokens
106
- . last ( )
107
- . cloned ( )
108
- . unwrap_or ( self . eof_token . clone ( ) )
134
+ fn peek ( & self ) -> & Token {
135
+ match self . tokens . get ( self . next_pos ) {
136
+ Some ( token ) => token ,
137
+ None => & self . eof_token ,
138
+ }
109
139
}
110
140
111
141
/// checks if the current token is of `kind` and advances if true
@@ -132,3 +162,8 @@ impl Parser {
132
162
todo ! ( ) ;
133
163
}
134
164
}
165
+
166
+ fn is_irrelevant_token ( t : & Token ) -> bool {
167
+ return WHITESPACE_TOKENS . contains ( & t. kind )
168
+ && ( t. kind != SyntaxKind :: Newline || t. text . chars ( ) . count ( ) == 1 ) ;
169
+ }
0 commit comments