Skip to content

Commit 3849cf7

Browse files
committed
refactor: parser now uses a pointer into the token vector instead of popping and cloning
1 parent 58c0374 commit 3849cf7

File tree

2 files changed

+75
-33
lines changed

2 files changed

+75
-33
lines changed

crates/pg_statement_splitter/src/parser.rs

Lines changed: 67 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,17 @@ use crate::syntax_error::SyntaxError;
1313
/// It is modelled after a Pratt Parser. For a gentle introduction to Pratt Parsing, see https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
1414
pub struct Parser {
1515
/// The ranges of the statements
16-
ranges: Vec<TextRange>,
16+
ranges: Vec<(usize, usize)>,
1717
/// The syntax errors accumulated during parsing
1818
errors: Vec<SyntaxError>,
1919
/// The start of the current statement, if any
20-
current_stmt_start: Option<TextSize>,
20+
current_stmt_start: Option<usize>,
2121
/// The tokens to parse
2222
pub tokens: Vec<Token>,
2323

2424
eof_token: Token,
2525

26-
last_token_end: Option<TextSize>,
26+
next_pos: usize,
2727
}
2828

2929
/// Result of Building
@@ -46,66 +46,96 @@ impl Parser {
4646
return !WHITESPACE_TOKENS.contains(&t.kind)
4747
|| (t.kind == SyntaxKind::Newline && t.text.chars().count() > 1);
4848
})
49-
.rev()
5049
.cloned()
5150
.collect::<Vec<_>>();
5251

52+
let eof_token = Token::eof(usize::from(
53+
tokens
54+
.last()
55+
.map(|t| t.span.start())
56+
.unwrap_or(TextSize::from(0)),
57+
));
58+
59+
// next_pos should be initialised with the first relevant token already skipped to
60+
let mut next_pos = 0;
61+
loop {
62+
let token = tokens.get(next_pos).unwrap_or(&eof_token);
63+
64+
if is_irrelevant_token(token) {
65+
next_pos += 1;
66+
} else {
67+
break;
68+
}
69+
}
70+
5371
Self {
5472
ranges: Vec::new(),
55-
eof_token: Token::eof(usize::from(
56-
tokens
57-
.first()
58-
.map(|t| t.span.start())
59-
.unwrap_or(TextSize::from(0)),
60-
)),
73+
eof_token,
6174
errors: Vec::new(),
6275
current_stmt_start: None,
6376
tokens,
64-
last_token_end: None,
77+
next_pos,
6578
}
6679
}
6780

6881
pub fn finish(self) -> Parse {
6982
Parse {
70-
ranges: self.ranges,
83+
ranges: self
84+
.ranges
85+
.iter()
86+
.map(|(start, end)| {
87+
println!("{} {}", start, end);
88+
let from = self.tokens.get(*start);
89+
let to = self.tokens.get(*end).unwrap_or(&self.eof_token);
90+
91+
TextRange::new(from.unwrap().span.start(), to.span.end())
92+
})
93+
.collect(),
7194
errors: self.errors,
7295
}
7396
}
7497

7598
/// Start statement
76-
pub fn start_stmt(&mut self) -> Token {
99+
pub fn start_stmt(&mut self) {
77100
assert!(self.current_stmt_start.is_none());
78-
79-
let token = self.peek();
80-
81-
self.current_stmt_start = Some(token.span.start());
82-
83-
token
101+
self.current_stmt_start = Some(self.next_pos);
84102
}
85103

86104
/// Close statement
87105
pub fn close_stmt(&mut self) {
88-
self.ranges.push(TextRange::new(
106+
assert!(self.next_pos > 0);
107+
108+
self.ranges.push((
89109
self.current_stmt_start.expect("Expected active statement"),
90-
self.last_token_end.expect("Expected last token end"),
110+
self.next_pos - 1,
91111
));
92112

93113
self.current_stmt_start = None;
94114
}
95115

96-
fn advance(&mut self) -> Token {
97-
let token = self.tokens.pop().unwrap_or(self.eof_token.clone());
98-
99-
self.last_token_end = Some(token.span.end());
100-
101-
token
116+
fn advance(&mut self) -> &Token {
117+
let mut first_relevant_token = None;
118+
loop {
119+
let token = self.tokens.get(self.next_pos).unwrap_or(&self.eof_token);
120+
121+
// after finding the first relevant token, we keep advancing next_pos
122+
// until it points at the next relevant token
123+
if !is_irrelevant_token(token) {
124+
if let Some(t) = first_relevant_token {
125+
return t;
126+
}
127+
first_relevant_token = Some(token);
128+
}
129+
130+
self.next_pos += 1;
131+
}
102132
}
103133

104-
fn peek(&mut self) -> Token {
105-
self.tokens
106-
.last()
107-
.cloned()
108-
.unwrap_or(self.eof_token.clone())
134+
fn peek(&self) -> &Token {
135+
match self.tokens.get(self.next_pos) {
136+
Some(token) => token,
137+
None => &self.eof_token,
138+
}
109139
}
110140

111141
/// checks if the current token is of `kind` and advances if true
@@ -132,3 +162,8 @@ impl Parser {
132162
todo!();
133163
}
134164
}
165+
166+
fn is_irrelevant_token(t: &Token) -> bool {
167+
return WHITESPACE_TOKENS.contains(&t.kind)
168+
&& (t.kind != SyntaxKind::Newline || t.text.chars().count() == 1);
169+
}

crates/pg_statement_splitter/src/parser/common.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,14 @@ pub(crate) fn unknown(p: &mut Parser) {
7777
loop {
7878
match p.peek() {
7979
Token {
80-
kind: SyntaxKind::Newline | SyntaxKind::Ascii59 | SyntaxKind::Eof,
80+
kind: SyntaxKind::Ascii59,
81+
..
82+
} => {
83+
p.advance();
84+
break;
85+
}
86+
Token {
87+
kind: SyntaxKind::Newline | SyntaxKind::Eof,
8188
..
8289
} => {
8390
break;

0 commit comments

Comments
 (0)