Skip to content

Commit 9ae67e0

Browse files
committed
fix: save
1 parent 79e2810 commit 9ae67e0

File tree

6 files changed

+230
-208
lines changed

6 files changed

+230
-208
lines changed
Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,17 @@
11
//! Postgres Statement Splitter
22
//!
33
//! This crate provides a function to split a SQL source string into individual statements.
4-
5-
mod data;
6-
mod split;
74
mod parser;
85
mod syntax_error;
96

10-
use parser::{Parse, Parser};
7+
use parser::{source, Parse, Parser};
118

12-
use pg_lexer::{lex};
13-
use split::parse_source;
9+
use pg_lexer::lex;
1410

1511
pub fn split(sql: &str) -> Parse {
1612
let mut parser = Parser::new(lex(sql));
1713

18-
parse_source(&mut parser);
14+
source(&mut parser);
1915

2016
parser.finish()
2117
}
@@ -25,8 +21,8 @@ mod tests {
2521
use super::*;
2622

2723
#[test]
28-
fn test_splitter() {
29-
let input = "select 1 from contact;\nselect 1;";
24+
fn basic() {
25+
let input = "select 1 from contact; select 1;";
3026

3127
let res = split(input);
3228
assert_eq!(res.ranges.len(), 2);
@@ -35,7 +31,7 @@ mod tests {
3531
}
3632

3733
#[test]
38-
fn test_splitter_no_semicolons() {
34+
fn no_semicolons() {
3935
let input = "select 1 from contact\nselect 1";
4036

4137
let res = split(input);
@@ -45,13 +41,16 @@ mod tests {
4541
}
4642

4743
#[test]
48-
fn test_splitter_double_newlines() {
44+
fn double_newlines() {
4945
let input = "select 1 from contact\nselect 1\n\nalter table t add column c int";
5046

5147
let res = split(input);
5248
assert_eq!(res.ranges.len(), 3);
5349
assert_eq!("select 1 from contact", input[res.ranges[0]].to_string());
5450
assert_eq!("select 1", input[res.ranges[1]].to_string());
55-
assert_eq!("alter table t add column c int", input[res.ranges[2]].to_string());
51+
assert_eq!(
52+
"alter table t add column c int",
53+
input[res.ranges[2]].to_string()
54+
);
5655
}
5756
}

crates/pg_statement_splitter/src/parser.rs

Lines changed: 82 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
mod common;
2+
mod data;
3+
mod dml;
4+
5+
pub use common::source;
6+
17
use std::cmp::min;
28

39
use pg_lexer::{SyntaxKind, Token, TokenType, WHITESPACE_TOKENS};
@@ -52,7 +58,7 @@ impl Parser {
5258
.iter()
5359
.map(|(start, end)| {
5460
let from = self.tokens.get(*start);
55-
let to = self.tokens.get(end - 1);
61+
let to = self.tokens.get(*end);
5662
// get text range from token range
5763
let text_start = from.unwrap().span.start();
5864
let text_end = to.unwrap().span.end();
@@ -64,13 +70,17 @@ impl Parser {
6470
}
6571
}
6672

67-
/// Start statement at last non-whitespace token
73+
/// Start statement at next non-whitespace token
6874
pub fn start_stmt(&mut self) {
6975
assert!(self.current_stmt_start.is_none());
7076

7177
if let Some(whitespace_token_buffer) = self.whitespace_token_buffer {
7278
self.current_stmt_start = Some(whitespace_token_buffer);
7379
} else {
80+
while self.nth(0, false).token_type == TokenType::Whitespace {
81+
self.advance(false);
82+
}
83+
7484
self.current_stmt_start = Some(self.pos);
7585
}
7686
}
@@ -79,6 +89,13 @@ impl Parser {
7989
pub fn close_stmt(&mut self) {
8090
assert!(self.current_stmt_start.is_some());
8191

92+
println!(
93+
"Closing statement {:?} / {:?}: {:?}",
94+
self.whitespace_token_buffer,
95+
self.pos,
96+
self.tokens.get(self.pos)
97+
);
98+
8299
self.ranges.push((
83100
self.current_stmt_start.unwrap(),
84101
self.whitespace_token_buffer.unwrap_or(self.pos),
@@ -88,71 +105,66 @@ impl Parser {
88105
}
89106

90107
/// applies token and advances
91-
pub fn advance(&mut self) {
92-
assert!(!self.eof());
108+
///
109+
/// if `ignore_whitespace` is true, it will advance to the next non-whitespace token
110+
pub fn advance(&mut self, ignore_whitespace: bool) {
111+
assert!(!self.eof(ignore_whitespace));
112+
113+
loop {
114+
let whitespace = match self.nth(0, false).kind {
115+
SyntaxKind::Whitespace => {
116+
if self.whitespace_token_buffer.is_none() {
117+
self.whitespace_token_buffer = Some(self.pos);
118+
}
93119

94-
if self.nth(0).kind == SyntaxKind::Whitespace {
95-
if self.whitespace_token_buffer.is_none() {
96-
self.whitespace_token_buffer = Some(self.pos);
120+
true
121+
}
122+
_ => {
123+
self.whitespace_token_buffer = None;
124+
125+
false
126+
}
127+
};
128+
129+
self.pos += 1;
130+
131+
if !whitespace || !ignore_whitespace {
132+
break;
97133
}
98-
} else {
99-
self.flush_token_buffer();
100134
}
101-
self.pos += 1;
102135
}
103136

104137
/// checks if the current token is of `kind` and advances if true
105138
/// returns true if the current token is of `kind`
106-
pub fn eat(&mut self, kind: SyntaxKind) -> bool {
107-
if self.nth(0).kind == kind {
108-
self.advance();
139+
pub fn eat(&mut self, kind: SyntaxKind, ignore_whitespace: bool) -> bool {
140+
if self.nth(1, ignore_whitespace).kind == kind {
141+
println!("Eating {:?}", kind);
142+
self.advance(ignore_whitespace);
109143
true
110144
} else {
111145
false
112146
}
113147
}
114148

115149
pub fn at_whitespace(&self) -> bool {
116-
self.nth(0).kind == SyntaxKind::Whitespace
150+
self.nth(0, false).kind == SyntaxKind::Whitespace
117151
}
118152

119-
pub fn peek(&self) -> &Token {
120-
self.nth(1)
153+
pub fn peek(&self, ignore_whitespace: bool) -> &Token {
154+
self.nth(1, ignore_whitespace)
121155
}
122156

123-
pub fn expect(&mut self, kind: SyntaxKind) {
124-
if self.nth(0).kind == kind {
157+
pub fn expect(&mut self, kind: SyntaxKind, ignore_whitespace: bool) {
158+
println!("Expecting {:?}", kind);
159+
if self.eat(kind, ignore_whitespace) {
125160
return;
126161
}
127162

128163
self.error_at(format!("Expected {:#?}", kind));
129164
}
130165

131-
pub fn eof(&self) -> bool {
132-
self.pos == self.tokens.len()
133-
}
134-
135-
/// flush token buffer and applies all tokens
136-
fn flush_token_buffer(&mut self) {
137-
if self.whitespace_token_buffer.is_none() {
138-
return;
139-
}
140-
while self.whitespace_token_buffer.unwrap() < self.pos {
141-
self.whitespace_token_buffer = Some(self.whitespace_token_buffer.unwrap() + 1);
142-
}
143-
self.whitespace_token_buffer = None;
144-
}
145-
146-
pub fn next(&mut self) -> &Token {
147-
loop {
148-
if self.at_whitespace() {
149-
self.advance();
150-
continue;
151-
}
152-
break;
153-
}
154-
155-
self.nth(0)
166+
pub fn eof(&self, ignore_whitespace: bool) -> bool {
167+
self.peek(ignore_whitespace).kind == SyntaxKind::Eof
156168
}
157169

158170
/// collects a SyntaxError with an `error` message at the current position
@@ -171,11 +183,33 @@ impl Parser {
171183
}
172184

173185
/// lookahead method.
174-
fn nth(&self, lookahead: usize) -> &Token {
175-
match self.tokens.get(self.pos + lookahead) {
176-
Some(token) => token,
177-
None => &self.eof_token,
186+
///
187+
/// if `ignore_whitespace` is true, it will skip all whitespace tokens
188+
pub fn nth(&self, lookahead: usize, ignore_whitespace: bool) -> &Token {
189+
if ignore_whitespace {
190+
let mut idx = 0;
191+
let mut non_whitespace_token_ctr = 0;
192+
loop {
193+
match self.tokens.get(self.pos + idx) {
194+
Some(token) => {
195+
if !WHITESPACE_TOKENS.contains(&token.kind) {
196+
if non_whitespace_token_ctr == lookahead {
197+
return token;
198+
}
199+
non_whitespace_token_ctr += 1;
200+
}
201+
idx += 1;
202+
}
203+
None => {
204+
return &self.eof_token;
205+
}
206+
}
207+
}
208+
} else {
209+
match self.tokens.get(self.pos + lookahead) {
210+
Some(token) => token,
211+
None => &self.eof_token,
212+
}
178213
}
179214
}
180215
}
181-
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
use pg_lexer::{SyntaxKind, Token};
2+
3+
use super::{
4+
dml::{cte, select},
5+
Parser,
6+
};
7+
8+
pub fn source(p: &mut Parser) {
9+
loop {
10+
// todo find a better way to handle stmt start
11+
// same problem as below... for the first token we need to use nth(0),
12+
// but for the rest we need to use peek
13+
p.start_stmt();
14+
statement(p);
15+
p.close_stmt();
16+
17+
if p.eof(true) {
18+
break;
19+
}
20+
}
21+
}
22+
23+
pub(crate) fn statement(p: &mut Parser) {
24+
// todo find a better way to handle first token
25+
let token = if p.pos == 0 {
26+
p.nth(0, true)
27+
} else {
28+
p.peek(true)
29+
};
30+
31+
match token.kind {
32+
SyntaxKind::With => {
33+
cte(p);
34+
}
35+
SyntaxKind::Select => {
36+
select(p);
37+
}
38+
SyntaxKind::Insert => {
39+
todo!();
40+
// insert(p);
41+
}
42+
SyntaxKind::Update => {
43+
todo!();
44+
// update(p);
45+
}
46+
SyntaxKind::DeleteP => {
47+
todo!();
48+
// delete(p);
49+
}
50+
t => {
51+
panic!("stmt: Unknown token {:?}", t);
52+
// unknown(p);
53+
}
54+
}
55+
}
56+
57+
/// Consumes a parenthesised group, from the opening `(` up to the first `)`.
///
/// The opening parenthesis (`Ascii40`) is expected as the next non-whitespace
/// token. Afterwards the parser is advanced one non-whitespace token at a time
/// until the current token is a closing parenthesis (`Ascii41`).
///
/// If the input ends first, `expect` is called for the closing parenthesis so
/// that the missing `)` is reported as an error.
///
/// Nested parentheses are not tracked: scanning stops at the first `)`.
pub(crate) fn parenthesis(p: &mut Parser) {
    p.expect(SyntaxKind::Ascii40, true);

    loop {
        // Check for the closing parenthesis before the end-of-input check, so
        // that a `)` that is the very last token is not reported as missing.
        if p.nth(0, true).kind == SyntaxKind::Ascii41 {
            break;
        }

        if p.eof(true) {
            // Nothing left to scan: let `expect` record the missing `)`.
            p.expect(SyntaxKind::Ascii41, true);
            break;
        }

        // Make progress. Without this the loop would inspect the same token
        // forever for any non-empty group.
        p.advance(true);
    }
}
70+
71+
pub(crate) fn unknown(p: &mut Parser) {
72+
loop {
73+
match p.peek(false) {
74+
t @ Token {
75+
kind: SyntaxKind::Newline,
76+
..
77+
} => {
78+
if t.text.chars().count() > 1 {
79+
p.advance(false);
80+
break;
81+
}
82+
}
83+
Token {
84+
// ";"
85+
kind: SyntaxKind::Ascii59,
86+
..
87+
} => {
88+
p.advance(false);
89+
break;
90+
}
91+
Token {
92+
kind: SyntaxKind::Eof,
93+
..
94+
} => {
95+
break;
96+
}
97+
Token {
98+
kind: SyntaxKind::Ascii40,
99+
..
100+
} => {
101+
parenthesis(p);
102+
}
103+
t => {
104+
println!("Unknown token {:?}", t);
105+
p.advance(false);
106+
}
107+
}
108+
}
109+
}

0 commit comments

Comments
 (0)