Skip to content

Commit

Permalink
Improve lexer implementation and add additional integration tests (#29)
Browse files Browse the repository at this point in the history
  • Loading branch information
terror authored Oct 4, 2024
1 parent 43c1025 commit 5fcf5e2
Show file tree
Hide file tree
Showing 2 changed files with 170 additions and 38 deletions.
123 changes: 86 additions & 37 deletions src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,61 +16,78 @@ impl<'a> Lexer<'a> {

/// Splits `self.src` into a list of argument tokens.
///
/// NOTE(review): this span is a diff rendering without +/- markers, so the
/// lines the commit removed (the old `group`-based scanning) appear
/// interleaved with the lines that replace them. The rules below describe the
/// `current_token`-based lines:
///
/// - `'` or `"` flushes any pending token, then pushes the string returned by
///   `parse_quoted_string` as a token of its own.
/// - A space or tab ends the pending token, if there is one.
/// - A backslash appends the character that follows it verbatim; a trailing
///   backslash with nothing after it is dropped.
/// - Every other character is appended to the pending token.
///
/// Errors from `parse_quoted_string` are propagated with `?`.
fn tokenize(&self) -> Result<Vec<String>> {
let mut tokens = Vec::new();

let mut chars = self.src.chars().peekable();

// Accumulates the unquoted token currently being read.
let mut current_token = String::new();

while let Some(ch) = chars.next() {
match ch {
'\'' | '"' => {
let mut group = String::new();

chars
.clone()
.collect::<Vec<char>>()
.iter()
.find(|next| **next == ch)
.ok_or(Error::LexError {
message: "Unmatched delimeter".into(),
})?;

for next in chars.by_ref() {
match next {
next if next == ch => break,
_ => group.push(next),
}
// A quote terminates whatever unquoted token was being built.
if !current_token.is_empty() {
tokens.push(current_token);
current_token = String::new();
}

chars.next();
// The opening quote `ch` has already been consumed by the loop above.
let quoted_string = self.parse_quoted_string(ch, &mut chars)?;

tokens.push(group);
tokens.push(quoted_string);
}
' ' | '\t' => {
// Whitespace separates tokens; runs of whitespace produce no empty tokens.
if !current_token.is_empty() {
tokens.push(current_token);
current_token = String::new();
}
}
'\\' => {
// Outside quotes a backslash makes the next character literal.
if let Some(next_ch) = chars.next() {
current_token.push(next_ch);
}
}
_ => {
let mut group = String::new();
current_token.push(ch);
}
}
}

group.push(ch);
// Flush whatever is still pending once the input is exhausted.
if !current_token.is_empty() {
tokens.push(current_token);
}

Ok(tokens)
}

/// Reads the body of a quoted string from `chars` and returns it with escape
/// sequences decoded.
///
/// The caller is expected to have consumed the opening quote already. Reading
/// stops at, and consumes, the first unescaped occurrence of `quote`.
///
/// Inside the quotes a backslash escapes the next character:
/// `\\`, `\'` and `\"` produce that character; `\n`, `\t` and `\r` produce a
/// newline, tab and carriage return; any other escaped character is kept
/// together with its backslash.
///
/// Returns `Error::LexError` with the message "Unmatched delimiter" when
/// `chars` is exhausted before an unescaped `quote` is found.
///
/// NOTE(review): this span is a diff rendering without +/- markers; the
/// `chars.peek()` loop, the `tokens.extend(...)` call and the `Ok(tokens)`
/// line look like leftovers of the code this function replaced.
fn parse_quoted_string(
&self,
quote: char,
chars: &mut std::iter::Peekable<std::str::Chars>,
) -> Result<String> {
let mut result = String::new();
// True while the previous character was an unconsumed backslash.
let mut escaped = false;

while let Some(next) = chars.peek() {
match next {
'\'' | '"' => break,
_ => {
group.push(*next);
chars.next();
}
for ch in chars.by_ref() {
match ch {
// This arm comes first so that an escaped quote or backslash is
// never treated as a terminator or as the start of a new escape.
_ if escaped => {
match ch {
'\\' | '\'' | '"' => result.push(ch),
'n' => result.push('\n'),
't' => result.push('\t'),
'r' => result.push('\r'),
// Unknown escape: keep both the backslash and the character.
_ => {
result.push('\\');
result.push(ch);
}
}

tokens.extend(
group
.trim()
.split(' ')
.map(|argument| argument.to_owned())
.collect::<Vec<String>>(),
);
escaped = false;
}
'\\' => escaped = true,
// Only the quote character that opened the string closes it.
ch if ch == quote => return Ok(result),
_ => result.push(ch),
}
}

Ok(tokens)
// Input ran out without a closing quote.
Err(Error::LexError {
message: "Unmatched delimiter".into(),
})
}
}

Expand Down Expand Up @@ -115,4 +132,36 @@ mod tests {
fn unmatched_delimiter() {
  // A quote that is opened but never closed must surface as an error.
  let outcome = lex("-c 'echo foo");

  assert!(outcome.is_err());
}

#[test]
fn escaped_quotes() {
  // `\"` inside a double-quoted argument yields a literal `"` and does not
  // terminate the argument.
  let tokens = lex(r#"echo "Hello \"World\"""#).unwrap();

  assert_eq!(tokens, vec!["echo", r#"Hello "World""#]);
}

#[test]
fn nested_quotes() {
  // Single quotes inside a double-quoted argument are ordinary characters.
  let tokens = lex(r#"echo "outer 'inner' outer""#).unwrap();

  assert_eq!(tokens, vec!["echo", r#"outer 'inner' outer"#]);
}

#[test]
fn complex_command() {
  // The whole double-quoted pipeline is one token, and the `\n` escape inside
  // it is decoded into a real newline character.
  let script = "echo 'hello world' | tr ' ' '\n' | sort | uniq -c | sort -nr";

  let tokens =
    lex(r#"bash -c "echo 'hello world' | tr ' ' '\n' | sort | uniq -c | sort -nr""#).unwrap();

  assert_eq!(tokens, vec!["bash", "-c", script]);
}

#[test]
fn escaped_characters() {
  // Covers `\n`, `\t`, `\"` and `\\` in a double-quoted argument and `\'` in a
  // single-quoted one.
  let tokens = lex(r#"echo "Hello\nWorld\t\"\\" 'Single\'Quote'"#).unwrap();

  let expected = vec!["echo", "Hello\nWorld\t\"\\", "Single'Quote"];

  assert_eq!(tokens, expected);
}
}
85 changes: 84 additions & 1 deletion tests/integration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,7 @@ fn inline_unmatched_delimiter() -> Result {
.expected_status(1)
.expected_stderr(
"
error: Lex Error: Unmatched delimeter
error: Lex Error: Unmatched delimiter
",
)
.run()
Expand Down Expand Up @@ -612,3 +612,86 @@ fn grapheme_handling() -> Result {
)
.run()
}

#[test]
fn large_output_handling() -> Result {
  // The command prints "Large " one thousand times on a single line; the
  // expected document is the input with that line inserted into the block.
  let repeated = "Large ".repeat(1000);

  let expected = format!(
    r#"
```present python -c "print('Large ' * 1000)"
{}
```
"#,
    repeated
  );

  Test::new()?
    .markdown(
      r#"
```present python -c "print('Large ' * 1000)"
```
"#,
    )
    .expected_status(0)
    .expected_stdout(&expected)
    .run()
}

#[test]
fn escaping_special_characters() -> Result {
  // Shell metacharacters inside a double-quoted argument must reach `echo`
  // untouched, with `\"` and `\\` decoded to `"` and `\`.
  let markdown = r#"
```present echo "Special chars: && || > < | ; \" ' \\"
```
"#;

  let expected = r#"
```present echo "Special chars: && || > < | ; \" ' \\"
Special chars: && || > < | ; " ' \
```
"#;

  Test::new()?
    .markdown(markdown)
    .expected_status(0)
    .expected_stdout(expected)
    .run()
}

#[test]
fn complex_shell_pipeline() -> Result {
  // A multi-stage pipeline passed to `bash -c` as one quoted argument; the
  // expected block contains the pipeline's two output lines.
  let markdown = r#"
```present bash -c "echo 'hello world' | tr ' ' '\n' | sort | uniq -c | sort -nr | sed 's/^[[:space:]]*//' "
```
"#;

  let expected = r#"
```present bash -c "echo 'hello world' | tr ' ' '\n' | sort | uniq -c | sort -nr | sed 's/^[[:space:]]*//' "
1 world
1 hello
```
"#;

  Test::new()?
    .markdown(markdown)
    .expected_status(0)
    .expected_stdout(expected)
    .run()
}

#[test]
fn unicode_normalization() -> Result {
  // The two arguments render identically but are different code point
  // sequences, and the expected hex dumps below depend on that difference.
  // They are spelled with escapes so that an editor, formatter or copy/paste
  // step that normalizes Unicode cannot silently collapse them into the same
  // bytes and invalidate the test.
  let precomposed = "\u{00E9}"; // U+00E9, UTF-8 bytes c3 a9
  let decomposed = "e\u{0301}"; // U+0065 U+0301, UTF-8 bytes 65 cc 81

  // Raw string: `\"` stays a literal backslash-quote for the lexer to decode.
  let command = format!(
    r#"bash -c "echo \"{}\" | xxd -p && echo \"{}\" | xxd -p""#,
    precomposed, decomposed
  );

  // Leading newline, fenced block, trailing newline.
  let markdown = format!("\n```present {}\n```\n", command);

  // Each `echo` appends a newline, hence the trailing `0a` in both dumps.
  let expected = format!("\n```present {}\nc3a90a\n65cc810a\n```\n", command);

  Test::new()?
    .markdown(&markdown)
    .expected_status(0)
    .expected_stdout(&expected)
    .run()
}

0 comments on commit 5fcf5e2

Please sign in to comment.