1
0

Continue overhauling the lexer

This commit is contained in:
Jeff 2024-11-07 18:48:53 -05:00
parent b9ded3ea78
commit 2e51112b4b
4 changed files with 389 additions and 96 deletions

View File

@ -886,7 +886,7 @@ impl<'src> Compiler<'src> {
self.advance()?; self.advance()?;
self.chunk.begin_scope(); self.chunk.begin_scope();
while !self.allow(Token::RightCurlyBrace)? && !self.is_eof() { while !self.allow(Token::RightBrace)? && !self.is_eof() {
self.parse(Precedence::None)?; self.parse(Precedence::None)?;
} }
@ -902,7 +902,7 @@ impl<'src> Compiler<'src> {
let start_register = self.next_register(); let start_register = self.next_register();
while !self.allow(Token::RightSquareBrace)? && !self.is_eof() { while !self.allow(Token::RightBracket)? && !self.is_eof() {
let expected_register = self.next_register(); let expected_register = self.next_register();
self.parse_expression()?; self.parse_expression()?;
@ -953,11 +953,11 @@ impl<'src> Compiler<'src> {
let if_block_start = self.chunk.len(); let if_block_start = self.chunk.len();
let if_block_start_position = self.current_position; let if_block_start_position = self.current_position;
if let Token::LeftCurlyBrace = self.current_token { if let Token::LeftBrace = self.current_token {
self.parse_block()?; self.parse_block()?;
} else { } else {
return Err(CompileError::ExpectedToken { return Err(CompileError::ExpectedToken {
expected: TokenKind::LeftCurlyBrace, expected: TokenKind::LeftBrace,
found: self.current_token.to_owned(), found: self.current_token.to_owned(),
position: self.current_position, position: self.current_position,
}); });
@ -985,11 +985,11 @@ impl<'src> Compiler<'src> {
if let Token::Else = self.current_token { if let Token::Else = self.current_token {
self.advance()?; self.advance()?;
if let Token::LeftCurlyBrace = self.current_token { if let Token::LeftBrace = self.current_token {
self.parse_block()?; self.parse_block()?;
} else { } else {
return Err(CompileError::ExpectedTokenMultiple { return Err(CompileError::ExpectedTokenMultiple {
expected: &[TokenKind::If, TokenKind::LeftCurlyBrace], expected: &[TokenKind::If, TokenKind::LeftBrace],
found: self.current_token.to_owned(), found: self.current_token.to_owned(),
position: self.current_position, position: self.current_position,
}); });
@ -1147,7 +1147,7 @@ impl<'src> Compiler<'src> {
loop { loop {
self.parse(Precedence::None)?; self.parse(Precedence::None)?;
if self.is_eof() || self.allow(Token::RightCurlyBrace)? { if self.is_eof() || self.allow(Token::RightBrace)? {
self.parse_implicit_return()?; self.parse_implicit_return()?;
break; break;
@ -1179,10 +1179,8 @@ impl<'src> Compiler<'src> {
self.advance()?; self.advance()?;
let has_return_value = if matches!( let has_return_value = if matches!(self.current_token, Token::Semicolon | Token::RightBrace)
self.current_token, {
Token::Semicolon | Token::RightCurlyBrace
) {
false false
} else { } else {
self.parse_expression()?; self.parse_expression()?;
@ -1335,7 +1333,7 @@ impl<'src> Compiler<'src> {
None None
}; };
function_compiler.expect(Token::LeftCurlyBrace)?; function_compiler.expect(Token::LeftBrace)?;
function_compiler.parse_top_level()?; function_compiler.parse_top_level()?;
self.previous_token = function_compiler.previous_token; self.previous_token = function_compiler.previous_token;
@ -1673,7 +1671,7 @@ impl From<&Token<'_>> for ParseRule<'_> {
infix: None, infix: None,
precedence: Precedence::None, precedence: Precedence::None,
}, },
Token::LeftCurlyBrace => ParseRule { Token::LeftBrace => ParseRule {
prefix: Some(Compiler::parse_block), prefix: Some(Compiler::parse_block),
infix: None, infix: None,
precedence: Precedence::None, precedence: Precedence::None,
@ -1683,7 +1681,7 @@ impl From<&Token<'_>> for ParseRule<'_> {
infix: Some(Compiler::parse_call), infix: Some(Compiler::parse_call),
precedence: Precedence::Call, precedence: Precedence::Call,
}, },
Token::LeftSquareBrace => ParseRule { Token::LeftBracket => ParseRule {
prefix: Some(Compiler::parse_list), prefix: Some(Compiler::parse_list),
infix: None, infix: None,
precedence: Precedence::None, precedence: Precedence::None,
@ -1745,7 +1743,7 @@ impl From<&Token<'_>> for ParseRule<'_> {
infix: None, infix: None,
precedence: Precedence::None, precedence: Precedence::None,
}, },
Token::RightCurlyBrace => ParseRule { Token::RightBrace => ParseRule {
prefix: None, prefix: None,
infix: None, infix: None,
precedence: Precedence::None, precedence: Precedence::None,
@ -1755,7 +1753,7 @@ impl From<&Token<'_>> for ParseRule<'_> {
infix: None, infix: None,
precedence: Precedence::None, precedence: Precedence::None,
}, },
Token::RightSquareBrace => ParseRule { Token::RightBracket => ParseRule {
prefix: None, prefix: None,
infix: None, infix: None,
precedence: Precedence::None, precedence: Precedence::None,

View File

@ -101,13 +101,13 @@ impl<'src> Formatter<'src> {
String(string) => { String(string) => {
self.push_colored(string.magenta()); self.push_colored(string.magenta());
} }
LeftCurlyBrace => { LeftBrace => {
self.next_line.push_str(self.current_token.as_str()); self.next_line.push_str(self.current_token.as_str());
self.commit_line(LineKind::OpenBlock); self.commit_line(LineKind::OpenBlock);
self.indent += 1; self.indent += 1;
} }
RightCurlyBrace => { RightBrace => {
self.commit_line(LineKind::CloseBlock); self.commit_line(LineKind::CloseBlock);
self.next_line.push_str(self.current_token.as_str()); self.next_line.push_str(self.current_token.as_str());

View File

@ -41,11 +41,10 @@ pub fn lex<'tokens, 'src: 'tokens>(
error: CompileError::Lex(error), error: CompileError::Lex(error),
source, source,
})?; })?;
let is_eof = matches!(token, Token::Eof);
tokens.push((token, span)); tokens.push((token, span));
if is_eof { if lexer.is_eof() {
break; break;
} }
} }
@ -60,6 +59,7 @@ pub fn lex<'tokens, 'src: 'tokens>(
pub struct Lexer<'src> { pub struct Lexer<'src> {
source: &'src str, source: &'src str,
position: usize, position: usize,
is_eof: bool,
} }
impl<'src> Lexer<'src> { impl<'src> Lexer<'src> {
@ -68,6 +68,7 @@ impl<'src> Lexer<'src> {
Lexer { Lexer {
source, source,
position: 0, position: 0,
is_eof: false,
} }
} }
@ -76,7 +77,7 @@ impl<'src> Lexer<'src> {
} }
pub fn is_eof(&self) -> bool { pub fn is_eof(&self) -> bool {
self.position >= self.source.len() self.is_eof
} }
pub fn skip_to(&mut self, position: usize) { pub fn skip_to(&mut self, position: usize) {
@ -92,21 +93,14 @@ impl<'src> Lexer<'src> {
lexer(self)? lexer(self)?
} else { } else {
self.is_eof = true;
(Token::Eof, Span(self.position, self.position)) (Token::Eof, Span(self.position, self.position))
}; };
Ok((token, span)) Ok((token, span))
} }
/// Peek at the next token without consuming the source.
///
/// Lexes one token with `next_token`, then moves `self.position` back by
/// that token's `len()` so the same token can be lexed again.
///
/// NOTE(review): the rewind distance is `token.len()`, not the width of the
/// returned span — confirm the two always agree for every token kind.
pub fn peek_token(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let (token, span) = self.next_token()?;

    // Undo the advance performed by `next_token`.
    self.position -= token.len();

    Ok((token, span))
}
/// Progress to the next character. /// Progress to the next character.
fn next_char(&mut self) -> Option<char> { fn next_char(&mut self) -> Option<char> {
if let Some(c) = self.source[self.position..].chars().next() { if let Some(c) = self.source[self.position..].chars().next() {
@ -261,7 +255,7 @@ impl<'src> Lexer<'src> {
} }
/// Lex an identifier token. /// Lex an identifier token.
fn lex_alphanumeric(&mut self) -> Result<(Token<'src>, Span), LexError> { fn lex_keyword_or_identifier(&mut self) -> Result<(Token<'src>, Span), LexError> {
let start_pos = self.position; let start_pos = self.position;
while let Some(c) = self.peek_char() { while let Some(c) = self.peek_char() {
@ -318,6 +312,247 @@ impl<'src> Lexer<'src> {
Ok((Token::String(text), Span(start_pos, self.position))) Ok((Token::String(text), Span(start_pos, self.position)))
} }
/// Lex a character literal such as `'a'`.
///
/// Consumes one character unconditionally (the opening quote; the dispatch
/// table routes `'` here), reads the literal's character, and then requires
/// a closing `'`.
///
/// # Errors
///
/// - `LexError::UnexpectedEndOfFile` if the source ends directly after the
///   opening quote (previously this case panicked on an `unwrap`).
/// - `LexError::ExpectedCharacter` if the character following the literal's
///   character is not `'`; `actual` is `'\0'` when the source ends there.
fn lex_char(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let start_position = self.position;

    // Consume the opening quote.
    self.next_char();

    // Read the literal's character without assuming that one exists.
    let character = match self.source[self.position..].chars().next() {
        Some(character) => character,
        None => {
            return Err(LexError::UnexpectedEndOfFile {
                position: self.position,
            });
        }
    };

    self.next_char();

    match self.peek_char() {
        Some('\'') => {
            // Consume the closing quote.
            self.next_char();
        }
        other => {
            return Err(LexError::ExpectedCharacter {
                expected: '\'',
                actual: other.unwrap_or('\0'),
                position: self.position,
            });
        }
    }

    Ok((
        Token::Character(character),
        Span(start_position, self.position),
    ))
}
/// Lex `+` or `+=`.
///
/// Consumes one character, then also consumes a directly following `=` and
/// yields `Token::PlusEqual`; otherwise yields `Token::Plus`.
fn lex_plus(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let token = match self.peek_char() {
        Some('=') => {
            self.next_char();

            Token::PlusEqual
        }
        _ => Token::Plus,
    };

    Ok((token, Span(begin, self.position)))
}
/// Lex `-`, `-=`, or the float literal `-Infinity`.
///
/// Consumes one character (the `-`), then:
///
/// - a following `=` is consumed and yields `Token::MinusEqual`;
/// - a following `Infinity` is consumed and yields a `Token::Float` whose
///   text is the full `-Infinity` slice of the source, so the sign is kept
///   and the text matches the returned span (previously the text was
///   `"Infinity"`, silently dropping the sign);
/// - anything else yields `Token::Minus`.
fn lex_minus(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let start_pos = self.position;

    self.next_char();

    if let Some('=') = self.peek_char() {
        self.next_char();

        Ok((Token::MinusEqual, Span(start_pos, self.position)))
    } else if self.peek_chars(8) == "Infinity" {
        // Skip the eight ASCII bytes of "Infinity".
        self.position += 8;

        // Copy the `&'src str` out of `self` so the slice borrows the source
        // text rather than the lexer.
        let source = self.source;
        let text = &source[start_pos..self.position];

        Ok((Token::Float(text), Span(start_pos, self.position)))
    } else {
        Ok((Token::Minus, Span(start_pos, self.position)))
    }
}
/// Lex `*` or `*=`.
///
/// Consumes one character, then also consumes a directly following `=` and
/// yields `Token::StarEqual`; otherwise yields `Token::Star`.
fn lex_star(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let token = match self.peek_char() {
        Some('=') => {
            self.next_char();

            Token::StarEqual
        }
        _ => Token::Star,
    };

    Ok((token, Span(begin, self.position)))
}
/// Lex `/` or `/=`.
///
/// Consumes one character, then also consumes a directly following `=` and
/// yields `Token::SlashEqual`; otherwise yields `Token::Slash`.
fn lex_slash(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let token = match self.peek_char() {
        Some('=') => {
            self.next_char();

            Token::SlashEqual
        }
        _ => Token::Slash,
    };

    Ok((token, Span(begin, self.position)))
}
/// Lex `%` or `%=`.
///
/// Consumes one character, then also consumes a directly following `=` and
/// yields `Token::PercentEqual`; otherwise yields `Token::Percent`.
fn lex_percent(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let token = match self.peek_char() {
        Some('=') => {
            self.next_char();

            Token::PercentEqual
        }
        _ => Token::Percent,
    };

    Ok((token, Span(begin, self.position)))
}
/// Fallback lexer for characters that have no dedicated rule.
///
/// Always fails with `LexError::UnexpectedCharacter`, reporting the peeked
/// character (or `'\0'` when there is none) and the current position.
/// Nothing is consumed.
fn lex_unexpected(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let actual = self.peek_char().unwrap_or('\0');
    let position = self.position;

    Err(LexError::UnexpectedCharacter { actual, position })
}
/// Lex `!` or `!=`.
///
/// Consumes one character, then also consumes a directly following `=` and
/// yields `Token::BangEqual`; otherwise yields `Token::Bang`.
fn lex_exclamation_mark(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let token = match self.peek_char() {
        Some('=') => {
            self.next_char();

            Token::BangEqual
        }
        _ => Token::Bang,
    };

    Ok((token, Span(begin, self.position)))
}
/// Lex `=` or `==`.
///
/// Consumes one character, then also consumes a directly following `=` and
/// yields `Token::DoubleEqual`; otherwise yields `Token::Equal`.
fn lex_equal(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let token = match self.peek_char() {
        Some('=') => {
            self.next_char();

            Token::DoubleEqual
        }
        _ => Token::Equal,
    };

    Ok((token, Span(begin, self.position)))
}
/// Lex `<` or `<=`.
///
/// Consumes one character, then also consumes a directly following `=` and
/// yields `Token::LessEqual`; otherwise yields `Token::Less`.
fn lex_less_than(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let token = match self.peek_char() {
        Some('=') => {
            self.next_char();

            Token::LessEqual
        }
        _ => Token::Less,
    };

    Ok((token, Span(begin, self.position)))
}
/// Lex `>` or `>=`.
///
/// Consumes one character, then also consumes a directly following `=` and
/// yields `Token::GreaterEqual`; otherwise yields `Token::Greater`.
fn lex_greater_than(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let token = match self.peek_char() {
        Some('=') => {
            self.next_char();

            Token::GreaterEqual
        }
        _ => Token::Greater,
    };

    Ok((token, Span(begin, self.position)))
}
/// Lex the `&&` operator.
///
/// Consumes one character, then requires a second `&`.
///
/// # Errors
///
/// - `LexError::UnexpectedEndOfFile` if nothing follows the first character.
/// - `LexError::ExpectedCharacter` (expecting `&`) if some other character
///   follows.
fn lex_ampersand(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    match self.peek_char() {
        Some('&') => {
            self.next_char();

            Ok((Token::DoubleAmpersand, Span(begin, self.position)))
        }
        Some(actual) => Err(LexError::ExpectedCharacter {
            expected: '&',
            actual,
            position: self.position,
        }),
        None => Err(LexError::UnexpectedEndOfFile {
            position: self.position,
        }),
    }
}
/// Lex the `||` operator.
///
/// Consumes one character, then requires a second `|`.
///
/// # Errors
///
/// - `LexError::UnexpectedEndOfFile` if nothing follows the first character.
/// - `LexError::ExpectedCharacter` if some other character follows. The
///   error now reports `|` as the expected character; it previously
///   reported `&`, a copy-paste slip from `lex_ampersand`.
fn lex_pipe(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let start_pos = self.position;

    self.next_char();

    match self.peek_char() {
        Some('|') => {
            self.next_char();

            Ok((Token::DoublePipe, Span(start_pos, self.position)))
        }
        Some(actual) => Err(LexError::ExpectedCharacter {
            expected: '|',
            actual,
            position: self.position,
        }),
        None => Err(LexError::UnexpectedEndOfFile {
            position: self.position,
        }),
    }
}
/// Lex a `(`: consumes one character and yields `Token::LeftParenthesis`
/// spanning it.
fn lex_left_parenthesis(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let span = Span(begin, self.position);

    Ok((Token::LeftParenthesis, span))
}
/// Lex a `)`: consumes one character and yields `Token::RightParenthesis`
/// spanning it.
fn lex_right_parenthesis(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let span = Span(begin, self.position);

    Ok((Token::RightParenthesis, span))
}
/// Lex a `[`: consumes one character and yields `Token::LeftBracket`
/// spanning it.
fn lex_left_bracket(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let span = Span(begin, self.position);

    Ok((Token::LeftBracket, span))
}
/// Lex a `]`: consumes one character and yields `Token::RightBracket`
/// spanning it.
fn lex_right_bracket(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let begin = self.position;

    self.next_char();

    let span = Span(begin, self.position);

    Ok((Token::RightBracket, span))
}
} }
type LexerFn<'src> = fn(&mut Lexer<'src>) -> Result<(Token<'src>, Span), LexError>; type LexerFn<'src> = fn(&mut Lexer<'src>) -> Result<(Token<'src>, Span), LexError>;
@ -332,7 +567,63 @@ impl<'src> From<&char> for LexRule<'src> {
'0'..='9' => LexRule { '0'..='9' => LexRule {
lexer: Lexer::lex_numeric, lexer: Lexer::lex_numeric,
}, },
_ => panic!("Invalid character"), 'Z'..='a' => LexRule {
lexer: Lexer::lex_keyword_or_identifier,
},
'"' => LexRule {
lexer: Lexer::lex_string,
},
'\'' => LexRule {
lexer: Lexer::lex_char,
},
'+' => LexRule {
lexer: Lexer::lex_plus,
},
'-' => LexRule {
lexer: Lexer::lex_minus,
},
'*' => LexRule {
lexer: Lexer::lex_star,
},
'/' => LexRule {
lexer: Lexer::lex_slash,
},
'%' => LexRule {
lexer: Lexer::lex_percent,
},
'!' => LexRule {
lexer: Lexer::lex_exclamation_mark,
},
'=' => LexRule {
lexer: Lexer::lex_equal,
},
'<' => LexRule {
lexer: Lexer::lex_less_than,
},
'>' => LexRule {
lexer: Lexer::lex_greater_than,
},
'&' => LexRule {
lexer: Lexer::lex_ampersand,
},
'|' => LexRule {
lexer: Lexer::lex_pipe,
},
'(' => LexRule {
lexer: Lexer::lex_left_parenthesis,
},
')' => LexRule {
lexer: Lexer::lex_right_parenthesis,
},
'[' => LexRule {
lexer: Lexer::lex_left_bracket,
},
']' => LexRule {
lexer: Lexer::lex_right_bracket,
},
_ => LexRule {
lexer: Lexer::lex_unexpected,
},
} }
} }
} }
@ -464,7 +755,7 @@ mod tests {
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Map, Span(0, 3)), (Token::Map, Span(0, 3)),
(Token::LeftCurlyBrace, Span(4, 5)), (Token::LeftBrace, Span(4, 5)),
(Token::Identifier("x"), Span(6, 7)), (Token::Identifier("x"), Span(6, 7)),
(Token::Equal, Span(8, 9)), (Token::Equal, Span(8, 9)),
(Token::String("1"), Span(10, 13)), (Token::String("1"), Span(10, 13)),
@ -476,7 +767,7 @@ mod tests {
(Token::Identifier("z"), Span(22, 23)), (Token::Identifier("z"), Span(22, 23)),
(Token::Equal, Span(24, 25)), (Token::Equal, Span(24, 25)),
(Token::Float("3.0"), Span(26, 29)), (Token::Float("3.0"), Span(26, 29)),
(Token::RightCurlyBrace, Span(30, 31)), (Token::RightBrace, Span(30, 31)),
(Token::Eof, Span(31, 31)), (Token::Eof, Span(31, 31)),
]) ])
); );
@ -540,7 +831,7 @@ mod tests {
Ok(vec![ Ok(vec![
(Token::Struct, Span(0, 6)), (Token::Struct, Span(0, 6)),
(Token::Identifier("FooBar"), Span(7, 13)), (Token::Identifier("FooBar"), Span(7, 13)),
(Token::LeftCurlyBrace, Span(14, 15)), (Token::LeftBrace, Span(14, 15)),
(Token::Identifier("foo"), Span(16, 19)), (Token::Identifier("foo"), Span(16, 19)),
(Token::Colon, Span(19, 20)), (Token::Colon, Span(19, 20)),
(Token::Int, Span(21, 24)), (Token::Int, Span(21, 24)),
@ -548,7 +839,7 @@ mod tests {
(Token::Identifier("bar"), Span(26, 29)), (Token::Identifier("bar"), Span(26, 29)),
(Token::Colon, Span(29, 30)), (Token::Colon, Span(29, 30)),
(Token::FloatKeyword, Span(31, 36)), (Token::FloatKeyword, Span(31, 36)),
(Token::RightCurlyBrace, Span(37, 38)), (Token::RightBrace, Span(37, 38)),
(Token::Eof, Span(38, 38)) (Token::Eof, Span(38, 38))
]) ])
); );
@ -561,16 +852,16 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::LeftSquareBrace, Span(0, 1)), (Token::LeftBracket, Span(0, 1)),
(Token::Integer("1"), Span(1, 2)), (Token::Integer("1"), Span(1, 2)),
(Token::Comma, Span(2, 3)), (Token::Comma, Span(2, 3)),
(Token::Integer("2"), Span(4, 5)), (Token::Integer("2"), Span(4, 5)),
(Token::Comma, Span(5, 6)), (Token::Comma, Span(5, 6)),
(Token::Integer("3"), Span(7, 8)), (Token::Integer("3"), Span(7, 8)),
(Token::RightSquareBrace, Span(8, 9)), (Token::RightBracket, Span(8, 9)),
(Token::LeftSquareBrace, Span(9, 10)), (Token::LeftBracket, Span(9, 10)),
(Token::Integer("1"), Span(10, 11)), (Token::Integer("1"), Span(10, 11)),
(Token::RightSquareBrace, Span(11, 12)), (Token::RightBracket, Span(11, 12)),
(Token::Eof, Span(12, 12)), (Token::Eof, Span(12, 12)),
]) ])
) )
@ -583,13 +874,13 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::LeftSquareBrace, Span(0, 1)), (Token::LeftBracket, Span(0, 1)),
(Token::Integer("1"), Span(1, 2)), (Token::Integer("1"), Span(1, 2)),
(Token::Comma, Span(2, 3)), (Token::Comma, Span(2, 3)),
(Token::Integer("2"), Span(4, 5)), (Token::Integer("2"), Span(4, 5)),
(Token::Comma, Span(5, 6)), (Token::Comma, Span(5, 6)),
(Token::Integer("3"), Span(7, 8)), (Token::Integer("3"), Span(7, 8)),
(Token::RightSquareBrace, Span(8, 9)), (Token::RightBracket, Span(8, 9)),
(Token::Eof, Span(9, 9)), (Token::Eof, Span(9, 9)),
]) ])
) )
@ -602,7 +893,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::LeftCurlyBrace, Span(0, 1)), (Token::LeftBrace, Span(0, 1)),
(Token::Identifier("a"), Span(1, 2)), (Token::Identifier("a"), Span(1, 2)),
(Token::Equal, Span(3, 4)), (Token::Equal, Span(3, 4)),
(Token::Integer("1"), Span(5, 6)), (Token::Integer("1"), Span(5, 6)),
@ -614,7 +905,7 @@ mod tests {
(Token::Identifier("c"), Span(15, 16)), (Token::Identifier("c"), Span(15, 16)),
(Token::Equal, Span(17, 18)), (Token::Equal, Span(17, 18)),
(Token::Integer("3"), Span(19, 20)), (Token::Integer("3"), Span(19, 20)),
(Token::RightCurlyBrace, Span(20, 21)), (Token::RightBrace, Span(20, 21)),
(Token::Dot, Span(21, 22)), (Token::Dot, Span(21, 22)),
(Token::Identifier("c"), Span(22, 23)), (Token::Identifier("c"), Span(22, 23)),
(Token::Eof, Span(23, 23)), (Token::Eof, Span(23, 23)),
@ -683,15 +974,15 @@ mod tests {
(Token::Identifier("x"), Span(3, 4)), (Token::Identifier("x"), Span(3, 4)),
(Token::Less, Span(5, 6)), (Token::Less, Span(5, 6)),
(Token::Integer("10"), Span(7, 9)), (Token::Integer("10"), Span(7, 9)),
(Token::LeftCurlyBrace, Span(10, 11)), (Token::LeftBrace, Span(10, 11)),
(Token::Identifier("x"), Span(12, 13)), (Token::Identifier("x"), Span(12, 13)),
(Token::Plus, Span(14, 15)), (Token::Plus, Span(14, 15)),
(Token::Integer("1"), Span(16, 17)), (Token::Integer("1"), Span(16, 17)),
(Token::RightCurlyBrace, Span(18, 19)), (Token::RightBrace, Span(18, 19)),
(Token::Else, Span(20, 24)), (Token::Else, Span(20, 24)),
(Token::LeftCurlyBrace, Span(25, 26)), (Token::LeftBrace, Span(25, 26)),
(Token::Identifier("x"), Span(27, 28)), (Token::Identifier("x"), Span(27, 28)),
(Token::RightCurlyBrace, Span(29, 30)), (Token::RightBrace, Span(29, 30)),
(Token::Eof, Span(30, 30)), (Token::Eof, Span(30, 30)),
]) ])
) )
@ -708,11 +999,11 @@ mod tests {
(Token::Identifier("x"), Span(6, 7)), (Token::Identifier("x"), Span(6, 7)),
(Token::Less, Span(8, 9)), (Token::Less, Span(8, 9)),
(Token::Integer("10"), Span(10, 12)), (Token::Integer("10"), Span(10, 12)),
(Token::LeftCurlyBrace, Span(13, 14)), (Token::LeftBrace, Span(13, 14)),
(Token::Identifier("x"), Span(15, 16)), (Token::Identifier("x"), Span(15, 16)),
(Token::PlusEqual, Span(17, 19)), (Token::PlusEqual, Span(17, 19)),
(Token::Integer("1"), Span(20, 21)), (Token::Integer("1"), Span(20, 21)),
(Token::RightCurlyBrace, Span(22, 23)), (Token::RightBrace, Span(22, 23)),
(Token::Eof, Span(23, 23)), (Token::Eof, Span(23, 23)),
]) ])
) )
@ -755,7 +1046,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::LeftCurlyBrace, Span(0, 1)), (Token::LeftBrace, Span(0, 1)),
(Token::Identifier("x"), Span(2, 3)), (Token::Identifier("x"), Span(2, 3)),
(Token::Equal, Span(4, 5)), (Token::Equal, Span(4, 5)),
(Token::Integer("42"), Span(6, 8)), (Token::Integer("42"), Span(6, 8)),
@ -763,7 +1054,7 @@ mod tests {
(Token::Identifier("y"), Span(10, 11)), (Token::Identifier("y"), Span(10, 11)),
(Token::Equal, Span(12, 13)), (Token::Equal, Span(12, 13)),
(Token::String("foobar"), Span(14, 22)), (Token::String("foobar"), Span(14, 22)),
(Token::RightCurlyBrace, Span(23, 24)), (Token::RightBrace, Span(23, 24)),
(Token::Eof, Span(24, 24)), (Token::Eof, Span(24, 24)),
]) ])
) )
@ -1104,8 +1395,8 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::LeftSquareBrace, Span(0, 1)), (Token::LeftBracket, Span(0, 1)),
(Token::RightSquareBrace, Span(1, 2)), (Token::RightBracket, Span(1, 2)),
(Token::Eof, Span(2, 2)), (Token::Eof, Span(2, 2)),
]) ])
) )

View File

@ -9,15 +9,19 @@ use serde::{Deserialize, Serialize};
use crate::Span; use crate::Span;
pub fn output_token_list<W: Write>(tokens: &[(Token, Span)], writer: &mut W) { pub fn output_token_list<W: Write>(tokens: &[(Token, Span)], writer: &mut W) {
const HEADER: [&str; 2] = ["TOKEN POSITION ", "------------ ----------"]; const HEADER: [&str; 2] = [
"TOKEN KIND POSITION ",
"------------ ---------- ----------",
];
writeln!(writer, "{}", HEADER[0]).unwrap(); writeln!(writer, "{}", HEADER[0]).unwrap();
writeln!(writer, "{}", HEADER[1]).unwrap(); writeln!(writer, "{}", HEADER[1]).unwrap();
for (token, position) in tokens { for (token, position) in tokens {
let kind = token.kind().to_string();
let token = token.to_string(); let token = token.to_string();
writeln!(writer, "{token:<12} {position}").unwrap(); writeln!(writer, "{token:<12} {kind:<10} {position}").unwrap();
} }
} }
@ -90,9 +94,9 @@ define_tokens! {
Equal, Equal,
Greater, Greater,
GreaterEqual, GreaterEqual,
LeftCurlyBrace, LeftBrace,
LeftBracket,
LeftParenthesis, LeftParenthesis,
LeftSquareBrace,
Less, Less,
LessEqual, LessEqual,
Minus, Minus,
@ -101,9 +105,9 @@ define_tokens! {
PercentEqual, PercentEqual,
Plus, Plus,
PlusEqual, PlusEqual,
RightCurlyBrace, RightBrace,
RightBracket,
RightParenthesis, RightParenthesis,
RightSquareBrace,
Semicolon, Semicolon,
Slash, Slash,
SlashEqual, SlashEqual,
@ -151,9 +155,9 @@ impl<'src> Token<'src> {
Token::Equal => 1, Token::Equal => 1,
Token::Greater => 1, Token::Greater => 1,
Token::GreaterEqual => 2, Token::GreaterEqual => 2,
Token::LeftCurlyBrace => 1, Token::LeftBrace => 1,
Token::LeftParenthesis => 1, Token::LeftParenthesis => 1,
Token::LeftSquareBrace => 1, Token::LeftBracket => 1,
Token::Less => 1, Token::Less => 1,
Token::LessEqual => 2, Token::LessEqual => 2,
Token::Minus => 1, Token::Minus => 1,
@ -163,9 +167,9 @@ impl<'src> Token<'src> {
Token::Plus => 1, Token::Plus => 1,
Token::PlusEqual => 2, Token::PlusEqual => 2,
Token::Return => 6, Token::Return => 6,
Token::RightCurlyBrace => 1, Token::RightBrace => 1,
Token::RightParenthesis => 1, Token::RightParenthesis => 1,
Token::RightSquareBrace => 1, Token::RightBracket => 1,
Token::Semicolon => 1, Token::Semicolon => 1,
Token::Slash => 1, Token::Slash => 1,
Token::SlashEqual => 2, Token::SlashEqual => 2,
@ -212,9 +216,9 @@ impl<'src> Token<'src> {
Token::Equal => "=", Token::Equal => "=",
Token::Greater => ">", Token::Greater => ">",
Token::GreaterEqual => ">=", Token::GreaterEqual => ">=",
Token::LeftCurlyBrace => "{", Token::LeftBrace => "{",
Token::LeftParenthesis => "(", Token::LeftParenthesis => "(",
Token::LeftSquareBrace => "[", Token::LeftBracket => "[",
Token::Less => "<", Token::Less => "<",
Token::LessEqual => "<=", Token::LessEqual => "<=",
Token::Minus => "-", Token::Minus => "-",
@ -224,9 +228,9 @@ impl<'src> Token<'src> {
Token::Plus => "+", Token::Plus => "+",
Token::PlusEqual => "+=", Token::PlusEqual => "+=",
Token::Return => "return", Token::Return => "return",
Token::RightCurlyBrace => "}", Token::RightBrace => "}",
Token::RightParenthesis => ")", Token::RightParenthesis => ")",
Token::RightSquareBrace => "]", Token::RightBracket => "]",
Token::Semicolon => ";", Token::Semicolon => ";",
Token::Slash => "/", Token::Slash => "/",
Token::SlashEqual => "/=", Token::SlashEqual => "/=",
@ -265,9 +269,9 @@ impl<'src> Token<'src> {
Token::If => TokenOwned::If, Token::If => TokenOwned::If,
Token::Int => TokenOwned::Int, Token::Int => TokenOwned::Int,
Token::Integer(integer) => TokenOwned::Integer(integer.to_string()), Token::Integer(integer) => TokenOwned::Integer(integer.to_string()),
Token::LeftCurlyBrace => TokenOwned::LeftCurlyBrace, Token::LeftBrace => TokenOwned::LeftCurlyBrace,
Token::LeftParenthesis => TokenOwned::LeftParenthesis, Token::LeftParenthesis => TokenOwned::LeftParenthesis,
Token::LeftSquareBrace => TokenOwned::LeftSquareBrace, Token::LeftBracket => TokenOwned::LeftSquareBrace,
Token::Let => TokenOwned::Let, Token::Let => TokenOwned::Let,
Token::Less => TokenOwned::Less, Token::Less => TokenOwned::Less,
Token::LessEqual => TokenOwned::LessOrEqual, Token::LessEqual => TokenOwned::LessOrEqual,
@ -281,9 +285,9 @@ impl<'src> Token<'src> {
Token::Plus => TokenOwned::Plus, Token::Plus => TokenOwned::Plus,
Token::PlusEqual => TokenOwned::PlusEqual, Token::PlusEqual => TokenOwned::PlusEqual,
Token::Return => TokenOwned::Return, Token::Return => TokenOwned::Return,
Token::RightCurlyBrace => TokenOwned::RightCurlyBrace, Token::RightBrace => TokenOwned::RightCurlyBrace,
Token::RightParenthesis => TokenOwned::RightParenthesis, Token::RightParenthesis => TokenOwned::RightParenthesis,
Token::RightSquareBrace => TokenOwned::RightSquareBrace, Token::RightBracket => TokenOwned::RightSquareBrace,
Token::Semicolon => TokenOwned::Semicolon, Token::Semicolon => TokenOwned::Semicolon,
Token::Star => TokenOwned::Star, Token::Star => TokenOwned::Star,
Token::StarEqual => TokenOwned::StarEqual, Token::StarEqual => TokenOwned::StarEqual,
@ -326,9 +330,9 @@ impl<'src> Token<'src> {
Token::If => TokenKind::If, Token::If => TokenKind::If,
Token::Int => TokenKind::Int, Token::Int => TokenKind::Int,
Token::Integer(_) => TokenKind::Integer, Token::Integer(_) => TokenKind::Integer,
Token::LeftCurlyBrace => TokenKind::LeftCurlyBrace, Token::LeftBrace => TokenKind::LeftBrace,
Token::LeftParenthesis => TokenKind::LeftParenthesis, Token::LeftParenthesis => TokenKind::LeftParenthesis,
Token::LeftSquareBrace => TokenKind::LeftSquareBrace, Token::LeftBracket => TokenKind::LeftBracket,
Token::Let => TokenKind::Let, Token::Let => TokenKind::Let,
Token::Less => TokenKind::Less, Token::Less => TokenKind::Less,
Token::LessEqual => TokenKind::LessEqual, Token::LessEqual => TokenKind::LessEqual,
@ -342,9 +346,9 @@ impl<'src> Token<'src> {
Token::Plus => TokenKind::Plus, Token::Plus => TokenKind::Plus,
Token::PlusEqual => TokenKind::PlusEqual, Token::PlusEqual => TokenKind::PlusEqual,
Token::Return => TokenKind::Return, Token::Return => TokenKind::Return,
Token::RightCurlyBrace => TokenKind::RightCurlyBrace, Token::RightBrace => TokenKind::RightBrace,
Token::RightParenthesis => TokenKind::RightParenthesis, Token::RightParenthesis => TokenKind::RightParenthesis,
Token::RightSquareBrace => TokenKind::RightSquareBrace, Token::RightBracket => TokenKind::RightBracket,
Token::Semicolon => TokenKind::Semicolon, Token::Semicolon => TokenKind::Semicolon,
Token::Star => TokenKind::Star, Token::Star => TokenKind::Star,
Token::StarEqual => TokenKind::StarEqual, Token::StarEqual => TokenKind::StarEqual,
@ -381,9 +385,9 @@ impl<'src> Token<'src> {
| Token::Equal | Token::Equal
| Token::Greater | Token::Greater
| Token::GreaterEqual | Token::GreaterEqual
| Token::LeftCurlyBrace | Token::LeftBrace
| Token::LeftParenthesis | Token::LeftParenthesis
| Token::LeftSquareBrace | Token::LeftBracket
| Token::Less | Token::Less
| Token::LessEqual | Token::LessEqual
| Token::Minus | Token::Minus
@ -431,9 +435,9 @@ impl<'src> Display for Token<'src> {
Token::If => write!(f, "if"), Token::If => write!(f, "if"),
Token::Int => write!(f, "int"), Token::Int => write!(f, "int"),
Token::Integer(value) => write!(f, "{value}"), Token::Integer(value) => write!(f, "{value}"),
Token::LeftCurlyBrace => write!(f, "{{"), Token::LeftBrace => write!(f, "{{"),
Token::LeftParenthesis => write!(f, "("), Token::LeftParenthesis => write!(f, "("),
Token::LeftSquareBrace => write!(f, "["), Token::LeftBracket => write!(f, "["),
Token::Let => write!(f, "let"), Token::Let => write!(f, "let"),
Token::Less => write!(f, "<"), Token::Less => write!(f, "<"),
Token::LessEqual => write!(f, "<="), Token::LessEqual => write!(f, "<="),
@ -447,9 +451,9 @@ impl<'src> Display for Token<'src> {
Token::Plus => write!(f, "+"), Token::Plus => write!(f, "+"),
Token::PlusEqual => write!(f, "+="), Token::PlusEqual => write!(f, "+="),
Token::Return => write!(f, "return"), Token::Return => write!(f, "return"),
Token::RightCurlyBrace => write!(f, "}}"), Token::RightBrace => write!(f, "}}"),
Token::RightParenthesis => write!(f, ")"), Token::RightParenthesis => write!(f, ")"),
Token::RightSquareBrace => write!(f, "]"), Token::RightBracket => write!(f, "]"),
Token::Semicolon => write!(f, ";"), Token::Semicolon => write!(f, ";"),
Token::Slash => write!(f, "/"), Token::Slash => write!(f, "/"),
Token::SlashEqual => write!(f, "/="), Token::SlashEqual => write!(f, "/="),
@ -564,9 +568,9 @@ impl Display for TokenOwned {
TokenOwned::If => Token::If.fmt(f), TokenOwned::If => Token::If.fmt(f),
TokenOwned::Int => Token::Int.fmt(f), TokenOwned::Int => Token::Int.fmt(f),
TokenOwned::Integer(integer) => Token::Integer(integer).fmt(f), TokenOwned::Integer(integer) => Token::Integer(integer).fmt(f),
TokenOwned::LeftCurlyBrace => Token::LeftCurlyBrace.fmt(f), TokenOwned::LeftCurlyBrace => Token::LeftBrace.fmt(f),
TokenOwned::LeftParenthesis => Token::LeftParenthesis.fmt(f), TokenOwned::LeftParenthesis => Token::LeftParenthesis.fmt(f),
TokenOwned::LeftSquareBrace => Token::LeftSquareBrace.fmt(f), TokenOwned::LeftSquareBrace => Token::LeftBracket.fmt(f),
TokenOwned::Let => Token::Let.fmt(f), TokenOwned::Let => Token::Let.fmt(f),
TokenOwned::Less => Token::Less.fmt(f), TokenOwned::Less => Token::Less.fmt(f),
TokenOwned::LessOrEqual => Token::LessEqual.fmt(f), TokenOwned::LessOrEqual => Token::LessEqual.fmt(f),
@ -580,9 +584,9 @@ impl Display for TokenOwned {
TokenOwned::Plus => Token::Plus.fmt(f), TokenOwned::Plus => Token::Plus.fmt(f),
TokenOwned::PlusEqual => Token::PlusEqual.fmt(f), TokenOwned::PlusEqual => Token::PlusEqual.fmt(f),
TokenOwned::Return => Token::Return.fmt(f), TokenOwned::Return => Token::Return.fmt(f),
TokenOwned::RightCurlyBrace => Token::RightCurlyBrace.fmt(f), TokenOwned::RightCurlyBrace => Token::RightBrace.fmt(f),
TokenOwned::RightParenthesis => Token::RightParenthesis.fmt(f), TokenOwned::RightParenthesis => Token::RightParenthesis.fmt(f),
TokenOwned::RightSquareBrace => Token::RightSquareBrace.fmt(f), TokenOwned::RightSquareBrace => Token::RightBracket.fmt(f),
TokenOwned::Semicolon => Token::Semicolon.fmt(f), TokenOwned::Semicolon => Token::Semicolon.fmt(f),
TokenOwned::Star => Token::Star.fmt(f), TokenOwned::Star => Token::Star.fmt(f),
TokenOwned::StarEqual => Token::StarEqual.fmt(f), TokenOwned::StarEqual => Token::StarEqual.fmt(f),
@ -604,10 +608,10 @@ impl Display for TokenKind {
TokenKind::Bang => Token::Bang.fmt(f), TokenKind::Bang => Token::Bang.fmt(f),
TokenKind::BangEqual => Token::BangEqual.fmt(f), TokenKind::BangEqual => Token::BangEqual.fmt(f),
TokenKind::Bool => Token::Bool.fmt(f), TokenKind::Bool => Token::Bool.fmt(f),
TokenKind::Boolean => write!(f, "boolean value"), TokenKind::Boolean => write!(f, "boolean"),
TokenKind::Break => Token::Break.fmt(f), TokenKind::Break => Token::Break.fmt(f),
TokenKind::Byte => write!(f, "byte value"), TokenKind::Byte => write!(f, "byte"),
TokenKind::Character => write!(f, "character value"), TokenKind::Character => write!(f, "character"),
TokenKind::Colon => Token::Colon.fmt(f), TokenKind::Colon => Token::Colon.fmt(f),
TokenKind::Comma => Token::Comma.fmt(f), TokenKind::Comma => Token::Comma.fmt(f),
TokenKind::Dot => Token::Dot.fmt(f), TokenKind::Dot => Token::Dot.fmt(f),
@ -618,7 +622,7 @@ impl Display for TokenKind {
TokenKind::Else => Token::Else.fmt(f), TokenKind::Else => Token::Else.fmt(f),
TokenKind::Eof => Token::Eof.fmt(f), TokenKind::Eof => Token::Eof.fmt(f),
TokenKind::Equal => Token::Equal.fmt(f), TokenKind::Equal => Token::Equal.fmt(f),
TokenKind::Float => write!(f, "float value"), TokenKind::Float => write!(f, "float"),
TokenKind::FloatKeyword => Token::FloatKeyword.fmt(f), TokenKind::FloatKeyword => Token::FloatKeyword.fmt(f),
TokenKind::Fn => Token::Fn.fmt(f), TokenKind::Fn => Token::Fn.fmt(f),
TokenKind::Greater => Token::Greater.fmt(f), TokenKind::Greater => Token::Greater.fmt(f),
@ -626,10 +630,10 @@ impl Display for TokenKind {
TokenKind::Identifier => write!(f, "identifier"), TokenKind::Identifier => write!(f, "identifier"),
TokenKind::If => Token::If.fmt(f), TokenKind::If => Token::If.fmt(f),
TokenKind::Int => Token::Int.fmt(f), TokenKind::Int => Token::Int.fmt(f),
TokenKind::Integer => write!(f, "integer value"), TokenKind::Integer => write!(f, "integer"),
TokenKind::LeftCurlyBrace => Token::LeftCurlyBrace.fmt(f), TokenKind::LeftBrace => Token::LeftBrace.fmt(f),
TokenKind::LeftParenthesis => Token::LeftParenthesis.fmt(f), TokenKind::LeftParenthesis => Token::LeftParenthesis.fmt(f),
TokenKind::LeftSquareBrace => Token::LeftSquareBrace.fmt(f), TokenKind::LeftBracket => Token::LeftBracket.fmt(f),
TokenKind::Let => Token::Let.fmt(f), TokenKind::Let => Token::Let.fmt(f),
TokenKind::Less => Token::Less.fmt(f), TokenKind::Less => Token::Less.fmt(f),
TokenKind::LessEqual => Token::LessEqual.fmt(f), TokenKind::LessEqual => Token::LessEqual.fmt(f),
@ -643,16 +647,16 @@ impl Display for TokenKind {
TokenKind::Plus => Token::Plus.fmt(f), TokenKind::Plus => Token::Plus.fmt(f),
TokenKind::PlusEqual => Token::PlusEqual.fmt(f), TokenKind::PlusEqual => Token::PlusEqual.fmt(f),
TokenKind::Return => Token::Return.fmt(f), TokenKind::Return => Token::Return.fmt(f),
TokenKind::RightCurlyBrace => Token::RightCurlyBrace.fmt(f), TokenKind::RightBrace => Token::RightBrace.fmt(f),
TokenKind::RightParenthesis => Token::RightParenthesis.fmt(f), TokenKind::RightParenthesis => Token::RightParenthesis.fmt(f),
TokenKind::RightSquareBrace => Token::RightSquareBrace.fmt(f), TokenKind::RightBracket => Token::RightBracket.fmt(f),
TokenKind::Semicolon => Token::Semicolon.fmt(f), TokenKind::Semicolon => Token::Semicolon.fmt(f),
TokenKind::Star => Token::Star.fmt(f), TokenKind::Star => Token::Star.fmt(f),
TokenKind::StarEqual => Token::StarEqual.fmt(f), TokenKind::StarEqual => Token::StarEqual.fmt(f),
TokenKind::Str => Token::Str.fmt(f), TokenKind::Str => Token::Str.fmt(f),
TokenKind::Slash => Token::Slash.fmt(f), TokenKind::Slash => Token::Slash.fmt(f),
TokenKind::SlashEqual => Token::SlashEqual.fmt(f), TokenKind::SlashEqual => Token::SlashEqual.fmt(f),
TokenKind::String => write!(f, "string value"), TokenKind::String => write!(f, "string"),
TokenKind::Struct => Token::Struct.fmt(f), TokenKind::Struct => Token::Struct.fmt(f),
TokenKind::While => Token::While.fmt(f), TokenKind::While => Token::While.fmt(f),
} }