2024-09-07 08:34:03 +00:00
|
|
|
use std::{
|
|
|
|
fmt::{self, Display, Formatter},
|
|
|
|
num::ParseIntError,
|
|
|
|
};
|
2024-09-07 03:30:43 +00:00
|
|
|
|
|
|
|
use crate::{
|
|
|
|
Chunk, ChunkError, Instruction, LexError, Lexer, Span, Token, TokenKind, TokenOwned, Value,
|
|
|
|
};
|
|
|
|
|
2024-09-07 08:34:03 +00:00
|
|
|
pub fn parse(source: &str) -> Result<Chunk, ParseError> {
|
|
|
|
let lexer = Lexer::new(source);
|
|
|
|
let mut parser = Parser::new(lexer);
|
|
|
|
|
|
|
|
while !parser.is_eof() {
|
|
|
|
parser.parse(Precedence::None)?;
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(parser.chunk)
|
|
|
|
}
|
|
|
|
|
2024-09-07 03:30:43 +00:00
|
|
|
#[derive(Debug)]
|
|
|
|
pub struct Parser<'src> {
|
|
|
|
lexer: Lexer<'src>,
|
2024-09-07 08:34:03 +00:00
|
|
|
chunk: Chunk,
|
|
|
|
current_token: Option<Token<'src>>,
|
2024-09-07 03:30:43 +00:00
|
|
|
current_position: Span,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'src> Parser<'src> {
|
2024-09-07 08:34:03 +00:00
|
|
|
pub fn new(lexer: Lexer<'src>) -> Self {
|
2024-09-07 03:30:43 +00:00
|
|
|
Parser {
|
|
|
|
lexer,
|
2024-09-07 08:34:03 +00:00
|
|
|
chunk: Chunk::new(),
|
|
|
|
current_token: None,
|
|
|
|
current_position: Span(0, 0),
|
2024-09-07 03:30:43 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn is_eof(&self) -> bool {
|
2024-09-07 08:34:03 +00:00
|
|
|
matches!(self.current_token, Some(Token::Eof))
|
2024-09-07 03:30:43 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
fn advance(&mut self) -> Result<(), ParseError> {
|
|
|
|
let (token, position) = self.lexer.next_token()?;
|
|
|
|
|
2024-09-07 08:34:03 +00:00
|
|
|
log::trace!("Advancing to token {token} at {position}");
|
|
|
|
|
|
|
|
self.current_token = Some(token);
|
2024-09-07 03:30:43 +00:00
|
|
|
self.current_position = position;
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2024-09-07 08:34:03 +00:00
|
|
|
fn current_token_owned(&self) -> TokenOwned {
|
|
|
|
self.current_token
|
|
|
|
.as_ref()
|
|
|
|
.map_or(TokenOwned::Eof, |token| token.to_owned())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn current_token_kind(&self) -> TokenKind {
|
|
|
|
self.current_token
|
|
|
|
.as_ref()
|
|
|
|
.map_or(TokenKind::Eof, |token| token.kind())
|
|
|
|
}
|
|
|
|
|
2024-09-07 03:30:43 +00:00
|
|
|
fn consume(&mut self, expected: TokenKind) -> Result<(), ParseError> {
|
2024-09-07 08:34:03 +00:00
|
|
|
if self.current_token_kind() == expected {
|
2024-09-07 03:30:43 +00:00
|
|
|
self.advance()
|
|
|
|
} else {
|
|
|
|
Err(ParseError::ExpectedToken {
|
|
|
|
expected,
|
2024-09-07 08:34:03 +00:00
|
|
|
found: self.current_token_owned(),
|
2024-09-07 03:30:43 +00:00
|
|
|
position: self.current_position,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-07 08:34:03 +00:00
|
|
|
fn emit_byte(&mut self, byte: u8) {
|
|
|
|
self.chunk.write(byte, self.current_position);
|
2024-09-07 03:30:43 +00:00
|
|
|
}
|
|
|
|
|
2024-09-07 08:34:03 +00:00
|
|
|
fn emit_constant(&mut self, value: Value) -> Result<(), ParseError> {
|
|
|
|
let constant_index = self.chunk.push_constant(value)?;
|
|
|
|
|
|
|
|
self.emit_byte(Instruction::Constant as u8);
|
|
|
|
self.emit_byte(constant_index);
|
|
|
|
|
2024-09-07 03:30:43 +00:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2024-09-07 08:34:03 +00:00
|
|
|
fn parse_integer(&mut self) -> Result<(), ParseError> {
|
|
|
|
if let Some(Token::Integer(text)) = self.current_token {
|
|
|
|
let integer = text.parse::<i64>().unwrap();
|
|
|
|
let value = Value::integer(integer);
|
2024-09-07 03:30:43 +00:00
|
|
|
|
2024-09-07 08:34:03 +00:00
|
|
|
self.emit_constant(value)?;
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn parse_grouped(&mut self) -> Result<(), ParseError> {
|
|
|
|
self.parse_expression()?;
|
|
|
|
|
|
|
|
self.consume(TokenKind::RightParenthesis)?;
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn parse_unary(&mut self) -> Result<(), ParseError> {
|
|
|
|
if let Some(Token::Minus) = self.current_token {
|
|
|
|
self.advance()?;
|
|
|
|
self.parse_expression()?;
|
|
|
|
self.emit_byte(Instruction::Negate as u8);
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn parse_binary(&mut self) -> Result<(), ParseError> {
|
|
|
|
let operator_position = self.current_position;
|
|
|
|
let operator = self.current_token_kind();
|
|
|
|
let rule = ParseRule::from(&operator);
|
|
|
|
|
|
|
|
self.parse(rule.precedence.increment())?;
|
|
|
|
|
|
|
|
match operator {
|
|
|
|
TokenKind::Plus => self.emit_byte(Instruction::Add as u8),
|
|
|
|
TokenKind::Minus => self.emit_byte(Instruction::Subtract as u8),
|
|
|
|
TokenKind::Star => self.emit_byte(Instruction::Multiply as u8),
|
|
|
|
TokenKind::Slash => self.emit_byte(Instruction::Divide as u8),
|
2024-09-07 03:30:43 +00:00
|
|
|
_ => {
|
|
|
|
return Err(ParseError::ExpectedTokenMultiple {
|
2024-09-07 08:34:03 +00:00
|
|
|
expected: vec![
|
|
|
|
TokenKind::Plus,
|
|
|
|
TokenKind::Minus,
|
|
|
|
TokenKind::Star,
|
|
|
|
TokenKind::Slash,
|
|
|
|
],
|
|
|
|
found: self.current_token_owned(),
|
2024-09-07 03:30:43 +00:00
|
|
|
position: self.current_position,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2024-09-07 08:34:03 +00:00
|
|
|
fn parse_expression(&mut self) -> Result<(), ParseError> {
|
|
|
|
self.parse(Precedence::Assignment)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Pratt parsing functions
|
|
|
|
|
|
|
|
fn parse(&mut self, precedence: Precedence) -> Result<(), ParseError> {
|
|
|
|
log::trace!("Parsing with precedence {precedence}");
|
|
|
|
|
|
|
|
self.advance()?;
|
|
|
|
|
|
|
|
let prefix_rule = ParseRule::from(&self.current_token_kind()).prefix;
|
|
|
|
|
|
|
|
if let Some(prefix) = prefix_rule {
|
|
|
|
log::trace!("Parsing {} as prefix", &self.current_token_owned());
|
|
|
|
|
|
|
|
prefix(self)?;
|
|
|
|
} else {
|
|
|
|
return Err(ParseError::ExpectedPrefix {
|
|
|
|
found: self.current_token_owned(),
|
|
|
|
position: self.current_position,
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
while precedence <= ParseRule::from(&self.current_token_kind()).precedence {
|
|
|
|
self.advance()?;
|
|
|
|
|
|
|
|
let infix_rule = ParseRule::from(&self.current_token_kind()).infix;
|
|
|
|
|
|
|
|
if let Some(infix) = infix_rule {
|
|
|
|
log::trace!("Parsing {} as infix", self.current_token_owned());
|
|
|
|
|
|
|
|
infix(self)?;
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-07 03:30:43 +00:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-07 08:34:03 +00:00
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
|
|
|
pub enum Precedence {
|
|
|
|
None = 0,
|
|
|
|
Assignment = 1,
|
|
|
|
Conditional = 2,
|
|
|
|
LogicalOr = 3,
|
|
|
|
LogicalAnd = 4,
|
|
|
|
Equality = 5,
|
|
|
|
Comparison = 6,
|
|
|
|
Term = 7,
|
|
|
|
Factor = 8,
|
|
|
|
Unary = 9,
|
|
|
|
Call = 10,
|
|
|
|
Primary = 11,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Precedence {
|
|
|
|
fn from_byte(byte: u8) -> Self {
|
|
|
|
match byte {
|
|
|
|
0 => Self::None,
|
|
|
|
1 => Self::Assignment,
|
|
|
|
2 => Self::Conditional,
|
|
|
|
3 => Self::LogicalOr,
|
|
|
|
4 => Self::LogicalAnd,
|
|
|
|
5 => Self::Equality,
|
|
|
|
6 => Self::Comparison,
|
|
|
|
7 => Self::Term,
|
|
|
|
8 => Self::Factor,
|
|
|
|
9 => Self::Unary,
|
|
|
|
10 => Self::Call,
|
|
|
|
_ => Self::Primary,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn increment(&self) -> Self {
|
|
|
|
Self::from_byte(*self as u8 + 1)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn decrement(&self) -> Self {
|
|
|
|
Self::from_byte(*self as u8 - 1)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Display for Precedence {
|
|
|
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
|
|
|
write!(f, "{:?}", self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
type ParserFunction<'a> = fn(&'_ mut Parser<'a>) -> Result<(), ParseError>;
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Copy)]
|
|
|
|
pub struct ParseRule<'a> {
|
|
|
|
pub prefix: Option<ParserFunction<'a>>,
|
|
|
|
pub infix: Option<ParserFunction<'a>>,
|
|
|
|
pub precedence: Precedence,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<&TokenKind> for ParseRule<'_> {
|
|
|
|
fn from(token_kind: &TokenKind) -> Self {
|
|
|
|
match token_kind {
|
|
|
|
TokenKind::Eof => ParseRule {
|
|
|
|
prefix: None,
|
|
|
|
infix: None,
|
|
|
|
precedence: Precedence::None,
|
|
|
|
},
|
|
|
|
TokenKind::Identifier => todo!(),
|
|
|
|
TokenKind::Boolean => todo!(),
|
|
|
|
TokenKind::Character => todo!(),
|
|
|
|
TokenKind::Float => todo!(),
|
|
|
|
TokenKind::Integer => ParseRule {
|
|
|
|
prefix: Some(Parser::parse_integer),
|
|
|
|
infix: None,
|
|
|
|
precedence: Precedence::None,
|
|
|
|
},
|
|
|
|
TokenKind::String => todo!(),
|
|
|
|
TokenKind::Async => todo!(),
|
|
|
|
TokenKind::Bool => todo!(),
|
|
|
|
TokenKind::Break => todo!(),
|
|
|
|
TokenKind::Else => todo!(),
|
|
|
|
TokenKind::FloatKeyword => todo!(),
|
|
|
|
TokenKind::If => todo!(),
|
|
|
|
TokenKind::Int => todo!(),
|
|
|
|
TokenKind::Let => todo!(),
|
|
|
|
TokenKind::Loop => todo!(),
|
|
|
|
TokenKind::Map => todo!(),
|
|
|
|
TokenKind::Str => todo!(),
|
|
|
|
TokenKind::While => todo!(),
|
|
|
|
TokenKind::BangEqual => todo!(),
|
|
|
|
TokenKind::Bang => todo!(),
|
|
|
|
TokenKind::Colon => todo!(),
|
|
|
|
TokenKind::Comma => todo!(),
|
|
|
|
TokenKind::Dot => todo!(),
|
|
|
|
TokenKind::DoubleAmpersand => todo!(),
|
|
|
|
TokenKind::DoubleDot => todo!(),
|
|
|
|
TokenKind::DoubleEqual => todo!(),
|
|
|
|
TokenKind::DoublePipe => todo!(),
|
|
|
|
TokenKind::Equal => todo!(),
|
|
|
|
TokenKind::Greater => todo!(),
|
|
|
|
TokenKind::GreaterOrEqual => todo!(),
|
|
|
|
TokenKind::LeftCurlyBrace => todo!(),
|
|
|
|
TokenKind::LeftParenthesis => ParseRule {
|
|
|
|
prefix: Some(Parser::parse_grouped),
|
|
|
|
infix: None,
|
|
|
|
precedence: Precedence::None,
|
|
|
|
},
|
|
|
|
TokenKind::LeftSquareBrace => todo!(),
|
|
|
|
TokenKind::Less => todo!(),
|
|
|
|
TokenKind::LessOrEqual => todo!(),
|
|
|
|
TokenKind::Minus => ParseRule {
|
|
|
|
prefix: Some(Parser::parse_unary),
|
|
|
|
infix: Some(Parser::parse_binary),
|
|
|
|
precedence: Precedence::Term,
|
|
|
|
},
|
|
|
|
TokenKind::MinusEqual => todo!(),
|
|
|
|
TokenKind::Mut => todo!(),
|
|
|
|
TokenKind::Percent => todo!(),
|
|
|
|
TokenKind::Plus => ParseRule {
|
|
|
|
prefix: None,
|
|
|
|
infix: Some(Parser::parse_binary),
|
|
|
|
precedence: Precedence::Term,
|
|
|
|
},
|
|
|
|
TokenKind::PlusEqual => todo!(),
|
|
|
|
TokenKind::RightCurlyBrace => todo!(),
|
|
|
|
TokenKind::RightParenthesis => todo!(),
|
|
|
|
TokenKind::RightSquareBrace => todo!(),
|
|
|
|
TokenKind::Semicolon => todo!(),
|
|
|
|
TokenKind::Star => ParseRule {
|
|
|
|
prefix: None,
|
|
|
|
infix: Some(Parser::parse_binary),
|
|
|
|
precedence: Precedence::Factor,
|
|
|
|
},
|
|
|
|
TokenKind::Struct => todo!(),
|
|
|
|
TokenKind::Slash => ParseRule {
|
|
|
|
prefix: None,
|
|
|
|
infix: Some(Parser::parse_binary),
|
|
|
|
precedence: Precedence::Factor,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-07 03:30:43 +00:00
|
|
|
#[derive(Debug, PartialEq)]
|
|
|
|
pub enum ParseError {
|
2024-09-07 08:34:03 +00:00
|
|
|
ExpectedPrefix {
|
|
|
|
found: TokenOwned,
|
|
|
|
position: Span,
|
|
|
|
},
|
2024-09-07 03:30:43 +00:00
|
|
|
ExpectedToken {
|
|
|
|
expected: TokenKind,
|
|
|
|
found: TokenOwned,
|
|
|
|
position: Span,
|
|
|
|
},
|
|
|
|
ExpectedTokenMultiple {
|
|
|
|
expected: Vec<TokenKind>,
|
|
|
|
found: TokenOwned,
|
|
|
|
position: Span,
|
|
|
|
},
|
|
|
|
|
|
|
|
// Wrappers around foreign errors
|
|
|
|
Chunk(ChunkError),
|
|
|
|
Lex(LexError),
|
|
|
|
ParseIntError(ParseIntError),
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<ParseIntError> for ParseError {
|
|
|
|
fn from(error: ParseIntError) -> Self {
|
|
|
|
Self::ParseIntError(error)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<LexError> for ParseError {
|
|
|
|
fn from(error: LexError) -> Self {
|
|
|
|
Self::Lex(error)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<ChunkError> for ParseError {
|
|
|
|
fn from(error: ChunkError) -> Self {
|
|
|
|
Self::Chunk(error)
|
|
|
|
}
|
|
|
|
}
|
2024-09-07 08:34:03 +00:00
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
mod tests {
|
|
|
|
use super::*;
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn parse_integer() {
|
|
|
|
let source = "42";
|
|
|
|
let test_chunk = parse(source);
|
|
|
|
|
|
|
|
assert_eq!(
|
|
|
|
test_chunk,
|
|
|
|
Ok(Chunk::with_data(
|
|
|
|
vec![(Instruction::Constant as u8, Span(0, 2)), (0, Span(0, 2))],
|
|
|
|
vec![Value::integer(42)]
|
|
|
|
))
|
|
|
|
);
|
|
|
|
}
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn parse_addition() {
|
|
|
|
env_logger::builder().is_test(true).try_init().unwrap();
|
|
|
|
|
|
|
|
let source = "42 + 42";
|
|
|
|
let test_chunk = parse(source);
|
|
|
|
|
|
|
|
assert_eq!(
|
|
|
|
test_chunk,
|
|
|
|
Ok(Chunk::with_data(
|
|
|
|
vec![
|
|
|
|
(Instruction::Constant as u8, Span(0, 2)),
|
|
|
|
(0, Span(0, 2)),
|
|
|
|
(Instruction::Constant as u8, Span(5, 7)),
|
|
|
|
(1, Span(5, 7)),
|
|
|
|
(Instruction::Add as u8, Span(3, 4)),
|
|
|
|
],
|
|
|
|
vec![Value::integer(42), Value::integer(42)]
|
|
|
|
))
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|