1
0
dust/dust-lang/src/parser.rs

705 lines
20 KiB
Rust
Raw Normal View History

use std::{
fmt::{self, Display, Formatter},
2024-09-07 16:15:47 +00:00
mem,
num::ParseIntError,
};
2024-09-07 03:30:43 +00:00
use crate::{
2024-09-07 16:15:47 +00:00
Chunk, ChunkError, DustError, Identifier, Instruction, LexError, Lexer, Span, Token, TokenKind,
TokenOwned, Value,
2024-09-07 03:30:43 +00:00
};
2024-09-07 16:15:47 +00:00
pub fn parse(source: &str) -> Result<Chunk, DustError> {
let lexer = Lexer::new(source);
let mut parser = Parser::new(lexer);
while !parser.is_eof() {
2024-09-07 16:15:47 +00:00
parser
.parse_statement()
.map_err(|error| DustError::Parse { error, source })?;
}
Ok(parser.chunk)
}
2024-09-07 03:30:43 +00:00
#[derive(Debug)]
pub struct Parser<'src> {
lexer: Lexer<'src>,
chunk: Chunk,
2024-09-07 10:38:12 +00:00
previous_token: Token<'src>,
previous_position: Span,
current_token: Token<'src>,
2024-09-07 03:30:43 +00:00
current_position: Span,
}
impl<'src> Parser<'src> {
2024-09-07 10:38:12 +00:00
pub fn new(mut lexer: Lexer<'src>) -> Self {
let (current_token, current_position) =
lexer.next_token().unwrap_or((Token::Eof, Span(0, 0)));
2024-09-07 16:15:47 +00:00
log::trace!("Starting parser with token {current_token} at {current_position}");
2024-09-07 03:30:43 +00:00
Parser {
lexer,
chunk: Chunk::new(),
2024-09-07 10:38:12 +00:00
previous_token: Token::Eof,
previous_position: Span(0, 0),
current_token,
current_position,
2024-09-07 03:30:43 +00:00
}
}
fn is_eof(&self) -> bool {
2024-09-07 10:38:12 +00:00
matches!(self.current_token, Token::Eof)
2024-09-07 03:30:43 +00:00
}
fn advance(&mut self) -> Result<(), ParseError> {
2024-09-07 10:38:12 +00:00
let (new_token, position) = self.lexer.next_token()?;
2024-09-07 03:30:43 +00:00
2024-09-07 10:38:12 +00:00
log::trace!("Advancing to token {new_token} at {position}");
2024-09-07 10:38:12 +00:00
self.previous_token = mem::replace(&mut self.current_token, new_token);
self.previous_position = mem::replace(&mut self.current_position, position);
2024-09-07 03:30:43 +00:00
Ok(())
}
2024-09-07 16:15:47 +00:00
/// Consumes the current token if it is of kind `allowed`.
///
/// Returns `Ok(true)` when the token matched and was consumed, `Ok(false)`
/// when it did not match (nothing is consumed).
///
/// # Errors
///
/// Propagates any error from `advance`.
fn allow(&mut self, allowed: TokenKind) -> Result<bool, ParseError> {
    let is_match = self.current_token.kind() == allowed;

    if is_match {
        self.advance()?;
    }

    Ok(is_match)
}
fn expect(&mut self, expected: TokenKind) -> Result<(), ParseError> {
2024-09-07 10:38:12 +00:00
if self.current_token.kind() == expected {
2024-09-07 03:30:43 +00:00
self.advance()
} else {
Err(ParseError::ExpectedToken {
expected,
2024-09-07 10:38:12 +00:00
found: self.current_token.to_owned(),
2024-09-07 03:30:43 +00:00
position: self.current_position,
})
}
}
2024-09-07 08:37:38 +00:00
/// Writes a single `byte` to the chunk, associated with `position`.
fn emit_byte(&mut self, byte: u8, position: Span) {
    let chunk = &mut self.chunk;

    chunk.write(byte, position);
}
fn emit_constant(&mut self, value: Value) -> Result<(), ParseError> {
let constant_index = self.chunk.push_constant(value)?;
2024-09-07 10:38:12 +00:00
let position = self.previous_position;
2024-09-07 08:37:38 +00:00
self.emit_byte(Instruction::Constant as u8, position);
self.emit_byte(constant_index, position);
2024-09-07 03:30:43 +00:00
Ok(())
}
2024-09-07 10:38:12 +00:00
/// Prefix rule for boolean literals.
///
/// If the previously consumed token is `Token::Boolean`, its text is parsed
/// as a `bool` and emitted as a constant; any other token is ignored.
///
/// # Panics
///
/// Panics if the token text is not a valid `bool`.
fn parse_boolean(&mut self) -> Result<(), ParseError> {
    match self.previous_token {
        Token::Boolean(text) => {
            let boolean = text.parse::<bool>().unwrap();

            self.emit_constant(Value::boolean(boolean))
        }
        _ => Ok(()),
    }
}
2024-09-07 16:15:47 +00:00
/// Prefix rule for float literals.
///
/// If the previously consumed token is `Token::Float`, its text is parsed as
/// an `f64` and emitted as a constant; any other token is ignored.
///
/// # Panics
///
/// Panics if the token text is not a valid `f64`.
fn parse_float(&mut self) -> Result<(), ParseError> {
    match self.previous_token {
        Token::Float(text) => {
            let float = text.parse::<f64>().unwrap();

            self.emit_constant(Value::float(float))
        }
        _ => Ok(()),
    }
}
fn parse_integer(&mut self) -> Result<(), ParseError> {
2024-09-07 10:38:12 +00:00
if let Token::Integer(text) = self.previous_token {
let integer = text.parse::<i64>().unwrap();
let value = Value::integer(integer);
2024-09-07 03:30:43 +00:00
self.emit_constant(value)?;
}
Ok(())
}
2024-09-07 16:15:47 +00:00
/// Prefix rule for string literals.
///
/// If the previously consumed token is `Token::String`, its text is emitted
/// as a string constant; any other token is ignored.
fn parse_string(&mut self) -> Result<(), ParseError> {
    match self.previous_token {
        Token::String(text) => self.emit_constant(Value::string(text)),
        _ => Ok(()),
    }
}
fn parse_grouped(&mut self) -> Result<(), ParseError> {
self.parse_expression()?;
2024-09-07 16:15:47 +00:00
self.expect(TokenKind::RightParenthesis)
}
fn parse_unary(&mut self) -> Result<(), ParseError> {
2024-09-07 16:15:47 +00:00
let operator_position = self.previous_position;
2024-09-07 10:38:12 +00:00
let byte = match self.previous_token.kind() {
TokenKind::Minus => Instruction::Negate as u8,
_ => {
return Err(ParseError::ExpectedTokenMultiple {
expected: vec![TokenKind::Minus],
found: self.previous_token.to_owned(),
2024-09-07 16:15:47 +00:00
position: operator_position,
2024-09-07 10:38:12 +00:00
})
}
};
2024-09-07 08:37:38 +00:00
2024-09-07 10:38:12 +00:00
self.parse_expression()?;
2024-09-07 16:15:47 +00:00
self.emit_byte(byte, operator_position);
Ok(())
}
fn parse_binary(&mut self) -> Result<(), ParseError> {
2024-09-07 10:38:12 +00:00
let operator_position = self.previous_position;
let operator = self.previous_token.kind();
let rule = ParseRule::from(&operator);
self.parse(rule.precedence.increment())?;
2024-09-07 08:37:38 +00:00
let byte = match operator {
TokenKind::Plus => Instruction::Add as u8,
TokenKind::Minus => Instruction::Subtract as u8,
TokenKind::Star => Instruction::Multiply as u8,
TokenKind::Slash => Instruction::Divide as u8,
2024-09-07 03:30:43 +00:00
_ => {
return Err(ParseError::ExpectedTokenMultiple {
expected: vec![
TokenKind::Plus,
TokenKind::Minus,
TokenKind::Star,
TokenKind::Slash,
],
2024-09-07 10:38:12 +00:00
found: self.previous_token.to_owned(),
position: operator_position,
2024-09-07 03:30:43 +00:00
})
}
2024-09-07 08:37:38 +00:00
};
self.emit_byte(byte, operator_position);
2024-09-07 03:30:43 +00:00
Ok(())
}
2024-09-07 16:15:47 +00:00
/// Prefix rule registered for `TokenKind::Identifier`.
///
/// Not implemented yet: calling it panics via `todo!()`.
fn parse_variable(&mut self) -> Result<(), ParseError> {
todo!()
}
/// Consumes an identifier token and registers it with the chunk.
///
/// Returns the index the chunk assigned to the identifier.
///
/// # Errors
///
/// Returns `ParseError::ExpectedToken` if the current token is not an
/// identifier (nothing is consumed), and propagates errors from `advance`
/// and from pushing the identifier onto the chunk.
fn parse_identifier(&mut self) -> Result<u8, ParseError> {
    let text = match self.current_token {
        Token::Identifier(text) => text,
        _ => {
            return Err(ParseError::ExpectedToken {
                expected: TokenKind::Identifier,
                found: self.current_token.to_owned(),
                position: self.current_position,
            })
        }
    };

    self.advance()?;

    let identifier_index = self.chunk.push_identifier(Identifier::new(text))?;

    Ok(identifier_index)
}
fn parse_expression(&mut self) -> Result<(), ParseError> {
2024-09-07 10:38:12 +00:00
self.parse(Precedence::None)
}
2024-09-07 16:15:47 +00:00
/// Parses one statement.
///
/// A statement starting with `Token::Let` is handled as a `let` assignment;
/// everything else is handled as an expression statement.
fn parse_statement(&mut self) -> Result<(), ParseError> {
    if matches!(self.current_token, Token::Let) {
        self.parse_let_assignment()
    } else {
        self.parse_expression_statement()
    }
}
fn parse_expression_statement(&mut self) -> Result<(), ParseError> {
let start = self.current_position.0;
self.parse_expression()?;
if self.allow(TokenKind::Semicolon)? {
let end = self.previous_position.1;
self.emit_byte(Instruction::Pop as u8, Span(start, end));
}
Ok(())
}
/// Parses `let <identifier> = <expression>;`.
///
/// The value expression is compiled first; the variable definition is then
/// emitted, tagged with the identifier's position.
///
/// # Errors
///
/// Fails with the error of whichever piece (`let`, identifier, `=`,
/// expression, `;`) is missing or malformed.
fn parse_let_assignment(&mut self) -> Result<(), ParseError> {
    self.expect(TokenKind::Let)?;

    // After consuming `let`, the current token is where the name should be.
    let name_position = self.current_position;
    let name_index = self.parse_identifier()?;

    self.expect(TokenKind::Equal)?;
    self.parse_expression()?;
    self.expect(TokenKind::Semicolon)?;

    self.define_variable(name_index, name_position)
}
/// Emits `Instruction::DefineGlobal` followed by `identifier_index`, both
/// tagged with `position`. Always returns `Ok(())`.
fn define_variable(&mut self, identifier_index: u8, position: Span) -> Result<(), ParseError> {
    for byte in [Instruction::DefineGlobal as u8, identifier_index] {
        self.emit_byte(byte, position);
    }

    Ok(())
}
fn parse(&mut self, precedence: Precedence) -> Result<(), ParseError> {
self.advance()?;
2024-09-07 10:38:12 +00:00
if let Some(prefix) = ParseRule::from(&self.previous_token.kind()).prefix {
log::trace!(
"Parsing {} as prefix with precedence {precedence}",
self.previous_token,
);
prefix(self)?;
} else {
2024-09-07 10:38:12 +00:00
return Err(ParseError::ExpectedExpression {
found: self.previous_token.to_owned(),
position: self.previous_position,
});
}
2024-09-07 16:15:47 +00:00
while precedence < ParseRule::from(&self.current_token.kind()).precedence {
self.advance()?;
2024-09-07 10:38:12 +00:00
let infix_rule = ParseRule::from(&self.previous_token.kind()).infix;
if let Some(infix) = infix_rule {
2024-09-07 10:38:12 +00:00
log::trace!(
"Parsing {} as infix with precedence {precedence}",
self.previous_token,
);
infix(self)?;
} else {
break;
}
}
2024-09-07 03:30:43 +00:00
Ok(())
}
}
/// Binding strength of an operator, ordered from loosest (`None`) to
/// tightest (`Primary`).
///
/// The derived ordering follows the explicit discriminants, so a greater
/// value binds more tightly.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Precedence {
    None = 0,
    Assignment = 1,
    Conditional = 2,
    LogicalOr = 3,
    LogicalAnd = 4,
    Equality = 5,
    Comparison = 6,
    Term = 7,
    Factor = 8,
    Unary = 9,
    Call = 10,
    Primary = 11,
}

impl Precedence {
    /// Maps a discriminant back to its level; any byte above 10 yields
    /// `Primary`.
    fn from_byte(byte: u8) -> Self {
        // Indexed by discriminant.
        const LEVELS: [Precedence; 12] = [
            Precedence::None,
            Precedence::Assignment,
            Precedence::Conditional,
            Precedence::LogicalOr,
            Precedence::LogicalAnd,
            Precedence::Equality,
            Precedence::Comparison,
            Precedence::Term,
            Precedence::Factor,
            Precedence::Unary,
            Precedence::Call,
            Precedence::Primary,
        ];

        LEVELS
            .get(usize::from(byte))
            .copied()
            .unwrap_or(Self::Primary)
    }

    /// Returns the next tighter level; `Primary` stays `Primary`.
    fn increment(&self) -> Self {
        let next = *self as u8 + 1;

        Self::from_byte(next)
    }
}

impl Display for Precedence {
    /// Displays the level using its variant name (same as `Debug`).
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        write!(f, "{self:?}")
    }
}
2024-09-07 10:38:12 +00:00
/// Signature of the prefix and infix handlers stored in a `ParseRule`: a
/// function that takes the parser mutably and reports success or a
/// `ParseError`.
type ParserFunction<'a> = fn(&mut Parser<'a>) -> Result<(), ParseError>;
/// Parsing behaviour attached to a token kind.
#[derive(Debug, Clone, Copy)]
pub struct ParseRule<'a> {
/// Handler run when the token starts an expression, if any.
pub prefix: Option<ParserFunction<'a>>,
/// Handler run when the token follows an already parsed operand, if any.
pub infix: Option<ParserFunction<'a>>,
/// Precedence compared against the caller's level in `Parser::parse` to
/// decide whether the token continues the current expression.
pub precedence: Precedence,
}
impl From<&TokenKind> for ParseRule<'_> {
fn from(token_kind: &TokenKind) -> Self {
match token_kind {
TokenKind::Eof => ParseRule {
prefix: None,
infix: None,
precedence: Precedence::None,
},
2024-09-07 16:15:47 +00:00
TokenKind::Identifier => ParseRule {
prefix: Some(Parser::parse_variable),
infix: None,
precedence: Precedence::None,
},
2024-09-07 10:38:12 +00:00
TokenKind::Boolean => ParseRule {
prefix: Some(Parser::parse_boolean),
infix: None,
precedence: Precedence::None,
},
TokenKind::Character => todo!(),
2024-09-07 16:15:47 +00:00
TokenKind::Float => ParseRule {
prefix: Some(Parser::parse_float),
infix: None,
precedence: Precedence::None,
},
TokenKind::Integer => ParseRule {
prefix: Some(Parser::parse_integer),
infix: None,
precedence: Precedence::None,
},
2024-09-07 16:15:47 +00:00
TokenKind::String => ParseRule {
prefix: Some(Parser::parse_string),
infix: None,
precedence: Precedence::None,
},
TokenKind::Async => todo!(),
TokenKind::Bool => todo!(),
TokenKind::Break => todo!(),
TokenKind::Else => todo!(),
TokenKind::FloatKeyword => todo!(),
TokenKind::If => todo!(),
TokenKind::Int => todo!(),
TokenKind::Let => todo!(),
TokenKind::Loop => todo!(),
TokenKind::Map => todo!(),
TokenKind::Str => todo!(),
TokenKind::While => todo!(),
TokenKind::BangEqual => todo!(),
TokenKind::Bang => todo!(),
TokenKind::Colon => todo!(),
TokenKind::Comma => todo!(),
TokenKind::Dot => todo!(),
TokenKind::DoubleAmpersand => todo!(),
TokenKind::DoubleDot => todo!(),
TokenKind::DoubleEqual => todo!(),
TokenKind::DoublePipe => todo!(),
TokenKind::Equal => todo!(),
TokenKind::Greater => todo!(),
TokenKind::GreaterOrEqual => todo!(),
TokenKind::LeftCurlyBrace => todo!(),
TokenKind::LeftParenthesis => ParseRule {
prefix: Some(Parser::parse_grouped),
infix: None,
precedence: Precedence::None,
},
TokenKind::LeftSquareBrace => todo!(),
TokenKind::Less => todo!(),
TokenKind::LessOrEqual => todo!(),
TokenKind::Minus => ParseRule {
prefix: Some(Parser::parse_unary),
infix: Some(Parser::parse_binary),
precedence: Precedence::Term,
},
TokenKind::MinusEqual => todo!(),
TokenKind::Mut => todo!(),
TokenKind::Percent => todo!(),
TokenKind::Plus => ParseRule {
prefix: None,
infix: Some(Parser::parse_binary),
precedence: Precedence::Term,
},
TokenKind::PlusEqual => todo!(),
TokenKind::RightCurlyBrace => todo!(),
2024-09-07 10:38:12 +00:00
TokenKind::RightParenthesis => ParseRule {
prefix: None,
infix: None,
precedence: Precedence::None,
},
TokenKind::RightSquareBrace => todo!(),
2024-09-07 16:15:47 +00:00
TokenKind::Semicolon => ParseRule {
prefix: None,
infix: None,
precedence: Precedence::None,
},
TokenKind::Star => ParseRule {
prefix: None,
infix: Some(Parser::parse_binary),
precedence: Precedence::Factor,
},
TokenKind::Struct => todo!(),
TokenKind::Slash => ParseRule {
prefix: None,
infix: Some(Parser::parse_binary),
precedence: Precedence::Factor,
},
}
}
}
2024-09-07 03:30:43 +00:00
#[derive(Debug, PartialEq)]
pub enum ParseError {
2024-09-07 10:38:12 +00:00
ExpectedExpression {
found: TokenOwned,
position: Span,
},
2024-09-07 03:30:43 +00:00
ExpectedToken {
expected: TokenKind,
found: TokenOwned,
position: Span,
},
ExpectedTokenMultiple {
expected: Vec<TokenKind>,
found: TokenOwned,
position: Span,
},
// Wrappers around foreign errors
Chunk(ChunkError),
Lex(LexError),
ParseIntError(ParseIntError),
}
impl From<ParseIntError> for ParseError {
fn from(error: ParseIntError) -> Self {
Self::ParseIntError(error)
}
}
impl From<LexError> for ParseError {
fn from(error: LexError) -> Self {
Self::Lex(error)
}
}
impl From<ChunkError> for ParseError {
fn from(error: ChunkError) -> Self {
Self::Chunk(error)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `source` and asserts that the result is a chunk made of exactly
    /// these instructions, constants and identifiers.
    fn assert_chunk(
        source: &str,
        instructions: Vec<(u8, Span)>,
        constants: Vec<Value>,
        identifiers: Vec<Identifier>,
    ) {
        assert_eq!(
            parse(source),
            Ok(Chunk::with_data(instructions, constants, identifiers))
        );
    }

    /// Asserts the chunk for a `42 <op> 42` source, where the single-character
    /// operator sits at `Span(3, 4)`.
    fn assert_binary_operation(source: &str, operator: Instruction) {
        assert_chunk(
            source,
            vec![
                (Instruction::Constant as u8, Span(0, 2)),
                (0, Span(0, 2)),
                (Instruction::Constant as u8, Span(5, 7)),
                (1, Span(5, 7)),
                (operator as u8, Span(3, 4)),
            ],
            vec![Value::integer(42), Value::integer(42)],
            vec![],
        );
    }

    #[test]
    fn let_statement() {
        assert_chunk(
            "let x = 42;",
            vec![
                (Instruction::Constant as u8, Span(8, 10)),
                (0, Span(8, 10)),
                (Instruction::DefineGlobal as u8, Span(4, 5)),
                (0, Span(4, 5)),
            ],
            vec![Value::integer(42)],
            vec![Identifier::new("x")],
        );
    }

    #[test]
    fn string() {
        assert_chunk(
            "\"Hello, World!\"",
            vec![(Instruction::Constant as u8, Span(0, 15)), (0, Span(0, 15))],
            vec![Value::string("Hello, World!")],
            vec![],
        );
    }

    #[test]
    fn integer() {
        assert_chunk(
            "42",
            vec![(Instruction::Constant as u8, Span(0, 2)), (0, Span(0, 2))],
            vec![Value::integer(42)],
            vec![],
        );
    }

    #[test]
    fn boolean() {
        assert_chunk(
            "true",
            vec![(Instruction::Constant as u8, Span(0, 4)), (0, Span(0, 4))],
            vec![Value::boolean(true)],
            vec![],
        );
    }

    #[test]
    fn grouping() {
        assert_chunk(
            "(42 + 42) * 2",
            vec![
                (Instruction::Constant as u8, Span(1, 3)),
                (0, Span(1, 3)),
                (Instruction::Constant as u8, Span(6, 8)),
                (1, Span(6, 8)),
                (Instruction::Add as u8, Span(4, 5)),
                (Instruction::Constant as u8, Span(12, 13)),
                (2, Span(12, 13)),
                (Instruction::Multiply as u8, Span(10, 11)),
            ],
            vec![Value::integer(42), Value::integer(42), Value::integer(2)],
            vec![],
        );
    }

    #[test]
    fn negation() {
        assert_chunk(
            "-(42)",
            vec![
                (Instruction::Constant as u8, Span(2, 4)),
                (0, Span(2, 4)),
                (Instruction::Negate as u8, Span(0, 1)),
            ],
            vec![Value::integer(42)],
            vec![],
        );
    }

    #[test]
    fn addition() {
        assert_binary_operation("42 + 42", Instruction::Add);
    }

    #[test]
    fn subtraction() {
        assert_binary_operation("42 - 42", Instruction::Subtract);
    }

    #[test]
    fn multiplication() {
        assert_binary_operation("42 * 42", Instruction::Multiply);
    }

    #[test]
    fn division() {
        assert_binary_operation("42 / 42", Instruction::Divide);
    }
}