dust/dust-lang/src/parse.rs

555 lines
17 KiB
Rust
Raw Normal View History

2024-08-07 16:32:18 +00:00
//! Parsing tools.
//!
//! This module provides two parsing options:
//! - `parse` convenience function
//! - `Parser` struct, which parses the input a statement at a time
2024-08-05 04:40:51 +00:00
use std::collections::VecDeque;
2024-08-07 15:38:08 +00:00
use crate::{AbstractSyntaxTree, LexError, Lexer, Node, Span, Statement, Token, Value};
2024-08-04 00:23:52 +00:00
2024-08-07 16:32:18 +00:00
/// Parses the input into an abstract syntax tree.
///
/// # Examples
/// ```
/// # use dust_lang::*;
/// let input = "x = 42";
/// let result = parse(input);
///
/// assert_eq!(
/// result,
/// Ok(AbstractSyntaxTree {
/// nodes: [
/// Node {
/// statement: Statement::Assign(
/// Box::new(Node {
/// statement: Statement::Identifier("x".into()),
/// span: (0, 1),
/// }),
/// Box::new(Node {
/// statement: Statement::Constant(Value::integer(42)),
/// span: (4, 6),
/// })
/// ),
/// span: (0, 6),
/// }
/// ].into(),
/// }),
/// );
/// ```
2024-08-07 15:38:08 +00:00
pub fn parse(input: &str) -> Result<AbstractSyntaxTree, ParseError> {
2024-08-04 23:25:44 +00:00
let lexer = Lexer::new(input);
let mut parser = Parser::new(lexer);
2024-08-05 04:40:51 +00:00
let mut nodes = VecDeque::new();
2024-08-04 23:25:44 +00:00
2024-08-05 02:15:31 +00:00
loop {
2024-08-05 04:40:51 +00:00
let node = parser.parse()?;
2024-08-04 23:25:44 +00:00
2024-08-05 04:40:51 +00:00
nodes.push_back(node);
2024-08-05 00:08:43 +00:00
2024-08-05 02:15:31 +00:00
if let Token::Eof = parser.current.0 {
break;
}
2024-08-05 00:08:43 +00:00
}
2024-08-07 15:38:08 +00:00
Ok(AbstractSyntaxTree { nodes })
2024-08-04 00:23:52 +00:00
}
2024-08-07 16:32:18 +00:00
/// Low-level tool for parsing the input a statement at a time.
///
/// # Examples
/// ```
/// # use std::collections::VecDeque;
/// # use dust_lang::*;
/// let input = "x = 42";
/// let lexer = Lexer::new(input);
/// let mut parser = Parser::new(lexer);
/// let mut nodes = VecDeque::new();
///
/// loop {
/// let node = parser.parse().unwrap();
///
/// nodes.push_back(node);
///
/// if let Token::Eof = parser.current().0 {
/// break;
/// }
/// }
///
/// assert_eq!(
/// nodes,
/// Into::<VecDeque<Node>>::into([
/// Node {
/// statement: Statement::Assign(
/// Box::new(Node {
/// statement: Statement::Identifier("x".into()),
/// span: (0, 1),
/// }),
/// Box::new(Node {
/// statement: Statement::Constant(Value::integer(42)),
/// span: (4, 6),
/// })
/// ),
/// span: (0, 6),
/// }
/// ]),
/// );
/// ```
2024-08-04 00:23:52 +00:00
pub struct Parser<'src> {
    /// Source of tokens; the parser pulls one token at a time from it.
    lexer: Lexer<'src>,
    /// The token the parser is currently looking at, paired with its span.
    current: (Token, Span),
}
impl<'src> Parser<'src> {
pub fn new(lexer: Lexer<'src>) -> Self {
let mut lexer = lexer;
2024-08-05 00:08:43 +00:00
let current = lexer.next_token().unwrap_or((Token::Eof, (0, 0)));
Parser { lexer, current }
2024-08-04 00:23:52 +00:00
}
2024-08-05 03:11:04 +00:00
pub fn parse(&mut self) -> Result<Node, ParseError> {
2024-08-05 04:40:51 +00:00
self.parse_node(0)
2024-08-04 00:23:52 +00:00
}
2024-08-07 16:32:18 +00:00
pub fn current(&self) -> &(Token, Span) {
&self.current
}
2024-08-04 00:23:52 +00:00
fn next_token(&mut self) -> Result<(), ParseError> {
self.current = self.lexer.next_token()?;
Ok(())
}
2024-08-05 04:40:51 +00:00
fn parse_node(&mut self, precedence: u8) -> Result<Node, ParseError> {
let left_node = self.parse_primary()?;
let left_start = left_node.span.0;
2024-08-04 00:23:52 +00:00
if precedence < self.current_precedence() {
match &self.current {
(Token::Plus, _) => {
self.next_token()?;
2024-08-05 04:40:51 +00:00
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.span.1;
2024-08-04 00:23:52 +00:00
2024-08-05 03:11:04 +00:00
return Ok(Node::new(
2024-08-05 04:40:51 +00:00
Statement::Add(Box::new(left_node), Box::new(right_node)),
2024-08-05 00:08:43 +00:00
(left_start, right_end),
2024-08-04 00:23:52 +00:00
));
}
(Token::Star, _) => {
self.next_token()?;
2024-08-05 04:40:51 +00:00
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.span.1;
2024-08-04 00:23:52 +00:00
2024-08-05 03:11:04 +00:00
return Ok(Node::new(
2024-08-05 04:40:51 +00:00
Statement::Multiply(Box::new(left_node), Box::new(right_node)),
2024-08-05 00:08:43 +00:00
(left_start, right_end),
2024-08-04 00:23:52 +00:00
));
}
(Token::Equal, _) => {
self.next_token()?;
2024-08-05 04:40:51 +00:00
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.span.1;
2024-08-04 00:23:52 +00:00
2024-08-05 03:11:04 +00:00
return Ok(Node::new(
2024-08-05 04:40:51 +00:00
Statement::Assign(Box::new(left_node), Box::new(right_node)),
2024-08-05 00:08:43 +00:00
(left_start, right_end),
2024-08-04 00:23:52 +00:00
));
}
2024-08-05 18:31:08 +00:00
(Token::Dot, _) => {
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.span.1;
return Ok(Node::new(
Statement::PropertyAccess(Box::new(left_node), Box::new(right_node)),
(left_start, right_end),
));
}
2024-08-04 00:23:52 +00:00
_ => {}
}
}
2024-08-05 04:40:51 +00:00
Ok(left_node)
2024-08-04 00:23:52 +00:00
}
2024-08-05 03:11:04 +00:00
fn parse_primary(&mut self) -> Result<Node, ParseError> {
2024-08-04 00:23:52 +00:00
match self.current.clone() {
(Token::Boolean(boolean), span) => {
self.next_token()?;
Ok(Node::new(
Statement::Constant(Value::boolean(boolean)),
span,
))
}
2024-08-05 00:08:43 +00:00
(Token::Float(float), span) => {
2024-08-04 00:23:52 +00:00
self.next_token()?;
2024-08-05 00:08:43 +00:00
2024-08-05 03:11:04 +00:00
Ok(Node::new(Statement::Constant(Value::float(float)), span))
2024-08-05 00:08:43 +00:00
}
(Token::Integer(int), span) => {
self.next_token()?;
2024-08-05 03:11:04 +00:00
Ok(Node::new(Statement::Constant(Value::integer(int)), span))
2024-08-04 00:23:52 +00:00
}
(Token::Identifier(identifier), span) => {
self.next_token()?;
2024-08-05 00:08:43 +00:00
2024-08-05 03:11:04 +00:00
Ok(Node::new(Statement::Identifier(identifier), span))
2024-08-04 00:23:52 +00:00
}
(Token::LeftParenthesis, left_span) => {
self.next_token()?;
2024-08-05 04:40:51 +00:00
let instruction = self.parse_node(0)?;
2024-08-04 00:23:52 +00:00
if let (Token::RightParenthesis, right_span) = self.current {
self.next_token()?;
2024-08-05 03:11:04 +00:00
Ok(Node::new(
2024-08-05 04:40:51 +00:00
instruction.statement,
2024-08-05 00:08:43 +00:00
(left_span.0, right_span.1),
))
2024-08-04 00:23:52 +00:00
} else {
2024-08-05 01:31:18 +00:00
Err(ParseError::ExpectedClosingParenthesis {
actual: self.current.0.clone(),
span: self.current.1,
})
}
}
(Token::LeftSquareBrace, left_span) => {
self.next_token()?;
let mut instructions = Vec::new();
loop {
if let (Token::RightSquareBrace, right_span) = self.current {
self.next_token()?;
2024-08-05 03:11:04 +00:00
return Ok(Node::new(
Statement::List(instructions),
2024-08-05 01:31:18 +00:00
(left_span.0, right_span.1),
));
}
if let (Token::Comma, _) = self.current {
self.next_token()?;
continue;
}
2024-08-05 04:40:51 +00:00
if let Ok(instruction) = self.parse_node(0) {
2024-08-05 01:31:18 +00:00
instructions.push(instruction);
} else {
return Err(ParseError::ExpectedClosingSquareBrace {
actual: self.current.0.clone(),
span: self.current.1,
});
}
2024-08-04 00:23:52 +00:00
}
}
(Token::ReservedIdentifier(reserved), _) => {
self.next_token()?;
Ok(Node::new(
Statement::ReservedIdentifier(reserved),
self.current.1,
))
}
2024-08-04 00:23:52 +00:00
_ => Err(ParseError::UnexpectedToken(self.current.0.clone())),
}
}
fn current_precedence(&self) -> u8 {
2024-08-05 01:31:18 +00:00
match self.current.0 {
2024-08-05 18:31:08 +00:00
Token::Dot => 4,
2024-08-05 01:31:18 +00:00
Token::Equal => 3,
Token::Plus => 1,
Token::Star => 2,
2024-08-04 00:23:52 +00:00
_ => 0,
}
}
}
2024-08-04 23:25:44 +00:00
#[derive(Debug, PartialEq, Clone)]
pub enum ParseError {
2024-08-05 01:31:18 +00:00
ExpectedClosingParenthesis { actual: Token, span: Span },
ExpectedClosingSquareBrace { actual: Token, span: Span },
2024-08-04 23:25:44 +00:00
LexError(LexError),
UnexpectedToken(Token),
}
impl From<LexError> for ParseError {
fn from(v: LexError) -> Self {
Self::LexError(v)
}
}
2024-08-04 00:23:52 +00:00
#[cfg(test)]
mod tests {
    use crate::Identifier;

    use super::*;

    /// Wraps a single node in the successful result `parse` is expected to return.
    fn ast(node: Node) -> Result<AbstractSyntaxTree, ParseError> {
        Ok(AbstractSyntaxTree {
            nodes: [node].into(),
        })
    }

    /// Builds a constant node.
    fn constant(value: Value, span: Span) -> Node {
        Node::new(Statement::Constant(value), span)
    }

    /// Builds an identifier node.
    fn identifier(name: &'static str, span: Span) -> Node {
        Node::new(Statement::Identifier(Identifier::new(name)), span)
    }

    /// Builds an addition node.
    fn add(left: Node, right: Node, span: Span) -> Node {
        Node::new(Statement::Add(Box::new(left), Box::new(right)), span)
    }

    /// Builds a multiplication node.
    fn multiply(left: Node, right: Node, span: Span) -> Node {
        Node::new(Statement::Multiply(Box::new(left), Box::new(right)), span)
    }

    /// Builds a property access node.
    fn property_access_node(left: Node, right: Node, span: Span) -> Node {
        Node::new(
            Statement::PropertyAccess(Box::new(left), Box::new(right)),
            span,
        )
    }

    /// Builds a list node.
    fn list_node(items: Vec<Node>, span: Span) -> Node {
        Node::new(Statement::List(items), span)
    }

    #[test]
    fn boolean() {
        assert_eq!(parse("true"), ast(constant(Value::boolean(true), (0, 4))));
    }

    #[test]
    fn list_access() {
        let expected = property_access_node(
            list_node(
                vec![
                    constant(Value::integer(1), (1, 2)),
                    constant(Value::integer(2), (4, 5)),
                    constant(Value::integer(3), (7, 8)),
                ],
                (0, 9),
            ),
            constant(Value::integer(0), (10, 11)),
            (0, 11),
        );

        assert_eq!(parse("[1, 2, 3].0"), ast(expected));
    }

    #[test]
    fn property_access() {
        let expected =
            property_access_node(identifier("a", (0, 1)), identifier("b", (2, 3)), (0, 3));

        assert_eq!(parse("a.b"), ast(expected));
    }

    #[test]
    fn complex_list() {
        let first = constant(Value::integer(1), (1, 2));
        let second = add(
            constant(Value::integer(1), (4, 5)),
            constant(Value::integer(1), (8, 9)),
            (4, 9),
        );
        let third = add(
            constant(Value::integer(2), (11, 12)),
            multiply(
                constant(Value::integer(4), (16, 17)),
                constant(Value::integer(10), (20, 22)),
                (15, 23),
            ),
            (11, 23),
        );

        assert_eq!(
            parse("[1, 1 + 1, 2 + (4 * 10)]"),
            ast(list_node(vec![first, second, third], (0, 24)))
        );
    }

    #[test]
    fn list() {
        let expected = list_node(
            vec![
                constant(Value::integer(1), (1, 2)),
                constant(Value::integer(2), (4, 5)),
            ],
            (0, 6),
        );

        assert_eq!(parse("[1, 2]"), ast(expected));
    }

    #[test]
    fn empty_list() {
        assert_eq!(parse("[]"), ast(list_node(vec![], (0, 2))));
    }

    #[test]
    fn float() {
        assert_eq!(parse("42.0"), ast(constant(Value::float(42.0), (0, 4))));
    }

    #[test]
    fn add() {
        let expected = super::tests::add_node_for_test(
            constant(Value::integer(1), (0, 1)),
            constant(Value::integer(2), (4, 5)),
            (0, 5),
        );

        assert_eq!(parse("1 + 2"), ast(expected));
    }

    /// Alias for the `add` helper; the `add` test above shadows that name
    /// inside this module, so it is reached through this function instead.
    fn add_node_for_test(left: Node, right: Node, span: Span) -> Node {
        Node::new(Statement::Add(Box::new(left), Box::new(right)), span)
    }

    #[test]
    fn multiply_numbers() {
        let expected = multiply(
            constant(Value::integer(1), (0, 1)),
            constant(Value::integer(2), (4, 5)),
            (0, 5),
        );

        assert_eq!(parse("1 * 2"), ast(expected));
    }

    #[test]
    fn add_and_multiply() {
        let expected = add_node_for_test(
            constant(Value::integer(1), (0, 1)),
            multiply(
                constant(Value::integer(2), (4, 5)),
                constant(Value::integer(3), (8, 9)),
                (4, 9),
            ),
            (0, 9),
        );

        assert_eq!(parse("1 + 2 * 3"), ast(expected));
    }

    #[test]
    fn assignment() {
        let sum = add_node_for_test(
            constant(Value::integer(1), (4, 5)),
            multiply(
                constant(Value::integer(2), (8, 9)),
                constant(Value::integer(3), (12, 13)),
                (8, 13),
            ),
            (4, 13),
        );
        let expected = Node::new(
            Statement::Assign(Box::new(identifier("a", (0, 1))), Box::new(sum)),
            (0, 13),
        );

        assert_eq!(parse("a = 1 + 2 * 3"), ast(expected));
    }
}