From 60f8aab805dae1abd6d11e6af932797baf43d8ec Mon Sep 17 00:00:00 2001 From: Jeff Date: Fri, 9 Aug 2024 14:01:01 -0400 Subject: [PATCH] Refactor pratt parser --- dust-lang/src/lex.rs | 156 ++++++++-------- dust-lang/src/parse.rs | 399 ++++++++++++++++------------------------- dust-lang/src/token.rs | 33 ++-- dust-lang/src/vm.rs | 10 ++ 4 files changed, 268 insertions(+), 330 deletions(-) diff --git a/dust-lang/src/lex.rs b/dust-lang/src/lex.rs index bfed026..1b7dc70 100644 --- a/dust-lang/src/lex.rs +++ b/dust-lang/src/lex.rs @@ -6,7 +6,6 @@ use std::{ error::Error, fmt::{self, Display, Formatter}, - num::{ParseFloatError, ParseIntError}, }; use crate::{Span, Token}; @@ -24,9 +23,9 @@ use crate::{Span, Token}; /// [ /// (Token::Identifier("x"), (0, 1)), /// (Token::Equal, (2, 3)), -/// (Token::Integer(1), (4, 5)), +/// (Token::Integer("1"), (4, 5)), /// (Token::Plus, (6, 7)), -/// (Token::Integer(2), (8, 9)), +/// (Token::Integer("2"), (8, 9)), /// (Token::Eof, (9, 9)), /// ] /// ); @@ -77,9 +76,9 @@ pub fn lex<'chars, 'src: 'chars>(input: &'src str) -> Result, /// [ /// (Token::Identifier("x"), (0, 1)), /// (Token::Equal, (2, 3)), -/// (Token::Integer(1), (4, 5)), +/// (Token::Integer("1"), (4, 5)), /// (Token::Plus, (6, 7)), -/// (Token::Integer(2), (8, 9)), +/// (Token::Integer("2"), (8, 9)), /// (Token::Eof, (9, 9)), /// ] /// ) @@ -110,7 +109,7 @@ impl Lexer { self.position += 9; ( - Token::Float(f64::NEG_INFINITY), + Token::Float("-Infinity"), (self.position - 9, self.position), ) } else { @@ -231,6 +230,17 @@ impl Lexer { (Token::Semicolon, (self.position - 1, self.position)) } + '|' => { + if let Some('|') = self.peek_second_char(source) { + self.position += 2; + + (Token::DoublePipe, (self.position - 2, self.position)) + } else { + self.position += 1; + + return Err(LexError::UnexpectedCharacter(c)); + } + } _ => { self.position += 1; @@ -244,6 +254,15 @@ impl Lexer { Ok((token, span)) } + /// Peek at the next token without consuming the source. + pub fn peek_token<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> { + let token = self.next_token(source)?; + + self.position -= token.0.as_str().len(); + + Ok(token) + } + /// Progress to the next character. fn next_char(&mut self, source: &str) -> Option { if let Some(c) = source[self.position..].chars().next() { @@ -335,14 +354,12 @@ impl Lexer { } } + let text = &source[start_pos..self.position]; + if is_float { - let float = source[start_pos..self.position].parse::()?; - - Ok((Token::Float(float), (start_pos, self.position))) + Ok((Token::Float(text), (start_pos, self.position))) } else { - let integer = source[start_pos..self.position].parse::()?; - - Ok((Token::Integer(integer), (start_pos, self.position))) + Ok((Token::Integer(text), (start_pos, self.position))) } } @@ -363,13 +380,13 @@ impl Lexer { let string = &source[start_pos..self.position]; let token = match string { - "true" => Token::Boolean(true), - "false" => Token::Boolean(false), - "Infinity" => Token::Float(f64::INFINITY), + "true" => Token::Boolean("true"), + "false" => Token::Boolean("false"), + "Infinity" => Token::Float("Infinity"), "is_even" => Token::IsEven, "is_odd" => Token::IsOdd, "length" => Token::Length, - "NaN" => Token::Float(f64::NAN), + "NaN" => Token::Float("NaN"), "read_line" => Token::ReadLine, "write_line" => Token::WriteLine, _ => Token::Identifier(string), @@ -410,16 +427,12 @@ impl Default for Lexer { #[derive(Debug, PartialEq, Clone)] pub enum LexError { - FloatError(ParseFloatError), - IntegerError(ParseIntError), UnexpectedCharacter(char), } impl Error for LexError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { - Self::FloatError(parse_float_error) => Some(parse_float_error), - Self::IntegerError(parse_int_error) => Some(parse_int_error), Self::UnexpectedCharacter(_) => None, } } @@ -428,12 +441,6 @@ impl Error for LexError { impl Display for LexError { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { - Self::FloatError(parse_float_error) => { - write!(f, "Failed to parse float: {}", parse_float_error) - } - Self::IntegerError(parse_int_error) => { - write!(f, "Failed to parse integer: {}", parse_int_error) - } Self::UnexpectedCharacter(character) => { write!(f, "Unexpected character: '{}'", character) } @@ -441,22 +448,25 @@ impl Display for LexError { } } -impl From for LexError { - fn from(error: std::num::ParseFloatError) -> Self { - Self::FloatError(error) - } -} - -impl From for LexError { - fn from(error: std::num::ParseIntError) -> Self { - Self::IntegerError(error) - } -} - #[cfg(test)] mod tests { use super::*; + #[test] + fn or() { + let input = "true || false"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Boolean("true"), (0, 4)), + (Token::DoublePipe, (5, 7)), + (Token::Boolean("false"), (8, 13)), + (Token::Eof, (13, 13)), + ]) + ) + } + #[test] fn block() { let input = "{ x = 42; y = 'foobar' }"; @@ -467,7 +477,7 @@ mod tests { (Token::LeftCurlyBrace, (0, 1)), (Token::Identifier("x"), (2, 3)), (Token::Equal, (4, 5)), - (Token::Integer(42), (6, 8)), + (Token::Integer("42"), (6, 8)), (Token::Semicolon, (8, 9)), (Token::Identifier("y"), (10, 11)), (Token::Equal, (12, 13)), @@ -485,9 +495,9 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Integer(42), (0, 2)), + (Token::Integer("42"), (0, 2)), (Token::DoubleEqual, (3, 5)), - (Token::Integer(42), (6, 8)), + (Token::Integer("42"), (6, 8)), (Token::Eof, (8, 8)), ]) ) @@ -500,9 +510,9 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Integer(42), (0, 2)), + (Token::Integer("42"), (0, 2)), (Token::Percent, (3, 4)), - (Token::Integer(2), (5, 6)), + (Token::Integer("2"), (5, 6)), (Token::Eof, (6, 6)), ]) ) @@ -515,9 +525,9 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Integer(42), (0, 2)), + (Token::Integer("42"), (0, 2)), (Token::Slash, (3, 4)), - (Token::Integer(2), (5, 6)), + (Token::Integer("2"), (5, 6)), (Token::Eof, (6, 6)), ]) ) @@ -533,7 +543,7 @@ mod tests { (Token::LeftCurlyBrace, (0, 1)), (Token::Identifier("x"), (2, 3)), (Token::Equal, (4, 5)), - (Token::Integer(42), (6, 8)), + (Token::Integer("42"), (6, 8)), (Token::Comma, (8, 9)), (Token::Identifier("y"), (10, 11)), (Token::Equal, (12, 13)), @@ -591,7 +601,7 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Float(f64::INFINITY), (0, 8)), + (Token::Float("Infinity"), (0, 8)), (Token::Eof, (8, 8)), ]) ) @@ -604,7 +614,7 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Float(f64::NEG_INFINITY), (0, 9)), + (Token::Float("-Infinity"), (0, 9)), (Token::Eof, (9, 9)), ]) ) @@ -614,7 +624,7 @@ mod tests { fn nan() { let input = "NaN"; - assert!(lex(input).is_ok_and(|tokens| tokens[0].0 == Token::Float(f64::NAN))); + assert!(lex(input).is_ok_and(|tokens| tokens[0].0 == Token::Float("NaN"))); } #[test] @@ -623,7 +633,10 @@ mod tests { assert_eq!( lex(input), - Ok(vec![(Token::Float(42.42e42), (0, 8)), (Token::Eof, (8, 8)),]) + Ok(vec![ + (Token::Float("42.42e42"), (0, 8)), + (Token::Eof, (8, 8)), + ]) ) } @@ -634,7 +647,7 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Integer(i64::MAX), (0, 19)), + (Token::Integer("9223372036854775807"), (0, 19)), (Token::Eof, (19, 19)), ]) ) @@ -647,7 +660,7 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Integer(i64::MIN), (0, 20)), + (Token::Integer("-9223372036854775808"), (0, 20)), (Token::Eof, (20, 20)), ]) ) @@ -660,9 +673,9 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Integer(-42), (0, 3)), + (Token::Integer("-42"), (0, 3)), (Token::Minus, (4, 5)), - (Token::Integer(-42), (6, 9)), + (Token::Integer("-42"), (6, 9)), (Token::Eof, (9, 9)), ]) ) @@ -674,7 +687,7 @@ mod tests { assert_eq!( lex(input), - Ok(vec![(Token::Integer(-42), (0, 3)), (Token::Eof, (3, 3))]) + Ok(vec![(Token::Integer("-42"), (0, 3)), (Token::Eof, (3, 3))]) ) } @@ -743,7 +756,7 @@ mod tests { assert_eq!( lex(input), - Ok(vec![(Token::Boolean(true), (0, 4)), (Token::Eof, (4, 4)),]) + Ok(vec![(Token::Boolean("true"), (0, 4)), (Token::Eof, (4, 4)),]) ) } @@ -753,7 +766,10 @@ mod tests { assert_eq!( lex(input), - Ok(vec![(Token::Boolean(false), (0, 5)), (Token::Eof, (5, 5))]) + Ok(vec![ + (Token::Boolean("false"), (0, 5)), + (Token::Eof, (5, 5)) + ]) ) } @@ -764,7 +780,7 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Integer(42), (0, 2)), + (Token::Integer("42"), (0, 2)), (Token::Dot, (2, 3)), (Token::IsEven, (3, 10)), (Token::LeftParenthesis, (10, 11)), @@ -811,7 +827,7 @@ mod tests { assert_eq!( lex(input), - Ok(vec![(Token::Float(1.23), (0, 4)), (Token::Eof, (4, 4)),]) + Ok(vec![(Token::Float("1.23"), (0, 4)), (Token::Eof, (4, 4)),]) ) } @@ -823,7 +839,7 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Float(123456789.123456789), (0, 19)), + (Token::Float("123456789.123456789"), (0, 19)), (Token::Eof, (19, 19)), ]) ) @@ -836,9 +852,9 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Integer(1), (0, 1)), + (Token::Integer("1"), (0, 1)), (Token::Plus, (2, 3)), - (Token::Integer(2), (4, 5)), + (Token::Integer("2"), (4, 5)), (Token::Eof, (5, 5)), ]) ) @@ -851,9 +867,9 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Integer(1), (0, 1)), + (Token::Integer("1"), (0, 1)), (Token::Star, (2, 3)), - (Token::Integer(2), (4, 5)), + (Token::Integer("2"), (4, 5)), (Token::Eof, (5, 5)), ]) ) @@ -866,11 +882,11 @@ mod tests { assert_eq!( lex(input), Ok(vec![ - (Token::Integer(1), (0, 1)), + (Token::Integer("1"), (0, 1)), (Token::Plus, (2, 3)), - (Token::Integer(2), (4, 5)), + (Token::Integer("2"), (4, 5)), (Token::Star, (6, 7)), - (Token::Integer(3), (8, 9)), + (Token::Integer("3"), (8, 9)), (Token::Eof, (9, 9)), ]) ); @@ -885,11 +901,11 @@ mod tests { Ok(vec![ (Token::Identifier("a"), (0, 1)), (Token::Equal, (2, 3)), - (Token::Integer(1), (4, 5)), + (Token::Integer("1"), (4, 5)), (Token::Plus, (6, 7)), - (Token::Integer(2), (8, 9)), + (Token::Integer("2"), (8, 9)), (Token::Star, (10, 11)), - (Token::Integer(3), (12, 13)), + (Token::Integer("3"), (12, 13)), (Token::Eof, (13, 13)), ]) ); diff --git a/dust-lang/src/parse.rs b/dust-lang/src/parse.rs index 6e44197..cc3fdcc 100644 --- a/dust-lang/src/parse.rs +++ b/dust-lang/src/parse.rs @@ -7,6 +7,8 @@ use std::{ collections::VecDeque, error::Error, fmt::{self, Display, Formatter}, + num::{ParseFloatError, ParseIntError}, + str::ParseBoolError, }; use crate::{ @@ -150,243 +152,54 @@ impl<'src> Parser<'src> { } fn parse_node(&mut self, precedence: u8) -> Result, ParseError> { - let left_node = self.parse_primary()?; - let left_start = left_node.position.0; + let left = self.parse_primary()?; if precedence < self.current_precedence() { - match &self.current { - (Token::Dot, _) => { - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::PropertyAccess(Box::new(left_node), Box::new(right_node)), - (left_start, right_end), - )); - } - (Token::DoubleAmpersand, _) => { - let operator = Node::new(BinaryOperator::And, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - (Token::DoubleEqual, _) => { - let operator = Node::new(BinaryOperator::Equal, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - (Token::Greater, _) => { - let operator = Node::new(BinaryOperator::Greater, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - (Token::GreaterEqual, _) => { - let operator = Node::new(BinaryOperator::GreaterOrEqual, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - (Token::Less, _) => { - let operator = Node::new(BinaryOperator::Less, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - (Token::LessEqual, _) => { - let operator = Node::new(BinaryOperator::LessOrEqual, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - (Token::Minus, _) => { - let operator = Node::new(BinaryOperator::Subtract, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - (Token::Plus, _) => { - let operator = Node::new(BinaryOperator::Add, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - (Token::Semicolon, (_, right_end)) => { - return Ok(Node::new( - Statement::Nil(Box::new(left_node)), - (left_start, *right_end), - )) - } - (Token::Star, _) => { - let operator = Node::new(BinaryOperator::Multiply, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - (Token::Slash, _) => { - let operator = Node::new(BinaryOperator::Divide, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - (Token::Percent, _) => { - let operator = Node::new(BinaryOperator::Modulo, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } - _ => {} - } + self.parse_infix(left) + } else { + Ok(left) } - - Ok(left_node) } fn parse_primary(&mut self) -> Result, ParseError> { match self.current { - (Token::Boolean(boolean), span) => { + (Token::Boolean(text), position) => { self.next_token()?; + let boolean = text + .parse() + .map_err(|error| ParseError::BooleanError { error, position })?; + Ok(Node::new( Statement::Constant(Value::boolean(boolean)), - span, + position, )) } - (Token::Float(float), span) => { + (Token::Float(text), position) => { self.next_token()?; - Ok(Node::new(Statement::Constant(Value::float(float)), span)) + let float = text + .parse() + .map_err(|error| ParseError::FloatError { error, position })?; + + Ok(Node::new( + Statement::Constant(Value::float(float)), + position, + )) } - (Token::Integer(int), span) => { + (Token::Integer(text), position) => { self.next_token()?; - Ok(Node::new(Statement::Constant(Value::integer(int)), span)) + let integer = text + .parse() + .map_err(|error| ParseError::IntegerError { error, position })?; + + Ok(Node::new( + Statement::Constant(Value::integer(integer)), + position, + )) } - (Token::Identifier(text), span) => { + (Token::Identifier(text), position) => { self.next_token()?; if let (Token::Equal, _) = self.current { @@ -397,33 +210,36 @@ impl<'src> Parser<'src> { Ok(Node::new( Statement::Assignment { - identifier: Node::new(Identifier::new(text), span), + identifier: Node::new(Identifier::new(text), position), value_node: Box::new(value_node), }, - (span.0, right_end), + (position.0, right_end), )) } else { Ok(Node::new( Statement::Identifier(Identifier::new(text)), - span, + position, )) } } - (Token::String(string), span) => { + (Token::String(string), position) => { self.next_token()?; - Ok(Node::new(Statement::Constant(Value::string(string)), span)) + Ok(Node::new( + Statement::Constant(Value::string(string)), + position, + )) } - (Token::LeftCurlyBrace, left_span) => { + (Token::LeftCurlyBrace, left_position) => { self.next_token()?; // If the next token is a right curly brace, this is an empty map - if let (Token::RightCurlyBrace, right_span) = self.current { + if let (Token::RightCurlyBrace, right_position) = self.current { self.next_token()?; return Ok(Node::new( Statement::Map(Vec::new()), - (left_span.0, right_span.1), + (left_position.0, right_position.1), )); } @@ -431,10 +247,13 @@ impl<'src> Parser<'src> { loop { // If a closing brace is found, return the new statement - if let (Token::RightCurlyBrace, right_span) = self.current { + if let (Token::RightCurlyBrace, right_position) = self.current { self.next_token()?; - return Ok(Node::new(statement.unwrap(), (left_span.0, right_span.1))); + return Ok(Node::new( + statement.unwrap(), + (left_position.0, right_position.1), + )); } let next_node = self.parse_node(0)?; @@ -503,15 +322,15 @@ impl<'src> Parser<'src> { } } } - (Token::LeftParenthesis, left_span) => { + (Token::LeftParenthesis, left_position) => { self.next_token()?; let node = self.parse_node(0)?; - if let (Token::RightParenthesis, right_span) = self.current { + if let (Token::RightParenthesis, right_position) = self.current { self.next_token()?; - Ok(Node::new(node.inner, (left_span.0, right_span.1))) + Ok(Node::new(node.inner, (left_position.0, right_position.1))) } else { Err(ParseError::ExpectedToken { expected: TokenOwned::RightParenthesis, @@ -520,18 +339,18 @@ impl<'src> Parser<'src> { }) } } - (Token::LeftSquareBrace, left_span) => { + (Token::LeftSquareBrace, left_position) => { self.next_token()?; let mut nodes = Vec::new(); loop { - if let (Token::RightSquareBrace, right_span) = self.current { + if let (Token::RightSquareBrace, right_position) = self.current { self.next_token()?; return Ok(Node::new( Statement::List(nodes), - (left_span.0, right_span.1), + (left_position.0, right_position.1), )); } @@ -554,7 +373,7 @@ impl<'src> Parser<'src> { } ( Token::IsEven | Token::IsOdd | Token::Length | Token::ReadLine | Token::WriteLine, - left_span, + left_position, ) => { let function = match self.current.0 { Token::IsEven => BuiltInFunction::IsEven, @@ -611,7 +430,7 @@ impl<'src> Parser<'src> { type_arguments: None, value_arguments, }, - left_span, + left_position, )) } _ => Err(ParseError::UnexpectedToken { @@ -621,10 +440,62 @@ impl<'src> Parser<'src> { } } + fn parse_infix(&mut self, left: Node) -> Result, ParseError> { + let left_start = left.position.0; + + let binary_operator = match &self.current { + (Token::Dot, _) => { + self.next_token()?; + + let right_node = self.parse_node(0)?; + let right_end = right_node.position.1; + + return Ok(Node::new( + Statement::PropertyAccess(Box::new(left), Box::new(right_node)), + (left_start, right_end), + )); + } + (Token::DoubleAmpersand, _) => Node::new(BinaryOperator::And, self.current.1), + (Token::DoubleEqual, _) => Node::new(BinaryOperator::Equal, self.current.1), + (Token::DoublePipe, _) => Node::new(BinaryOperator::Or, self.current.1), + (Token::Greater, _) => Node::new(BinaryOperator::Greater, self.current.1), + (Token::GreaterEqual, _) => Node::new(BinaryOperator::GreaterOrEqual, self.current.1), + (Token::Less, _) => Node::new(BinaryOperator::Less, self.current.1), + (Token::LessEqual, _) => Node::new(BinaryOperator::LessOrEqual, self.current.1), + (Token::Minus, _) => Node::new(BinaryOperator::Subtract, self.current.1), + (Token::Plus, _) => Node::new(BinaryOperator::Add, self.current.1), + (Token::Star, _) => Node::new(BinaryOperator::Multiply, self.current.1), + (Token::Slash, _) => Node::new(BinaryOperator::Divide, self.current.1), + (Token::Percent, _) => Node::new(BinaryOperator::Modulo, self.current.1), + _ => { + self.next_token()?; + + return Err(ParseError::UnexpectedToken { + actual: self.current.0.to_owned(), + position: self.current.1, + }); + } + }; + + self.next_token()?; + + let right = self.parse_node(0)?; + let right_end = right.position.1; + + Ok(Node::new( + Statement::BinaryOperation { + left: Box::new(left), + operator: binary_operator, + right: Box::new(right), + }, + (left_start, right_end), + )) + } + fn current_precedence(&self) -> u8 { match self.current.0 { Token::DoubleEqual => 7, - Token::DoubleAmpersand => 6, + Token::DoubleAmpersand | Token::DoublePipe => 6, Token::Greater | Token::GreaterEqual | Token::Less | Token::LessEqual => 5, Token::Dot => 4, Token::Percent => 3, @@ -639,6 +510,10 @@ impl<'src> Parser<'src> { #[derive(Debug, PartialEq, Clone)] pub enum ParseError { + BooleanError { + error: ParseBoolError, + position: Span, + }, LexError { error: LexError, position: Span, @@ -656,15 +531,26 @@ pub enum ParseError { actual: TokenOwned, position: Span, }, + FloatError { + error: ParseFloatError, + position: Span, + }, + IntegerError { + error: ParseIntError, + position: Span, + }, } impl ParseError { pub fn position(&self) -> Span { match self { - Self::LexError { position, .. } => *position, - Self::ExpectedIdentifier { position, .. } => *position, - Self::ExpectedToken { position, .. } => *position, - Self::UnexpectedToken { position, .. } => *position, + ParseError::BooleanError { position, .. } => *position, + ParseError::ExpectedIdentifier { position, .. } => *position, + ParseError::ExpectedToken { position, .. } => *position, + ParseError::FloatError { position, .. } => *position, + ParseError::IntegerError { position, .. } => *position, + ParseError::LexError { position, .. } => *position, + ParseError::UnexpectedToken { position, .. } => *position, } } } @@ -681,13 +567,16 @@ impl Error for ParseError { impl Display for ParseError { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { - Self::LexError { error, .. } => write!(f, "{}", error), + Self::BooleanError { error, .. } => write!(f, "{}", error), Self::ExpectedIdentifier { actual, .. } => { write!(f, "Expected identifier, found {actual}") } Self::ExpectedToken { expected, actual, .. } => write!(f, "Expected token {expected}, found {actual}"), + Self::FloatError { error, .. } => write!(f, "{}", error), + Self::IntegerError { error, .. } => write!(f, "{}", error), + Self::LexError { error, .. } => write!(f, "{}", error), Self::UnexpectedToken { actual, .. } => write!(f, "Unexpected token {actual}"), } } @@ -699,6 +588,32 @@ mod tests { use super::*; + #[test] + fn or() { + let input = "true || false"; + + assert_eq!( + parse(input), + Ok(AbstractSyntaxTree { + nodes: [Node::new( + Statement::BinaryOperation { + left: Box::new(Node::new( + Statement::Constant(Value::boolean(true)), + (0, 4) + )), + operator: Node::new(BinaryOperator::Or, (5, 7)), + right: Box::new(Node::new( + Statement::Constant(Value::boolean(false)), + (8, 13) + )), + }, + (0, 13) + )] + .into() + }) + ); + } + #[test] fn misplaced_semicolon() { let input = ";"; diff --git a/dust-lang/src/token.rs b/dust-lang/src/token.rs index 503ebf4..2440dba 100644 --- a/dust-lang/src/token.rs +++ b/dust-lang/src/token.rs @@ -11,9 +11,9 @@ pub enum Token<'src> { Identifier(&'src str), // Hard-coded values - Boolean(bool), - Float(f64), - Integer(i64), + Boolean(&'src str), + Float(&'src str), + Integer(&'src str), String(&'src str), // Keywords @@ -51,7 +51,7 @@ pub enum Token<'src> { impl<'src> Token<'src> { pub fn to_owned(&self) -> TokenOwned { match self { - Token::Boolean(boolean) => TokenOwned::Boolean(*boolean), + Token::Boolean(boolean) => TokenOwned::Boolean(boolean.to_string()), Token::Comma => TokenOwned::Comma, Token::Dot => TokenOwned::Dot, Token::DoubleAmpersand => TokenOwned::DoubleAmpersand, @@ -59,11 +59,11 @@ impl<'src> Token<'src> { Token::DoublePipe => TokenOwned::DoublePipe, Token::Eof => TokenOwned::Eof, Token::Equal => TokenOwned::Equal, - Token::Float(float) => TokenOwned::Float(*float), + Token::Float(float) => TokenOwned::Float(float.to_string()), Token::Greater => TokenOwned::Greater, Token::GreaterEqual => TokenOwned::GreaterOrEqual, Token::Identifier(text) => TokenOwned::Identifier(text.to_string()), - Token::Integer(integer) => TokenOwned::Integer(*integer), + Token::Integer(integer) => TokenOwned::Integer(integer.to_string()), Token::IsEven => TokenOwned::IsEven, Token::IsOdd => TokenOwned::IsOdd, Token::LeftCurlyBrace => TokenOwned::LeftCurlyBrace, @@ -87,9 +87,11 @@ impl<'src> Token<'src> { } } - pub fn as_str(&self) -> &'static str { + pub fn as_str(&self) -> &str { match self { - Token::Boolean(_) => "boolean", + Token::Boolean(boolean_text) => boolean_text, + Token::Identifier(text) => text, + Token::Integer(integer_text) => integer_text, Token::Comma => ",", Token::Dot => ".", Token::DoubleAmpersand => "&&", @@ -100,8 +102,6 @@ impl<'src> Token<'src> { Token::Float(_) => "float", Token::Greater => ">", Token::GreaterEqual => ">=", - Token::Identifier(_) => "identifier", - Token::Integer(_) => "integer", Token::IsEven => "is_even", Token::IsOdd => "is_odd", Token::LeftCurlyBrace => "{", @@ -128,17 +128,13 @@ impl<'src> Token<'src> { impl<'src> Display for Token<'src> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - f.write_str(self.as_str()) + write!(f, "{}", self.as_str()) } } impl<'src> PartialEq for Token<'src> { fn eq(&self, other: &Self) -> bool { match (self, other) { - // Floats are compared by their bit representation. - (Token::Float(left), Token::Float(right)) => left.to_bits() == right.to_bits(), - - // Compare all other variants normally. (Token::Boolean(left), Token::Boolean(right)) => left == right, (Token::Comma, Token::Comma) => true, (Token::Dot, Token::Dot) => true, @@ -147,6 +143,7 @@ impl<'src> PartialEq for Token<'src> { (Token::DoublePipe, Token::DoublePipe) => true, (Token::Eof, Token::Eof) => true, (Token::Equal, Token::Equal) => true, + (Token::Float(left), Token::Float(right)) => left == right, (Token::Greater, Token::Greater) => true, (Token::GreaterEqual, Token::GreaterEqual) => true, (Token::Identifier(left), Token::Identifier(right)) => left == right, @@ -186,9 +183,9 @@ pub enum TokenOwned { Identifier(String), // Hard-coded values - Boolean(bool), - Float(f64), - Integer(i64), + Boolean(String), + Float(String), + Integer(String), String(String), // Keywords diff --git a/dust-lang/src/vm.rs b/dust-lang/src/vm.rs index 278397c..ba67b19 100644 --- a/dust-lang/src/vm.rs +++ b/dust-lang/src/vm.rs @@ -448,6 +448,16 @@ impl Display for VmError { mod tests { use super::*; + #[test] + fn or() { + let input = "true || false"; + + assert_eq!( + run(input, &mut HashMap::new()), + Ok(Some(Value::boolean(true))) + ); + } + #[test] fn map_equal() { let input = "{ y = 'foo', } == { y = 'foo', }";