1
0

Refactor Pratt parser

This commit is contained in:
Jeff 2024-08-09 14:01:01 -04:00
parent ed82f3c64f
commit 60f8aab805
4 changed files with 268 additions and 330 deletions

View File

@ -6,7 +6,6 @@
use std::{ use std::{
error::Error, error::Error,
fmt::{self, Display, Formatter}, fmt::{self, Display, Formatter},
num::{ParseFloatError, ParseIntError},
}; };
use crate::{Span, Token}; use crate::{Span, Token};
@ -24,9 +23,9 @@ use crate::{Span, Token};
/// [ /// [
/// (Token::Identifier("x"), (0, 1)), /// (Token::Identifier("x"), (0, 1)),
/// (Token::Equal, (2, 3)), /// (Token::Equal, (2, 3)),
/// (Token::Integer(1), (4, 5)), /// (Token::Integer("1"), (4, 5)),
/// (Token::Plus, (6, 7)), /// (Token::Plus, (6, 7)),
/// (Token::Integer(2), (8, 9)), /// (Token::Integer("2"), (8, 9)),
/// (Token::Eof, (9, 9)), /// (Token::Eof, (9, 9)),
/// ] /// ]
/// ); /// );
@ -77,9 +76,9 @@ pub fn lex<'chars, 'src: 'chars>(input: &'src str) -> Result<Vec<(Token<'chars>,
/// [ /// [
/// (Token::Identifier("x"), (0, 1)), /// (Token::Identifier("x"), (0, 1)),
/// (Token::Equal, (2, 3)), /// (Token::Equal, (2, 3)),
/// (Token::Integer(1), (4, 5)), /// (Token::Integer("1"), (4, 5)),
/// (Token::Plus, (6, 7)), /// (Token::Plus, (6, 7)),
/// (Token::Integer(2), (8, 9)), /// (Token::Integer("2"), (8, 9)),
/// (Token::Eof, (9, 9)), /// (Token::Eof, (9, 9)),
/// ] /// ]
/// ) /// )
@ -110,7 +109,7 @@ impl Lexer {
self.position += 9; self.position += 9;
( (
Token::Float(f64::NEG_INFINITY), Token::Float("-Infinity"),
(self.position - 9, self.position), (self.position - 9, self.position),
) )
} else { } else {
@ -231,6 +230,17 @@ impl Lexer {
(Token::Semicolon, (self.position - 1, self.position)) (Token::Semicolon, (self.position - 1, self.position))
} }
'|' => {
if let Some('|') = self.peek_second_char(source) {
self.position += 2;
(Token::DoublePipe, (self.position - 2, self.position))
} else {
self.position += 1;
return Err(LexError::UnexpectedCharacter(c));
}
}
_ => { _ => {
self.position += 1; self.position += 1;
@ -244,6 +254,15 @@ impl Lexer {
Ok((token, span)) Ok((token, span))
} }
/// Peek at the next token without consuming the source.
///
/// Lexes one token starting at the current position and then puts the lexer back where it
/// started, so a following call to `next_token` yields the same token again.
///
/// # Errors
///
/// Returns the `LexError` produced by `next_token`. The position is restored in that case
/// as well, so a failed peek does not consume any input.
pub fn peek_token<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> {
    // Remember where lexing started instead of rewinding by `as_str().len()`: `as_str`
    // returns a fixed label rather than the source text for some variants (for example
    // "float" for `Token::Float`), so its length is not the number of bytes consumed.
    let start_position = self.position;
    let result = self.next_token(source);

    self.position = start_position;

    result
}
/// Progress to the next character. /// Progress to the next character.
fn next_char(&mut self, source: &str) -> Option<char> { fn next_char(&mut self, source: &str) -> Option<char> {
if let Some(c) = source[self.position..].chars().next() { if let Some(c) = source[self.position..].chars().next() {
@ -335,14 +354,12 @@ impl Lexer {
} }
} }
let text = &source[start_pos..self.position];
if is_float { if is_float {
let float = source[start_pos..self.position].parse::<f64>()?; Ok((Token::Float(text), (start_pos, self.position)))
Ok((Token::Float(float), (start_pos, self.position)))
} else { } else {
let integer = source[start_pos..self.position].parse::<i64>()?; Ok((Token::Integer(text), (start_pos, self.position)))
Ok((Token::Integer(integer), (start_pos, self.position)))
} }
} }
@ -363,13 +380,13 @@ impl Lexer {
let string = &source[start_pos..self.position]; let string = &source[start_pos..self.position];
let token = match string { let token = match string {
"true" => Token::Boolean(true), "true" => Token::Boolean("true"),
"false" => Token::Boolean(false), "false" => Token::Boolean("false"),
"Infinity" => Token::Float(f64::INFINITY), "Infinity" => Token::Float("Infinity"),
"is_even" => Token::IsEven, "is_even" => Token::IsEven,
"is_odd" => Token::IsOdd, "is_odd" => Token::IsOdd,
"length" => Token::Length, "length" => Token::Length,
"NaN" => Token::Float(f64::NAN), "NaN" => Token::Float("NaN"),
"read_line" => Token::ReadLine, "read_line" => Token::ReadLine,
"write_line" => Token::WriteLine, "write_line" => Token::WriteLine,
_ => Token::Identifier(string), _ => Token::Identifier(string),
@ -410,16 +427,12 @@ impl Default for Lexer {
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum LexError { pub enum LexError {
FloatError(ParseFloatError),
IntegerError(ParseIntError),
UnexpectedCharacter(char), UnexpectedCharacter(char),
} }
impl Error for LexError { impl Error for LexError {
fn source(&self) -> Option<&(dyn Error + 'static)> { fn source(&self) -> Option<&(dyn Error + 'static)> {
match self { match self {
Self::FloatError(parse_float_error) => Some(parse_float_error),
Self::IntegerError(parse_int_error) => Some(parse_int_error),
Self::UnexpectedCharacter(_) => None, Self::UnexpectedCharacter(_) => None,
} }
} }
@ -428,12 +441,6 @@ impl Error for LexError {
impl Display for LexError { impl Display for LexError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self { match self {
Self::FloatError(parse_float_error) => {
write!(f, "Failed to parse float: {}", parse_float_error)
}
Self::IntegerError(parse_int_error) => {
write!(f, "Failed to parse integer: {}", parse_int_error)
}
Self::UnexpectedCharacter(character) => { Self::UnexpectedCharacter(character) => {
write!(f, "Unexpected character: '{}'", character) write!(f, "Unexpected character: '{}'", character)
} }
@ -441,22 +448,25 @@ impl Display for LexError {
} }
} }
impl From<ParseFloatError> for LexError {
fn from(error: std::num::ParseFloatError) -> Self {
Self::FloatError(error)
}
}
impl From<ParseIntError> for LexError {
fn from(error: std::num::ParseIntError) -> Self {
Self::IntegerError(error)
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
#[test]
fn or() {
    // `||` must lex as one `DoublePipe` token sitting between the two boolean literals.
    let source = "true || false";
    let expected = vec![
        (Token::Boolean("true"), (0, 4)),
        (Token::DoublePipe, (5, 7)),
        (Token::Boolean("false"), (8, 13)),
        (Token::Eof, (13, 13)),
    ];

    assert_eq!(lex(source), Ok(expected));
}
#[test] #[test]
fn block() { fn block() {
let input = "{ x = 42; y = 'foobar' }"; let input = "{ x = 42; y = 'foobar' }";
@ -467,7 +477,7 @@ mod tests {
(Token::LeftCurlyBrace, (0, 1)), (Token::LeftCurlyBrace, (0, 1)),
(Token::Identifier("x"), (2, 3)), (Token::Identifier("x"), (2, 3)),
(Token::Equal, (4, 5)), (Token::Equal, (4, 5)),
(Token::Integer(42), (6, 8)), (Token::Integer("42"), (6, 8)),
(Token::Semicolon, (8, 9)), (Token::Semicolon, (8, 9)),
(Token::Identifier("y"), (10, 11)), (Token::Identifier("y"), (10, 11)),
(Token::Equal, (12, 13)), (Token::Equal, (12, 13)),
@ -485,9 +495,9 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Integer(42), (0, 2)), (Token::Integer("42"), (0, 2)),
(Token::DoubleEqual, (3, 5)), (Token::DoubleEqual, (3, 5)),
(Token::Integer(42), (6, 8)), (Token::Integer("42"), (6, 8)),
(Token::Eof, (8, 8)), (Token::Eof, (8, 8)),
]) ])
) )
@ -500,9 +510,9 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Integer(42), (0, 2)), (Token::Integer("42"), (0, 2)),
(Token::Percent, (3, 4)), (Token::Percent, (3, 4)),
(Token::Integer(2), (5, 6)), (Token::Integer("2"), (5, 6)),
(Token::Eof, (6, 6)), (Token::Eof, (6, 6)),
]) ])
) )
@ -515,9 +525,9 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Integer(42), (0, 2)), (Token::Integer("42"), (0, 2)),
(Token::Slash, (3, 4)), (Token::Slash, (3, 4)),
(Token::Integer(2), (5, 6)), (Token::Integer("2"), (5, 6)),
(Token::Eof, (6, 6)), (Token::Eof, (6, 6)),
]) ])
) )
@ -533,7 +543,7 @@ mod tests {
(Token::LeftCurlyBrace, (0, 1)), (Token::LeftCurlyBrace, (0, 1)),
(Token::Identifier("x"), (2, 3)), (Token::Identifier("x"), (2, 3)),
(Token::Equal, (4, 5)), (Token::Equal, (4, 5)),
(Token::Integer(42), (6, 8)), (Token::Integer("42"), (6, 8)),
(Token::Comma, (8, 9)), (Token::Comma, (8, 9)),
(Token::Identifier("y"), (10, 11)), (Token::Identifier("y"), (10, 11)),
(Token::Equal, (12, 13)), (Token::Equal, (12, 13)),
@ -591,7 +601,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Float(f64::INFINITY), (0, 8)), (Token::Float("Infinity"), (0, 8)),
(Token::Eof, (8, 8)), (Token::Eof, (8, 8)),
]) ])
) )
@ -604,7 +614,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Float(f64::NEG_INFINITY), (0, 9)), (Token::Float("-Infinity"), (0, 9)),
(Token::Eof, (9, 9)), (Token::Eof, (9, 9)),
]) ])
) )
@ -614,7 +624,7 @@ mod tests {
fn nan() { fn nan() {
let input = "NaN"; let input = "NaN";
assert!(lex(input).is_ok_and(|tokens| tokens[0].0 == Token::Float(f64::NAN))); assert!(lex(input).is_ok_and(|tokens| tokens[0].0 == Token::Float("NaN")));
} }
#[test] #[test]
@ -623,7 +633,10 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![(Token::Float(42.42e42), (0, 8)), (Token::Eof, (8, 8)),]) Ok(vec![
(Token::Float("42.42e42"), (0, 8)),
(Token::Eof, (8, 8)),
])
) )
} }
@ -634,7 +647,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Integer(i64::MAX), (0, 19)), (Token::Integer("9223372036854775807"), (0, 19)),
(Token::Eof, (19, 19)), (Token::Eof, (19, 19)),
]) ])
) )
@ -647,7 +660,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Integer(i64::MIN), (0, 20)), (Token::Integer("-9223372036854775808"), (0, 20)),
(Token::Eof, (20, 20)), (Token::Eof, (20, 20)),
]) ])
) )
@ -660,9 +673,9 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Integer(-42), (0, 3)), (Token::Integer("-42"), (0, 3)),
(Token::Minus, (4, 5)), (Token::Minus, (4, 5)),
(Token::Integer(-42), (6, 9)), (Token::Integer("-42"), (6, 9)),
(Token::Eof, (9, 9)), (Token::Eof, (9, 9)),
]) ])
) )
@ -674,7 +687,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![(Token::Integer(-42), (0, 3)), (Token::Eof, (3, 3))]) Ok(vec![(Token::Integer("-42"), (0, 3)), (Token::Eof, (3, 3))])
) )
} }
@ -743,7 +756,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![(Token::Boolean(true), (0, 4)), (Token::Eof, (4, 4)),]) Ok(vec![(Token::Boolean("true"), (0, 4)), (Token::Eof, (4, 4)),])
) )
} }
@ -753,7 +766,10 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![(Token::Boolean(false), (0, 5)), (Token::Eof, (5, 5))]) Ok(vec![
(Token::Boolean("false"), (0, 5)),
(Token::Eof, (5, 5))
])
) )
} }
@ -764,7 +780,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Integer(42), (0, 2)), (Token::Integer("42"), (0, 2)),
(Token::Dot, (2, 3)), (Token::Dot, (2, 3)),
(Token::IsEven, (3, 10)), (Token::IsEven, (3, 10)),
(Token::LeftParenthesis, (10, 11)), (Token::LeftParenthesis, (10, 11)),
@ -811,7 +827,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![(Token::Float(1.23), (0, 4)), (Token::Eof, (4, 4)),]) Ok(vec![(Token::Float("1.23"), (0, 4)), (Token::Eof, (4, 4)),])
) )
} }
@ -823,7 +839,7 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Float(123456789.123456789), (0, 19)), (Token::Float("123456789.123456789"), (0, 19)),
(Token::Eof, (19, 19)), (Token::Eof, (19, 19)),
]) ])
) )
@ -836,9 +852,9 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Integer(1), (0, 1)), (Token::Integer("1"), (0, 1)),
(Token::Plus, (2, 3)), (Token::Plus, (2, 3)),
(Token::Integer(2), (4, 5)), (Token::Integer("2"), (4, 5)),
(Token::Eof, (5, 5)), (Token::Eof, (5, 5)),
]) ])
) )
@ -851,9 +867,9 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Integer(1), (0, 1)), (Token::Integer("1"), (0, 1)),
(Token::Star, (2, 3)), (Token::Star, (2, 3)),
(Token::Integer(2), (4, 5)), (Token::Integer("2"), (4, 5)),
(Token::Eof, (5, 5)), (Token::Eof, (5, 5)),
]) ])
) )
@ -866,11 +882,11 @@ mod tests {
assert_eq!( assert_eq!(
lex(input), lex(input),
Ok(vec![ Ok(vec![
(Token::Integer(1), (0, 1)), (Token::Integer("1"), (0, 1)),
(Token::Plus, (2, 3)), (Token::Plus, (2, 3)),
(Token::Integer(2), (4, 5)), (Token::Integer("2"), (4, 5)),
(Token::Star, (6, 7)), (Token::Star, (6, 7)),
(Token::Integer(3), (8, 9)), (Token::Integer("3"), (8, 9)),
(Token::Eof, (9, 9)), (Token::Eof, (9, 9)),
]) ])
); );
@ -885,11 +901,11 @@ mod tests {
Ok(vec![ Ok(vec![
(Token::Identifier("a"), (0, 1)), (Token::Identifier("a"), (0, 1)),
(Token::Equal, (2, 3)), (Token::Equal, (2, 3)),
(Token::Integer(1), (4, 5)), (Token::Integer("1"), (4, 5)),
(Token::Plus, (6, 7)), (Token::Plus, (6, 7)),
(Token::Integer(2), (8, 9)), (Token::Integer("2"), (8, 9)),
(Token::Star, (10, 11)), (Token::Star, (10, 11)),
(Token::Integer(3), (12, 13)), (Token::Integer("3"), (12, 13)),
(Token::Eof, (13, 13)), (Token::Eof, (13, 13)),
]) ])
); );

View File

@ -7,6 +7,8 @@ use std::{
collections::VecDeque, collections::VecDeque,
error::Error, error::Error,
fmt::{self, Display, Formatter}, fmt::{self, Display, Formatter},
num::{ParseFloatError, ParseIntError},
str::ParseBoolError,
}; };
use crate::{ use crate::{
@ -150,243 +152,54 @@ impl<'src> Parser<'src> {
} }
fn parse_node(&mut self, precedence: u8) -> Result<Node<Statement>, ParseError> { fn parse_node(&mut self, precedence: u8) -> Result<Node<Statement>, ParseError> {
let left_node = self.parse_primary()?; let left = self.parse_primary()?;
let left_start = left_node.position.0;
if precedence < self.current_precedence() { if precedence < self.current_precedence() {
match &self.current { self.parse_infix(left)
(Token::Dot, _) => { } else {
self.next_token()?; Ok(left)
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::PropertyAccess(Box::new(left_node), Box::new(right_node)),
(left_start, right_end),
));
} }
(Token::DoubleAmpersand, _) => {
let operator = Node::new(BinaryOperator::And, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::DoubleEqual, _) => {
let operator = Node::new(BinaryOperator::Equal, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::Greater, _) => {
let operator = Node::new(BinaryOperator::Greater, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::GreaterEqual, _) => {
let operator = Node::new(BinaryOperator::GreaterOrEqual, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::Less, _) => {
let operator = Node::new(BinaryOperator::Less, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::LessEqual, _) => {
let operator = Node::new(BinaryOperator::LessOrEqual, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::Minus, _) => {
let operator = Node::new(BinaryOperator::Subtract, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::Plus, _) => {
let operator = Node::new(BinaryOperator::Add, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::Semicolon, (_, right_end)) => {
return Ok(Node::new(
Statement::Nil(Box::new(left_node)),
(left_start, *right_end),
))
}
(Token::Star, _) => {
let operator = Node::new(BinaryOperator::Multiply, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::Slash, _) => {
let operator = Node::new(BinaryOperator::Divide, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::Percent, _) => {
let operator = Node::new(BinaryOperator::Modulo, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
_ => {}
}
}
Ok(left_node)
} }
fn parse_primary(&mut self) -> Result<Node<Statement>, ParseError> { fn parse_primary(&mut self) -> Result<Node<Statement>, ParseError> {
match self.current { match self.current {
(Token::Boolean(boolean), span) => { (Token::Boolean(text), position) => {
self.next_token()?; self.next_token()?;
let boolean = text
.parse()
.map_err(|error| ParseError::BooleanError { error, position })?;
Ok(Node::new( Ok(Node::new(
Statement::Constant(Value::boolean(boolean)), Statement::Constant(Value::boolean(boolean)),
span, position,
)) ))
} }
(Token::Float(float), span) => { (Token::Float(text), position) => {
self.next_token()?; self.next_token()?;
Ok(Node::new(Statement::Constant(Value::float(float)), span)) let float = text
.parse()
.map_err(|error| ParseError::FloatError { error, position })?;
Ok(Node::new(
Statement::Constant(Value::float(float)),
position,
))
} }
(Token::Integer(int), span) => { (Token::Integer(text), position) => {
self.next_token()?; self.next_token()?;
Ok(Node::new(Statement::Constant(Value::integer(int)), span)) let integer = text
.parse()
.map_err(|error| ParseError::IntegerError { error, position })?;
Ok(Node::new(
Statement::Constant(Value::integer(integer)),
position,
))
} }
(Token::Identifier(text), span) => { (Token::Identifier(text), position) => {
self.next_token()?; self.next_token()?;
if let (Token::Equal, _) = self.current { if let (Token::Equal, _) = self.current {
@ -397,33 +210,36 @@ impl<'src> Parser<'src> {
Ok(Node::new( Ok(Node::new(
Statement::Assignment { Statement::Assignment {
identifier: Node::new(Identifier::new(text), span), identifier: Node::new(Identifier::new(text), position),
value_node: Box::new(value_node), value_node: Box::new(value_node),
}, },
(span.0, right_end), (position.0, right_end),
)) ))
} else { } else {
Ok(Node::new( Ok(Node::new(
Statement::Identifier(Identifier::new(text)), Statement::Identifier(Identifier::new(text)),
span, position,
)) ))
} }
} }
(Token::String(string), span) => { (Token::String(string), position) => {
self.next_token()?; self.next_token()?;
Ok(Node::new(Statement::Constant(Value::string(string)), span)) Ok(Node::new(
Statement::Constant(Value::string(string)),
position,
))
} }
(Token::LeftCurlyBrace, left_span) => { (Token::LeftCurlyBrace, left_position) => {
self.next_token()?; self.next_token()?;
// If the next token is a right curly brace, this is an empty map // If the next token is a right curly brace, this is an empty map
if let (Token::RightCurlyBrace, right_span) = self.current { if let (Token::RightCurlyBrace, right_position) = self.current {
self.next_token()?; self.next_token()?;
return Ok(Node::new( return Ok(Node::new(
Statement::Map(Vec::new()), Statement::Map(Vec::new()),
(left_span.0, right_span.1), (left_position.0, right_position.1),
)); ));
} }
@ -431,10 +247,13 @@ impl<'src> Parser<'src> {
loop { loop {
// If a closing brace is found, return the new statement // If a closing brace is found, return the new statement
if let (Token::RightCurlyBrace, right_span) = self.current { if let (Token::RightCurlyBrace, right_position) = self.current {
self.next_token()?; self.next_token()?;
return Ok(Node::new(statement.unwrap(), (left_span.0, right_span.1))); return Ok(Node::new(
statement.unwrap(),
(left_position.0, right_position.1),
));
} }
let next_node = self.parse_node(0)?; let next_node = self.parse_node(0)?;
@ -503,15 +322,15 @@ impl<'src> Parser<'src> {
} }
} }
} }
(Token::LeftParenthesis, left_span) => { (Token::LeftParenthesis, left_position) => {
self.next_token()?; self.next_token()?;
let node = self.parse_node(0)?; let node = self.parse_node(0)?;
if let (Token::RightParenthesis, right_span) = self.current { if let (Token::RightParenthesis, right_position) = self.current {
self.next_token()?; self.next_token()?;
Ok(Node::new(node.inner, (left_span.0, right_span.1))) Ok(Node::new(node.inner, (left_position.0, right_position.1)))
} else { } else {
Err(ParseError::ExpectedToken { Err(ParseError::ExpectedToken {
expected: TokenOwned::RightParenthesis, expected: TokenOwned::RightParenthesis,
@ -520,18 +339,18 @@ impl<'src> Parser<'src> {
}) })
} }
} }
(Token::LeftSquareBrace, left_span) => { (Token::LeftSquareBrace, left_position) => {
self.next_token()?; self.next_token()?;
let mut nodes = Vec::new(); let mut nodes = Vec::new();
loop { loop {
if let (Token::RightSquareBrace, right_span) = self.current { if let (Token::RightSquareBrace, right_position) = self.current {
self.next_token()?; self.next_token()?;
return Ok(Node::new( return Ok(Node::new(
Statement::List(nodes), Statement::List(nodes),
(left_span.0, right_span.1), (left_position.0, right_position.1),
)); ));
} }
@ -554,7 +373,7 @@ impl<'src> Parser<'src> {
} }
( (
Token::IsEven | Token::IsOdd | Token::Length | Token::ReadLine | Token::WriteLine, Token::IsEven | Token::IsOdd | Token::Length | Token::ReadLine | Token::WriteLine,
left_span, left_position,
) => { ) => {
let function = match self.current.0 { let function = match self.current.0 {
Token::IsEven => BuiltInFunction::IsEven, Token::IsEven => BuiltInFunction::IsEven,
@ -611,7 +430,7 @@ impl<'src> Parser<'src> {
type_arguments: None, type_arguments: None,
value_arguments, value_arguments,
}, },
left_span, left_position,
)) ))
} }
_ => Err(ParseError::UnexpectedToken { _ => Err(ParseError::UnexpectedToken {
@ -621,10 +440,62 @@ impl<'src> Parser<'src> {
} }
} }
fn parse_infix(&mut self, left: Node<Statement>) -> Result<Node<Statement>, ParseError> {
let left_start = left.position.0;
let binary_operator = match &self.current {
(Token::Dot, _) => {
self.next_token()?;
let right_node = self.parse_node(0)?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::PropertyAccess(Box::new(left), Box::new(right_node)),
(left_start, right_end),
));
}
(Token::DoubleAmpersand, _) => Node::new(BinaryOperator::And, self.current.1),
(Token::DoubleEqual, _) => Node::new(BinaryOperator::Equal, self.current.1),
(Token::DoublePipe, _) => Node::new(BinaryOperator::Or, self.current.1),
(Token::Greater, _) => Node::new(BinaryOperator::Greater, self.current.1),
(Token::GreaterEqual, _) => Node::new(BinaryOperator::GreaterOrEqual, self.current.1),
(Token::Less, _) => Node::new(BinaryOperator::Less, self.current.1),
(Token::LessEqual, _) => Node::new(BinaryOperator::LessOrEqual, self.current.1),
(Token::Minus, _) => Node::new(BinaryOperator::Subtract, self.current.1),
(Token::Plus, _) => Node::new(BinaryOperator::Add, self.current.1),
(Token::Star, _) => Node::new(BinaryOperator::Multiply, self.current.1),
(Token::Slash, _) => Node::new(BinaryOperator::Divide, self.current.1),
(Token::Percent, _) => Node::new(BinaryOperator::Modulo, self.current.1),
_ => {
self.next_token()?;
return Err(ParseError::UnexpectedToken {
actual: self.current.0.to_owned(),
position: self.current.1,
});
}
};
self.next_token()?;
let right = self.parse_node(0)?;
let right_end = right.position.1;
Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left),
operator: binary_operator,
right: Box::new(right),
},
(left_start, right_end),
))
}
fn current_precedence(&self) -> u8 { fn current_precedence(&self) -> u8 {
match self.current.0 { match self.current.0 {
Token::DoubleEqual => 7, Token::DoubleEqual => 7,
Token::DoubleAmpersand => 6, Token::DoubleAmpersand | Token::DoublePipe => 6,
Token::Greater | Token::GreaterEqual | Token::Less | Token::LessEqual => 5, Token::Greater | Token::GreaterEqual | Token::Less | Token::LessEqual => 5,
Token::Dot => 4, Token::Dot => 4,
Token::Percent => 3, Token::Percent => 3,
@ -639,6 +510,10 @@ impl<'src> Parser<'src> {
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum ParseError { pub enum ParseError {
BooleanError {
error: ParseBoolError,
position: Span,
},
LexError { LexError {
error: LexError, error: LexError,
position: Span, position: Span,
@ -656,15 +531,26 @@ pub enum ParseError {
actual: TokenOwned, actual: TokenOwned,
position: Span, position: Span,
}, },
FloatError {
error: ParseFloatError,
position: Span,
},
IntegerError {
error: ParseIntError,
position: Span,
},
} }
impl ParseError { impl ParseError {
pub fn position(&self) -> Span { pub fn position(&self) -> Span {
match self { match self {
Self::LexError { position, .. } => *position, ParseError::BooleanError { position, .. } => *position,
Self::ExpectedIdentifier { position, .. } => *position, ParseError::ExpectedIdentifier { position, .. } => *position,
Self::ExpectedToken { position, .. } => *position, ParseError::ExpectedToken { position, .. } => *position,
Self::UnexpectedToken { position, .. } => *position, ParseError::FloatError { position, .. } => *position,
ParseError::IntegerError { position, .. } => *position,
ParseError::LexError { position, .. } => *position,
ParseError::UnexpectedToken { position, .. } => *position,
} }
} }
} }
@ -681,13 +567,16 @@ impl Error for ParseError {
impl Display for ParseError { impl Display for ParseError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self { match self {
Self::LexError { error, .. } => write!(f, "{}", error), Self::BooleanError { error, .. } => write!(f, "{}", error),
Self::ExpectedIdentifier { actual, .. } => { Self::ExpectedIdentifier { actual, .. } => {
write!(f, "Expected identifier, found {actual}") write!(f, "Expected identifier, found {actual}")
} }
Self::ExpectedToken { Self::ExpectedToken {
expected, actual, .. expected, actual, ..
} => write!(f, "Expected token {expected}, found {actual}"), } => write!(f, "Expected token {expected}, found {actual}"),
Self::FloatError { error, .. } => write!(f, "{}", error),
Self::IntegerError { error, .. } => write!(f, "{}", error),
Self::LexError { error, .. } => write!(f, "{}", error),
Self::UnexpectedToken { actual, .. } => write!(f, "Unexpected token {actual}"), Self::UnexpectedToken { actual, .. } => write!(f, "Unexpected token {actual}"),
} }
} }
@ -699,6 +588,32 @@ mod tests {
use super::*; use super::*;
#[test]
fn or() {
    // `true || false` must parse into a single `Or` binary operation over two constants.
    let source = "true || false";

    let left_operand = Node::new(Statement::Constant(Value::boolean(true)), (0, 4));
    let right_operand = Node::new(Statement::Constant(Value::boolean(false)), (8, 13));
    let operation = Node::new(
        Statement::BinaryOperation {
            left: Box::new(left_operand),
            operator: Node::new(BinaryOperator::Or, (5, 7)),
            right: Box::new(right_operand),
        },
        (0, 13),
    );

    assert_eq!(
        parse(source),
        Ok(AbstractSyntaxTree {
            nodes: [operation].into()
        })
    );
}
#[test] #[test]
fn misplaced_semicolon() { fn misplaced_semicolon() {
let input = ";"; let input = ";";

View File

@ -11,9 +11,9 @@ pub enum Token<'src> {
Identifier(&'src str), Identifier(&'src str),
// Hard-coded values // Hard-coded values
Boolean(bool), Boolean(&'src str),
Float(f64), Float(&'src str),
Integer(i64), Integer(&'src str),
String(&'src str), String(&'src str),
// Keywords // Keywords
@ -51,7 +51,7 @@ pub enum Token<'src> {
impl<'src> Token<'src> { impl<'src> Token<'src> {
pub fn to_owned(&self) -> TokenOwned { pub fn to_owned(&self) -> TokenOwned {
match self { match self {
Token::Boolean(boolean) => TokenOwned::Boolean(*boolean), Token::Boolean(boolean) => TokenOwned::Boolean(boolean.to_string()),
Token::Comma => TokenOwned::Comma, Token::Comma => TokenOwned::Comma,
Token::Dot => TokenOwned::Dot, Token::Dot => TokenOwned::Dot,
Token::DoubleAmpersand => TokenOwned::DoubleAmpersand, Token::DoubleAmpersand => TokenOwned::DoubleAmpersand,
@ -59,11 +59,11 @@ impl<'src> Token<'src> {
Token::DoublePipe => TokenOwned::DoublePipe, Token::DoublePipe => TokenOwned::DoublePipe,
Token::Eof => TokenOwned::Eof, Token::Eof => TokenOwned::Eof,
Token::Equal => TokenOwned::Equal, Token::Equal => TokenOwned::Equal,
Token::Float(float) => TokenOwned::Float(*float), Token::Float(float) => TokenOwned::Float(float.to_string()),
Token::Greater => TokenOwned::Greater, Token::Greater => TokenOwned::Greater,
Token::GreaterEqual => TokenOwned::GreaterOrEqual, Token::GreaterEqual => TokenOwned::GreaterOrEqual,
Token::Identifier(text) => TokenOwned::Identifier(text.to_string()), Token::Identifier(text) => TokenOwned::Identifier(text.to_string()),
Token::Integer(integer) => TokenOwned::Integer(*integer), Token::Integer(integer) => TokenOwned::Integer(integer.to_string()),
Token::IsEven => TokenOwned::IsEven, Token::IsEven => TokenOwned::IsEven,
Token::IsOdd => TokenOwned::IsOdd, Token::IsOdd => TokenOwned::IsOdd,
Token::LeftCurlyBrace => TokenOwned::LeftCurlyBrace, Token::LeftCurlyBrace => TokenOwned::LeftCurlyBrace,
@ -87,9 +87,11 @@ impl<'src> Token<'src> {
} }
} }
pub fn as_str(&self) -> &'static str { pub fn as_str(&self) -> &str {
match self { match self {
Token::Boolean(_) => "boolean", Token::Boolean(boolean_text) => boolean_text,
Token::Identifier(text) => text,
Token::Integer(integer_text) => integer_text,
Token::Comma => ",", Token::Comma => ",",
Token::Dot => ".", Token::Dot => ".",
Token::DoubleAmpersand => "&&", Token::DoubleAmpersand => "&&",
@ -100,8 +102,6 @@ impl<'src> Token<'src> {
Token::Float(_) => "float", Token::Float(_) => "float",
Token::Greater => ">", Token::Greater => ">",
Token::GreaterEqual => ">=", Token::GreaterEqual => ">=",
Token::Identifier(_) => "identifier",
Token::Integer(_) => "integer",
Token::IsEven => "is_even", Token::IsEven => "is_even",
Token::IsOdd => "is_odd", Token::IsOdd => "is_odd",
Token::LeftCurlyBrace => "{", Token::LeftCurlyBrace => "{",
@ -128,17 +128,13 @@ impl<'src> Token<'src> {
impl<'src> Display for Token<'src> { impl<'src> Display for Token<'src> {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
f.write_str(self.as_str()) write!(f, "{}", self.as_str())
} }
} }
impl<'src> PartialEq for Token<'src> { impl<'src> PartialEq for Token<'src> {
fn eq(&self, other: &Self) -> bool { fn eq(&self, other: &Self) -> bool {
match (self, other) { match (self, other) {
// Floats are compared by their bit representation.
(Token::Float(left), Token::Float(right)) => left.to_bits() == right.to_bits(),
// Compare all other variants normally.
(Token::Boolean(left), Token::Boolean(right)) => left == right, (Token::Boolean(left), Token::Boolean(right)) => left == right,
(Token::Comma, Token::Comma) => true, (Token::Comma, Token::Comma) => true,
(Token::Dot, Token::Dot) => true, (Token::Dot, Token::Dot) => true,
@ -147,6 +143,7 @@ impl<'src> PartialEq for Token<'src> {
(Token::DoublePipe, Token::DoublePipe) => true, (Token::DoublePipe, Token::DoublePipe) => true,
(Token::Eof, Token::Eof) => true, (Token::Eof, Token::Eof) => true,
(Token::Equal, Token::Equal) => true, (Token::Equal, Token::Equal) => true,
(Token::Float(left), Token::Float(right)) => left == right,
(Token::Greater, Token::Greater) => true, (Token::Greater, Token::Greater) => true,
(Token::GreaterEqual, Token::GreaterEqual) => true, (Token::GreaterEqual, Token::GreaterEqual) => true,
(Token::Identifier(left), Token::Identifier(right)) => left == right, (Token::Identifier(left), Token::Identifier(right)) => left == right,
@ -186,9 +183,9 @@ pub enum TokenOwned {
Identifier(String), Identifier(String),
// Hard-coded values // Hard-coded values
Boolean(bool), Boolean(String),
Float(f64), Float(String),
Integer(i64), Integer(String),
String(String), String(String),
// Keywords // Keywords

View File

@ -448,6 +448,16 @@ impl Display for VmError {
mod tests { mod tests {
use super::*; use super::*;
#[test]
fn or() {
    // Running `true || false` against an empty variable map must yield `true`.
    let mut variables = HashMap::new();
    let outcome = run("true || false", &mut variables);

    assert_eq!(outcome, Ok(Some(Value::boolean(true))));
}
#[test] #[test]
fn map_equal() { fn map_equal() {
let input = "{ y = 'foo', } == { y = 'foo', }"; let input = "{ y = 'foo', } == { y = 'foo', }";