1
0

Make meticulous changes to pratt parser

This commit is contained in:
Jeff 2024-08-10 00:01:50 -04:00
parent 9ea203f419
commit 2ae75dcdd0
4 changed files with 135 additions and 113 deletions

View File

@ -228,7 +228,10 @@ impl Lexer {
} else { } else {
self.position += 1; self.position += 1;
return Err(LexError::UnexpectedCharacter(c)); return Err(LexError::UnexpectedCharacter {
character: c,
position: self.position,
});
} }
} }
';' => { ';' => {
@ -244,13 +247,19 @@ impl Lexer {
} else { } else {
self.position += 1; self.position += 1;
return Err(LexError::UnexpectedCharacter(c)); return Err(LexError::UnexpectedCharacter {
character: c,
position: self.position,
});
} }
} }
_ => { _ => {
self.position += 1; self.position += 1;
return Err(LexError::UnexpectedCharacter(c)); return Err(LexError::UnexpectedCharacter {
character: c,
position: self.position,
});
} }
} }
} else { } else {
@ -433,13 +442,21 @@ impl Default for Lexer {
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum LexError { pub enum LexError {
UnexpectedCharacter(char), UnexpectedCharacter { character: char, position: usize },
}
impl LexError {
/// Returns the span this error applies to.
///
/// The error stores a single offset, so that offset is used as both the
/// start and the end of the returned span.
pub fn position(&self) -> Span {
    match *self {
        Self::UnexpectedCharacter {
            position: offset, ..
        } => (offset, offset),
    }
}
} }
impl Error for LexError { impl Error for LexError {
fn source(&self) -> Option<&(dyn Error + 'static)> { fn source(&self) -> Option<&(dyn Error + 'static)> {
match self { match self {
Self::UnexpectedCharacter(_) => None, Self::UnexpectedCharacter { .. } => None,
} }
} }
} }
@ -447,7 +464,7 @@ impl Error for LexError {
impl Display for LexError { impl Display for LexError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result { fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self { match self {
Self::UnexpectedCharacter(character) => { Self::UnexpectedCharacter { character, .. } => {
write!(f, "Unexpected character: '{}'", character) write!(f, "Unexpected character: '{}'", character)
} }
} }

View File

@ -23,7 +23,7 @@ pub use identifier::Identifier;
pub use lex::{lex, LexError, Lexer}; pub use lex::{lex, LexError, Lexer};
pub use parse::{parse, ParseError, Parser}; pub use parse::{parse, ParseError, Parser};
pub use r#type::Type; pub use r#type::Type;
pub use token::Token; pub use token::{Token, TokenOwned};
pub use value::{Value, ValueError}; pub use value::{Value, ValueError};
pub use vm::{run, Vm, VmError}; pub use vm::{run, Vm, VmError};

View File

@ -12,8 +12,8 @@ use std::{
}; };
use crate::{ use crate::{
abstract_tree::BinaryOperator, built_in_function::BuiltInFunction, token::TokenOwned, AbstractSyntaxTree, BinaryOperator, BuiltInFunction, Identifier, LexError, Lexer, Node, Span,
AbstractSyntaxTree, Identifier, LexError, Lexer, Node, Span, Statement, Token, Value, Statement, Token, TokenOwned, Value,
}; };
/// Parses the input into an abstract syntax tree. /// Parses the input into an abstract syntax tree.
@ -121,46 +121,34 @@ impl<'src> Parser<'src> {
} }
} }
pub fn parse(&mut self) -> Result<Node<Statement>, ParseError> {
self.parse_node(0)
}
pub fn current(&self) -> &(Token, Span) { pub fn current(&self) -> &(Token, Span) {
&self.current &self.current
} }
fn next_token(&mut self) -> Result<(), ParseError> { pub fn parse(&mut self) -> Result<Node<Statement>, ParseError> {
let next = self.lexer.next_token(self.source); self.parse_statement(0)
self.current = match next {
Ok((token, position)) => (token, position),
Err(lex_error) => {
let position = {
self.next_token()?;
self.current.1
};
return Err(ParseError::LexError {
error: lex_error,
position,
});
} }
};
fn next_token(&mut self) -> Result<(), ParseError> {
self.current = self.lexer.next_token(self.source)?;
Ok(()) Ok(())
} }
fn parse_node(&mut self, precedence: u8) -> Result<Node<Statement>, ParseError> { fn parse_statement(&mut self, precedence: u8) -> Result<Node<Statement>, ParseError> {
let left = self.parse_primary()?; let mut left = self.parse_primary()?;
if precedence < self.current_precedence() { while precedence < self.current.0.precedence() {
self.parse_infix(left) if self.current.0.is_postfix() {
left = self.parse_postfix(left)?;
} else { } else {
Ok(left) left = self.parse_infix(left)?;
} }
} }
Ok(left)
}
fn parse_primary(&mut self) -> Result<Node<Statement>, ParseError> { fn parse_primary(&mut self) -> Result<Node<Statement>, ParseError> {
match self.current { match self.current {
(Token::Boolean(text), position) => { (Token::Boolean(text), position) => {
@ -241,7 +229,7 @@ impl<'src> Parser<'src> {
)); ));
} }
let next_node = self.parse_node(0)?; let next_node = self.parse_statement(0)?;
// If the next node is an assignment, this might be a map // If the next node is an assignment, this might be a map
if let Statement::BinaryOperation { if let Statement::BinaryOperation {
@ -264,20 +252,20 @@ impl<'src> Parser<'src> {
if let Statement::Map(map_properties) = if let Statement::Map(map_properties) =
statement.get_or_insert_with(|| Statement::Map(Vec::new())) statement.get_or_insert_with(|| Statement::Map(Vec::new()))
{ {
// Add the new property to the map
map_properties.push((*left, *right));
}
// Ignore commas after properties // Ignore commas after properties
if let Token::Comma = self.current.0 { if let Token::Comma = self.current.0 {
self.next_token()?; self.next_token()?;
} }
// Add the new property to the map
map_properties.push((*left, *right));
}
} }
// Otherwise, the new statement is a block // Otherwise, the new statement is a block
} else if let Statement::Block(statements) = } else if let Statement::Block(statements) =
statement.get_or_insert_with(|| Statement::Block(Vec::new())) statement.get_or_insert_with(|| Statement::Block(Vec::new()))
{ {
// Add the assignment statement to the block // Add the statement to the block
statements.push(next_node); statements.push(next_node);
} }
} }
@ -285,7 +273,7 @@ impl<'src> Parser<'src> {
(Token::LeftParenthesis, left_position) => { (Token::LeftParenthesis, left_position) => {
self.next_token()?; self.next_token()?;
let node = self.parse_node(0)?; let node = self.parse_statement(0)?;
if let (Token::RightParenthesis, right_position) = self.current { if let (Token::RightParenthesis, right_position) = self.current {
self.next_token()?; self.next_token()?;
@ -320,7 +308,7 @@ impl<'src> Parser<'src> {
continue; continue;
} }
if let Ok(instruction) = self.parse_node(0) { if let Ok(instruction) = self.parse_statement(0) {
nodes.push(instruction); nodes.push(instruction);
} else { } else {
return Err(ParseError::ExpectedToken { return Err(ParseError::ExpectedToken {
@ -369,7 +357,7 @@ impl<'src> Parser<'src> {
continue; continue;
} }
if let Ok(node) = self.parse_node(0) { if let Ok(node) = self.parse_statement(0) {
if let Some(ref mut arguments) = value_arguments { if let Some(ref mut arguments) = value_arguments {
arguments.push(node); arguments.push(node);
} else { } else {
@ -403,24 +391,10 @@ impl<'src> Parser<'src> {
fn parse_infix(&mut self, left: Node<Statement>) -> Result<Node<Statement>, ParseError> { fn parse_infix(&mut self, left: Node<Statement>) -> Result<Node<Statement>, ParseError> {
let left_start = left.position.0; let left_start = left.position.0;
// Postfix operations if let Token::Dot = &self.current.0 {
if let Token::Semicolon = &self.current.0 {
self.next_token()?; self.next_token()?;
let right_end = self.current.1 .1; let right = self.parse_statement(Token::Dot.precedence() + 1)?;
return Ok(Node::new(
Statement::Nil(Box::new(left)),
(left_start, right_end),
));
};
// Infix operations
let binary_operator = match &self.current {
(Token::Dot, _) => {
self.next_token()?;
let right = self.parse_node(0)?;
let right_end = right.position.1; let right_end = right.position.1;
return Ok(Node::new( return Ok(Node::new(
@ -428,34 +402,40 @@ impl<'src> Parser<'src> {
(left_start, right_end), (left_start, right_end),
)); ));
} }
(Token::DoubleAmpersand, _) => Node::new(BinaryOperator::And, self.current.1),
(Token::DoubleEqual, _) => Node::new(BinaryOperator::Equal, self.current.1),
(Token::DoublePipe, _) => Node::new(BinaryOperator::Or, self.current.1),
(Token::Equal, _) => Node::new(BinaryOperator::Assign, self.current.1),
(Token::Greater, _) => Node::new(BinaryOperator::Greater, self.current.1),
(Token::GreaterEqual, _) => Node::new(BinaryOperator::GreaterOrEqual, self.current.1),
(Token::Less, _) => Node::new(BinaryOperator::Less, self.current.1),
(Token::LessEqual, _) => Node::new(BinaryOperator::LessOrEqual, self.current.1),
(Token::Minus, _) => Node::new(BinaryOperator::Subtract, self.current.1),
(Token::Plus, _) => Node::new(BinaryOperator::Add, self.current.1),
(Token::PlusEqual, _) => Node::new(BinaryOperator::AddAssign, self.current.1),
(Token::Star, _) => Node::new(BinaryOperator::Multiply, self.current.1),
(Token::Slash, _) => Node::new(BinaryOperator::Divide, self.current.1),
(Token::Percent, _) => Node::new(BinaryOperator::Modulo, self.current.1),
_ => {
self.next_token()?;
let binary_operator = match &self.current.0 {
Token::DoubleAmpersand => Node::new(BinaryOperator::And, self.current.1),
Token::DoubleEqual => Node::new(BinaryOperator::Equal, self.current.1),
Token::DoublePipe => Node::new(BinaryOperator::Or, self.current.1),
Token::Equal => Node::new(BinaryOperator::Assign, self.current.1),
Token::Greater => Node::new(BinaryOperator::Greater, self.current.1),
Token::GreaterEqual => Node::new(BinaryOperator::GreaterOrEqual, self.current.1),
Token::Less => Node::new(BinaryOperator::Less, self.current.1),
Token::LessEqual => Node::new(BinaryOperator::LessOrEqual, self.current.1),
Token::Minus => Node::new(BinaryOperator::Subtract, self.current.1),
Token::Plus => Node::new(BinaryOperator::Add, self.current.1),
Token::PlusEqual => Node::new(BinaryOperator::AddAssign, self.current.1),
Token::Star => Node::new(BinaryOperator::Multiply, self.current.1),
Token::Slash => Node::new(BinaryOperator::Divide, self.current.1),
Token::Percent => Node::new(BinaryOperator::Modulo, self.current.1),
_ => {
return Err(ParseError::UnexpectedToken { return Err(ParseError::UnexpectedToken {
actual: self.current.0.to_owned(), actual: self.current.0.to_owned(),
position: self.current.1, position: self.current.1,
}); });
} }
}; };
let operator_precedence = self.current.0.precedence()
- if self.current.0.is_right_associative() {
1
} else {
0
};
self.next_token()?; self.next_token()?;
let left_start = left.position.0; let left_start = left.position.0;
let right = self.parse_node(0)?; let right = self.parse_statement(operator_precedence)?;
let right_end = right.position.1; let right_end = right.position.1;
Ok(Node::new( Ok(Node::new(
@ -468,32 +448,20 @@ impl<'src> Parser<'src> {
)) ))
} }
fn current_precedence(&self) -> u8 { fn parse_postfix(&mut self, left: Node<Statement>) -> Result<Node<Statement>, ParseError> {
match self.current.0 { if let Token::Semicolon = &self.current.0 {
Token::Semicolon => 10, self.next_token()?;
Token::Equal | Token::PlusEqual => 8,
Token::DoubleEqual => 7,
Token::DoubleAmpersand | Token::DoublePipe => 6,
Token::Greater | Token::GreaterEqual | Token::Less | Token::LessEqual => 5,
Token::Dot => 4,
Token::Percent => 3,
Token::Star => 2,
Token::Slash => 2,
Token::Plus => 1,
Token::Minus => 1,
_ => 0,
}
}
fn peek_token(&mut self) -> Token { let left_start = left.position.0;
self.lexer let operator_end = self.current.1 .1;
.peek_token(self.source)
.map(|(token, _)| token)
.unwrap_or(Token::Eof)
}
fn next_is_postfix(&mut self) -> bool { Ok(Node::new(
matches!(self.peek_token(), Token::Semicolon) Statement::Nil(Box::new(left)),
(left_start, operator_end),
))
} else {
Ok(left)
}
} }
} }
@ -503,10 +471,7 @@ pub enum ParseError {
error: ParseBoolError, error: ParseBoolError,
position: Span, position: Span,
}, },
LexError { LexError(LexError),
error: LexError,
position: Span,
},
ExpectedIdentifier { ExpectedIdentifier {
actual: TokenOwned, actual: TokenOwned,
position: Span, position: Span,
@ -530,6 +495,12 @@ pub enum ParseError {
}, },
} }
impl From<LexError> for ParseError {
fn from(v: LexError) -> Self {
Self::LexError(v)
}
}
impl ParseError { impl ParseError {
pub fn position(&self) -> Span { pub fn position(&self) -> Span {
match self { match self {
@ -538,7 +509,7 @@ impl ParseError {
ParseError::ExpectedToken { position, .. } => *position, ParseError::ExpectedToken { position, .. } => *position,
ParseError::FloatError { position, .. } => *position, ParseError::FloatError { position, .. } => *position,
ParseError::IntegerError { position, .. } => *position, ParseError::IntegerError { position, .. } => *position,
ParseError::LexError { position, .. } => *position, ParseError::LexError(error) => error.position(),
ParseError::UnexpectedToken { position, .. } => *position, ParseError::UnexpectedToken { position, .. } => *position,
} }
} }
@ -547,7 +518,7 @@ impl ParseError {
impl Error for ParseError { impl Error for ParseError {
fn source(&self) -> Option<&(dyn Error + 'static)> { fn source(&self) -> Option<&(dyn Error + 'static)> {
match self { match self {
Self::LexError { error, .. } => Some(error), Self::LexError(error) => Some(error),
_ => None, _ => None,
} }
} }
@ -565,7 +536,7 @@ impl Display for ParseError {
} => write!(f, "Expected token {expected}, found {actual}"), } => write!(f, "Expected token {expected}, found {actual}"),
Self::FloatError { error, .. } => write!(f, "{}", error), Self::FloatError { error, .. } => write!(f, "{}", error),
Self::IntegerError { error, .. } => write!(f, "{}", error), Self::IntegerError { error, .. } => write!(f, "{}", error),
Self::LexError { error, .. } => write!(f, "{}", error), Self::LexError(error) => write!(f, "{}", error),
Self::UnexpectedToken { actual, .. } => write!(f, "Unexpected token {actual}"), Self::UnexpectedToken { actual, .. } => write!(f, "Unexpected token {actual}"),
} }
} }

View File

@ -127,6 +127,40 @@ impl<'src> Token<'src> {
Token::WriteLine => "write_line", Token::WriteLine => "write_line",
} }
} }
/// Returns `true` if this token is `Token::Eof`, and `false` for every other token.
pub fn is_eof(&self) -> bool {
matches!(self, Token::Eof)
}
/// Returns the numeric precedence level of this token.
///
/// Levels, from lowest to highest:
///
/// | Level | Tokens                          |
/// |-------|---------------------------------|
/// | 1     | `+` `-`                         |
/// | 2     | `*` `/`                         |
/// | 3     | `%`                             |
/// | 4     | `.`                             |
/// | 5     | `==` `>` `>=` `<` `<=`          |
/// | 6     | `&&` `\|\|`                     |
/// | 7     | `;`                             |
/// | 8     | `=` `+=`                        |
///
/// Every token not listed above yields `0`.
pub fn precedence(&self) -> u8 {
    match self {
        Token::Plus | Token::Minus => 1,
        Token::Star | Token::Slash => 2,
        Token::Percent => 3,
        Token::Dot => 4,
        Token::DoubleEqual
        | Token::Greater
        | Token::GreaterEqual
        | Token::Less
        | Token::LessEqual => 5,
        Token::DoubleAmpersand | Token::DoublePipe => 6,
        Token::Semicolon => 7,
        Token::Equal | Token::PlusEqual => 8,
        _ => 0,
    }
}
/// Returns `true` for every token that `is_right_associative` rejects.
///
/// This is the plain negation of `is_right_associative`, so it is also
/// `true` for tokens that are not operators at all.
pub fn is_left_associative(&self) -> bool {
!self.is_right_associative()
}
/// Returns `true` only for `Token::Semicolon`.
///
/// NOTE(review): `Token::Semicolon` is also the only token `is_postfix`
/// accepts, so this flag may never be observed on a binary operator by the
/// infix parser; confirm whether the assignment tokens were meant to be
/// listed here.
pub fn is_right_associative(&self) -> bool {
matches!(self, Token::Semicolon)
}
/// Returns `true` only for `Token::Semicolon`, the single token treated as a
/// postfix operator.
pub fn is_postfix(&self) -> bool {
matches!(self, Token::Semicolon)
}
} }
impl<'src> Display for Token<'src> { impl<'src> Display for Token<'src> {