From 5ad6012021f72369c46e9a2a464ec84e5a3b1e38 Mon Sep 17 00:00:00 2001 From: Jeff Date: Mon, 12 Aug 2024 16:57:10 -0400 Subject: [PATCH] Refine parsing and lexing --- dust-lang/src/abstract_tree.rs | 72 +++++++++---------- dust-lang/src/analyzer.rs | 55 +++++++++----- dust-lang/src/lexer.rs | 67 ++++++++++++++++++ dust-lang/src/parser.rs | 126 +++++++++++++++++++++++++-------- dust-lang/src/token.rs | 16 ++++- dust-lang/src/vm.rs | 116 +++++++++++++++++++++--------- 6 files changed, 337 insertions(+), 115 deletions(-) diff --git a/dust-lang/src/abstract_tree.rs b/dust-lang/src/abstract_tree.rs index 85b5e96..6824af6 100644 --- a/dust-lang/src/abstract_tree.rs +++ b/dust-lang/src/abstract_tree.rs @@ -144,6 +144,15 @@ impl Statement { None } } + BinaryOperator::ListIndex => { + let left_type = left.inner.expected_type(context)?; + + if let Type::List { item_type } = left_type { + Some(*item_type) + } else { + None + } + } }, Statement::BuiltInFunctionCall { function, .. } => function.expected_return_type(), Statement::Constant(value) => Some(value.r#type(context)), @@ -216,11 +225,26 @@ impl Display for Statement { operator, right, } => { - if let BinaryOperator::FieldAccess = operator.inner { - write!(f, "{left}{operator}{right}") - } else { - write!(f, "{left} {operator} {right}") - } + let operator = match operator.inner { + BinaryOperator::FieldAccess => return write!(f, "{left}.{right}"), + BinaryOperator::ListIndex => return write!(f, "{left}[{right}]"), + BinaryOperator::Add => "+", + BinaryOperator::AddAssign => "+=", + BinaryOperator::Assign => "=", + BinaryOperator::Divide => "/", + BinaryOperator::Equal => "==", + BinaryOperator::Greater => ">", + BinaryOperator::GreaterOrEqual => ">=", + BinaryOperator::Less => "<", + BinaryOperator::LessOrEqual => "<=", + BinaryOperator::Modulo => "%", + BinaryOperator::Multiply => "*", + BinaryOperator::Subtract => "-", + BinaryOperator::And => "&&", + BinaryOperator::Or => "||", + }; + + write!(f, "{left} {operator} {right}") } Statement::BuiltInFunctionCall { function, @@ -359,6 +383,11 @@ impl Display for Statement { } Statement::Nil(node) => write!(f, "{node};"), Statement::UnaryOperation { operator, operand } => { + let operator = match operator.inner { + UnaryOperator::Negate => "-", + UnaryOperator::Not => "!", + }; + write!(f, "{operator}{operand}") } Statement::While { condition, body } => { @@ -370,7 +399,9 @@ impl Display for Statement { #[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] pub enum BinaryOperator { + // Accessors FieldAccess, + ListIndex, // Math Add, @@ -395,39 +426,8 @@ pub enum BinaryOperator { AddAssign, } -impl Display for BinaryOperator { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match self { - BinaryOperator::Add => write!(f, "+"), - BinaryOperator::AddAssign => write!(f, "+="), - BinaryOperator::Assign => write!(f, "="), - BinaryOperator::And => write!(f, "&&"), - BinaryOperator::Divide => write!(f, "/"), - BinaryOperator::Equal => write!(f, "=="), - BinaryOperator::FieldAccess => write!(f, "."), - BinaryOperator::Greater => write!(f, ">"), - BinaryOperator::GreaterOrEqual => write!(f, ">="), - BinaryOperator::Less => write!(f, "<"), - BinaryOperator::LessOrEqual => write!(f, "<="), - BinaryOperator::Modulo => write!(f, "%"), - BinaryOperator::Multiply => write!(f, "*"), - BinaryOperator::Or => write!(f, "||"), - BinaryOperator::Subtract => write!(f, "-"), - } - } -} - #[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] pub enum UnaryOperator { Negate, Not, } - -impl Display for UnaryOperator { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match self { - UnaryOperator::Negate => write!(f, "-"), - UnaryOperator::Not => write!(f, "!"), - } - } -} diff --git a/dust-lang/src/analyzer.rs b/dust-lang/src/analyzer.rs index e3f0343..1023132 100644 --- a/dust-lang/src/analyzer.rs +++ b/dust-lang/src/analyzer.rs @@ -105,20 +105,6 @@ impl<'a> Analyzer<'a> { self.analyze_statement(right)?; } - if let Some(Type::List { .. }) = left.inner.expected_type(self.context) { - let right_type = right.inner.expected_type(self.context); - - if let Some(Type::Integer) = right_type { - // Allow indexing lists with integers - } else if let Some(Type::Range) = right_type { - // Allow indexing lists with ranges - } else { - return Err(AnalyzerError::ExpectedIntegerOrRange { - actual: right.as_ref().clone(), - }); - } - } - if let Some(Type::Map { .. }) = left.inner.expected_type(self.context) { if let Some(Type::String) = right.inner.expected_type(self.context) { // Allow indexing maps with strings @@ -129,6 +115,33 @@ impl<'a> Analyzer<'a> { actual: right.as_ref().clone(), }); } + } else { + return Err(AnalyzerError::ExpectedMap { + actual: left.as_ref().clone(), + }); + } + + return Ok(()); + } + + if let BinaryOperator::ListIndex = operator.inner { + self.analyze_statement(left)?; + self.analyze_statement(right)?; + + if let Some(Type::List { .. }) = left.inner.expected_type(self.context) { + let index_type = right.inner.expected_type(self.context); + + if let Some(Type::Integer | Type::Range) = index_type { + // List and index are valid + } else { + return Err(AnalyzerError::ExpectedIntegerOrRange { + actual: right.as_ref().clone(), + }); + } + } else { + return Err(AnalyzerError::ExpectedList { + actual: left.as_ref().clone(), + }); } return Ok(()); @@ -418,6 +431,12 @@ pub enum AnalyzerError { ExpectedIntegerOrRange { actual: Node, }, + ExpectedList { + actual: Node, + }, + ExpectedMap { + actual: Node, + }, ExpectedValue { actual: Node, }, @@ -449,6 +468,8 @@ impl AnalyzerError { AnalyzerError::ExpectedIdentifier { actual, .. } => actual.position, AnalyzerError::ExpectedIdentifierOrString { actual } => actual.position, AnalyzerError::ExpectedIntegerOrRange { actual, .. } => actual.position, + AnalyzerError::ExpectedList { actual } => actual.position, + AnalyzerError::ExpectedMap { actual } => actual.position, AnalyzerError::ExpectedValue { actual } => actual.position, AnalyzerError::ExpectedValueArgumentCount { position, .. } => *position, AnalyzerError::TypeConflict { @@ -478,6 +499,8 @@ impl Display for AnalyzerError { AnalyzerError::ExpectedIntegerOrRange { actual, .. } => { write!(f, "Expected integer or range, found {}", actual) } + AnalyzerError::ExpectedList { actual } => write!(f, "Expected list, found {}", actual), + AnalyzerError::ExpectedMap { actual } => write!(f, "Expected map, found {}", actual), AnalyzerError::ExpectedValue { actual, .. } => { write!(f, "Expected value, found {}", actual) } @@ -516,13 +539,13 @@ mod tests { #[test] fn malformed_list_index() { - let source = "[1, 2, 3].foo"; + let source = "[1, 2, 3]['foo']"; assert_eq!( analyze(source), Err(DustError::AnalyzerError { analyzer_error: AnalyzerError::ExpectedIntegerOrRange { - actual: Node::new(Statement::Identifier(Identifier::new("foo")), (10, 13)), + actual: Node::new(Statement::Constant(Value::string("foo")), (10, 15)), }, source }) diff --git a/dust-lang/src/lexer.rs b/dust-lang/src/lexer.rs index efe1079..f63d3da 100644 --- a/dust-lang/src/lexer.rs +++ b/dust-lang/src/lexer.rs @@ -499,6 +499,73 @@ impl Display for LexError { mod tests { use super::*; + #[test] + fn list_index() { + let input = "[1, 2, 3][1]"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::LeftSquareBrace, (0, 1)), + (Token::Integer("1"), (1, 2)), + (Token::Comma, (2, 3)), + (Token::Integer("2"), (4, 5)), + (Token::Comma, (5, 6)), + (Token::Integer("3"), (7, 8)), + (Token::RightSquareBrace, (8, 9)), + (Token::LeftSquareBrace, (9, 10)), + (Token::Integer("1"), (10, 11)), + (Token::RightSquareBrace, (11, 12)), + (Token::Eof, (12, 12)), + ]) + ) + } + + #[test] + fn list() { + let input = "[1, 2, 3]"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::LeftSquareBrace, (0, 1)), + (Token::Integer("1"), (1, 2)), + (Token::Comma, (2, 3)), + (Token::Integer("2"), (4, 5)), + (Token::Comma, (5, 6)), + (Token::Integer("3"), (7, 8)), + (Token::RightSquareBrace, (8, 9)), + (Token::Eof, (9, 9)), + ]) + ) + } + + #[test] + fn map_field_access() { + let input = "{a = 1, b = 2, c = 3}.c"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::LeftCurlyBrace, (0, 1)), + (Token::Identifier("a"), (1, 2)), + (Token::Equal, (3, 4)), + (Token::Integer("1"), (5, 6)), + (Token::Comma, (6, 7)), + (Token::Identifier("b"), (8, 9)), + (Token::Equal, (10, 11)), + (Token::Integer("2"), (12, 13)), + (Token::Comma, (13, 14)), + (Token::Identifier("c"), (15, 16)), + (Token::Equal, (17, 18)), + (Token::Integer("3"), (19, 20)), + (Token::RightCurlyBrace, (20, 21)), + (Token::Dot, (21, 22)), + (Token::Identifier("c"), (22, 23)), + (Token::Eof, (23, 23)), + ]) + ) + } #[test] fn range() { let input = "0..42"; diff --git a/dust-lang/src/parser.rs b/dust-lang/src/parser.rs index fd9d2b9..ef27b87 100644 --- a/dust-lang/src/parser.rs +++ b/dust-lang/src/parser.rs @@ -483,15 +483,9 @@ impl<'src> Parser<'src> { continue; } - if let Ok(instruction) = self.parse_statement(0) { - nodes.push(instruction); - } else { - return Err(ParseError::ExpectedToken { - expected: TokenKind::RightSquareBrace, - actual: self.current.0.to_owned(), - position: self.current.1, - }); - } + let statement = self.parse_statement(0)?; + + nodes.push(statement); } } ( @@ -712,18 +706,48 @@ impl<'src> Parser<'src> { &mut self, left: Node, ) -> Result<(Node, u8), ParseError> { - let node = if let Token::Semicolon = &self.current.0 { - self.next_token()?; + let node = match &self.current.0 { + Token::LeftSquareBrace => { + self.next_token()?; - let left_start = left.position.0; - let operator_end = self.current.1 .1; + let index = self.parse_statement(0)?; - Node::new(Statement::Nil(Box::new(left)), (left_start, operator_end)) - } else { - return Err(ParseError::UnexpectedToken { - actual: self.current.0.to_owned(), - position: self.current.1, - }); + if let Token::RightSquareBrace = self.current.0 { + self.next_token()?; + } else { + return Err(ParseError::ExpectedToken { + expected: TokenKind::RightSquareBrace, + actual: self.current.0.to_owned(), + position: self.current.1, + }); + } + + let left_start = left.position.0; + let right_end = self.current.1 .1; + + Node::new( + Statement::BinaryOperation { + left: Box::new(left), + operator: Node::new(BinaryOperator::ListIndex, self.current.1), + right: Box::new(index), + }, + (left_start, right_end), + ) + } + Token::Semicolon => { + let left_start = left.position.0; + let operator_end = self.current.1 .1; + + self.next_token()?; + + Node::new(Statement::Nil(Box::new(left)), (left_start, operator_end)) + } + _ => { + return Err(ParseError::UnexpectedToken { + actual: self.current.0.to_owned(), + position: self.current.1, + }); + } }; Ok((node, self.current.0.precedence())) @@ -851,6 +875,52 @@ mod tests { use super::*; + #[test] + fn list_index_nested() { + let input = "[1, [2], 3][1][0]"; + + assert_eq!( + parse(input), + Ok(AbstractSyntaxTree { + nodes: [Node::new( + Statement::BinaryOperation { + left: Box::new(Node::new( + Statement::BinaryOperation { + left: Box::new(Node::new( + Statement::List(vec![ + Node::new(Statement::Constant(Value::integer(1)), (1, 2)), + Node::new( + Statement::List(vec![Node::new( + Statement::Constant(Value::integer(2)), + (5, 6) + )]), + (4, 7) + ), + Node::new(Statement::Constant(Value::integer(3)), (9, 10)) + ]), + (0, 11) + )), + operator: Node::new(BinaryOperator::ListIndex, (0, 0)), + right: Box::new(Node::new( + Statement::Constant(Value::integer(1)), + (12, 13) + )) + }, + (0, 0) + )), + operator: Node::new(BinaryOperator::ListIndex, (0, 0)), + right: Box::new(Node::new( + Statement::Constant(Value::integer(0)), + (15, 16) + )) + }, + (0, 0) + ),] + .into() + }) + ); + } + #[test] fn map_property_nested() { let input = "{ x = { y = 42 } }.x.y"; @@ -940,7 +1010,7 @@ mod tests { }, (0, 5) ))), - (0, 8) + (0, 6) ), Node::new( Statement::UnaryOperation { @@ -1045,7 +1115,7 @@ mod tests { }, (0, 9) ))), - (0, 12) + (0, 10) ), Node::new( Statement::UnaryOperation { @@ -1195,7 +1265,7 @@ mod tests { }, (16, 21) ))), - (16, 24) + (16, 22) ), } }) @@ -1368,7 +1438,7 @@ mod tests { }, (2, 10) ),)), - (2, 15) + (2, 11) ), Node::new( Statement::Nil(Box::new(Node::new( @@ -1385,7 +1455,7 @@ mod tests { }, (12, 20) ),)), - (12, 25) + (12, 21) ), Node::new( Statement::BinaryOperation { @@ -1732,8 +1802,8 @@ mod tests { } #[test] - fn list_access() { - let input = "[1, 2, 3].0"; + fn list_index() { + let input = "[1, 2, 3][0]"; assert_eq!( parse(input), @@ -1748,13 +1818,13 @@ mod tests { ]), (0, 9) )), - operator: Node::new(BinaryOperator::FieldAccess, (9, 10)), + operator: Node::new(BinaryOperator::ListIndex, (12, 12)), right: Box::new(Node::new( Statement::Constant(Value::integer(0)), (10, 11) )), }, - (0, 11), + (0, 12), )] .into() }) diff --git a/dust-lang/src/token.rs b/dust-lang/src/token.rs index 4ad04d3..2224b20 100644 --- a/dust-lang/src/token.rs +++ b/dust-lang/src/token.rs @@ -222,7 +222,8 @@ impl<'src> Token<'src> { pub fn precedence(&self) -> u8 { match self { - Token::Dot => 9, + Token::Dot => 10, + Token::LeftSquareBrace => 9, Token::Star | Token::Slash | Token::Percent => 8, Token::Minus => 7, Token::Plus => 6, @@ -240,7 +241,16 @@ impl<'src> Token<'src> { } pub fn is_left_associative(&self) -> bool { - !self.is_right_associative() + matches!( + self, + Token::DoubleAmpersand + | Token::DoublePipe + | Token::Plus + | Token::Minus + | Token::Star + | Token::Slash + | Token::Percent + ) } pub fn is_right_associative(&self) -> bool { @@ -252,7 +262,7 @@ impl<'src> Token<'src> { } pub fn is_postfix(&self) -> bool { - matches!(self, Token::Semicolon) + matches!(self, Token::LeftSquareBrace | Token::Semicolon) } } diff --git a/dust-lang/src/vm.rs b/dust-lang/src/vm.rs index 673b8be..7919998 100644 --- a/dust-lang/src/vm.rs +++ b/dust-lang/src/vm.rs @@ -148,24 +148,6 @@ impl Vm { }; let right_span = right.position; - if let (Some(list), Statement::Constant(value)) = - (left_value.as_list(), &right.inner) - { - if let Some(index) = value.as_integer() { - let value = list.get(index as usize).cloned(); - - return Ok(value); - } - - if let Some(range) = value.as_range() { - let range = range.start as usize..range.end as usize; - - if let Some(items) = list.get(range) { - return Ok(Some(Value::list(items.to_vec()))); - } - } - } - if let Some(map) = left_value.as_map() { if let Statement::Identifier(identifier) = right.inner { let value = map.get(&identifier).cloned(); @@ -182,10 +164,62 @@ impl Vm { return Ok(value); } } + + return Err(VmError::ExpectedIdentifierOrString { + position: right_span, + }); + } else { + return Err(VmError::ExpectedMap { + position: left_span, + }); + } + } + + if let BinaryOperator::ListIndex = operator.inner { + let list_position = left.position; + let list_value = if let Some(value) = self.run_statement(*left)? { + value + } else { + return Err(VmError::ExpectedValue { + position: list_position, + }); + }; + let list = if let Some(list) = list_value.as_list() { + list + } else { + return Err(VmError::ExpectedList { + position: list_position, + }); + }; + let index_position = right.position; + let index_value = if let Some(value) = self.run_statement(*right)? { + value + } else { + return Err(VmError::ExpectedValue { + position: index_position, + }); + }; + + if let Some(index) = index_value.as_integer() { + return if let Some(value) = list.get(index as usize) { + Ok(Some(value.clone())) + } else { + Ok(None) + }; } - return Err(VmError::ExpectedIdentifierIntegerOrRange { - position: right_span, + if let Some(range) = index_value.as_range() { + let range = range.start as usize..range.end as usize; + + return if let Some(list) = list.get(range) { + Ok(Some(Value::list(list.to_vec()))) + } else { + Ok(None) + }; + } + + return Err(VmError::ExpectedIntegerOrRange { + position: index_position, }); } @@ -594,7 +628,10 @@ pub enum VmError { ExpectedIdentifier { position: Span, }, - ExpectedIdentifierIntegerOrRange { + ExpectedIntegerOrRange { + position: Span, + }, + ExpectedIdentifierOrString { position: Span, }, ExpectedInteger { @@ -603,6 +640,9 @@ pub enum VmError { ExpectedNumber { position: Span, }, + ExpectedMap { + position: Span, + }, ExpectedFunction { actual: Value, position: Span, @@ -632,10 +672,12 @@ impl VmError { Self::BuiltInFunctionError { position, .. } => *position, Self::ExpectedBoolean { position } => *position, Self::ExpectedIdentifier { position } => *position, - Self::ExpectedIdentifierIntegerOrRange { position } => *position, + Self::ExpectedIdentifierOrString { position } => *position, + Self::ExpectedIntegerOrRange { position } => *position, Self::ExpectedInteger { position } => *position, Self::ExpectedFunction { position, .. } => *position, Self::ExpectedList { position } => *position, + Self::ExpectedMap { position } => *position, Self::ExpectedNumber { position } => *position, Self::ExpectedValue { position } => *position, Self::UndefinedVariable { identifier } => identifier.position, @@ -673,7 +715,14 @@ impl Display for VmError { Self::ExpectedIdentifier { position } => { write!(f, "Expected an identifier at position: {:?}", position) } - Self::ExpectedIdentifierIntegerOrRange { position } => { + Self::ExpectedIdentifierOrString { position } => { + write!( + f, + "Expected an identifier or string at position: {:?}", + position + ) + } + Self::ExpectedIntegerOrRange { position } => { write!( f, "Expected an identifier, integer, or range at position: {:?}", @@ -686,6 +735,9 @@ impl Display for VmError { Self::ExpectedList { position } => { write!(f, "Expected a list at position: {:?}", position) } + Self::ExpectedMap { position } => { + write!(f, "Expected a map at position: {:?}", position) + } Self::ExpectedNumber { position } => { write!( f, @@ -712,6 +764,13 @@ impl Display for VmError { mod tests { use super::*; + #[test] + fn list_index_nested() { + let input = "[[1, 2], [42, 4], [5, 6]][1][0]"; + + assert_eq!(run(input), Ok(Some(Value::integer(42)))); + } + #[test] fn map_property() { let input = "{ x = 42 }.x"; @@ -735,7 +794,7 @@ mod tests { #[test] fn list_index_range() { - let input = "[1, 2, 3, 4, 5].1..3"; + let input = "[1, 2, 3, 4, 5][1..3]"; assert_eq!( run(input), @@ -769,7 +828,7 @@ mod tests { #[test] fn list_index() { - let input = "[1, 42, 3].1"; + let input = "[1, 42, 3][1]"; assert_eq!(run(input), Ok(Some(Value::integer(42)))); } @@ -949,13 +1008,6 @@ mod tests { assert_eq!(run(input), Ok(Some(Value::integer(3)))); } - #[test] - fn list_access() { - let input = "[1, 2, 3].1"; - - assert_eq!(run(input), Ok(Some(Value::integer(2)))); - } - #[test] fn add() { let input = "1 + 2";