From ed82f3c64f8c6168b7eede44dcd86d7efb41a796 Mon Sep 17 00:00:00 2001 From: Jeff Date: Fri, 9 Aug 2024 11:41:23 -0400 Subject: [PATCH] Lex, parse and run maps and blocks --- dust-lang/src/abstract_tree.rs | 27 ++- dust-lang/src/analyzer.rs | 8 + dust-lang/src/identifier.rs | 23 +- dust-lang/src/lex.rs | 37 ++++ dust-lang/src/parse.rs | 371 ++++++++++++++++++++++++++------- dust-lang/src/token.rs | 6 + dust-lang/src/vm.rs | 16 +- 7 files changed, 410 insertions(+), 78 deletions(-) diff --git a/dust-lang/src/abstract_tree.rs b/dust-lang/src/abstract_tree.rs index 78042d1..4101b16 100644 --- a/dust-lang/src/abstract_tree.rs +++ b/dust-lang/src/abstract_tree.rs @@ -34,12 +34,15 @@ impl Display for Node { #[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] pub enum Statement { - // Top-level statements + // Variable assignment Assignment { identifier: Node, value_node: Box>, }, + // A sequence of statements + Block(Vec>), + // Logic, math and comparison expressions BinaryOperation { left: Box>, @@ -69,14 +72,19 @@ pub enum Statement { List(Vec>), Map(Vec<(Node, Node)>), - // Hard-coded values + // Hard-coded value Constant(Value), + + // A statement that always returns None. Created with a semicolon, it causes the preceding + // statement to return None. This is analogous to the semicolon or unit type in Rust. + Nil(Box>), } impl Statement { pub fn expected_type(&self, variables: &HashMap) -> Option { match self { Statement::Assignment { .. } => None, + Statement::Block(nodes) => nodes.last().unwrap().inner.expected_type(variables), Statement::BinaryOperation { left, .. } => left.inner.expected_type(variables), Statement::BuiltInFunctionCall { function, .. 
} => function.expected_return_type(), Statement::Constant(value) => Some(value.r#type(variables)), @@ -105,6 +113,7 @@ impl Statement { Some(Type::Map(types)) } Statement::PropertyAccess(_, _) => None, + Statement::Nil(_) => None, } } } @@ -118,6 +127,19 @@ impl Display for Statement { } => { write!(f, "{identifier} = {value}") } + Statement::Block(statements) => { + write!(f, "{{ ")?; + + for (i, statement) in statements.iter().enumerate() { + if i > 0 { + write!(f, " ")?; + } + + write!(f, "{statement}")?; + } + + write!(f, " }}") + } Statement::BinaryOperation { left, operator, @@ -223,6 +245,7 @@ impl Display for Statement { write!(f, "}}") } + Statement::Nil(node) => write!(f, "{node};"), Statement::PropertyAccess(left, right) => write!(f, "{left}.{right}"), } } diff --git a/dust-lang/src/analyzer.rs b/dust-lang/src/analyzer.rs index 9aacc7c..97a2792 100644 --- a/dust-lang/src/analyzer.rs +++ b/dust-lang/src/analyzer.rs @@ -140,6 +140,11 @@ impl<'a> Analyzer<'a> { } } } + Statement::Block(statements) => { + for statement in statements { + self.analyze_node(statement)?; + } + } Statement::BuiltInFunctionCall { .. } => {} Statement::Constant(_) => {} Statement::FunctionCall { function, .. } => { @@ -194,6 +199,9 @@ impl<'a> Analyzer<'a> { self.analyze_node(right)?; } + Statement::Nil(node) => { + self.analyze_node(node)?; + } } Ok(()) diff --git a/dust-lang/src/identifier.rs b/dust-lang/src/identifier.rs index ec4abed..0da95c9 100644 --- a/dust-lang/src/identifier.rs +++ b/dust-lang/src/identifier.rs @@ -1,4 +1,17 @@ //! Key used to identify a value or type. +//! +//! Identifiers are used to uniquely identify values and types in Dust programs. They are +//! cached to avoid duplication. This means that two identifiers with the same text are the same +//! object in memory. +//! +//! # Examples +//! ``` +//! # use dust_lang::Identifier; +//! let foo = Identifier::new("foo"); +//! let also_foo = Identifier::new("foo"); +//! +//! assert_eq!(foo.hard_count(), 2); +//! 
``` use std::{ collections::HashSet, fmt::{self, Display, Formatter}, @@ -8,20 +21,24 @@ use std::{ use serde::{de::Visitor, Deserialize, Serialize}; +/// In-use identifiers. static IDENTIFIER_CACHE: OnceLock>> = OnceLock::new(); +/// Returns the identifier cache. fn identifier_cache<'a>() -> &'a RwLock> { IDENTIFIER_CACHE.get_or_init(|| RwLock::new(HashSet::new())) } /// Key used to identify a value or type. +/// +/// See the [module-level documentation](index.html) for more information. #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] pub struct Identifier(Arc); impl Identifier { + /// Creates a new identifier or returns a clone of an existing one from a cache. pub fn new(text: T) -> Self { let cache = identifier_cache().read().unwrap(); - let new = Identifier(Arc::new(text.to_string())); if cache.contains(&new) { @@ -38,6 +55,10 @@ impl Identifier { pub fn as_str(&self) -> &str { self.0.as_str() } + + pub fn hard_count(&self) -> usize { + Arc::strong_count(&self.0) + } } impl From<&str> for Identifier { diff --git a/dust-lang/src/lex.rs b/dust-lang/src/lex.rs index be01b29..bfed026 100644 --- a/dust-lang/src/lex.rs +++ b/dust-lang/src/lex.rs @@ -215,6 +215,22 @@ impl Lexer { (Token::Percent, (self.position - 1, self.position)) } + '&' => { + if let Some('&') = self.peek_second_char(source) { + self.position += 2; + + (Token::DoubleAmpersand, (self.position - 2, self.position)) + } else { + self.position += 1; + + return Err(LexError::UnexpectedCharacter(c)); + } + } + ';' => { + self.position += 1; + + (Token::Semicolon, (self.position - 1, self.position)) + } _ => { self.position += 1; @@ -441,6 +457,27 @@ impl From for LexError { mod tests { use super::*; + #[test] + fn block() { + let input = "{ x = 42; y = 'foobar' }"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::LeftCurlyBrace, (0, 1)), + (Token::Identifier("x"), (2, 3)), + (Token::Equal, (4, 5)), + (Token::Integer(42), (6, 8)), + (Token::Semicolon, (8, 9)), + (Token::Identifier("y"), 
(10, 11)), + (Token::Equal, (12, 13)), + (Token::String("foobar"), (14, 22)), + (Token::RightCurlyBrace, (23, 24)), + (Token::Eof, (24, 24)), + ]) + ) + } + #[test] fn equal() { let input = "42 == 42"; diff --git a/dust-lang/src/parse.rs b/dust-lang/src/parse.rs index f0a6995..6e44197 100644 --- a/dust-lang/src/parse.rs +++ b/dust-lang/src/parse.rs @@ -166,6 +166,40 @@ impl<'src> Parser<'src> { (left_start, right_end), )); } + (Token::DoubleAmpersand, _) => { + let operator = Node::new(BinaryOperator::And, self.current.1); + + self.next_token()?; + + let right_node = self.parse_node(self.current_precedence())?; + let right_end = right_node.position.1; + + return Ok(Node::new( + Statement::BinaryOperation { + left: Box::new(left_node), + operator, + right: Box::new(right_node), + }, + (left_start, right_end), + )); + } + (Token::DoubleEqual, _) => { + let operator = Node::new(BinaryOperator::Equal, self.current.1); + + self.next_token()?; + + let right_node = self.parse_node(self.current_precedence())?; + let right_end = right_node.position.1; + + return Ok(Node::new( + Statement::BinaryOperation { + left: Box::new(left_node), + operator, + right: Box::new(right_node), + }, + (left_start, right_end), + )); + } (Token::Greater, _) => { let operator = Node::new(BinaryOperator::Greater, self.current.1); @@ -268,6 +302,12 @@ impl<'src> Parser<'src> { (left_start, right_end), )); } + (Token::Semicolon, (_, right_end)) => { + return Ok(Node::new( + Statement::Nil(Box::new(left_node)), + (left_start, *right_end), + )) + } (Token::Star, _) => { let operator = Node::new(BinaryOperator::Multiply, self.current.1); @@ -319,23 +359,6 @@ impl<'src> Parser<'src> { (left_start, right_end), )); } - (Token::DoubleEqual, _) => { - let operator = Node::new(BinaryOperator::Equal, self.current.1); - - self.next_token()?; - - let right_node = self.parse_node(self.current_precedence())?; - let right_end = right_node.position.1; - - return Ok(Node::new( - Statement::BinaryOperation { - 
left: Box::new(left_node), - operator, - right: Box::new(right_node), - }, - (left_start, right_end), - )); - } _ => {} } } @@ -369,13 +392,13 @@ impl<'src> Parser<'src> { if let (Token::Equal, _) = self.current { self.next_token()?; - let value = self.parse_node(0)?; - let right_end = value.position.1; + let value_node = self.parse_node(0)?; + let right_end = value_node.position.1; Ok(Node::new( Statement::Assignment { identifier: Node::new(Identifier::new(text), span), - value_node: Box::new(value), + value_node: Box::new(value_node), }, (span.0, right_end), )) @@ -394,45 +417,89 @@ impl<'src> Parser<'src> { (Token::LeftCurlyBrace, left_span) => { self.next_token()?; - let mut nodes = Vec::new(); + // If the next token is a right curly brace, this is an empty map + if let (Token::RightCurlyBrace, right_span) = self.current { + self.next_token()?; + + return Ok(Node::new( + Statement::Map(Vec::new()), + (left_span.0, right_span.1), + )); + } + + let mut statement = None; loop { + // If a closing brace is found, return the new statement if let (Token::RightCurlyBrace, right_span) = self.current { self.next_token()?; - return Ok(Node::new( - Statement::Map(nodes), - (left_span.0, right_span.1), - )); + return Ok(Node::new(statement.unwrap(), (left_span.0, right_span.1))); } - let identifier = if let (Token::Identifier(text), right_span) = self.current { - self.next_token()?; + let next_node = self.parse_node(0)?; - Node::new(Identifier::new(text), right_span) - } else { - return Err(ParseError::ExpectedIdentifier { - actual: self.current.0.to_owned(), - position: self.current.1, - }); - }; + // If the next node is an assignment, this might be a map + if let Statement::Assignment { + identifier, + value_node, + } = next_node.inner + { + // If the current token is a comma, or the new + // statement is already a map + if self.current.0 == Token::Comma + || statement + .as_ref() + .is_some_and(|statement| matches!(statement, Statement::Map(_))) + { + 
// The new statement is a map + if let Statement::Map(map_properties) = + statement.get_or_insert_with(|| Statement::Map(Vec::new())) + { + // Ignore commas after properties + if let Token::Comma = self.current.0 { + self.next_token()?; + } - if let Token::Equal = self.current.0 { - self.next_token()?; - } else { - return Err(ParseError::ExpectedToken { - expected: TokenOwned::Equal, - actual: self.current.0.to_owned(), - position: self.current.1, - }); - } + // Add the new property to the map + map_properties.push((identifier, *value_node)); + } + // Otherwise, the new statement is a block + } else if let Statement::Block(statements) = + statement.get_or_insert_with(|| Statement::Block(Vec::new())) + { + if self.current.0 == Token::Semicolon { + self.next_token()?; - let current_value_node = self.parse_node(0)?; + statements.push(Node::new( + Statement::Nil(Box::new(Node::new( + Statement::Assignment { + identifier, + value_node, + }, + next_node.position, + ))), + (next_node.position.0, self.current.1 .1), + )); - nodes.push((identifier, current_value_node)); + continue; + } else { + statements.push(Node::new( + Statement::Assignment { + identifier, + value_node, + }, + next_node.position, + )); - if let Token::Comma = self.current.0 { - self.next_token()?; + continue; + } + } + } else if let Statement::Block(statements) = + statement.get_or_insert_with(|| Statement::Block(Vec::new())) + { + // Add the non-assignment statement to the block + statements.push(next_node); } } } @@ -556,7 +623,8 @@ impl<'src> Parser<'src> { fn current_precedence(&self) -> u8 { match self.current.0 { - Token::DoubleEqual => 6, + Token::DoubleEqual => 7, + Token::DoubleAmpersand => 6, Token::Greater | Token::GreaterEqual | Token::Less | Token::LessEqual => 5, Token::Dot => 4, Token::Percent => 3, @@ -577,7 +645,7 @@ pub enum ParseError { }, ExpectedIdentifier { actual: TokenOwned, - position: (usize, usize), + position: Span, }, ExpectedToken { expected: TokenOwned, @@ -631,6 +699,186 @@ 
mod tests { use super::*; + #[test] + fn misplaced_semicolon() { + let input = ";"; + + assert_eq!( + parse(input), + Err(ParseError::UnexpectedToken { + actual: TokenOwned::Semicolon, + position: (0, 1) + }) + ); + } + + #[test] + fn block_with_one_statement() { + let input = "{ 40 + 2 }"; + + assert_eq!( + parse(input), + Ok(AbstractSyntaxTree { + nodes: [Node::new( + Statement::Block(vec![Node::new( + Statement::BinaryOperation { + left: Box::new(Node::new( + Statement::Constant(Value::integer(40)), + (2, 4) + )), + operator: Node::new(BinaryOperator::Add, (5, 6)), + right: Box::new(Node::new( + Statement::Constant(Value::integer(2)), + (7, 8) + )), + }, + (2, 8) + )]), + (0, 10) + )] + .into() + }) + ); + } + + #[test] + fn block_with_assignment() { + let input = "{ foo = 42; bar = 42; baz = '42' }"; + + assert_eq!( + parse(input), + Ok(AbstractSyntaxTree { + nodes: [Node::new( + Statement::Block(vec![ + Node::new( + Statement::Nil(Box::new(Node::new( + Statement::Assignment { + identifier: Node::new(Identifier::new("foo"), (2, 5)), + value_node: Box::new(Node::new( + Statement::Constant(Value::integer(42)), + (8, 10) + )) + }, + (2, 10) + ),)), + (2, 15) + ), + Node::new( + Statement::Nil(Box::new(Node::new( + Statement::Assignment { + identifier: Node::new(Identifier::new("bar"), (12, 15)), + value_node: Box::new(Node::new( + Statement::Constant(Value::integer(42)), + (18, 20) + )) + }, + (12, 20) + ),)), + (12, 25) + ), + Node::new( + Statement::Assignment { + identifier: Node::new(Identifier::new("baz"), (22, 25)), + value_node: Box::new(Node::new( + Statement::Constant(Value::string("42")), + (28, 32) + )) + }, + (22, 32) + ) + ]), + (0, 34) + )] + .into() + }) + ); + } + + #[test] + fn empty_map() { + let input = "{}"; + + assert_eq!( + parse(input), + Ok(AbstractSyntaxTree { + nodes: [Node::new(Statement::Map(vec![]), (0, 2))].into() + }) + ); + } + + #[test] + fn map_with_trailing_comma() { + let input = "{ foo = 42, bar = 42, baz = '42', }"; + + 
assert_eq!( + parse(input), + Ok(AbstractSyntaxTree { + nodes: [Node::new( + Statement::Map(vec![ + ( + Node::new(Identifier::new("foo"), (2, 5)), + Node::new(Statement::Constant(Value::integer(42)), (8, 10)) + ), + ( + Node::new(Identifier::new("bar"), (12, 15)), + Node::new(Statement::Constant(Value::integer(42)), (18, 20)) + ), + ( + Node::new(Identifier::new("baz"), (22, 25)), + Node::new(Statement::Constant(Value::string("42")), (28, 32)) + ), + ]), + (0, 35) + )] + .into() + }) + ); + } + + #[test] + fn map_with_two_properties() { + let input = "{ x = 42, y = 'foobar' }"; + + assert_eq!( + parse(input), + Ok(AbstractSyntaxTree { + nodes: [Node::new( + Statement::Map(vec![ + ( + Node::new(Identifier::new("x"), (2, 3)), + Node::new(Statement::Constant(Value::integer(42)), (6, 8)) + ), + ( + Node::new(Identifier::new("y"), (10, 11)), + Node::new(Statement::Constant(Value::string("foobar")), (14, 22)) + ) + ]), + (0, 24) + )] + .into() + }) + ); + } + + #[test] + fn map_with_one_property() { + let input = "{ x = 42, }"; + + assert_eq!( + parse(input), + Ok(AbstractSyntaxTree { + nodes: [Node::new( + Statement::Map(vec![( + Node::new(Identifier::new("x"), (2, 3)), + Node::new(Statement::Constant(Value::integer(42)), (6, 8)) + )]), + (0, 11) + )] + .into() + }) + ); + } + #[test] fn equal() { let input = "42 == 42"; @@ -704,31 +952,6 @@ mod tests { ); } - #[test] - fn map() { - let input = "{ x = 42, y = 'foobar' }"; - - assert_eq!( - parse(input), - Ok(AbstractSyntaxTree { - nodes: [Node::new( - Statement::Map(vec![ - ( - Node::new(Identifier::new("x"), (2, 3)), - Node::new(Statement::Constant(Value::integer(42)), (6, 8)) - ), - ( - Node::new(Identifier::new("y"), (10, 11)), - Node::new(Statement::Constant(Value::string("foobar")), (14, 22)) - ) - ]), - (0, 24) - )] - .into() - }) - ); - } - #[test] fn less_than() { let input = "1 < 2"; diff --git a/dust-lang/src/token.rs b/dust-lang/src/token.rs index 6d60855..503ebf4 100644 --- a/dust-lang/src/token.rs +++ 
b/dust-lang/src/token.rs @@ -43,6 +43,7 @@ pub enum Token<'src> { RightCurlyBrace, RightParenthesis, RightSquareBrace, + Semicolon, Slash, Star, } @@ -78,6 +79,7 @@ impl<'src> Token<'src> { Token::RightCurlyBrace => TokenOwned::RightCurlyBrace, Token::RightParenthesis => TokenOwned::RightParenthesis, Token::RightSquareBrace => TokenOwned::RightSquareBrace, + Token::Semicolon => TokenOwned::Semicolon, Token::Star => TokenOwned::Star, Token::Slash => TokenOwned::Slash, Token::String(text) => TokenOwned::String(text.to_string()), @@ -115,6 +117,7 @@ impl<'src> Token<'src> { Token::RightCurlyBrace => "}", Token::RightParenthesis => ")", Token::RightSquareBrace => "]", + Token::Semicolon => ";", Token::Star => "*", Token::String(_) => "string", Token::Slash => "/", @@ -163,6 +166,7 @@ impl<'src> PartialEq for Token<'src> { (Token::RightCurlyBrace, Token::RightCurlyBrace) => true, (Token::RightParenthesis, Token::RightParenthesis) => true, (Token::RightSquareBrace, Token::RightSquareBrace) => true, + (Token::Semicolon, Token::Semicolon) => true, (Token::Star, Token::Star) => true, (Token::Slash, Token::Slash) => true, (Token::String(left), Token::String(right)) => left == right, @@ -214,6 +218,7 @@ pub enum TokenOwned { RightCurlyBrace, RightParenthesis, RightSquareBrace, + Semicolon, Star, Slash, } @@ -249,6 +254,7 @@ impl Display for TokenOwned { TokenOwned::RightCurlyBrace => Token::RightCurlyBrace.fmt(f), TokenOwned::RightParenthesis => Token::RightParenthesis.fmt(f), TokenOwned::RightSquareBrace => Token::RightSquareBrace.fmt(f), + TokenOwned::Semicolon => Token::Semicolon.fmt(f), TokenOwned::Star => Token::Star.fmt(f), TokenOwned::Slash => Token::Slash.fmt(f), TokenOwned::String(string) => write!(f, "{string}"), diff --git a/dust-lang/src/vm.rs b/dust-lang/src/vm.rs index 173b48b..278397c 100644 --- a/dust-lang/src/vm.rs +++ b/dust-lang/src/vm.rs @@ -115,6 +115,15 @@ impl Vm { Ok(Some(result)) } + Statement::Block(statements) => { + let mut previous_value = None; + 
+ for statement in statements { + previous_value = self.run_node(statement, variables)?; + } + + Ok(previous_value) + } Statement::BuiltInFunctionCall { function, type_arguments: _, @@ -234,6 +243,11 @@ impl Vm { Ok(Some(Value::map(values))) } + Statement::Nil(node) => { + let _return = self.run_node(*node, variables)?; + + Ok(None) + } Statement::PropertyAccess(left, right) => { let left_span = left.position; let left_value = if let Some(value) = self.run_node(*left, variables)? { @@ -436,7 +450,7 @@ mod tests { #[test] fn map_equal() { - let input = "{ y = 'foo' } == { y = 'foo' }"; + let input = "{ y = 'foo', } == { y = 'foo', }"; assert_eq!( run(input, &mut HashMap::new()),