Lex, parse and run maps and blocks

This commit is contained in:
Jeff 2024-08-09 11:41:23 -04:00
parent f389f7e422
commit ed82f3c64f
7 changed files with 410 additions and 78 deletions

View File

@ -34,12 +34,15 @@ impl<T: Display> Display for Node<T> {
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum Statement {
// Top-level statements
// Variable assignment
Assignment {
identifier: Node<Identifier>,
value_node: Box<Node<Statement>>,
},
// A sequence of statements
Block(Vec<Node<Statement>>),
// Logic, math and comparison expressions
BinaryOperation {
left: Box<Node<Statement>>,
@ -69,14 +72,19 @@ pub enum Statement {
List(Vec<Node<Statement>>),
Map(Vec<(Node<Identifier>, Node<Statement>)>),
// Hard-coded values
// Hard-coded value
Constant(Value),
// A statement that always returns None. Created with a semicolon, it causes the preceding
// statement to return None. This is analogous to the semicolon or unit type in Rust.
Nil(Box<Node<Statement>>),
}
impl Statement {
pub fn expected_type(&self, variables: &HashMap<Identifier, Value>) -> Option<Type> {
match self {
Statement::Assignment { .. } => None,
Statement::Block(nodes) => nodes.last().unwrap().inner.expected_type(variables),
Statement::BinaryOperation { left, .. } => left.inner.expected_type(variables),
Statement::BuiltInFunctionCall { function, .. } => function.expected_return_type(),
Statement::Constant(value) => Some(value.r#type(variables)),
@ -105,6 +113,7 @@ impl Statement {
Some(Type::Map(types))
}
Statement::PropertyAccess(_, _) => None,
Statement::Nil(_) => None,
}
}
}
@ -118,6 +127,19 @@ impl Display for Statement {
} => {
write!(f, "{identifier} = {value}")
}
Statement::Block(statements) => {
write!(f, "{{ ")?;
for (i, statement) in statements.iter().enumerate() {
if i > 0 {
write!(f, " ")?;
}
write!(f, "{statement}")?;
}
write!(f, " }}")
}
Statement::BinaryOperation {
left,
operator,
@ -223,6 +245,7 @@ impl Display for Statement {
write!(f, "}}")
}
Statement::Nil(node) => write!(f, "{node};"),
Statement::PropertyAccess(left, right) => write!(f, "{left}.{right}"),
}
}

View File

@ -140,6 +140,11 @@ impl<'a> Analyzer<'a> {
}
}
}
Statement::Block(statements) => {
for statement in statements {
self.analyze_node(statement)?;
}
}
Statement::BuiltInFunctionCall { .. } => {}
Statement::Constant(_) => {}
Statement::FunctionCall { function, .. } => {
@ -194,6 +199,9 @@ impl<'a> Analyzer<'a> {
self.analyze_node(right)?;
}
Statement::Nil(node) => {
self.analyze_node(node)?;
}
}
Ok(())

View File

@ -1,4 +1,17 @@
//! Key used to identify a value or type.
//!
//! Identifiers are used to uniquely identify values and types in Dust programs. They are
//! cached to avoid duplication. This means that two identifiers with the same text are the same
//! object in memory.
//!
//! # Examples
//! ```
//! # use dust_lang::Identifier;
//! let foo = Identifier::new("foo");
//! let also_foo = Identifier::new("foo");
//!
//! assert_eq!(foo.hard_count(), 2);
//! ```
use std::{
collections::HashSet,
fmt::{self, Display, Formatter},
@ -8,20 +21,24 @@ use std::{
use serde::{de::Visitor, Deserialize, Serialize};
/// In-use identifiers.
static IDENTIFIER_CACHE: OnceLock<RwLock<HashSet<Identifier>>> = OnceLock::new();
/// Returns the shared identifier cache, initializing it as an empty set on first use.
fn identifier_cache<'a>() -> &'a RwLock<HashSet<Identifier>> {
    // `RwLock::default` wraps `HashSet::default()`, i.e. an empty set.
    IDENTIFIER_CACHE.get_or_init(RwLock::default)
}
/// Key used to identify a value or type.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct Identifier(Arc<String>);
impl Identifier {
/// Creates a new identifier or returns a clone of an existing one from a cache.
pub fn new<T: ToString>(text: T) -> Self {
let cache = identifier_cache().read().unwrap();
let new = Identifier(Arc::new(text.to_string()));
if cache.contains(&new) {
@ -38,6 +55,10 @@ impl Identifier {
pub fn as_str(&self) -> &str {
self.0.as_str()
}
pub fn hard_count(&self) -> usize {
Arc::strong_count(&self.0)
}
}
impl From<&str> for Identifier {

View File

@ -215,6 +215,22 @@ impl Lexer {
(Token::Percent, (self.position - 1, self.position))
}
'&' => {
if let Some('&') = self.peek_second_char(source) {
self.position += 2;
(Token::DoubleAmpersand, (self.position - 2, self.position))
} else {
self.position += 1;
return Err(LexError::UnexpectedCharacter(c));
}
}
';' => {
self.position += 1;
(Token::Semicolon, (self.position - 1, self.position))
}
_ => {
self.position += 1;
@ -441,6 +457,27 @@ impl From<ParseIntError> for LexError {
mod tests {
use super::*;
#[test]
fn block() {
    // Two assignments inside braces, separated by a semicolon.
    let actual = lex("{ x = 42; y = 'foobar' }");

    let expected = vec![
        (Token::LeftCurlyBrace, (0, 1)),
        (Token::Identifier("x"), (2, 3)),
        (Token::Equal, (4, 5)),
        (Token::Integer(42), (6, 8)),
        (Token::Semicolon, (8, 9)),
        (Token::Identifier("y"), (10, 11)),
        (Token::Equal, (12, 13)),
        (Token::String("foobar"), (14, 22)),
        (Token::RightCurlyBrace, (23, 24)),
        (Token::Eof, (24, 24)),
    ];

    assert_eq!(actual, Ok(expected));
}
#[test]
fn equal() {
let input = "42 == 42";

View File

@ -166,6 +166,40 @@ impl<'src> Parser<'src> {
(left_start, right_end),
));
}
(Token::DoubleAmpersand, _) => {
let operator = Node::new(BinaryOperator::And, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::DoubleEqual, _) => {
let operator = Node::new(BinaryOperator::Equal, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
(Token::Greater, _) => {
let operator = Node::new(BinaryOperator::Greater, self.current.1);
@ -268,6 +302,12 @@ impl<'src> Parser<'src> {
(left_start, right_end),
));
}
(Token::Semicolon, (_, right_end)) => {
return Ok(Node::new(
Statement::Nil(Box::new(left_node)),
(left_start, *right_end),
))
}
(Token::Star, _) => {
let operator = Node::new(BinaryOperator::Multiply, self.current.1);
@ -319,23 +359,6 @@ impl<'src> Parser<'src> {
(left_start, right_end),
));
}
(Token::DoubleEqual, _) => {
let operator = Node::new(BinaryOperator::Equal, self.current.1);
self.next_token()?;
let right_node = self.parse_node(self.current_precedence())?;
let right_end = right_node.position.1;
return Ok(Node::new(
Statement::BinaryOperation {
left: Box::new(left_node),
operator,
right: Box::new(right_node),
},
(left_start, right_end),
));
}
_ => {}
}
}
@ -369,13 +392,13 @@ impl<'src> Parser<'src> {
if let (Token::Equal, _) = self.current {
self.next_token()?;
let value = self.parse_node(0)?;
let right_end = value.position.1;
let value_node = self.parse_node(0)?;
let right_end = value_node.position.1;
Ok(Node::new(
Statement::Assignment {
identifier: Node::new(Identifier::new(text), span),
value_node: Box::new(value),
value_node: Box::new(value_node),
},
(span.0, right_end),
))
@ -394,45 +417,89 @@ impl<'src> Parser<'src> {
(Token::LeftCurlyBrace, left_span) => {
self.next_token()?;
let mut nodes = Vec::new();
// If the next token is a right curly brace, this is an empty map
if let (Token::RightCurlyBrace, right_span) = self.current {
self.next_token()?;
return Ok(Node::new(
Statement::Map(Vec::new()),
(left_span.0, right_span.1),
));
}
let mut statement = None;
loop {
// If a closing brace is found, return the new statement
if let (Token::RightCurlyBrace, right_span) = self.current {
self.next_token()?;
return Ok(Node::new(
Statement::Map(nodes),
(left_span.0, right_span.1),
));
return Ok(Node::new(statement.unwrap(), (left_span.0, right_span.1)));
}
let identifier = if let (Token::Identifier(text), right_span) = self.current {
self.next_token()?;
let next_node = self.parse_node(0)?;
Node::new(Identifier::new(text), right_span)
} else {
return Err(ParseError::ExpectedIdentifier {
actual: self.current.0.to_owned(),
position: self.current.1,
});
};
// If the next node is an assignment, this might be a map
if let Statement::Assignment {
identifier,
value_node,
} = next_node.inner
{
// If the current token is a comma, right curly brace, or the new
// statement is already a map
if self.current.0 == Token::Comma
|| statement
.as_ref()
.is_some_and(|statement| matches!(statement, Statement::Map(_)))
{
// The new statement is a map
if let Statement::Map(map_properties) =
statement.get_or_insert_with(|| Statement::Map(Vec::new()))
{
// Ignore commas after properties
if let Token::Comma = self.current.0 {
self.next_token()?;
}
if let Token::Equal = self.current.0 {
self.next_token()?;
} else {
return Err(ParseError::ExpectedToken {
expected: TokenOwned::Equal,
actual: self.current.0.to_owned(),
position: self.current.1,
});
}
// Add the new property to the map
map_properties.push((identifier, *value_node));
}
// Otherwise, the new statement is a block
} else if let Statement::Block(statements) =
statement.get_or_insert_with(|| Statement::Block(Vec::new()))
{
if self.current.0 == Token::Semicolon {
self.next_token()?;
let current_value_node = self.parse_node(0)?;
statements.push(Node::new(
Statement::Nil(Box::new(Node::new(
Statement::Assignment {
identifier,
value_node,
},
next_node.position,
))),
(next_node.position.0, self.current.1 .1),
));
nodes.push((identifier, current_value_node));
continue;
} else {
statements.push(Node::new(
Statement::Assignment {
identifier,
value_node,
},
next_node.position,
));
if let Token::Comma = self.current.0 {
self.next_token()?;
continue;
}
}
} else if let Statement::Block(statements) =
statement.get_or_insert_with(|| Statement::Block(Vec::new()))
{
// Add the assignment statement to the block
statements.push(next_node);
}
}
}
@ -556,7 +623,8 @@ impl<'src> Parser<'src> {
fn current_precedence(&self) -> u8 {
match self.current.0 {
Token::DoubleEqual => 6,
Token::DoubleEqual => 7,
Token::DoubleAmpersand => 6,
Token::Greater | Token::GreaterEqual | Token::Less | Token::LessEqual => 5,
Token::Dot => 4,
Token::Percent => 3,
@ -577,7 +645,7 @@ pub enum ParseError {
},
ExpectedIdentifier {
actual: TokenOwned,
position: (usize, usize),
position: Span,
},
ExpectedToken {
expected: TokenOwned,
@ -631,6 +699,186 @@ mod tests {
use super::*;
#[test]
fn misplaced_semicolon() {
    // A lone semicolon has no preceding statement and is expected to be rejected.
    let actual = parse(";");

    let expected_error = ParseError::UnexpectedToken {
        actual: TokenOwned::Semicolon,
        position: (0, 1),
    };

    assert_eq!(actual, Err(expected_error));
}
#[test]
fn block_with_one_statement() {
    let actual = parse("{ 40 + 2 }");

    // The single statement inside the braces: `40 + 2`.
    let forty = Node::new(Statement::Constant(Value::integer(40)), (2, 4));
    let plus = Node::new(BinaryOperator::Add, (5, 6));
    let two = Node::new(Statement::Constant(Value::integer(2)), (7, 8));
    let addition = Node::new(
        Statement::BinaryOperation {
            left: Box::new(forty),
            operator: plus,
            right: Box::new(two),
        },
        (2, 8),
    );

    // The block node itself spans both braces.
    let block = Node::new(Statement::Block(vec![addition]), (0, 10));
    let expected = AbstractSyntaxTree {
        nodes: [block].into(),
    };

    assert_eq!(actual, Ok(expected));
}
#[test]
fn block_with_assignment() {
    let actual = parse("{ foo = 42; bar = 42; baz = '42' }");

    let foo_assignment = Node::new(
        Statement::Assignment {
            identifier: Node::new(Identifier::new("foo"), (2, 5)),
            value_node: Box::new(Node::new(
                Statement::Constant(Value::integer(42)),
                (8, 10),
            )),
        },
        (2, 10),
    );
    let bar_assignment = Node::new(
        Statement::Assignment {
            identifier: Node::new(Identifier::new("bar"), (12, 15)),
            value_node: Box::new(Node::new(
                Statement::Constant(Value::integer(42)),
                (18, 20),
            )),
        },
        (12, 20),
    );
    let baz_assignment = Node::new(
        Statement::Assignment {
            identifier: Node::new(Identifier::new("baz"), (22, 25)),
            value_node: Box::new(Node::new(
                Statement::Constant(Value::string("42")),
                (28, 32),
            )),
        },
        (22, 32),
    );

    // The two assignments followed by `;` are expected wrapped in `Nil`; the last one,
    // which has no trailing semicolon, is expected as a bare assignment.
    let statements = vec![
        Node::new(Statement::Nil(Box::new(foo_assignment)), (2, 15)),
        Node::new(Statement::Nil(Box::new(bar_assignment)), (12, 25)),
        baz_assignment,
    ];
    let expected = AbstractSyntaxTree {
        nodes: [Node::new(Statement::Block(statements), (0, 34))].into(),
    };

    assert_eq!(actual, Ok(expected));
}
#[test]
fn empty_map() {
    // An empty pair of braces is expected to parse as a map with no properties.
    let actual = parse("{}");

    let expected = AbstractSyntaxTree {
        nodes: [Node::new(Statement::Map(Vec::new()), (0, 2))].into(),
    };

    assert_eq!(actual, Ok(expected));
}
#[test]
fn map_with_trailing_comma() {
    // The comma after the last property must not prevent the map from parsing.
    let actual = parse("{ foo = 42, bar = 42, baz = '42', }");

    let properties = vec![
        (
            Node::new(Identifier::new("foo"), (2, 5)),
            Node::new(Statement::Constant(Value::integer(42)), (8, 10)),
        ),
        (
            Node::new(Identifier::new("bar"), (12, 15)),
            Node::new(Statement::Constant(Value::integer(42)), (18, 20)),
        ),
        (
            Node::new(Identifier::new("baz"), (22, 25)),
            Node::new(Statement::Constant(Value::string("42")), (28, 32)),
        ),
    ];
    let expected = AbstractSyntaxTree {
        nodes: [Node::new(Statement::Map(properties), (0, 35))].into(),
    };

    assert_eq!(actual, Ok(expected));
}
#[test]
fn map_with_two_properties() {
    // Comma-separated assignments inside braces are expected to form a map.
    let actual = parse("{ x = 42, y = 'foobar' }");

    let x_property = (
        Node::new(Identifier::new("x"), (2, 3)),
        Node::new(Statement::Constant(Value::integer(42)), (6, 8)),
    );
    let y_property = (
        Node::new(Identifier::new("y"), (10, 11)),
        Node::new(Statement::Constant(Value::string("foobar")), (14, 22)),
    );
    let map = Node::new(Statement::Map(vec![x_property, y_property]), (0, 24));
    let expected = AbstractSyntaxTree {
        nodes: [map].into(),
    };

    assert_eq!(actual, Ok(expected));
}
#[test]
fn map_with_one_property() {
    // A single assignment followed by a comma is expected to parse as a one-entry map.
    let actual = parse("{ x = 42, }");

    let x_property = (
        Node::new(Identifier::new("x"), (2, 3)),
        Node::new(Statement::Constant(Value::integer(42)), (6, 8)),
    );
    let expected = AbstractSyntaxTree {
        nodes: [Node::new(Statement::Map(vec![x_property]), (0, 11))].into(),
    };

    assert_eq!(actual, Ok(expected));
}
#[test]
fn equal() {
let input = "42 == 42";
@ -704,31 +952,6 @@ mod tests {
);
}
#[test]
fn map() {
let input = "{ x = 42, y = 'foobar' }";
assert_eq!(
parse(input),
Ok(AbstractSyntaxTree {
nodes: [Node::new(
Statement::Map(vec![
(
Node::new(Identifier::new("x"), (2, 3)),
Node::new(Statement::Constant(Value::integer(42)), (6, 8))
),
(
Node::new(Identifier::new("y"), (10, 11)),
Node::new(Statement::Constant(Value::string("foobar")), (14, 22))
)
]),
(0, 24)
)]
.into()
})
);
}
#[test]
fn less_than() {
let input = "1 < 2";

View File

@ -43,6 +43,7 @@ pub enum Token<'src> {
RightCurlyBrace,
RightParenthesis,
RightSquareBrace,
Semicolon,
Slash,
Star,
}
@ -78,6 +79,7 @@ impl<'src> Token<'src> {
Token::RightCurlyBrace => TokenOwned::RightCurlyBrace,
Token::RightParenthesis => TokenOwned::RightParenthesis,
Token::RightSquareBrace => TokenOwned::RightSquareBrace,
Token::Semicolon => TokenOwned::Semicolon,
Token::Star => TokenOwned::Star,
Token::Slash => TokenOwned::Slash,
Token::String(text) => TokenOwned::String(text.to_string()),
@ -115,6 +117,7 @@ impl<'src> Token<'src> {
Token::RightCurlyBrace => "}",
Token::RightParenthesis => ")",
Token::RightSquareBrace => "]",
Token::Semicolon => ";",
Token::Star => "*",
Token::String(_) => "string",
Token::Slash => "/",
@ -163,6 +166,7 @@ impl<'src> PartialEq for Token<'src> {
(Token::RightCurlyBrace, Token::RightCurlyBrace) => true,
(Token::RightParenthesis, Token::RightParenthesis) => true,
(Token::RightSquareBrace, Token::RightSquareBrace) => true,
(Token::Semicolon, Token::Semicolon) => true,
(Token::Star, Token::Star) => true,
(Token::Slash, Token::Slash) => true,
(Token::String(left), Token::String(right)) => left == right,
@ -214,6 +218,7 @@ pub enum TokenOwned {
RightCurlyBrace,
RightParenthesis,
RightSquareBrace,
Semicolon,
Star,
Slash,
}
@ -249,6 +254,7 @@ impl Display for TokenOwned {
TokenOwned::RightCurlyBrace => Token::RightCurlyBrace.fmt(f),
TokenOwned::RightParenthesis => Token::RightParenthesis.fmt(f),
TokenOwned::RightSquareBrace => Token::RightSquareBrace.fmt(f),
TokenOwned::Semicolon => Token::Semicolon.fmt(f),
TokenOwned::Star => Token::Star.fmt(f),
TokenOwned::Slash => Token::Slash.fmt(f),
TokenOwned::String(string) => write!(f, "{string}"),

View File

@ -115,6 +115,15 @@ impl Vm {
Ok(Some(result))
}
Statement::Block(statements) => {
let mut previous_value = None;
for statement in statements {
previous_value = self.run_node(statement, variables)?;
}
Ok(previous_value)
}
Statement::BuiltInFunctionCall {
function,
type_arguments: _,
@ -234,6 +243,11 @@ impl Vm {
Ok(Some(Value::map(values)))
}
Statement::Nil(node) => {
let _return = self.run_node(*node, variables)?;
Ok(None)
}
Statement::PropertyAccess(left, right) => {
let left_span = left.position;
let left_value = if let Some(value) = self.run_node(*left, variables)? {
@ -436,7 +450,7 @@ mod tests {
#[test]
fn map_equal() {
let input = "{ y = 'foo' } == { y = 'foo' }";
let input = "{ y = 'foo', } == { y = 'foo', }";
assert_eq!(
run(input, &mut HashMap::new()),