From 61f136edd2bf117bba6f8b3e84f26f766881ae47 Mon Sep 17 00:00:00 2001 From: Jeff Date: Mon, 5 Aug 2024 00:40:51 -0400 Subject: [PATCH] Refactor and clean up --- dust-lang/src/abstract_tree.rs | 13 +- dust-lang/src/analyzer.rs | 98 +++++++++++---- dust-lang/src/lex.rs | 17 +-- dust-lang/src/lib.rs | 4 +- dust-lang/src/parse.rs | 212 ++++++++++++++++++--------------- dust-lang/src/vm.rs | 114 +++++++++++++----- 6 files changed, 294 insertions(+), 164 deletions(-) diff --git a/dust-lang/src/abstract_tree.rs b/dust-lang/src/abstract_tree.rs index c9e35c9..dac2b01 100644 --- a/dust-lang/src/abstract_tree.rs +++ b/dust-lang/src/abstract_tree.rs @@ -2,25 +2,28 @@ use crate::{Identifier, Span, Value}; #[derive(Debug, PartialEq, Clone)] pub struct Node { - pub operation: Statement, + pub statement: Statement, pub span: Span, } impl Node { pub fn new(operation: Statement, span: Span) -> Self { - Self { operation, span } + Self { + statement: operation, + span, + } } } #[derive(Debug, PartialEq, Clone)] pub enum Statement { // Top-level statements - Assign(Box<(Node, Node)>), + Assign(Box, Box), // Expressions - Add(Box<(Node, Node)>), + Add(Box, Box), List(Vec), - Multiply(Box<(Node, Node)>), + Multiply(Box, Box), // Hard-coded values Constant(Value), diff --git a/dust-lang/src/analyzer.rs b/dust-lang/src/analyzer.rs index 26d1659..d0b6d74 100644 --- a/dust-lang/src/analyzer.rs +++ b/dust-lang/src/analyzer.rs @@ -1,5 +1,11 @@ use crate::{Node, Span, Statement}; +pub fn analyze(abstract_tree: Vec) -> Result<(), AnalyzerError> { + let analyzer = Analyzer::new(abstract_tree); + + analyzer.analyze() +} + pub struct Analyzer { abstract_tree: Vec, } @@ -18,33 +24,36 @@ impl Analyzer { } fn analyze_node(&self, node: &Node) -> Result<(), AnalyzerError> { - match &node.operation { - Statement::Add(instructions) => { - self.analyze_node(&instructions.0)?; - self.analyze_node(&instructions.1)?; + match &node.statement { + Statement::Add(left, right) => { + self.analyze_node(&left)?; + 
self.analyze_node(&right)?; } - Statement::Assign(instructions) => { - if let Statement::Identifier(_) = &instructions.0.operation { - // Identifier + Statement::Assign(left, right) => { + if let Statement::Identifier(_) = &left.statement { + // Identifier is in the correct position } else { return Err(AnalyzerError::ExpectedIdentifier { - actual: instructions.0.clone(), + actual: left.as_ref().clone(), }); } - self.analyze_node(&instructions.0)?; - self.analyze_node(&instructions.1)?; + self.analyze_node(&right)?; } Statement::Constant(_) => {} - Statement::Identifier(_) => {} - Statement::List(instructions) => { - for instruction in instructions { - self.analyze_node(instruction)?; + Statement::Identifier(_) => { + return Err(AnalyzerError::UnexpectedIdentifier { + identifier: node.clone(), + }); + } + Statement::List(statements) => { + for statement in statements { + self.analyze_node(statement)?; } } - Statement::Multiply(instructions) => { - self.analyze_node(&instructions.0)?; - self.analyze_node(&instructions.1)?; + Statement::Multiply(left, right) => { + self.analyze_node(&left)?; + self.analyze_node(&right)?; } } @@ -55,22 +64,23 @@ impl Analyzer { #[derive(Clone, Debug, PartialEq)] pub enum AnalyzerError { ExpectedIdentifier { actual: Node }, + UnexpectedIdentifier { identifier: Node }, } #[cfg(test)] mod tests { - use crate::Value; + use crate::{Identifier, Value}; use super::*; #[test] - fn analyze() { + fn assignment_expect_identifier() { let abstract_tree = vec![Node::new( - Statement::Assign(Box::new(( - Node::new(Statement::Constant(Value::integer(1)), (0, 1)), - Node::new(Statement::Constant(Value::integer(2)), (1, 2)), - ))), - (0, 1), + Statement::Assign( + Box::new(Node::new(Statement::Constant(Value::integer(1)), (0, 1))), + Box::new(Node::new(Statement::Constant(Value::integer(2)), (1, 2))), + ), + (0, 2), )]; let analyzer = Analyzer::new(abstract_tree); @@ -82,4 +92,44 @@ mod tests { }) ) } + + #[test] + fn unexpected_identifier_simple() { + 
let abstract_tree = vec![Node::new( + Statement::Identifier(Identifier::new("x")), + (0, 1), + )]; + + let analyzer = Analyzer::new(abstract_tree); + + assert_eq!( + analyzer.analyze(), + Err(AnalyzerError::UnexpectedIdentifier { + identifier: Node::new(Statement::Identifier(Identifier::new("x")), (0, 1)) + }) + ) + } + + #[test] + fn unexpected_identifier_nested() { + let abstract_tree = vec![Node::new( + Statement::Add( + Box::new(Node::new(Statement::Constant(Value::integer(1)), (0, 1))), + Box::new(Node::new( + Statement::Identifier(Identifier::new("x")), + (1, 2), + )), + ), + (0, 1), + )]; + + let analyzer = Analyzer::new(abstract_tree); + + assert_eq!( + analyzer.analyze(), + Err(AnalyzerError::UnexpectedIdentifier { + identifier: Node::new(Statement::Identifier(Identifier::new("x")), (1, 2)) + }) + ) + } } diff --git a/dust-lang/src/lex.rs b/dust-lang/src/lex.rs index ff0c567..cd19b3f 100644 --- a/dust-lang/src/lex.rs +++ b/dust-lang/src/lex.rs @@ -22,17 +22,20 @@ pub fn lex(input: &str) -> Result, LexError> { #[derive(Debug, Clone)] pub struct Lexer<'a> { - input: &'a str, + source: &'a str, position: usize, } impl<'a> Lexer<'a> { pub fn new(input: &'a str) -> Self { - Lexer { input, position: 0 } + Lexer { + source: input, + position: 0, + } } fn next_char(&mut self) -> Option { - self.input[self.position..].chars().next().map(|c| { + self.source[self.position..].chars().next().map(|c| { self.position += c.len_utf8(); c }) @@ -97,7 +100,7 @@ impl<'a> Lexer<'a> { } fn peek_char(&self) -> Option { - self.input[self.position..].chars().next() + self.source[self.position..].chars().next() } fn lex_number(&mut self) -> Result<(Token, Span), LexError> { @@ -127,11 +130,11 @@ impl<'a> Lexer<'a> { } if is_float { - let float = self.input[start_pos..self.position].parse::()?; + let float = self.source[start_pos..self.position].parse::()?; Ok((Token::Float(float), (start_pos, self.position))) } else { - let integer = self.input[start_pos..self.position].parse::()?; 
+ let integer = self.source[start_pos..self.position].parse::()?; Ok((Token::Integer(integer), (start_pos, self.position))) } @@ -148,7 +151,7 @@ impl<'a> Lexer<'a> { } } - let identifier = &self.input[start_pos..self.position]; + let identifier = &self.source[start_pos..self.position]; let token = Token::Identifier(Identifier::new(identifier)); Ok((token, (start_pos, self.position))) diff --git a/dust-lang/src/lib.rs b/dust-lang/src/lib.rs index 62796ba..ae9f89a 100644 --- a/dust-lang/src/lib.rs +++ b/dust-lang/src/lib.rs @@ -18,13 +18,13 @@ pub mod value; pub mod vm; pub use abstract_tree::{Node, Statement}; -pub use analyzer::Analyzer; +pub use analyzer::{analyze, Analyzer, AnalyzerError}; pub use identifier::Identifier; pub use lex::{lex, LexError, Lexer}; pub use parse::{parse, ParseError, Parser}; pub use r#type::Type; pub use token::Token; pub use value::{Value, ValueError}; -pub use vm::Vm; +pub use vm::{run, Vm, VmError}; pub type Span = (usize, usize); diff --git a/dust-lang/src/parse.rs b/dust-lang/src/parse.rs index 286f75f..8d5dbca 100644 --- a/dust-lang/src/parse.rs +++ b/dust-lang/src/parse.rs @@ -1,24 +1,26 @@ +use std::collections::VecDeque; + use crate::{ lex::{LexError, Lexer}, Node, Span, Statement, Token, Value, }; -pub fn parse(input: &str) -> Result, ParseError> { +pub fn parse(input: &str) -> Result, ParseError> { let lexer = Lexer::new(input); let mut parser = Parser::new(lexer); - let mut instructions = Vec::new(); + let mut nodes = VecDeque::new(); loop { - let instruction = parser.parse()?; + let node = parser.parse()?; - instructions.push(instruction); + nodes.push_back(node); if let Token::Eof = parser.current.0 { break; } } - Ok(instructions) + Ok(nodes) } pub struct Parser<'src> { @@ -35,7 +37,7 @@ impl<'src> Parser<'src> { } pub fn parse(&mut self) -> Result { - self.parse_instruction(0) + self.parse_node(0) } fn next_token(&mut self) -> Result<(), ParseError> { @@ -44,42 +46,42 @@ impl<'src> Parser<'src> { Ok(()) } - fn 
parse_instruction(&mut self, precedence: u8) -> Result { - let left_instruction = self.parse_primary()?; - let left_start = left_instruction.span.0; + fn parse_node(&mut self, precedence: u8) -> Result { + let left_node = self.parse_primary()?; + let left_start = left_node.span.0; if precedence < self.current_precedence() { match &self.current { (Token::Plus, _) => { self.next_token()?; - let right_instruction = self.parse_instruction(self.current_precedence())?; - let right_end = right_instruction.span.1; + let right_node = self.parse_node(self.current_precedence())?; + let right_end = right_node.span.1; return Ok(Node::new( - Statement::Add(Box::new((left_instruction, right_instruction))), + Statement::Add(Box::new(left_node), Box::new(right_node)), (left_start, right_end), )); } (Token::Star, _) => { self.next_token()?; - let right_instruction = self.parse_instruction(self.current_precedence())?; - let right_end = right_instruction.span.1; + let right_node = self.parse_node(self.current_precedence())?; + let right_end = right_node.span.1; return Ok(Node::new( - Statement::Multiply(Box::new((left_instruction, right_instruction))), + Statement::Multiply(Box::new(left_node), Box::new(right_node)), (left_start, right_end), )); } (Token::Equal, _) => { self.next_token()?; - let right_instruction = self.parse_instruction(self.current_precedence())?; - let right_end = right_instruction.span.1; + let right_node = self.parse_node(self.current_precedence())?; + let right_end = right_node.span.1; return Ok(Node::new( - Statement::Assign(Box::new((left_instruction, right_instruction))), + Statement::Assign(Box::new(left_node), Box::new(right_node)), (left_start, right_end), )); } @@ -87,7 +89,7 @@ impl<'src> Parser<'src> { } } - Ok(left_instruction) + Ok(left_node) } fn parse_primary(&mut self) -> Result { @@ -110,13 +112,13 @@ impl<'src> Parser<'src> { (Token::LeftParenthesis, left_span) => { self.next_token()?; - let instruction = self.parse_instruction(0)?; + let 
instruction = self.parse_node(0)?; if let (Token::RightParenthesis, right_span) = self.current { self.next_token()?; Ok(Node::new( - instruction.operation, + instruction.statement, (left_span.0, right_span.1), )) } else { @@ -147,7 +149,7 @@ impl<'src> Parser<'src> { continue; } - if let Ok(instruction) = self.parse_instruction(0) { + if let Ok(instruction) = self.parse_node(0) { instructions.push(instruction); } else { return Err(ParseError::ExpectedClosingSquareBrace { @@ -197,32 +199,39 @@ mod tests { assert_eq!( parse(input), - Ok(vec![Node::new( + Ok([Node::new( Statement::List(vec![ Node::new(Statement::Constant(Value::integer(1)), (1, 2)), Node::new( - Statement::Add(Box::new(( - Node::new(Statement::Constant(Value::integer(1)), (4, 5)), - Node::new(Statement::Constant(Value::integer(1)), (8, 9)), - ))), - (4, 9) + Statement::Add( + Box::new(Node::new(Statement::Constant(Value::integer(1)), (4, 5))), + Box::new(Node::new(Statement::Constant(Value::integer(1)), (8, 9))), + ), + (4, 9), ), Node::new( - Statement::Add(Box::new(( - Node::new(Statement::Constant(Value::integer(2)), (11, 12)), - Node::new( - Statement::Multiply(Box::new(( - Node::new(Statement::Constant(Value::integer(4)), (16, 17)), - Node::new(Statement::Constant(Value::integer(10)), (20, 22)), - ))), - (15, 23) - ), - ))), - (11, 23) - ) + Statement::Add( + Box::new(Node::new(Statement::Constant(Value::integer(2)), (11, 12))), + Box::new(Node::new( + Statement::Multiply( + Box::new(Node::new( + Statement::Constant(Value::integer(4)), + (16, 17) + )), + Box::new(Node::new( + Statement::Constant(Value::integer(10)), + (20, 22) + )), + ), + (15, 23), + ),), + ), + (11, 23), + ), ]), - (0, 24) - )]) + (0, 24), + )] + .into()) ); } @@ -232,13 +241,14 @@ mod tests { assert_eq!( parse(input), - Ok(vec![Node::new( + Ok([Node::new( Statement::List(vec![ Node::new(Statement::Constant(Value::integer(1)), (1, 2)), Node::new(Statement::Constant(Value::integer(2)), (4, 5)), ]), - (0, 6) - )]) + (0, 6), + )] 
+ .into()) ); } @@ -248,7 +258,7 @@ mod tests { assert_eq!( parse(input), - Ok(vec![Node::new(Statement::List(vec![]), (0, 2))]) + Ok([Node::new(Statement::List(vec![]), (0, 2))].into()) ); } @@ -258,10 +268,7 @@ mod tests { assert_eq!( parse(input), - Ok(vec![Node::new( - Statement::Constant(Value::float(42.0)), - (0, 4) - )]) + Ok([Node::new(Statement::Constant(Value::float(42.0)), (0, 4))].into()) ); } @@ -271,13 +278,14 @@ mod tests { assert_eq!( parse(input), - Ok(vec![Node::new( - Statement::Add(Box::new(( - Node::new(Statement::Constant(Value::integer(1)), (0, 1)), - Node::new(Statement::Constant(Value::integer(2)), (4, 5)), - ))), - (0, 5) - )]) + Ok([Node::new( + Statement::Add( + Box::new(Node::new(Statement::Constant(Value::integer(1)), (0, 1))), + Box::new(Node::new(Statement::Constant(Value::integer(2)), (4, 5))), + ), + (0, 5), + )] + .into()) ); } @@ -287,13 +295,14 @@ mod tests { assert_eq!( parse(input), - Ok(vec![Node::new( - Statement::Multiply(Box::new(( - Node::new(Statement::Constant(Value::integer(1)), (0, 1)), - Node::new(Statement::Constant(Value::integer(2)), (4, 5)), - ))), - (0, 5) - )]) + Ok([Node::new( + Statement::Multiply( + Box::new(Node::new(Statement::Constant(Value::integer(1)), (0, 1))), + Box::new(Node::new(Statement::Constant(Value::integer(2)), (4, 5))), + ), + (0, 5), + )] + .into()) ); } @@ -303,19 +312,20 @@ mod tests { assert_eq!( parse(input), - Ok(vec![Node::new( - Statement::Add(Box::new(( - Node::new(Statement::Constant(Value::integer(1)), (0, 1)), - Node::new( - Statement::Multiply(Box::new(( - Node::new(Statement::Constant(Value::integer(2)), (4, 5)), - Node::new(Statement::Constant(Value::integer(3)), (8, 9)), - ))), - (4, 9) - ), - ))), - (0, 9) - )]) + Ok([Node::new( + Statement::Add( + Box::new(Node::new(Statement::Constant(Value::integer(1)), (0, 1))), + Box::new(Node::new( + Statement::Multiply( + Box::new(Node::new(Statement::Constant(Value::integer(2)), (4, 5))), + 
Box::new(Node::new(Statement::Constant(Value::integer(3)), (8, 9))), + ), + (4, 9), + )), + ), + (0, 9), + )] + .into()) ); } @@ -325,25 +335,35 @@ mod tests { assert_eq!( parse(input), - Ok(vec![Node::new( - Statement::Assign(Box::new(( - Node::new(Statement::Identifier(Identifier::new("a")), (0, 1)), - Node::new( - Statement::Add(Box::new(( - Node::new(Statement::Constant(Value::integer(1)), (4, 5)), - Node::new( - Statement::Multiply(Box::new(( - Node::new(Statement::Constant(Value::integer(2)), (8, 9)), - Node::new(Statement::Constant(Value::integer(3)), (12, 13)), - ))), - (8, 13) - ), - ))), - (4, 13) - ), - ))), - (0, 13) - )]) + Ok([Node::new( + Statement::Assign( + Box::new(Node::new( + Statement::Identifier(Identifier::new("a")), + (0, 1) + )), + Box::new(Node::new( + Statement::Add( + Box::new(Node::new(Statement::Constant(Value::integer(1)), (4, 5))), + Box::new(Node::new( + Statement::Multiply( + Box::new(Node::new( + Statement::Constant(Value::integer(2)), + (8, 9) + )), + Box::new(Node::new( + Statement::Constant(Value::integer(3)), + (12, 13) + )), + ), + (8, 13), + )), + ), + (4, 13), + )), + ), + (0, 13), + )] + .into()) ); } } diff --git a/dust-lang/src/vm.rs b/dust-lang/src/vm.rs index 73d182f..32c032c 100644 --- a/dust-lang/src/vm.rs +++ b/dust-lang/src/vm.rs @@ -1,74 +1,128 @@ -use crate::{parse, Node, ParseError, Parser, Statement, Value, ValueError}; +use std::collections::{HashMap, VecDeque}; -pub fn run(input: &str) -> Result, VmError> { +use crate::{parse, Identifier, Node, ParseError, Span, Statement, Value, ValueError}; + +pub fn run( + input: &str, + variables: &mut HashMap, +) -> Result, VmError> { let instructions = parse(input)?; - let vm = Vm::new(instructions); + let mut vm = Vm::new(instructions); - vm.run() + vm.run(variables) } pub struct Vm { - instructions: Vec, + statement_nodes: VecDeque, } impl Vm { - pub fn new(instructions: Vec) -> Self { - Vm { instructions } + pub fn new(statement_nodes: VecDeque) -> Self { + Vm { 
statement_nodes } } - pub fn run(&self) -> Result, VmError> { + pub fn run( + &mut self, + variables: &mut HashMap, + ) -> Result, VmError> { let mut previous_value = None; - for instruction in &self.instructions { - previous_value = self.run_instruction(instruction)?; + while let Some(node) = self.statement_nodes.pop_front() { + previous_value = self.run_node(node, variables)?; } Ok(previous_value) } - fn run_instruction(&self, instruction: &Node) -> Result, VmError> { - match &instruction.operation { - Statement::Add(instructions) => { - let left = if let Some(value) = self.run_instruction(&instructions.0)? { + fn run_node( + &self, + node: Node, + variables: &mut HashMap, + ) -> Result, VmError> { + match node.statement { + Statement::Add(left, right) => { + let left_span = left.span; + let left = if let Some(value) = self.run_node(*left, variables)? { value } else { - return Err(VmError::ExpectedValue(instructions.0.operation.clone())); + return Err(VmError::ExpectedValue { + position: left_span, + }); }; - let right = if let Some(value) = self.run_instruction(&instructions.1)? { + let right_span = right.span; + let right = if let Some(value) = self.run_node(*right, variables)? { value } else { - return Err(VmError::ExpectedValue(instructions.1.operation.clone())); + return Err(VmError::ExpectedValue { + position: right_span, + }); }; let sum = left.add(&right)?; Ok(Some(sum)) } - Statement::Assign(_) => todo!(), + Statement::Assign(left, right) => { + let identifier = if let Statement::Identifier(identifier) = &left.statement { + identifier + } else { + return Err(VmError::ExpectedValue { + position: left.span, + }); + }; + let right_span = right.span; + let value = if let Some(value) = self.run_node(*right, variables)? 
{ + value + } else { + return Err(VmError::ExpectedValue { + position: right_span, + }); + }; + + variables.insert(identifier.clone(), value); + + Ok(None) + } Statement::Constant(value) => Ok(Some(value.clone())), - Statement::Identifier(_) => todo!(), - Statement::List(_) => todo!(), - Statement::Multiply(_) => todo!(), + Statement::Identifier(_) => Ok(None), + Statement::List(nodes) => { + let values = nodes + .into_iter() + .map(|node| { + let span = node.span; + if let Some(value) = self.run_node(node, variables)? { + Ok(value) + } else { + Err(VmError::ExpectedValue { position: span }) + } + }) + .collect::, VmError>>()?; + + Ok(Some(Value::list(values))) + } + Statement::Multiply(_, _) => todo!(), } } } #[derive(Clone, Debug, PartialEq)] pub enum VmError { - ExpectedValue(Statement), - InvalidOperation(Statement), ParseError(ParseError), ValueError(ValueError), + + // Analysis Failures + // These should be prevented by running the analyzer before the VM + ExpectedValue { position: Span }, } impl From for VmError { - fn from(v: ParseError) -> Self { - Self::ParseError(v) + fn from(error: ParseError) -> Self { + Self::ParseError(error) } } impl From for VmError { - fn from(v: ValueError) -> Self { - Self::ValueError(v) + fn from(error: ValueError) -> Self { + Self::ValueError(error) } } @@ -80,13 +134,13 @@ mod tests { fn add() { let input = "1 + 2"; - assert_eq!(run(input), Ok(Some(Value::integer(3)))); + assert_eq!(run(input, &mut HashMap::new()), Ok(Some(Value::integer(3)))); } #[test] fn add_multiple() { - let input = "(a + b = 1)"; + let input = "1 + 2 + 3"; - assert_eq!(run(input), Ok(Some(Value::integer(6)))); + assert_eq!(run(input, &mut HashMap::new()), Ok(Some(Value::integer(6)))); } }