From 03d44434e24ba7a1a9458285b4e9f8b655ad5279 Mon Sep 17 00:00:00 2001 From: Jeff Date: Sat, 7 Sep 2024 06:38:12 -0400 Subject: [PATCH] Refactor parsing --- dust-lang/src/chunk.rs | 125 ++++++++++++++ dust-lang/src/dust_error.rs | 2 +- dust-lang/src/lib.rs | 6 +- dust-lang/src/parser.rs | 240 ++++++++++++++++++++------- dust-lang/src/{bytecode.rs => vm.rs} | 141 ++-------------- 5 files changed, 324 insertions(+), 190 deletions(-) create mode 100644 dust-lang/src/chunk.rs rename dust-lang/src/{bytecode.rs => vm.rs} (72%) diff --git a/dust-lang/src/chunk.rs b/dust-lang/src/chunk.rs new file mode 100644 index 0000000..f686f65 --- /dev/null +++ b/dust-lang/src/chunk.rs @@ -0,0 +1,125 @@ +use std::fmt::{self, Debug, Display, Formatter}; + +use serde::{Deserialize, Serialize}; + +use crate::{Instruction, Span, Value}; + +#[derive(Clone, Eq, PartialEq, Serialize, Deserialize)] +pub struct Chunk { + code: Vec<(u8, Span)>, + constants: Vec, +} + +impl Chunk { + pub fn new() -> Self { + Self { + code: Vec::new(), + constants: Vec::new(), + } + } + + pub fn with_data(code: Vec<(u8, Span)>, constants: Vec) -> Self { + Self { code, constants } + } + + pub fn len(&self) -> usize { + self.code.len() + } + + pub fn is_empty(&self) -> bool { + self.code.is_empty() + } + + pub fn capacity(&self) -> usize { + self.code.capacity() + } + + pub fn read(&self, offset: usize) -> (u8, Span) { + self.code[offset] + } + + pub fn write(&mut self, instruction: u8, position: Span) { + self.code.push((instruction, position)); + } + + pub fn get_constant(&self, index: usize) -> Result<&Value, ChunkError> { + self.constants + .get(index) + .ok_or_else(|| ChunkError::ConstantIndexOutOfBounds(index)) + } + + pub fn push_constant(&mut self, value: Value) -> Result { + let starting_length = self.constants.len(); + + if starting_length + 1 > (u8::MAX as usize) { + Err(ChunkError::Overflow) + } else { + self.constants.push(value); + + Ok(starting_length as u8) + } + } + + pub fn clear(&mut self) { + self.code.clear(); + self.constants.clear(); + } + + pub fn disassemble(&self, name: &str) -> String { + let mut output = String::new(); + + output.push_str("== "); + output.push_str(name); + output.push_str(" ==\n"); + + let mut next_is_index = false; + + for (offset, (byte, position)) in self.code.iter().enumerate() { + if next_is_index { + let index_display = format!("{position} {offset:04} INDEX {byte}\n"); + + output.push_str(&index_display); + + next_is_index = false; + + continue; + } + + let instruction = Instruction::from_byte(*byte).unwrap(); + let instruction_display = + format!("{} {}\n", position, instruction.disassemble(self, offset)); + + output.push_str(&instruction_display); + + if let Instruction::Constant = instruction { + next_is_index = true; + } + } + + output + } +} + +impl Default for Chunk { + fn default() -> Self { + Self::new() + } +} + +impl Display for Chunk { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{}", self.disassemble("Chunk")) + } +} + +impl Debug for Chunk { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + write!(f, "{self}") + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ChunkError { + ConstantIndexOutOfBounds(usize), + Overflow, +} diff --git a/dust-lang/src/dust_error.rs b/dust-lang/src/dust_error.rs index 8e16b38..85719dc 100644 --- a/dust-lang/src/dust_error.rs +++ b/dust-lang/src/dust_error.rs @@ -1,4 +1,4 @@ -use crate::{bytecode::VmError, LexError, ParseError}; +use crate::{vm::VmError, LexError, ParseError}; pub enum DustError<'src> { LexError { diff --git a/dust-lang/src/lib.rs b/dust-lang/src/lib.rs index 86d221b..43e4bd7 100644 --- a/dust-lang/src/lib.rs +++ b/dust-lang/src/lib.rs @@ -15,7 +15,7 @@ //! //! assert_eq!(the_answer, Some(Value::integer(42))); //! ``` -pub mod bytecode; +pub mod chunk; pub mod constructor; pub mod dust_error; pub mod identifier; @@ -24,8 +24,9 @@ pub mod parser; pub mod token; pub mod r#type; pub mod value; +pub mod vm; -pub use bytecode::{Chunk, ChunkError, Instruction, Vm}; +pub use chunk::{Chunk, ChunkError}; pub use constructor::{ConstructError, Constructor}; pub use dust_error::DustError; pub use identifier::Identifier; @@ -34,6 +35,7 @@ pub use parser::{ParseError, Parser}; pub use r#type::{EnumType, FunctionType, RangeableType, StructType, Type, TypeConflict}; pub use token::{Token, TokenKind, TokenOwned}; pub use value::{Struct, Value, ValueError}; +pub use vm::{Instruction, Vm}; use std::fmt::{self, Display, Formatter}; diff --git a/dust-lang/src/parser.rs b/dust-lang/src/parser.rs index 4f302eb..410988d 100644 --- a/dust-lang/src/parser.rs +++ b/dust-lang/src/parser.rs @@ -1,6 +1,8 @@ use std::{ fmt::{self, Display, Formatter}, + mem::{self, swap}, num::ParseIntError, + ptr::replace, }; use crate::{ @@ -22,54 +24,49 @@ pub fn parse(source: &str) -> Result { pub struct Parser<'src> { lexer: Lexer<'src>, chunk: Chunk, - current_token: Option>, + previous_token: Token<'src>, + previous_position: Span, + current_token: Token<'src>, current_position: Span, } impl<'src> Parser<'src> { - pub fn new(lexer: Lexer<'src>) -> Self { + pub fn new(mut lexer: Lexer<'src>) -> Self { + let (current_token, current_position) = + lexer.next_token().unwrap_or((Token::Eof, Span(0, 0))); + Parser { lexer, chunk: Chunk::new(), - current_token: None, - current_position: Span(0, 0), + previous_token: Token::Eof, + previous_position: Span(0, 0), + current_token, + current_position, } } fn is_eof(&self) -> bool { - matches!(self.current_token, Some(Token::Eof)) + matches!(self.current_token, Token::Eof) } fn advance(&mut self) -> Result<(), ParseError> { - let (token, position) = self.lexer.next_token()?; + let (new_token, position) = self.lexer.next_token()?; - log::trace!("Advancing to token {token} at {position}"); + log::trace!("Advancing to token {new_token} at {position}"); - self.current_token = Some(token); - self.current_position = position; + self.previous_token = mem::replace(&mut self.current_token, new_token); + self.previous_position = mem::replace(&mut self.current_position, position); Ok(()) } - fn current_token_owned(&self) -> TokenOwned { - self.current_token - .as_ref() - .map_or(TokenOwned::Eof, |token| token.to_owned()) - } - - fn current_token_kind(&self) -> TokenKind { - self.current_token - .as_ref() - .map_or(TokenKind::Eof, |token| token.kind()) - } - fn consume(&mut self, expected: TokenKind) -> Result<(), ParseError> { - if self.current_token_kind() == expected { + if self.current_token.kind() == expected { self.advance() } else { Err(ParseError::ExpectedToken { expected, - found: self.current_token_owned(), + found: self.current_token.to_owned(), position: self.current_position, }) } @@ -81,7 +78,7 @@ impl<'src> Parser<'src> { fn emit_constant(&mut self, value: Value) -> Result<(), ParseError> { let constant_index = self.chunk.push_constant(value)?; - let position = self.current_position; + let position = self.previous_position; self.emit_byte(Instruction::Constant as u8, position); self.emit_byte(constant_index, position); @@ -89,8 +86,19 @@ impl<'src> Parser<'src> { Ok(()) } + fn parse_boolean(&mut self) -> Result<(), ParseError> { + if let Token::Boolean(text) = self.previous_token { + let boolean = text.parse::().unwrap(); + let value = Value::boolean(boolean); + + self.emit_constant(value)?; + } + + Ok(()) + } + fn parse_integer(&mut self) -> Result<(), ParseError> { - if let Some(Token::Integer(text)) = self.current_token { + if let Token::Integer(text) = self.previous_token { let integer = text.parse::().unwrap(); let value = Value::integer(integer); @@ -102,27 +110,30 @@ impl<'src> Parser<'src> { fn parse_grouped(&mut self) -> Result<(), ParseError> { self.parse_expression()?; - - self.consume(TokenKind::RightParenthesis)?; - - Ok(()) + self.consume(TokenKind::RightParenthesis) } fn parse_unary(&mut self) -> Result<(), ParseError> { - if let Some(Token::Minus) = self.current_token { - let operator_position = self.current_position; + let byte = match self.previous_token.kind() { + TokenKind::Minus => Instruction::Negate as u8, + _ => { + return Err(ParseError::ExpectedTokenMultiple { + expected: vec![TokenKind::Minus], + found: self.previous_token.to_owned(), + position: self.previous_position, + }) + } + }; - self.advance()?; - self.parse_expression()?; - self.emit_byte(Instruction::Negate as u8, operator_position); - } + self.parse_expression()?; + self.emit_byte(byte, self.previous_position); Ok(()) } fn parse_binary(&mut self) -> Result<(), ParseError> { - let operator_position = self.current_position; - let operator = self.current_token_kind(); + let operator_position = self.previous_position; + let operator = self.previous_token.kind(); let rule = ParseRule::from(&operator); self.parse(rule.precedence.increment())?; @@ -140,8 +151,8 @@ impl<'src> Parser<'src> { TokenKind::Star, TokenKind::Slash, ], - found: self.current_token_owned(), - position: self.current_position, + found: self.previous_token.to_owned(), + position: operator_position, }) } }; @@ -152,36 +163,36 @@ impl<'src> Parser<'src> { } fn parse_expression(&mut self) -> Result<(), ParseError> { - self.parse(Precedence::Assignment) + self.parse(Precedence::None) } - // Pratt parsing functions - fn parse(&mut self, precedence: Precedence) -> Result<(), ParseError> { - log::trace!("Parsing with precedence {precedence}"); - self.advance()?; - let prefix_rule = ParseRule::from(&self.current_token_kind()).prefix; - - if let Some(prefix) = prefix_rule { - log::trace!("Parsing {} as prefix", &self.current_token_owned()); + if let Some(prefix) = ParseRule::from(&self.previous_token.kind()).prefix { + log::trace!( + "Parsing {} as prefix with precedence {precedence}", + self.previous_token, + ); prefix(self)?; } else { - return Err(ParseError::ExpectedPrefix { - found: self.current_token_owned(), - position: self.current_position, + return Err(ParseError::ExpectedExpression { + found: self.previous_token.to_owned(), + position: self.previous_position, }); } - while precedence <= ParseRule::from(&self.current_token_kind()).precedence { + while precedence <= ParseRule::from(&self.current_token.kind()).precedence { self.advance()?; - let infix_rule = ParseRule::from(&self.current_token_kind()).infix; + let infix_rule = ParseRule::from(&self.previous_token.kind()).infix; if let Some(infix) = infix_rule { - log::trace!("Parsing {} as infix", self.current_token_owned()); + log::trace!( + "Parsing {} as infix with precedence {precedence}", + self.previous_token, + ); infix(self)?; } else { @@ -230,10 +241,6 @@ impl Precedence { fn increment(&self) -> Self { Self::from_byte(*self as u8 + 1) } - - fn decrement(&self) -> Self { - Self::from_byte(*self as u8 - 1) - } } impl Display for Precedence { @@ -242,7 +249,7 @@ impl Display for Precedence { } } -type ParserFunction<'a> = fn(&'_ mut Parser<'a>) -> Result<(), ParseError>; +type ParserFunction<'a> = fn(&mut Parser<'a>) -> Result<(), ParseError>; #[derive(Debug, Clone, Copy)] pub struct ParseRule<'a> { @@ -260,7 +267,11 @@ impl From<&TokenKind> for ParseRule<'_> { precedence: Precedence::None, }, TokenKind::Identifier => todo!(), - TokenKind::Boolean => todo!(), + TokenKind::Boolean => ParseRule { + prefix: Some(Parser::parse_boolean), + infix: None, + precedence: Precedence::None, + }, TokenKind::Character => todo!(), TokenKind::Float => todo!(), TokenKind::Integer => ParseRule { @@ -317,7 +328,11 @@ impl From<&TokenKind> for ParseRule<'_> { }, TokenKind::PlusEqual => todo!(), TokenKind::RightCurlyBrace => todo!(), - TokenKind::RightParenthesis => todo!(), + TokenKind::RightParenthesis => ParseRule { + prefix: None, + infix: None, + precedence: Precedence::None, + }, TokenKind::RightSquareBrace => todo!(), TokenKind::Semicolon => todo!(), TokenKind::Star => ParseRule { @@ -337,7 +352,7 @@ impl From<&TokenKind> for ParseRule<'_> { #[derive(Debug, PartialEq)] pub enum ParseError { - ExpectedPrefix { + ExpectedExpression { found: TokenOwned, position: Span, }, @@ -381,7 +396,7 @@ mod tests { use super::*; #[test] - fn parse_integer() { + fn integer() { let source = "42"; let test_chunk = parse(source); @@ -395,9 +410,46 @@ mod tests { } #[test] - fn parse_addition() { + fn boolean() { + let source = "true"; + let test_chunk = parse(source); + + assert_eq!( + test_chunk, + Ok(Chunk::with_data( + vec![(Instruction::Constant as u8, Span(0, 4)), (0, Span(0, 4))], + vec![Value::boolean(true)] + )) + ); + } + + #[test] + fn grouping() { env_logger::builder().is_test(true).try_init().unwrap(); + let source = "(42 + 42) * 2"; + let test_chunk = parse(source); + + assert_eq!( + test_chunk, + Ok(Chunk::with_data( + vec![ + (Instruction::Constant as u8, Span(1, 3)), + (0, Span(1, 3)), + (Instruction::Constant as u8, Span(6, 8)), + (1, Span(6, 8)), + (Instruction::Add as u8, Span(4, 5)), + (Instruction::Constant as u8, Span(11, 12)), + (0, Span(11, 12)), + (Instruction::Multiply as u8, Span(9, 10)), + ], + vec![Value::integer(42), Value::integer(42), Value::integer(2)] + )) + ); + } + + #[test] + fn addition() { let source = "42 + 42"; let test_chunk = parse(source); @@ -415,4 +467,64 @@ mod tests { )) ); } + + #[test] + fn subtraction() { + let source = "42 - 42"; + let test_chunk = parse(source); + + assert_eq!( + test_chunk, + Ok(Chunk::with_data( + vec![ + (Instruction::Constant as u8, Span(0, 2)), + (0, Span(0, 2)), + (Instruction::Constant as u8, Span(5, 7)), + (1, Span(5, 7)), + (Instruction::Subtract as u8, Span(3, 4)), + ], + vec![Value::integer(42), Value::integer(42)] + )) + ); + } + + #[test] + fn multiplication() { + let source = "42 * 42"; + let test_chunk = parse(source); + + assert_eq!( + test_chunk, + Ok(Chunk::with_data( + vec![ + (Instruction::Constant as u8, Span(0, 2)), + (0, Span(0, 2)), + (Instruction::Constant as u8, Span(5, 7)), + (1, Span(5, 7)), + (Instruction::Multiply as u8, Span(3, 4)), + ], + vec![Value::integer(42), Value::integer(42)] + )) + ); + } + + #[test] + fn division() { + let source = "42 / 42"; + let test_chunk = parse(source); + + assert_eq!( + test_chunk, + Ok(Chunk::with_data( + vec![ + (Instruction::Constant as u8, Span(0, 2)), + (0, Span(0, 2)), + (Instruction::Constant as u8, Span(5, 7)), + (1, Span(5, 7)), + (Instruction::Divide as u8, Span(3, 4)), + ], + vec![Value::integer(42), Value::integer(42)] + )) + ); + } } diff --git a/dust-lang/src/bytecode.rs b/dust-lang/src/vm.rs similarity index 72% rename from dust-lang/src/bytecode.rs rename to dust-lang/src/vm.rs index 1b6a2cf..fcab81e 100644 --- a/dust-lang/src/bytecode.rs +++ b/dust-lang/src/vm.rs @@ -1,8 +1,6 @@ -use std::fmt::{self, Debug, Display, Formatter}; - use serde::{Deserialize, Serialize}; -use crate::{Span, Value, ValueError}; +use crate::{Chunk, ChunkError, Span, Value, ValueError}; #[derive(Debug, Clone, Eq, PartialEq)] pub struct Vm { @@ -31,7 +29,7 @@ impl Vm { match instruction { Instruction::Constant => { let (index, _) = self.read(); - let value = self.read_constant(index as usize); + let value = self.read_constant(index as usize)?; self.stack.push(value); } @@ -106,23 +104,30 @@ impl Vm { pub fn read(&mut self) -> (u8, Span) { self.ip += 1; - self.chunk.code[self.ip - 1] + self.chunk.read(self.ip - 1) } - pub fn read_constant(&self, index: usize) -> Value { - self.chunk.constants[index].clone() + pub fn read_constant(&self, index: usize) -> Result { + Ok(self.chunk.get_constant(index)?.clone()) } } #[derive(Debug, Clone, PartialEq)] pub enum VmError { - ChunkOverflow, InvalidInstruction(u8, Span), StackUnderflow, StackOverflow, + + Chunk(ChunkError), Value(ValueError), } +impl From for VmError { + fn from(error: ChunkError) -> Self { + Self::Chunk(error) + } +} + impl From for VmError { fn from(error: ValueError) -> Self { Self::Value(error) @@ -167,9 +172,12 @@ impl Instruction { match self { Instruction::Constant => { let (index, _) = chunk.read(offset + 1); - let value = &chunk.constants[index as usize]; + let value_display = chunk + .get_constant(index as usize) + .map(|value| value.to_string()) + .unwrap_or_else(|error| format!("{:?}", error)); - format!("{offset:04} CONSTANT {index} {value}") + format!("{offset:04} CONSTANT {index} {value_display}") } Instruction::Return => format!("{offset:04} RETURN"), @@ -185,119 +193,6 @@ impl Instruction { } } -#[derive(Clone, Eq, PartialEq, Serialize, Deserialize)] -pub struct Chunk { - code: Vec<(u8, Span)>, - constants: Vec, -} - -impl Chunk { - pub fn new() -> Self { - Self { - code: Vec::new(), - constants: Vec::new(), - } - } - - pub fn with_data(code: Vec<(u8, Span)>, constants: Vec) -> Self { - Self { code, constants } - } - - pub fn len(&self) -> usize { - self.code.len() - } - - pub fn is_empty(&self) -> bool { - self.code.is_empty() - } - - pub fn capacity(&self) -> usize { - self.code.capacity() - } - - pub fn read(&self, offset: usize) -> (u8, Span) { - self.code[offset] - } - - pub fn write(&mut self, instruction: u8, position: Span) { - self.code.push((instruction, position)); - } - - pub fn push_constant(&mut self, value: Value) -> Result { - let starting_length = self.constants.len(); - - if starting_length + 1 > (u8::MAX as usize) { - Err(ChunkError::Overflow) - } else { - self.constants.push(value); - - Ok(starting_length as u8) - } - } - - pub fn clear(&mut self) { - self.code.clear(); - self.constants.clear(); - } - - pub fn disassemble(&self, name: &str) -> String { - let mut output = String::new(); - - output.push_str("== "); - output.push_str(name); - output.push_str(" ==\n"); - - let mut next_is_index = false; - - for (offset, (byte, position)) in self.code.iter().enumerate() { - if next_is_index { - let index_display = format!("{position} {offset:04} INDEX {byte}\n"); - - output.push_str(&index_display); - - next_is_index = false; - - continue; - } - - let instruction = Instruction::from_byte(*byte).unwrap(); - let instruction_display = - format!("{} {}\n", position, instruction.disassemble(self, offset)); - - output.push_str(&instruction_display); - - if let Instruction::Constant = instruction { - next_is_index = true; - } - } - - output - } -} - -impl Default for Chunk { - fn default() -> Self { - Self::new() - } -} - -impl Display for Chunk { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{}", self.disassemble("Chunk")) - } -} - -impl Debug for Chunk { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{self}") - } -} - -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum ChunkError { - Overflow, -} - #[cfg(test)] pub mod tests { use super::*;