From 812d93048838af48e8b32044d8806221b0a2202d Mon Sep 17 00:00:00 2001
From: Jeff
Date: Sat, 7 Sep 2024 04:34:03 -0400
Subject: [PATCH] Continue writing bytecode implementation

---
 dust-lang/src/bytecode.rs   |  84 +++++++--
 dust-lang/src/dust_error.rs |  16 ++
 dust-lang/src/lib.rs        |   2 +
 dust-lang/src/parser.rs     | 347 +++++++++++++++++++++++++++++++++---
 dust-lang/src/token.rs      | 129 --------------
 5 files changed, 402 insertions(+), 176 deletions(-)
 create mode 100644 dust-lang/src/dust_error.rs

diff --git a/dust-lang/src/bytecode.rs b/dust-lang/src/bytecode.rs
index 19fb847..1b6a2cf 100644
--- a/dust-lang/src/bytecode.rs
+++ b/dust-lang/src/bytecode.rs
@@ -1,9 +1,9 @@
+use std::fmt::{self, Debug, Display, Formatter};
+
 use serde::{Deserialize, Serialize};
 
 use crate::{Span, Value, ValueError};
 
-const STACK_SIZE: usize = 256;
-
 #[derive(Debug, Clone, Eq, PartialEq)]
 pub struct Vm {
     chunk: Chunk,
@@ -12,11 +12,13 @@ pub struct Vm {
 }
 
 impl Vm {
+    const STACK_SIZE: usize = 256;
+
     pub fn new(chunk: Chunk) -> Self {
         Self {
             chunk,
             ip: 0,
-            stack: Vec::with_capacity(STACK_SIZE),
+            stack: Vec::with_capacity(Self::STACK_SIZE),
         }
     }
@@ -31,7 +33,7 @@
                     let (index, _) = self.read();
                     let value = self.read_constant(index as usize);
 
-                    self.stack.push(value.clone());
+                    self.stack.push(value);
                 }
                 Instruction::Return => {
                     let value = self.pop()?;
@@ -84,7 +86,7 @@
     }
 
     pub fn push(&mut self, value: Value) -> Result<(), VmError> {
-        if self.stack.len() == STACK_SIZE {
+        if self.stack.len() == Self::STACK_SIZE {
             Err(VmError::StackOverflow)
         } else {
             self.stack.push(value);
@@ -164,26 +166,26 @@
     pub fn disassemble(&self, chunk: &Chunk, offset: usize) -> String {
         match self {
             Instruction::Constant => {
-                let index = chunk.code[offset + 1].0 as usize;
-                let value = &chunk.constants[index];
+                let (index, _) = chunk.read(offset + 1);
+                let value = &chunk.constants[index as usize];
 
-                format!("{:04} CONSTANT {} {}", offset, index, value)
+                format!("{offset:04} CONSTANT {index} {value}")
             }
-            Instruction::Return => format!("{:04} RETURN", offset),
+            Instruction::Return => format!("{offset:04} RETURN"),
 
             // Unary
-            Instruction::Negate => format!("{:04} NEGATE", offset),
+            Instruction::Negate => format!("{offset:04} NEGATE"),
 
             // Binary
-            Instruction::Add => format!("{:04} ADD", offset),
-            Instruction::Subtract => format!("{:04} SUBTRACT", offset),
-            Instruction::Multiply => format!("{:04} MULTIPLY", offset),
-            Instruction::Divide => format!("{:04} DIVIDE", offset),
+            Instruction::Add => format!("{offset:04} ADD"),
+            Instruction::Subtract => format!("{offset:04} SUBTRACT"),
+            Instruction::Multiply => format!("{offset:04} MULTIPLY"),
+            Instruction::Divide => format!("{offset:04} DIVIDE"),
         }
     }
 }
 
-#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
+#[derive(Clone, Eq, PartialEq, Serialize, Deserialize)]
 pub struct Chunk {
     code: Vec<(u8, Span)>,
     constants: Vec<Value>,
@@ -197,6 +199,10 @@
         }
     }
 
+    pub fn with_data(code: Vec<(u8, Span)>, constants: Vec<Value>) -> Self {
+        Self { code, constants }
+    }
+
     pub fn len(&self) -> usize {
         self.code.len()
     }
@@ -209,6 +215,10 @@
         self.code.capacity()
     }
 
+    pub fn read(&self, offset: usize) -> (u8, Span) {
+        self.code[offset]
+    }
+
     pub fn write(&mut self, instruction: u8, position: Span) {
         self.code.push((instruction, position));
     }
@@ -230,14 +240,38 @@
         self.constants.clear();
     }
 
-    pub fn disassemble(&self, name: &str) {
-        println!("== {} ==", name);
+    pub fn disassemble(&self, name: &str) -> String {
+        let mut output = String::new();
+
+        output.push_str("== ");
+        output.push_str(name);
+        output.push_str(" ==\n");
+
+        let mut next_is_index = false;
 
         for (offset, (byte, position)) in self.code.iter().enumerate() {
-            let instruction = Instruction::from_byte(*byte).unwrap();
+            if next_is_index {
+                let index_display = format!("{position} {offset:04} INDEX {byte}\n");
 
-            println!("{} {}", position, instruction.disassemble(self, offset));
+                output.push_str(&index_display);
+
+                next_is_index = false;
+
+                continue;
+            }
+
+            let instruction = Instruction::from_byte(*byte).unwrap();
+            let instruction_display =
+                format!("{} {}\n", position, instruction.disassemble(self, offset));
+
+            output.push_str(&instruction_display);
+
+            if let Instruction::Constant = instruction {
+                next_is_index = true;
+            }
         }
+
+        output
     }
 }
 
 impl Default for Chunk {
@@ -247,6 +281,18 @@
     }
 }
 
+impl Display for Chunk {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        write!(f, "{}", self.disassemble("Chunk"))
+    }
+}
+
+impl Debug for Chunk {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        write!(f, "{self}")
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq)]
 pub enum ChunkError {
     Overflow,
diff --git a/dust-lang/src/dust_error.rs b/dust-lang/src/dust_error.rs
new file mode 100644
index 0000000..8e16b38
--- /dev/null
+++ b/dust-lang/src/dust_error.rs
@@ -0,0 +1,16 @@
+use crate::{bytecode::VmError, LexError, ParseError};
+
+pub enum DustError<'src> {
+    LexError {
+        error: LexError,
+        source: &'src str,
+    },
+    ParseError {
+        error: ParseError,
+        source: &'src str,
+    },
+    VmError {
+        error: VmError,
+        source: &'src str,
+    },
+}
diff --git a/dust-lang/src/lib.rs b/dust-lang/src/lib.rs
index d80e9a4..86d221b 100644
--- a/dust-lang/src/lib.rs
+++ b/dust-lang/src/lib.rs
@@ -17,6 +17,7 @@
 //! ```
 pub mod bytecode;
 pub mod constructor;
+pub mod dust_error;
 pub mod identifier;
 pub mod lexer;
 pub mod parser;
@@ -26,6 +27,7 @@ pub mod value;
 
 pub use bytecode::{Chunk, ChunkError, Instruction, Vm};
 pub use constructor::{ConstructError, Constructor};
+pub use dust_error::DustError;
 pub use identifier::Identifier;
 pub use lexer::{LexError, Lexer};
 pub use parser::{ParseError, Parser};
diff --git a/dust-lang/src/parser.rs b/dust-lang/src/parser.rs
index 1b70523..6f61d30 100644
--- a/dust-lang/src/parser.rs
+++ b/dust-lang/src/parser.rs
@@ -1,76 +1,143 @@
-use std::num::ParseIntError;
+use std::{
+    fmt::{self, Display, Formatter},
+    num::ParseIntError,
+};
 
 use crate::{
     Chunk, ChunkError, Instruction, LexError, Lexer, Span, Token, TokenKind, TokenOwned, Value,
 };
 
+pub fn parse(source: &str) -> Result<Chunk, ParseError> {
+    let lexer = Lexer::new(source);
+    let mut parser = Parser::new(lexer);
+
+    while !parser.is_eof() {
+        parser.parse(Precedence::None)?;
+    }
+
+    Ok(parser.chunk)
+}
+
 #[derive(Debug)]
 pub struct Parser<'src> {
     lexer: Lexer<'src>,
-    current_token: Token<'src>,
+    chunk: Chunk,
+    current_token: Option<Token<'src>>,
     current_position: Span,
 }
 
 impl<'src> Parser<'src> {
-    pub fn new(mut lexer: Lexer<'src>) -> Self {
-        let (current_token, current_position) =
-            lexer.next_token().unwrap_or((Token::Eof, Span(0, 0)));
-
+    pub fn new(lexer: Lexer<'src>) -> Self {
         Parser {
             lexer,
-            current_token,
-            current_position,
+            chunk: Chunk::new(),
+            current_token: None,
+            current_position: Span(0, 0),
         }
     }
 
     fn is_eof(&self) -> bool {
-        matches!(self.current_token, Token::Eof)
+        matches!(self.current_token, Some(Token::Eof))
     }
 
     fn advance(&mut self) -> Result<(), ParseError> {
         let (token, position) = self.lexer.next_token()?;
 
-        self.current_token = token;
+        log::trace!("Advancing to token {token} at {position}");
at {position}"); + + self.current_token = Some(token); self.current_position = position; Ok(()) } + fn current_token_owned(&self) -> TokenOwned { + self.current_token + .as_ref() + .map_or(TokenOwned::Eof, |token| token.to_owned()) + } + + fn current_token_kind(&self) -> TokenKind { + self.current_token + .as_ref() + .map_or(TokenKind::Eof, |token| token.kind()) + } + fn consume(&mut self, expected: TokenKind) -> Result<(), ParseError> { - if self.current_token.kind() == expected { + if self.current_token_kind() == expected { self.advance() } else { Err(ParseError::ExpectedToken { expected, - found: self.current_token.to_owned(), + found: self.current_token_owned(), position: self.current_position, }) } } - fn emit_instruction(&mut self, instruction: Instruction, chunk: &mut Chunk) { - chunk.write(instruction as u8, self.current_position); + fn emit_byte(&mut self, byte: u8) { + self.chunk.write(byte, self.current_position); } - fn parse_prefix(&mut self, chunk: &mut Chunk) -> Result<(), ParseError> { + fn emit_constant(&mut self, value: Value) -> Result<(), ParseError> { + let constant_index = self.chunk.push_constant(value)?; + + self.emit_byte(Instruction::Constant as u8); + self.emit_byte(constant_index); + Ok(()) } - fn parse_primary(&mut self, chunk: &mut Chunk) -> Result<(), ParseError> { - match self.current_token { - Token::Integer(text) => { - let integer = text.parse::()?; - let value = Value::integer(integer); - let constant_index = chunk.push_constant(value)?; + fn parse_integer(&mut self) -> Result<(), ParseError> { + if let Some(Token::Integer(text)) = self.current_token { + let integer = text.parse::().unwrap(); + let value = Value::integer(integer); - chunk.write(Instruction::Constant as u8, self.current_position); - chunk.write(constant_index, self.current_position); - } - Token::LeftParenthesis => {} + self.emit_constant(value)?; + } + + Ok(()) + } + + fn parse_grouped(&mut self) -> Result<(), ParseError> { + self.parse_expression()?; + + self.consume(TokenKind::RightParenthesis)?; + + Ok(()) + } + + fn parse_unary(&mut self) -> Result<(), ParseError> { + if let Some(Token::Minus) = self.current_token { + self.advance()?; + self.parse_expression()?; + self.emit_byte(Instruction::Negate as u8); + } + + Ok(()) + } + + fn parse_binary(&mut self) -> Result<(), ParseError> { + let operator_position = self.current_position; + let operator = self.current_token_kind(); + let rule = ParseRule::from(&operator); + + self.parse(rule.precedence.increment())?; + + match operator { + TokenKind::Plus => self.emit_byte(Instruction::Add as u8), + TokenKind::Minus => self.emit_byte(Instruction::Subtract as u8), + TokenKind::Star => self.emit_byte(Instruction::Multiply as u8), + TokenKind::Slash => self.emit_byte(Instruction::Divide as u8), _ => { return Err(ParseError::ExpectedTokenMultiple { - expected: vec![TokenKind::Integer], - found: self.current_token.to_owned(), + expected: vec![ + TokenKind::Plus, + TokenKind::Minus, + TokenKind::Star, + TokenKind::Slash, + ], + found: self.current_token_owned(), position: self.current_position, }) } @@ -79,13 +146,196 @@ impl<'src> Parser<'src> { Ok(()) } - pub fn parse_postfix(&mut self, left: Value, chunk: &mut Chunk) -> Result<(), ParseError> { + fn parse_expression(&mut self) -> Result<(), ParseError> { + self.parse(Precedence::Assignment) + } + + // Pratt parsing functions + + fn parse(&mut self, precedence: Precedence) -> Result<(), ParseError> { + log::trace!("Parsing with precedence {precedence}"); + + self.advance()?; + + let prefix_rule 
+
+        if let Some(prefix) = prefix_rule {
+            log::trace!("Parsing {} as prefix", &self.current_token_owned());
+
+            prefix(self)?;
+        } else {
+            return Err(ParseError::ExpectedPrefix {
+                found: self.current_token_owned(),
+                position: self.current_position,
+            });
+        }
+
+        while precedence <= ParseRule::from(&self.current_token_kind()).precedence {
+            self.advance()?;
+
+            let infix_rule = ParseRule::from(&self.current_token_kind()).infix;
+
+            if let Some(infix) = infix_rule {
+                log::trace!("Parsing {} as infix", self.current_token_owned());
+
+                infix(self)?;
+            } else {
+                break;
+            }
+        }
+
         Ok(())
     }
 }
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Precedence {
+    None = 0,
+    Assignment = 1,
+    Conditional = 2,
+    LogicalOr = 3,
+    LogicalAnd = 4,
+    Equality = 5,
+    Comparison = 6,
+    Term = 7,
+    Factor = 8,
+    Unary = 9,
+    Call = 10,
+    Primary = 11,
+}
+
+impl Precedence {
+    fn from_byte(byte: u8) -> Self {
+        match byte {
+            0 => Self::None,
+            1 => Self::Assignment,
+            2 => Self::Conditional,
+            3 => Self::LogicalOr,
+            4 => Self::LogicalAnd,
+            5 => Self::Equality,
+            6 => Self::Comparison,
+            7 => Self::Term,
+            8 => Self::Factor,
+            9 => Self::Unary,
+            10 => Self::Call,
+            _ => Self::Primary,
+        }
+    }
+
+    fn increment(&self) -> Self {
+        Self::from_byte(*self as u8 + 1)
+    }
+
+    fn decrement(&self) -> Self {
+        Self::from_byte(*self as u8 - 1)
+    }
+}
+
+impl Display for Precedence {
+    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
+        write!(f, "{:?}", self)
+    }
+}
+
+type ParserFunction<'a> = fn(&'_ mut Parser<'a>) -> Result<(), ParseError>;
+
+#[derive(Debug, Clone, Copy)]
+pub struct ParseRule<'a> {
+    pub prefix: Option<ParserFunction<'a>>,
+    pub infix: Option<ParserFunction<'a>>,
+    pub precedence: Precedence,
+}
+
+impl From<&TokenKind> for ParseRule<'_> {
+    fn from(token_kind: &TokenKind) -> Self {
+        match token_kind {
+            TokenKind::Eof => ParseRule {
+                prefix: None,
+                infix: None,
+                precedence: Precedence::None,
+            },
+            TokenKind::Identifier => todo!(),
+            TokenKind::Boolean => todo!(),
+            TokenKind::Character => todo!(),
+            TokenKind::Float => todo!(),
+            TokenKind::Integer => ParseRule {
+                prefix: Some(Parser::parse_integer),
+                infix: None,
+                precedence: Precedence::None,
+            },
+            TokenKind::String => todo!(),
+            TokenKind::Async => todo!(),
+            TokenKind::Bool => todo!(),
+            TokenKind::Break => todo!(),
+            TokenKind::Else => todo!(),
+            TokenKind::FloatKeyword => todo!(),
+            TokenKind::If => todo!(),
+            TokenKind::Int => todo!(),
+            TokenKind::Let => todo!(),
+            TokenKind::Loop => todo!(),
+            TokenKind::Map => todo!(),
+            TokenKind::Str => todo!(),
+            TokenKind::While => todo!(),
+            TokenKind::BangEqual => todo!(),
+            TokenKind::Bang => todo!(),
+            TokenKind::Colon => todo!(),
+            TokenKind::Comma => todo!(),
+            TokenKind::Dot => todo!(),
+            TokenKind::DoubleAmpersand => todo!(),
+            TokenKind::DoubleDot => todo!(),
+            TokenKind::DoubleEqual => todo!(),
+            TokenKind::DoublePipe => todo!(),
+            TokenKind::Equal => todo!(),
+            TokenKind::Greater => todo!(),
+            TokenKind::GreaterOrEqual => todo!(),
+            TokenKind::LeftCurlyBrace => todo!(),
+            TokenKind::LeftParenthesis => ParseRule {
+                prefix: Some(Parser::parse_grouped),
+                infix: None,
+                precedence: Precedence::None,
+            },
+            TokenKind::LeftSquareBrace => todo!(),
+            TokenKind::Less => todo!(),
+            TokenKind::LessOrEqual => todo!(),
+            TokenKind::Minus => ParseRule {
+                prefix: Some(Parser::parse_unary),
+                infix: Some(Parser::parse_binary),
+                precedence: Precedence::Term,
+            },
+            TokenKind::MinusEqual => todo!(),
+            TokenKind::Mut => todo!(),
+            TokenKind::Percent => todo!(),
+            TokenKind::Plus => ParseRule {
+                prefix: None,
+                infix: Some(Parser::parse_binary),
+                precedence: Precedence::Term,
+            },
+            TokenKind::PlusEqual => todo!(),
+            TokenKind::RightCurlyBrace => todo!(),
+            TokenKind::RightParenthesis => todo!(),
+            TokenKind::RightSquareBrace => todo!(),
+            TokenKind::Semicolon => todo!(),
+            TokenKind::Star => ParseRule {
+                prefix: None,
+                infix: Some(Parser::parse_binary),
+                precedence: Precedence::Factor,
+            },
+            TokenKind::Struct => todo!(),
+            TokenKind::Slash => ParseRule {
+                prefix: None,
+                infix: Some(Parser::parse_binary),
+                precedence: Precedence::Factor,
+            },
+        }
+    }
+}
+
 #[derive(Debug, PartialEq)]
 pub enum ParseError {
+    ExpectedPrefix {
+        found: TokenOwned,
+        position: Span,
+    },
     ExpectedToken {
         expected: TokenKind,
         found: TokenOwned,
         position: Span,
@@ -120,3 +370,44 @@ impl From<ChunkError> for ParseError {
         Self::Chunk(error)
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_integer() {
+        let source = "42";
+        let test_chunk = parse(source);
+
+        assert_eq!(
+            test_chunk,
+            Ok(Chunk::with_data(
+                vec![(Instruction::Constant as u8, Span(0, 2)), (0, Span(0, 2))],
+                vec![Value::integer(42)]
+            ))
+        );
+    }
+
+    #[test]
+    fn parse_addition() {
+        env_logger::builder().is_test(true).try_init().unwrap();
+
+        let source = "42 + 42";
+        let test_chunk = parse(source);
+
+        assert_eq!(
+            test_chunk,
+            Ok(Chunk::with_data(
+                vec![
+                    (Instruction::Constant as u8, Span(0, 2)),
+                    (0, Span(0, 2)),
+                    (Instruction::Constant as u8, Span(5, 7)),
+                    (1, Span(5, 7)),
+                    (Instruction::Add as u8, Span(3, 4)),
+                ],
+                vec![Value::integer(42), Value::integer(42)]
+            ))
+        );
+    }
+}
diff --git a/dust-lang/src/token.rs b/dust-lang/src/token.rs
index 2e41d95..2f2ce04 100644
--- a/dust-lang/src/token.rs
+++ b/dust-lang/src/token.rs
@@ -227,58 +227,6 @@ impl<'src> Token<'src> {
             Token::While => TokenKind::While,
         }
     }
-
-    pub fn is_eof(&self) -> bool {
-        matches!(self, Token::Eof)
-    }
-
-    pub fn precedence(&self) -> u8 {
-        match self {
-            Token::Dot => 9,
-            Token::LeftParenthesis | Token::LeftSquareBrace => 8,
-            Token::Star | Token::Slash | Token::Percent => 7,
-            Token::Minus | Token::Plus => 6,
-            Token::DoubleEqual
-            | Token::Less
-            | Token::LessEqual
-            | Token::Greater
-            | Token::GreaterEqual => 5,
-            Token::DoubleAmpersand => 4,
-            Token::DoublePipe => 3,
-            Token::DoubleDot => 2,
-            Token::Equal | Token::MinusEqual | Token::PlusEqual => 1,
-            _ => 0,
-        }
-    }
-
-    pub fn is_left_associative(&self) -> bool {
-        matches!(
-            self,
-            Token::Dot
-                | Token::DoubleAmpersand
-                | Token::DoublePipe
-                | Token::Plus
-                | Token::Minus
-                | Token::Star
-                | Token::Slash
-                | Token::Percent
-        )
-    }
-
-    pub fn is_right_associative(&self) -> bool {
-        matches!(self, Token::Equal | Token::MinusEqual | Token::PlusEqual)
-    }
-
-    pub fn is_prefix(&self) -> bool {
-        matches!(self, Token::Bang | Token::Minus | Token::Star)
-    }
-
-    pub fn is_postfix(&self) -> bool {
-        matches!(
-            self,
-            Token::Dot | Token::LeftCurlyBrace | Token::LeftParenthesis | Token::LeftSquareBrace
-        )
-    }
 }
 
 impl<'src> Display for Token<'src> {
@@ -572,80 +520,3 @@ impl Display for TokenKind {
         }
     }
 }
-
-#[cfg(test)]
-pub(crate) mod tests {
-    use super::*;
-
-    pub fn all_tokens<'src>() -> [Token<'src>; 47] {
-        [
-            Token::Async,
-            Token::Bang,
-            Token::BangEqual,
-            Token::Bool,
-            Token::Break,
-            Token::Colon,
-            Token::Comma,
-            Token::Dot,
-            Token::DoubleAmpersand,
-            Token::DoubleDot,
-            Token::DoubleEqual,
-            Token::DoublePipe,
-            Token::Else,
-            Token::Eof,
-            Token::Equal,
-            Token::FloatKeyword,
-            Token::Greater,
-            Token::GreaterEqual,
-            Token::If,
-            Token::Int,
-            Token::LeftCurlyBrace,
-            Token::LeftParenthesis,
-            Token::LeftSquareBrace,
-            Token::Let,
-            Token::Less,
-            Token::LessEqual,
-            Token::Map,
-            Token::Minus,
-            Token::MinusEqual,
-            Token::Mut,
-            Token::Percent,
-            Token::Plus,
-            Token::PlusEqual,
-            Token::RightCurlyBrace,
-            Token::RightParenthesis,
-            Token::RightSquareBrace,
-            Token::Semicolon,
-            Token::Star,
-            Token::Str,
-            Token::Slash,
-            Token::Boolean("true"),
-            Token::Float("0.0"),
-            Token::Integer("0"),
-            Token::String("string"),
-            Token::Identifier("foobar"),
-            Token::Struct,
-            Token::While,
-        ]
-    }
-
-    #[test]
-    fn token_displays() {
-        for token in all_tokens().iter() {
-            let display = token.to_string();
-
-            assert_eq!(display, token.to_owned().to_string());
-
-            if let Token::Boolean(_)
-            | Token::Float(_)
-            | Token::Identifier(_)
-            | Token::Integer(_)
-            | Token::String(_) = token
-            {
-                continue;
-            } else {
-                assert_eq!(display, token.kind().to_string());
-            }
-        }
-    }
-}
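
Usage note (editorial, not part of the patch): a minimal sketch of how the new `parse` entry point and the `Chunk` disassembly introduced above could be exercised from code that depends on the crate. The `dust_lang::parser::parse` path and the `main` wrapper are assumptions for illustration only; this patch re-exports `ParseError` and `Parser` from lib.rs but does not re-export `parse` itself.

    // Sketch only: assumes the library crate is addressed as `dust_lang` and
    // that `parse` is reached through the public `parser` module, since
    // lib.rs does not re-export it in this patch.
    use dust_lang::parser::parse;

    fn main() {
        // "42 + 42" mirrors the `parse_addition` test added in parser.rs.
        match parse("42 + 42") {
            // `Chunk` now implements `Display` via `disassemble("Chunk")`, so
            // printing it yields the instruction listing with constant indexes.
            Ok(chunk) => println!("{chunk}"),
            // `ParseError` derives `Debug`, so it can be shown with `{:?}`.
            Err(error) => eprintln!("parse error: {error:?}"),
        }
    }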