Continue writing bytecode implementation

This commit is contained in:
Jeff 2024-09-07 04:34:03 -04:00
parent 406edda573
commit 812d930488
5 changed files with 402 additions and 176 deletions

View File

@ -1,9 +1,9 @@
use std::fmt::{self, Debug, Display, Formatter};
use serde::{Deserialize, Serialize};
use crate::{Span, Value, ValueError};
const STACK_SIZE: usize = 256;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Vm {
chunk: Chunk,
@ -12,11 +12,13 @@ pub struct Vm {
}
impl Vm {
const STACK_SIZE: usize = 256;
pub fn new(chunk: Chunk) -> Self {
Self {
chunk,
ip: 0,
stack: Vec::with_capacity(STACK_SIZE),
stack: Vec::with_capacity(Self::STACK_SIZE),
}
}
@ -31,7 +33,7 @@ impl Vm {
let (index, _) = self.read();
let value = self.read_constant(index as usize);
self.stack.push(value.clone());
self.stack.push(value);
}
Instruction::Return => {
let value = self.pop()?;
@ -84,7 +86,7 @@ impl Vm {
}
pub fn push(&mut self, value: Value) -> Result<(), VmError> {
if self.stack.len() == STACK_SIZE {
if self.stack.len() == Self::STACK_SIZE {
Err(VmError::StackOverflow)
} else {
self.stack.push(value);
@ -164,26 +166,26 @@ impl Instruction {
pub fn disassemble(&self, chunk: &Chunk, offset: usize) -> String {
match self {
Instruction::Constant => {
let index = chunk.code[offset + 1].0 as usize;
let value = &chunk.constants[index];
let (index, _) = chunk.read(offset + 1);
let value = &chunk.constants[index as usize];
format!("{:04} CONSTANT {} {}", offset, index, value)
format!("{offset:04} CONSTANT {index} {value}")
}
Instruction::Return => format!("{:04} RETURN", offset),
Instruction::Return => format!("{offset:04} RETURN"),
// Unary
Instruction::Negate => format!("{:04} NEGATE", offset),
Instruction::Negate => format!("{offset:04} NEGATE"),
// Binary
Instruction::Add => format!("{:04} ADD", offset),
Instruction::Subtract => format!("{:04} SUBTRACT", offset),
Instruction::Multiply => format!("{:04} MULTIPLY", offset),
Instruction::Divide => format!("{:04} DIVIDE", offset),
Instruction::Add => format!("{offset:04} ADD"),
Instruction::Subtract => format!("{offset:04} SUBTRACT"),
Instruction::Multiply => format!("{offset:04} MULTIPLY"),
Instruction::Divide => format!("{offset:04} DIVIDE"),
}
}
}
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct Chunk {
code: Vec<(u8, Span)>,
constants: Vec<Value>,
@ -197,6 +199,10 @@ impl Chunk {
}
}
/// Builds a chunk directly from raw bytecode and a constant pool
/// (used by tests to state expected compiler output).
pub fn with_data(code: Vec<(u8, Span)>, constants: Vec<Value>) -> Self {
Self { code, constants }
}
pub fn len(&self) -> usize {
self.code.len()
}
@ -209,6 +215,10 @@ impl Chunk {
self.code.capacity()
}
/// Returns the (byte, span) pair at `offset`.
///
/// # Panics
/// Panics if `offset` is out of bounds for the code vector.
pub fn read(&self, offset: usize) -> (u8, Span) {
self.code[offset]
}
/// Appends one bytecode byte (opcode or operand) tagged with its source position.
pub fn write(&mut self, instruction: u8, position: Span) {
self.code.push((instruction, position));
}
@ -230,14 +240,38 @@ impl Chunk {
self.constants.clear();
}
pub fn disassemble(&self, name: &str) {
println!("== {} ==", name);
pub fn disassemble(&self, name: &str) -> String {
let mut output = String::new();
output.push_str("== ");
output.push_str(name);
output.push_str(" ==\n");
let mut next_is_index = false;
for (offset, (byte, position)) in self.code.iter().enumerate() {
let instruction = Instruction::from_byte(*byte).unwrap();
if next_is_index {
let index_display = format!("{position} {offset:04} INDEX {byte}\n");
println!("{} {}", position, instruction.disassemble(self, offset));
output.push_str(&index_display);
next_is_index = false;
continue;
}
let instruction = Instruction::from_byte(*byte).unwrap();
let instruction_display =
format!("{} {}\n", position, instruction.disassemble(self, offset));
output.push_str(&instruction_display);
if let Instruction::Constant = instruction {
next_is_index = true;
}
}
output
}
}
@ -247,6 +281,18 @@ impl Default for Chunk {
}
}
// Human-readable form: the full disassembly under a generic "Chunk" header.
impl Display for Chunk {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{}", self.disassemble("Chunk"))
}
}
// Debug deliberately delegates to Display so `{:?}` also prints the
// disassembly instead of a raw dump of the byte/span vectors.
impl Debug for Chunk {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{self}")
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ChunkError {
Overflow,

View File

@ -0,0 +1,16 @@
use crate::{bytecode::VmError, LexError, ParseError};
/// Top-level error for the pipeline: wraps the error from each stage
/// (lex, parse, run) together with the source text it came from, so a
/// reporter can render the error in context.
pub enum DustError<'src> {
/// Error raised while tokenizing `source`.
LexError {
error: LexError,
source: &'src str,
},
/// Error raised while parsing/compiling `source`.
ParseError {
error: ParseError,
source: &'src str,
},
/// Error raised while executing the chunk compiled from `source`.
VmError {
error: VmError,
source: &'src str,
},
}

View File

@ -17,6 +17,7 @@
//! ```
pub mod bytecode;
pub mod constructor;
pub mod dust_error;
pub mod identifier;
pub mod lexer;
pub mod parser;
@ -26,6 +27,7 @@ pub mod value;
pub use bytecode::{Chunk, ChunkError, Instruction, Vm};
pub use constructor::{ConstructError, Constructor};
pub use dust_error::DustError;
pub use identifier::Identifier;
pub use lexer::{LexError, Lexer};
pub use parser::{ParseError, Parser};

View File

@ -1,76 +1,143 @@
use std::num::ParseIntError;
use std::{
fmt::{self, Display, Formatter},
num::ParseIntError,
};
use crate::{
Chunk, ChunkError, Instruction, LexError, Lexer, Span, Token, TokenKind, TokenOwned, Value,
};
/// Compiles `source` to a bytecode chunk by running the Pratt parser
/// until end of input.
pub fn parse(source: &str) -> Result<Chunk, ParseError> {
let lexer = Lexer::new(source);
let mut parser = Parser::new(lexer);
while !parser.is_eof() {
// Each top-level expression starts at the weakest precedence.
parser.parse(Precedence::None)?;
}
Ok(parser.chunk)
}
#[derive(Debug)]
pub struct Parser<'src> {
lexer: Lexer<'src>,
current_token: Token<'src>,
chunk: Chunk,
current_token: Option<Token<'src>>,
current_position: Span,
}
impl<'src> Parser<'src> {
pub fn new(mut lexer: Lexer<'src>) -> Self {
let (current_token, current_position) =
lexer.next_token().unwrap_or((Token::Eof, Span(0, 0)));
pub fn new(lexer: Lexer<'src>) -> Self {
Parser {
lexer,
current_token,
current_position,
chunk: Chunk::new(),
current_token: None,
current_position: Span(0, 0),
}
}
fn is_eof(&self) -> bool {
matches!(self.current_token, Token::Eof)
matches!(self.current_token, Some(Token::Eof))
}
fn advance(&mut self) -> Result<(), ParseError> {
let (token, position) = self.lexer.next_token()?;
self.current_token = token;
log::trace!("Advancing to token {token} at {position}");
self.current_token = Some(token);
self.current_position = position;
Ok(())
}
/// Owned copy of the current token; `TokenOwned::Eof` before the first
/// `advance` has filled `current_token`.
fn current_token_owned(&self) -> TokenOwned {
self.current_token
.as_ref()
.map_or(TokenOwned::Eof, |token| token.to_owned())
}
/// Kind of the current token; `TokenKind::Eof` before the first
/// `advance` has filled `current_token`.
fn current_token_kind(&self) -> TokenKind {
self.current_token
.as_ref()
.map_or(TokenKind::Eof, |token| token.kind())
}
fn consume(&mut self, expected: TokenKind) -> Result<(), ParseError> {
if self.current_token.kind() == expected {
if self.current_token_kind() == expected {
self.advance()
} else {
Err(ParseError::ExpectedToken {
expected,
found: self.current_token.to_owned(),
found: self.current_token_owned(),
position: self.current_position,
})
}
}
fn emit_instruction(&mut self, instruction: Instruction, chunk: &mut Chunk) {
chunk.write(instruction as u8, self.current_position);
/// Appends one raw byte to the chunk, tagged with the current source span.
fn emit_byte(&mut self, byte: u8) {
self.chunk.write(byte, self.current_position);
}
fn parse_prefix(&mut self, chunk: &mut Chunk) -> Result<(), ParseError> {
/// Adds `value` to the chunk's constant pool and emits `CONSTANT <index>`
/// to load it at runtime; errors if the pool rejects the value
/// (e.g. `ChunkError::Overflow`).
fn emit_constant(&mut self, value: Value) -> Result<(), ParseError> {
let constant_index = self.chunk.push_constant(value)?;
self.emit_byte(Instruction::Constant as u8);
self.emit_byte(constant_index);
Ok(())
}
fn parse_primary(&mut self, chunk: &mut Chunk) -> Result<(), ParseError> {
match self.current_token {
Token::Integer(text) => {
let integer = text.parse::<i64>()?;
let value = Value::integer(integer);
let constant_index = chunk.push_constant(value)?;
fn parse_integer(&mut self) -> Result<(), ParseError> {
if let Some(Token::Integer(text)) = self.current_token {
let integer = text.parse::<i64>().unwrap();
let value = Value::integer(integer);
chunk.write(Instruction::Constant as u8, self.current_position);
chunk.write(constant_index, self.current_position);
}
Token::LeftParenthesis => {}
self.emit_constant(value)?;
}
Ok(())
}
/// Prefix rule for `(`: compiles the inner expression, then requires the
/// closing `)`.
fn parse_grouped(&mut self) -> Result<(), ParseError> {
self.parse_expression()?;
self.consume(TokenKind::RightParenthesis)?;
Ok(())
}
/// Prefix rule for unary minus: compiles the operand, then emits NEGATE.
// NOTE(review): silently succeeds without emitting anything when the
// current token is not `-` — confirm that is intended rather than an error.
fn parse_unary(&mut self) -> Result<(), ParseError> {
if let Some(Token::Minus) = self.current_token {
self.advance()?;
self.parse_expression()?;
self.emit_byte(Instruction::Negate as u8);
}
Ok(())
}
fn parse_binary(&mut self) -> Result<(), ParseError> {
let operator_position = self.current_position;
let operator = self.current_token_kind();
let rule = ParseRule::from(&operator);
self.parse(rule.precedence.increment())?;
match operator {
TokenKind::Plus => self.emit_byte(Instruction::Add as u8),
TokenKind::Minus => self.emit_byte(Instruction::Subtract as u8),
TokenKind::Star => self.emit_byte(Instruction::Multiply as u8),
TokenKind::Slash => self.emit_byte(Instruction::Divide as u8),
_ => {
return Err(ParseError::ExpectedTokenMultiple {
expected: vec![TokenKind::Integer],
found: self.current_token.to_owned(),
expected: vec![
TokenKind::Plus,
TokenKind::Minus,
TokenKind::Star,
TokenKind::Slash,
],
found: self.current_token_owned(),
position: self.current_position,
})
}
@ -79,13 +146,196 @@ impl<'src> Parser<'src> {
Ok(())
}
pub fn parse_postfix(&mut self, left: Value, chunk: &mut Chunk) -> Result<(), ParseError> {
/// Parses a full expression: everything at `Assignment` precedence and up.
fn parse_expression(&mut self) -> Result<(), ParseError> {
self.parse(Precedence::Assignment)
}
// Pratt parsing functions
/// Core Pratt loop: advance to the next token, run its prefix rule, then
/// keep folding infix rules while their precedence is at least `precedence`.
///
/// Errors with `ExpectedPrefix` when the consumed token has no prefix rule.
// NOTE(review): `advance` is called *before* each dispatch, so prefix/infix
// rules run with `current_token` set to the token that selected them.
fn parse(&mut self, precedence: Precedence) -> Result<(), ParseError> {
log::trace!("Parsing with precedence {precedence}");
self.advance()?;
let prefix_rule = ParseRule::from(&self.current_token_kind()).prefix;
if let Some(prefix) = prefix_rule {
log::trace!("Parsing {} as prefix", &self.current_token_owned());
prefix(self)?;
} else {
return Err(ParseError::ExpectedPrefix {
found: self.current_token_owned(),
position: self.current_position,
});
}
// Fold infix operators until one binds more weakly than `precedence`
// or the lookahead has no infix rule.
while precedence <= ParseRule::from(&self.current_token_kind()).precedence {
self.advance()?;
let infix_rule = ParseRule::from(&self.current_token_kind()).infix;
if let Some(infix) = infix_rule {
log::trace!("Parsing {} as infix", self.current_token_owned());
infix(self)?;
} else {
break;
}
}
Ok(())
}
}
/// Operator precedence levels for Pratt parsing, ordered from weakest
/// (`None`) to strongest (`Primary`). Discriminants are contiguous so a
/// level can be stepped up or down by one.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Precedence {
    None = 0,
    Assignment = 1,
    Conditional = 2,
    LogicalOr = 3,
    LogicalAnd = 4,
    Equality = 5,
    Comparison = 6,
    Term = 7,
    Factor = 8,
    Unary = 9,
    Call = 10,
    Primary = 11,
}

impl Precedence {
    /// Maps a raw discriminant back to a level; anything above `Call`
    /// saturates to `Primary`.
    fn from_byte(byte: u8) -> Self {
        match byte {
            0 => Self::None,
            1 => Self::Assignment,
            2 => Self::Conditional,
            3 => Self::LogicalOr,
            4 => Self::LogicalAnd,
            5 => Self::Equality,
            6 => Self::Comparison,
            7 => Self::Term,
            8 => Self::Factor,
            9 => Self::Unary,
            10 => Self::Call,
            _ => Self::Primary,
        }
    }

    /// Returns the next-stronger level, saturating at `Primary`
    /// (12 falls into `from_byte`'s catch-all arm).
    fn increment(&self) -> Self {
        Self::from_byte(*self as u8 + 1)
    }

    /// Returns the next-weaker level, saturating at `None`.
    ///
    /// Fix: the original `*self as u8 - 1` underflows when called on
    /// `Precedence::None` — a panic in debug builds, a wrap to `Primary`
    /// in release builds. `saturating_sub` makes `None.decrement()` stay
    /// `None`.
    fn decrement(&self) -> Self {
        Self::from_byte((*self as u8).saturating_sub(1))
    }
}

impl Display for Precedence {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Reuse the derived Debug name; inline-args style matches the file.
        write!(f, "{self:?}")
    }
}
/// Signature shared by all prefix/infix parser callbacks.
type ParserFunction<'a> = fn(&'_ mut Parser<'a>) -> Result<(), ParseError>;
/// One row of the Pratt table: the optional prefix and infix handlers for a
/// token kind, plus the precedence that token has as an infix operator.
#[derive(Debug, Clone, Copy)]
pub struct ParseRule<'a> {
pub prefix: Option<ParserFunction<'a>>,
pub infix: Option<ParserFunction<'a>>,
pub precedence: Precedence,
}
// Pratt rule table: maps every token kind to its prefix/infix handlers and
// infix precedence. Kinds still marked `todo!()` panic if they ever reach
// the parser — placeholders while the bytecode compiler is built out.
impl From<&TokenKind> for ParseRule<'_> {
fn from(token_kind: &TokenKind) -> Self {
match token_kind {
TokenKind::Eof => ParseRule {
prefix: None,
infix: None,
precedence: Precedence::None,
},
TokenKind::Identifier => todo!(),
TokenKind::Boolean => todo!(),
TokenKind::Character => todo!(),
TokenKind::Float => todo!(),
// Integer literals are prefix-only operands.
TokenKind::Integer => ParseRule {
prefix: Some(Parser::parse_integer),
infix: None,
precedence: Precedence::None,
},
TokenKind::String => todo!(),
TokenKind::Async => todo!(),
TokenKind::Bool => todo!(),
TokenKind::Break => todo!(),
TokenKind::Else => todo!(),
TokenKind::FloatKeyword => todo!(),
TokenKind::If => todo!(),
TokenKind::Int => todo!(),
TokenKind::Let => todo!(),
TokenKind::Loop => todo!(),
TokenKind::Map => todo!(),
TokenKind::Str => todo!(),
TokenKind::While => todo!(),
TokenKind::BangEqual => todo!(),
TokenKind::Bang => todo!(),
TokenKind::Colon => todo!(),
TokenKind::Comma => todo!(),
TokenKind::Dot => todo!(),
TokenKind::DoubleAmpersand => todo!(),
TokenKind::DoubleDot => todo!(),
TokenKind::DoubleEqual => todo!(),
TokenKind::DoublePipe => todo!(),
TokenKind::Equal => todo!(),
TokenKind::Greater => todo!(),
TokenKind::GreaterOrEqual => todo!(),
TokenKind::LeftCurlyBrace => todo!(),
// `(` starts a grouped expression.
TokenKind::LeftParenthesis => ParseRule {
prefix: Some(Parser::parse_grouped),
infix: None,
precedence: Precedence::None,
},
TokenKind::LeftSquareBrace => todo!(),
TokenKind::Less => todo!(),
TokenKind::LessOrEqual => todo!(),
// `-` is both prefix (negation) and infix (subtraction).
TokenKind::Minus => ParseRule {
prefix: Some(Parser::parse_unary),
infix: Some(Parser::parse_binary),
precedence: Precedence::Term,
},
TokenKind::MinusEqual => todo!(),
TokenKind::Mut => todo!(),
TokenKind::Percent => todo!(),
TokenKind::Plus => ParseRule {
prefix: None,
infix: Some(Parser::parse_binary),
precedence: Precedence::Term,
},
TokenKind::PlusEqual => todo!(),
TokenKind::RightCurlyBrace => todo!(),
// NOTE(review): `)` can appear as the lookahead inside the Pratt loop
// after a grouped expression; this `todo!()` would panic there —
// confirm it should instead get a no-rule entry like `Eof`.
TokenKind::RightParenthesis => todo!(),
TokenKind::RightSquareBrace => todo!(),
TokenKind::Semicolon => todo!(),
TokenKind::Star => ParseRule {
prefix: None,
infix: Some(Parser::parse_binary),
precedence: Precedence::Factor,
},
TokenKind::Struct => todo!(),
TokenKind::Slash => ParseRule {
prefix: None,
infix: Some(Parser::parse_binary),
precedence: Precedence::Factor,
},
}
}
}
#[derive(Debug, PartialEq)]
pub enum ParseError {
ExpectedPrefix {
found: TokenOwned,
position: Span,
},
ExpectedToken {
expected: TokenKind,
found: TokenOwned,
@ -120,3 +370,44 @@ impl From<ChunkError> for ParseError {
Self::Chunk(error)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// "42" compiles to a single CONSTANT instruction followed by the
    /// constant-pool index of the value 42.
    #[test]
    fn parse_integer() {
        let source = "42";
        let test_chunk = parse(source);

        assert_eq!(
            test_chunk,
            Ok(Chunk::with_data(
                vec![(Instruction::Constant as u8, Span(0, 2)), (0, Span(0, 2))],
                vec![Value::integer(42)]
            ))
        );
    }

    /// "42 + 42" compiles to two CONSTANT loads followed by ADD, with the
    /// ADD carrying the operator's span.
    #[test]
    fn parse_addition() {
        // Fix: `try_init` returns `Err` when a logger is already installed
        // (e.g. by another test in the same process), so unwrapping it made
        // this test panic spuriously. Ignore the result instead.
        let _ = env_logger::builder().is_test(true).try_init();

        let source = "42 + 42";
        let test_chunk = parse(source);

        assert_eq!(
            test_chunk,
            Ok(Chunk::with_data(
                vec![
                    (Instruction::Constant as u8, Span(0, 2)),
                    (0, Span(0, 2)),
                    (Instruction::Constant as u8, Span(5, 7)),
                    (1, Span(5, 7)),
                    (Instruction::Add as u8, Span(3, 4)),
                ],
                vec![Value::integer(42), Value::integer(42)]
            ))
        );
    }
}

View File

@ -227,58 +227,6 @@ impl<'src> Token<'src> {
Token::While => TokenKind::While,
}
}
/// Returns true if this token is the end-of-file marker.
pub fn is_eof(&self) -> bool {
matches!(self, Token::Eof)
}
/// Binding power of this token when used as an operator; higher binds
/// tighter. Non-operator tokens fall through to 0.
pub fn precedence(&self) -> u8 {
match self {
Token::Dot => 9,
Token::LeftParenthesis | Token::LeftSquareBrace => 8,
Token::Star | Token::Slash | Token::Percent => 7,
Token::Minus | Token::Plus => 6,
Token::DoubleEqual
| Token::Less
| Token::LessEqual
| Token::Greater
| Token::GreaterEqual => 5,
Token::DoubleAmpersand => 4,
Token::DoublePipe => 3,
Token::DoubleDot => 2,
// Assignment operators bind most weakly.
Token::Equal | Token::MinusEqual | Token::PlusEqual => 1,
_ => 0,
}
}
/// Returns true for binary operators that group left-to-right.
pub fn is_left_associative(&self) -> bool {
matches!(
self,
Token::Dot
| Token::DoubleAmpersand
| Token::DoublePipe
| Token::Plus
| Token::Minus
| Token::Star
| Token::Slash
| Token::Percent
)
}
/// Returns true for assignment-style operators, which group right-to-left.
pub fn is_right_associative(&self) -> bool {
matches!(self, Token::Equal | Token::MinusEqual | Token::PlusEqual)
}
/// Returns true if this token can start a prefix (unary) expression.
pub fn is_prefix(&self) -> bool {
matches!(self, Token::Bang | Token::Minus | Token::Star)
}
/// Returns true if this token can follow an expression as a postfix
/// operator (field access, call, index, or block).
pub fn is_postfix(&self) -> bool {
matches!(
self,
Token::Dot | Token::LeftCurlyBrace | Token::LeftParenthesis | Token::LeftSquareBrace
)
}
}
impl<'src> Display for Token<'src> {
@ -572,80 +520,3 @@ impl Display for TokenKind {
}
}
}
#[cfg(test)]
pub(crate) mod tests {
use super::*;
/// One instance of every `Token` variant (payload-carrying variants use a
/// representative literal). `pub(crate)` so sibling modules can reuse it.
pub fn all_tokens<'src>() -> [Token<'src>; 47] {
[
Token::Async,
Token::Bang,
Token::BangEqual,
Token::Bool,
Token::Break,
Token::Colon,
Token::Comma,
Token::Dot,
Token::DoubleAmpersand,
Token::DoubleDot,
Token::DoubleEqual,
Token::DoublePipe,
Token::Else,
Token::Eof,
Token::Equal,
Token::FloatKeyword,
Token::Greater,
Token::GreaterEqual,
Token::If,
Token::Int,
Token::LeftCurlyBrace,
Token::LeftParenthesis,
Token::LeftSquareBrace,
Token::Let,
Token::Less,
Token::LessEqual,
Token::Map,
Token::Minus,
Token::MinusEqual,
Token::Mut,
Token::Percent,
Token::Plus,
Token::PlusEqual,
Token::RightCurlyBrace,
Token::RightParenthesis,
Token::RightSquareBrace,
Token::Semicolon,
Token::Star,
Token::Str,
Token::Slash,
Token::Boolean("true"),
Token::Float("0.0"),
Token::Integer("0"),
Token::String("string"),
Token::Identifier("foobar"),
Token::Struct,
Token::While,
]
}
/// Every token's Display output must agree with its owned form, and — for
/// tokens without a payload — with its kind's Display output too.
#[test]
fn token_displays() {
for token in all_tokens().iter() {
let display = token.to_string();
assert_eq!(display, token.to_owned().to_string());
// Payload-carrying tokens display their payload, which the bare
// kind cannot reproduce, so they are excluded from the kind check.
if let Token::Boolean(_)
| Token::Float(_)
| Token::Identifier(_)
| Token::Integer(_)
| Token::String(_) = token
{
continue;
} else {
assert_eq!(display, token.kind().to_string());
}
}
}
}