1
0

Refactor parsing

This commit is contained in:
Jeff 2024-09-07 06:38:12 -04:00
parent 616f890028
commit 03d44434e2
5 changed files with 324 additions and 190 deletions

125
dust-lang/src/chunk.rs Normal file
View File

@ -0,0 +1,125 @@
use std::fmt::{self, Debug, Display, Formatter};
use serde::{Deserialize, Serialize};
use crate::{Instruction, Span, Value};
/// A sequence of bytes plus the constant values those bytes can refer to.
///
/// Every byte is stored together with the `Span` that was supplied when it was
/// written. `Debug` and `Display` are implemented by hand further down in this
/// file (both print the disassembly), which is why `Debug` is not derived here.
#[derive(Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct Chunk {
// Bytes in the order they were written, each paired with its `Span`.
code: Vec<(u8, Span)>,
// Constant pool; `push_constant` returns indexes into this vector.
constants: Vec<Value>,
}
impl Chunk {
    /// Creates a chunk with no code and no constants.
    pub fn new() -> Self {
        Self {
            code: Vec::new(),
            constants: Vec::new(),
        }
    }

    /// Builds a chunk from already-assembled code and constants.
    pub fn with_data(code: Vec<(u8, Span)>, constants: Vec<Value>) -> Self {
        Self { code, constants }
    }

    /// Returns the number of bytes written to the chunk.
    pub fn len(&self) -> usize {
        self.code.len()
    }

    /// Returns `true` if no bytes have been written to the chunk.
    pub fn is_empty(&self) -> bool {
        self.code.is_empty()
    }

    /// Returns the capacity of the underlying code vector.
    pub fn capacity(&self) -> usize {
        self.code.capacity()
    }

    /// Returns the byte stored at `offset` together with its span.
    ///
    /// # Panics
    ///
    /// Panics if `offset` is not smaller than [`Chunk::len`].
    pub fn read(&self, offset: usize) -> (u8, Span) {
        self.code[offset]
    }

    /// Appends one byte and the span it originated from to the end of the code.
    pub fn write(&mut self, instruction: u8, position: Span) {
        self.code.push((instruction, position));
    }

    /// Looks up the constant stored at `index`.
    ///
    /// # Errors
    ///
    /// Returns [`ChunkError::ConstantIndexOutOfBounds`] carrying `index` when no
    /// constant is stored at that position.
    pub fn get_constant(&self, index: usize) -> Result<&Value, ChunkError> {
        self.constants
            .get(index)
            .ok_or(ChunkError::ConstantIndexOutOfBounds(index))
    }

    /// Adds `value` to the constant pool and returns the index it was stored at.
    ///
    /// # Errors
    ///
    /// Returns [`ChunkError::Overflow`] when the index of the new constant would
    /// not fit in a `u8`. Nothing is pushed in that case.
    pub fn push_constant(&mut self, value: Value) -> Result<u8, ChunkError> {
        // The new constant lands at the current length, so that is the value
        // that has to fit in a byte. Every index in `0..=u8::MAX` is usable.
        let index = self.constants.len();

        if index > usize::from(u8::MAX) {
            return Err(ChunkError::Overflow);
        }

        self.constants.push(value);

        // Lossless: `index <= u8::MAX` was checked above.
        Ok(index as u8)
    }

    /// Removes all code and all constants from the chunk.
    pub fn clear(&mut self) {
        self.code.clear();
        self.constants.clear();
    }

    /// Renders the chunk as text: a `== name ==` header followed by one line per
    /// byte.
    ///
    /// A byte that directly follows a `Constant` instruction is printed as an
    /// `INDEX` line instead of being decoded as an instruction.
    ///
    /// # Panics
    ///
    /// Panics if a byte in instruction position is rejected by
    /// `Instruction::from_byte`. `Instruction::disassemble` may also panic, for
    /// example when a `Constant` instruction is the last byte and has no index
    /// byte after it.
    pub fn disassemble(&self, name: &str) -> String {
        let mut output = format!("== {name} ==\n");
        let mut next_is_index = false;

        for (offset, (byte, position)) in self.code.iter().enumerate() {
            if next_is_index {
                output.push_str(&format!("{position} {offset:04} INDEX {byte}\n"));
                next_is_index = false;

                continue;
            }

            let instruction = Instruction::from_byte(*byte).unwrap();

            output.push_str(&format!(
                "{} {}\n",
                position,
                instruction.disassemble(self, offset)
            ));

            // Only `Constant` carries an operand byte that must not be decoded.
            next_is_index = matches!(instruction, Instruction::Constant);
        }

        output
    }
}
impl Default for Chunk {
fn default() -> Self {
Self::new()
}
}
impl Display for Chunk {
    /// Writes the disassembly of the chunk under the fixed name `"Chunk"`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let listing = self.disassemble("Chunk");

        f.write_str(&listing)
    }
}
impl Debug for Chunk {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{self}")
}
}
/// Errors reported by operations on a `Chunk`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChunkError {
    /// A constant was requested at the contained index, but none is stored there.
    ConstantIndexOutOfBounds(usize),
    /// A constant could not be added because the chunk cannot hold any more.
    Overflow,
}

impl Display for ChunkError {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            ChunkError::ConstantIndexOutOfBounds(index) => {
                write!(f, "constant index {index} is out of bounds")
            }
            ChunkError::Overflow => write!(f, "chunk overflow: too many constants"),
        }
    }
}

impl std::error::Error for ChunkError {}

View File

@ -1,4 +1,4 @@
use crate::{bytecode::VmError, LexError, ParseError}; use crate::{vm::VmError, LexError, ParseError};
pub enum DustError<'src> { pub enum DustError<'src> {
LexError { LexError {

View File

@ -15,7 +15,7 @@
//! //!
//! assert_eq!(the_answer, Some(Value::integer(42))); //! assert_eq!(the_answer, Some(Value::integer(42)));
//! ``` //! ```
pub mod bytecode; pub mod chunk;
pub mod constructor; pub mod constructor;
pub mod dust_error; pub mod dust_error;
pub mod identifier; pub mod identifier;
@ -24,8 +24,9 @@ pub mod parser;
pub mod token; pub mod token;
pub mod r#type; pub mod r#type;
pub mod value; pub mod value;
pub mod vm;
pub use bytecode::{Chunk, ChunkError, Instruction, Vm}; pub use chunk::{Chunk, ChunkError};
pub use constructor::{ConstructError, Constructor}; pub use constructor::{ConstructError, Constructor};
pub use dust_error::DustError; pub use dust_error::DustError;
pub use identifier::Identifier; pub use identifier::Identifier;
@ -34,6 +35,7 @@ pub use parser::{ParseError, Parser};
pub use r#type::{EnumType, FunctionType, RangeableType, StructType, Type, TypeConflict}; pub use r#type::{EnumType, FunctionType, RangeableType, StructType, Type, TypeConflict};
pub use token::{Token, TokenKind, TokenOwned}; pub use token::{Token, TokenKind, TokenOwned};
pub use value::{Struct, Value, ValueError}; pub use value::{Struct, Value, ValueError};
pub use vm::{Instruction, Vm};
use std::fmt::{self, Display, Formatter}; use std::fmt::{self, Display, Formatter};

View File

@ -1,6 +1,8 @@
use std::{ use std::{
fmt::{self, Display, Formatter}, fmt::{self, Display, Formatter},
mem::{self, swap},
num::ParseIntError, num::ParseIntError,
ptr::replace,
}; };
use crate::{ use crate::{
@ -22,54 +24,49 @@ pub fn parse(source: &str) -> Result<Chunk, ParseError> {
pub struct Parser<'src> { pub struct Parser<'src> {
lexer: Lexer<'src>, lexer: Lexer<'src>,
chunk: Chunk, chunk: Chunk,
current_token: Option<Token<'src>>, previous_token: Token<'src>,
previous_position: Span,
current_token: Token<'src>,
current_position: Span, current_position: Span,
} }
impl<'src> Parser<'src> { impl<'src> Parser<'src> {
pub fn new(lexer: Lexer<'src>) -> Self { pub fn new(mut lexer: Lexer<'src>) -> Self {
let (current_token, current_position) =
lexer.next_token().unwrap_or((Token::Eof, Span(0, 0)));
Parser { Parser {
lexer, lexer,
chunk: Chunk::new(), chunk: Chunk::new(),
current_token: None, previous_token: Token::Eof,
current_position: Span(0, 0), previous_position: Span(0, 0),
current_token,
current_position,
} }
} }
fn is_eof(&self) -> bool { fn is_eof(&self) -> bool {
matches!(self.current_token, Some(Token::Eof)) matches!(self.current_token, Token::Eof)
} }
fn advance(&mut self) -> Result<(), ParseError> { fn advance(&mut self) -> Result<(), ParseError> {
let (token, position) = self.lexer.next_token()?; let (new_token, position) = self.lexer.next_token()?;
log::trace!("Advancing to token {token} at {position}"); log::trace!("Advancing to token {new_token} at {position}");
self.current_token = Some(token); self.previous_token = mem::replace(&mut self.current_token, new_token);
self.current_position = position; self.previous_position = mem::replace(&mut self.current_position, position);
Ok(()) Ok(())
} }
fn current_token_owned(&self) -> TokenOwned {
self.current_token
.as_ref()
.map_or(TokenOwned::Eof, |token| token.to_owned())
}
fn current_token_kind(&self) -> TokenKind {
self.current_token
.as_ref()
.map_or(TokenKind::Eof, |token| token.kind())
}
fn consume(&mut self, expected: TokenKind) -> Result<(), ParseError> { fn consume(&mut self, expected: TokenKind) -> Result<(), ParseError> {
if self.current_token_kind() == expected { if self.current_token.kind() == expected {
self.advance() self.advance()
} else { } else {
Err(ParseError::ExpectedToken { Err(ParseError::ExpectedToken {
expected, expected,
found: self.current_token_owned(), found: self.current_token.to_owned(),
position: self.current_position, position: self.current_position,
}) })
} }
@ -81,7 +78,7 @@ impl<'src> Parser<'src> {
fn emit_constant(&mut self, value: Value) -> Result<(), ParseError> { fn emit_constant(&mut self, value: Value) -> Result<(), ParseError> {
let constant_index = self.chunk.push_constant(value)?; let constant_index = self.chunk.push_constant(value)?;
let position = self.current_position; let position = self.previous_position;
self.emit_byte(Instruction::Constant as u8, position); self.emit_byte(Instruction::Constant as u8, position);
self.emit_byte(constant_index, position); self.emit_byte(constant_index, position);
@ -89,8 +86,19 @@ impl<'src> Parser<'src> {
Ok(()) Ok(())
} }
fn parse_boolean(&mut self) -> Result<(), ParseError> {
if let Token::Boolean(text) = self.previous_token {
let boolean = text.parse::<bool>().unwrap();
let value = Value::boolean(boolean);
self.emit_constant(value)?;
}
Ok(())
}
fn parse_integer(&mut self) -> Result<(), ParseError> { fn parse_integer(&mut self) -> Result<(), ParseError> {
if let Some(Token::Integer(text)) = self.current_token { if let Token::Integer(text) = self.previous_token {
let integer = text.parse::<i64>().unwrap(); let integer = text.parse::<i64>().unwrap();
let value = Value::integer(integer); let value = Value::integer(integer);
@ -102,27 +110,30 @@ impl<'src> Parser<'src> {
fn parse_grouped(&mut self) -> Result<(), ParseError> { fn parse_grouped(&mut self) -> Result<(), ParseError> {
self.parse_expression()?; self.parse_expression()?;
self.consume(TokenKind::RightParenthesis)
self.consume(TokenKind::RightParenthesis)?;
Ok(())
} }
fn parse_unary(&mut self) -> Result<(), ParseError> { fn parse_unary(&mut self) -> Result<(), ParseError> {
if let Some(Token::Minus) = self.current_token { let byte = match self.previous_token.kind() {
let operator_position = self.current_position; TokenKind::Minus => Instruction::Negate as u8,
_ => {
self.advance()?; return Err(ParseError::ExpectedTokenMultiple {
self.parse_expression()?; expected: vec![TokenKind::Minus],
self.emit_byte(Instruction::Negate as u8, operator_position); found: self.previous_token.to_owned(),
position: self.previous_position,
})
} }
};
self.parse_expression()?;
self.emit_byte(byte, self.previous_position);
Ok(()) Ok(())
} }
fn parse_binary(&mut self) -> Result<(), ParseError> { fn parse_binary(&mut self) -> Result<(), ParseError> {
let operator_position = self.current_position; let operator_position = self.previous_position;
let operator = self.current_token_kind(); let operator = self.previous_token.kind();
let rule = ParseRule::from(&operator); let rule = ParseRule::from(&operator);
self.parse(rule.precedence.increment())?; self.parse(rule.precedence.increment())?;
@ -140,8 +151,8 @@ impl<'src> Parser<'src> {
TokenKind::Star, TokenKind::Star,
TokenKind::Slash, TokenKind::Slash,
], ],
found: self.current_token_owned(), found: self.previous_token.to_owned(),
position: self.current_position, position: operator_position,
}) })
} }
}; };
@ -152,36 +163,36 @@ impl<'src> Parser<'src> {
} }
fn parse_expression(&mut self) -> Result<(), ParseError> { fn parse_expression(&mut self) -> Result<(), ParseError> {
self.parse(Precedence::Assignment) self.parse(Precedence::None)
} }
// Pratt parsing functions
fn parse(&mut self, precedence: Precedence) -> Result<(), ParseError> { fn parse(&mut self, precedence: Precedence) -> Result<(), ParseError> {
log::trace!("Parsing with precedence {precedence}");
self.advance()?; self.advance()?;
let prefix_rule = ParseRule::from(&self.current_token_kind()).prefix; if let Some(prefix) = ParseRule::from(&self.previous_token.kind()).prefix {
log::trace!(
if let Some(prefix) = prefix_rule { "Parsing {} as prefix with precedence {precedence}",
log::trace!("Parsing {} as prefix", &self.current_token_owned()); self.previous_token,
);
prefix(self)?; prefix(self)?;
} else { } else {
return Err(ParseError::ExpectedPrefix { return Err(ParseError::ExpectedExpression {
found: self.current_token_owned(), found: self.previous_token.to_owned(),
position: self.current_position, position: self.previous_position,
}); });
} }
while precedence <= ParseRule::from(&self.current_token_kind()).precedence { while precedence <= ParseRule::from(&self.current_token.kind()).precedence {
self.advance()?; self.advance()?;
let infix_rule = ParseRule::from(&self.current_token_kind()).infix; let infix_rule = ParseRule::from(&self.previous_token.kind()).infix;
if let Some(infix) = infix_rule { if let Some(infix) = infix_rule {
log::trace!("Parsing {} as infix", self.current_token_owned()); log::trace!(
"Parsing {} as infix with precedence {precedence}",
self.previous_token,
);
infix(self)?; infix(self)?;
} else { } else {
@ -230,10 +241,6 @@ impl Precedence {
fn increment(&self) -> Self { fn increment(&self) -> Self {
Self::from_byte(*self as u8 + 1) Self::from_byte(*self as u8 + 1)
} }
fn decrement(&self) -> Self {
Self::from_byte(*self as u8 - 1)
}
} }
impl Display for Precedence { impl Display for Precedence {
@ -242,7 +249,7 @@ impl Display for Precedence {
} }
} }
type ParserFunction<'a> = fn(&'_ mut Parser<'a>) -> Result<(), ParseError>; type ParserFunction<'a> = fn(&mut Parser<'a>) -> Result<(), ParseError>;
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct ParseRule<'a> { pub struct ParseRule<'a> {
@ -260,7 +267,11 @@ impl From<&TokenKind> for ParseRule<'_> {
precedence: Precedence::None, precedence: Precedence::None,
}, },
TokenKind::Identifier => todo!(), TokenKind::Identifier => todo!(),
TokenKind::Boolean => todo!(), TokenKind::Boolean => ParseRule {
prefix: Some(Parser::parse_boolean),
infix: None,
precedence: Precedence::None,
},
TokenKind::Character => todo!(), TokenKind::Character => todo!(),
TokenKind::Float => todo!(), TokenKind::Float => todo!(),
TokenKind::Integer => ParseRule { TokenKind::Integer => ParseRule {
@ -317,7 +328,11 @@ impl From<&TokenKind> for ParseRule<'_> {
}, },
TokenKind::PlusEqual => todo!(), TokenKind::PlusEqual => todo!(),
TokenKind::RightCurlyBrace => todo!(), TokenKind::RightCurlyBrace => todo!(),
TokenKind::RightParenthesis => todo!(), TokenKind::RightParenthesis => ParseRule {
prefix: None,
infix: None,
precedence: Precedence::None,
},
TokenKind::RightSquareBrace => todo!(), TokenKind::RightSquareBrace => todo!(),
TokenKind::Semicolon => todo!(), TokenKind::Semicolon => todo!(),
TokenKind::Star => ParseRule { TokenKind::Star => ParseRule {
@ -337,7 +352,7 @@ impl From<&TokenKind> for ParseRule<'_> {
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum ParseError { pub enum ParseError {
ExpectedPrefix { ExpectedExpression {
found: TokenOwned, found: TokenOwned,
position: Span, position: Span,
}, },
@ -381,7 +396,7 @@ mod tests {
use super::*; use super::*;
#[test] #[test]
fn parse_integer() { fn integer() {
let source = "42"; let source = "42";
let test_chunk = parse(source); let test_chunk = parse(source);
@ -395,9 +410,46 @@ mod tests {
} }
#[test] #[test]
fn parse_addition() { fn boolean() {
let source = "true";
let test_chunk = parse(source);
assert_eq!(
test_chunk,
Ok(Chunk::with_data(
vec![(Instruction::Constant as u8, Span(0, 4)), (0, Span(0, 4))],
vec![Value::boolean(true)]
))
);
}
#[test]
fn grouping() {
env_logger::builder().is_test(true).try_init().unwrap(); env_logger::builder().is_test(true).try_init().unwrap();
let source = "(42 + 42) * 2";
let test_chunk = parse(source);
assert_eq!(
test_chunk,
Ok(Chunk::with_data(
vec![
(Instruction::Constant as u8, Span(1, 3)),
(0, Span(1, 3)),
(Instruction::Constant as u8, Span(6, 8)),
(1, Span(6, 8)),
(Instruction::Add as u8, Span(4, 5)),
(Instruction::Constant as u8, Span(11, 12)),
(0, Span(11, 12)),
(Instruction::Multiply as u8, Span(9, 10)),
],
vec![Value::integer(42), Value::integer(42), Value::integer(2)]
))
);
}
#[test]
fn addition() {
let source = "42 + 42"; let source = "42 + 42";
let test_chunk = parse(source); let test_chunk = parse(source);
@ -415,4 +467,64 @@ mod tests {
)) ))
); );
} }
#[test]
fn subtraction() {
let source = "42 - 42";
let test_chunk = parse(source);
assert_eq!(
test_chunk,
Ok(Chunk::with_data(
vec![
(Instruction::Constant as u8, Span(0, 2)),
(0, Span(0, 2)),
(Instruction::Constant as u8, Span(5, 7)),
(1, Span(5, 7)),
(Instruction::Subtract as u8, Span(3, 4)),
],
vec![Value::integer(42), Value::integer(42)]
))
);
}
#[test]
fn multiplication() {
let source = "42 * 42";
let test_chunk = parse(source);
assert_eq!(
test_chunk,
Ok(Chunk::with_data(
vec![
(Instruction::Constant as u8, Span(0, 2)),
(0, Span(0, 2)),
(Instruction::Constant as u8, Span(5, 7)),
(1, Span(5, 7)),
(Instruction::Multiply as u8, Span(3, 4)),
],
vec![Value::integer(42), Value::integer(42)]
))
);
}
#[test]
fn division() {
let source = "42 / 42";
let test_chunk = parse(source);
assert_eq!(
test_chunk,
Ok(Chunk::with_data(
vec![
(Instruction::Constant as u8, Span(0, 2)),
(0, Span(0, 2)),
(Instruction::Constant as u8, Span(5, 7)),
(1, Span(5, 7)),
(Instruction::Divide as u8, Span(3, 4)),
],
vec![Value::integer(42), Value::integer(42)]
))
);
}
} }

View File

@ -1,8 +1,6 @@
use std::fmt::{self, Debug, Display, Formatter};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::{Span, Value, ValueError}; use crate::{Chunk, ChunkError, Span, Value, ValueError};
#[derive(Debug, Clone, Eq, PartialEq)] #[derive(Debug, Clone, Eq, PartialEq)]
pub struct Vm { pub struct Vm {
@ -31,7 +29,7 @@ impl Vm {
match instruction { match instruction {
Instruction::Constant => { Instruction::Constant => {
let (index, _) = self.read(); let (index, _) = self.read();
let value = self.read_constant(index as usize); let value = self.read_constant(index as usize)?;
self.stack.push(value); self.stack.push(value);
} }
@ -106,23 +104,30 @@ impl Vm {
pub fn read(&mut self) -> (u8, Span) { pub fn read(&mut self) -> (u8, Span) {
self.ip += 1; self.ip += 1;
self.chunk.code[self.ip - 1] self.chunk.read(self.ip - 1)
} }
pub fn read_constant(&self, index: usize) -> Value { pub fn read_constant(&self, index: usize) -> Result<Value, VmError> {
self.chunk.constants[index].clone() Ok(self.chunk.get_constant(index)?.clone())
} }
} }
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq)]
pub enum VmError { pub enum VmError {
ChunkOverflow,
InvalidInstruction(u8, Span), InvalidInstruction(u8, Span),
StackUnderflow, StackUnderflow,
StackOverflow, StackOverflow,
Chunk(ChunkError),
Value(ValueError), Value(ValueError),
} }
impl From<ChunkError> for VmError {
fn from(error: ChunkError) -> Self {
Self::Chunk(error)
}
}
impl From<ValueError> for VmError { impl From<ValueError> for VmError {
fn from(error: ValueError) -> Self { fn from(error: ValueError) -> Self {
Self::Value(error) Self::Value(error)
@ -167,9 +172,12 @@ impl Instruction {
match self { match self {
Instruction::Constant => { Instruction::Constant => {
let (index, _) = chunk.read(offset + 1); let (index, _) = chunk.read(offset + 1);
let value = &chunk.constants[index as usize]; let value_display = chunk
.get_constant(index as usize)
.map(|value| value.to_string())
.unwrap_or_else(|error| format!("{:?}", error));
format!("{offset:04} CONSTANT {index} {value}") format!("{offset:04} CONSTANT {index} {value_display}")
} }
Instruction::Return => format!("{offset:04} RETURN"), Instruction::Return => format!("{offset:04} RETURN"),
@ -185,119 +193,6 @@ impl Instruction {
} }
} }
#[derive(Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct Chunk {
code: Vec<(u8, Span)>,
constants: Vec<Value>,
}
impl Chunk {
pub fn new() -> Self {
Self {
code: Vec::new(),
constants: Vec::new(),
}
}
pub fn with_data(code: Vec<(u8, Span)>, constants: Vec<Value>) -> Self {
Self { code, constants }
}
pub fn len(&self) -> usize {
self.code.len()
}
pub fn is_empty(&self) -> bool {
self.code.is_empty()
}
pub fn capacity(&self) -> usize {
self.code.capacity()
}
pub fn read(&self, offset: usize) -> (u8, Span) {
self.code[offset]
}
pub fn write(&mut self, instruction: u8, position: Span) {
self.code.push((instruction, position));
}
pub fn push_constant(&mut self, value: Value) -> Result<u8, ChunkError> {
let starting_length = self.constants.len();
if starting_length + 1 > (u8::MAX as usize) {
Err(ChunkError::Overflow)
} else {
self.constants.push(value);
Ok(starting_length as u8)
}
}
pub fn clear(&mut self) {
self.code.clear();
self.constants.clear();
}
pub fn disassemble(&self, name: &str) -> String {
let mut output = String::new();
output.push_str("== ");
output.push_str(name);
output.push_str(" ==\n");
let mut next_is_index = false;
for (offset, (byte, position)) in self.code.iter().enumerate() {
if next_is_index {
let index_display = format!("{position} {offset:04} INDEX {byte}\n");
output.push_str(&index_display);
next_is_index = false;
continue;
}
let instruction = Instruction::from_byte(*byte).unwrap();
let instruction_display =
format!("{} {}\n", position, instruction.disassemble(self, offset));
output.push_str(&instruction_display);
if let Instruction::Constant = instruction {
next_is_index = true;
}
}
output
}
}
impl Default for Chunk {
fn default() -> Self {
Self::new()
}
}
impl Display for Chunk {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{}", self.disassemble("Chunk"))
}
}
impl Debug for Chunk {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{self}")
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ChunkError {
Overflow,
}
#[cfg(test)] #[cfg(test)]
pub mod tests { pub mod tests {
use super::*; use super::*;