Continue writing bytecode implementation

This commit is contained in:
Jeff 2024-09-07 04:34:03 -04:00
parent 406edda573
commit 812d930488
5 changed files with 402 additions and 176 deletions

View File

@ -1,9 +1,9 @@
use std::fmt::{self, Debug, Display, Formatter};
use serde::{Deserialize, Serialize};
use crate::{Span, Value, ValueError};
const STACK_SIZE: usize = 256;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Vm {
chunk: Chunk,
@ -12,11 +12,13 @@ pub struct Vm {
}
impl Vm {
const STACK_SIZE: usize = 256;
pub fn new(chunk: Chunk) -> Self {
Self {
chunk,
ip: 0,
stack: Vec::with_capacity(STACK_SIZE),
stack: Vec::with_capacity(Self::STACK_SIZE),
}
}
@ -31,7 +33,7 @@ impl Vm {
let (index, _) = self.read();
let value = self.read_constant(index as usize);
self.stack.push(value.clone());
self.stack.push(value);
}
Instruction::Return => {
let value = self.pop()?;
@ -84,7 +86,7 @@ impl Vm {
}
pub fn push(&mut self, value: Value) -> Result<(), VmError> {
if self.stack.len() == STACK_SIZE {
if self.stack.len() == Self::STACK_SIZE {
Err(VmError::StackOverflow)
} else {
self.stack.push(value);
@ -164,26 +166,26 @@ impl Instruction {
pub fn disassemble(&self, chunk: &Chunk, offset: usize) -> String {
match self {
Instruction::Constant => {
let index = chunk.code[offset + 1].0 as usize;
let value = &chunk.constants[index];
let (index, _) = chunk.read(offset + 1);
let value = &chunk.constants[index as usize];
format!("{:04} CONSTANT {} {}", offset, index, value)
format!("{offset:04} CONSTANT {index} {value}")
}
Instruction::Return => format!("{:04} RETURN", offset),
Instruction::Return => format!("{offset:04} RETURN"),
// Unary
Instruction::Negate => format!("{:04} NEGATE", offset),
Instruction::Negate => format!("{offset:04} NEGATE"),
// Binary
Instruction::Add => format!("{:04} ADD", offset),
Instruction::Subtract => format!("{:04} SUBTRACT", offset),
Instruction::Multiply => format!("{:04} MULTIPLY", offset),
Instruction::Divide => format!("{:04} DIVIDE", offset),
Instruction::Add => format!("{offset:04} ADD"),
Instruction::Subtract => format!("{offset:04} SUBTRACT"),
Instruction::Multiply => format!("{offset:04} MULTIPLY"),
Instruction::Divide => format!("{offset:04} DIVIDE"),
}
}
}
#[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)]
#[derive(Clone, Eq, PartialEq, Serialize, Deserialize)]
pub struct Chunk {
code: Vec<(u8, Span)>,
constants: Vec<Value>,
@ -197,6 +199,10 @@ impl Chunk {
}
}
/// Builds a chunk directly from raw bytecode and a constant pool
/// (used by tests to state expected compiler output).
pub fn with_data(code: Vec<(u8, Span)>, constants: Vec<Value>) -> Self {
Self { code, constants }
}
pub fn len(&self) -> usize {
self.code.len()
}
@ -209,6 +215,10 @@ impl Chunk {
self.code.capacity()
}
/// Returns the (byte, span) pair at `offset`.
///
/// # Panics
/// Panics if `offset` is out of bounds for the code vector.
pub fn read(&self, offset: usize) -> (u8, Span) {
self.code[offset]
}
/// Appends one bytecode byte (opcode or operand) tagged with its source position.
pub fn write(&mut self, instruction: u8, position: Span) {
self.code.push((instruction, position));
}
@ -230,14 +240,38 @@ impl Chunk {
self.constants.clear();
}
pub fn disassemble(&self, name: &str) {
println!("== {} ==", name);
pub fn disassemble(&self, name: &str) -> String {
let mut output = String::new();
output.push_str("== ");
output.push_str(name);
output.push_str(" ==\n");
let mut next_is_index = false;
for (offset, (byte, position)) in self.code.iter().enumerate() {
let instruction = Instruction::from_byte(*byte).unwrap();
if next_is_index {
let index_display = format!("{position} {offset:04} INDEX {byte}\n");
println!("{} {}", position, instruction.disassemble(self, offset));
output.push_str(&index_display);
next_is_index = false;
continue;
}
let instruction = Instruction::from_byte(*byte).unwrap();
let instruction_display =
format!("{} {}\n", position, instruction.disassemble(self, offset));
output.push_str(&instruction_display);
if let Instruction::Constant = instruction {
next_is_index = true;
}
}
output
}
}
@ -247,6 +281,18 @@ impl Default for Chunk {
}
}
// Human-readable form: the full disassembly under a generic "Chunk" header.
impl Display for Chunk {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{}", self.disassemble("Chunk"))
}
}
// Debug deliberately delegates to Display so `{:?}` also prints the
// disassembly instead of a raw dump of the byte/span vectors.
impl Debug for Chunk {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{self}")
}
}
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum ChunkError {
Overflow,

View File

@ -0,0 +1,16 @@
use crate::{bytecode::VmError, LexError, ParseError};
/// Top-level error for the pipeline: wraps the error from each stage
/// (lex, parse, run) together with the source text it came from, so a
/// reporter can render the error in context.
pub enum DustError<'src> {
/// Error raised while tokenizing `source`.
LexError {
error: LexError,
source: &'src str,
},
/// Error raised while parsing/compiling `source`.
ParseError {
error: ParseError,
source: &'src str,
},
/// Error raised while executing the chunk compiled from `source`.
VmError {
error: VmError,
source: &'src str,
},
}

View File

@ -17,6 +17,7 @@
//! ```
pub mod bytecode;
pub mod constructor;
pub mod dust_error;
pub mod identifier;
pub mod lexer;
pub mod parser;
@ -26,6 +27,7 @@ pub mod value;
pub use bytecode::{Chunk, ChunkError, Instruction, Vm};
pub use constructor::{ConstructError, Constructor};
pub use dust_error::DustError;
pub use identifier::Identifier;
pub use lexer::{LexError, Lexer};
pub use parser::{ParseError, Parser};

View File

@ -1,76 +1,143 @@
use std::num::ParseIntError;
use std::{
fmt::{self, Display, Formatter},
num::ParseIntError,
};
use crate::{
Chunk, ChunkError, Instruction, LexError, Lexer, Span, Token, TokenKind, TokenOwned, Value,
};
/// Compiles `source` to a bytecode chunk by running the Pratt parser
/// until end of input.
pub fn parse(source: &str) -> Result<Chunk, ParseError> {
let lexer = Lexer::new(source);
let mut parser = Parser::new(lexer);
while !parser.is_eof() {
// Each top-level expression starts at the weakest precedence.
parser.parse(Precedence::None)?;
}
Ok(parser.chunk)
}
#[derive(Debug)]
pub struct Parser<'src> {
lexer: Lexer<'src>,
current_token: Token<'src>,
chunk: Chunk,
current_token: Option<Token<'src>>,
current_position: Span,
}
impl<'src> Parser<'src> {
pub fn new(mut lexer: Lexer<'src>) -> Self {
let (current_token, current_position) =
lexer.next_token().unwrap_or((Token::Eof, Span(0, 0)));
pub fn new(lexer: Lexer<'src>) -> Self {
Parser {
lexer,
current_token,
current_position,
chunk: Chunk::new(),
current_token: None,
current_position: Span(0, 0),
}
}
fn is_eof(&self) -> bool {
matches!(self.current_token, Token::Eof)
matches!(self.current_token, Some(Token::Eof))
}
fn advance(&mut self) -> Result<(), ParseError> {
let (token, position) = self.lexer.next_token()?;
self.current_token = token;
log::trace!("Advancing to token {token} at {position}");
self.current_token = Some(token);
self.current_position = position;
Ok(())
}
/// Owned copy of the current token; `TokenOwned::Eof` before the first
/// `advance` has filled `current_token`.
fn current_token_owned(&self) -> TokenOwned {
self.current_token
.as_ref()
.map_or(TokenOwned::Eof, |token| token.to_owned())
}
/// Kind of the current token; `TokenKind::Eof` before the first
/// `advance` has filled `current_token`.
fn current_token_kind(&self) -> TokenKind {
self.current_token
.as_ref()
.map_or(TokenKind::Eof, |token| token.kind())
}
fn consume(&mut self, expected: TokenKind) -> Result<(), ParseError> {
if self.current_token.kind() == expected {
if self.current_token_kind() == expected {
self.advance()
} else {
Err(ParseError::ExpectedToken {
expected,
found: self.current_token.to_owned(),
found: self.current_token_owned(),
position: self.current_position,
})
}
}
fn emit_instruction(&mut self, instruction: Instruction, chunk: &mut Chunk) {
chunk.write(instruction as u8, self.current_position);
/// Appends one raw byte to the chunk, tagged with the current source span.
fn emit_byte(&mut self, byte: u8) {
self.chunk.write(byte, self.current_position);
}
fn parse_prefix(&mut self, chunk: &mut Chunk) -> Result<(), ParseError> {
/// Adds `value` to the chunk's constant pool and emits `CONSTANT <index>`
/// to load it at runtime; errors if the pool rejects the value
/// (e.g. `ChunkError::Overflow`).
fn emit_constant(&mut self, value: Value) -> Result<(), ParseError> {
let constant_index = self.chunk.push_constant(value)?;
self.emit_byte(Instruction::Constant as u8);
self.emit_byte(constant_index);
Ok(())
}
fn parse_primary(&mut self, chunk: &mut Chunk) -> Result<(), ParseError> {
match self.current_token {
Token::Integer(text) => {
let integer = text.parse::<i64>()?;
let value = Value::integer(integer);
let constant_index = chunk.push_constant(value)?;
fn parse_integer(&mut self) -> Result<(), ParseError> {
if let Some(Token::Integer(text)) = self.current_token {
let integer = text.parse::<i64>().unwrap();
let value = Value::integer(integer);
chunk.write(Instruction::Constant as u8, self.current_position);
chunk.write(constant_index, self.current_position);
}
Token::LeftParenthesis => {}
self.emit_constant(value)?;
}
Ok(())
}
/// Prefix rule for `(`: compiles the inner expression, then requires the
/// closing `)`.
fn parse_grouped(&mut self) -> Result<(), ParseError> {
self.parse_expression()?;
self.consume(TokenKind::RightParenthesis)?;
Ok(())
}
/// Prefix rule for unary minus: compiles the operand, then emits NEGATE.
// NOTE(review): silently succeeds without emitting anything when the
// current token is not `-` — confirm that is intended rather than an error.
fn parse_unary(&mut self) -> Result<(), ParseError> {
if let Some(Token::Minus) = self.current_token {
self.advance()?;
self.parse_expression()?;
self.emit_byte(Instruction::Negate as u8);
}
Ok(())
}
fn parse_binary(&mut self) -> Result<(), ParseError> {
let operator_position = self.current_position;
let operator = self.current_token_kind();
let rule = ParseRule::from(&operator);
self.parse(rule.precedence.increment())?;
match operator {
TokenKind::Plus => self.emit_byte(Instruction::Add as u8),
TokenKind::Minus => self.emit_byte(Instruction::Subtract as u8),
TokenKind::Star => self.emit_byte(Instruction::Multiply as u8),
TokenKind::Slash => self.emit_byte(Instruction::Divide as u8),
_ => {
return Err(ParseError::ExpectedTokenMultiple {
expected: vec![TokenKind::Integer],
found: self.current_token.to_owned(),
expected: vec![
TokenKind::Plus,
TokenKind::Minus,
TokenKind::Star,
TokenKind::Slash,
],
found: self.current_token_owned(),
position: self.current_position,
})
}
@ -79,13 +146,196 @@ impl<'src> Parser<'src> {
Ok(())
}
pub fn parse_postfix(&mut self, left: Value, chunk: &mut Chunk) -> Result<(), ParseError> {
/// Parses a full expression: everything at `Assignment` precedence and up.
fn parse_expression(&mut self) -> Result<(), ParseError> {
self.parse(Precedence::Assignment)
}
// Pratt parsing functions
/// Core Pratt loop: advance to the next token, run its prefix rule, then
/// keep folding infix rules while their precedence is at least `precedence`.
///
/// Errors with `ExpectedPrefix` when the consumed token has no prefix rule.
// NOTE(review): `advance` is called *before* each dispatch, so prefix/infix
// rules run with `current_token` set to the token that selected them.
fn parse(&mut self, precedence: Precedence) -> Result<(), ParseError> {
log::trace!("Parsing with precedence {precedence}");
self.advance()?;
let prefix_rule = ParseRule::from(&self.current_token_kind()).prefix;
if let Some(prefix) = prefix_rule {
log::trace!("Parsing {} as prefix", &self.current_token_owned());
prefix(self)?;
} else {
return Err(ParseError::ExpectedPrefix {
found: self.current_token_owned(),
position: self.current_position,
});
}
// Fold infix operators until one binds more weakly than `precedence`
// or the lookahead has no infix rule.
while precedence <= ParseRule::from(&self.current_token_kind()).precedence {
self.advance()?;
let infix_rule = ParseRule::from(&self.current_token_kind()).infix;
if let Some(infix) = infix_rule {
log::trace!("Parsing {} as infix", self.current_token_owned());
infix(self)?;
} else {
break;
}
}
Ok(())
}
}
/// Operator precedence levels for Pratt parsing, ordered from weakest
/// (`None`) to strongest (`Primary`). Discriminants are contiguous so a
/// level can be stepped up or down by one.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Precedence {
    None = 0,
    Assignment = 1,
    Conditional = 2,
    LogicalOr = 3,
    LogicalAnd = 4,
    Equality = 5,
    Comparison = 6,
    Term = 7,
    Factor = 8,
    Unary = 9,
    Call = 10,
    Primary = 11,
}

impl Precedence {
    /// Maps a raw discriminant back to a level; anything above `Call`
    /// saturates to `Primary`.
    fn from_byte(byte: u8) -> Self {
        match byte {
            0 => Self::None,
            1 => Self::Assignment,
            2 => Self::Conditional,
            3 => Self::LogicalOr,
            4 => Self::LogicalAnd,
            5 => Self::Equality,
            6 => Self::Comparison,
            7 => Self::Term,
            8 => Self::Factor,
            9 => Self::Unary,
            10 => Self::Call,
            _ => Self::Primary,
        }
    }

    /// Returns the next-stronger level, saturating at `Primary`
    /// (12 falls into `from_byte`'s catch-all arm).
    fn increment(&self) -> Self {
        Self::from_byte(*self as u8 + 1)
    }

    /// Returns the next-weaker level, saturating at `None`.
    ///
    /// Fix: the original `*self as u8 - 1` underflows when called on
    /// `Precedence::None` — a panic in debug builds, a wrap to `Primary`
    /// in release builds. `saturating_sub` makes `None.decrement()` stay
    /// `None`.
    fn decrement(&self) -> Self {
        Self::from_byte((*self as u8).saturating_sub(1))
    }
}

impl Display for Precedence {
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // Reuse the derived Debug name; inline-args style matches the file.
        write!(f, "{self:?}")
    }
}
/// Signature shared by all prefix/infix parser callbacks.
type ParserFunction<'a> = fn(&'_ mut Parser<'a>) -> Result<(), ParseError>;
/// One row of the Pratt table: the optional prefix and infix handlers for a
/// token kind, plus the precedence that token has as an infix operator.
#[derive(Debug, Clone, Copy)]
pub struct ParseRule<'a> {
pub prefix: Option<ParserFunction<'a>>,
pub infix: Option<ParserFunction<'a>>,
pub precedence: Precedence,
}
// Pratt rule table: maps every token kind to its prefix/infix handlers and
// infix precedence. Kinds still marked `todo!()` panic if they ever reach
// the parser — placeholders while the bytecode compiler is built out.
impl From<&TokenKind> for ParseRule<'_> {
fn from(token_kind: &TokenKind) -> Self {
match token_kind {
TokenKind::Eof => ParseRule {
prefix: None,
infix: None,
precedence: Precedence::None,
},
TokenKind::Identifier => todo!(),
TokenKind::Boolean => todo!(),
TokenKind::Character => todo!(),
TokenKind::Float => todo!(),
// Integer literals are prefix-only operands.
TokenKind::Integer => ParseRule {
prefix: Some(Parser::parse_integer),
infix: None,
precedence: Precedence::None,
},
TokenKind::String => todo!(),
TokenKind::Async => todo!(),
TokenKind::Bool => todo!(),
TokenKind::Break => todo!(),
TokenKind::Else => todo!(),
TokenKind::FloatKeyword => todo!(),
TokenKind::If => todo!(),
TokenKind::Int => todo!(),
TokenKind::Let => todo!(),
TokenKind::Loop => todo!(),
TokenKind::Map => todo!(),
TokenKind::Str => todo!(),
TokenKind::While => todo!(),
TokenKind::BangEqual => todo!(),
TokenKind::Bang => todo!(),
TokenKind::Colon => todo!(),
TokenKind::Comma => todo!(),
TokenKind::Dot => todo!(),
TokenKind::DoubleAmpersand => todo!(),
TokenKind::DoubleDot => todo!(),
TokenKind::DoubleEqual => todo!(),
TokenKind::DoublePipe => todo!(),
TokenKind::Equal => todo!(),
TokenKind::Greater => todo!(),
TokenKind::GreaterOrEqual => todo!(),
TokenKind::LeftCurlyBrace => todo!(),
// `(` starts a grouped expression.
TokenKind::LeftParenthesis => ParseRule {
prefix: Some(Parser::parse_grouped),
infix: None,
precedence: Precedence::None,
},
TokenKind::LeftSquareBrace => todo!(),
TokenKind::Less => todo!(),
TokenKind::LessOrEqual => todo!(),
// `-` is both prefix (negation) and infix (subtraction).
TokenKind::Minus => ParseRule {
prefix: Some(Parser::parse_unary),
infix: Some(Parser::parse_binary),
precedence: Precedence::Term,
},
TokenKind::MinusEqual => todo!(),
TokenKind::Mut => todo!(),
TokenKind::Percent => todo!(),
TokenKind::Plus => ParseRule {
prefix: None,
infix: Some(Parser::parse_binary),
precedence: Precedence::Term,
},
TokenKind::PlusEqual => todo!(),
TokenKind::RightCurlyBrace => todo!(),
// NOTE(review): `)` can appear as the lookahead inside the Pratt loop
// after a grouped expression; this `todo!()` would panic there —
// confirm it should instead get a no-rule entry like `Eof`.
TokenKind::RightParenthesis => todo!(),
TokenKind::RightSquareBrace => todo!(),
TokenKind::Semicolon => todo!(),
TokenKind::Star => ParseRule {
prefix: None,
infix: Some(Parser::parse_binary),
precedence: Precedence::Factor,
},
TokenKind::Struct => todo!(),
TokenKind::Slash => ParseRule {
prefix: None,
infix: Some(Parser::parse_binary),
precedence: Precedence::Factor,
},
}
}
}
#[derive(Debug, PartialEq)]
pub enum ParseError {
ExpectedPrefix {
found: TokenOwned,
position: Span,
},
ExpectedToken {
expected: TokenKind,
found: TokenOwned,
@ -120,3 +370,44 @@ impl From<ChunkError> for ParseError {
Self::Chunk(error)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// "42" compiles to a single CONSTANT instruction followed by the
    /// constant-pool index of the value 42.
    #[test]
    fn parse_integer() {
        let source = "42";
        let test_chunk = parse(source);

        assert_eq!(
            test_chunk,
            Ok(Chunk::with_data(
                vec![(Instruction::Constant as u8, Span(0, 2)), (0, Span(0, 2))],
                vec![Value::integer(42)]
            ))
        );
    }

    /// "42 + 42" compiles to two CONSTANT loads followed by ADD, with the
    /// ADD carrying the operator's span.
    #[test]
    fn parse_addition() {
        // Fix: `try_init` returns `Err` when a logger is already installed
        // (e.g. by another test in the same process), so unwrapping it made
        // this test panic spuriously. Ignore the result instead.
        let _ = env_logger::builder().is_test(true).try_init();

        let source = "42 + 42";
        let test_chunk = parse(source);

        assert_eq!(
            test_chunk,
            Ok(Chunk::with_data(
                vec![
                    (Instruction::Constant as u8, Span(0, 2)),
                    (0, Span(0, 2)),
                    (Instruction::Constant as u8, Span(5, 7)),
                    (1, Span(5, 7)),
                    (Instruction::Add as u8, Span(3, 4)),
                ],
                vec![Value::integer(42), Value::integer(42)]
            ))
        );
    }
}

View File

@ -227,58 +227,6 @@ impl<'src> Token<'src> {
Token::While => TokenKind::While,
}
}
/// Returns true if this token is the end-of-file marker.
pub fn is_eof(&self) -> bool {
matches!(self, Token::Eof)
}
/// Binding power of this token when used as an operator; higher binds
/// tighter. Non-operator tokens fall through to 0.
pub fn precedence(&self) -> u8 {
match self {
Token::Dot => 9,
Token::LeftParenthesis | Token::LeftSquareBrace => 8,
Token::Star | Token::Slash | Token::Percent => 7,
Token::Minus | Token::Plus => 6,
Token::DoubleEqual
| Token::Less
| Token::LessEqual
| Token::Greater
| Token::GreaterEqual => 5,
Token::DoubleAmpersand => 4,
Token::DoublePipe => 3,
Token::DoubleDot => 2,
// Assignment operators bind most weakly.
Token::Equal | Token::MinusEqual | Token::PlusEqual => 1,
_ => 0,
}
}
/// Returns true for binary operators that group left-to-right.
pub fn is_left_associative(&self) -> bool {
matches!(
self,
Token::Dot
| Token::DoubleAmpersand
| Token::DoublePipe
| Token::Plus
| Token::Minus
| Token::Star
| Token::Slash
| Token::Percent
)
}
/// Returns true for assignment-style operators, which group right-to-left.
pub fn is_right_associative(&self) -> bool {
matches!(self, Token::Equal | Token::MinusEqual | Token::PlusEqual)
}
/// Returns true if this token can start a prefix (unary) expression.
pub fn is_prefix(&self) -> bool {
matches!(self, Token::Bang | Token::Minus | Token::Star)
}
/// Returns true if this token can follow an expression as a postfix
/// operator (field access, call, index, or block).
pub fn is_postfix(&self) -> bool {
matches!(
self,
Token::Dot | Token::LeftCurlyBrace | Token::LeftParenthesis | Token::LeftSquareBrace
)
}
}
impl<'src> Display for Token<'src> {
@ -572,80 +520,3 @@ impl Display for TokenKind {
}
}
}
#[cfg(test)]
pub(crate) mod tests {
use super::*;
/// One instance of every `Token` variant (payload-carrying variants use a
/// representative literal). `pub(crate)` so sibling modules can reuse it.
pub fn all_tokens<'src>() -> [Token<'src>; 47] {
[
Token::Async,
Token::Bang,
Token::BangEqual,
Token::Bool,
Token::Break,
Token::Colon,
Token::Comma,
Token::Dot,
Token::DoubleAmpersand,
Token::DoubleDot,
Token::DoubleEqual,
Token::DoublePipe,
Token::Else,
Token::Eof,
Token::Equal,
Token::FloatKeyword,
Token::Greater,
Token::GreaterEqual,
Token::If,
Token::Int,
Token::LeftCurlyBrace,
Token::LeftParenthesis,
Token::LeftSquareBrace,
Token::Let,
Token::Less,
Token::LessEqual,
Token::Map,
Token::Minus,
Token::MinusEqual,
Token::Mut,
Token::Percent,
Token::Plus,
Token::PlusEqual,
Token::RightCurlyBrace,
Token::RightParenthesis,
Token::RightSquareBrace,
Token::Semicolon,
Token::Star,
Token::Str,
Token::Slash,
Token::Boolean("true"),
Token::Float("0.0"),
Token::Integer("0"),
Token::String("string"),
Token::Identifier("foobar"),
Token::Struct,
Token::While,
]
}
/// Every token's Display output must agree with its owned form, and — for
/// tokens without a payload — with its kind's Display output too.
#[test]
fn token_displays() {
for token in all_tokens().iter() {
let display = token.to_string();
assert_eq!(display, token.to_owned().to_string());
// Payload-carrying tokens display their payload, which the bare
// kind cannot reproduce, so they are excluded from the kind check.
if let Token::Boolean(_)
| Token::Float(_)
| Token::Identifier(_)
| Token::Integer(_)
| Token::String(_) = token
{
continue;
} else {
assert_eq!(display, token.kind().to_string());
}
}
}
}