1
0
This commit is contained in:
Jeff 2024-11-05 19:38:26 -05:00
parent dd13d2efee
commit 3330939128
14 changed files with 139 additions and 94 deletions

View File

@ -1,3 +1,14 @@
//! In-memory representation of a Dust program or function.
//!
//! A chunk consists of a sequence of instructions and their positions, a list of constants, and a
//! list of locals, which together can be executed by the Dust virtual machine. Chunks have a name
//! when they belong to a named function.
//!
//! # Disassembly
//!
//! Chunks can be disassembled into a human-readable format using the `disassemble` method. The
//! output is designed to be displayed in a terminal and is styled for readability.
use std::{ use std::{
cmp::Ordering, cmp::Ordering,
env::current_exe, env::current_exe,
@ -9,6 +20,9 @@ use serde::{Deserialize, Serialize};
use crate::{Instruction, Span, Type, Value}; use crate::{Instruction, Span, Type, Value};
/// In-memory representation of a Dust program or function.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Clone, PartialOrd, Ord, Serialize, Deserialize)] #[derive(Clone, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Chunk { pub struct Chunk {
name: Option<String>, name: Option<String>,
@ -125,7 +139,7 @@ impl Chunk {
pub fn begin_scope(&mut self) { pub fn begin_scope(&mut self) {
self.scope_index += 1; self.scope_index += 1;
self.current_scope.width = self.scope_index; self.current_scope.index = self.scope_index;
self.current_scope.depth += 1; self.current_scope.depth += 1;
} }
@ -133,9 +147,9 @@ impl Chunk {
self.current_scope.depth -= 1; self.current_scope.depth -= 1;
if self.current_scope.depth == 0 { if self.current_scope.depth == 0 {
self.current_scope.width = 0; self.current_scope.index = 0;
} else { } else {
self.current_scope.width -= 1; self.current_scope.index -= 1;
} }
} }
@ -206,26 +220,29 @@ pub struct Scope {
/// The level of block nesting. /// The level of block nesting.
pub depth: u8, pub depth: u8,
/// The nth scope in the chunk. /// The nth scope in the chunk.
pub width: u8, pub index: u8,
} }
impl Scope { impl Scope {
pub fn new(depth: u8, width: u8) -> Self { pub fn new(index: u8, width: u8) -> Self {
Self { depth, width } Self {
depth: index,
index: width,
}
} }
pub fn contains(&self, other: &Self) -> bool { pub fn contains(&self, other: &Self) -> bool {
match self.depth.cmp(&other.depth) { match self.depth.cmp(&other.depth) {
Ordering::Less => false, Ordering::Less => false,
Ordering::Greater => self.width >= other.width, Ordering::Greater => self.index >= other.index,
Ordering::Equal => self.width == other.width, Ordering::Equal => self.index == other.index,
} }
} }
} }
impl Display for Scope { impl Display for Scope {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "({}, {})", self.depth, self.width) write!(f, "({}, {})", self.depth, self.index)
} }
} }

View File

@ -1,13 +1,13 @@
//! Top-level Dust errors.
use annotate_snippets::{Level, Renderer, Snippet}; use annotate_snippets::{Level, Renderer, Snippet};
use crate::{vm::VmError, LexError, ParseError, Span}; use crate::{vm::VmError, ParseError, Span};
/// A top-level error that can occur during the execution of Dust code.
///
/// This error can display nicely formatted messages with source code annotations.
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
pub enum DustError<'src> { pub enum DustError<'src> {
Lex {
error: LexError,
source: &'src str,
},
Parse { Parse {
error: ParseError, error: ParseError,
source: &'src str, source: &'src str,
@ -52,7 +52,6 @@ impl<'src> DustError<'src> {
report.push_str(&renderer.render(message).to_string()); report.push_str(&renderer.render(message).to_string());
} }
_ => todo!(),
} }
report report

View File

@ -1,8 +1,9 @@
//! Formatting tools
use std::mem::replace; use std::mem::replace;
use colored::{ColoredString, Colorize, CustomColor}; use colored::{ColoredString, Colorize, CustomColor};
use crate::{DustError, LexError, Lexer, Token}; use crate::{DustError, LexError, Lexer, ParseError, Token};
pub fn format(source: &str, line_numbers: bool, colored: bool) -> Result<String, DustError> { pub fn format(source: &str, line_numbers: bool, colored: bool) -> Result<String, DustError> {
let lexer = Lexer::new(source); let lexer = Lexer::new(source);
@ -10,7 +11,10 @@ pub fn format(source: &str, line_numbers: bool, colored: bool) -> Result<String,
.line_numbers(line_numbers) .line_numbers(line_numbers)
.colored(colored) .colored(colored)
.format() .format()
.map_err(|error| DustError::Lex { error, source })?; .map_err(|error| DustError::Parse {
error: ParseError::Lex(error),
source,
})?;
Ok(formatted) Ok(formatted)
} }

View File

@ -1,7 +1,24 @@
//! An operation and its arguments for the Dust virtual machine.
//!
//! Each instruction is a 32-bit unsigned integer that is divided into six fields (bits are
//! numbered starting from 1):
//! - Bits 1-6: The operation code.
//! - Bit 7: A flag indicating whether the B argument is a constant.
//! - Bit 8: A flag indicating whether the C argument is a constant.
//! - Bits 9-16: The A argument.
//! - Bits 17-24: The B argument.
//! - Bits 25-32: The C argument.
//!
//! Be careful when working with instructions directly. When modifying an instruction, be sure to
//! account for the fact that setting the A, B, or C arguments to 0 will have no effect. It is
//! usually best to remove instructions and insert new ones in their place instead of mutating them.
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::{Chunk, NativeFunction, Operation}; use crate::{Chunk, NativeFunction, Operation};
/// An operation and its arguments for the Dust virtual machine.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)] #[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct Instruction(u32); pub struct Instruction(u32);

View File

@ -1,4 +1,4 @@
//! Lexing tools. //! Lexing tools and errors
//! //!
//! This module provides two lexing options: //! This module provides two lexing options:
//! - [`lex`], which lexes the entire input and returns a vector of tokens and their positions //! - [`lex`], which lexes the entire input and returns a vector of tokens and their positions
@ -8,9 +8,9 @@ use std::fmt::{self, Display, Formatter};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use crate::{dust_error::AnnotatedError, DustError, Span, Token}; use crate::{dust_error::AnnotatedError, DustError, ParseError, Span, Token};
/// Lexes the input and return a vector of tokens and their positions. /// Lexes the input and returns a vector of tokens and their positions.
/// ///
/// # Examples /// # Examples
/// ``` /// ```
@ -37,9 +37,10 @@ pub fn lex<'tokens, 'src: 'tokens>(
let mut tokens = Vec::new(); let mut tokens = Vec::new();
loop { loop {
let (token, span) = lexer let (token, span) = lexer.next_token().map_err(|error| DustError::Parse {
.next_token() error: ParseError::Lex(error),
.map_err(|error| DustError::Lex { error, source })?; source,
})?;
let is_eof = matches!(token, Token::Eof); let is_eof = matches!(token, Token::Eof);
tokens.push((token, span)); tokens.push((token, span));
@ -54,36 +55,7 @@ pub fn lex<'tokens, 'src: 'tokens>(
/// Low-level tool for lexing a single token at a time. /// Low-level tool for lexing a single token at a time.
/// ///
/// # Examples /// See the [`lex`] function for an example of how to create and use a Lexer.
/// ```
/// # use dust_lang::*;
/// let input = "x = 1 + 2";
/// let mut lexer = Lexer::new(input);
/// let mut tokens = Vec::new();
///
/// loop {
/// let (token, span) = lexer.next_token().unwrap();
/// let is_eof = matches!(token, Token::Eof);
///
/// tokens.push((token, span));
///
/// if is_eof {
/// break;
/// }
/// }
///
/// assert_eq!(
/// tokens,
/// [
/// (Token::Identifier("x"), Span(0, 1)),
/// (Token::Equal, Span(2, 3)),
/// (Token::Integer("1"), Span(4, 5)),
/// (Token::Plus, Span(6, 7)),
/// (Token::Integer("2"), Span(8, 9)),
/// (Token::Eof, Span(9, 9)),
/// ]
/// )
/// ```
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] #[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Lexer<'src> { pub struct Lexer<'src> {
source: &'src str, source: &'src str,

View File

@ -1,28 +1,30 @@
mod chunk; //! The Dust programming language library.
mod dust_error;
mod formatter;
mod instruction;
mod lexer;
mod native_function;
mod operation;
mod parser;
mod token;
mod r#type;
mod value;
mod vm;
pub use chunk::{Chunk, ChunkDisassembler, Local, Scope}; pub mod chunk;
pub use dust_error::{AnnotatedError, DustError}; pub mod dust_error;
pub use formatter::{format, Formatter}; pub mod formatter;
pub use instruction::Instruction; pub mod instruction;
pub use lexer::{lex, LexError, Lexer}; pub mod lexer;
pub use native_function::{NativeFunction, NativeFunctionError}; pub mod native_function;
pub use operation::Operation; pub mod operation;
pub use parser::{parse, ParseError}; pub mod parser;
pub use r#type::{EnumType, FunctionType, RangeableType, StructType, Type, TypeConflict}; pub mod token;
pub use token::{Token, TokenKind, TokenOwned}; pub mod r#type;
pub use value::{Function, Primitive, Value, ValueError}; pub mod value;
pub use vm::{run, Vm, VmError}; pub mod vm;
pub use crate::chunk::{Chunk, ChunkDisassembler, Local, Scope};
pub use crate::dust_error::{AnnotatedError, DustError};
pub use crate::formatter::{format, Formatter};
pub use crate::instruction::Instruction;
pub use crate::lexer::{lex, LexError, Lexer};
pub use crate::native_function::{NativeFunction, NativeFunctionError};
pub use crate::operation::Operation;
pub use crate::parser::{parse, ParseError};
pub use crate::r#type::{EnumType, FunctionType, RangeableType, StructType, Type, TypeConflict};
pub use crate::token::{Token, TokenKind, TokenOwned};
pub use crate::value::{Function, Primitive, Value, ValueError};
pub use crate::vm::{run, Vm, VmError};
use std::fmt::Display; use std::fmt::Display;

View File

@ -1,3 +1,7 @@
//! Built-in functions that implement extended functionality.
//!
//! Native functions are used to implement features that are either not possible to implement in
//! Dust itself or more efficient to implement in Rust.
use std::{ use std::{
fmt::{self, Display, Formatter}, fmt::{self, Display, Formatter},
io::{self, stdin, stdout, Write}, io::{self, stdin, stdout, Write},
@ -10,6 +14,9 @@ use crate::{AnnotatedError, FunctionType, Instruction, Primitive, Span, Type, Va
macro_rules! impl_from_str_for_native_function { macro_rules! impl_from_str_for_native_function {
($(($name:ident, $byte:literal, $str:expr, $type:expr)),*) => { ($(($name:ident, $byte:literal, $str:expr, $type:expr)),*) => {
/// A dust-native function.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)] #[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum NativeFunction { pub enum NativeFunction {
$( $(

View File

@ -1,3 +1,9 @@
//! Part of an [Instruction][crate::Instruction], which can be executed by the Dust virtual machine.
//!
//! !!! Warning !!!
//! The byte values of the operations matter. The seventh and eighth bits must be zero so that the
//! [Instruction][crate::Instruction] type can use them as flags.
use std::fmt::{self, Display, Formatter}; use std::fmt::{self, Display, Formatter};
const MOVE: u8 = 0b0000_0000; const MOVE: u8 = 0b0000_0000;
@ -33,6 +39,9 @@ const CALL: u8 = 0b0001_0110;
const CALL_NATIVE: u8 = 0b0001_0111; const CALL_NATIVE: u8 = 0b0001_0111;
const RETURN: u8 = 0b0001_1000; const RETURN: u8 = 0b0001_1000;
/// Part of an [Instruction][crate::Instruction], which can be executed by the Dust virtual machine.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Clone, Copy, Debug, PartialEq)] #[derive(Clone, Copy, Debug, PartialEq)]
pub enum Operation { pub enum Operation {
// Stack manipulation // Stack manipulation

View File

@ -1,3 +1,8 @@
//! Parsing tools and errors
//!
//! This module provides two parsing options:
//! - [`parse`], which parses the entire input and returns a chunk
//! - [`Parser`], which parses the input a token at a time while assembling a chunk
use std::{ use std::{
fmt::{self, Display, Formatter}, fmt::{self, Display, Formatter},
mem::replace, mem::replace,
@ -13,6 +18,17 @@ use crate::{
NativeFunction, Operation, Scope, Span, Token, TokenKind, TokenOwned, Type, Value, NativeFunction, Operation, Scope, Span, Token, TokenKind, TokenOwned, Type, Value,
}; };
/// Parses the input and returns a chunk.
///
/// # Example
///
/// ```
/// # use dust_lang::parse;
/// let source = "40 + 2 == 42";
/// let chunk = parse(source).unwrap();
///
/// assert_eq!(chunk.len(), 6);
/// ```
pub fn parse(source: &str) -> Result<Chunk, DustError> { pub fn parse(source: &str) -> Result<Chunk, DustError> {
let lexer = Lexer::new(source); let lexer = Lexer::new(source);
let mut parser = Parser::new(lexer).map_err(|error| DustError::Parse { error, source })?; let mut parser = Parser::new(lexer).map_err(|error| DustError::Parse { error, source })?;
@ -24,6 +40,9 @@ pub fn parse(source: &str) -> Result<Chunk, DustError> {
Ok(parser.finish()) Ok(parser.finish())
} }
/// Low-level tool for parsing the input a token at a time while assembling a chunk.
///
/// See the [`parse`] function for an example of how to create and use a Parser.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize)] #[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize)]
struct Parser<'src> { struct Parser<'src> {
lexer: Lexer<'src>, lexer: Lexer<'src>,

View File

@ -5,6 +5,9 @@ use serde::{Deserialize, Serialize};
macro_rules! define_tokens { macro_rules! define_tokens {
($($variant:ident $(($data_type:ty))?),+ $(,)?) => { ($($variant:ident $(($data_type:ty))?),+ $(,)?) => {
/// Source token.
///
/// This is a borrowed type, i.e. some variants contain references to the source text.
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Default, Serialize, Deserialize)] #[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Default, Serialize, Deserialize)]
pub enum Token<'src> { pub enum Token<'src> {
#[default] #[default]
@ -15,6 +18,9 @@ macro_rules! define_tokens {
} }
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
/// Data-less representation of a source token.
///
/// If a [Token] borrows from the source text, its TokenKind omits the data.
pub enum TokenKind { pub enum TokenKind {
Eof, Eof,
$( $(
@ -439,9 +445,9 @@ impl<'src> Display for Token<'src> {
} }
} }
/// Owned version of `Token`, which owns all the strings. /// Owned representation of a source token.
/// ///
/// This is used for errors. /// If a [Token] borrows from the source text, its TokenOwned owns a copy of that data.
#[derive(Debug, PartialEq, Clone)] #[derive(Debug, PartialEq, Clone)]
pub enum TokenOwned { pub enum TokenOwned {
Eof, Eof,

View File

@ -1,14 +1,4 @@
//! Value types. //! Value types and conflict handling.
//!
//! Most types are concrete and specific, the exceptions are the Generic and Any types.
//!
//! Generic types are temporary placeholders that describe a type that will be defined later. The
//! interpreter should use the analysis phase to enforce that all Generic types have a concrete
//! type assigned to them before the program is run.
//!
//! The Any type is used in cases where a value's type does not matter. For example, the standard
//! library's "length" function does not care about the type of item in the list, only the list
//! itself. So the input is defined as `[any]`, i.e. `Type::ListOf(Box::new(Type::Any))`.
use std::{ use std::{
cmp::Ordering, cmp::Ordering,
collections::HashMap, collections::HashMap,
@ -18,8 +8,6 @@ use std::{
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
/// Description of a kind of value. /// Description of a kind of value.
///
/// See the [module documentation](index.html) for more information.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum Type { pub enum Type {
Any, Any,

View File

@ -1,3 +1,4 @@
//! Virtual machine and errors
use std::{cmp::Ordering, mem::replace}; use std::{cmp::Ordering, mem::replace};
use crate::{ use crate::{
@ -13,6 +14,9 @@ pub fn run(source: &str) -> Result<Option<Value>, DustError> {
.map_err(|error| DustError::Runtime { error, source }) .map_err(|error| DustError::Runtime { error, source })
} }
/// Dust virtual machine.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Debug, Eq, PartialEq)] #[derive(Debug, Eq, PartialEq)]
pub struct Vm { pub struct Vm {
ip: usize, ip: usize,

View File

@ -23,7 +23,7 @@ fn equality_assignment_long() {
(Instruction::r#return(true), Span(44, 44)), (Instruction::r#return(true), Span(44, 44)),
], ],
vec![Value::integer(4), Value::string("a")], vec![Value::integer(4), Value::string("a")],
vec![Local::new(1, None, false, Scope { depth: 0, width: 0 }, 0)] vec![Local::new(1, None, false, Scope { depth: 0, index: 0 }, 0)]
)), )),
); );

View File

@ -7,7 +7,7 @@ use log::{Level, LevelFilter};
#[derive(Parser)] #[derive(Parser)]
struct Cli { struct Cli {
/// Source code send via command line /// Source code sent via command line
#[arg(short, long)] #[arg(short, long)]
command: Option<String>, command: Option<String>,
@ -31,6 +31,7 @@ struct Cli {
#[arg(long)] #[arg(long)]
style_disassembly: Option<bool>, style_disassembly: Option<bool>,
/// Log level
#[arg(short, long)] #[arg(short, long)]
log: Option<LevelFilter>, log: Option<LevelFilter>,