1
0
This commit is contained in:
Jeff 2024-11-05 19:38:26 -05:00
parent dd13d2efee
commit 3330939128
14 changed files with 139 additions and 94 deletions

View File

@ -1,3 +1,14 @@
//! In-memory representation of a Dust program or function.
//!
//! A chunk consists of a sequence of instructions and their positions, a list of constants, and a
//! list of locals that can be executed by the Dust virtual machine. Chunks have a name when they
//! belong to a named function.
//!
//! # Disassembly
//!
//! Chunks can be disassembled into a human-readable format using the `disassemble` method. The
//! output is designed to be displayed in a terminal and is styled for readability.
use std::{
cmp::Ordering,
env::current_exe,
@ -9,6 +20,9 @@ use serde::{Deserialize, Serialize};
use crate::{Instruction, Span, Type, Value};
/// In-memory representation of a Dust program or function.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Clone, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Chunk {
name: Option<String>,
@ -125,7 +139,7 @@ impl Chunk {
pub fn begin_scope(&mut self) {
self.scope_index += 1;
self.current_scope.width = self.scope_index;
self.current_scope.index = self.scope_index;
self.current_scope.depth += 1;
}
@ -133,9 +147,9 @@ impl Chunk {
self.current_scope.depth -= 1;
if self.current_scope.depth == 0 {
self.current_scope.width = 0;
self.current_scope.index = 0;
} else {
self.current_scope.width -= 1;
self.current_scope.index -= 1;
}
}
@ -206,26 +220,29 @@ pub struct Scope {
/// The level of block nesting.
pub depth: u8,
/// The nth scope in the chunk.
pub width: u8,
pub index: u8,
}
impl Scope {
pub fn new(depth: u8, width: u8) -> Self {
Self { depth, width }
/// Creates a scope from its nesting `depth` and its `index` within the chunk.
///
/// The parameters are named after the fields they initialize: the first argument becomes
/// `depth` (the level of block nesting) and the second becomes `index` (the nth scope in the
/// chunk). Arguments are positional, so existing callers are unaffected by the names.
pub fn new(depth: u8, index: u8) -> Self {
    Self { depth, index }
}
pub fn contains(&self, other: &Self) -> bool {
match self.depth.cmp(&other.depth) {
Ordering::Less => false,
Ordering::Greater => self.width >= other.width,
Ordering::Equal => self.width == other.width,
Ordering::Greater => self.index >= other.index,
Ordering::Equal => self.index == other.index,
}
}
}
impl Display for Scope {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "({}, {})", self.depth, self.width)
write!(f, "({}, {})", self.depth, self.index)
}
}

View File

@ -1,13 +1,13 @@
//! Top-level Dust errors.
use annotate_snippets::{Level, Renderer, Snippet};
use crate::{vm::VmError, LexError, ParseError, Span};
use crate::{vm::VmError, ParseError, Span};
/// A top-level error that can occur during the execution of Dust code.
///
/// This error can display nicely formatted messages with source code annotations.
#[derive(Debug, PartialEq)]
pub enum DustError<'src> {
Lex {
error: LexError,
source: &'src str,
},
Parse {
error: ParseError,
source: &'src str,
@ -52,7 +52,6 @@ impl<'src> DustError<'src> {
report.push_str(&renderer.render(message).to_string());
}
_ => todo!(),
}
report

View File

@ -1,8 +1,9 @@
//! Formatting tools
use std::mem::replace;
use colored::{ColoredString, Colorize, CustomColor};
use crate::{DustError, LexError, Lexer, Token};
use crate::{DustError, LexError, Lexer, ParseError, Token};
pub fn format(source: &str, line_numbers: bool, colored: bool) -> Result<String, DustError> {
let lexer = Lexer::new(source);
@ -10,7 +11,10 @@ pub fn format(source: &str, line_numbers: bool, colored: bool) -> Result<String,
.line_numbers(line_numbers)
.colored(colored)
.format()
.map_err(|error| DustError::Lex { error, source })?;
.map_err(|error| DustError::Parse {
error: ParseError::Lex(error),
source,
})?;
Ok(formatted)
}

View File

@ -1,7 +1,24 @@
//! An operation and its arguments for the Dust virtual machine.
//!
//! Each instruction is a 32-bit unsigned integer that is divided into six fields:
//! - Bits 0-5: The operation code.
//! - Bit 6: A flag indicating whether the B argument is a constant.
//! - Bit 7: A flag indicating whether the C argument is a constant.
//! - Bits 8-15: The A argument.
//! - Bits 16-23: The B argument.
//! - Bits 24-31: The C argument.
//!
//! Be careful when working with instructions directly. When modifying an instruction, be sure to
//! account for the fact that setting the A, B, or C arguments to 0 will have no effect. It is
//! usually best to remove instructions and insert new ones in their place instead of mutating them.
use serde::{Deserialize, Serialize};
use crate::{Chunk, NativeFunction, Operation};
/// An operation and its arguments for the Dust virtual machine.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct Instruction(u32);

View File

@ -1,4 +1,4 @@
//! Lexing tools.
//! Lexing tools and errors
//!
//! This module provides two lexing options:
//! - [`lex`], which lexes the entire input and returns a vector of tokens and their positions
@ -8,9 +8,9 @@ use std::fmt::{self, Display, Formatter};
use serde::{Deserialize, Serialize};
use crate::{dust_error::AnnotatedError, DustError, Span, Token};
use crate::{dust_error::AnnotatedError, DustError, ParseError, Span, Token};
/// Lexes the input and return a vector of tokens and their positions.
/// Lexes the input and returns a vector of tokens and their positions.
///
/// # Examples
/// ```
@ -37,9 +37,10 @@ pub fn lex<'tokens, 'src: 'tokens>(
let mut tokens = Vec::new();
loop {
let (token, span) = lexer
.next_token()
.map_err(|error| DustError::Lex { error, source })?;
let (token, span) = lexer.next_token().map_err(|error| DustError::Parse {
error: ParseError::Lex(error),
source,
})?;
let is_eof = matches!(token, Token::Eof);
tokens.push((token, span));
@ -54,36 +55,7 @@ pub fn lex<'tokens, 'src: 'tokens>(
/// Low-level tool for lexing a single token at a time.
///
/// # Examples
/// ```
/// # use dust_lang::*;
/// let input = "x = 1 + 2";
/// let mut lexer = Lexer::new(input);
/// let mut tokens = Vec::new();
///
/// loop {
/// let (token, span) = lexer.next_token().unwrap();
/// let is_eof = matches!(token, Token::Eof);
///
/// tokens.push((token, span));
///
/// if is_eof {
/// break;
/// }
/// }
///
/// assert_eq!(
/// tokens,
/// [
/// (Token::Identifier("x"), Span(0, 1)),
/// (Token::Equal, Span(2, 3)),
/// (Token::Integer("1"), Span(4, 5)),
/// (Token::Plus, Span(6, 7)),
/// (Token::Integer("2"), Span(8, 9)),
/// (Token::Eof, Span(9, 9)),
/// ]
/// )
/// ```
/// See the [`lex`] function for an example of how to create and use a Lexer.
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct Lexer<'src> {
source: &'src str,

View File

@ -1,28 +1,30 @@
mod chunk;
mod dust_error;
mod formatter;
mod instruction;
mod lexer;
mod native_function;
mod operation;
mod parser;
mod token;
mod r#type;
mod value;
mod vm;
//! The Dust programming language library.
pub use chunk::{Chunk, ChunkDisassembler, Local, Scope};
pub use dust_error::{AnnotatedError, DustError};
pub use formatter::{format, Formatter};
pub use instruction::Instruction;
pub use lexer::{lex, LexError, Lexer};
pub use native_function::{NativeFunction, NativeFunctionError};
pub use operation::Operation;
pub use parser::{parse, ParseError};
pub use r#type::{EnumType, FunctionType, RangeableType, StructType, Type, TypeConflict};
pub use token::{Token, TokenKind, TokenOwned};
pub use value::{Function, Primitive, Value, ValueError};
pub use vm::{run, Vm, VmError};
pub mod chunk;
pub mod dust_error;
pub mod formatter;
pub mod instruction;
pub mod lexer;
pub mod native_function;
pub mod operation;
pub mod parser;
pub mod token;
pub mod r#type;
pub mod value;
pub mod vm;
pub use crate::chunk::{Chunk, ChunkDisassembler, Local, Scope};
pub use crate::dust_error::{AnnotatedError, DustError};
pub use crate::formatter::{format, Formatter};
pub use crate::instruction::Instruction;
pub use crate::lexer::{lex, LexError, Lexer};
pub use crate::native_function::{NativeFunction, NativeFunctionError};
pub use crate::operation::Operation;
pub use crate::parser::{parse, ParseError};
pub use crate::r#type::{EnumType, FunctionType, RangeableType, StructType, Type, TypeConflict};
pub use crate::token::{Token, TokenKind, TokenOwned};
pub use crate::value::{Function, Primitive, Value, ValueError};
pub use crate::vm::{run, Vm, VmError};
use std::fmt::Display;

View File

@ -1,3 +1,7 @@
//! Built-in functions that implement extended functionality.
//!
//! Native functions are used either to implement features that are not possible to implement in
//! Dust itself or that are more efficient to implement in Rust.
use std::{
fmt::{self, Display, Formatter},
io::{self, stdin, stdout, Write},
@ -10,6 +14,9 @@ use crate::{AnnotatedError, FunctionType, Instruction, Primitive, Span, Type, Va
macro_rules! impl_from_str_for_native_function {
($(($name:ident, $byte:literal, $str:expr, $type:expr)),*) => {
/// A dust-native function.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
pub enum NativeFunction {
$(

View File

@ -1,3 +1,9 @@
//! Part of an [Instruction][crate::Instruction], which can be executed by the Dust virtual machine.
//!
//! !!! Warning !!!
//! The byte values of the operations matter. The seventh and eighth bits must be zero so that the
//! [Instruction][crate::Instruction] type can use them as flags.
use std::fmt::{self, Display, Formatter};
const MOVE: u8 = 0b0000_0000;
@ -33,6 +39,9 @@ const CALL: u8 = 0b0001_0110;
const CALL_NATIVE: u8 = 0b0001_0111;
const RETURN: u8 = 0b0001_1000;
/// Part of an [Instruction][crate::Instruction], which can be executed by the Dust virtual machine.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum Operation {
// Stack manipulation

View File

@ -1,3 +1,8 @@
//! Parsing tools and errors
//!
//! This module provides two parsing options:
//! - [`parse`], which parses the entire input and returns a chunk
//! - [`Parser`], which parses the input a token at a time while assembling a chunk
use std::{
fmt::{self, Display, Formatter},
mem::replace,
@ -13,6 +18,17 @@ use crate::{
NativeFunction, Operation, Scope, Span, Token, TokenKind, TokenOwned, Type, Value,
};
/// Parses the input and returns a chunk.
///
/// # Example
///
/// ```
/// # use dust_lang::parse;
/// let source = "40 + 2 == 42";
/// let chunk = parse(source).unwrap();
///
/// assert_eq!(chunk.len(), 6);
/// ```
pub fn parse(source: &str) -> Result<Chunk, DustError> {
let lexer = Lexer::new(source);
let mut parser = Parser::new(lexer).map_err(|error| DustError::Parse { error, source })?;
@ -24,6 +40,9 @@ pub fn parse(source: &str) -> Result<Chunk, DustError> {
Ok(parser.finish())
}
/// Low-level tool for parsing the input a token at a time while assembling a chunk.
///
/// See the [`parse`] function for an example of how to create and use a Parser.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize)]
struct Parser<'src> {
lexer: Lexer<'src>,

View File

@ -5,6 +5,9 @@ use serde::{Deserialize, Serialize};
macro_rules! define_tokens {
($($variant:ident $(($data_type:ty))?),+ $(,)?) => {
/// Source token.
///
/// This is a borrowed type, i.e. some variants contain references to the source text.
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Default, Serialize, Deserialize)]
pub enum Token<'src> {
#[default]
@ -15,6 +18,9 @@ macro_rules! define_tokens {
}
#[derive(Debug, PartialEq, Clone)]
/// Data-less representation of a source token.
///
/// If a [Token] borrows from the source text, its TokenKind omits the data.
pub enum TokenKind {
Eof,
$(
@ -439,9 +445,9 @@ impl<'src> Display for Token<'src> {
}
}
/// Owned version of `Token`, which owns all the strings.
/// Owned representation of a source token.
///
/// This is used for errors.
/// If a [Token] borrows from the source text, its TokenOwned owns a copy of the data.
#[derive(Debug, PartialEq, Clone)]
pub enum TokenOwned {
Eof,

View File

@ -1,14 +1,4 @@
//! Value types.
//!
//! Most types are concrete and specific, the exceptions are the Generic and Any types.
//!
//! Generic types are temporary placeholders that describe a type that will be defined later. The
//! interpreter should use the analysis phase to enforce that all Generic types have a concrete
//! type assigned to them before the program is run.
//!
//! The Any type is used in cases where a value's type does not matter. For example, the standard
//! library's "length" function does not care about the type of item in the list, only the list
//! itself. So the input is defined as `[any]`, i.e. `Type::ListOf(Box::new(Type::Any))`.
//! Value types and conflict handling.
use std::{
cmp::Ordering,
collections::HashMap,
@ -18,8 +8,6 @@ use std::{
use serde::{Deserialize, Serialize};
/// Description of a kind of value.
///
/// See the [module documentation](index.html) for more information.
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
pub enum Type {
Any,

View File

@ -1,3 +1,4 @@
//! Virtual machine and errors
use std::{cmp::Ordering, mem::replace};
use crate::{
@ -13,6 +14,9 @@ pub fn run(source: &str) -> Result<Option<Value>, DustError> {
.map_err(|error| DustError::Runtime { error, source })
}
/// Dust virtual machine.
///
/// See the [module-level documentation](index.html) for more information.
#[derive(Debug, Eq, PartialEq)]
pub struct Vm {
ip: usize,

View File

@ -23,7 +23,7 @@ fn equality_assignment_long() {
(Instruction::r#return(true), Span(44, 44)),
],
vec![Value::integer(4), Value::string("a")],
vec![Local::new(1, None, false, Scope { depth: 0, width: 0 }, 0)]
vec![Local::new(1, None, false, Scope { depth: 0, index: 0 }, 0)]
)),
);

View File

@ -7,7 +7,7 @@ use log::{Level, LevelFilter};
#[derive(Parser)]
struct Cli {
/// Source code send via command line
/// Source code sent via command line
#[arg(short, long)]
command: Option<String>,
@ -31,6 +31,7 @@ struct Cli {
#[arg(long)]
style_disassembly: Option<bool>,
/// Log level
#[arg(short, long)]
log: Option<LevelFilter>,