Add docs
This commit is contained in:
parent
dd13d2efee
commit
3330939128
@ -1,3 +1,14 @@
|
||||
//! In-memory representation of a Dust program or function.
|
||||
//!
|
||||
//! A chunk consists of a sequence of instructions and their positions, a list of constants, and a
|
||||
//! list of locals that can be executed by the Dust virtual machine. Chunks have a name when they
|
||||
//! belong to a named function.
|
||||
//!
|
||||
//! # Disassembly
|
||||
//!
|
||||
//! Chunks can be disassembled into a human-readable format using the `disassemble` method. The
|
||||
//! output is designed to be displayed in a terminal and is styled for readability.
|
||||
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
env::current_exe,
|
||||
@ -9,6 +20,9 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{Instruction, Span, Type, Value};
|
||||
|
||||
/// In-memory representation of a Dust program or function.
|
||||
///
|
||||
/// See the [module-level documentation](index.html) for more information.
|
||||
#[derive(Clone, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct Chunk {
|
||||
name: Option<String>,
|
||||
@ -125,7 +139,7 @@ impl Chunk {
|
||||
|
||||
pub fn begin_scope(&mut self) {
|
||||
self.scope_index += 1;
|
||||
self.current_scope.width = self.scope_index;
|
||||
self.current_scope.index = self.scope_index;
|
||||
self.current_scope.depth += 1;
|
||||
}
|
||||
|
||||
@ -133,9 +147,9 @@ impl Chunk {
|
||||
self.current_scope.depth -= 1;
|
||||
|
||||
if self.current_scope.depth == 0 {
|
||||
self.current_scope.width = 0;
|
||||
self.current_scope.index = 0;
|
||||
} else {
|
||||
self.current_scope.width -= 1;
|
||||
self.current_scope.index -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -206,26 +220,29 @@ pub struct Scope {
|
||||
/// The level of block nesting.
|
||||
pub depth: u8,
|
||||
/// The nth scope in the chunk.
|
||||
pub width: u8,
|
||||
pub index: u8,
|
||||
}
|
||||
|
||||
impl Scope {
|
||||
pub fn new(depth: u8, width: u8) -> Self {
|
||||
Self { depth, width }
|
||||
pub fn new(index: u8, width: u8) -> Self {
|
||||
Self {
|
||||
depth: index,
|
||||
index: width,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn contains(&self, other: &Self) -> bool {
|
||||
match self.depth.cmp(&other.depth) {
|
||||
Ordering::Less => false,
|
||||
Ordering::Greater => self.width >= other.width,
|
||||
Ordering::Equal => self.width == other.width,
|
||||
Ordering::Greater => self.index >= other.index,
|
||||
Ordering::Equal => self.index == other.index,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Scope {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "({}, {})", self.depth, self.width)
|
||||
write!(f, "({}, {})", self.depth, self.index)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,13 +1,13 @@
|
||||
//! Top-level Dust errors.
|
||||
use annotate_snippets::{Level, Renderer, Snippet};
|
||||
|
||||
use crate::{vm::VmError, LexError, ParseError, Span};
|
||||
use crate::{vm::VmError, ParseError, Span};
|
||||
|
||||
/// A top-level error that can occur during the execution of Dust code.
|
||||
///
|
||||
/// This error can display nicely formatted messages with source code annotations.
|
||||
#[derive(Debug, PartialEq)]
|
||||
pub enum DustError<'src> {
|
||||
Lex {
|
||||
error: LexError,
|
||||
source: &'src str,
|
||||
},
|
||||
Parse {
|
||||
error: ParseError,
|
||||
source: &'src str,
|
||||
@ -52,7 +52,6 @@ impl<'src> DustError<'src> {
|
||||
|
||||
report.push_str(&renderer.render(message).to_string());
|
||||
}
|
||||
_ => todo!(),
|
||||
}
|
||||
|
||||
report
|
||||
|
@ -1,8 +1,9 @@
|
||||
//! Formatting tools
|
||||
use std::mem::replace;
|
||||
|
||||
use colored::{ColoredString, Colorize, CustomColor};
|
||||
|
||||
use crate::{DustError, LexError, Lexer, Token};
|
||||
use crate::{DustError, LexError, Lexer, ParseError, Token};
|
||||
|
||||
pub fn format(source: &str, line_numbers: bool, colored: bool) -> Result<String, DustError> {
|
||||
let lexer = Lexer::new(source);
|
||||
@ -10,7 +11,10 @@ pub fn format(source: &str, line_numbers: bool, colored: bool) -> Result<String,
|
||||
.line_numbers(line_numbers)
|
||||
.colored(colored)
|
||||
.format()
|
||||
.map_err(|error| DustError::Lex { error, source })?;
|
||||
.map_err(|error| DustError::Parse {
|
||||
error: ParseError::Lex(error),
|
||||
source,
|
||||
})?;
|
||||
|
||||
Ok(formatted)
|
||||
}
|
||||
|
@ -1,7 +1,24 @@
|
||||
//! An operation and its arguments for the Dust virtual machine.
|
||||
//!
|
||||
//! Each instruction is a 32-bit unsigned integer that is divided into five fields:
|
||||
//! - Bits 1-6: The operation code.
|
||||
//! - Bit 7: A flag indicating whether the B argument is a constant.
|
||||
//! - Bit 8: A flag indicating whether the C argument is a constant.
|
||||
//! - Bits 9-16: The A argument.
|
||||
//! - Bits 17-24: The B argument.
|
||||
//! - Bits 25-32: The C argument.
|
||||
//!
|
||||
//! Be careful when working with instructions directly. When modifying an instruction, be sure to
|
||||
//! account for the fact that setting the A, B, or C arguments to 0 will have no effect. It is
|
||||
//! usually best to remove instructions and insert new ones in their place instead of mutating them.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{Chunk, NativeFunction, Operation};
|
||||
|
||||
/// An operation and its arguments for the Dust virtual machine.
|
||||
///
|
||||
/// See the [module-level documentation](index.html) for more information.
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
|
||||
pub struct Instruction(u32);
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
//! Lexing tools.
|
||||
//! Lexing tools and errors
|
||||
//!
|
||||
//! This module provides two lexing options:
|
||||
//! - [`lex`], which lexes the entire input and returns a vector of tokens and their positions
|
||||
@ -8,9 +8,9 @@ use std::fmt::{self, Display, Formatter};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::{dust_error::AnnotatedError, DustError, Span, Token};
|
||||
use crate::{dust_error::AnnotatedError, DustError, ParseError, Span, Token};
|
||||
|
||||
/// Lexes the input and return a vector of tokens and their positions.
|
||||
/// Lexes the input and returns a vector of tokens and their positions.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
@ -37,9 +37,10 @@ pub fn lex<'tokens, 'src: 'tokens>(
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
loop {
|
||||
let (token, span) = lexer
|
||||
.next_token()
|
||||
.map_err(|error| DustError::Lex { error, source })?;
|
||||
let (token, span) = lexer.next_token().map_err(|error| DustError::Parse {
|
||||
error: ParseError::Lex(error),
|
||||
source,
|
||||
})?;
|
||||
let is_eof = matches!(token, Token::Eof);
|
||||
|
||||
tokens.push((token, span));
|
||||
@ -54,36 +55,7 @@ pub fn lex<'tokens, 'src: 'tokens>(
|
||||
|
||||
/// Low-level tool for lexing a single token at a time.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # use dust_lang::*;
|
||||
/// let input = "x = 1 + 2";
|
||||
/// let mut lexer = Lexer::new(input);
|
||||
/// let mut tokens = Vec::new();
|
||||
///
|
||||
/// loop {
|
||||
/// let (token, span) = lexer.next_token().unwrap();
|
||||
/// let is_eof = matches!(token, Token::Eof);
|
||||
///
|
||||
/// tokens.push((token, span));
|
||||
///
|
||||
/// if is_eof {
|
||||
/// break;
|
||||
/// }
|
||||
/// }
|
||||
///
|
||||
/// assert_eq!(
|
||||
/// tokens,
|
||||
/// [
|
||||
/// (Token::Identifier("x"), Span(0, 1)),
|
||||
/// (Token::Equal, Span(2, 3)),
|
||||
/// (Token::Integer("1"), Span(4, 5)),
|
||||
/// (Token::Plus, Span(6, 7)),
|
||||
/// (Token::Integer("2"), Span(8, 9)),
|
||||
/// (Token::Eof, Span(9, 9)),
|
||||
/// ]
|
||||
/// )
|
||||
/// ```
|
||||
/// See the [`lex`] function for an example of how to create and use a Lexer.
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub struct Lexer<'src> {
|
||||
source: &'src str,
|
||||
|
@ -1,28 +1,30 @@
|
||||
mod chunk;
|
||||
mod dust_error;
|
||||
mod formatter;
|
||||
mod instruction;
|
||||
mod lexer;
|
||||
mod native_function;
|
||||
mod operation;
|
||||
mod parser;
|
||||
mod token;
|
||||
mod r#type;
|
||||
mod value;
|
||||
mod vm;
|
||||
//! The Dust programming language library.
|
||||
|
||||
pub use chunk::{Chunk, ChunkDisassembler, Local, Scope};
|
||||
pub use dust_error::{AnnotatedError, DustError};
|
||||
pub use formatter::{format, Formatter};
|
||||
pub use instruction::Instruction;
|
||||
pub use lexer::{lex, LexError, Lexer};
|
||||
pub use native_function::{NativeFunction, NativeFunctionError};
|
||||
pub use operation::Operation;
|
||||
pub use parser::{parse, ParseError};
|
||||
pub use r#type::{EnumType, FunctionType, RangeableType, StructType, Type, TypeConflict};
|
||||
pub use token::{Token, TokenKind, TokenOwned};
|
||||
pub use value::{Function, Primitive, Value, ValueError};
|
||||
pub use vm::{run, Vm, VmError};
|
||||
pub mod chunk;
|
||||
pub mod dust_error;
|
||||
pub mod formatter;
|
||||
pub mod instruction;
|
||||
pub mod lexer;
|
||||
pub mod native_function;
|
||||
pub mod operation;
|
||||
pub mod parser;
|
||||
pub mod token;
|
||||
pub mod r#type;
|
||||
pub mod value;
|
||||
pub mod vm;
|
||||
|
||||
pub use crate::chunk::{Chunk, ChunkDisassembler, Local, Scope};
|
||||
pub use crate::dust_error::{AnnotatedError, DustError};
|
||||
pub use crate::formatter::{format, Formatter};
|
||||
pub use crate::instruction::Instruction;
|
||||
pub use crate::lexer::{lex, LexError, Lexer};
|
||||
pub use crate::native_function::{NativeFunction, NativeFunctionError};
|
||||
pub use crate::operation::Operation;
|
||||
pub use crate::parser::{parse, ParseError};
|
||||
pub use crate::r#type::{EnumType, FunctionType, RangeableType, StructType, Type, TypeConflict};
|
||||
pub use crate::token::{Token, TokenKind, TokenOwned};
|
||||
pub use crate::value::{Function, Primitive, Value, ValueError};
|
||||
pub use crate::vm::{run, Vm, VmError};
|
||||
|
||||
use std::fmt::Display;
|
||||
|
||||
|
@ -1,3 +1,7 @@
|
||||
//! Built-in functions that implement extended functionality.
|
||||
//!
|
||||
//! Native functions are used either to implement features that are not possible to implement in
|
||||
//! Dust itself or that are more efficient to implement in Rust.
|
||||
use std::{
|
||||
fmt::{self, Display, Formatter},
|
||||
io::{self, stdin, stdout, Write},
|
||||
@ -10,6 +14,9 @@ use crate::{AnnotatedError, FunctionType, Instruction, Primitive, Span, Type, Va
|
||||
|
||||
macro_rules! impl_from_str_for_native_function {
|
||||
($(($name:ident, $byte:literal, $str:expr, $type:expr)),*) => {
|
||||
/// A dust-native function.
|
||||
///
|
||||
/// See the [module-level documentation](index.html) for more information.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
|
||||
pub enum NativeFunction {
|
||||
$(
|
||||
|
@ -1,3 +1,9 @@
|
||||
//! Part of an [Instruction][crate::Instruction], which can be executed by the Dust virtual machine.
|
||||
//!
|
||||
//! !!! Warning !!!
|
||||
//! The byte values of the operations matter. The seventh and eighth bits must be zero so that the
|
||||
//! [Instruction][crate::Instruction] type can use them as flags.
|
||||
|
||||
use std::fmt::{self, Display, Formatter};
|
||||
|
||||
const MOVE: u8 = 0b0000_0000;
|
||||
@ -33,6 +39,9 @@ const CALL: u8 = 0b0001_0110;
|
||||
const CALL_NATIVE: u8 = 0b0001_0111;
|
||||
const RETURN: u8 = 0b0001_1000;
|
||||
|
||||
/// Part of an [Instruction][crate::Instruction], which can be executed by the Dust virtual machine.
|
||||
///
|
||||
/// See the [module-level documentation](index.html) for more information.
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
pub enum Operation {
|
||||
// Stack manipulation
|
||||
|
@ -1,3 +1,8 @@
|
||||
//! Parsing tools and errors
|
||||
//!
|
||||
//! This module provides two parsing options:
|
||||
//! - [`parse`], which parses the entire input and returns a chunk
|
||||
//! - [`Parser`], which parses the input a token at a time while assembling a chunk
|
||||
use std::{
|
||||
fmt::{self, Display, Formatter},
|
||||
mem::replace,
|
||||
@ -13,6 +18,17 @@ use crate::{
|
||||
NativeFunction, Operation, Scope, Span, Token, TokenKind, TokenOwned, Type, Value,
|
||||
};
|
||||
|
||||
/// Parses the input and returns a chunk.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// # use dust_lang::parse;
|
||||
/// let source = "40 + 2 == 42";
|
||||
/// let chunk = parse(source).unwrap();
|
||||
///
|
||||
/// assert_eq!(chunk.len(), 6);
|
||||
/// ```
|
||||
pub fn parse(source: &str) -> Result<Chunk, DustError> {
|
||||
let lexer = Lexer::new(source);
|
||||
let mut parser = Parser::new(lexer).map_err(|error| DustError::Parse { error, source })?;
|
||||
@ -24,6 +40,9 @@ pub fn parse(source: &str) -> Result<Chunk, DustError> {
|
||||
Ok(parser.finish())
|
||||
}
|
||||
|
||||
/// Low-level tool for parsing the input a token at a time while assembling a chunk.
|
||||
///
|
||||
/// See the [`parse`] function for an example of how to create and use a Parser.
|
||||
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize)]
|
||||
struct Parser<'src> {
|
||||
lexer: Lexer<'src>,
|
||||
|
@ -5,6 +5,9 @@ use serde::{Deserialize, Serialize};
|
||||
|
||||
macro_rules! define_tokens {
|
||||
($($variant:ident $(($data_type:ty))?),+ $(,)?) => {
|
||||
/// Source token.
|
||||
///
|
||||
/// This is a borrowed type, i.e. some variants contain references to the source text.
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, PartialOrd, Ord, Default, Serialize, Deserialize)]
|
||||
pub enum Token<'src> {
|
||||
#[default]
|
||||
@ -15,6 +18,9 @@ macro_rules! define_tokens {
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
/// Data-less representation of a source token.
|
||||
///
|
||||
/// If a [Token] borrows from the source text, its TokenKind omits the data.
|
||||
pub enum TokenKind {
|
||||
Eof,
|
||||
$(
|
||||
@ -439,9 +445,9 @@ impl<'src> Display for Token<'src> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Owned version of `Token`, which owns all the strings.
|
||||
/// Owned representation of a source token.
|
||||
///
|
||||
/// This is used for errors.
|
||||
/// If a [Token] borrows from the source text, its TokenOwned owns a copy of the data instead.
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub enum TokenOwned {
|
||||
Eof,
|
||||
|
@ -1,14 +1,4 @@
|
||||
//! Value types.
|
||||
//!
|
||||
//! Most types are concrete and specific, the exceptions are the Generic and Any types.
|
||||
//!
|
||||
//! Generic types are temporary placeholders that describe a type that will be defined later. The
|
||||
//! interpreter should use the analysis phase to enforce that all Generic types have a concrete
|
||||
//! type assigned to them before the program is run.
|
||||
//!
|
||||
//! The Any type is used in cases where a value's type does not matter. For example, the standard
|
||||
//! library's "length" function does not care about the type of item in the list, only the list
|
||||
//! itself. So the input is defined as `[any]`, i.e. `Type::ListOf(Box::new(Type::Any))`.
|
||||
//! Value types and conflict handling.
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
collections::HashMap,
|
||||
@ -18,8 +8,6 @@ use std::{
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Description of a kind of value.
|
||||
///
|
||||
/// See the [module documentation](index.html) for more information.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub enum Type {
|
||||
Any,
|
||||
|
@ -1,3 +1,4 @@
|
||||
//! Virtual machine and errors
|
||||
use std::{cmp::Ordering, mem::replace};
|
||||
|
||||
use crate::{
|
||||
@ -13,6 +14,9 @@ pub fn run(source: &str) -> Result<Option<Value>, DustError> {
|
||||
.map_err(|error| DustError::Runtime { error, source })
|
||||
}
|
||||
|
||||
/// Dust virtual machine.
|
||||
///
|
||||
/// See the [module-level documentation](index.html) for more information.
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub struct Vm {
|
||||
ip: usize,
|
||||
|
@ -23,7 +23,7 @@ fn equality_assignment_long() {
|
||||
(Instruction::r#return(true), Span(44, 44)),
|
||||
],
|
||||
vec![Value::integer(4), Value::string("a")],
|
||||
vec![Local::new(1, None, false, Scope { depth: 0, width: 0 }, 0)]
|
||||
vec![Local::new(1, None, false, Scope { depth: 0, index: 0 }, 0)]
|
||||
)),
|
||||
);
|
||||
|
||||
|
@ -7,7 +7,7 @@ use log::{Level, LevelFilter};
|
||||
|
||||
#[derive(Parser)]
|
||||
struct Cli {
|
||||
/// Source code send via command line
|
||||
/// Source code sent via command line
|
||||
#[arg(short, long)]
|
||||
command: Option<String>,
|
||||
|
||||
@ -31,6 +31,7 @@ struct Cli {
|
||||
#[arg(long)]
|
||||
style_disassembly: Option<bool>,
|
||||
|
||||
/// Log level
|
||||
#[arg(short, long)]
|
||||
log: Option<LevelFilter>,
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user