From 3e852cf6063fc199667144701beaceca2ae0b1c4 Mon Sep 17 00:00:00 2001 From: Jeff Date: Tue, 10 Dec 2024 10:03:11 -0500 Subject: [PATCH] Write docs; Improve errors --- README.md | 270 ++++++++++++++++++---- dust-cli/src/main.rs | 2 +- dust-lang/src/compiler/error.rs | 255 +++++++++++++++++++++ dust-lang/src/compiler/mod.rs | 328 +-------------------------- dust-lang/src/compiler/optimize.rs | 20 +- dust-lang/src/dust_error.rs | 42 ++-- dust-lang/src/lexer.rs | 64 +----- dust-lang/src/native_function/mod.rs | 22 +- dust-lang/src/vm.rs | 36 +-- wl-copy | 7 + 10 files changed, 545 insertions(+), 501 deletions(-) create mode 100644 dust-lang/src/compiler/error.rs create mode 100644 wl-copy diff --git a/README.md b/README.md index 18a91d4..ff56472 100644 --- a/README.md +++ b/README.md @@ -2,17 +2,14 @@ A programming language that is **fast**, **safe** and **easy to use**. -Dust has a simple, expressive syntax that is easy to read and write. This includes a powerful yet -syntactically modest type system with extensive inference capabilities. +Dust's syntax, safety features and evaluation model are inspired by Rust. The instruction set, +optimization strategies and virtual machine are inspired by Lua and academic research in the field +(see the [Inspiration](README#Inspiration) section). Unlike Rust and most other compiled languages, Dust has +a very low time to execution. Unlike Lua and most other interpreted languages, Dust enforces static +typing during compilation, with a simple yet powerful type system that enhances clarity and prevents +bugs. -The syntax, safety features and evaluation model are inspired by Rust. The instruction set, -optimization strategies and virtual machine are inspired by Lua and academic research (see the -[Inspiration][] section below). Unlike Rust and other compiled languages, Dust has a very low time -to execution. Simple programs compile in milliseconds, even on modest hardware.
Unlike Lua and most -other interpreted languages, Dust is type-safe, with a simple yet powerful type system that enhances -clarity and prevent bugs. - -```dust +```rust write_line("Enter your name...") let name = read_line() @@ -20,15 +17,157 @@ let name = read_line() write_line("Hello " + name + "!") ``` -## Overview +```rust +fn fib (n: int) -> int { + if n <= 0 { return 0 } + if n == 1 { return 1 } + + fib(n - 1) + fib(n - 2) +} + +write_line(fib(25)) +``` + +Dust uses the same library for error reporting as Rust, which provides ample opportunities to show +the user where they went wrong and how to fix it. Helpful error messages are a high priority and the +language will not be considered stable until they are consistently informative and actionable. + +``` +error: Compilation Error: Cannot add these types + | +1 | 40 + 2.0 + | -- info: A value of type "int" was used here. + | +1 | 40 + 2.0 + | --- info: A value of type "float" was used here. + | +1 | 40 + 2.0 + | -------- help: Type "int" cannot be added to type "float". Try converting one of the values to the other type. + | +``` ## Project Status -**Dust is under active development and is not yet ready for general use.** Dust is an ambitious -project that acts as a continuous experiment in language design. Features may be redesigned and -reimplemented at will when they do not meet the project's performance and usability goals. This -approach maximizes the development experience as a learning opportunity and enforces a high standard -of quality but slows down the process of delivering features to users. +**Dust is under active development and is not yet ready for general use.** + +**Features discussed in this README may be unimplemented, partially implemented, temporarily removed +or only available on a separate branch.** + +Dust is an ambitious project that acts as a continuous experiment in language design.
Features may +be redesigned and reimplemented at will when they do not meet the project's performance or +usability goals. This approach maximizes the development experience as a learning opportunity and +enforces a high standard of quality but slows down the process of delivering features to users. +Eventually, Dust will reach a stable release and will be ready for general use. As the project +approaches this milestone, the experimental nature of the project will be reduced and replaced +with a focus on stability and improvement. + +## Language Overview + +### Syntax + +Dust belongs to the C-like family of languages, with an imperative syntax that will be familiar to +many programmers. Dust code looks a lot like Ruby, JavaScript, TypeScript and other members of the +family but Rust is its primary point of reference for syntax. Rust was chosen as a syntax model +because its imperative code is *obvious* and *familiar*. Those qualities are aligned with Dust's +emphasis on safety and usability. However, some differences exist because Dust is a simpler language +that can tolerate more relaxed syntax. For example, Dust has more relaxed rules about semicolons: +they can be used to suppress values (like in Rust) but are not required at the end of every +statement. + +In this example, these semicolons are optional. Because these `let` statements do not return a +value, the semicolons have nothing to suppress and are ignored. + +```dust +let a = 40; +let b = 2; + +write_line("The answer is ", a + b); +``` + +One could write the above program without any semicolons at all. + +```dust +let x = 10 +let y = 3 + +write_line("The remainder is ", x % y) +``` + +The next example produces a compiler error because the `if` block returns a value of type `int` but +the `else` block does not return a value at all. Dust does not allow branches of the same `if/else` +statement to return different types of values.
In this case, adding a semicolon after the `777` +expression fixes the error by suppressing the value. + +```dust +let input = read_line() + +if input == "42" { + write_line("You got it! Here's your reward.") + + 777 +} else { + write_line("That is not the answer.") +} +``` + +Remember that even if some syntax is optional, that does not mean it should always be omitted or is +not useful. Aside from their practical use, semicolons provide a visual barrier between statements +written on the same line. Dust's design philosophy is to provide a balance between strictness and +expressiveness so that the language is applicable to a wide range of use cases. A web server with a +team of developers may prefer a more long-form style of code with lots of line breaks while a user +writing Dust on the command line may prefer a more terse style without sacrificing readability. + +```dust +let a = 0; let b = 1; let c = 2; let list = [a, b, c]; + +write_line("Here's our list: ", list) +``` + +### Safety + +#### Type System + +All variables have a type that is established when the variable is declared. This usually does not +require that the type be explicitly stated; Dust can infer the type from the value. Types are also +associated with the arms of `if/else` statements and the return values of functions, which prevents +different runtime scenarios from producing different types of values. + +#### Null-Free + +There is no `null` or `undefined` value in Dust. All values and variables must be initialized to one +of the supported value types. This eliminates a whole class of bugs that permeate many other +languages. "I call it my billion-dollar mistake. It was the invention of the null reference in +1965." - Tony Hoare + +Dust *does* have a `none` type, which should not be confused for being `null`-like. Like the `()` or +"unit" type in Rust, `none` exists as a type but not as a value. It indicates the lack of a value +from a function, expression or statement.
A variable cannot be assigned to `none`. + +#### Memory Safety + + + +### Values, Variables and Types + +Dust supports the following basic values: + +- Boolean: `true` or `false` +- Byte: An unsigned 8-bit integer +- Character: A Unicode scalar value +- Float: A 64-bit floating-point number +- Function: An executable chunk of code +- Integer: A signed 64-bit integer +- String: A UTF-8 encoded string + +Dust's "basic" values are conceptually similar because they are singular as opposed to composite. +Most of these values are stored on the stack but some are heap-allocated. A Dust string is a +sequence of bytes that are encoded in UTF-8. Even though it could be seen as a composite of byte +values, strings are considered "basic" because they are parsed directly from tokens and behave as +singular values. Shorter strings are stored on the stack while longer strings are heap-allocated. +Dust offers built-in native functions that can manipulate strings by accessing their bytes or +reading them as a sequence of characters. 
+ + ## Feature Progress @@ -72,6 +211,7 @@ maintain a docket of what is being worked on, what is coming next and what can b - Types - [X] Basic types for each kind of basic value - [X] Generalized types: `num`, `any`, `none` + - [ ] Type conversion (safe, explicit and coercion-free) - [ ] `struct` types - [ ] `enum` types - [ ] Type aliases @@ -92,11 +232,29 @@ maintain a docket of what is being worked on, what is coming next and what can b - [ ] Type arguments - Control Flow - [X] If/Else + - [ ] Match - [ ] Loops - [ ] `for` - [ ] `loop` - [X] `while` - - [ ] Match +- Native Functions + - Assertions + - [X] `assert` + - [ ] `assert_eq` + - [ ] `assert_ne` + - [ ] `panic` + - I/O + - [ ] `read` + - [X] `read_line` + - [X] `write` + - [X] `write_line` + - String Functions + - List Functions + - Map Functions + - Math Functions + - Filesystem Functions + - Network Functions + - System Functions ## Implementation @@ -107,6 +265,16 @@ code and check the compiled chunk, then run the source and check the output of t It is important to maintain a high level of quality by writing meaningful tests and preferring to compile and run programs in an optimal way before adding new features. +### Command Line Interface + +Dust's command line interface and developer experience are inspired by tools like Bun and especially +Cargo, the Rust package manager that includes everything from project creation to documentation +generation to code formatting to much more. Dust's CLI has started by exposing the most important +features for debugging and developing the language itself. Tokenization, compiling, disassembling +and running Dust code are currently supported. The CLI will eventually support a REPL, code +formatting, linting and other features that enhance the development experience and make Dust more +fun and easy to use. + ### Lexer and Tokens The lexer emits tokens from the source code.
Dust makes extensive use of Rust's zero-copy @@ -128,21 +296,23 @@ sequence of tokens into a chunk. Each token is given a precedence and may have a parser. The parsers are just functions that modify the compiler and its output. For example, when the compiler encounters a boolean token, its prefix parser is the `parse_boolean` function, which emits a `LoadBoolean` instruction. An integer token's prefix parser is `parse_integer`, which emits -a `LoadConstant` instruction and adds the integer to the constant list. Tokens with infix parsers -include the math operators, which emit `Add`, `Subtract`, `Multiply`, `Divide`, and `Modulo` +a `LoadConstant` instruction and adds the integer to the constants list. Tokens with infix parsers +include the math operators, which emit `Add`, `Subtract`, `Multiply`, `Divide`, `Modulo` and `Power` instructions. Functions are compiled into their own chunks, which are stored in the constant list. A function's -arguments are stored in the locals list. The VM must later bind the arguments to runtime values by -assigning each argument a register and associating the register with the local. +arguments are stored in its locals list. Before the function is run, the VM must bind the arguments +to values by filling locals' corresponding registers. Instead of copying the arguments, the VM uses +a pointer to one of the parent's registers or constants. #### Optimizing When generating instructions for a register-based virtual machine, there are opportunities to optimize the generated code by using fewer instructions or fewer registers. While it is best to -output optimal code in the first place, it is not always possible. Dust's compiler modifies the -instruction list during parsing to apply optimizations before the chunk is completed. There is no -separate optimization pass, and the compiler cannot be run in a mode that disables optimizations. +output optimal code in the first place, it is not always possible. 
Dust uses a single-pass +compiler and therefore applies optimizations immediately after the opportunity becomes available. +There is no separate optimization pass and the compiler cannot be run in a mode that disables +optimizations. #### Type Checking @@ -153,6 +323,8 @@ from instruction arguments, the compiler also checks the types of function argum of `if`/`else` statements. The compiler always checks types on the fly, so there is no need for a separate type-checking pass. +Type information is removed from the instructions list before the chunk is created, so the VM (which +is entirely type-agnostic) never sees it. ### Instructions @@ -198,53 +370,60 @@ because of the 5 bit format. ##### Arithmetic -Arithmetic instructions use every field except for D. The A field is the destination register, the B +Arithmetic instructions use the A, B and C fields. The A field is the destination register, the B and C fields are the arguments, and the flags indicate whether the arguments are constants. - ADD: Adds two values and stores the result in a register. Unlike the other arithmetic operations, - the ADD instruction can also be used to concatenate strings and characters. + the ADD instruction can also be used to concatenate strings and/or characters. Characters are the + only type of value that can perform a kind of implicit conversion. Although the character itself + is not converted, its underlying bytes are concatenated to the string. - SUBTRACT: Subtracts one argument from another and stores the result in a register. -- MULTIPLY: Multiplies two arguments and stores the result in a register. +- MULTIPLY: Multiplies one argument by another and stores the result in a register. - DIVIDE: Divides one value by another and stores the result in a register. - MODULO: Calculates the division remainder of two values and stores the result in a register. - POWER: Raises one value to the power of another and stores the result in a register.
-##### Logic +##### Logic and Control Flow Logic instructions work differently from arithmetic and comparison instructions, but they are still -essentially binary operations with a left and a right argument. Rather than performing some -calculation and storing a result, the logic instructions perform a check on the left-hand argument -and, based on the result, either skip the right-hand argument or allow it to be executed. A `TEST` -is always followed by a `JUMP`. If the left argument passes the test (a boolean equality check), the -`JUMP` instruction is skipped and the right argument is executed. If the left argument fails the -test, the `JUMP` is not skipped and it jumps past the right argument. +essentially binary operations with a left and a right argument. These arguments, however, are other +instructions. This is reminiscent of a stack-based virtual machine in which the arguments are found +in the stack rather than having their location encoded in the instruction. The logic instructions +perform a check on the left-hand argument and, based on the result, either skip the right-hand +argument or allow it to be executed. A `TEST` is always followed by a `JUMP`. If the left argument +passes the test (a boolean equality check), the `JUMP` instruction is skipped and the right argument +is executed. If the left argument fails the test, the `JUMP` is not skipped and it jumps past the +right argument. - TEST - TEST_SET + + ##### Comparison + + - EQUAL - LESS - LESS_EQUAL ##### Unary operations + + - NEGATE - NOT ##### Execution + + - CALL - CALL_NATIVE - JUMP - RETURN - -The A, B, and C -fields are used for usually used as indexes into the constant list or stack, but they can also hold -other information, like the number of arguments for a function call. - ### Virtual Machine The virtual machine is simple and efficient. It uses a stack of registers, which can hold values or @@ -288,14 +467,17 @@ on Lua optimizations covered in this paper.
Liup was helpful for a quick yet efficient primer on getting stack-based and register-based virtual machines up and running. The included code examples show how to implement both types of VMs in C. The performance comparison between the two types of VMs is worth reading for anyone who is trying to -choose between the two. Some of the benchmarks described in the paper inspired similar benchmarks +choose between the two[^1]. Some of the benchmarks described in the paper inspired similar benchmarks used in this project to compare Dust to other languages. ## License Dust is licensed under the GNU General Public License v3.0. See the `LICENSE` file for details. -[Crafting Interpreters]: https://craftinginterpreters.com/ -[The Implementation of Lua 5.0]: https://www.lua.org/doc/jucs05.pdf -[A No-Frills Introduction to Lua 5.1 VM Instructions]: https://www.mcours.net/cours/pdf/hasclic3/hasssclic818.pdf -[A Performance Survey on Stack-based and Register-based Virtual Machines^3]: https://arxiv.org/abs/1611.00467 +## References + +[^1]: [Crafting Interpreters](https://craftinginterpreters.com/) +[^2]: [The Implementation of Lua 5.0](https://www.lua.org/doc/jucs05.pdf) +[^3]: [A No-Frills Introduction to Lua 5.1 VM Instructions](https://www.mcours.net/cours/pdf/hasclic3/hasssclic818.pdf) +[^4]: [A Performance Survey on Stack-based and Register-based Virtual Machines](https://arxiv.org/abs/1611.00467) +[^5]: [List of C-family programming languages](https://en.wikipedia.org/wiki/List_of_C-family_programming_languages) diff --git a/dust-cli/src/main.rs b/dust-cli/src/main.rs index f426f72..3e4c7b4 100644 --- a/dust-cli/src/main.rs +++ b/dust-cli/src/main.rs @@ -4,7 +4,7 @@ use std::{fs::read_to_string, path::PathBuf}; use clap::{Args, Parser}; use colored::Colorize; -use dust_lang::{compile, lex, run, CompileError, DustError, Lexer, Span, Token}; +use dust_lang::{compile, run, CompileError, DustError, Lexer, Span, Token}; use log::{Level, LevelFilter}; #[derive(Parser)] diff 
--git a/dust-lang/src/compiler/error.rs b/dust-lang/src/compiler/error.rs new file mode 100644 index 0000000..e6ac5f1 --- /dev/null +++ b/dust-lang/src/compiler/error.rs @@ -0,0 +1,255 @@ +use std::num::{ParseFloatError, ParseIntError}; + +use smallvec::{smallvec, SmallVec}; + +use crate::{AnnotatedError, LexError, Scope, Span, TokenKind, TokenOwned, Type, TypeConflict}; + +/// Compilation errors +#[derive(Clone, Debug, PartialEq)] +pub enum CompileError { + // Token errors + ExpectedToken { + expected: TokenKind, + found: TokenOwned, + position: Span, + }, + ExpectedTokenMultiple { + expected: &'static [TokenKind], + found: TokenOwned, + position: Span, + }, + + // Parsing errors + CannotChainComparison { + position: Span, + }, + ExpectedExpression { + found: TokenOwned, + position: Span, + }, + ExpectedFunction { + found: TokenOwned, + actual_type: Type, + position: Span, + }, + ExpectedFunctionType { + found: Type, + position: Span, + }, + InvalidAssignmentTarget { + found: TokenOwned, + position: Span, + }, + UnexpectedReturn { + position: Span, + }, + + // Variable errors + CannotMutateImmutableVariable { + identifier: String, + position: Span, + }, + ExpectedMutableVariable { + found: TokenOwned, + position: Span, + }, + UndeclaredVariable { + identifier: String, + position: Span, + }, + VariableOutOfScope { + identifier: String, + variable_scope: Scope, + access_scope: Scope, + position: Span, + }, + + // Type errors + CannotAddType { + argument_type: Type, + position: Span, + }, + CannotAddArguments { + left_type: Type, + left_position: Span, + right_type: Type, + right_position: Span, + }, + CannotDivideType { + argument_type: Type, + position: Span, + }, + CannotDivideArguments { + left_type: Type, + right_type: Type, + position: Span, + }, + CannotModuloType { + argument_type: Type, + position: Span, + }, + CannotModuloArguments { + left_type: Type, + right_type: Type, + position: Span, + }, + CannotMultiplyType { + argument_type: Type, + position: Span, 
+ }, + CannotMultiplyArguments { + left_type: Type, + right_type: Type, + position: Span, + }, + CannotSubtractType { + argument_type: Type, + position: Span, + }, + CannotSubtractArguments { + left_type: Type, + right_type: Type, + position: Span, + }, + CannotResolveRegisterType { + register_index: usize, + position: Span, + }, + CannotResolveVariableType { + identifier: String, + position: Span, + }, + IfElseBranchMismatch { + conflict: TypeConflict, + position: Span, + }, + IfMissingElse { + position: Span, + }, + ListItemTypeConflict { + conflict: TypeConflict, + position: Span, + }, + ReturnTypeConflict { + conflict: TypeConflict, + position: Span, + }, + + // Chunk errors + ConstantIndexOutOfBounds { + index: usize, + position: Span, + }, + InstructionIndexOutOfBounds { + index: usize, + position: Span, + }, + LocalIndexOutOfBounds { + index: usize, + position: Span, + }, + + // Wrappers around foreign errors + Lex(LexError), + ParseFloatError { + error: ParseFloatError, + position: Span, + }, + ParseIntError { + error: ParseIntError, + position: Span, + }, +} + +impl CompileError {} + +impl AnnotatedError for CompileError { + fn title() -> &'static str { + "Compilation Error" + } + + fn description(&self) -> &'static str { + match self { + Self::CannotAddArguments { .. } => "Cannot add these types", + Self::CannotAddType { .. } => "Cannot add to this type", + Self::CannotChainComparison { .. } => "Cannot chain comparison operations", + Self::CannotDivideArguments { .. } => "Cannot divide these types", + Self::CannotDivideType { .. } => "Cannot divide this type", + Self::CannotModuloArguments { .. } => "Cannot modulo these types", + Self::CannotModuloType { .. } => "Cannot modulo this type", + Self::CannotMutateImmutableVariable { .. } => "Cannot mutate immutable variable", + Self::CannotMultiplyArguments { .. } => "Cannot multiply these types", + Self::CannotMultiplyType { .. } => "Cannot multiply this type", + Self::CannotResolveRegisterType { .. 
} => "Cannot resolve register type", + Self::CannotResolveVariableType { .. } => "Cannot resolve type", + Self::CannotSubtractType { .. } => "Cannot subtract from this type", + Self::CannotSubtractArguments { .. } => "Cannot subtract these types", + Self::ConstantIndexOutOfBounds { .. } => "Constant index out of bounds", + Self::ExpectedExpression { .. } => "Expected an expression", + Self::ExpectedFunction { .. } => "Expected a function", + Self::ExpectedFunctionType { .. } => "Expected a function type", + Self::ExpectedMutableVariable { .. } => "Expected a mutable variable", + Self::ExpectedToken { .. } => "Expected a specific token", + Self::ExpectedTokenMultiple { .. } => "Expected one of multiple tokens", + Self::IfElseBranchMismatch { .. } => "Type mismatch in if/else branches", + Self::IfMissingElse { .. } => "If statement missing else branch", + Self::InstructionIndexOutOfBounds { .. } => "Instruction index out of bounds", + Self::InvalidAssignmentTarget { .. } => "Invalid assignment target", + Self::Lex(error) => error.description(), + Self::ListItemTypeConflict { .. } => "List item type conflict", + Self::LocalIndexOutOfBounds { .. } => "Local index out of bounds", + Self::ParseFloatError { .. } => "Failed to parse float", + Self::ParseIntError { .. } => "Failed to parse integer", + Self::ReturnTypeConflict { .. } => "Return type conflict", + Self::UndeclaredVariable { .. } => "Undeclared variable", + Self::UnexpectedReturn { .. } => "Unexpected return", + Self::VariableOutOfScope { .. 
} => "Variable out of scope", + } + } + + fn detail_snippets(&self) -> SmallVec<[(String, Span); 2]> { + match self { + Self::CannotAddArguments { + left_type, + left_position, + right_type, + right_position, + } => { + smallvec![ + ( + format!("A value of type \"{left_type}\" was used here."), + *left_position + ), + ( + format!("A value of type \"{right_type}\" was used here."), + *right_position + ) + ] + } + _ => todo!(), + } + } + + fn help_snippets(&self) -> SmallVec<[(String, Span); 2]> { + match self { + Self::CannotAddArguments { + left_type, + left_position, + right_type, + right_position, + } => { + smallvec![( + format!("Type \"{left_type}\" cannot be added to type \"{right_type}\". Try converting one of the values to the other type."), + Span(left_position.0, right_position.1) + )] + } + _ => todo!(), + } + } +} + +impl From for CompileError { + fn from(error: LexError) -> Self { + Self::Lex(error) + } +} diff --git a/dust-lang/src/compiler/mod.rs b/dust-lang/src/compiler/mod.rs index a66d460..505129a 100644 --- a/dust-lang/src/compiler/mod.rs +++ b/dust-lang/src/compiler/mod.rs @@ -4,12 +4,14 @@ //! - [`compile`] borrows a string and returns a chunk, handling the entire compilation process and //! turning any resulting [`ComplileError`] into a [`DustError`]. //! - [`Compiler`] uses a lexer to get tokens and assembles a chunk. 
+mod error; mod optimize; +pub use error::CompileError; + use std::{ fmt::{self, Display, Formatter}, mem::replace, - num::{ParseFloatError, ParseIntError}, }; use colored::Colorize; @@ -21,9 +23,8 @@ use crate::{ Call, CallNative, Close, GetLocal, Jump, LoadConstant, LoadList, LoadSelf, Move, Negate, Not, Return, SetLocal, Test, }, - AnnotatedError, Argument, Chunk, ConcreteValue, DustError, DustString, FunctionType, - Instruction, LexError, Lexer, Local, NativeFunction, Operation, Scope, Span, Token, TokenKind, - TokenOwned, Type, TypeConflict, + Argument, Chunk, ConcreteValue, DustError, DustString, FunctionType, Instruction, Lexer, Local, + NativeFunction, Operation, Scope, Span, Token, TokenKind, Type, }; /// Compiles the input and returns a chunk. @@ -1755,8 +1756,9 @@ impl<'src> Compiler<'src> { } else { Err(CompileError::CannotAddArguments { left_type: left.clone(), + left_position: *left_position, right_type: right.clone(), - position: Span(left_position.0, right_position.1), + right_position: *right_position, }) } } @@ -2204,319 +2206,3 @@ impl From<&Token<'_>> for ParseRule<'_> { } } } - -/// Compilation errors -#[derive(Clone, Debug, PartialEq)] -pub enum CompileError { - // Token errors - ExpectedToken { - expected: TokenKind, - found: TokenOwned, - position: Span, - }, - ExpectedTokenMultiple { - expected: &'static [TokenKind], - found: TokenOwned, - position: Span, - }, - - // Parsing errors - CannotChainComparison { - position: Span, - }, - ExpectedExpression { - found: TokenOwned, - position: Span, - }, - ExpectedFunction { - found: TokenOwned, - actual_type: Type, - position: Span, - }, - ExpectedFunctionType { - found: Type, - position: Span, - }, - InvalidAssignmentTarget { - found: TokenOwned, - position: Span, - }, - UnexpectedReturn { - position: Span, - }, - - // Variable errors - CannotMutateImmutableVariable { - identifier: String, - position: Span, - }, - ExpectedMutableVariable { - found: TokenOwned, - position: Span, - }, - 
UndeclaredVariable { - identifier: String, - position: Span, - }, - VariableOutOfScope { - identifier: String, - variable_scope: Scope, - access_scope: Scope, - position: Span, - }, - - // Type errors - CannotAddType { - argument_type: Type, - position: Span, - }, - CannotAddArguments { - left_type: Type, - right_type: Type, - position: Span, - }, - CannotDivideType { - argument_type: Type, - position: Span, - }, - CannotDivideArguments { - left_type: Type, - right_type: Type, - position: Span, - }, - CannotModuloType { - argument_type: Type, - position: Span, - }, - CannotModuloArguments { - left_type: Type, - right_type: Type, - position: Span, - }, - CannotMultiplyType { - argument_type: Type, - position: Span, - }, - CannotMultiplyArguments { - left_type: Type, - right_type: Type, - position: Span, - }, - CannotSubtractType { - argument_type: Type, - position: Span, - }, - CannotSubtractArguments { - left_type: Type, - right_type: Type, - position: Span, - }, - CannotResolveRegisterType { - register_index: usize, - position: Span, - }, - CannotResolveVariableType { - identifier: String, - position: Span, - }, - IfElseBranchMismatch { - conflict: TypeConflict, - position: Span, - }, - IfMissingElse { - position: Span, - }, - ListItemTypeConflict { - conflict: TypeConflict, - position: Span, - }, - ReturnTypeConflict { - conflict: TypeConflict, - position: Span, - }, - - // Chunk errors - ConstantIndexOutOfBounds { - index: usize, - position: Span, - }, - InstructionIndexOutOfBounds { - index: usize, - position: Span, - }, - LocalIndexOutOfBounds { - index: usize, - position: Span, - }, - - // Wrappers around foreign errors - Lex(LexError), - ParseFloatError { - error: ParseFloatError, - position: Span, - }, - ParseIntError { - error: ParseIntError, - position: Span, - }, -} - -impl AnnotatedError for CompileError { - fn title() -> &'static str { - "Compilation Error" - } - - fn description(&self) -> &'static str { - match self { - Self::CannotAddArguments { .. 
} => "Cannot add these types", - Self::CannotAddType { .. } => "Cannot add to this type", - Self::CannotChainComparison { .. } => "Cannot chain comparison operations", - Self::CannotDivideArguments { .. } => "Cannot divide these types", - Self::CannotDivideType { .. } => "Cannot divide this type", - Self::CannotModuloArguments { .. } => "Cannot modulo these types", - Self::CannotModuloType { .. } => "Cannot modulo this type", - Self::CannotMutateImmutableVariable { .. } => "Cannot mutate immutable variable", - Self::CannotMultiplyArguments { .. } => "Cannot multiply these types", - Self::CannotMultiplyType { .. } => "Cannot multiply this type", - Self::CannotResolveRegisterType { .. } => "Cannot resolve register type", - Self::CannotResolveVariableType { .. } => "Cannot resolve type", - Self::CannotSubtractType { .. } => "Cannot subtract from this type", - Self::CannotSubtractArguments { .. } => "Cannot subtract these types", - Self::ConstantIndexOutOfBounds { .. } => "Constant index out of bounds", - Self::ExpectedExpression { .. } => "Expected an expression", - Self::ExpectedFunction { .. } => "Expected a function", - Self::ExpectedFunctionType { .. } => "Expected a function type", - Self::ExpectedMutableVariable { .. } => "Expected a mutable variable", - Self::ExpectedToken { .. } => "Expected a specific token", - Self::ExpectedTokenMultiple { .. } => "Expected one of multiple tokens", - Self::IfElseBranchMismatch { .. } => "Type mismatch in if/else branches", - Self::IfMissingElse { .. } => "If statement missing else branch", - Self::InstructionIndexOutOfBounds { .. } => "Instruction index out of bounds", - Self::InvalidAssignmentTarget { .. } => "Invalid assignment target", - Self::Lex(error) => error.description(), - Self::ListItemTypeConflict { .. } => "List item type conflict", - Self::LocalIndexOutOfBounds { .. } => "Local index out of bounds", - Self::ParseFloatError { .. } => "Failed to parse float", - Self::ParseIntError { .. 
} => "Failed to parse integer", - Self::ReturnTypeConflict { .. } => "Return type conflict", - Self::UndeclaredVariable { .. } => "Undeclared variable", - Self::UnexpectedReturn { .. } => "Unexpected return", - Self::VariableOutOfScope { .. } => "Variable out of scope", - } - } - - fn details(&self) -> Option { - match self { - Self::CannotMutateImmutableVariable { identifier, .. } => { - Some(format!("{identifier} is immutable")) - } - Self::ExpectedExpression { found, .. } => Some(format!("Found {found}")), - Self::ExpectedFunction { found, actual_type, .. } => { - Some(format!("Expected \"{found}\" to be a function but it has type {actual_type}")) - } - Self::ExpectedFunctionType { found, .. } => { - Some(format!("Expected a function type but found {found}")) - } - Self::ExpectedToken { - expected, found, .. - } => Some(format!("Expected {expected} but found {found}")), - Self::ExpectedTokenMultiple { - expected, found, .. - } => { - let mut details = String::from("Expected"); - - for (index, token) in expected.iter().enumerate() { - details.push_str(&format!(" {token}")); - - if index < expected.len() - 2 { - details.push_str(", "); - } - - if index == expected.len() - 2 { - details.push_str(" or"); - } - } - - details.push_str(&format!(" but found {found}")); - - Some(details) - } - Self::ExpectedMutableVariable { found, .. } => Some(format!("Found {found}")), - Self::IfElseBranchMismatch { - conflict: TypeConflict { expected, actual }, - .. - } => Some( - format!("This if block evaluates to type \"{expected}\" but the else block evaluates to \"{actual}\"") - ), - Self::IfMissingElse { .. } => Some( - "This \"if\" expression evaluates to a value but is missing an else block" - .to_string(), - ), - Self::InvalidAssignmentTarget { found, .. } => { - Some(format!("Cannot assign to {found}")) - } - Self::Lex(error) => error.details(), - Self::ParseFloatError { error, .. } => Some(error.to_string()), - Self::ParseIntError { error, .. 
} => Some(error.to_string()), - Self::ReturnTypeConflict { - conflict: TypeConflict { expected, actual }, - .. - } => Some(format!( - "Expected return type \"{expected}\" but found \"{actual}\"" - )), - Self::UndeclaredVariable { identifier, .. } => { - Some(format!("{identifier} has not been declared")) - } - Self::UnexpectedReturn { .. } => None, - Self::VariableOutOfScope { identifier, .. } => { - Some(format!("{identifier} is out of scope")) - } - _ => None, - } - } - - fn position(&self) -> Span { - match self { - Self::CannotAddArguments { position, .. } => *position, - Self::CannotAddType { position, .. } => *position, - Self::CannotChainComparison { position } => *position, - Self::CannotDivideArguments { position, .. } => *position, - Self::CannotDivideType { position, .. } => *position, - Self::CannotModuloArguments { position, .. } => *position, - Self::CannotModuloType { position, .. } => *position, - Self::CannotMutateImmutableVariable { position, .. } => *position, - Self::CannotMultiplyArguments { position, .. } => *position, - Self::CannotMultiplyType { position, .. } => *position, - Self::CannotResolveRegisterType { position, .. } => *position, - Self::CannotResolveVariableType { position, .. } => *position, - Self::CannotSubtractArguments { position, .. } => *position, - Self::CannotSubtractType { position, .. } => *position, - Self::ConstantIndexOutOfBounds { position, .. } => *position, - Self::ExpectedExpression { position, .. } => *position, - Self::ExpectedFunction { position, .. } => *position, - Self::ExpectedFunctionType { position, .. } => *position, - Self::ExpectedMutableVariable { position, .. } => *position, - Self::ExpectedToken { position, .. } => *position, - Self::ExpectedTokenMultiple { position, .. } => *position, - Self::IfElseBranchMismatch { position, .. } => *position, - Self::IfMissingElse { position } => *position, - Self::InstructionIndexOutOfBounds { position, .. 
} => *position, - Self::InvalidAssignmentTarget { position, .. } => *position, - Self::Lex(error) => error.position(), - Self::ListItemTypeConflict { position, .. } => *position, - Self::LocalIndexOutOfBounds { position, .. } => *position, - Self::ParseFloatError { position, .. } => *position, - Self::ParseIntError { position, .. } => *position, - Self::ReturnTypeConflict { position, .. } => *position, - Self::UndeclaredVariable { position, .. } => *position, - Self::UnexpectedReturn { position } => *position, - Self::VariableOutOfScope { position, .. } => *position, - } - } -} - -impl From for CompileError { - fn from(error: LexError) -> Self { - Self::Lex(error) - } -} diff --git a/dust-lang/src/compiler/optimize.rs b/dust-lang/src/compiler/optimize.rs index 2249485..4f64332 100644 --- a/dust-lang/src/compiler/optimize.rs +++ b/dust-lang/src/compiler/optimize.rs @@ -20,11 +20,11 @@ use crate::{Compiler, Operation}; /// ``` /// /// The instructions must be in the following order: -/// - `Equal`, `Less` or `LessEqual` -/// - `Test` -/// - `Jump` -/// - `LoadBoolean` -/// - `LoadBoolean` +/// - `EQUAL`, `LESS` or `LESS_EQUAL` +/// - `TEST` +/// - `JUMP` +/// - `LOAD_BOOLEAN` +/// - `LOAD_BOOLEAN` pub fn optimize_test_with_explicit_booleans(compiler: &mut Compiler) { if matches!( compiler.get_last_operations(), @@ -54,7 +54,7 @@ pub fn optimize_test_with_explicit_booleans(compiler: &mut Compiler) { /// Optimizes a control flow pattern. /// -/// Test instructions (which are always followed by a jump) can be optimized when the next +/// TEST instructions (which are always followed by a JUMP) can be optimized when the next /// instructions are two constant or boolean loaders. The first loader is set to skip an instruction /// if it is run while the second loader is modified to use the first's register. Foregoing the use /// a jump instruction is an optimization but consolidating the registers is a necessity. 
This is @@ -62,10 +62,10 @@ pub fn optimize_test_with_explicit_booleans(compiler: &mut Compiler) { /// would not know at compile time which branch would be executed at runtime. /// /// The instructions must be in the following order: -/// - `Test` -/// - `Jump` -/// - `LoadBoolean` or `LoadConstant` -/// - `LoadBoolean` or `LoadConstant` +/// - `TEST` +/// - `JUMP` +/// - `LOAD_BOOLEAN` or `LOAD_CONSTANT` +/// - `LOAD_BOOLEAN` or `LOAD_CONSTANT` pub fn optimize_test_with_loader_arguments(compiler: &mut Compiler) { if !matches!( compiler.get_last_operations(), diff --git a/dust-lang/src/dust_error.rs b/dust-lang/src/dust_error.rs index 4db2a64..a7c851f 100644 --- a/dust-lang/src/dust_error.rs +++ b/dust-lang/src/dust_error.rs @@ -2,7 +2,8 @@ //! annotations. use std::fmt::{self, Display, Formatter}; -use annotate_snippets::{Level, Renderer, Snippet}; +use annotate_snippets::{Annotation, Level, Renderer, Snippet}; +use smallvec::SmallVec; use crate::{CompileError, Span, VmError}; @@ -29,14 +30,18 @@ impl<'src> DustError<'src> { } pub fn report(&self) -> String { - let (position, title, description, details) = self.error_data(); + let (title, description, detail_snippets, help_snippets) = self.error_data(); let label = format!("{}: {}", title, description); - let details = details.unwrap_or_else(|| "While parsing this code".to_string()); - let message = Level::Error.title(&label).snippet( - Snippet::source(self.source()) - .fold(false) - .annotation(Level::Error.span(position.0..position.1).label(&details)), - ); + let message = Level::Error + .title(&label) + .snippets(detail_snippets.iter().map(|(details, position)| { + Snippet::source(self.source()) + .annotation(Level::Info.span(position.0..position.1).label(details)) + })) + .snippets(help_snippets.iter().map(|(help, position)| { + Snippet::source(self.source()) + .annotation(Level::Help.span(position.0..position.1).label(help)) + })); let mut report = String::new(); let renderer = Renderer::styled(); @@ -45,19 
+50,26 @@ impl<'src> DustError<'src> { report } - fn error_data(&self) -> (Span, &str, &str, Option) { + fn error_data( + &self, + ) -> ( + &str, + &str, + SmallVec<[(String, Span); 2]>, + SmallVec<[(String, Span); 2]>, + ) { match self { Self::Compile { error, .. } => ( - error.position(), CompileError::title(), error.description(), - error.details(), + error.detail_snippets(), + error.help_snippets(), ), Self::Runtime { error, .. } => ( - error.position(), VmError::title(), error.description(), - error.details(), + error.detail_snippets(), + error.help_snippets(), ), } } @@ -79,6 +91,6 @@ impl Display for DustError<'_> { pub trait AnnotatedError { fn title() -> &'static str; fn description(&self) -> &'static str; - fn details(&self) -> Option; - fn position(&self) -> Span; + fn detail_snippets(&self) -> SmallVec<[(String, Span); 2]>; + fn help_snippets(&self) -> SmallVec<[(String, Span); 2]>; } diff --git a/dust-lang/src/lexer.rs b/dust-lang/src/lexer.rs index bf213a5..685f1fa 100644 --- a/dust-lang/src/lexer.rs +++ b/dust-lang/src/lexer.rs @@ -3,9 +3,6 @@ //! This module provides two lexing options: //! - [`lex`], which lexes the entire input and returns a vector of tokens and their positions //! - [`Lexer`], which lexes the input a token at a time - -use std::fmt::{self, Display, Formatter}; - use serde::{Deserialize, Serialize}; use crate::{dust_error::AnnotatedError, CompileError, DustError, Span, Token}; @@ -747,65 +744,12 @@ impl AnnotatedError for LexError { } } - fn details(&self) -> Option { - match self { - Self::ExpectedAsciiHexDigit { actual, .. } => Some(format!( - "Expected ASCII hex digit (0-9 or A-F), found \"{}\"", - actual - .map(|character| character.to_string()) - .unwrap_or("end of input".to_string()) - )), - Self::ExpectedCharacter { - expected, actual, .. - } => Some(format!( - "Expected character \"{}\", found \"{}\"", - expected, actual - )), - Self::ExpectedCharacterMultiple { - expected, actual, .. 
- } => { - let mut details = "Expected one of the following characters ".to_string(); - - for (i, c) in expected.iter().enumerate() { - if i == expected.len() - 1 { - details.push_str(", or "); - } else if i > 0 { - details.push_str(", "); - } - details.push(*c); - } - - details.push_str(&format!(" but found {}", actual)); - - Some(details) - } - Self::UnexpectedCharacter { actual, .. } => { - Some(format!("Unexpected character \"{}\"", actual)) - } - Self::UnexpectedEndOfFile { .. } => Some("Unexpected end of file".to_string()), - } + fn detail_snippets(&self) -> smallvec::SmallVec<[(String, Span); 2]> { + todo!() } - fn position(&self) -> Span { - match self { - Self::ExpectedAsciiHexDigit { position, .. } => Span(*position, *position), - Self::ExpectedCharacter { position, .. } => Span(*position, *position), - Self::ExpectedCharacterMultiple { position, .. } => Span(*position, *position), - Self::UnexpectedCharacter { position, .. } => Span(*position, *position), - Self::UnexpectedEndOfFile { position } => Span(*position, *position), - } - } -} - -impl Display for LexError { - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - write!(f, "{}", self.description())?; - - if let Some(details) = self.details() { - write!(f, ": {}", details)?; - } - - Ok(()) + fn help_snippets(&self) -> smallvec::SmallVec<[(String, Span); 2]> { + todo!() } } diff --git a/dust-lang/src/native_function/mod.rs b/dust-lang/src/native_function/mod.rs index 5968648..30cd8cb 100644 --- a/dust-lang/src/native_function/mod.rs +++ b/dust-lang/src/native_function/mod.rs @@ -289,25 +289,11 @@ impl AnnotatedError for NativeFunctionError { } } - fn details(&self) -> Option { - match self { - NativeFunctionError::ExpectedArgumentCount { - expected, found, .. - } => Some(format!("Expected {} arguments, found {}", expected, found)), - NativeFunctionError::Panic { message, .. } => message.clone(), - NativeFunctionError::Parse { error, .. 
} => Some(format!("{}", error)), - NativeFunctionError::Io { error, .. } => Some(format!("{}", error)), - NativeFunctionError::Vm(error) => error.details(), - } + fn detail_snippets(&self) -> SmallVec<[(String, Span); 2]> { + todo!() } - fn position(&self) -> Span { - match self { - NativeFunctionError::ExpectedArgumentCount { position, .. } => *position, - NativeFunctionError::Panic { position, .. } => *position, - NativeFunctionError::Parse { position, .. } => *position, - NativeFunctionError::Io { position, .. } => *position, - NativeFunctionError::Vm(error) => error.position(), - } + fn help_snippets(&self) -> SmallVec<[(String, Span); 2]> { + todo!() } } diff --git a/dust-lang/src/vm.rs b/dust-lang/src/vm.rs index e5c058a..25106eb 100644 --- a/dust-lang/src/vm.rs +++ b/dust-lang/src/vm.rs @@ -911,39 +911,11 @@ impl AnnotatedError for VmError { } } - fn details(&self) -> Option { - match self { - Self::EmptyRegister { index, .. } => Some(format!("Register R{index} is empty")), - Self::ExpectedFunction { found, .. } => Some(format!("{found} is not a function")), - - Self::RegisterIndexOutOfBounds { index, .. } => { - Some(format!("Register {index} does not exist")) - } - Self::NativeFunction(error) => error.details(), - Self::Value { error, .. } => Some(error.to_string()), - Self::ValueDisplay { error, .. } => Some(error.to_string() + " while displaying value"), - _ => None, - } + fn detail_snippets(&self) -> SmallVec<[(String, Span); 2]> { + todo!() } - fn position(&self) -> Span { - match self { - Self::ConstantIndexOutOfBounds { position, .. } => *position, - Self::EmptyRegister { position, .. } => *position, - Self::ExpectedBoolean { position, .. } => *position, - Self::ExpectedConcreteValue { position, .. } => *position, - Self::ExpectedFunction { position, .. } => *position, - Self::ExpectedParent { position } => *position, - Self::ExpectedValue { position, .. } => *position, - Self::InstructionIndexOutOfBounds { position, .. 
} => *position, - Self::LocalIndexOutOfBounds { position, .. } => *position, - Self::NativeFunction(error) => error.position(), - Self::RegisterIndexOutOfBounds { position, .. } => *position, - Self::StackOverflow { position } => *position, - Self::StackUnderflow { position } => *position, - Self::UndefinedLocal { position, .. } => *position, - Self::Value { position, .. } => *position, - Self::ValueDisplay { position, .. } => *position, - } + fn help_snippets(&self) -> SmallVec<[(String, Span); 2]> { + todo!() } } diff --git a/wl-copy b/wl-copy new file mode 100644 index 0000000..7051543 --- /dev/null +++ b/wl-copy @@ -0,0 +1,7 @@ + Finished `dev` profile [optimized + debuginfo] target(s) in 0.02s + Running `target/debug/dust -c '42 + true'` +error: Compilation Error: Cannot add to this type + | +1 | 42 + true + | ^^^^ While parsing this code + |