diff --git a/Cargo.lock b/Cargo.lock
index 94f61d5..57c926e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -190,6 +190,27 @@ version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15"
+[[package]]
+name = "color-print"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3aa954171903797d5623e047d9ab69d91b493657917bdfb8c2c80ecaf9cdb6f4"
+dependencies = [
+ "color-print-proc-macro",
+]
+
+[[package]]
+name = "color-print-proc-macro"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "692186b5ebe54007e45a59aea47ece9eb4108e141326c304cdc91699a7118a22"
+dependencies = [
+ "nom",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "colorchoice"
version = "1.0.3"
@@ -299,7 +320,7 @@ name = "dust-cli"
version = "0.5.0"
dependencies = [
"clap 4.5.20",
- "colored",
+ "color-print",
"dust-lang",
"env_logger",
"log",
@@ -495,6 +516,22 @@ version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
[[package]]
name = "num-traits"
version = "0.2.19"
diff --git a/README.md b/README.md
index f1d2026..05e26f0 100644
--- a/README.md
+++ b/README.md
@@ -15,9 +15,15 @@ optimization strategies and virtual machine are based on Lua. Unlike Rust and ot
compile to machine code, Dust has a very low time to execution. Unlike Lua and most other
interpreted languages, Dust enforces static typing to improve clarity and prevent bugs. While some
languages currently offer high-level features and strict typing (e.g. TypeScript), Dust has a simple
-approach to syntax that offers flexibility and expressiveness while still being *obvious* an
-audience of programmers, even those who don't know the language. Dust is for programmers who prefer
-their code to be simple and clear rather than complex and clever.
+approach to syntax that offers flexibility and expressiveness while still being *obvious*, even
+to those who know how to code but don't know the language. Dust is developed with an emphasis on
+achieving foundational soundness before adding new features. Dust's planned features and design
+favor programmers who prefer their code to be simple and clear rather than clever and complex.
+
+**Dust is under active development and is not yet ready for general use.**
+
+**Features discussed in this README may be unimplemented, partially implemented or temporarily
+removed.**
```rust
write_line("Enter your name...")
@@ -29,8 +35,11 @@ write_line("Hello " + name + "!")
```rust
fn fib (n: int) -> int {
- if n <= 0 { return 0 }
- if n == 1 { return 1 }
+ if n <= 0 {
+ return 0
+ } else if n == 1 {
+ return 1
+ }
fib(n - 1) + fib(n - 2)
}
@@ -38,80 +47,98 @@ fn fib (n: int) -> int {
write_line(fib(25))
```
-Dust uses a register-based VM with its own set of 32-bit instructions and a custom compiler to emit
-the instructions. This should not be confused with a machine code compiler. Despite its compile-time
-guarantees, Dust falls into the category of interpreted languages. Competing with the runtime
-performance of Rust or C++ *is not* a goal. Competing with the approachability and simplicity of
-those languages *is* a goal. On the other hand Dust *does* intend to be faster than Python, Ruby and
-NodeJS while also offering a superior development experience and more reliable code due to its
-static typing. Dust's development approach is informed by some books[^1] and
-academic research[^4] as well as practical insight from papers[^2] written by language authors.
-See the [Inspiration](README#Inspiration) section for more information or keep reading to learn
-about Dust's features.
+Dust uses a custom register-based virtual machine with its own set of instructions and a compiler
+based on recursive descent to emit them. This should not be confused with a machine code compiler.
+Despite having **compile-time guarantees**, Dust falls into the category of interpreted languages.
+Competing with the runtime performance of Rust or C++ *is not* a goal. Competing with the
+approachability and simplicity of those languages *is* a goal. On the other hand Dust *does* intend
+to be faster than Python, Ruby and NodeJS while also offering a superior development experience and
+more reliable code due to its static typing. Dust's development approach is informed by some
+books[^1] and academic research[^4] as well as practical insight from papers[^2] written by language
+authors. See the [Inspiration](README#Inspiration) section for more information or keep reading to
+learn about Dust's features.
## Goals
-This project has lofty goals. In addition to being a wishlist, these goals should be used to provide
-a framework for driving the project forward and making decisions about what to prioritize.
+This project's goal is to deliver a language that not only *works* but that offers genuine value
+due to a unique combination of design choices and a high-quality implementation. As mentioned in the
+first sentence, Dust's general aspirations are to be **fast**, **safe** and **easy**.
-- **Fast Compilation**: Despite its compile-time abstractions, Dust should compile and start
- executing quickly. The compilation time should feel negligible to the user.
-- **Fast Execution**: Dust should be generally faster than Python, Ruby and NodeJS. It should be
- competitive with other modern register-based VM languages like Lua and JavaScript Core.
-- **Safety**: Static types should prevent runtime errors and improve code quality, offering a
- superior development experience despite some additional constraints.
-- **Approachability**: Dust should be easier to learn than Rust or C++. Its syntax should be
- familiar to users of other C-like languages to the point that even a new user can read Dust code
- and understand what it does.
-- **Web Assembly Support**: The `dust` executable and, by extension, the `dust-lang` library, should
- be able to able to compile to WebAssembly and Dust should be able to run in a browser with WASM
- support. While running on the browser offers some fun opportunities, this is primarally a goal
- because of WASM's potential to become a general-purpose cross-platform runtime.
-- **Extended Type System**: Beyond specifying the types of variables and function arguments, Dust
- should offer a rich yet simple type system that allows users to define their own types and compose
- them with static guarantees about their identity and behavior.
-- **Excellent Errors**: Dust should provide helpful error messages that guide the user to the source
- of the problem and suggest a solution. Errors should be a helpful learning ressource for users
- rather than a source of frustration.
-- **High-Quality Documentation**: Dust's documentation should be easy to locate and understand.
- Users should feel confident that the documentation is up-to-date and accurate.
-- **All-In-One Binary**: The `dust` executable should aspire to be the only tool a user needs to run
- Dust code, visualize Dust programs, compile them to intermediate representations, analyze runtime
- behavior, run a REPL, format code and more as the scope of the project grows. Similar CLI tools
- like Cargo and Bun have set a high standard for what a single executable can do.
-- **Advanced Goals**: Dust could one day grow to the point that users will want to share their
- libraries and distribute their programs. In the unlikely event that Dust becomes popular, it could
- warrant an ecosystem consisting of package management with a central repository, a standard
- library, a community of users and an organization to maintain the language. These are not within
- the scope of the project at this time but it may be possible one day if the project is able to
- realize its other goals. This is included here for maximum ambitiousness.
+- **Easy**
+ - **Simple Syntax** Dust should be easier to learn than most programming languages. Its syntax
+ should be familiar to users of other C-like languages to the point that even a new user can read
+ Dust code and understand what it does. Rather than being dumbed down by a lack of features, Dust
+ should be powerful and elegant in its simplicity, seeking a maximum of capability with a minimum
+ of complexity. When advanced features are added, they should never obstruct existing features,
+ including readability. Even the advanced type system should be clear and unintimidating.
+ - **Excellent Errors** Dust should provide helpful error messages that guide the user to the
+ source of the problem and suggest a solution. Errors should be a helpful learning resource for
+ users rather than a source of frustration.
+ - **Relevant Documentation** Users should have the resources they need to learn Dust and write
+ code in it. They should know where to look for answers and how to reach out for help.
+- **Safe**
+ - **Static Types** Typing should prevent runtime errors and improve code quality, offering a
+ superior development experience despite some additional constraints. Like any good statically
+ typed language, users should feel confident in the type-consistency of their code and not want
+ to go back to a dynamically typed language.
+ - **Memory Safety** Dust should be free of memory bugs. Being implemented in Rust makes this easy
+ but, to accommodate long-running programs, Dust still requires a memory management strategy.
+ Dust's design is to use a separate thread for garbage collection, allowing the main thread to
+ continue executing code while the garbage collector looks for unused memory.
+- **Fast**
+ - **Fast Compilation** Despite its compile-time abstractions, Dust should compile and start
+ executing quickly. The compilation time should feel negligible to the user.
+ - **Fast Execution** Dust should be generally faster than Python, Ruby and NodeJS. It should be
+ competitive with highly optimized, modern, register-based VM languages like Lua. Dust should
+ be benchmarked during development to inform decisions about performance.
+ - **Low Resource Usage** Despite its performance, Dust's use of memory and CPU power should be
+ conservative and predictable enough to accommodate a wide range of devices.
-## Non-Goals
+These are the project's general design goals. There are many more implementation goals. Among them
+are:
-Some features are simply out of scope for Dust. As a project's design becomes an implementation,
-decisions about what a project *will not* do are required to clarify the project's direction and
-purpose for both the developers and the users.
-
-- **Machine Code Compilation**: Dust is not intended to compete with Rust or C++ in terms of runtime
- performance.
-- **Complex Abstractions**: Dust will not introduce users to new, exotic syntax or convoluted
- patterns that reduce the clarity of a program. Dust will not support complex paradigm-specific
- abstractions like inheritance or currying. Dust will remain neither object-oriented nor
- functional, preferring to expand its features without committing to a single paradigm.
-- **Gradual Typing**: Dust's compiler handles the complexities of *static* typing and all value and
- variable types are known before a program runs. The VM is and should remain type-agnostic, leaving
- it to the sole responsibility of execution.
+ - Effortless Concurrency: Dust should offer an excellent experience for writing multi-threaded
+ programs. The language's native functions should offer an API for spawning threads, sending
+ messages and waiting for results. When using these features, Dust should be much faster than any
+ single-threaded language. However, Dust should be fast even when running on a single thread.
+ Single-threaded performance is the best predictor of multi-threaded performance so continuing to
+ optimize how each thread executes instructions, accesses memory and moves pointers is the best
+ way to ensure that Dust is fast in all scenarios.
+ - Embeddability: The library should be easy to use so that Dust can be built into other
+ applications. Dust should compile to WebAssembly and offer examples of how to use it in a web
+ application. The user should be able to query the VM for information about the program's state
+ and control the program's execution. It should be possible to view and modify the value of a
+ variable and inspect the call stack.
+ - Data Fluency: Dust's value type should support conversion to and from arbitrary data in formats
+ like JSON, YAML, TOML and CSV. Pulling data into a Dust program should be easy, with built-in
+ functions offering conversion for the most widely used formats.
+ - Portability: Dust should run on as many architectures and operating systems as possible. Using
+ fewer dependencies and avoiding platform-specific code will help Dust achieve this goal. The
+ Dust library should be available as a WebAssembly module.
+ - Developer Experience: Dust should be fun and easy to use. That implies easy installation and the
+ availability of tutorials and how-to guides. The CLI should be predictable and feature-rich,
+ with features that make it easy to write and debug Dust code like formatting, bytecode
+ disassembly and logging.
+ - Advanced Type System: Dust should implement composite types, aliases and generics. The type
+ system should use a descriptive syntax that is easy to understand. Dust's type system should be
+ static, meaning that types are checked before a program reaches the VM. Dust is not a
+ gradually typed language; its VM is and should remain type-agnostic.
+ - Thorough Testing: Primarily, the output of Dust's compiler and VM should be tested with programs
+ that cover all of the language's features. The tests should be actively maintained and should be
+ changed frequently to reflect a growing project that is constantly discovering new optimizations
+ and opportunities for improvement.
## Project Status
-**Dust is under active development and is not yet ready for general use.**
+This project is maintained by a single developer. For now, its primary home is on a private git
+server. The GitHub mirror is updated automatically and should carry the latest branches. There are
+no other contributors at this time but the project is open to feedback and should eventually accept
+contributions.
-**Features discussed in this README may be unimplemented, partially implemented, temporarily removed
-or only available on a seperate branch.**
-
-Dust is an ambitious project that acts as a continuous experiment in language design. Features may
-be redesigned and reimplemented at will when they do not meet the project's performance or
-usability goals. This approach maximizes the development experience as a learning opportunity and
+For now, both the library API and the implementation details are freely changed and the CLI has not
+been published. Dust is both an ambitious project and a continuous experiment in language design.
+Features may be redesigned and reimplemented at will when they do not meet the project's performance
+or usability goals. This approach maximizes the development experience as a learning opportunity and
enforces a high standard of quality but slows down the process of delivering features to users.
Eventually, Dust will reach a stable release and will be ready for general use. As the project
approaches this milestone, the experimental nature of the project will be reduced and a replaced
@@ -119,15 +146,33 @@ with a focus on stability and improvement.
## Language Overview
-### Syntax
+This is a quick overview of Dust's syntax features. It skips over the aspects that are familiar to
+most programmers such as creating variables, using binary operators and printing to the console.
+Eventually there should be a complete reference for the syntax.
+
+### Syntax and Evaluation
Dust belongs to the C-like family of languages[^5], with an imperative syntax that will be familiar
to many programmers. Dust code looks a lot like Ruby, JavaScript, TypeScript and other members of
the family but Rust is its primary point of reference for syntax. Rust was chosen as a syntax model
-because its imperative code is *obvious* and *familiar*. Those qualities are aligned with Dust's
-emphasis on safety and usability. However, some differences exist because Dust is a simpler language
-that can tolerate more relaxed syntax. The most significant difference between Dust's syntax and
-evaluation model and Rust's is the handling of semicolons.
+because its imperative code is *obvious by design* and *widely familiar*. Those qualities are
+aligned with Dust's emphasis on usability.
+
+However, some differences exist. Dust *evaluates* all of the code in the file while Rust only
+initiates from a "main" function. Dust's execution model is more like one found in a scripting
+language. If we put `42 + 42 == 84` into a file and run it, it will return `true` because the outer
+context is, in a sense, the "main" function.
+
+So while the syntax is by no means compatible, it is superficially similar, even to the point that
+syntax highlighting for Rust code works well with Dust code. This is not a design goal but a happy
+coincidence.
+
+### Semicolons
+
+Dust borrowed Rust's approach to semicolons and their effect on evaluation and relaxed the rules to
+accommodate different styles of coding. Rust, for example, isn't designed for command lines or REPLs but
+Dust could be well-suited to those applications. Dust needs to work in a source file or in an ad-hoc
+one-liner sent to the CLI. Thus, semicolons are optional in most cases.
There are two things you need to know about semicolons in Dust:
@@ -147,7 +192,7 @@ let b = 2;
write_line("The answer is ", a + b);
```
-Removing the semicolons does not alter the execution pattern.
+Removing the semicolons does not alter the execution pattern or the return value.
```rust
let x = 10
@@ -167,15 +212,16 @@ let input = read_line()
let reward = if input == "42" {
write_line("You got it! Here's your reward.")
- 777
+ 777 // <- We need a semicolon here
} else {
write_line(input, " is not the answer.")
-};
+}
```
-Understanding that semicolons suppress values is also important for understanding Dust's evaluation
-model. Dust is composed of statements and expressions. If a statement ends in an expression without
-a trailing semicolon, the statement evaluates to the value produced by that expression. However, if
+### Statements and Expressions
+
+Dust is composed of statements and expressions. If a statement ends in an expression without a
+trailing semicolon, the statement evaluates to the value produced by that expression. However, if
the expression's value is suppressed with a semicolon, the statement does not evaluate to a value.
This is identical to Rust's evaluation model. That means that the following code will not compile:
@@ -187,13 +233,14 @@ let a = { 40 + 2; }
The `a` variable is assigned to the value produced by a block. The block contains an expression that
is suppressed by a semicolon, so the block does not evaluate to a value. Therefore, the `a` variable
would have to be uninitialized (which Dust does not allow) or result in a runtime error (which Dust
-avoids at all costs). We can fix this code by movinf the semicolon to the end of the block. In this
-position it suppresses the value of the entire `let` statement. The above examples showed that a
-`let` statement never evaluates to a value, so the semicolon has no effect on the program's behavior
-and could be omitted altogether.
+avoids at all costs). We can fix this code by moving the semicolon to the end of the block. In this
+position it suppresses the value of the entire `let` statement. As we saw above, a `let` statement
+never evaluates to a value, so the semicolon has no effect on the program's behavior and could be
+omitted altogether.
```rust
let a = { 40 + 2 }; // This is fine
+let a = { 40 + 2 } // This is also fine
```
Only the final expression in a block is returned. When a `let` statement is combined with an
@@ -219,11 +266,17 @@ program could be modified to return no value by simply adding a semicolon at the
Compared to JavaScript, Dust's evaluation model is more predictable, less error-prone and will never
trap the user into a frustating hunt for a missing semicolon. Compared to Rust, Dust's evaluation
-model is essentialy the same but with more relaxed rules about semicolons. In JavaScript, semicolons
-are both *required* and *meaningless*, which is a source of confusion for many developers. In Rust,
-they are *required* and *meaningful*, which provides excellent consistency but lacks flexibility.
+model is more accommodating without sacrificing expressiveness. In Rust, semicolons are *required*
+and *meaningful*, which provides excellent consistency but lacks flexibility. In JavaScript,
+semicolons are *required* and *meaningless*, which is a source of confusion for many developers.
-### Safety
+### Control Flow
+
+-- TODO --
+
+### Functions
+
+-- TODO --
#### Type System
@@ -245,11 +298,13 @@ from a function, expression or statement. A variable cannot be assigned to `none
#### Immutability by Default
+TODO
+
#### Memory Safety
-
+TODO
-### Values, Variables and Types
+### Basic Values
Dust supports the following basic values:
@@ -269,288 +324,19 @@ singular values. Shorter strings are stored on the stack while longer strings ar
Dust offers built-in native functions that can manipulate strings by accessing their bytes or
reading them as a sequence of characters.
-
+### Composite Values
-## Feature Progress
-
-This list is a rough outline of the features that are planned to be implemented as soon as possible.
-*This is **not** an exhaustive list of all planned features.* This list is updated and rearranged to
-maintain a docket of what is being worked on, what is coming next and what can be revisited later.
-
-- [X] Lexer
-- [X] Compiler
-- [X] VM
-- [X] Disassembler (for chunk debugging)
-- [ ] Formatter
-- [ ] CLI REPL
-- [X] Compile dust's binary and library to WASM
-- [ ] Browser-based REPL
-- CLI
- - [X] Run source
- - [X] Compile source to a chunk and show disassembly
- - [X] Tokenize using the lexer and show token list
- - [ ] Format using a built-in formatter
- - [ ] Compile to and run from intermediate formats
- - [ ] JSON
- - [ ] Postcard
- - [ ] Integrated REPL
-- Basic Values
- - [X] No `null` or `undefined` values
- - [X] Booleans
- - [X] Bytes (unsigned 8-bit)
- - [X] Characters (Unicode scalar value)
- - [X] Floats (64-bit)
- - [X] Functions
- - [X] Integers (signed 64-bit)
- - [X] Strings (UTF-8)
-- Composite Values
- - [X] Concrete lists
- - [X] Abstract lists (optimization)
- - [ ] Concrete maps
- - [ ] Abstract maps (optimization)
- - [ ] Ranges
- - [ ] Tuples (fixed-size constant lists)
- - [ ] Structs
- - [ ] Enums
-- Types
- - [X] Basic types for each kind of basic value
- - [X] Generalized types: `num`, `any`, `none`
- - [ ] Type conversion (safe, explicit and coercion-free)
- - [ ] `struct` types
- - [ ] `enum` types
- - [ ] Type aliases
- - [ ] Type arguments
- - [ ] Compile-time type checking
- - [ ] Function returns
- - [X] If/Else branches
- - [ ] Instruction arguments
-- Variables
- - [X] Immutable by default
- - [X] Block scope
- - [X] Statically typed
- - [X] Copy-free identifiers are stored in the chunk as string constants
-- Functions
- - [X] First-class value
- - [X] Statically typed arguments and returns
- - [X] Pure (no "closure" of local variables, arguments are the only input)
- - [ ] Type arguments
-- Control Flow
- - [X] If/Else
- - [ ] Match
- - [ ] Loops
- - [ ] `for`
- - [ ] `loop`
- - [X] `while`
-- Native Functions
- - Assertions
- - [X] `assert`
- - [ ] `assert_eq`
- - [ ] `assert_ne`
- - [ ] `panic`
- - I/O
- - [ ] `read`
- - [X] `read_line`
- - [X] `write`
- - [X] `write_line`
- - Miniature Standard Library of Native Functions
- - [ ] Byte Functions
- - [ ] Character Functions
- - [ ] Float Functions
- - [ ] Integer Functions
- - [ ] String Functions
- - [ ] List Functions
- - [ ] Map Functions
- - [ ] Math Functions
- - [ ] Filesystem Functions
- - [ ] Network Functions
- - [ ] System Functions
- - [ ] Randomization Functions
-
-## Implementation
-
-Dust is implemented in Rust and is divided into several parts, most importantly the lexer, compiler,
-and virtual machine. All of Dust's components are designed with performance in mind and the codebase
-uses as few dependencies as possible. The code is tested by integration tests that compile source
-code and check the compiled chunk, then run the source and check the output of the virtual machine.
-It is important to maintain a high level of quality by writing meaningful tests and preferring to
-compile and run programs in an optimal way before adding new features.
-
-### Command Line Interface
-
-Dust's command line interface and developer experience are inspired by tools like Bun and especially
-Cargo, the Rust package manager that includes everything from project creation to documentation
-generation to code formatting to much more. Dust's CLI has started by exposing the most imporant
-features for debugging and developing the language itself. Tokenization, compiling, disassembling
-and running Dust code are currently supported. The CLI will eventually support a REPL, code
-formatting, linting and other features that enhance the development experience and make Dust more
-fun and easy to use.
-
-### Lexer and Tokens
-
-The lexer emits tokens from the source code. Dust makes extensive use of Rust's zero-copy
-capabilities to avoid unnecessary allocations when creating tokens. A token, depending on its type,
-may contain a reference to some data from the source code. The data is only copied in the case of an
-error. In a successfully executed program, no part of the source code is copied unless it is a
-string literal or identifier.
-
-### Compiler
-
-The compiler creates a chunk, which contains all of the data needed by the virtual machine to run a
-Dust program. It does so by emitting bytecode instructions, constants and locals while parsing the
-tokens, which are generated one at a time by the lexer.
-
-#### Parsing
-
-Dust's compiler uses a custom Pratt parser, a kind of recursive descent parser, to translate a
-sequence of tokens into a chunk. Each token is given a precedence and may have a prefix and/or infix
-parser. The parsers are just functions that modify the compiler and its output. For example, when
-the compiler encounters a boolean token, its prefix parser is the `parse_boolean` function, which
-emits a `LoadBoolean` instruction. An integer token's prefix parser is `parse_integer`, which emits
-a `LoadConstant` instruction and adds the integer to the constants list. Tokens with infix parsers
-include the math operators, which emit `Add`, `Subtract`, `Multiply`, `Divide`, `Modulo` and `Power`
-instructions.
-
-Functions are compiled into their own chunks, which are stored in the constant list. A function's
-arguments are stored in its locals list. Before the function is run, the VM must bind the arguments
-to values by filling locals' corresponding registers. Instead of copying the arguments, the VM uses
-a pointer to one of the parent's registers or constants.
-
-#### Instruction Optimization
-
-When generating instructions for a register-based virtual machine, there are opportunities to
-optimize the generated code by using fewer instructions or fewer registers. While it is best to
-output optimal code in the first place, it is not always possible. Dust's uses a single-pass
-compiler and therefore applies optimizations immeadiately after the opportunity becomes available.
-There is no separate optimization pass and the compiler cannot be run in a mode that disables
-optimizations.
-
-#### Type Checking
-
-Dust's compiler associates each emitted instruction with a type. This allows the compiler to enforce
-compatibility when values are used in expressions. For example, the compiler will not allow a string
-to be added to an integer, but it will allow either to be added to another of the same type. Aside
-from instruction arguments, the compiler also checks the types of function arguments and the blocks
-of `if`/`else` statements.
-
-The compiler always checks types on the fly, so there is no need for a separate type-checking pass.
-Type information is removed from the instructions list before the chunk is created, so the VM (which
-is entirely type-agnostic) never sees it.
-
-### Instructions
-
-Dust's virtual machine uses 32-bit instructions, which encode seven pieces of information:
-
-Bit | Description
------ | -----------
-0-4 | Operation code
-5 | Flag indicating if the B field is a constant
-6 | Flag indicating if the C field is a constant
-7 | D field (boolean)
-8-15 | A field (unsigned 8-bit integer)
-16-23 | B field (unsigned 8-bit integer)
-24-31 | C field (unsigned 8-bit integer)
-
-#### Operations
-
-The 1.0 version of Dust will have more than the current number of operations but cannot exceed 32
-because of the 5 bit format.
-
-##### Stack manipulation
-
-- MOVE: Makes a register's value available in another register by using a pointer. This avoids
- copying the value or invalidating the original register.
-- CLOSE: Sets a range of registers to the "empty" state.
-
-##### Value loaders
-
-- LOAD_BOOLEAN: Loads a boolean to a register. Booleans known at compile-time are not stored in the
- constant list. Instead, they are encoded in the instruction itself.
-- LOAD_CONSTANT: Loads a constant from the constant list to a register. The VM avoids copying the
- constant by using a pointer with the constant's index.
-- LOAD_LIST: Creates a list abstraction from a range of registers and loads it to a register.
-- LOAD_MAP: Creates a map abstraction from a range of registers and loads it to a register.
-- LOAD_SELF: Creates an abstraction that represents the current function and loads it to a register.
-
-##### Variable operations
-
-- GET_LOCAL: Loads a variable's value to a register by using a pointer to point to the variable's
- canonical register (i.e. the register whose index is stored in the locals list).
-- SET_LOCAL: Changes a variable's register to a pointer to another register, effectively changing
- the variable's value.
-
-##### Arithmetic
-
-Arithmetic instructions use the A, B and C fields. The A field is the destination register, the B
-and C fields are the arguments, and the flags indicate whether the arguments are constants.
-
-- ADD: Adds two values and stores the result in a register. Unlike the other arithmetic operations,
- the ADD instruction can also be used to concatenate strings and/or characters. Characters are the
- only type of value that can perform a kind of implicit conversion. Although the character itself
- is not converted, its underlying bytes are concatenated to the string.
-- SUBTRACT: Subtracts one argument from another and stores the result in a register.
-- MULTIPLY: Multiplies one argument by another and stores the result in a register.
-- DIVIDE: Divides one value by another and stores the result in a register.
-- MODULO: Calculates the division remainder of two values and stores the result in a register.
-- POWER: Raises one value to the power of another and stores the result in a register.
-
-##### Logic and Control Flow
-
-Logic instructions work differently from arithmetic and comparison instructions, but they are still
-essentially binary operations with a left and a right argument. These areguments, however, are other
-instructions. This is reminiscent of a stack-based virtual machine in which the arguments are found
-in the stack rather than having their location encoded in the instruction. The logic instructions
-perform a check on the left-hand argument and, based on the result, either skip the right-hand
-argument or allow it to be executed. A `TEST` is always followed by a `JUMP`. If the left argument
-passes the test (a boolean equality check), the `JUMP` instruction is skipped and the right argument
-is executed. If the left argument fails the test, the `JUMP` is not skipped and it jumps past the
-right argument.
-
-- TEST
-- TEST_SET
-
-
-
-##### Comparison
-
-
-
-- EQUAL
-- LESS
-- LESS_EQUAL
-
-##### Unary operations
-
-
-
-- NEGATE
-- NOT
-
-##### Execution
-
-
-
-- CALL
-- CALL_NATIVE
-- JUMP
-- RETURN
-
-### Virtual Machine
-
-The virtual machine is simple and efficient. It uses a stack of registers, which can hold values or
-pointers. Pointers can point to values in the constant list or the stack itself.
-
-While the compiler has multiple responsibilities that warrant more complexity, the VM is simple
-enough to use a very straightforward design. The VM's `run` function uses a simple `while` loop with
-a `match` statement to execute instructions. When it reaches a `Return` instruction, it breaks the
-loop and optionally returns a value.
+TODO
## Previous Implementations
Dust has gone through several iterations, each with its own design choices. It was originally
implemented with a syntax tree generated by an external parser, then a parser generator, and finally
a custom parser. Eventually the language was rewritten to use bytecode instructions and a virtual
-machine. The current implementation is by far the most performant and the general design is unlikely
-to change.
+machine. The current implementation, compiling to bytecode with custom lexing and parsing for a
+register-based VM, is by far the most performant and the general design is unlikely to change,
+although it has been optimized and refactored several times. For example, the VM was refactored to
+manage multiple threads.
Dust previously had a more complex type system with type arguments (or "generics") and a simple
model for asynchronous execution of statements. Both of these features were removed to simplify the
diff --git a/bench/addictive_addition/addictive_addition.ds b/bench/addictive_addition/addictive_addition.ds
index 8b6b2d2..cba1b4c 100644
--- a/bench/addictive_addition/addictive_addition.ds
+++ b/bench/addictive_addition/addictive_addition.ds
@@ -1,9 +1,5 @@
let mut i = 0
while i < 5_000_000 {
-
- if i % 100000 == 0 {
- write_line(i)
- }
i += 1
}
diff --git a/bench/addictive_addition/addictive_addition.java b/bench/addictive_addition/addictive_addition.java
new file mode 100644
index 0000000..b30da65
--- /dev/null
+++ b/bench/addictive_addition/addictive_addition.java
@@ -0,0 +1,10 @@
+class AddictiveAddition {
+
+ public static void main(String[] args) {
+ int i = 0;
+
+ while (i < 5_000_000) {
+ i++;
+ }
+ }
+}
diff --git a/bench/addictive_addition/addictive_addition.rb b/bench/addictive_addition/addictive_addition.rb
new file mode 100644
index 0000000..928e576
--- /dev/null
+++ b/bench/addictive_addition/addictive_addition.rb
@@ -0,0 +1,5 @@
+i = 0
+
+while i < 5_000_000
+ i += 1
+end
diff --git a/bench/addictive_addition/results.md b/bench/addictive_addition/results.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/bench/addictive_addition/results.md
@@ -0,0 +1 @@
+
diff --git a/bench/addictive_addition/run.sh b/bench/addictive_addition/run.sh
index 46c43d4..b6509a3 100644
--- a/bench/addictive_addition/run.sh
+++ b/bench/addictive_addition/run.sh
@@ -3,9 +3,12 @@ hyperfine \
--shell none \
--prepare 'sync' \
--warmup 5 \
+ --export-markdown results.md \
'../../target/release/dust addictive_addition.ds' \
'node addictive_addition.js' \
'deno addictive_addition.js' \
'bun addictive_addition.js' \
'python addictive_addition.py' \
- 'lua addictive_addition.lua'
+ 'lua addictive_addition.lua' \
+ 'ruby addictive_addition.rb' \
+ 'java addictive_addition.java'
diff --git a/dust-cli/Cargo.toml b/dust-cli/Cargo.toml
index b1f3b83..b1953c6 100644
--- a/dust-cli/Cargo.toml
+++ b/dust-cli/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "dust-cli"
-description = "Dust Programming Language CLI"
+description = "Tool for running and debugging Dust programs"
authors = ["Jeff Anderson"]
edition.workspace = true
license.workspace = true
@@ -14,7 +14,7 @@ path = "src/main.rs"
[dependencies]
clap = { version = "4.5.14", features = ["cargo", "color", "derive", "help", "wrap_help"] }
-colored = "2.1.0"
+color-print = "0.3.7"
dust-lang = { path = "../dust-lang" }
env_logger = "0.11.5"
log = "0.4.22"
diff --git a/dust-cli/src/main.rs b/dust-cli/src/main.rs
index a7a54cb..ffcc090 100644
--- a/dust-cli/src/main.rs
+++ b/dust-cli/src/main.rs
@@ -5,44 +5,71 @@ use std::{fs::read_to_string, path::PathBuf};
use clap::builder::StyledStr;
use clap::{
builder::{styling::AnsiColor, Styles},
- ArgAction, Args, ColorChoice, Parser, ValueHint,
+ crate_authors, crate_description, crate_version, ArgAction, Args, ColorChoice, Parser,
+ Subcommand, ValueHint,
};
-use clap::{crate_authors, crate_description, crate_version};
-use colored::Colorize;
+use color_print::cstr;
use dust_lang::{CompileError, Compiler, DustError, DustString, Lexer, Span, Token, Vm};
use log::{Level, LevelFilter};
-const HELP_TEMPLATE: &str = "\
+const HELP_TEMPLATE: &str = cstr!(
+ "\
+Dust CLI
+────────
{about}
-{version}
-{author}
+Version: {version}
+Author: {author}
+License: GPL-3.0 ⚖️
-{usage-heading}
-{usage}
+Usage
+─────
+{tab}{usage}
-{all-args}
-";
+Options
+───────
+{options}
+
+Modes
+─────
+{subcommands}
+
+Arguments
+─────────
+{positionals}
+
+"
+);
+
+const STYLES: Styles = Styles::styled()
+ .header(AnsiColor::BrightMagenta.on_default().bold())
+ .usage(AnsiColor::BrightWhite.on_default().bold())
+ .literal(AnsiColor::BrightCyan.on_default())
+ .placeholder(AnsiColor::BrightMagenta.on_default())
+ .error(AnsiColor::BrightRed.on_default().bold())
+ .valid(AnsiColor::Blue.on_default())
+ .invalid(AnsiColor::BrightRed.on_default());
#[derive(Parser)]
#[clap(
version = crate_version!(),
author = crate_authors!(),
about = crate_description!(),
- term_width = 80,
color = ColorChoice::Auto,
- styles = Styles::styled()
- .header(AnsiColor::BrightMagenta.on_default().bold())
- .usage(AnsiColor::BrightWhite.on_default().bold())
- .literal(AnsiColor::BrightCyan.on_default())
- .placeholder(AnsiColor::BrightGreen.on_default())
- .error(AnsiColor::BrightRed.on_default().bold())
- .valid(AnsiColor::Blue.on_default())
- .invalid(AnsiColor::BrightRed.on_default()),
disable_help_flag = true,
disable_version_flag = true,
- help_template = StyledStr::from(HELP_TEMPLATE.bright_white().bold().to_string()),
+ help_template = StyledStr::from(HELP_TEMPLATE),
+ styles = STYLES,
+ term_width = 80,
)]
struct Cli {
+ /// Print help information for this or the selected subcommand
+ #[arg(short, long, action = ArgAction::Help)]
+ help: bool,
+
+ /// Print version information
+ #[arg(short, long, action = ArgAction::Version)]
+ version: bool,
+
/// Log level, overrides the DUST_LOG environment variable
#[arg(
short,
@@ -50,125 +77,121 @@ struct Cli {
value_name = "LOG_LEVEL",
value_parser = ["info", "trace", "debug"],
)]
- #[clap(help_heading = Some("- Options"))]
log: Option,
- #[arg(short, long, action = ArgAction::Help)]
- #[clap(help_heading = Some("- Options"))]
- help: bool,
-
- #[arg(short, long, action = ArgAction::Version)]
- #[clap(help_heading = Some("- Options"))]
- version: bool,
-
- #[command(flatten)]
- mode: Modes,
-
- #[command(flatten)]
- source: Source,
-}
-
-#[derive(Args)]
-#[group(multiple = true, requires = "run")]
-struct RunOptions {
- /// Print the time taken for compilation and execution
- #[arg(long)]
- #[clap(help_heading = Some("- Run Options"))]
- time: bool,
-
- /// Do not print the run result
- #[arg(long)]
- #[clap(help_heading = Some("- Run Options"))]
- no_output: bool,
-
- /// Custom program name, overrides the file name
- #[arg(long)]
- #[clap(help_heading = Some("- Run Options"))]
- program_name: Option,
-}
-
-#[derive(Args)]
-#[group(multiple = false)]
-struct Modes {
- /// Run the source code (default)
- ///
- /// Use the RUN OPTIONS to control this mode
- #[arg(short, long, default_value = "true")]
- #[clap(help_heading = Some("- Modes"))]
- run: bool,
-
- #[command(flatten)]
- run_options: RunOptions,
-
- /// Compile a chunk and show the disassembly
- #[arg(short, long)]
- #[clap(help_heading = Some("- Modes"))]
- disassemble: bool,
-
- /// Lex and display tokens from the source code
- #[arg(short, long)]
- #[clap(help_heading = Some("- Modes"))]
- tokenize: bool,
-
- /// Style disassembly or tokenization output
- #[arg(short, long, default_value = "true")]
- #[clap(help_heading = Some("- Modes"))]
- style: bool,
-}
-
-#[derive(Args, Clone)]
-#[group(required = true, multiple = false)]
-struct Source {
- /// Source code to use instead of a file
+ /// Source code to run instead of a file
#[arg(short, long, value_hint = ValueHint::Other, value_name = "SOURCE")]
- #[clap(help_heading = Some("- Input"))]
command: Option,
/// Read source code from stdin
#[arg(long)]
- #[clap(help_heading = Some("- Input"))]
stdin: bool,
+ #[command(subcommand)]
+ mode: Mode,
+
/// Path to a source code file
#[arg(value_hint = ValueHint::FilePath)]
- #[clap(help_heading = Some("- Input"))]
file: Option,
}
+#[derive(Subcommand)]
+#[clap(
+ help_template = StyledStr::from(HELP_TEMPLATE),
+ styles = STYLES,
+)]
+enum Mode {
+ /// Compile and run the program (default)
+ #[command(short_flag = 'r')]
+ Run {
+ #[arg(short, long, action = ArgAction::Help)]
+ #[clap(help_heading = Some("Options"))]
+ help: bool,
+
+ /// Print the time taken for compilation and execution
+ #[arg(long)]
+ #[clap(help_heading = Some("Run Options"))]
+ time: bool,
+
+ /// Do not print the program's return value
+ #[arg(long)]
+ #[clap(help_heading = Some("Run Options"))]
+ no_output: bool,
+
+ /// Custom program name, overrides the file name
+ #[arg(long)]
+ #[clap(help_heading = Some("Run Options"))]
+ name: Option,
+ },
+
+ /// Compile and print the bytecode disassembly
+ #[command(short_flag = 'd')]
+ Disassemble {
+ #[arg(short, long, action = ArgAction::Help)]
+ #[clap(help_heading = Some("Options"))]
+ help: bool,
+
+ /// Style disassembly output
+ #[arg(short, long, default_value = "true")]
+ #[clap(help_heading = Some("Disassemble Options"))]
+ style: bool,
+
+ /// Custom program name, overrides the file name
+ #[arg(long)]
+ #[clap(help_heading = Some("Disassemble Options"))]
+ name: Option,
+ },
+
+ /// Lex the source code and print the tokens
+ #[command(short_flag = 't')]
+ Tokenize {
+ #[arg(short, long, action = ArgAction::Help)]
+ #[clap(help_heading = Some("Options"))]
+ help: bool,
+
+ /// Style token output
+ #[arg(short, long, default_value = "true")]
+ #[clap(help_heading = Some("Tokenize Options"))]
+ style: bool,
+ },
+}
+
+#[derive(Args, Clone)]
+#[group(required = true, multiple = false)]
+struct Source {}
+
fn main() {
let start_time = Instant::now();
- let mut logger = env_logger::builder();
+ // let mut logger = env_logger::builder();
- logger.format(move |buf, record| {
- let elapsed = format!("T+{:.04}", start_time.elapsed().as_secs_f32()).dimmed();
- let level_display = match record.level() {
- Level::Info => "INFO".bold().white(),
- Level::Debug => "DEBUG".bold().blue(),
- Level::Warn => "WARN".bold().yellow(),
- Level::Error => "ERROR".bold().red(),
- Level::Trace => "TRACE".bold().purple(),
- };
- let display = format!("[{elapsed}] {level_display:5} {args}", args = record.args());
+ // logger.format(move |buf, record| {
+ // let elapsed = format!("T+{:.04}", start_time.elapsed().as_secs_f32()).dimmed();
+ // let level_display = match record.level() {
+ // Level::Info => "INFO".bold().white(),
+ // Level::Debug => "DEBUG".bold().blue(),
+ // Level::Warn => "WARN".bold().yellow(),
+ // Level::Error => "ERROR".bold().red(),
+ // Level::Trace => "TRACE".bold().purple(),
+ // };
+ // let display = format!("[{elapsed}] {level_display:5} {args}", args = record.args());
- writeln!(buf, "{display}")
- });
+ // writeln!(buf, "{display}")
+ // });
let Cli {
log,
- source: Source {
- command,
- file,
- stdin,
- },
+ command,
+ stdin,
mode,
+ file,
..
} = Cli::parse();
- if let Some(level) = log {
- logger.filter_level(level).init();
- } else {
- logger.parse_env("DUST_LOG").init();
- }
+ // if let Some(level) = log {
+ // logger.filter_level(level).init();
+ // } else {
+ // logger.parse_env("DUST_LOG").init();
+ // }
let (source, file_name) = if let Some(source) = command {
(source, None)
@@ -190,9 +213,17 @@ fn main() {
(source, file_name)
};
- let program_name = mode.run_options.program_name.or(file_name);
+ let program_name = match &mode {
+ Mode::Run { name, .. } => name,
+ Mode::Disassemble { name, .. } => name,
+ Mode::Tokenize { .. } => &None,
+ }
+ .iter()
+ .next()
+ .cloned()
+ .or(file_name);
- if mode.disassemble {
+ if let Mode::Disassemble { style, .. } = mode {
let lexer = Lexer::new(&source);
let mut compiler = match Compiler::new(lexer) {
Ok(compiler) => compiler,
@@ -217,16 +248,16 @@ fn main() {
chunk
.disassembler(&mut stdout)
- .style(mode.style)
+ .style(style)
.source(&source)
- .width(70)
+ .width(80)
.disassemble()
.expect("Failed to write disassembly to stdout");
return;
}
- if mode.tokenize {
+ if let Mode::Tokenize { style, .. } = mode {
let mut lexer = Lexer::new(&source);
let mut next_token = || -> Option<(Token, Span, bool)> {
match lexer.next_token() {
@@ -271,46 +302,51 @@ fn main() {
return;
}
- let lexer = Lexer::new(&source);
- let mut compiler = match Compiler::new(lexer) {
- Ok(compiler) => compiler,
- Err(error) => {
- handle_compile_error(error, &source);
+ if let Mode::Run {
+ time, no_output, ..
+ } = mode
+ {
+ let lexer = Lexer::new(&source);
+ let mut compiler = match Compiler::new(lexer) {
+ Ok(compiler) => compiler,
+ Err(error) => {
+ handle_compile_error(error, &source);
- return;
+ return;
+ }
+ };
+
+ match compiler.compile() {
+ Ok(()) => {}
+ Err(error) => {
+ handle_compile_error(error, &source);
+
+ return;
+ }
}
- };
- match compiler.compile() {
- Ok(()) => {}
- Err(error) => {
- handle_compile_error(error, &source);
+ let chunk = compiler.finish(program_name);
+ let compile_end = start_time.elapsed();
- return;
+ if time {
+ print_time(compile_end);
}
- }
- let chunk = compiler.finish(program_name);
- let compile_end = start_time.elapsed();
+ let vm = Vm::new(chunk);
+ let return_value = vm.run();
+ let run_end = start_time.elapsed();
- if mode.run_options.time {
- print_time(compile_end);
- }
-
- let vm = Vm::new(chunk);
- let return_value = vm.run();
- let run_end = start_time.elapsed();
-
- if let Some(value) = return_value {
- if !mode.run_options.no_output {
- println!("{}", value)
+ if let Some(value) = return_value {
+ if !no_output {
+ println!("{}", value)
+ }
}
- }
- if mode.run_options.time {
- let run_time = run_end - compile_end;
+ if time {
+ let run_time = run_end - compile_end;
- print_time(run_time);
+ print_time(run_time);
+ }
}
}