diff --git a/Cargo.lock b/Cargo.lock index 94f61d5..57c926e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -190,6 +190,27 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67ba02a97a2bd10f4b59b25c7973101c79642302776489e030cd13cdab09ed15" +[[package]] +name = "color-print" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3aa954171903797d5623e047d9ab69d91b493657917bdfb8c2c80ecaf9cdb6f4" +dependencies = [ + "color-print-proc-macro", +] + +[[package]] +name = "color-print-proc-macro" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "692186b5ebe54007e45a59aea47ece9eb4108e141326c304cdc91699a7118a22" +dependencies = [ + "nom", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "colorchoice" version = "1.0.3" @@ -299,7 +320,7 @@ name = "dust-cli" version = "0.5.0" dependencies = [ "clap 4.5.20", - "colored", + "color-print", "dust-lang", "env_logger", "log", @@ -495,6 +516,22 @@ version = "2.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "num-traits" version = "0.2.19" diff --git a/README.md b/README.md index f1d2026..05e26f0 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,15 @@ optimization strategies and virtual machine are based on Lua. Unlike Rust and ot compile to machine code, Dust has a very low time to execution. 
Unlike Lua and most other interpreted languages, Dust enforces static typing to improve clarity and prevent bugs. While some languages currently offer high-level features and strict typing (e.g. TypeScript), Dust has a simple -approach to syntax that offers flexibility and expressiveness while still being *obvious* an -audience of programmers, even those who don't know the language. Dust is for programmers who prefer -their code to be simple and clear rather than complex and clever. +approach to syntax that offers flexibility and expressiveness while still being *obvious*, even to +those who know how to code but don't know the language. Dust is developed with an emphasis on +achieving foundational soundness before adding new features. Dust's planned features and design +favor programmers who prefer their code to be simple and clear rather than clever and complex. + +**Dust is under active development and is not yet ready for general use.** + +**Features discussed in this README may be unimplemented, partially implemented or temporarily +removed** ```rust write_line("Enter your name...") @@ -29,8 +35,11 @@ write_line("Hello " + name + "!") ```rust fn fib (n: int) -> int { - if n <= 0 { return 0 } - if n == 1 { return 1 } + if n <= 0 { + return 0 + } else if n == 1 { + return 1 + } fib(n - 1) + fib(n - 2) } @@ -38,80 +47,98 @@ fn fib (n: int) -> int { write_line(fib(25)) ``` -Dust uses a register-based VM with its own set of 32-bit instructions and a custom compiler to emit -the instructions. This should not be confused with a machine code compiler. Despite its compile-time -guarantees, Dust falls into the category of interpreted languages. Competing with the runtime -performance of Rust or C++ *is not* a goal. Competing with the approachability and simplicity of -those languages *is* a goal. 
On the other hand Dust *does* intend to be faster than Python, Ruby and -NodeJS while also offering a superior development experience and more reliable code due to its -static typing. Dust's development approach is informed by some books[^1] and -academic research[^4] as well as practical insight from papers[^2] written by language authors. -See the [Inspiration](README#Inspiration) section for more information or keep reading to learn -about Dust's features. +Dust uses a custom register-based virtual machine with its own set of instructions and a compiler +based on recursive descent to emit them. This should not be confused with a machine code compiler. +Despite having **compile-time guarantees**, Dust falls into the category of interpreted languages. +Competing with the runtime performance of Rust or C++ *is not* a goal. Competing with the +approachability and simplicity of those languages *is* a goal. On the other hand Dust *does* intend +to be faster than Python, Ruby and NodeJS while also offering a superior development experience and +more reliable code due to its static typing. Dust's development approach is informed by some +books[^1] and academic research[^4] as well as practical insight from papers[^2] written by language +authors. See the [Inspiration](README#Inspiration) section for more information or keep reading to +learn about Dust's features. ## Goals -This project has lofty goals. In addition to being a wishlist, these goals should be used to provide -a framework for driving the project forward and making decisions about what to prioritize. +This project's goal is to deliver a language that not only *works* but that offers genuine value +due to a unique combination of design choices and a high-quality implementation. As mentioned in the +first sentence, Dust's general aspirations are to be **fast**, **safe** and **easy**. -- **Fast Compilation**: Despite its compile-time abstractions, Dust should compile and start - executing quickly. 
The compilation time should feel negligible to the user. -- **Fast Execution**: Dust should be generally faster than Python, Ruby and NodeJS. It should be - competitive with other modern register-based VM languages like Lua and JavaScript Core. -- **Safety**: Static types should prevent runtime errors and improve code quality, offering a - superior development experience despite some additional constraints. -- **Approachability**: Dust should be easier to learn than Rust or C++. Its syntax should be - familiar to users of other C-like languages to the point that even a new user can read Dust code - and understand what it does. -- **Web Assembly Support**: The `dust` executable and, by extension, the `dust-lang` library, should - be able to able to compile to WebAssembly and Dust should be able to run in a browser with WASM - support. While running on the browser offers some fun opportunities, this is primarally a goal - because of WASM's potential to become a general-purpose cross-platform runtime. -- **Extended Type System**: Beyond specifying the types of variables and function arguments, Dust - should offer a rich yet simple type system that allows users to define their own types and compose - them with static guarantees about their identity and behavior. -- **Excellent Errors**: Dust should provide helpful error messages that guide the user to the source - of the problem and suggest a solution. Errors should be a helpful learning ressource for users - rather than a source of frustration. -- **High-Quality Documentation**: Dust's documentation should be easy to locate and understand. - Users should feel confident that the documentation is up-to-date and accurate. -- **All-In-One Binary**: The `dust` executable should aspire to be the only tool a user needs to run - Dust code, visualize Dust programs, compile them to intermediate representations, analyze runtime - behavior, run a REPL, format code and more as the scope of the project grows. 
Similar CLI tools - like Cargo and Bun have set a high standard for what a single executable can do. -- **Advanced Goals**: Dust could one day grow to the point that users will want to share their - libraries and distribute their programs. In the unlikely event that Dust becomes popular, it could - warrant an ecosystem consisting of package management with a central repository, a standard - library, a community of users and an organization to maintain the language. These are not within - the scope of the project at this time but it may be possible one day if the project is able to - realize its other goals. This is included here for maximum ambitiousness. +- **Easy** + - **Simple Syntax** Dust should be easier to learn than most programming languages. Its syntax + should be familiar to users of other C-like languages to the point that even a new user can read + Dust code and understand what it does. Rather than being dumbed down by a lack of features, Dust + should be powerful and elegant in its simplicity, seeking a maximum of capability with a minimum + of complexity. When advanced features are added, they should never obstruct existing features, + including readability. Even the advanced type system should be clear and unintimidating. + - **Excellent Errors** Dust should provide helpful error messages that guide the user to the + source of the problem and suggest a solution. Errors should be a helpful learning resource for + users rather than a source of frustration. + - **Relevant Documentation** Users should have the resources they need to learn Dust and write + code in it. They should know where to look for answers and how to reach out for help. +- **Safe** + - **Static Types** Typing should prevent runtime errors and improve code quality, offering a + superior development experience despite some additional constraints. 
Like any good statically + typed language, users should feel confident in the type-consistency of their code and not want + to go back to a dynamically typed language. + - **Memory Safety** Dust should be free of memory bugs. Being implemented in Rust makes this easy + but, to accommodate long-running programs, Dust still requires a memory management strategy. + Dust's design is to use a separate thread for garbage collection, allowing the main thread to + continue executing code while the garbage collector looks for unused memory. +- **Fast** + - **Fast Compilation** Despite its compile-time abstractions, Dust should compile and start + executing quickly. The compilation time should feel negligible to the user. + - **Fast Execution** Dust should be generally faster than Python, Ruby and NodeJS. It should be + competitive with highly optimized, modern, register-based VM languages like Lua. Dust should + be benchmarked during development to inform decisions about performance. + - **Low Resource Usage** Despite its performance, Dust's use of memory and CPU power should be + conservative and predictable enough to accommodate a wide range of devices. -## Non-Goals +These are the project's general design goals. There are many more implementation goals. Among them +are: -Some features are simply out of scope for Dust. As a project's design becomes an implementation, -decisions about what a project *will not* do are required to clarify the project's direction and -purpose for both the developers and the users. - -- **Machine Code Compilation**: Dust is not intended to compete with Rust or C++ in terms of runtime - performance. -- **Complex Abstractions**: Dust will not introduce users to new, exotic syntax or convoluted - patterns that reduce the clarity of a program. Dust will not support complex paradigm-specific - abstractions like inheritance or currying. 
Dust will remain neither object-oriented nor - functional, preferring to expand its features without committing to a single paradigm. -- **Gradual Typing**: Dust's compiler handles the complexities of *static* typing and all value and - variable types are known before a program runs. The VM is and should remain type-agnostic, leaving - it to the sole responsibility of execution. + - Effortless Concurrency: Dust should offer an excellent experience for writing multi-threaded + programs. The language's native functions should offer an API for spawning threads, sending + messages and waiting for results. When using these features, Dust should be much faster than any + single-threaded language. However, Dust should be fast even when running on a single thread. + Single-threaded performance is the best predictor of multi-threaded performance so continuing to + optimize how each thread executes instructions, accesses memory and moves pointers is the best + way to ensure that Dust is fast in all scenarios. + - Embeddability: The library should be easy to use so that Dust can be built into other + applications. Dust should compile to WebAssembly and offer examples of how to use it in a web + application. The user should be able to query the VM for information about the program's state + and control the program's execution. It should be possible to view and modify the value of a + variable and inspect the call stack. + - Data Fluency: Dust's value type should support conversion to and from arbitrary data in formats + like JSON, YAML, TOML and CSV. Pulling data into a Dust program should be easy, with built-in + functions offering conversion for the most widely used formats. + - Portability: Dust should run on as many architectures and operating systems as possible. Using + fewer dependencies and avoiding platform-specific code will help Dust achieve this goal. The + Dust library should be available as a WebAssembly module. 
+ - Developer Experience: Dust should be fun and easy to use. That implies easy installation and the + availability of tutorials and how-to guides. The CLI should be predictable and feature-rich, + with features that make it easy to write and debug Dust code like formatting, bytecode + disassembly and logging. + - Advanced Type System: Dust should implement composite types, aliases and generics. The type + system should use a descriptive syntax that is easy to understand. Dust's type system should be + static, meaning that types are checked before a program reaches the VM. Dust is not a + gradually typed language; its VM is and should remain type-agnostic. + - Thorough Testing: Primarily, the output of Dust's compiler and VM should be tested with programs + that cover all of the language's features. The tests should be actively maintained and should be + changed frequently to reflect a growing project that is constantly discovering new optimizations + and opportunities for improvement. ## Project Status -**Dust is under active development and is not yet ready for general use.** +This project is maintained by a single developer. For now, its primary home is on a private git +server. The GitHub mirror is updated automatically and should carry the latest branches. There are +no other contributors at this time but the project is open to feedback and should eventually accept +contributions. -**Features discussed in this README may be unimplemented, partially implemented, temporarily removed -or only available on a seperate branch.** - -Dust is an ambitious project that acts as a continuous experiment in language design. Features may -be redesigned and reimplemented at will when they do not meet the project's performance or -usability goals. This approach maximizes the development experience as a learning opportunity and +For now, both the library API and the implementation details are freely changed and the CLI has not +been published. 
Dust is both an ambitious project and a continuous experiment in language design. +Features may be redesigned and reimplemented at will when they do not meet the project's performance +or usability goals. This approach maximizes the development experience as a learning opportunity and enforces a high standard of quality but slows down the process of delivering features to users. Eventually, Dust will reach a stable release and will be ready for general use. As the project approaches this milestone, the experimental nature of the project will be reduced and a replaced @@ -119,15 +146,33 @@ with a focus on stability and improvement. ## Language Overview -### Syntax +This is a quick overview of Dust's syntax features. It skips over the aspects that are familiar to +most programmers such as creating variables, using binary operators and printing to the console. +Eventually there should be a complete reference for the syntax. + +### Syntax and Evaluation Dust belongs to the C-like family of languages[^5], with an imperative syntax that will be familiar to many programmers. Dust code looks a lot like Ruby, JavaScript, TypeScript and other members of the family but Rust is its primary point of reference for syntax. Rust was chosen as a syntax model -because its imperative code is *obvious* and *familiar*. Those qualities are aligned with Dust's -emphasis on safety and usability. However, some differences exist because Dust is a simpler language -that can tolerate more relaxed syntax. The most significant difference between Dust's syntax and -evaluation model and Rust's is the handling of semicolons. +because its imperative code is *obvious by design* and *widely familiar*. Those qualities are +aligned with Dust's emphasis on usability. + +However, some differences exist. Dust *evaluates* all of the code in the file while Rust only +initiates from a "main" function. Dust's execution model is more like one found in a scripting +language. 
If we put `42 + 42 == 84` into a file and run it, it will return `true` because the outer +context is, in a sense, the "main" function. + +So while the syntax is by no means compatible, it is superficially similar, even to the point that +syntax highlighting for Rust code works well with Dust code. This is not a design goal but a happy +coincidence. + +### Semicolons + +Dust borrowed Rust's approach to semicolons and their effect on evaluation and relaxed the rules to +accommodate different styles of coding. Rust, for example, isn't designed for command lines or REPLs but +Dust could be well-suited to those applications. Dust needs to work in a source file or in an ad-hoc +one-liner sent to the CLI. Thus, semicolons are optional in most cases. There are two things you need to know about semicolons in Dust: @@ -147,7 +192,7 @@ let b = 2; write_line("The answer is ", a + b); ``` -Removing the semicolons does not alter the execution pattern. +Removing the semicolons does not alter the execution pattern or the return value. ```rust let x = 10 @@ -167,15 +212,16 @@ let input = read_line() let reward = if input == "42" { write_line("You got it! Here's your reward.") - 777 + 777 // <- We need a semicolon here } else { write_line(input, " is not the answer.") -}; +} ``` -Understanding that semicolons suppress values is also important for understanding Dust's evaluation -model. Dust is composed of statements and expressions. If a statement ends in an expression without -a trailing semicolon, the statement evaluates to the value produced by that expression. However, if +### Statements and Expressions + +Dust is composed of statements and expressions. If a statement ends in an expression without a +trailing semicolon, the statement evaluates to the value produced by that expression. However, if the expression's value is suppressed with a semicolon, the statement does not evaluate to a value. This is identical to Rust's evaluation model. 
That means that the following code will not compile: @@ -187,13 +233,14 @@ let a = { 40 + 2; } The `a` variable is assigned to the value produced by a block. The block contains an expression that is suppressed by a semicolon, so the block does not evaluate to a value. Therefore, the `a` variable would have to be uninitialized (which Dust does not allow) or result in a runtime error (which Dust -avoids at all costs). We can fix this code by movinf the semicolon to the end of the block. In this -position it suppresses the value of the entire `let` statement. The above examples showed that a -`let` statement never evaluates to a value, so the semicolon has no effect on the program's behavior -and could be omitted altogether. +avoids at all costs). We can fix this code by moving the semicolon to the end of the block. In this +position it suppresses the value of the entire `let` statement. As we saw above, a `let` statement +never evaluates to a value, so the semicolon has no effect on the program's behavior and could be +omitted altogether. ```rust let a = { 40 + 2 }; // This is fine +let a = { 40 + 2 } // This is also fine ``` Only the final expression in a block is returned. When a `let` statement is combined with an @@ -219,11 +266,17 @@ program could be modified to return no value by simply adding a semicolon at the Compared to JavaScript, Dust's evaluation model is more predictable, less error-prone and will never trap the user into a frustating hunt for a missing semicolon. Compared to Rust, Dust's evaluation -model is essentialy the same but with more relaxed rules about semicolons. In JavaScript, semicolons -are both *required* and *meaningless*, which is a source of confusion for many developers. In Rust, -they are *required* and *meaningful*, which provides excellent consistency but lacks flexibility. +model is more accommodating without sacrificing expressiveness. 
In Rust, semicolons are *required* +and *meaningful*, which provides excellent consistency but lacks flexibility. In JavaScript, +semicolons are *required* and *meaningless*, which is a source of confusion for many developers. -### Safety +### Control Flow + +-- TODO -- + +### Functions + +-- TODO -- #### Type System @@ -245,11 +298,13 @@ from a function, expression or statement. A variable cannot be assigned to `none #### Immutability by Default +TODO + #### Memory Safety - +TODO -### Values, Variables and Types +### Basic Values Dust supports the following basic values: @@ -269,288 +324,19 @@ singular values. Shorter strings are stored on the stack while longer strings ar Dust offers built-in native functions that can manipulate strings by accessing their bytes or reading them as a sequence of characters. - +### Composite Values -## Feature Progress - -This list is a rough outline of the features that are planned to be implemented as soon as possible. -*This is **not** an exhaustive list of all planned features.* This list is updated and rearranged to -maintain a docket of what is being worked on, what is coming next and what can be revisited later. 
- -- [X] Lexer -- [X] Compiler -- [X] VM -- [X] Disassembler (for chunk debugging) -- [ ] Formatter -- [ ] CLI REPL -- [X] Compile dust's binary and library to WASM -- [ ] Browser-based REPL -- CLI - - [X] Run source - - [X] Compile source to a chunk and show disassembly - - [X] Tokenize using the lexer and show token list - - [ ] Format using a built-in formatter - - [ ] Compile to and run from intermediate formats - - [ ] JSON - - [ ] Postcard - - [ ] Integrated REPL -- Basic Values - - [X] No `null` or `undefined` values - - [X] Booleans - - [X] Bytes (unsigned 8-bit) - - [X] Characters (Unicode scalar value) - - [X] Floats (64-bit) - - [X] Functions - - [X] Integers (signed 64-bit) - - [X] Strings (UTF-8) -- Composite Values - - [X] Concrete lists - - [X] Abstract lists (optimization) - - [ ] Concrete maps - - [ ] Abstract maps (optimization) - - [ ] Ranges - - [ ] Tuples (fixed-size constant lists) - - [ ] Structs - - [ ] Enums -- Types - - [X] Basic types for each kind of basic value - - [X] Generalized types: `num`, `any`, `none` - - [ ] Type conversion (safe, explicit and coercion-free) - - [ ] `struct` types - - [ ] `enum` types - - [ ] Type aliases - - [ ] Type arguments - - [ ] Compile-time type checking - - [ ] Function returns - - [X] If/Else branches - - [ ] Instruction arguments -- Variables - - [X] Immutable by default - - [X] Block scope - - [X] Statically typed - - [X] Copy-free identifiers are stored in the chunk as string constants -- Functions - - [X] First-class value - - [X] Statically typed arguments and returns - - [X] Pure (no "closure" of local variables, arguments are the only input) - - [ ] Type arguments -- Control Flow - - [X] If/Else - - [ ] Match - - [ ] Loops - - [ ] `for` - - [ ] `loop` - - [X] `while` -- Native Functions - - Assertions - - [X] `assert` - - [ ] `assert_eq` - - [ ] `assert_ne` - - [ ] `panic` - - I/O - - [ ] `read` - - [X] `read_line` - - [X] `write` - - [X] `write_line` - - Miniature Standard Library of Native 
Functions - - [ ] Byte Functions - - [ ] Character Functions - - [ ] Float Functions - - [ ] Integer Functions - - [ ] String Functions - - [ ] List Functions - - [ ] Map Functions - - [ ] Math Functions - - [ ] Filesystem Functions - - [ ] Network Functions - - [ ] System Functions - - [ ] Randomization Functions - -## Implementation - -Dust is implemented in Rust and is divided into several parts, most importantly the lexer, compiler, -and virtual machine. All of Dust's components are designed with performance in mind and the codebase -uses as few dependencies as possible. The code is tested by integration tests that compile source -code and check the compiled chunk, then run the source and check the output of the virtual machine. -It is important to maintain a high level of quality by writing meaningful tests and preferring to -compile and run programs in an optimal way before adding new features. - -### Command Line Interface - -Dust's command line interface and developer experience are inspired by tools like Bun and especially -Cargo, the Rust package manager that includes everything from project creation to documentation -generation to code formatting to much more. Dust's CLI has started by exposing the most imporant -features for debugging and developing the language itself. Tokenization, compiling, disassembling -and running Dust code are currently supported. The CLI will eventually support a REPL, code -formatting, linting and other features that enhance the development experience and make Dust more -fun and easy to use. - -### Lexer and Tokens - -The lexer emits tokens from the source code. Dust makes extensive use of Rust's zero-copy -capabilities to avoid unnecessary allocations when creating tokens. A token, depending on its type, -may contain a reference to some data from the source code. The data is only copied in the case of an -error. 
In a successfully executed program, no part of the source code is copied unless it is a -string literal or identifier. - -### Compiler - -The compiler creates a chunk, which contains all of the data needed by the virtual machine to run a -Dust program. It does so by emitting bytecode instructions, constants and locals while parsing the -tokens, which are generated one at a time by the lexer. - -#### Parsing - -Dust's compiler uses a custom Pratt parser, a kind of recursive descent parser, to translate a -sequence of tokens into a chunk. Each token is given a precedence and may have a prefix and/or infix -parser. The parsers are just functions that modify the compiler and its output. For example, when -the compiler encounters a boolean token, its prefix parser is the `parse_boolean` function, which -emits a `LoadBoolean` instruction. An integer token's prefix parser is `parse_integer`, which emits -a `LoadConstant` instruction and adds the integer to the constants list. Tokens with infix parsers -include the math operators, which emit `Add`, `Subtract`, `Multiply`, `Divide`, `Modulo` and `Power` -instructions. - -Functions are compiled into their own chunks, which are stored in the constant list. A function's -arguments are stored in its locals list. Before the function is run, the VM must bind the arguments -to values by filling locals' corresponding registers. Instead of copying the arguments, the VM uses -a pointer to one of the parent's registers or constants. - -#### Instruction Optimization - -When generating instructions for a register-based virtual machine, there are opportunities to -optimize the generated code by using fewer instructions or fewer registers. While it is best to -output optimal code in the first place, it is not always possible. Dust's uses a single-pass -compiler and therefore applies optimizations immeadiately after the opportunity becomes available. 
-There is no separate optimization pass and the compiler cannot be run in a mode that disables -optimizations. - -#### Type Checking - -Dust's compiler associates each emitted instruction with a type. This allows the compiler to enforce -compatibility when values are used in expressions. For example, the compiler will not allow a string -to be added to an integer, but it will allow either to be added to another of the same type. Aside -from instruction arguments, the compiler also checks the types of function arguments and the blocks -of `if`/`else` statements. - -The compiler always checks types on the fly, so there is no need for a separate type-checking pass. -Type information is removed from the instructions list before the chunk is created, so the VM (which -is entirely type-agnostic) never sees it. - -### Instructions - -Dust's virtual machine uses 32-bit instructions, which encode seven pieces of information: - -Bit | Description ------ | ----------- -0-4 | Operation code -5 | Flag indicating if the B field is a constant -6 | Flag indicating if the C field is a constant -7 | D field (boolean) -8-15 | A field (unsigned 8-bit integer) -16-23 | B field (unsigned 8-bit integer) -24-31 | C field (unsigned 8-bit integer) - -#### Operations - -The 1.0 version of Dust will have more than the current number of operations but cannot exceed 32 -because of the 5 bit format. - -##### Stack manipulation - -- MOVE: Makes a register's value available in another register by using a pointer. This avoids - copying the value or invalidating the original register. -- CLOSE: Sets a range of registers to the "empty" state. - -##### Value loaders - -- LOAD_BOOLEAN: Loads a boolean to a register. Booleans known at compile-time are not stored in the - constant list. Instead, they are encoded in the instruction itself. -- LOAD_CONSTANT: Loads a constant from the constant list to a register. The VM avoids copying the - constant by using a pointer with the constant's index. 
-- LOAD_LIST: Creates a list abstraction from a range of registers and loads it to a register. -- LOAD_MAP: Creates a map abstraction from a range of registers and loads it to a register. -- LOAD_SELF: Creates an abstraction that represents the current function and loads it to a register. - -##### Variable operations - -- GET_LOCAL: Loads a variable's value to a register by using a pointer to point to the variable's - canonical register (i.e. the register whose index is stored in the locals list). -- SET_LOCAL: Changes a variable's register to a pointer to another register, effectively changing - the variable's value. - -##### Arithmetic - -Arithmetic instructions use the A, B and C fields. The A field is the destination register, the B -and C fields are the arguments, and the flags indicate whether the arguments are constants. - -- ADD: Adds two values and stores the result in a register. Unlike the other arithmetic operations, - the ADD instruction can also be used to concatenate strings and/or characters. Characters are the - only type of value that can perform a kind of implicit conversion. Although the character itself - is not converted, its underlying bytes are concatenated to the string. -- SUBTRACT: Subtracts one argument from another and stores the result in a register. -- MULTIPLY: Multiplies one argument by another and stores the result in a register. -- DIVIDE: Divides one value by another and stores the result in a register. -- MODULO: Calculates the division remainder of two values and stores the result in a register. -- POWER: Raises one value to the power of another and stores the result in a register. - -##### Logic and Control Flow - -Logic instructions work differently from arithmetic and comparison instructions, but they are still -essentially binary operations with a left and a right argument. These areguments, however, are other -instructions. 
This is reminiscent of a stack-based virtual machine in which the arguments are found -in the stack rather than having their location encoded in the instruction. The logic instructions -perform a check on the left-hand argument and, based on the result, either skip the right-hand -argument or allow it to be executed. A `TEST` is always followed by a `JUMP`. If the left argument -passes the test (a boolean equality check), the `JUMP` instruction is skipped and the right argument -is executed. If the left argument fails the test, the `JUMP` is not skipped and it jumps past the -right argument. - -- TEST -- TEST_SET - - - -##### Comparison - - - -- EQUAL -- LESS -- LESS_EQUAL - -##### Unary operations - - - -- NEGATE -- NOT - -##### Execution - - - -- CALL -- CALL_NATIVE -- JUMP -- RETURN - -### Virtual Machine - -The virtual machine is simple and efficient. It uses a stack of registers, which can hold values or -pointers. Pointers can point to values in the constant list or the stack itself. - -While the compiler has multiple responsibilities that warrant more complexity, the VM is simple -enough to use a very straightforward design. The VM's `run` function uses a simple `while` loop with -a `match` statement to execute instructions. When it reaches a `Return` instruction, it breaks the -loop and optionally returns a value. +TODO ## Previous Implementations Dust has gone through several iterations, each with its own design choices. It was originally implemented with a syntax tree generated by an external parser, then a parser generator, and finally a custom parser. Eventually the language was rewritten to use bytecode instructions and a virtual -machine. The current implementation is by far the most performant and the general design is unlikely -to change. +machine. 
The current implementation, compiling to bytecode with custom lexing and parsing for a +register-based VM, is by far the most performant and the general design is unlikely to change, +although it has been optimized and refactored several times. For example, the VM was refactored to +manage multiple threads. Dust previously had a more complex type system with type arguments (or "generics") and a simple model for asynchronous execution of statements. Both of these features were removed to simplify the diff --git a/bench/addictive_addition/addictive_addition.ds b/bench/addictive_addition/addictive_addition.ds index 8b6b2d2..cba1b4c 100644 --- a/bench/addictive_addition/addictive_addition.ds +++ b/bench/addictive_addition/addictive_addition.ds @@ -1,9 +1,5 @@ let mut i = 0 while i < 5_000_000 { - - if i % 100000 == 0 { - write_line(i) - } i += 1 } diff --git a/bench/addictive_addition/addictive_addition.java b/bench/addictive_addition/addictive_addition.java new file mode 100644 index 0000000..b30da65 --- /dev/null +++ b/bench/addictive_addition/addictive_addition.java @@ -0,0 +1,10 @@ +class AddictiveAddition { + + public static void main(String[] args) { + int i = 0; + + while (i < 5_000_000) { + i++; + } + } +} diff --git a/bench/addictive_addition/addictive_addition.rb b/bench/addictive_addition/addictive_addition.rb new file mode 100644 index 0000000..928e576 --- /dev/null +++ b/bench/addictive_addition/addictive_addition.rb @@ -0,0 +1,5 @@ +i = 0 + +while i < 5_000_000 + i += 1 +end diff --git a/bench/addictive_addition/results.md b/bench/addictive_addition/results.md new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/bench/addictive_addition/results.md @@ -0,0 +1 @@ + diff --git a/bench/addictive_addition/run.sh b/bench/addictive_addition/run.sh index 46c43d4..b6509a3 100644 --- a/bench/addictive_addition/run.sh +++ b/bench/addictive_addition/run.sh @@ -3,9 +3,12 @@ hyperfine \ --shell none \ --prepare 'sync' \ --warmup 5 \ + --export-markdown
results.md \ '../../target/release/dust addictive_addition.ds' \ 'node addictive_addition.js' \ 'deno addictive_addition.js' \ 'bun addictive_addition.js' \ 'python addictive_addition.py' \ - 'lua addictive_addition.lua' + 'lua addictive_addition.lua' \ + 'ruby addictive_addition.rb' \ + 'java addictive_addition.java' diff --git a/dust-cli/Cargo.toml b/dust-cli/Cargo.toml index b1f3b83..b1953c6 100644 --- a/dust-cli/Cargo.toml +++ b/dust-cli/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "dust-cli" -description = "Dust Programming Language CLI" +description = "Tool for running and debugging Dust programs" authors = ["Jeff Anderson"] edition.workspace = true license.workspace = true @@ -14,7 +14,7 @@ path = "src/main.rs" [dependencies] clap = { version = "4.5.14", features = ["cargo", "color", "derive", "help", "wrap_help"] } -colored = "2.1.0" +color-print = "0.3.7" dust-lang = { path = "../dust-lang" } env_logger = "0.11.5" log = "0.4.22" diff --git a/dust-cli/src/main.rs b/dust-cli/src/main.rs index a7a54cb..ffcc090 100644 --- a/dust-cli/src/main.rs +++ b/dust-cli/src/main.rs @@ -5,44 +5,71 @@ use std::{fs::read_to_string, path::PathBuf}; use clap::builder::StyledStr; use clap::{ builder::{styling::AnsiColor, Styles}, - ArgAction, Args, ColorChoice, Parser, ValueHint, + crate_authors, crate_description, crate_version, ArgAction, Args, ColorChoice, Parser, + Subcommand, ValueHint, }; -use clap::{crate_authors, crate_description, crate_version}; -use colored::Colorize; +use color_print::cstr; use dust_lang::{CompileError, Compiler, DustError, DustString, Lexer, Span, Token, Vm}; use log::{Level, LevelFilter}; -const HELP_TEMPLATE: &str = "\ +const HELP_TEMPLATE: &str = cstr!( + "\ +Dust CLI +──────── {about} -{version} -{author} +Version: {version} +Author: {author} +License: GPL-3.0 ⚖️ -{usage-heading} -{usage} +Usage +───── +{tab}{usage} -{all-args} -"; +Options +─────── +{options} + +Modes +───── +{subcommands} + +Arguments +───────── +{positionals} + +" +); + 
+const STYLES: Styles = Styles::styled() + .header(AnsiColor::BrightMagenta.on_default().bold()) + .usage(AnsiColor::BrightWhite.on_default().bold()) + .literal(AnsiColor::BrightCyan.on_default()) + .placeholder(AnsiColor::BrightMagenta.on_default()) + .error(AnsiColor::BrightRed.on_default().bold()) + .valid(AnsiColor::Blue.on_default()) + .invalid(AnsiColor::BrightRed.on_default()); #[derive(Parser)] #[clap( version = crate_version!(), author = crate_authors!(), about = crate_description!(), - term_width = 80, color = ColorChoice::Auto, - styles = Styles::styled() - .header(AnsiColor::BrightMagenta.on_default().bold()) - .usage(AnsiColor::BrightWhite.on_default().bold()) - .literal(AnsiColor::BrightCyan.on_default()) - .placeholder(AnsiColor::BrightGreen.on_default()) - .error(AnsiColor::BrightRed.on_default().bold()) - .valid(AnsiColor::Blue.on_default()) - .invalid(AnsiColor::BrightRed.on_default()), disable_help_flag = true, disable_version_flag = true, - help_template = StyledStr::from(HELP_TEMPLATE.bright_white().bold().to_string()), + help_template = StyledStr::from(HELP_TEMPLATE), + styles = STYLES, + term_width = 80, )] struct Cli { + /// Print help information for this or the selected subcommand + #[arg(short, long, action = ArgAction::Help)] + help: bool, + + /// Print version information + #[arg(short, long, action = ArgAction::Version)] + version: bool, + /// Log level, overrides the DUST_LOG environment variable #[arg( short, @@ -50,125 +77,121 @@ struct Cli { value_name = "LOG_LEVEL", value_parser = ["info", "trace", "debug"], )] - #[clap(help_heading = Some("- Options"))] log: Option, - #[arg(short, long, action = ArgAction::Help)] - #[clap(help_heading = Some("- Options"))] - help: bool, - - #[arg(short, long, action = ArgAction::Version)] - #[clap(help_heading = Some("- Options"))] - version: bool, - - #[command(flatten)] - mode: Modes, - - #[command(flatten)] - source: Source, -} - -#[derive(Args)] -#[group(multiple = true, requires = "run")] 
-struct RunOptions { - /// Print the time taken for compilation and execution - #[arg(long)] - #[clap(help_heading = Some("- Run Options"))] - time: bool, - - /// Do not print the run result - #[arg(long)] - #[clap(help_heading = Some("- Run Options"))] - no_output: bool, - - /// Custom program name, overrides the file name - #[arg(long)] - #[clap(help_heading = Some("- Run Options"))] - program_name: Option, -} - -#[derive(Args)] -#[group(multiple = false)] -struct Modes { - /// Run the source code (default) - /// - /// Use the RUN OPTIONS to control this mode - #[arg(short, long, default_value = "true")] - #[clap(help_heading = Some("- Modes"))] - run: bool, - - #[command(flatten)] - run_options: RunOptions, - - /// Compile a chunk and show the disassembly - #[arg(short, long)] - #[clap(help_heading = Some("- Modes"))] - disassemble: bool, - - /// Lex and display tokens from the source code - #[arg(short, long)] - #[clap(help_heading = Some("- Modes"))] - tokenize: bool, - - /// Style disassembly or tokenization output - #[arg(short, long, default_value = "true")] - #[clap(help_heading = Some("- Modes"))] - style: bool, -} - -#[derive(Args, Clone)] -#[group(required = true, multiple = false)] -struct Source { - /// Source code to use instead of a file + /// Source code to run instead of a file #[arg(short, long, value_hint = ValueHint::Other, value_name = "SOURCE")] - #[clap(help_heading = Some("- Input"))] command: Option, /// Read source code from stdin #[arg(long)] - #[clap(help_heading = Some("- Input"))] stdin: bool, + #[command(subcommand)] + mode: Mode, + /// Path to a source code file #[arg(value_hint = ValueHint::FilePath)] - #[clap(help_heading = Some("- Input"))] file: Option, } +#[derive(Subcommand)] +#[clap( + help_template = StyledStr::from(HELP_TEMPLATE), + styles = STYLES, +)] +enum Mode { + /// Compile and run the program (default) + #[command(short_flag = 'r')] + Run { + #[arg(short, long, action = ArgAction::Help)] + #[clap(help_heading = 
Some("Options"))] + help: bool, + + /// Print the time taken for compilation and execution + #[arg(long)] + #[clap(help_heading = Some("Run Options"))] + time: bool, + + /// Do not print the program's return value + #[arg(long)] + #[clap(help_heading = Some("Run Options"))] + no_output: bool, + + /// Custom program name, overrides the file name + #[arg(long)] + #[clap(help_heading = Some("Run Options"))] + name: Option, + }, + + /// Compile and print the bytecode disassembly + #[command(short_flag = 'd')] + Disassemble { + #[arg(short, long, action = ArgAction::Help)] + #[clap(help_heading = Some("Options"))] + help: bool, + + /// Style disassembly output + #[arg(short, long, default_value = "true")] + #[clap(help_heading = Some("Disassemble Options"))] + style: bool, + + /// Custom program name, overrides the file name + #[arg(long)] + #[clap(help_heading = Some("Disassemble Options"))] + name: Option, + }, + + /// Lex the source code and print the tokens + #[command(short_flag = 't')] + Tokenize { + #[arg(short, long, action = ArgAction::Help)] + #[clap(help_heading = Some("Options"))] + help: bool, + + /// Style token output + #[arg(short, long, default_value = "true")] + #[clap(help_heading = Some("Tokenize Options"))] + style: bool, + }, +} + +#[derive(Args, Clone)] +#[group(required = true, multiple = false)] +struct Source {} + fn main() { let start_time = Instant::now(); - let mut logger = env_logger::builder(); + // let mut logger = env_logger::builder(); - logger.format(move |buf, record| { - let elapsed = format!("T+{:.04}", start_time.elapsed().as_secs_f32()).dimmed(); - let level_display = match record.level() { - Level::Info => "INFO".bold().white(), - Level::Debug => "DEBUG".bold().blue(), - Level::Warn => "WARN".bold().yellow(), - Level::Error => "ERROR".bold().red(), - Level::Trace => "TRACE".bold().purple(), - }; - let display = format!("[{elapsed}] {level_display:5} {args}", args = record.args()); + // logger.format(move |buf, record| { + // let 
elapsed = format!("T+{:.04}", start_time.elapsed().as_secs_f32()).dimmed(); + // let level_display = match record.level() { + // Level::Info => "INFO".bold().white(), + // Level::Debug => "DEBUG".bold().blue(), + // Level::Warn => "WARN".bold().yellow(), + // Level::Error => "ERROR".bold().red(), + // Level::Trace => "TRACE".bold().purple(), + // }; + // let display = format!("[{elapsed}] {level_display:5} {args}", args = record.args()); - writeln!(buf, "{display}") - }); + // writeln!(buf, "{display}") + // }); let Cli { log, - source: Source { - command, - file, - stdin, - }, + command, + stdin, mode, + file, .. } = Cli::parse(); - if let Some(level) = log { - logger.filter_level(level).init(); - } else { - logger.parse_env("DUST_LOG").init(); - } + // if let Some(level) = log { + // logger.filter_level(level).init(); + // } else { + // logger.parse_env("DUST_LOG").init(); + // } let (source, file_name) = if let Some(source) = command { (source, None) @@ -190,9 +213,17 @@ fn main() { (source, file_name) }; - let program_name = mode.run_options.program_name.or(file_name); + let program_name = match &mode { + Mode::Run { name, .. } => name, + Mode::Disassemble { name, .. } => name, + Mode::Tokenize { .. } => &None, + } + .iter() + .next() + .cloned() + .or(file_name); - if mode.disassemble { + if let Mode::Disassemble { style, .. } = mode { let lexer = Lexer::new(&source); let mut compiler = match Compiler::new(lexer) { Ok(compiler) => compiler, @@ -217,16 +248,16 @@ fn main() { chunk .disassembler(&mut stdout) - .style(mode.style) + .style(style) .source(&source) - .width(70) + .width(80) .disassemble() .expect("Failed to write disassembly to stdout"); return; } - if mode.tokenize { + if let Mode::Tokenize { style, .. 
} = mode { let mut lexer = Lexer::new(&source); let mut next_token = || -> Option<(Token, Span, bool)> { match lexer.next_token() { @@ -271,46 +302,51 @@ fn main() { return; } - let lexer = Lexer::new(&source); - let mut compiler = match Compiler::new(lexer) { - Ok(compiler) => compiler, - Err(error) => { - handle_compile_error(error, &source); + if let Mode::Run { + time, no_output, .. + } = mode + { + let lexer = Lexer::new(&source); + let mut compiler = match Compiler::new(lexer) { + Ok(compiler) => compiler, + Err(error) => { + handle_compile_error(error, &source); - return; + return; + } + }; + + match compiler.compile() { + Ok(()) => {} + Err(error) => { + handle_compile_error(error, &source); + + return; + } } - }; - match compiler.compile() { - Ok(()) => {} - Err(error) => { - handle_compile_error(error, &source); + let chunk = compiler.finish(program_name); + let compile_end = start_time.elapsed(); - return; + if time { + print_time(compile_end); } - } - let chunk = compiler.finish(program_name); - let compile_end = start_time.elapsed(); + let vm = Vm::new(chunk); + let return_value = vm.run(); + let run_end = start_time.elapsed(); - if mode.run_options.time { - print_time(compile_end); - } - - let vm = Vm::new(chunk); - let return_value = vm.run(); - let run_end = start_time.elapsed(); - - if let Some(value) = return_value { - if !mode.run_options.no_output { - println!("{}", value) + if let Some(value) = return_value { + if !no_output { + println!("{}", value) + } } - } - if mode.run_options.time { - let run_time = run_end - compile_end; + if time { + let run_time = run_end - compile_end; - print_time(run_time); + print_time(run_time); + } } }