From 406edda573ac5c6ce059615e98728d8327ab4e62 Mon Sep 17 00:00:00 2001 From: Jeff Date: Fri, 6 Sep 2024 23:30:43 -0400 Subject: [PATCH] Begin parser --- dust-lang/src/bytecode.rs | 146 +++-- dust-lang/src/lexer.rs | 1291 +++++++++++++++++++++++++++++++++++++ dust-lang/src/lib.rs | 16 +- dust-lang/src/parser.rs | 122 ++++ dust-lang/src/token.rs | 651 +++++++++++++++++++ 5 files changed, 2170 insertions(+), 56 deletions(-) create mode 100644 dust-lang/src/lexer.rs create mode 100644 dust-lang/src/parser.rs create mode 100644 dust-lang/src/token.rs diff --git a/dust-lang/src/bytecode.rs b/dust-lang/src/bytecode.rs index e60ea9a..19fb847 100644 --- a/dust-lang/src/bytecode.rs +++ b/dust-lang/src/bytecode.rs @@ -22,11 +22,14 @@ impl Vm { pub fn interpret(&mut self) -> Result, VmError> { loop { - let instruction = self.read_instruction(); + let (byte, position) = self.read(); + let instruction = Instruction::from_byte(byte) + .ok_or_else(|| VmError::InvalidInstruction(byte, position))?; match instruction { - Instruction::Constant(index) => { - let value = self.read_constant(*index); + Instruction::Constant => { + let (index, _) = self.read(); + let value = self.read_constant(index as usize); self.stack.push(value.clone()); } @@ -77,8 +80,6 @@ impl Vm { self.stack.push(quotient); } } - - self.ip += 1; } } @@ -100,10 +101,10 @@ impl Vm { } } - pub fn read_instruction(&self) -> &Instruction { - let (instruction, _) = &self.chunk.code[self.ip]; + pub fn read(&mut self) -> (u8, Span) { + self.ip += 1; - instruction + self.chunk.code[self.ip - 1] } pub fn read_constant(&self, index: usize) -> Value { @@ -113,6 +114,8 @@ impl Vm { #[derive(Debug, Clone, PartialEq)] pub enum VmError { + ChunkOverflow, + InvalidInstruction(u8, Span), StackUnderflow, StackOverflow, Value(ValueError), @@ -126,24 +129,43 @@ impl From for VmError { #[derive(Debug, Clone, Copy, Eq, PartialEq, Serialize, Deserialize)] pub enum Instruction { - Constant(usize), - Return, + Constant = 0, + Return = 1, // 
Unary - Negate, + Negate = 2, // Binary - Add, - Subtract, - Multiply, - Divide, + Add = 3, + Subtract = 4, + Multiply = 5, + Divide = 6, } impl Instruction { + pub fn from_byte(byte: u8) -> Option { + match byte { + 0 => Some(Self::Constant), + 1 => Some(Self::Return), + + // Unary + 2 => Some(Self::Negate), + + // Binary + 3 => Some(Self::Add), + 4 => Some(Self::Subtract), + 5 => Some(Self::Multiply), + 6 => Some(Self::Divide), + + _ => None, + } + } + pub fn disassemble(&self, chunk: &Chunk, offset: usize) -> String { match self { - Instruction::Constant(index) => { - let value = &chunk.constants[*index]; + Instruction::Constant => { + let index = chunk.code[offset + 1].0 as usize; + let value = &chunk.constants[index]; format!("{:04} CONSTANT {} {}", offset, index, value) } @@ -163,7 +185,7 @@ impl Instruction { #[derive(Debug, Clone, Eq, PartialEq, Serialize, Deserialize)] pub struct Chunk { - code: Vec<(Instruction, Span)>, + code: Vec<(u8, Span)>, constants: Vec, } @@ -187,14 +209,20 @@ impl Chunk { self.code.capacity() } - pub fn write(&mut self, instruction: Instruction, position: Span) { + pub fn write(&mut self, instruction: u8, position: Span) { self.code.push((instruction, position)); } - pub fn push_constant(&mut self, value: Value) -> usize { - self.constants.push(value); + pub fn push_constant(&mut self, value: Value) -> Result { + let starting_length = self.constants.len(); - self.constants.len() - 1 + if starting_length + 1 > (u8::MAX as usize) { + Err(ChunkError::Overflow) + } else { + self.constants.push(value); + + Ok(starting_length as u8) + } } pub fn clear(&mut self) { @@ -205,7 +233,9 @@ impl Chunk { pub fn disassemble(&self, name: &str) { println!("== {} ==", name); - for (offset, (instruction, position)) in self.code.iter().enumerate() { + for (offset, (byte, position)) in self.code.iter().enumerate() { + let instruction = Instruction::from_byte(*byte).unwrap(); + println!("{} {}", position, instruction.disassemble(self, offset)); } } @@ 
-217,6 +247,11 @@ impl Default for Chunk { } } +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ChunkError { + Overflow, +} + #[cfg(test)] pub mod tests { use super::*; @@ -224,11 +259,12 @@ pub mod tests { #[test] fn negation() { let mut chunk = Chunk::new(); - let constant = chunk.push_constant(Value::integer(42)); + let constant = chunk.push_constant(Value::integer(42)).unwrap(); - chunk.write(Instruction::Constant(constant), Span(0, 1)); - chunk.write(Instruction::Negate, Span(4, 5)); - chunk.write(Instruction::Return, Span(2, 3)); + chunk.write(Instruction::Constant as u8, Span(0, 1)); + chunk.write(constant, Span(2, 3)); + chunk.write(Instruction::Negate as u8, Span(4, 5)); + chunk.write(Instruction::Return as u8, Span(2, 3)); let mut vm = Vm::new(chunk); let result = vm.interpret(); @@ -239,13 +275,15 @@ pub mod tests { #[test] fn addition() { let mut chunk = Chunk::new(); - let left = chunk.push_constant(Value::integer(42)); - let right = chunk.push_constant(Value::integer(23)); + let left = chunk.push_constant(Value::integer(42)).unwrap(); + let right = chunk.push_constant(Value::integer(23)).unwrap(); - chunk.write(Instruction::Constant(left), Span(0, 1)); - chunk.write(Instruction::Constant(right), Span(2, 3)); - chunk.write(Instruction::Add, Span(4, 5)); - chunk.write(Instruction::Return, Span(6, 7)); + chunk.write(Instruction::Constant as u8, Span(0, 1)); + chunk.write(left, Span(2, 3)); + chunk.write(Instruction::Constant as u8, Span(4, 5)); + chunk.write(right, Span(6, 7)); + chunk.write(Instruction::Add as u8, Span(8, 9)); + chunk.write(Instruction::Return as u8, Span(10, 11)); let mut vm = Vm::new(chunk); let result = vm.interpret(); @@ -256,13 +294,15 @@ pub mod tests { #[test] fn subtraction() { let mut chunk = Chunk::new(); - let left = chunk.push_constant(Value::integer(42)); - let right = chunk.push_constant(Value::integer(23)); + let left = chunk.push_constant(Value::integer(42)).unwrap(); + let right = 
chunk.push_constant(Value::integer(23)).unwrap(); - chunk.write(Instruction::Constant(left), Span(0, 1)); - chunk.write(Instruction::Constant(right), Span(2, 3)); - chunk.write(Instruction::Subtract, Span(4, 5)); - chunk.write(Instruction::Return, Span(6, 7)); + chunk.write(Instruction::Constant as u8, Span(0, 1)); + chunk.write(left, Span(2, 3)); + chunk.write(Instruction::Constant as u8, Span(4, 5)); + chunk.write(right, Span(6, 7)); + chunk.write(Instruction::Subtract as u8, Span(8, 9)); + chunk.write(Instruction::Return as u8, Span(10, 11)); let mut vm = Vm::new(chunk); let result = vm.interpret(); @@ -273,13 +313,15 @@ pub mod tests { #[test] fn multiplication() { let mut chunk = Chunk::new(); - let left = chunk.push_constant(Value::integer(42)); - let right = chunk.push_constant(Value::integer(23)); + let left = chunk.push_constant(Value::integer(42)).unwrap(); + let right = chunk.push_constant(Value::integer(23)).unwrap(); - chunk.write(Instruction::Constant(left), Span(0, 1)); - chunk.write(Instruction::Constant(right), Span(2, 3)); - chunk.write(Instruction::Multiply, Span(4, 5)); - chunk.write(Instruction::Return, Span(6, 7)); + chunk.write(Instruction::Constant as u8, Span(0, 1)); + chunk.write(left, Span(2, 3)); + chunk.write(Instruction::Constant as u8, Span(4, 5)); + chunk.write(right, Span(6, 7)); + chunk.write(Instruction::Multiply as u8, Span(8, 9)); + chunk.write(Instruction::Return as u8, Span(10, 11)); let mut vm = Vm::new(chunk); let result = vm.interpret(); @@ -291,13 +333,15 @@ pub mod tests { fn division() { let mut chunk = Chunk::new(); - let left = chunk.push_constant(Value::integer(42)); - let right = chunk.push_constant(Value::integer(23)); + let left = chunk.push_constant(Value::integer(42)).unwrap(); + let right = chunk.push_constant(Value::integer(23)).unwrap(); - chunk.write(Instruction::Constant(left), Span(0, 1)); - chunk.write(Instruction::Constant(right), Span(2, 3)); - chunk.write(Instruction::Divide, Span(4, 5)); - 
chunk.write(Instruction::Return, Span(6, 7)); + chunk.write(Instruction::Constant as u8, Span(0, 1)); + chunk.write(left, Span(2, 3)); + chunk.write(Instruction::Constant as u8, Span(4, 5)); + chunk.write(right, Span(6, 7)); + chunk.write(Instruction::Divide as u8, Span(8, 9)); + chunk.write(Instruction::Return as u8, Span(10, 11)); let mut vm = Vm::new(chunk); let result = vm.interpret(); diff --git a/dust-lang/src/lexer.rs b/dust-lang/src/lexer.rs new file mode 100644 index 0000000..70b082a --- /dev/null +++ b/dust-lang/src/lexer.rs @@ -0,0 +1,1291 @@ +//! Lexing tools. +//! +//! This module provides two lexing options: +//! - [`lex`], which lexes the entire input and returns a vector of tokens and their positions +//! - [`Lexer`], which lexes the input a token at a time +use std::fmt::{self, Display, Formatter}; + +use crate::{Span, Token}; + +/// Lexes the input and return a vector of tokens and their positions. +/// +/// # Examples +/// ``` +/// # use dust_lang::*; +/// let input = "x = 1 + 2"; +/// let tokens = lex(input).unwrap(); +/// +/// assert_eq!( +/// tokens, +/// [ +/// (Token::Identifier("x"), (0, 1)), +/// (Token::Equal, (2, 3)), +/// (Token::Integer("1"), (4, 5)), +/// (Token::Plus, (6, 7)), +/// (Token::Integer("2"), (8, 9)), +/// (Token::Eof, (9, 9)), +/// ] +/// ); +/// ``` +pub fn lex<'chars, 'src: 'chars>( + source: &'src str, +) -> Result, Span)>, LexError> { + let mut lexer = Lexer::new(source); + let mut tokens = Vec::new(); + + loop { + let (token, span) = lexer.next_token()?; + let is_eof = matches!(token, Token::Eof); + + tokens.push((token, span)); + + if is_eof { + break; + } + } + + Ok(tokens) +} + +/// Low-level tool for lexing a single token at a time. 
+/// +/// # Examples +/// ``` +/// # use dust_lang::*; +/// let input = "x = 1 + 2"; +/// let mut lexer = Lexer::new(input); +/// let mut tokens = Vec::new(); +/// +/// loop { +/// let (token, span) = lexer.next_token().unwrap(); +/// let is_eof = matches!(token, Token::Eof); +/// +/// tokens.push((token, span)); +/// +/// if is_eof { +/// break; +/// } +/// } +/// +/// assert_eq!( +/// tokens, +/// [ +/// (Token::Identifier("x"), (0, 1)), +/// (Token::Equal, (2, 3)), +/// (Token::Integer("1"), (4, 5)), +/// (Token::Plus, (6, 7)), +/// (Token::Integer("2"), (8, 9)), +/// (Token::Eof, (9, 9)), +/// ] +/// ) +/// ``` +#[derive(Debug, Clone)] +pub struct Lexer<'src> { + source: &'src str, + position: usize, +} + +impl<'src> Lexer<'src> { + /// Create a new lexer for the given input. + pub fn new(source: &'src str) -> Self { + Lexer { + source, + position: 0, + } + } + + /// Produce the next token. + pub fn next_token(&mut self) -> Result<(Token<'src>, Span), LexError> { + self.skip_whitespace(); + + let (token, span) = if let Some(c) = self.peek_char() { + match c { + '0'..='9' => self.lex_number()?, + '-' => { + let second_char = self.peek_second_char(); + + if let Some('=') = second_char { + self.position += 2; + + (Token::MinusEqual, Span(self.position - 2, self.position)) + } else if let Some('0'..='9') = second_char { + self.lex_number()? 
+ } else if "-Infinity" == self.peek_chars(9) { + self.position += 9; + + ( + Token::Float("-Infinity"), + Span(self.position - 9, self.position), + ) + } else { + self.position += 1; + + (Token::Minus, Span(self.position - 1, self.position)) + } + } + 'a'..='z' | 'A'..='Z' => self.lex_alphanumeric()?, + '"' => self.lex_string()?, + '\'' => { + self.position += 1; + + if let Some(c) = self.peek_char() { + self.position += 1; + + if let Some('\'') = self.peek_char() { + self.position += 1; + + (Token::Character(c), Span(self.position - 3, self.position)) + } else { + return Err(LexError::ExpectedCharacter { + expected: '\'', + actual: c, + position: self.position, + }); + } + } else { + return Err(LexError::UnexpectedEndOfFile { + position: self.position, + }); + } + } + '+' => { + if let Some('=') = self.peek_second_char() { + self.position += 2; + + (Token::PlusEqual, Span(self.position - 2, self.position)) + } else { + self.position += 1; + + (Token::Plus, Span(self.position - 1, self.position)) + } + } + '*' => { + self.position += 1; + + (Token::Star, Span(self.position - 1, self.position)) + } + '(' => { + self.position += 1; + + ( + Token::LeftParenthesis, + Span(self.position - 1, self.position), + ) + } + ')' => { + self.position += 1; + + ( + Token::RightParenthesis, + Span(self.position - 1, self.position), + ) + } + '=' => { + if let Some('=') = self.peek_second_char() { + self.position += 2; + + (Token::DoubleEqual, Span(self.position - 2, self.position)) + } else { + self.position += 1; + + (Token::Equal, Span(self.position - 1, self.position)) + } + } + '[' => { + self.position += 1; + + ( + Token::LeftSquareBrace, + Span(self.position - 1, self.position), + ) + } + ']' => { + self.position += 1; + + ( + Token::RightSquareBrace, + Span(self.position - 1, self.position), + ) + } + ',' => { + self.position += 1; + + (Token::Comma, Span(self.position - 1, self.position)) + } + '.' 
=> { + if let Some('.') = self.peek_second_char() { + self.position += 2; + + (Token::DoubleDot, Span(self.position - 2, self.position)) + } else { + self.position += 1; + + (Token::Dot, Span(self.position - 1, self.position)) + } + } + '>' => { + if let Some('=') = self.peek_second_char() { + self.position += 2; + + (Token::GreaterEqual, Span(self.position - 2, self.position)) + } else { + self.position += 1; + + (Token::Greater, Span(self.position - 1, self.position)) + } + } + '<' => { + if let Some('=') = self.peek_second_char() { + self.position += 2; + + (Token::LessEqual, Span(self.position - 2, self.position)) + } else { + self.position += 1; + + (Token::Less, Span(self.position - 1, self.position)) + } + } + '{' => { + self.position += 1; + + ( + Token::LeftCurlyBrace, + Span(self.position - 1, self.position), + ) + } + '}' => { + self.position += 1; + + ( + Token::RightCurlyBrace, + Span(self.position - 1, self.position), + ) + } + '/' => { + self.position += 1; + + (Token::Slash, Span(self.position - 1, self.position)) + } + '%' => { + self.position += 1; + + (Token::Percent, Span(self.position - 1, self.position)) + } + '&' => { + if let Some('&') = self.peek_second_char() { + self.position += 2; + + ( + Token::DoubleAmpersand, + Span(self.position - 2, self.position), + ) + } else { + self.position += 1; + + return Err(LexError::UnexpectedCharacter { + actual: c, + position: self.position, + }); + } + } + ';' => { + self.position += 1; + + (Token::Semicolon, Span(self.position - 1, self.position)) + } + '|' => { + if let Some('|') = self.peek_second_char() { + self.position += 2; + + (Token::DoublePipe, Span(self.position - 2, self.position)) + } else { + self.position += 1; + + return Err(LexError::UnexpectedCharacter { + actual: c, + position: self.position, + }); + } + } + '!' 
=> { + self.position += 1; + + (Token::Bang, Span(self.position - 1, self.position)) + } + ':' => { + self.position += 1; + + (Token::Colon, Span(self.position - 1, self.position)) + } + _ => { + return Err(LexError::UnexpectedCharacter { + actual: c, + position: self.position, + }); + } + } + } else { + (Token::Eof, Span(self.position, self.position)) + }; + + Ok((token, span)) + } + + /// Peek at the next token without consuming the source. + pub fn peek_token(&mut self) -> Result<(Token<'src>, Span), LexError> { + let token = self.next_token()?; + + self.position -= token.0.len(); + + Ok(token) + } + + /// Progress to the next character. + fn next_char(&mut self) -> Option { + if let Some(c) = self.source[self.position..].chars().next() { + self.position += c.len_utf8(); + + Some(c) + } else { + None + } + } + + /// Skip whitespace characters. + fn skip_whitespace(&mut self) { + while let Some(c) = self.peek_char() { + if c.is_whitespace() { + self.next_char(); + } else { + break; + } + } + } + + /// Peek at the next character without consuming it. + fn peek_char(&self) -> Option { + self.source[self.position..].chars().next() + } + + /// Peek at the second-to-next character without consuming it. + fn peek_second_char(&self) -> Option { + self.source[self.position..].chars().nth(1) + } + + /// Peek the next `n` characters without consuming them. + fn peek_chars(&self, n: usize) -> &'src str { + let remaining_source = &self.source[self.position..]; + + if remaining_source.len() < n { + remaining_source + } else { + &remaining_source[..n] + } + } + + /// Lex an integer or float token. + fn lex_number(&mut self) -> Result<(Token<'src>, Span), LexError> { + let start_pos = self.position; + let mut is_float = false; + + if let Some('-') = self.peek_char() { + self.next_char(); + } + + while let Some(c) = self.peek_char() { + if c == '.' 
{ + if let Some('0'..='9') = self.peek_second_char() { + if !is_float { + self.next_char(); + } + + self.next_char(); + + loop { + let peek_char = self.peek_char(); + + if let Some('0'..='9') = peek_char { + self.next_char(); + } else if let Some('e') = peek_char { + if let Some('0'..='9') = self.peek_second_char() { + self.next_char(); + self.next_char(); + } else { + break; + } + } else { + break; + } + } + + is_float = true; + } else { + break; + } + } + + if c.is_ascii_digit() { + self.next_char(); + } else { + break; + } + } + + let text = &self.source[start_pos..self.position]; + + if is_float { + Ok((Token::Float(text), Span(start_pos, self.position))) + } else { + Ok((Token::Integer(text), Span(start_pos, self.position))) + } + } + + /// Lex an identifier token. + fn lex_alphanumeric(&mut self) -> Result<(Token<'src>, Span), LexError> { + let start_pos = self.position; + + while let Some(c) = self.peek_char() { + if c.is_ascii_alphanumeric() || c == '_' { + self.next_char(); + } else { + break; + } + } + + let string = &self.source[start_pos..self.position]; + let token = match string { + "Infinity" => Token::Float("Infinity"), + "NaN" => Token::Float("NaN"), + "async" => Token::Async, + "bool" => Token::Bool, + "break" => Token::Break, + "else" => Token::Else, + "false" => Token::Boolean("false"), + "float" => Token::FloatKeyword, + "if" => Token::If, + "int" => Token::Int, + "let" => Token::Let, + "loop" => Token::Loop, + "map" => Token::Map, + "mut" => Token::Mut, + "struct" => Token::Struct, + "true" => Token::Boolean("true"), + "while" => Token::While, + _ => Token::Identifier(string), + }; + + Ok((token, Span(start_pos, self.position))) + } + + fn lex_string(&mut self) -> Result<(Token<'src>, Span), LexError> { + let start_pos = self.position; + + self.next_char(); + + while let Some(c) = self.peek_char() { + if c == '"' { + self.next_char(); + break; + } else { + self.next_char(); + } + } + + let text = &self.source[start_pos + 1..self.position - 1]; 
+ + Ok((Token::String(text), Span(start_pos, self.position))) + } +} + +#[derive(Debug, PartialEq, Clone)] +pub enum LexError { + ExpectedCharacter { + expected: char, + actual: char, + position: usize, + }, + UnexpectedCharacter { + actual: char, + position: usize, + }, + UnexpectedEndOfFile { + position: usize, + }, +} + +impl LexError { + pub fn position(&self) -> Span { + match self { + Self::ExpectedCharacter { position, .. } => Span(*position, *position), + Self::UnexpectedCharacter { position, .. } => Span(*position, *position), + Self::UnexpectedEndOfFile { position } => Span(*position, *position), + } + } +} + +impl Display for LexError { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Self::ExpectedCharacter { + expected, actual, .. + } => write!(f, "Expected character '{expected}', found '{actual}'"), + Self::UnexpectedCharacter { actual, .. } => { + write!(f, "Unexpected character '{actual}'") + } + Self::UnexpectedEndOfFile { .. } => { + write!(f, "Unexpected end of file") + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn character() { + let input = "'a'"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Character('a'), Span(0, 3)), + (Token::Eof, Span(3, 3)), + ]) + ); + } + + #[test] + fn map_expression() { + let input = "map { x = \"1\", y = 2, z = 3.0 }"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Map, Span(0, 3)), + (Token::LeftCurlyBrace, Span(4, 5)), + (Token::Identifier("x"), Span(6, 7)), + (Token::Equal, Span(8, 9)), + (Token::String("1"), Span(10, 13)), + (Token::Comma, Span(13, 14)), + (Token::Identifier("y"), Span(15, 16)), + (Token::Equal, Span(17, 18)), + (Token::Integer("2"), Span(19, 20)), + (Token::Comma, Span(20, 21)), + (Token::Identifier("z"), Span(22, 23)), + (Token::Equal, Span(24, 25)), + (Token::Float("3.0"), Span(26, 29)), + (Token::RightCurlyBrace, Span(30, 31)), + (Token::Eof, Span(31, 31)), + ]) + ); + } + + #[test] + fn let_statement() { + let input = "let x = 42"; + 
+ assert_eq!( + lex(input), + Ok(vec![ + (Token::Let, Span(0, 3)), + (Token::Identifier("x"), Span(4, 5)), + (Token::Equal, Span(6, 7)), + (Token::Integer("42"), Span(8, 10)), + (Token::Eof, Span(10, 10)), + ]) + ); + } + + #[test] + fn unit_struct() { + let input = "struct Foo"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Struct, Span(0, 6)), + (Token::Identifier("Foo"), Span(7, 10)), + (Token::Eof, Span(10, 10)), + ]) + ); + } + + #[test] + fn tuple_struct() { + let input = "struct Foo(int, float)"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Struct, Span(0, 6)), + (Token::Identifier("Foo"), Span(7, 10)), + (Token::LeftParenthesis, Span(10, 11)), + (Token::Int, Span(11, 14)), + (Token::Comma, Span(14, 15)), + (Token::FloatKeyword, Span(16, 21)), + (Token::RightParenthesis, Span(21, 22)), + (Token::Eof, Span(22, 22)) + ]) + ); + } + + #[test] + fn fields_struct() { + let input = "struct FooBar { foo: int, bar: float }"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Struct, Span(0, 6)), + (Token::Identifier("FooBar"), Span(7, 13)), + (Token::LeftCurlyBrace, Span(14, 15)), + (Token::Identifier("foo"), Span(16, 19)), + (Token::Colon, Span(19, 20)), + (Token::Int, Span(21, 24)), + (Token::Comma, Span(24, 25)), + (Token::Identifier("bar"), Span(26, 29)), + (Token::Colon, Span(29, 30)), + (Token::FloatKeyword, Span(31, 36)), + (Token::RightCurlyBrace, Span(37, 38)), + (Token::Eof, Span(38, 38)) + ]) + ); + } + + #[test] + fn list_index() { + let input = "[1, 2, 3][1]"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::LeftSquareBrace, Span(0, 1)), + (Token::Integer("1"), Span(1, 2)), + (Token::Comma, Span(2, 3)), + (Token::Integer("2"), Span(4, 5)), + (Token::Comma, Span(5, 6)), + (Token::Integer("3"), Span(7, 8)), + (Token::RightSquareBrace, Span(8, 9)), + (Token::LeftSquareBrace, Span(9, 10)), + (Token::Integer("1"), Span(10, 11)), + (Token::RightSquareBrace, Span(11, 12)), + (Token::Eof, Span(12, 12)), + ]) + ) + } + + #[test] + fn list() { + let 
input = "[1, 2, 3]"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::LeftSquareBrace, Span(0, 1)), + (Token::Integer("1"), Span(1, 2)), + (Token::Comma, Span(2, 3)), + (Token::Integer("2"), Span(4, 5)), + (Token::Comma, Span(5, 6)), + (Token::Integer("3"), Span(7, 8)), + (Token::RightSquareBrace, Span(8, 9)), + (Token::Eof, Span(9, 9)), + ]) + ) + } + + #[test] + fn map_field_access() { + let input = "{a = 1, b = 2, c = 3}.c"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::LeftCurlyBrace, Span(0, 1)), + (Token::Identifier("a"), Span(1, 2)), + (Token::Equal, Span(3, 4)), + (Token::Integer("1"), Span(5, 6)), + (Token::Comma, Span(6, 7)), + (Token::Identifier("b"), Span(8, 9)), + (Token::Equal, Span(10, 11)), + (Token::Integer("2"), Span(12, 13)), + (Token::Comma, Span(13, 14)), + (Token::Identifier("c"), Span(15, 16)), + (Token::Equal, Span(17, 18)), + (Token::Integer("3"), Span(19, 20)), + (Token::RightCurlyBrace, Span(20, 21)), + (Token::Dot, Span(21, 22)), + (Token::Identifier("c"), Span(22, 23)), + (Token::Eof, Span(23, 23)), + ]) + ) + } + + #[test] + fn range() { + let input = "0..42"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("0"), Span(0, 1)), + (Token::DoubleDot, Span(1, 3)), + (Token::Integer("42"), Span(3, 5)), + (Token::Eof, Span(5, 5)) + ]) + ); + } + + #[test] + fn negate_expression() { + let input = "x = -42; -x"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Identifier("x"), Span(0, 1)), + (Token::Equal, Span(2, 3)), + (Token::Integer("-42"), Span(4, 7)), + (Token::Semicolon, Span(7, 8)), + (Token::Minus, Span(9, 10)), + (Token::Identifier("x"), Span(10, 11)), + (Token::Eof, Span(11, 11)) + ]) + ); + } + + #[test] + fn not_expression() { + let input = "!true; !false"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Bang, Span(0, 1)), + (Token::Boolean("true"), Span(1, 5)), + (Token::Semicolon, Span(5, 6)), + (Token::Bang, Span(7, 8)), + (Token::Boolean("false"), Span(8, 13)), + (Token::Eof, Span(13, 13)) + ]) + ); + } + 
+ #[test] + fn if_else() { + let input = "if x < 10 { x + 1 } else { x }"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::If, Span(0, 2)), + (Token::Identifier("x"), Span(3, 4)), + (Token::Less, Span(5, 6)), + (Token::Integer("10"), Span(7, 9)), + (Token::LeftCurlyBrace, Span(10, 11)), + (Token::Identifier("x"), Span(12, 13)), + (Token::Plus, Span(14, 15)), + (Token::Integer("1"), Span(16, 17)), + (Token::RightCurlyBrace, Span(18, 19)), + (Token::Else, Span(20, 24)), + (Token::LeftCurlyBrace, Span(25, 26)), + (Token::Identifier("x"), Span(27, 28)), + (Token::RightCurlyBrace, Span(29, 30)), + (Token::Eof, Span(30, 30)), + ]) + ) + } + + #[test] + fn while_loop() { + let input = "while x < 10 { x += 1 }"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::While, Span(0, 5)), + (Token::Identifier("x"), Span(6, 7)), + (Token::Less, Span(8, 9)), + (Token::Integer("10"), Span(10, 12)), + (Token::LeftCurlyBrace, Span(13, 14)), + (Token::Identifier("x"), Span(15, 16)), + (Token::PlusEqual, Span(17, 19)), + (Token::Integer("1"), Span(20, 21)), + (Token::RightCurlyBrace, Span(22, 23)), + (Token::Eof, Span(23, 23)), + ]) + ) + } + + #[test] + fn add_assign() { + let input = "x += 42"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Identifier("x"), Span(0, 1)), + (Token::PlusEqual, Span(2, 4)), + (Token::Integer("42"), Span(5, 7)), + (Token::Eof, Span(7, 7)), + ]) + ) + } + + #[test] + fn or() { + let input = "true || false"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Boolean("true"), Span(0, 4)), + (Token::DoublePipe, Span(5, 7)), + (Token::Boolean("false"), Span(8, 13)), + (Token::Eof, Span(13, 13)), + ]) + ) + } + + #[test] + fn block() { + let input = "{ x = 42; y = \"foobar\" }"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::LeftCurlyBrace, Span(0, 1)), + (Token::Identifier("x"), Span(2, 3)), + (Token::Equal, Span(4, 5)), + (Token::Integer("42"), Span(6, 8)), + (Token::Semicolon, Span(8, 9)), + (Token::Identifier("y"), Span(10, 11)), + (Token::Equal, 
Span(12, 13)), + (Token::String("foobar"), Span(14, 22)), + (Token::RightCurlyBrace, Span(23, 24)), + (Token::Eof, Span(24, 24)), + ]) + ) + } + + #[test] + fn equal() { + let input = "42 == 42"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("42"), Span(0, 2)), + (Token::DoubleEqual, Span(3, 5)), + (Token::Integer("42"), Span(6, 8)), + (Token::Eof, Span(8, 8)), + ]) + ) + } + + #[test] + fn modulo() { + let input = "42 % 2"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("42"), Span(0, 2)), + (Token::Percent, Span(3, 4)), + (Token::Integer("2"), Span(5, 6)), + (Token::Eof, Span(6, 6)), + ]) + ) + } + + #[test] + fn divide() { + let input = "42 / 2"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("42"), Span(0, 2)), + (Token::Slash, Span(3, 4)), + (Token::Integer("2"), Span(5, 6)), + (Token::Eof, Span(6, 6)), + ]) + ) + } + + #[test] + fn greater_than() { + let input = ">"; + + assert_eq!( + lex(input), + Ok(vec![(Token::Greater, Span(0, 1)), (Token::Eof, Span(1, 1))]) + ) + } + + #[test] + fn greater_than_or_equal() { + let input = ">="; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::GreaterEqual, Span(0, 2)), + (Token::Eof, Span(2, 2)) + ]) + ) + } + + #[test] + fn less_than() { + let input = "<"; + + assert_eq!( + lex(input), + Ok(vec![(Token::Less, Span(0, 1)), (Token::Eof, Span(1, 1))]) + ) + } + + #[test] + fn less_than_or_equal() { + let input = "<="; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::LessEqual, Span(0, 2)), + (Token::Eof, Span(2, 2)) + ]) + ) + } + + #[test] + fn infinity() { + let input = "Infinity"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Float("Infinity"), Span(0, 8)), + (Token::Eof, Span(8, 8)), + ]) + ) + } + + #[test] + fn negative_infinity() { + let input = "-Infinity"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Float("-Infinity"), Span(0, 9)), + (Token::Eof, Span(9, 9)), + ]) + ) + } + + #[test] + fn nan() { + let input = "NaN"; + + assert!(lex(input).is_ok_and(|tokens| 
tokens[0].0 == Token::Float("NaN"))); + } + + #[test] + fn complex_float() { + let input = "42.42e42"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Float("42.42e42"), Span(0, 8)), + (Token::Eof, Span(8, 8)), + ]) + ) + } + + #[test] + fn max_integer() { + let input = "9223372036854775807"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("9223372036854775807"), Span(0, 19)), + (Token::Eof, Span(19, 19)), + ]) + ) + } + + #[test] + fn min_integer() { + let input = "-9223372036854775808"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("-9223372036854775808"), Span(0, 20)), + (Token::Eof, Span(20, 20)), + ]) + ) + } + + #[test] + fn subtract_negative_integers() { + let input = "-42 - -42"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("-42"), Span(0, 3)), + (Token::Minus, Span(4, 5)), + (Token::Integer("-42"), Span(6, 9)), + (Token::Eof, Span(9, 9)), + ]) + ) + } + + #[test] + fn negative_integer() { + let input = "-42"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("-42"), Span(0, 3)), + (Token::Eof, Span(3, 3)) + ]) + ) + } + + #[test] + fn read_line() { + let input = "read_line()"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Identifier("read_line"), Span(0, 9)), + (Token::LeftParenthesis, Span(9, 10)), + (Token::RightParenthesis, Span(10, 11)), + (Token::Eof, Span(11, 11)), + ]) + ) + } + + #[test] + fn write_line() { + let input = "write_line(\"Hello, world!\")"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Identifier("write_line"), Span(0, 10)), + (Token::LeftParenthesis, Span(10, 11)), + (Token::String("Hello, world!"), Span(11, 26)), + (Token::RightParenthesis, Span(26, 27)), + (Token::Eof, Span(27, 27)), + ]) + ) + } + + #[test] + fn string_concatenation() { + let input = "\"Hello, \" + \"world!\""; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::String("Hello, "), Span(0, 9)), + (Token::Plus, Span(10, 11)), + (Token::String("world!"), Span(12, 20)), + (Token::Eof, Span(20, 20)), + ]) + ) + 
} + + #[test] + fn string() { + let input = "\"Hello, world!\""; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::String("Hello, world!"), Span(0, 15)), + (Token::Eof, Span(15, 15)), + ]) + ) + } + + #[test] + fn r#true() { + let input = "true"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Boolean("true"), Span(0, 4)), + (Token::Eof, Span(4, 4)), + ]) + ) + } + + #[test] + fn r#false() { + let input = "false"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Boolean("false"), Span(0, 5)), + (Token::Eof, Span(5, 5)) + ]) + ) + } + + #[test] + fn property_access_function_call() { + let input = "42.is_even()"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("42"), Span(0, 2)), + (Token::Dot, Span(2, 3)), + (Token::Identifier("is_even"), Span(3, 10)), + (Token::LeftParenthesis, Span(10, 11)), + (Token::RightParenthesis, Span(11, 12)), + (Token::Eof, Span(12, 12)), + ]) + ) + } + + #[test] + fn empty() { + let input = ""; + + assert_eq!(lex(input), Ok(vec![(Token::Eof, Span(0, 0))])) + } + + #[test] + fn reserved_identifier() { + let input = "length"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Identifier("length"), Span(0, 6)), + (Token::Eof, Span(6, 6)), + ]) + ) + } + + #[test] + fn square_braces() { + let input = "[]"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::LeftSquareBrace, Span(0, 1)), + (Token::RightSquareBrace, Span(1, 2)), + (Token::Eof, Span(2, 2)), + ]) + ) + } + + #[test] + fn small_float() { + let input = "1.23"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Float("1.23"), Span(0, 4)), + (Token::Eof, Span(4, 4)), + ]) + ) + } + + #[test] + #[allow(clippy::excessive_precision)] + fn big_float() { + let input = "123456789.123456789"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Float("123456789.123456789"), Span(0, 19)), + (Token::Eof, Span(19, 19)), + ]) + ) + } + + #[test] + fn add() { + let input = "1 + 2"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("1"), Span(0, 1)), + (Token::Plus, Span(2, 
3)), + (Token::Integer("2"), Span(4, 5)), + (Token::Eof, Span(5, 5)), + ]) + ) + } + + #[test] + fn multiply() { + let input = "1 * 2"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("1"), Span(0, 1)), + (Token::Star, Span(2, 3)), + (Token::Integer("2"), Span(4, 5)), + (Token::Eof, Span(5, 5)), + ]) + ) + } + + #[test] + fn add_and_multiply() { + let input = "1 + 2 * 3"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("1"), Span(0, 1)), + (Token::Plus, Span(2, 3)), + (Token::Integer("2"), Span(4, 5)), + (Token::Star, Span(6, 7)), + (Token::Integer("3"), Span(8, 9)), + (Token::Eof, Span(9, 9)), + ]) + ); + } + + #[test] + fn assignment() { + let input = "a = 1 + 2 * 3"; + + assert_eq!( + lex(input,), + Ok(vec![ + (Token::Identifier("a"), Span(0, 1)), + (Token::Equal, Span(2, 3)), + (Token::Integer("1"), Span(4, 5)), + (Token::Plus, Span(6, 7)), + (Token::Integer("2"), Span(8, 9)), + (Token::Star, Span(10, 11)), + (Token::Integer("3"), Span(12, 13)), + (Token::Eof, Span(13, 13)), + ]) + ); + } +} diff --git a/dust-lang/src/lib.rs b/dust-lang/src/lib.rs index fce38b5..d80e9a4 100644 --- a/dust-lang/src/lib.rs +++ b/dust-lang/src/lib.rs @@ -18,14 +18,20 @@ pub mod bytecode; pub mod constructor; pub mod identifier; +pub mod lexer; +pub mod parser; +pub mod token; pub mod r#type; pub mod value; -pub use bytecode::*; -pub use constructor::*; -pub use identifier::*; -pub use r#type::*; -pub use value::*; +pub use bytecode::{Chunk, ChunkError, Instruction, Vm}; +pub use constructor::{ConstructError, Constructor}; +pub use identifier::Identifier; +pub use lexer::{LexError, Lexer}; +pub use parser::{ParseError, Parser}; +pub use r#type::{EnumType, FunctionType, RangeableType, StructType, Type, TypeConflict}; +pub use token::{Token, TokenKind, TokenOwned}; +pub use value::{Struct, Value, ValueError}; use std::fmt::{self, Display, Formatter}; diff --git a/dust-lang/src/parser.rs b/dust-lang/src/parser.rs new file mode 100644 index 0000000..1b70523 --- 
/dev/null +++ b/dust-lang/src/parser.rs @@ -0,0 +1,122 @@ +use std::num::ParseIntError; + +use crate::{ + Chunk, ChunkError, Instruction, LexError, Lexer, Span, Token, TokenKind, TokenOwned, Value, +}; + +#[derive(Debug)] +pub struct Parser<'src> { + lexer: Lexer<'src>, + current_token: Token<'src>, + current_position: Span, +} + +impl<'src> Parser<'src> { + pub fn new(mut lexer: Lexer<'src>) -> Self { + let (current_token, current_position) = + lexer.next_token().unwrap_or((Token::Eof, Span(0, 0))); + + Parser { + lexer, + current_token, + current_position, + } + } + + fn is_eof(&self) -> bool { + matches!(self.current_token, Token::Eof) + } + + fn advance(&mut self) -> Result<(), ParseError> { + let (token, position) = self.lexer.next_token()?; + + self.current_token = token; + self.current_position = position; + + Ok(()) + } + + fn consume(&mut self, expected: TokenKind) -> Result<(), ParseError> { + if self.current_token.kind() == expected { + self.advance() + } else { + Err(ParseError::ExpectedToken { + expected, + found: self.current_token.to_owned(), + position: self.current_position, + }) + } + } + + fn emit_instruction(&mut self, instruction: Instruction, chunk: &mut Chunk) { + chunk.write(instruction as u8, self.current_position); + } + + fn parse_prefix(&mut self, chunk: &mut Chunk) -> Result<(), ParseError> { + Ok(()) + } + + fn parse_primary(&mut self, chunk: &mut Chunk) -> Result<(), ParseError> { + match self.current_token { + Token::Integer(text) => { + let integer = text.parse::<i64>()?; + let value = Value::integer(integer); + let constant_index = chunk.push_constant(value)?; + + chunk.write(Instruction::Constant as u8, self.current_position); + chunk.write(constant_index, self.current_position); + } + Token::LeftParenthesis => {} + _ => { + return Err(ParseError::ExpectedTokenMultiple { + expected: vec![TokenKind::Integer], + found: self.current_token.to_owned(), + position: self.current_position, + }) + } + } + + Ok(()) + } + + pub fn
parse_postfix(&mut self, left: Value, chunk: &mut Chunk) -> Result<(), ParseError> { + Ok(()) + } +} + +#[derive(Debug, PartialEq)] +pub enum ParseError { + ExpectedToken { + expected: TokenKind, + found: TokenOwned, + position: Span, + }, + ExpectedTokenMultiple { + expected: Vec<TokenKind>, + found: TokenOwned, + position: Span, + }, + + // Wrappers around foreign errors + Chunk(ChunkError), + Lex(LexError), + ParseIntError(ParseIntError), +} + +impl From<ParseIntError> for ParseError { + fn from(error: ParseIntError) -> Self { + Self::ParseIntError(error) + } +} + +impl From<LexError> for ParseError { + fn from(error: LexError) -> Self { + Self::Lex(error) + } +} + +impl From<ChunkError> for ParseError { + fn from(error: ChunkError) -> Self { + Self::Chunk(error) + } +} diff --git a/dust-lang/src/token.rs b/dust-lang/src/token.rs new file mode 100644 index 0000000..2e41d95 --- /dev/null +++ b/dust-lang/src/token.rs @@ -0,0 +1,651 @@ +//! Token and TokenOwned types. +use std::fmt::{self, Display, Formatter}; + +use serde::{Deserialize, Serialize}; + +/// Source code token.
+#[derive(Debug, Serialize, Deserialize, PartialEq)] +pub enum Token<'src> { + // End of file + Eof, + + // Hard-coded values + Boolean(&'src str), + Character(char), + Float(&'src str), + Identifier(&'src str), + Integer(&'src str), + String(&'src str), + + // Keywords + Async, + Bool, + Break, + Else, + FloatKeyword, + If, + Int, + Let, + Loop, + Map, + Mut, + Str, + Struct, + While, + + // Symbols + BangEqual, + Bang, + Colon, + Comma, + Dot, + DoubleAmpersand, + DoubleDot, + DoubleEqual, + DoublePipe, + Equal, + Greater, + GreaterEqual, + LeftCurlyBrace, + LeftParenthesis, + LeftSquareBrace, + Less, + LessEqual, + Minus, + MinusEqual, + Percent, + Plus, + PlusEqual, + RightCurlyBrace, + RightParenthesis, + RightSquareBrace, + Semicolon, + Slash, + Star, +} + +impl<'src> Token<'src> { + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> usize { + match self { + Token::Eof => 0, + Token::Boolean(text) => text.len(), + Token::Character(_) => 3, + Token::Float(text) => text.len(), + Token::Identifier(text) => text.len(), + Token::Integer(text) => text.len(), + Token::String(text) => text.len() + 2, + Token::Async => 5, + Token::Bool => 4, + Token::Break => 5, + Token::Else => 4, + Token::FloatKeyword => 5, + Token::If => 2, + Token::Int => 3, + Token::Let => 3, + Token::Loop => 4, + Token::Map => 3, + Token::Mut => 3, + Token::Str => 3, + Token::Struct => 6, + Token::While => 5, + Token::BangEqual => 2, + Token::Bang => 1, + Token::Colon => 1, + Token::Comma => 1, + Token::Dot => 1, + Token::DoubleAmpersand => 2, + Token::DoubleDot => 2, + Token::DoubleEqual => 2, + Token::DoublePipe => 2, + Token::Equal => 1, + Token::Greater => 1, + Token::GreaterEqual => 2, + Token::LeftCurlyBrace => 1, + Token::LeftParenthesis => 1, + Token::LeftSquareBrace => 1, + Token::Less => 1, + Token::LessEqual => 2, + Token::Minus => 1, + Token::MinusEqual => 2, + Token::Percent => 1, + Token::Plus => 1, + Token::PlusEqual => 2, + Token::RightCurlyBrace => 1, + 
Token::RightParenthesis => 1, + Token::RightSquareBrace => 1, + Token::Semicolon => 1, + Token::Slash => 1, + Token::Star => 1, + } + } + + pub fn to_owned(&self) -> TokenOwned { + match self { + Token::Async => TokenOwned::Async, + Token::BangEqual => TokenOwned::BangEqual, + Token::Bang => TokenOwned::Bang, + Token::Bool => TokenOwned::Bool, + Token::Boolean(boolean) => TokenOwned::Boolean(boolean.to_string()), + Token::Break => TokenOwned::Break, + Token::Character(character) => TokenOwned::Character(*character), + Token::Colon => TokenOwned::Colon, + Token::Comma => TokenOwned::Comma, + Token::Dot => TokenOwned::Dot, + Token::DoubleAmpersand => TokenOwned::DoubleAmpersand, + Token::DoubleDot => TokenOwned::DoubleDot, + Token::DoubleEqual => TokenOwned::DoubleEqual, + Token::DoublePipe => TokenOwned::DoublePipe, + Token::Else => TokenOwned::Else, + Token::Eof => TokenOwned::Eof, + Token::Equal => TokenOwned::Equal, + Token::Float(float) => TokenOwned::Float(float.to_string()), + Token::FloatKeyword => TokenOwned::FloatKeyword, + Token::Greater => TokenOwned::Greater, + Token::GreaterEqual => TokenOwned::GreaterOrEqual, + Token::Identifier(text) => TokenOwned::Identifier(text.to_string()), + Token::If => TokenOwned::If, + Token::Int => TokenOwned::Int, + Token::Integer(integer) => TokenOwned::Integer(integer.to_string()), + Token::LeftCurlyBrace => TokenOwned::LeftCurlyBrace, + Token::LeftParenthesis => TokenOwned::LeftParenthesis, + Token::LeftSquareBrace => TokenOwned::LeftSquareBrace, + Token::Let => TokenOwned::Let, + Token::Less => TokenOwned::Less, + Token::LessEqual => TokenOwned::LessOrEqual, + Token::Loop => TokenOwned::Loop, + Token::Map => TokenOwned::Map, + Token::Minus => TokenOwned::Minus, + Token::MinusEqual => TokenOwned::MinusEqual, + Token::Mut => TokenOwned::Mut, + Token::Percent => TokenOwned::Percent, + Token::Plus => TokenOwned::Plus, + Token::PlusEqual => TokenOwned::PlusEqual, + Token::RightCurlyBrace => TokenOwned::RightCurlyBrace, + 
Token::RightParenthesis => TokenOwned::RightParenthesis, + Token::RightSquareBrace => TokenOwned::RightSquareBrace, + Token::Semicolon => TokenOwned::Semicolon, + Token::Star => TokenOwned::Star, + Token::Slash => TokenOwned::Slash, + Token::String(text) => TokenOwned::String(text.to_string()), + Token::Str => TokenOwned::Str, + Token::Struct => TokenOwned::Struct, + Token::While => TokenOwned::While, + } + } + + pub fn kind(&self) -> TokenKind { + match self { + Token::Async => TokenKind::Async, + Token::BangEqual => TokenKind::BangEqual, + Token::Bang => TokenKind::Bang, + Token::Bool => TokenKind::Bool, + Token::Boolean(_) => TokenKind::Boolean, + Token::Break => TokenKind::Break, + Token::Character(_) => TokenKind::Character, + Token::Colon => TokenKind::Colon, + Token::Comma => TokenKind::Comma, + Token::Dot => TokenKind::Dot, + Token::DoubleAmpersand => TokenKind::DoubleAmpersand, + Token::DoubleDot => TokenKind::DoubleDot, + Token::DoubleEqual => TokenKind::DoubleEqual, + Token::DoublePipe => TokenKind::DoublePipe, + Token::Else => TokenKind::Else, + Token::Eof => TokenKind::Eof, + Token::Equal => TokenKind::Equal, + Token::Float(_) => TokenKind::Float, + Token::FloatKeyword => TokenKind::FloatKeyword, + Token::Greater => TokenKind::Greater, + Token::GreaterEqual => TokenKind::GreaterOrEqual, + Token::Identifier(_) => TokenKind::Identifier, + Token::If => TokenKind::If, + Token::Int => TokenKind::Int, + Token::Integer(_) => TokenKind::Integer, + Token::LeftCurlyBrace => TokenKind::LeftCurlyBrace, + Token::LeftParenthesis => TokenKind::LeftParenthesis, + Token::LeftSquareBrace => TokenKind::LeftSquareBrace, + Token::Let => TokenKind::Let, + Token::Less => TokenKind::Less, + Token::LessEqual => TokenKind::LessOrEqual, + Token::Loop => TokenKind::Loop, + Token::Map => TokenKind::Map, + Token::Minus => TokenKind::Minus, + Token::MinusEqual => TokenKind::MinusEqual, + Token::Mut => TokenKind::Mut, + Token::Percent => TokenKind::Percent, + Token::Plus => 
TokenKind::Plus, + Token::PlusEqual => TokenKind::PlusEqual, + Token::RightCurlyBrace => TokenKind::RightCurlyBrace, + Token::RightParenthesis => TokenKind::RightParenthesis, + Token::RightSquareBrace => TokenKind::RightSquareBrace, + Token::Semicolon => TokenKind::Semicolon, + Token::Star => TokenKind::Star, + Token::Slash => TokenKind::Slash, + Token::Str => TokenKind::Str, + Token::String(_) => TokenKind::String, + Token::Struct => TokenKind::Struct, + Token::While => TokenKind::While, + } + } + + pub fn is_eof(&self) -> bool { + matches!(self, Token::Eof) + } + + pub fn precedence(&self) -> u8 { + match self { + Token::Dot => 9, + Token::LeftParenthesis | Token::LeftSquareBrace => 8, + Token::Star | Token::Slash | Token::Percent => 7, + Token::Minus | Token::Plus => 6, + Token::DoubleEqual + | Token::Less + | Token::LessEqual + | Token::Greater + | Token::GreaterEqual => 5, + Token::DoubleAmpersand => 4, + Token::DoublePipe => 3, + Token::DoubleDot => 2, + Token::Equal | Token::MinusEqual | Token::PlusEqual => 1, + _ => 0, + } + } + + pub fn is_left_associative(&self) -> bool { + matches!( + self, + Token::Dot + | Token::DoubleAmpersand + | Token::DoublePipe + | Token::Plus + | Token::Minus + | Token::Star + | Token::Slash + | Token::Percent + ) + } + + pub fn is_right_associative(&self) -> bool { + matches!(self, Token::Equal | Token::MinusEqual | Token::PlusEqual) + } + + pub fn is_prefix(&self) -> bool { + matches!(self, Token::Bang | Token::Minus | Token::Star) + } + + pub fn is_postfix(&self) -> bool { + matches!( + self, + Token::Dot | Token::LeftCurlyBrace | Token::LeftParenthesis | Token::LeftSquareBrace + ) + } +} + +impl<'src> Display for Token<'src> { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Token::Async => write!(f, "async"), + Token::BangEqual => write!(f, "!="), + Token::Bang => write!(f, "!"), + Token::Bool => write!(f, "bool"), + Token::Boolean(value) => write!(f, "{}", value), + Token::Break => write!(f, "break"), + 
Token::Character(value) => write!(f, "'{}'", value), + Token::Colon => write!(f, ":"), + Token::Comma => write!(f, ","), + Token::Dot => write!(f, "."), + Token::DoubleAmpersand => write!(f, "&&"), + Token::DoubleDot => write!(f, ".."), + Token::DoubleEqual => write!(f, "=="), + Token::DoublePipe => write!(f, "||"), + Token::Else => write!(f, "else"), + Token::Eof => write!(f, "EOF"), + Token::Equal => write!(f, "="), + Token::Float(value) => write!(f, "{}", value), + Token::FloatKeyword => write!(f, "float"), + Token::Greater => write!(f, ">"), + Token::GreaterEqual => write!(f, ">="), + Token::Identifier(value) => write!(f, "{}", value), + Token::If => write!(f, "if"), + Token::Int => write!(f, "int"), + Token::Integer(value) => write!(f, "{}", value), + Token::LeftCurlyBrace => write!(f, "{{"), + Token::LeftParenthesis => write!(f, "("), + Token::LeftSquareBrace => write!(f, "["), + Token::Let => write!(f, "let"), + Token::Less => write!(f, "<"), + Token::LessEqual => write!(f, "<="), + Token::Loop => write!(f, "loop"), + Token::Map => write!(f, "map"), + Token::Minus => write!(f, "-"), + Token::MinusEqual => write!(f, "-="), + Token::Mut => write!(f, "mut"), + Token::Percent => write!(f, "%"), + Token::Plus => write!(f, "+"), + Token::PlusEqual => write!(f, "+="), + Token::RightCurlyBrace => write!(f, "}}"), + Token::RightParenthesis => write!(f, ")"), + Token::RightSquareBrace => write!(f, "]"), + Token::Semicolon => write!(f, ";"), + Token::Slash => write!(f, "/"), + Token::Star => write!(f, "*"), + Token::Str => write!(f, "str"), + Token::String(value) => write!(f, "\"{}\"", value), + Token::Struct => write!(f, "struct"), + Token::While => write!(f, "while"), + } + } +} + +/// Owned version of `Token`, which owns all the strings. +/// +/// This is used for errors. 
+#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] +pub enum TokenOwned { + Eof, + + Identifier(String), + + // Hard-coded values + Boolean(String), + Character(char), + Float(String), + Integer(String), + String(String), + + // Keywords + Bool, + Break, + Else, + FloatKeyword, + If, + Int, + Let, + Loop, + Map, + Mut, + Str, + While, + + // Symbols + Async, + Bang, + BangEqual, + Colon, + Comma, + Dot, + DoubleAmpersand, + DoubleDot, + DoubleEqual, + DoublePipe, + Equal, + Greater, + GreaterOrEqual, + LeftCurlyBrace, + LeftParenthesis, + LeftSquareBrace, + Less, + LessOrEqual, + Minus, + MinusEqual, + Percent, + Plus, + PlusEqual, + RightCurlyBrace, + RightParenthesis, + RightSquareBrace, + Semicolon, + Star, + Struct, + Slash, +} + +impl Display for TokenOwned { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + TokenOwned::Async => Token::Async.fmt(f), + TokenOwned::Bang => Token::Bang.fmt(f), + TokenOwned::BangEqual => Token::BangEqual.fmt(f), + TokenOwned::Bool => Token::Bool.fmt(f), + TokenOwned::Boolean(boolean) => Token::Boolean(boolean).fmt(f), + TokenOwned::Break => Token::Break.fmt(f), + TokenOwned::Character(character) => Token::Character(*character).fmt(f), + TokenOwned::Colon => Token::Colon.fmt(f), + TokenOwned::Comma => Token::Comma.fmt(f), + TokenOwned::Dot => Token::Dot.fmt(f), + TokenOwned::DoubleAmpersand => Token::DoubleAmpersand.fmt(f), + TokenOwned::DoubleDot => Token::DoubleDot.fmt(f), + TokenOwned::DoubleEqual => Token::DoubleEqual.fmt(f), + TokenOwned::DoublePipe => Token::DoublePipe.fmt(f), + TokenOwned::Else => Token::Else.fmt(f), + TokenOwned::Eof => Token::Eof.fmt(f), + TokenOwned::Equal => Token::Equal.fmt(f), + TokenOwned::Float(float) => Token::Float(float).fmt(f), + TokenOwned::FloatKeyword => Token::FloatKeyword.fmt(f), + TokenOwned::Greater => Token::Greater.fmt(f), + TokenOwned::GreaterOrEqual => Token::GreaterEqual.fmt(f), + TokenOwned::Identifier(text) => Token::Identifier(text).fmt(f), + 
TokenOwned::If => Token::If.fmt(f), + TokenOwned::Int => Token::Int.fmt(f), + TokenOwned::Integer(integer) => Token::Integer(integer).fmt(f), + TokenOwned::LeftCurlyBrace => Token::LeftCurlyBrace.fmt(f), + TokenOwned::LeftParenthesis => Token::LeftParenthesis.fmt(f), + TokenOwned::LeftSquareBrace => Token::LeftSquareBrace.fmt(f), + TokenOwned::Let => Token::Let.fmt(f), + TokenOwned::Less => Token::Less.fmt(f), + TokenOwned::LessOrEqual => Token::LessEqual.fmt(f), + TokenOwned::Loop => Token::Loop.fmt(f), + TokenOwned::Map => Token::Map.fmt(f), + TokenOwned::Minus => Token::Minus.fmt(f), + TokenOwned::MinusEqual => Token::MinusEqual.fmt(f), + TokenOwned::Mut => Token::Mut.fmt(f), + TokenOwned::Percent => Token::Percent.fmt(f), + TokenOwned::Plus => Token::Plus.fmt(f), + TokenOwned::PlusEqual => Token::PlusEqual.fmt(f), + TokenOwned::RightCurlyBrace => Token::RightCurlyBrace.fmt(f), + TokenOwned::RightParenthesis => Token::RightParenthesis.fmt(f), + TokenOwned::RightSquareBrace => Token::RightSquareBrace.fmt(f), + TokenOwned::Semicolon => Token::Semicolon.fmt(f), + TokenOwned::Star => Token::Star.fmt(f), + TokenOwned::Slash => Token::Slash.fmt(f), + TokenOwned::Str => Token::Str.fmt(f), + TokenOwned::String(string) => Token::String(string).fmt(f), + TokenOwned::Struct => Token::Struct.fmt(f), + TokenOwned::While => Token::While.fmt(f), + } + } +} + +/// Token representation that holds no data. 
+#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] +pub enum TokenKind { + Eof, + + Identifier, + + // Hard-coded values + Boolean, + Character, + Float, + Integer, + String, + + // Keywords + Async, + Bool, + Break, + Else, + FloatKeyword, + If, + Int, + Let, + Loop, + Map, + Str, + While, + + // Symbols + BangEqual, + Bang, + Colon, + Comma, + Dot, + DoubleAmpersand, + DoubleDot, + DoubleEqual, + DoublePipe, + Equal, + Greater, + GreaterOrEqual, + LeftCurlyBrace, + LeftParenthesis, + LeftSquareBrace, + Less, + LessOrEqual, + Minus, + MinusEqual, + Mut, + Percent, + Plus, + PlusEqual, + RightCurlyBrace, + RightParenthesis, + RightSquareBrace, + Semicolon, + Star, + Struct, + Slash, +} + +impl Display for TokenKind { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + TokenKind::Async => Token::Async.fmt(f), + TokenKind::Bang => Token::Bang.fmt(f), + TokenKind::BangEqual => Token::BangEqual.fmt(f), + TokenKind::Bool => Token::Bool.fmt(f), + TokenKind::Boolean => write!(f, "boolean value"), + TokenKind::Break => Token::Break.fmt(f), + TokenKind::Character => write!(f, "character value"), + TokenKind::Colon => Token::Colon.fmt(f), + TokenKind::Comma => Token::Comma.fmt(f), + TokenKind::Dot => Token::Dot.fmt(f), + TokenKind::DoubleAmpersand => Token::DoubleAmpersand.fmt(f), + TokenKind::DoubleDot => Token::DoubleDot.fmt(f), + TokenKind::DoubleEqual => Token::DoubleEqual.fmt(f), + TokenKind::DoublePipe => Token::DoublePipe.fmt(f), + TokenKind::Else => Token::Else.fmt(f), + TokenKind::Eof => Token::Eof.fmt(f), + TokenKind::Equal => Token::Equal.fmt(f), + TokenKind::Float => write!(f, "float value"), + TokenKind::FloatKeyword => Token::FloatKeyword.fmt(f), + TokenKind::Greater => Token::Greater.fmt(f), + TokenKind::GreaterOrEqual => Token::GreaterEqual.fmt(f), + TokenKind::Identifier => write!(f, "identifier"), + TokenKind::If => Token::If.fmt(f), + TokenKind::Int => Token::Int.fmt(f), + TokenKind::Integer => write!(f, "integer value"), + 
TokenKind::LeftCurlyBrace => Token::LeftCurlyBrace.fmt(f), + TokenKind::LeftParenthesis => Token::LeftParenthesis.fmt(f), + TokenKind::LeftSquareBrace => Token::LeftSquareBrace.fmt(f), + TokenKind::Let => Token::Let.fmt(f), + TokenKind::Less => Token::Less.fmt(f), + TokenKind::LessOrEqual => Token::LessEqual.fmt(f), + TokenKind::Loop => Token::Loop.fmt(f), + TokenKind::Map => Token::Map.fmt(f), + TokenKind::Minus => Token::Minus.fmt(f), + TokenKind::MinusEqual => Token::MinusEqual.fmt(f), + TokenKind::Mut => Token::Mut.fmt(f), + TokenKind::Percent => Token::Percent.fmt(f), + TokenKind::Plus => Token::Plus.fmt(f), + TokenKind::PlusEqual => Token::PlusEqual.fmt(f), + TokenKind::RightCurlyBrace => Token::RightCurlyBrace.fmt(f), + TokenKind::RightParenthesis => Token::RightParenthesis.fmt(f), + TokenKind::RightSquareBrace => Token::RightSquareBrace.fmt(f), + TokenKind::Semicolon => Token::Semicolon.fmt(f), + TokenKind::Star => Token::Star.fmt(f), + TokenKind::Str => Token::Str.fmt(f), + TokenKind::Slash => Token::Slash.fmt(f), + TokenKind::String => write!(f, "string value"), + TokenKind::Struct => Token::Struct.fmt(f), + TokenKind::While => Token::While.fmt(f), + } + } +} + +#[cfg(test)] +pub(crate) mod tests { + use super::*; + + pub fn all_tokens<'src>() -> [Token<'src>; 47] { + [ + Token::Async, + Token::Bang, + Token::BangEqual, + Token::Bool, + Token::Break, + Token::Colon, + Token::Comma, + Token::Dot, + Token::DoubleAmpersand, + Token::DoubleDot, + Token::DoubleEqual, + Token::DoublePipe, + Token::Else, + Token::Eof, + Token::Equal, + Token::FloatKeyword, + Token::Greater, + Token::GreaterEqual, + Token::If, + Token::Int, + Token::LeftCurlyBrace, + Token::LeftParenthesis, + Token::LeftSquareBrace, + Token::Let, + Token::Less, + Token::LessEqual, + Token::Map, + Token::Minus, + Token::MinusEqual, + Token::Mut, + Token::Percent, + Token::Plus, + Token::PlusEqual, + Token::RightCurlyBrace, + Token::RightParenthesis, + Token::RightSquareBrace, + Token::Semicolon, + 
Token::Star, + Token::Str, + Token::Slash, + Token::Boolean("true"), + Token::Float("0.0"), + Token::Integer("0"), + Token::String("string"), + Token::Identifier("foobar"), + Token::Struct, + Token::While, + ] + } + + #[test] + fn token_displays() { + for token in all_tokens().iter() { + let display = token.to_string(); + + assert_eq!(display, token.to_owned().to_string()); + + if let Token::Boolean(_) + | Token::Float(_) + | Token::Identifier(_) + | Token::Integer(_) + | Token::String(_) = token + { + continue; + } else { + assert_eq!(display, token.kind().to_string()); + } + } + } +}