From 0d0a2d2237e593f2a9ac2bd522770ea625529e29 Mon Sep 17 00:00:00 2001 From: Jeff Date: Fri, 23 Aug 2024 05:24:48 -0400 Subject: [PATCH] Add character literal tokens; Change strings to double quotes only --- dust-lang/src/lexer.rs | 114 ++++++++++++++++---------- dust-lang/src/parser.rs | 17 +--- dust-lang/src/token.rs | 173 +++++++++++++++++++++++++++------------- dust-lang/src/vm.rs | 20 ++--- 4 files changed, 203 insertions(+), 121 deletions(-) diff --git a/dust-lang/src/lexer.rs b/dust-lang/src/lexer.rs index 9bc87c7..241c4f0 100644 --- a/dust-lang/src/lexer.rs +++ b/dust-lang/src/lexer.rs @@ -132,7 +132,29 @@ impl<'src> Lexer<'src> { } 'a'..='z' | 'A'..='Z' => self.lex_alphanumeric()?, '"' => self.lex_string('"')?, - '\'' => self.lex_string('\'')?, + '\'' => { + self.position += 1; + + if let Some(c) = self.peek_char() { + self.position += 1; + + if let Some('\'') = self.peek_char() { + self.position += 1; + + (Token::Character(c), (self.position - 3, self.position)) + } else { + return Err(LexError::ExpectedCharacter { + expected: '\'', + actual: c, + position: self.position, + }); + } + } else { + return Err(LexError::UnexpectedEndOfFile { + position: self.position, + }); + } + } '+' => { if let Some('=') = self.peek_second_char() { self.position += 2; @@ -247,7 +269,7 @@ impl<'src> Lexer<'src> { self.position += 1; return Err(LexError::UnexpectedCharacter { - character: c, + actual: c, position: self.position, }); } @@ -266,7 +288,7 @@ impl<'src> Lexer<'src> { self.position += 1; return Err(LexError::UnexpectedCharacter { - character: c, + actual: c, position: self.position, }); } @@ -285,7 +307,7 @@ impl<'src> Lexer<'src> { self.position += 1; return Err(LexError::UnexpectedCharacter { - character: c, + actual: c, position: self.position, }); } @@ -301,7 +323,7 @@ impl<'src> Lexer<'src> { pub fn peek_token(&mut self) -> Result<(Token<'src>, Span), LexError> { let token = self.next_token()?; - self.position -= token.0.as_str().len(); + self.position -= token.0.len(); Ok(token) } @@ -465,21 +487,26 @@ impl<'src> Lexer<'src> { #[derive(Debug, PartialEq, Clone)] pub enum LexError { - UnexpectedCharacter { character: char, position: usize }, + ExpectedCharacter { + expected: char, + actual: char, + position: usize, + }, + UnexpectedCharacter { + actual: char, + position: usize, + }, + UnexpectedEndOfFile { + position: usize, + }, } impl LexError { pub fn position(&self) -> Span { match self { - Self::UnexpectedCharacter { position, .. } => (*position, *position), - } - } -} - -impl Error for LexError { - fn source(&self) -> Option<&(dyn Error + 'static)> { - match self { - Self::UnexpectedCharacter { .. } => None, + Self::ExpectedCharacter { position, .. } => (*position, *position + 1), + Self::UnexpectedCharacter { position, .. } => (*position, *position + 1), + Self::UnexpectedEndOfFile { position } => (*position, *position), } } } @@ -487,8 +514,20 @@ impl Error for LexError { impl Display for LexError { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { - Self::UnexpectedCharacter { character, .. } => { - write!(f, "Unexpected character: '{}'", character) + Self::ExpectedCharacter { + expected, + actual, + position, + } => write!( + f, + "Expected character '{}' at {:?}, found '{}'", + expected, position, actual + ), + Self::UnexpectedCharacter { actual, position } => { + write!(f, "Unexpected character at {:?}: '{}'", position, actual) + } + Self::UnexpectedEndOfFile { position } => { + write!(f, "Unexpected end of file at {:?}", position) } } } @@ -498,9 +537,19 @@ impl Display for LexError { mod tests { use super::*; + #[test] + fn character() { + let input = "'a'"; + + assert_eq!( + lex(input), + Ok(vec![(Token::Character('a'), (0, 3)), (Token::Eof, (3, 3)),]) + ); + } + #[test] fn map_expression() { - let input = "map { x = '1', y = 2, z = 3.0 }"; + let input = "map { x = \"1\", y = 2, z = 3.0 }"; assert_eq!( lex(input), @@ -791,7 +840,7 @@ mod tests { #[test] fn block() { - let input = "{ x = 42; y = 'foobar' }"; + let input = "{ x = 42; y = \"foobar\" }"; assert_eq!( lex(input), @@ -855,27 +904,6 @@ mod tests { ) } - #[test] - fn map() { - let input = "{ x = 42, y = 'foobar' }"; - - assert_eq!( - lex(input), - Ok(vec![ - (Token::LeftCurlyBrace, (0, 1)), - (Token::Identifier("x"), (2, 3)), - (Token::Equal, (4, 5)), - (Token::Integer("42"), (6, 8)), - (Token::Comma, (8, 9)), - (Token::Identifier("y"), (10, 11)), - (Token::Equal, (12, 13)), - (Token::String("foobar"), (14, 22)), - (Token::RightCurlyBrace, (23, 24)), - (Token::Eof, (24, 24)), - ]) - ) - } - #[test] fn greater_than() { let input = ">"; @@ -1030,7 +1058,7 @@ mod tests { #[test] fn write_line() { - let input = "write_line('Hello, world!')"; + let input = "write_line(\"Hello, world!\")"; assert_eq!( lex(input), @@ -1046,7 +1074,7 @@ mod tests { #[test] fn string_concatenation() { - let input = "'Hello, ' + 'world!'"; + let input = "\"Hello, \" + \"world!\""; assert_eq!( lex(input), @@ -1061,7 +1089,7 @@ mod tests { #[test] fn string() { - let input = "'Hello, world!'"; + let input = "\"Hello, world!\""; assert_eq!( lex(input), diff --git a/dust-lang/src/parser.rs b/dust-lang/src/parser.rs index 22eb7ea..e233c57 100644 --- a/dust-lang/src/parser.rs +++ b/dust-lang/src/parser.rs @@ -1116,15 +1116,6 @@ impl ParseError { } } -impl Error for ParseError { - fn source(&self) -> Option<&(dyn Error + 'static)> { - match self { - Self::Lex(error) => Some(error), - _ => None, - } - } -} - impl Display for ParseError { fn fmt(&self, f: &mut Formatter) -> fmt::Result { match self { @@ -1215,7 +1206,7 @@ mod tests { #[test] fn map_expression() { - let source = "map { x = '1', y = 2, z = 3.0 }"; + let source = "map { x = \"1\", y = 2, z = 3.0 }"; assert_eq!( parse(source), @@ -1361,7 +1352,7 @@ mod tests { #[test] fn tuple_struct_access() { - let source = "Foo(42, 'bar').0"; + let source = "Foo(42, \"bar\").0"; assert_eq!( parse(source), @@ -1827,7 +1818,7 @@ mod tests { #[test] fn block_with_assignment() { - let source = "{ foo = 42; bar = 42; baz = '42' }"; + let source = "{ foo = 42; bar = 42; baz = \"42\" }"; assert_eq!( parse(source), @@ -2000,7 +1991,7 @@ mod tests { #[test] fn string_concatenation() { - let source = "'Hello, ' + 'World!'"; + let source = "\"Hello, \" + \"World!\""; assert_eq!( parse(source), diff --git a/dust-lang/src/token.rs b/dust-lang/src/token.rs index 9e613fe..02285e3 100644 --- a/dust-lang/src/token.rs +++ b/dust-lang/src/token.rs @@ -1,5 +1,8 @@ //! Token and TokenOwned types. -use std::fmt::{self, Display, Formatter}; +use std::{ + borrow::Borrow, + fmt::{self, Display, Formatter}, +}; use serde::{Deserialize, Serialize}; @@ -11,6 +14,7 @@ pub enum Token<'src> { // Hard-coded values Boolean(&'src str), + Character(char), Float(&'src str), Identifier(&'src str), Integer(&'src str), @@ -64,6 +68,61 @@ pub enum Token<'src> { } impl<'src> Token<'src> { + #[allow(clippy::len_without_is_empty)] + pub fn len(&self) -> usize { + match self { + Token::Eof => 0, + Token::Boolean(text) => text.len(), + Token::Character(_) => 3, + Token::Float(text) => text.len(), + Token::Identifier(text) => text.len(), + Token::Integer(text) => text.len(), + Token::String(text) => text.len() + 2, + Token::Async => 5, + Token::Bool => 4, + Token::Break => 5, + Token::Else => 4, + Token::FloatKeyword => 5, + Token::If => 2, + Token::Int => 3, + Token::Let => 3, + Token::Loop => 4, + Token::Map => 3, + Token::Mut => 3, + Token::Str => 3, + Token::Struct => 6, + Token::While => 5, + Token::BangEqual => 2, + Token::Bang => 1, + Token::Colon => 1, + Token::Comma => 1, + Token::Dot => 1, + Token::DoubleAmpersand => 2, + Token::DoubleDot => 2, + Token::DoubleEqual => 2, + Token::DoublePipe => 2, + Token::Equal => 1, + Token::Greater => 1, + Token::GreaterEqual => 2, + Token::LeftCurlyBrace => 1, + Token::LeftParenthesis => 1, + Token::LeftSquareBrace => 1, + Token::Less => 1, + Token::LessEqual => 2, + Token::Minus => 1, + Token::MinusEqual => 2, + Token::Percent => 1, + Token::Plus => 1, + Token::PlusEqual => 2, + Token::RightCurlyBrace => 1, + Token::RightParenthesis => 1, + Token::RightSquareBrace => 1, + Token::Semicolon => 1, + Token::Slash => 1, + Token::Star => 1, + } + } + pub fn to_owned(&self) -> TokenOwned { match self { Token::Async => TokenOwned::Async, @@ -72,6 +131,7 @@ impl<'src> Token<'src> { Token::Bool => TokenOwned::Bool, Token::Boolean(boolean) => TokenOwned::Boolean(boolean.to_string()), Token::Break => TokenOwned::Break, + Token::Character(character) => TokenOwned::Character(*character), Token::Colon => TokenOwned::Colon, Token::Comma => TokenOwned::Comma, Token::Dot => TokenOwned::Dot, @@ -117,60 +177,6 @@ impl<'src> Token<'src> { } } - pub fn as_str(&self) -> &str { - match self { - Token::Boolean(boolean_text) => boolean_text, - Token::Float(float_text) => float_text, - Token::Identifier(text) => text, - Token::Integer(integer_text) => integer_text, - Token::String(text) => text, - - Token::Async => "async", - Token::BangEqual => "!=", - Token::Bang => "!", - Token::Bool => "bool", - Token::Break => "break", - Token::Colon => ":", - Token::Comma => ",", - Token::Dot => ".", - Token::DoubleAmpersand => "&&", - Token::DoubleDot => "..", - Token::DoubleEqual => "==", - Token::DoublePipe => "||", - Token::Else => "else", - Token::Eof => "EOF", - Token::Equal => "=", - Token::FloatKeyword => "float", - Token::Greater => ">", - Token::GreaterEqual => ">=", - Token::If => "if", - Token::Int => "int", - Token::LeftCurlyBrace => "{", - Token::LeftParenthesis => "(", - Token::LeftSquareBrace => "[", - Token::Let => "let", - Token::Less => "<", - Token::LessEqual => "<=", - Token::Loop => "loop", - Token::Map => "map", - Token::Minus => "-", - Token::MinusEqual => "-=", - Token::Mut => "mut", - Token::Percent => "%", - Token::Plus => "+", - Token::PlusEqual => "+=", - Token::RightCurlyBrace => "}", - Token::RightParenthesis => ")", - Token::RightSquareBrace => "]", - Token::Semicolon => ";", - Token::Star => "*", - Token::Slash => "/", - Token::Str => "str", - Token::Struct => "struct", - Token::While => "while", - } - } - pub fn kind(&self) -> TokenKind { match self { Token::Async => TokenKind::Async, @@ -179,6 +185,7 @@ impl<'src> Token<'src> { Token::Bool => TokenKind::Bool, Token::Boolean(_) => TokenKind::Boolean, Token::Break => TokenKind::Break, + Token::Character(_) => TokenKind::Character, Token::Colon => TokenKind::Colon, Token::Comma => TokenKind::Comma, Token::Dot => TokenKind::Dot, @@ -279,7 +286,57 @@ impl<'src> Token<'src> { impl<'src> Display for Token<'src> { fn fmt(&self, f: &mut Formatter) -> fmt::Result { - write!(f, "{}", self.as_str()) + match self { + Token::Async => write!(f, "async"), + Token::BangEqual => write!(f, "!="), + Token::Bang => write!(f, "!"), + Token::Bool => write!(f, "bool"), + Token::Boolean(value) => write!(f, "{}", value), + Token::Break => write!(f, "break"), + Token::Character(value) => write!(f, "'{}'", value), + Token::Colon => write!(f, ":"), + Token::Comma => write!(f, ","), + Token::Dot => write!(f, "."), + Token::DoubleAmpersand => write!(f, "&&"), + Token::DoubleDot => write!(f, ".."), + Token::DoubleEqual => write!(f, "=="), + Token::DoublePipe => write!(f, "||"), + Token::Else => write!(f, "else"), + Token::Eof => write!(f, "EOF"), + Token::Equal => write!(f, "="), + Token::Float(value) => write!(f, "{}", value), + Token::FloatKeyword => write!(f, "float"), + Token::Greater => write!(f, ">"), + Token::GreaterEqual => write!(f, ">="), + Token::Identifier(value) => write!(f, "{}", value), + Token::If => write!(f, "if"), + Token::Int => write!(f, "int"), + Token::Integer(value) => write!(f, "{}", value), + Token::LeftCurlyBrace => write!(f, "{{"), + Token::LeftParenthesis => write!(f, "("), + Token::LeftSquareBrace => write!(f, "["), + Token::Let => write!(f, "let"), + Token::Less => write!(f, "<"), + Token::LessEqual => write!(f, "<="), + Token::Loop => write!(f, "loop"), + Token::Map => write!(f, "map"), + Token::Minus => write!(f, "-"), + Token::MinusEqual => write!(f, "-="), + Token::Mut => write!(f, "mut"), + Token::Percent => write!(f, "%"), + Token::Plus => write!(f, "+"), + Token::PlusEqual => write!(f, "+="), + Token::RightCurlyBrace => write!(f, "}}"), + Token::RightParenthesis => write!(f, ")"), + Token::RightSquareBrace => write!(f, "]"), + Token::Semicolon => write!(f, ";"), + Token::Slash => write!(f, "/"), + Token::Star => write!(f, "*"), + Token::Str => write!(f, "str"), + Token::String(value) => write!(f, "\"{}\"", value), + Token::Struct => write!(f, "struct"), + Token::While => write!(f, "while"), + } } } @@ -294,6 +351,7 @@ pub enum TokenOwned { // Hard-coded values Boolean(String), + Character(char), Float(String), Integer(String), String(String), @@ -354,6 +412,7 @@ impl Display for TokenOwned { TokenOwned::Bool => Token::Bool.fmt(f), TokenOwned::Boolean(boolean) => Token::Boolean(boolean).fmt(f), TokenOwned::Break => Token::Break.fmt(f), + TokenOwned::Character(character) => Token::Character(*character).fmt(f), TokenOwned::Colon => Token::Colon.fmt(f), TokenOwned::Comma => Token::Comma.fmt(f), TokenOwned::Dot => Token::Dot.fmt(f), @@ -409,6 +468,7 @@ pub enum TokenKind { // Hard-coded values Boolean, + Character, Float, Integer, String, @@ -469,6 +529,7 @@ impl Display for TokenKind { TokenKind::Bool => Token::Bool.fmt(f), TokenKind::Boolean => write!(f, "boolean value"), TokenKind::Break => Token::Break.fmt(f), + TokenKind::Character => write!(f, "character value"), TokenKind::Colon => Token::Colon.fmt(f), TokenKind::Comma => Token::Comma.fmt(f), TokenKind::Dot => Token::Dot.fmt(f), diff --git a/dust-lang/src/vm.rs b/dust-lang/src/vm.rs index 5edeb50..b0f670c 100644 --- a/dust-lang/src/vm.rs +++ b/dust-lang/src/vm.rs @@ -113,7 +113,11 @@ impl Vm { statement: Statement, collect_garbage: bool, ) -> Result, RuntimeError> { - log::debug!("Running statement: {}", statement); + log::trace!( + "Running statement at {:?}: {}", + statement.position(), + statement + ); let position = statement.position(); let result = match statement { @@ -232,7 +236,11 @@ impl Vm { expression: Expression, collect_garbage: bool, ) -> Result { - log::debug!("Running expression: {}", expression); + log::trace!( + "Running expression at {:?}: {}", + expression.position(), + expression + ); let position = expression.position(); let evaluation_result = match expression { @@ -291,8 +299,6 @@ impl Vm { } fn run_identifier(&self, identifier: Node) -> Result { - log::debug!("Running identifier: {}", identifier); - let get_data = self.context.get_data(&identifier.inner).map_err(|error| { RuntimeError::ContextError { error, @@ -323,8 +329,6 @@ impl Vm { struct_expression: StructExpression, collect_garbage: bool, ) -> Result { - log::debug!("Running struct expression: {struct_expression}"); - let StructExpression::Fields { name, fields } = struct_expression; let position = name.position; @@ -723,8 +727,6 @@ impl Vm { call_expression: CallExpression, collect_garbage: bool, ) -> Result { - log::debug!("Running call expression: {call_expression}"); - let CallExpression { invoker, arguments } = call_expression; let invoker_position = invoker.position(); @@ -1409,7 +1411,7 @@ mod tests { #[test] fn string_index() { - let input = "'foo'[0]"; + let input = "\"foo\"[0]"; assert_eq!(run(input), Ok(Some(Value::character('f')))); }