From 0ba54e9717b2e1ac66e81243911b8e4ab9b9c41a Mon Sep 17 00:00:00 2001 From: Jeff Date: Mon, 12 Aug 2024 10:08:34 -0400 Subject: [PATCH] Add another token type; Add ranges --- dust-lang/src/lexer.rs | 25 ++++++- dust-lang/src/lib.rs | 2 +- dust-lang/src/parser.rs | 61 ++++++++++++---- dust-lang/src/token.rs | 152 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 223 insertions(+), 17 deletions(-) diff --git a/dust-lang/src/lexer.rs b/dust-lang/src/lexer.rs index 421b0ec..f7c2cfb 100644 --- a/dust-lang/src/lexer.rs +++ b/dust-lang/src/lexer.rs @@ -174,9 +174,15 @@ impl Lexer { (Token::Comma, (self.position - 1, self.position)) } '.' => { - self.position += 1; + if let Some('.') = self.peek_second_char(source) { + self.position += 2; - (Token::Dot, (self.position - 1, self.position)) + (Token::DoubleDot, (self.position - 2, self.position)) + } else { + self.position += 1; + + (Token::Dot, (self.position - 1, self.position)) + } } '>' => { if let Some('=') = self.peek_second_char(source) { @@ -484,6 +490,21 @@ impl Display for LexError { mod tests { use super::*; + #[test] + fn range() { + let input = "0..42"; + + assert_eq!( + lex(input), + Ok(vec![ + (Token::Integer("0"), (0, 1)), + (Token::DoubleDot, (1, 3)), + (Token::Integer("42"), (3, 5)), + (Token::Eof, (5, 5)) + ]) + ); + } + #[test] fn negate_expression() { let input = "x = -42; -x"; diff --git a/dust-lang/src/lib.rs b/dust-lang/src/lib.rs index 415f3dd..4cfeffa 100644 --- a/dust-lang/src/lib.rs +++ b/dust-lang/src/lib.rs @@ -23,7 +23,7 @@ pub use identifier::Identifier; pub use lexer::{lex, LexError, Lexer}; pub use parser::{parse, ParseError, Parser}; pub use r#type::Type; -pub use token::{Token, TokenOwned}; +pub use token::{Token, TokenKind, TokenOwned}; pub use value::{Value, ValueError}; pub use vm::{run, Vm, VmError}; diff --git a/dust-lang/src/parser.rs b/dust-lang/src/parser.rs index 6d98a48..d92880b 100644 --- a/dust-lang/src/parser.rs +++ b/dust-lang/src/parser.rs @@ -13,7 +13,7 @@ use std::{ use crate::{ AbstractSyntaxTree, BinaryOperator, BuiltInFunction, DustError, Identifier, LexError, Lexer, - Node, Span, Statement, Token, TokenOwned, UnaryOperator, Value, + Node, Span, Statement, Token, TokenKind, TokenOwned, UnaryOperator, Value, }; /// Parses the input into an abstract syntax tree. @@ -225,13 +225,36 @@ impl<'src> Parser<'src> { self.next_token()?; let integer = text - .parse() + .parse::() .map_err(|error| ParseError::IntegerError { error, position })?; - Ok(Node::new( - Statement::Constant(Value::integer(integer)), - position, - )) + if let Token::DoubleDot = self.current.0 { + self.next_token()?; + + if let Token::Integer(range_end) = self.current.0 { + self.next_token()?; + + let range_end = range_end + .parse::() + .map_err(|error| ParseError::IntegerError { error, position })?; + + Ok(Node::new( + Statement::Constant(Value::range(integer..range_end)), + (position.0, self.current.1 .1), + )) + } else { + Err(ParseError::ExpectedToken { + expected: TokenKind::Integer, + actual: self.current.0.to_owned(), + position: (position.0, self.current.1 .1), + }) + } + } else { + Ok(Node::new( + Statement::Constant(Value::integer(integer)), + position, + )) + } } (Token::Identifier(text), position) => { self.next_token()?; @@ -433,7 +456,7 @@ impl<'src> Parser<'src> { Ok(Node::new(node.inner, (left_position.0, right_position.1))) } else { Err(ParseError::ExpectedToken { - expected: TokenOwned::RightParenthesis, + expected: TokenKind::RightParenthesis, actual: self.current.0.to_owned(), position: self.current.1, }) @@ -464,7 +487,7 @@ impl<'src> Parser<'src> { nodes.push(instruction); } else { return Err(ParseError::ExpectedToken { - expected: TokenOwned::RightSquareBrace, + expected: TokenKind::RightSquareBrace, actual: self.current.0.to_owned(), position: self.current.1, }); @@ -496,7 +519,7 @@ impl<'src> Parser<'src> { self.next_token()?; } else { return Err(ParseError::ExpectedToken { - expected: TokenOwned::LeftParenthesis, + expected: TokenKind::LeftParenthesis, actual: self.current.0.to_owned(), position: self.current.1, }); @@ -523,7 +546,7 @@ impl<'src> Parser<'src> { } } else { return Err(ParseError::ExpectedToken { - expected: TokenOwned::RightParenthesis, + expected: TokenKind::RightParenthesis, actual: self.current.0.to_owned(), position: self.current.1, }); @@ -547,7 +570,7 @@ impl<'src> Parser<'src> { if let Token::LeftCurlyBrace = self.current.0 { } else { return Err(ParseError::ExpectedToken { - expected: TokenOwned::LeftCurlyBrace, + expected: TokenKind::LeftCurlyBrace, actual: self.current.0.to_owned(), position: self.current.1, }); @@ -707,7 +730,7 @@ impl<'src> Parser<'src> { self.next_token()?; } else { return Err(ParseError::ExpectedToken { - expected: TokenOwned::LeftCurlyBrace, + expected: TokenKind::LeftCurlyBrace, actual: self.current.0.to_owned(), position: self.current.1, }); @@ -749,7 +772,7 @@ pub enum ParseError { position: Span, }, ExpectedToken { - expected: TokenOwned, + expected: TokenKind, actual: TokenOwned, position: Span, }, @@ -822,6 +845,18 @@ mod tests { use super::*; + #[test] + fn range() { + let input = "0..42"; + + assert_eq!( + parse(input), + Ok(AbstractSyntaxTree { + nodes: [Node::new(Statement::Constant(Value::range(0..42)), (0, 5))].into() + }) + ); + } + #[test] fn negate_variable() { let input = "a = 1; -a"; diff --git a/dust-lang/src/token.rs b/dust-lang/src/token.rs index 882d210..1cde745 100644 --- a/dust-lang/src/token.rs +++ b/dust-lang/src/token.rs @@ -32,6 +32,7 @@ pub enum Token<'src> { Comma, Dot, DoubleAmpersand, + DoubleDot, DoubleEqual, DoublePipe, Equal, @@ -62,6 +63,7 @@ impl<'src> Token<'src> { Token::Comma => TokenOwned::Comma, Token::Dot => TokenOwned::Dot, Token::DoubleAmpersand => TokenOwned::DoubleAmpersand, + Token::DoubleDot => TokenOwned::DoubleDot, Token::DoubleEqual => TokenOwned::DoubleEqual, Token::DoublePipe => TokenOwned::DoublePipe, Token::Else => TokenOwned::Else, @@ -110,6 +112,7 @@ impl<'src> Token<'src> { Token::Comma => ",", Token::Dot => ".", Token::DoubleAmpersand => "&&", + Token::DoubleDot => "..", Token::DoubleEqual => "==", Token::DoublePipe => "||", Token::Else => "else", @@ -144,6 +147,51 @@ impl<'src> Token<'src> { } } + pub fn kind(&self) -> TokenKind { + match self { + Token::Bang => TokenKind::Bang, + Token::Boolean(_) => TokenKind::Boolean, + Token::Comma => TokenKind::Comma, + Token::Dot => TokenKind::Dot, + Token::DoubleAmpersand => TokenKind::DoubleAmpersand, + Token::DoubleDot => TokenKind::DoubleDot, + Token::DoubleEqual => TokenKind::DoubleEqual, + Token::DoublePipe => TokenKind::DoublePipe, + Token::Else => TokenKind::Else, + Token::Eof => TokenKind::Eof, + Token::Equal => TokenKind::Equal, + Token::Float(_) => TokenKind::Float, + Token::Greater => TokenKind::Greater, + Token::GreaterEqual => TokenKind::GreaterOrEqual, + Token::Identifier(_) => TokenKind::Identifier, + Token::If => TokenKind::If, + Token::Integer(_) => TokenKind::Integer, + Token::IsEven => TokenKind::IsEven, + Token::IsOdd => TokenKind::IsOdd, + Token::LeftCurlyBrace => TokenKind::LeftCurlyBrace, + Token::LeftParenthesis => TokenKind::LeftParenthesis, + Token::LeftSquareBrace => TokenKind::LeftSquareBrace, + Token::Length => TokenKind::Length, + Token::Less => TokenKind::Less, + Token::LessEqual => TokenKind::LessOrEqual, + Token::Minus => TokenKind::Minus, + Token::Percent => TokenKind::Percent, + Token::Plus => TokenKind::Plus, + Token::PlusEqual => TokenKind::PlusEqual, + Token::ReadLine => TokenKind::ReadLine, + Token::RightCurlyBrace => TokenKind::RightCurlyBrace, + Token::RightParenthesis => TokenKind::RightParenthesis, + Token::RightSquareBrace => TokenKind::RightSquareBrace, + Token::Semicolon => TokenKind::Semicolon, + Token::Star => TokenKind::Star, + Token::Slash => TokenKind::Slash, + Token::String(_) => TokenKind::String, + Token::ToString => TokenKind::ToString, + Token::While => TokenKind::While, + Token::WriteLine => TokenKind::WriteLine, + } + } + pub fn is_eof(&self) -> bool { matches!(self, Token::Eof) } @@ -162,7 +210,7 @@ impl<'src> Token<'src> { Token::DoubleAmpersand => 4, Token::DoublePipe => 3, Token::Equal | Token::PlusEqual => 2, - Token::Semicolon => 1, + Token::DoubleDot | Token::Semicolon => 1, _ => 0, } } @@ -198,6 +246,7 @@ impl<'src> PartialEq for Token<'src> { (Token::Comma, Token::Comma) => true, (Token::Dot, Token::Dot) => true, (Token::DoubleAmpersand, Token::DoubleAmpersand) => true, + (Token::DoubleDot, Token::DoubleDot) => true, (Token::DoubleEqual, Token::DoubleEqual) => true, (Token::DoublePipe, Token::DoublePipe) => true, (Token::Else, Token::Else) => true, @@ -267,6 +316,7 @@ pub enum TokenOwned { Comma, Dot, DoubleAmpersand, + DoubleDot, DoubleEqual, DoublePipe, Equal, @@ -297,6 +347,7 @@ impl Display for TokenOwned { TokenOwned::Comma => Token::Comma.fmt(f), TokenOwned::Dot => Token::Dot.fmt(f), TokenOwned::DoubleAmpersand => Token::DoubleAmpersand.fmt(f), + TokenOwned::DoubleDot => Token::DoubleDot.fmt(f), TokenOwned::DoubleEqual => Token::DoubleEqual.fmt(f), TokenOwned::DoublePipe => Token::DoublePipe.fmt(f), TokenOwned::Else => Token::Else.fmt(f), @@ -334,3 +385,102 @@ impl Display for TokenOwned { } } } + +/// Token representation that holds no data. +#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] +pub enum TokenKind { + Eof, + + Identifier, + + // Hard-coded values + Boolean, + Float, + Integer, + String, + + // Keywords + Else, + If, + IsEven, + IsOdd, + Length, + ReadLine, + ToString, + While, + WriteLine, + + // Symbols + Bang, + Comma, + Dot, + DoubleAmpersand, + DoubleDot, + DoubleEqual, + DoublePipe, + Equal, + Greater, + GreaterOrEqual, + LeftCurlyBrace, + LeftParenthesis, + LeftSquareBrace, + Less, + LessOrEqual, + Minus, + Percent, + Plus, + PlusEqual, + RightCurlyBrace, + RightParenthesis, + RightSquareBrace, + Semicolon, + Star, + Slash, +} + +impl Display for TokenKind { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + match self { + TokenKind::Bang => Token::Bang.fmt(f), + TokenKind::Boolean => write!(f, "boolean"), + TokenKind::Comma => Token::Comma.fmt(f), + TokenKind::Dot => Token::Dot.fmt(f), + TokenKind::DoubleAmpersand => Token::DoubleAmpersand.fmt(f), + TokenKind::DoubleDot => Token::DoubleDot.fmt(f), + TokenKind::DoubleEqual => Token::DoubleEqual.fmt(f), + TokenKind::DoublePipe => Token::DoublePipe.fmt(f), + TokenKind::Else => Token::Else.fmt(f), + TokenKind::Eof => Token::Eof.fmt(f), + TokenKind::Equal => Token::Equal.fmt(f), + TokenKind::Float => write!(f, "float"), + TokenKind::Greater => Token::Greater.fmt(f), + TokenKind::GreaterOrEqual => Token::GreaterEqual.fmt(f), + TokenKind::Identifier => write!(f, "identifier"), + TokenKind::If => Token::If.fmt(f), + TokenKind::Integer => write!(f, "integer"), + TokenKind::IsEven => Token::IsEven.fmt(f), + TokenKind::IsOdd => Token::IsOdd.fmt(f), + TokenKind::LeftCurlyBrace => Token::LeftCurlyBrace.fmt(f), + TokenKind::LeftParenthesis => Token::LeftParenthesis.fmt(f), + TokenKind::LeftSquareBrace => Token::LeftSquareBrace.fmt(f), + TokenKind::Length => Token::Length.fmt(f), + TokenKind::Less => Token::Less.fmt(f), + TokenKind::LessOrEqual => Token::LessEqual.fmt(f), + TokenKind::Minus => Token::Minus.fmt(f), + TokenKind::Percent => Token::Percent.fmt(f), + TokenKind::Plus => Token::Plus.fmt(f), + TokenKind::PlusEqual => Token::PlusEqual.fmt(f), + TokenKind::ReadLine => Token::ReadLine.fmt(f), + TokenKind::RightCurlyBrace => Token::RightCurlyBrace.fmt(f), + TokenKind::RightParenthesis => Token::RightParenthesis.fmt(f), + TokenKind::RightSquareBrace => Token::RightSquareBrace.fmt(f), + TokenKind::Semicolon => Token::Semicolon.fmt(f), + TokenKind::Star => Token::Star.fmt(f), + TokenKind::Slash => Token::Slash.fmt(f), + TokenKind::String => write!(f, "string"), + TokenKind::ToString => Token::ToString.fmt(f), + TokenKind::While => Token::While.fmt(f), + TokenKind::WriteLine => Token::WriteLine.fmt(f), + } + } +}