From bb7cda12421b701507378a358b06bfe000b867c2 Mon Sep 17 00:00:00 2001 From: Jeff Date: Wed, 20 Mar 2024 11:43:47 -0400 Subject: [PATCH] Fix lexer and all broken tests --- dust-lang/src/lexer.rs | 148 ++++++++++++++++++++++------------- dust-lang/src/parser.rs | 60 +++++++------- dust-lang/tests/functions.rs | 2 +- dust-lang/tests/structs.rs | 4 +- dust-lang/tests/values.rs | 2 +- dust-lang/tests/variables.rs | 4 +- 6 files changed, 130 insertions(+), 90 deletions(-) diff --git a/dust-lang/src/lexer.rs b/dust-lang/src/lexer.rs index bd3f196..427abc6 100644 --- a/dust-lang/src/lexer.rs +++ b/dust-lang/src/lexer.rs @@ -1,6 +1,6 @@ use std::fmt::{self, Display, Formatter}; -use chumsky::{prelude::*, text::whitespace}; +use chumsky::prelude::*; use crate::error::Error; @@ -13,7 +13,48 @@ pub enum Token<'src> { Identifier(&'src str), Operator(Operator), Control(Control), - Keyword(&'src str), + Keyword(Keyword), +} + +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum Keyword { + Any, + Bool, + Break, + Else, + Float, + Int, + If, + List, + Map, + None, + Range, + Struct, + Str, + Loop, + While, +} + +impl Display for Keyword { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match self { + Keyword::Any => write!(f, "any"), + Keyword::Bool => write!(f, "bool"), + Keyword::Break => write!(f, "break"), + Keyword::Else => write!(f, "else"), + Keyword::Float => write!(f, "float"), + Keyword::Int => write!(f, "int"), + Keyword::If => write!(f, "if"), + Keyword::List => write!(f, "list"), + Keyword::Map => write!(f, "map"), + Keyword::None => write!(f, "none"), + Keyword::Range => write!(f, "range"), + Keyword::Struct => write!(f, "struct"), + Keyword::Str => write!(f, "str"), + Keyword::Loop => write!(f, "loop"), + Keyword::While => write!(f, "while"), + } + } } #[derive(Copy, Clone, Debug, PartialEq)] @@ -108,7 +149,7 @@ impl<'src> Display for Token<'src> { Token::Identifier(string) => write!(f, "{string}"), Token::Operator(operator) => write!(f, "{operator}"), Token::Control(control) => write!(f, "{control}"), - Token::Keyword(string) => write!(f, "{string}"), + Token::Keyword(keyword) => write!(f, "{keyword}"), } } } @@ -127,8 +168,8 @@ pub fn lexer<'src>() -> impl Parser< extra::Err>>, > { let boolean = choice(( - just("true").padded().to(Token::Boolean(true)), - just("false").padded().to(Token::Boolean(false)), + just("true").to(Token::Boolean(true)), + just("false").to(Token::Boolean(false)), )); let float_numeric = just('-') @@ -172,64 +213,63 @@ pub fn lexer<'src>() -> impl Parser< let operator = choice(( // logic - just("&&").padded().to(Operator::And), - just("==").padded().to(Operator::Equal), - just("!=").padded().to(Operator::NotEqual), - just(">=").padded().to(Operator::GreaterOrEqual), - just("<=").padded().to(Operator::LessOrEqual), - just(">").padded().to(Operator::Greater), - just("<").padded().to(Operator::Less), - just("!").padded().to(Operator::Not), - just("!=").padded().to(Operator::NotEqual), - just("||").padded().to(Operator::Or), + just("&&").to(Operator::And), + just("==").to(Operator::Equal), + just("!=").to(Operator::NotEqual), + just(">=").to(Operator::GreaterOrEqual), + just("<=").to(Operator::LessOrEqual), + just(">").to(Operator::Greater), + just("<").to(Operator::Less), + just("!").to(Operator::Not), + just("!=").to(Operator::NotEqual), + just("||").to(Operator::Or), // assignment - just("=").padded().to(Operator::Assign), - just("+=").padded().to(Operator::AddAssign), - just("-=").padded().to(Operator::SubAssign), + just("=").to(Operator::Assign), + just("+=").to(Operator::AddAssign), + just("-=").to(Operator::SubAssign), // math - just("+").padded().to(Operator::Add), - just("-").padded().to(Operator::Subtract), - just("*").padded().to(Operator::Multiply), - just("/").padded().to(Operator::Divide), - just("%").padded().to(Operator::Modulo), + just("+").to(Operator::Add), + just("-").to(Operator::Subtract), + just("*").to(Operator::Multiply), + just("/").to(Operator::Divide), + just("%").to(Operator::Modulo), )) .map(Token::Operator); let control = choice(( - just("->").padded().to(Control::Arrow), - just("{").padded().to(Control::CurlyOpen), - just("}").padded().to(Control::CurlyClose), - just("[").padded().to(Control::SquareOpen), - just("]").padded().to(Control::SquareClose), - just("(").padded().to(Control::ParenOpen), - just(")").padded().to(Control::ParenClose), - just(",").padded().to(Control::Comma), - just(";").padded().to(Control::Semicolon), - just("::").padded().to(Control::DoubleColon), - just(":").padded().to(Control::Colon), - just("..").padded().to(Control::DoubleDot), - just(".").padded().to(Control::Dot), + just("->").to(Control::Arrow), + just("{").to(Control::CurlyOpen), + just("}").to(Control::CurlyClose), + just("[").to(Control::SquareOpen), + just("]").to(Control::SquareClose), + just("(").to(Control::ParenOpen), + just(")").to(Control::ParenClose), + just(",").to(Control::Comma), + just(";").to(Control::Semicolon), + just("::").to(Control::DoubleColon), + just(":").to(Control::Colon), + just("..").to(Control::DoubleDot), + just(".").to(Control::Dot), )) .map(Token::Control); let keyword = choice(( - just("any").padded(), - just("bool").padded(), - just("break").padded(), - just("else").padded(), - just("float").padded(), - just("int").padded(), - just("if").padded(), - just("list").padded(), - just("map").padded(), - just("none").padded(), - just("range").padded(), - just("struct").padded(), - just("str").padded(), - just("loop").padded(), - just("while").padded(), + just("any").to(Keyword::Any), + just("bool").to(Keyword::Bool), + just("break").to(Keyword::Break), + just("else").to(Keyword::Else), + just("float").to(Keyword::Float), + just("int").to(Keyword::Int), + just("if").to(Keyword::If), + just("list").to(Keyword::List), + just("map").to(Keyword::Map), + just("none").to(Keyword::None), + just("range").to(Keyword::Range), + just("struct").to(Keyword::Struct), + just("str").to(Keyword::Str), + just("loop").to(Keyword::Loop), + just("while").to(Keyword::While), )) - .delimited_by(whitespace(), whitespace()) .map(Token::Keyword); choice(( @@ -263,7 +303,7 @@ mod tests { lex("1 + 1").unwrap(), vec![ (Token::Integer(1), (0..1).into()), - (Token::Operator(Operator::Add), (2..4).into()), + (Token::Operator(Operator::Add), (2..3).into()), (Token::Integer(1), (4..5).into()) ] ) @@ -271,7 +311,7 @@ mod tests { #[test] fn keywords() { - assert_eq!(lex("int").unwrap()[0].0, Token::Keyword("int")) + assert_eq!(lex("int").unwrap()[0].0, Token::Keyword(Keyword::Int)) } #[test] diff --git a/dust-lang/src/parser.rs b/dust-lang/src/parser.rs index e198ac3..4967733 100644 --- a/dust-lang/src/parser.rs +++ b/dust-lang/src/parser.rs @@ -5,7 +5,7 @@ use chumsky::{input::SpannedInput, pratt::*, prelude::*}; use crate::{ abstract_tree::*, error::Error, - lexer::{Control, Operator, Token}, + lexer::{Control, Keyword, Operator, Token}, }; pub type ParserInput<'src> = @@ -80,7 +80,7 @@ pub fn parser<'src>() -> impl Parser< return_type: Box::new(return_type), }); - let list_of = just(Token::Keyword("list")) + let list_of = just(Token::Keyword(Keyword::List)) .ignore_then(r#type.clone().delimited_by( just(Token::Control(Control::ParenOpen)), just(Token::Control(Control::ParenClose)), @@ -101,14 +101,14 @@ pub fn parser<'src>() -> impl Parser< function_type, list_of, list_exact, - just(Token::Keyword("any")).to(Type::Any), - just(Token::Keyword("bool")).to(Type::Boolean), - just(Token::Keyword("float")).to(Type::Float), - just(Token::Keyword("int")).to(Type::Integer), - just(Token::Keyword("none")).to(Type::None), - just(Token::Keyword("range")).to(Type::Range), - just(Token::Keyword("str")).to(Type::String), - just(Token::Keyword("list")).to(Type::List), + just(Token::Keyword(Keyword::Any)).to(Type::Any), + just(Token::Keyword(Keyword::Bool)).to(Type::Boolean), + just(Token::Keyword(Keyword::Float)).to(Type::Float), + just(Token::Keyword(Keyword::Int)).to(Type::Integer), + just(Token::Keyword(Keyword::None)).to(Type::None), + just(Token::Keyword(Keyword::Range)).to(Type::Range), + just(Token::Keyword(Keyword::Str)).to(Type::String), + just(Token::Keyword(Keyword::List)).to(Type::List), identifier.clone().try_map(move |identifier, span| { custom_types .0 @@ -395,7 +395,7 @@ pub fn parser<'src>() -> impl Parser< Statement::Expression(node).with_position(position) }); - let r#break = just(Token::Keyword("break")) + let r#break = just(Token::Keyword(Keyword::Break)) .map_with(|_, state| Statement::Break.with_position(state.span())); let assignment = positioned_identifier @@ -422,14 +422,14 @@ pub fn parser<'src>() -> impl Parser< .at_least(1) .collect() .delimited_by( - just(Token::Keyword("loop")).then(just(Token::Control(Control::CurlyOpen))), + just(Token::Keyword(Keyword::Loop)).then(just(Token::Control(Control::CurlyOpen))), just(Token::Control(Control::CurlyClose)), ) .map_with(|statements, state| { Statement::Loop(Loop::new(statements)).with_position(state.span()) }); - let r#while = just(Token::Keyword("while")) + let r#while = just(Token::Keyword(Keyword::While)) .ignore_then(positioned_expression.clone()) .then( positioned_statement @@ -445,11 +445,11 @@ pub fn parser<'src>() -> impl Parser< Statement::While(While::new(expression, statements)).with_position(state.span()) }); - let if_else = just(Token::Keyword("if")) + let if_else = just(Token::Keyword(Keyword::If)) .ignore_then(positioned_expression.clone()) .then(block.clone()) .then( - just(Token::Keyword("else")) + just(Token::Keyword(Keyword::Else)) .ignore_then(block.clone()) .or_not(), ) @@ -460,7 +460,7 @@ pub fn parser<'src>() -> impl Parser< let structure_field_definition = identifier.clone().then(type_specification.clone()); - let structure_definition = just(Token::Keyword("struct")) + let structure_definition = just(Token::Keyword(Keyword::Struct)) .ignore_then(identifier.clone()) .then( structure_field_definition @@ -595,14 +595,14 @@ mod tests { assert_eq!( parse(&lex("while true { output('hi') }").unwrap()).unwrap()[0].node, Statement::While(While::new( - Expression::Value(ValueNode::Boolean(true)).with_position((6, 11)), + Expression::Value(ValueNode::Boolean(true)).with_position((6, 10)), vec![ Statement::Expression(Expression::FunctionCall(FunctionCall::new( Expression::Identifier(Identifier::new("output")).with_position((13, 19)), vec![Expression::Value(ValueNode::String("hi".to_string())) .with_position((20, 24))] ))) - .with_position((13, 26)) + .with_position((13, 25)) ] )) ) @@ -614,7 +614,7 @@ mod tests { parse(&lex("foobar : bool = true").unwrap()).unwrap()[0].node, Statement::Assignment(Assignment::new( Identifier::new("foobar").with_position((0, 6)), - Some(Type::Boolean.with_position((9, 14))), + Some(Type::Boolean.with_position((9, 13))), AssignmentOperator::Assign, Statement::Expression(Expression::Value(ValueNode::Boolean(true))) .with_position((16, 20)) @@ -628,7 +628,7 @@ mod tests { parse(&lex("foobar: list = []").unwrap()).unwrap()[0].node, Statement::Assignment(Assignment::new( Identifier::new("foobar").with_position((0, 6)), - Some(Type::List.with_position((8, 13))), + Some(Type::List.with_position((8, 12))), AssignmentOperator::Assign, Statement::Expression(Expression::Value(ValueNode::List(vec![]))) .with_position((15, 17)) @@ -642,7 +642,7 @@ mod tests { parse(&lex("foobar : list(bool) = [true]").unwrap()).unwrap()[0].node, Statement::Assignment(Assignment::new( Identifier::new("foobar").with_position((0, 6)), - Some(Type::ListOf(Box::new(Type::Boolean)).with_position((9, 20))), + Some(Type::ListOf(Box::new(Type::Boolean)).with_position((9, 19))), AssignmentOperator::Assign, Statement::Expression(Expression::Value(ValueNode::List(vec![Expression::Value( ValueNode::Boolean(true) @@ -659,7 +659,7 @@ mod tests { parse(&lex("foobar : [bool, str] = [true, '42']").unwrap()).unwrap()[0], Statement::Assignment(Assignment::new( Identifier::new("foobar").with_position((0, 6)), - Some(Type::ListExact(vec![Type::Boolean, Type::String]).with_position((9, 21))), + Some(Type::ListExact(vec![Type::Boolean, Type::String]).with_position((9, 20))), AssignmentOperator::Assign, Statement::Expression(Expression::Value(ValueNode::List(vec![ Expression::Value(ValueNode::Boolean(true)).with_position((24, 28)), @@ -682,7 +682,7 @@ mod tests { parameter_types: vec![], return_type: Box::new(Type::Any) } - .with_position((9, 19)) + .with_position((9, 18)) ), AssignmentOperator::Assign, Statement::Expression(Expression::Identifier(Identifier::new("some_function"))) @@ -716,7 +716,7 @@ mod tests { parse(&lex("(x: int) : int { x }").unwrap()).unwrap()[0].node, Statement::Expression(Expression::Value(ValueNode::Function { parameters: vec![(Identifier::new("x"), Type::Integer.with_position((4, 7)))], - return_type: Type::Integer.with_position((11, 15)), + return_type: Type::Integer.with_position((11, 14)), body: Block::new(vec![Statement::Expression(Expression::Identifier( Identifier::new("x") ),) @@ -731,7 +731,7 @@ mod tests { assert_eq!( parse(&lex("if true { 'foo' }").unwrap()).unwrap()[0].node, Statement::IfElse(IfElse::new( - Expression::Value(ValueNode::Boolean(true)).with_position((3, 8)), + Expression::Value(ValueNode::Boolean(true)).with_position((3, 7)), Block::new(vec![Statement::Expression(Expression::Value( ValueNode::String("foo".to_string()) ),) @@ -746,7 +746,7 @@ mod tests { assert_eq!( parse(&lex("if true {'foo' } else { 'bar' }").unwrap()).unwrap()[0].node, Statement::IfElse(IfElse::new( - Expression::Value(ValueNode::Boolean(true)).with_position((3, 8)), + Expression::Value(ValueNode::Boolean(true)).with_position((3, 7)), Block::new(vec![Statement::Expression(Expression::Value( ValueNode::String("foo".to_string()) ),) @@ -829,7 +829,7 @@ mod tests { Expression::Value(ValueNode::Integer(2)).with_position((14, 15)) ))) .with_position((10, 15)), - Block::new(vec![Statement::Break.with_position((18, 24))]), + Block::new(vec![Statement::Break.with_position((18, 23))]), Some(Block::new(vec![Statement::Assignment(Assignment::new( Identifier::new("i").with_position((33, 34)), None, @@ -839,7 +839,7 @@ mod tests { )) .with_position((33, 39))])) ),) - .with_position((7, 42))])) + .with_position((7, 41))])) ); } @@ -936,7 +936,7 @@ mod tests { parse(&lex("foobar: int = 1").unwrap()).unwrap()[0].node, Statement::Assignment(Assignment::new( Identifier::new("foobar").with_position((0, 6)), - Some(Type::Integer.with_position((8, 12))), + Some(Type::Integer.with_position((8, 11))), AssignmentOperator::Assign, Statement::Expression(Expression::Value(ValueNode::Integer(1))) .with_position((14, 15)) @@ -985,7 +985,7 @@ mod tests { ))) .with_position((13, 19)), ))) - .with_position((0, 21)), + .with_position((0, 20)), Expression::Value(ValueNode::Boolean(true)).with_position((24, 28)) ))),) ); diff --git a/dust-lang/tests/functions.rs b/dust-lang/tests/functions.rs index 393d666..c8498e6 100644 --- a/dust-lang/tests/functions.rs +++ b/dust-lang/tests/functions.rs @@ -62,7 +62,7 @@ fn function_context_does_not_capture_values() { ), Err(vec![Error::Validation { error: ValidationError::VariableNotFound(Identifier::new("x")), - position: (32, 66).into() + position: (32, 52).into() }]) ); diff --git a/dust-lang/tests/structs.rs b/dust-lang/tests/structs.rs index 83832d9..11ca475 100644 --- a/dust-lang/tests/structs.rs +++ b/dust-lang/tests/structs.rs @@ -53,7 +53,7 @@ fn field_type_error() { actual_position: (128, 134).into(), expected_position: (56, 59).into() }, - position: (96, 166).into() + position: (96, 153).into() }]) ) } @@ -102,7 +102,7 @@ fn undefined_struct() { ), Err(vec![Error::Validation { error: error::ValidationError::TypeNotFound(Identifier::new("Foo")), - position: (17, 82).into() + position: (17, 69).into() }]) ) } diff --git a/dust-lang/tests/values.rs b/dust-lang/tests/values.rs index 0c13866..773e16c 100644 --- a/dust-lang/tests/values.rs +++ b/dust-lang/tests/values.rs @@ -142,7 +142,7 @@ fn map_type_errors() { expected: Type::Boolean }, actual_position: (15, 20).into(), - expected_position: (8, 13).into(), + expected_position: (8, 12).into(), }, position: (0, 22).into() }]) diff --git a/dust-lang/tests/variables.rs b/dust-lang/tests/variables.rs index 5bd2cd6..7574717 100644 --- a/dust-lang/tests/variables.rs +++ b/dust-lang/tests/variables.rs @@ -31,7 +31,7 @@ fn set_variable_with_type_error() { expected: Type::String }, actual_position: (14, 18).into(), - expected_position: (8, 12).into() + expected_position: (8, 11).into() }, position: (0, 18).into() }]) @@ -44,7 +44,7 @@ fn function_variable() { interpret("foobar = (x: int): int { x }; foobar"), Ok(Some(Value::function( vec![(Identifier::new("x"), Type::Integer.with_position((13, 16)))], - Type::Integer.with_position((19, 23)), + Type::Integer.with_position((19, 22)), Block::new(vec![Statement::Expression(Expression::Identifier( Identifier::new("x") ))