// dust/src/parser.rs
use std::{cell::RefCell, collections::HashMap};
2024-02-25 18:49:26 +00:00
use chumsky::{input::SpannedInput, pratt::*, prelude::*};
use crate::{abstract_tree::*, error::Error, lexer::Token};
/// Input type consumed by the parser: a borrowed slice of
/// `(Token, SimpleSpan)` pairs wrapped in chumsky's `SpannedInput`.
///
/// `'src` is the lifetime carried by the tokens themselves; `'tokens` is the
/// lifetime of the borrowed token slice.
type ParserInput<'tokens, 'src> =
SpannedInput<Token<'src>, SimpleSpan, &'tokens [(Token<'src>, SimpleSpan)]>;
fn parser<'tokens, 'src: 'tokens>() -> impl Parser<
'tokens,
ParserInput<'tokens, 'src>,
2024-02-26 21:27:01 +00:00
Vec<(Statement<'src>, SimpleSpan)>,
2024-02-25 18:49:26 +00:00
extra::Err<Rich<'tokens, Token<'src>, SimpleSpan>>,
> {
let identifiers: RefCell<HashMap<&str, Identifier>> = RefCell::new(HashMap::new());
2024-02-26 21:27:01 +00:00
let identifier = select! {
Token::Identifier(text) => {
let mut identifiers = identifiers.borrow_mut();
if let Some(identifier) = identifiers.get(&text) {
identifier.clone()
} else {
let new = Identifier::new(text);
identifiers.insert(text, new.clone());
new
}
}
2024-02-26 21:27:01 +00:00
};
2024-02-25 18:49:26 +00:00
2024-02-26 21:27:01 +00:00
let expression = recursive(|expression| {
2024-02-25 18:49:26 +00:00
let basic_value = select! {
2024-02-26 21:27:01 +00:00
Token::Boolean(boolean) => ValueNode::Boolean(boolean),
Token::Integer(integer) => ValueNode::Integer(integer),
Token::Float(float) => ValueNode::Float(float),
Token::String(string) => ValueNode::String(string),
}
.map(|value| Expression::Value(value))
.boxed();
2024-02-25 18:49:26 +00:00
2024-02-26 21:27:01 +00:00
let identifier_expression = identifier
.clone()
2024-02-26 21:27:01 +00:00
.map(|identifier| Expression::Identifier(identifier))
.boxed();
let list = expression
2024-02-25 18:49:26 +00:00
.clone()
.separated_by(just(Token::Control(",")))
2024-02-25 18:49:26 +00:00
.allow_trailing()
.collect()
.delimited_by(just(Token::Control("[")), just(Token::Control("]")))
.map(|list| Expression::Value(ValueNode::List(list)))
.boxed();
2024-02-25 18:49:26 +00:00
let r#enum = identifier
.clone()
.then_ignore(just(Token::Control("::")))
.then(identifier.clone())
.map(|(name, variant)| Expression::Value(ValueNode::Enum(name, variant)))
.boxed();
2024-02-26 21:27:01 +00:00
let atom = choice((
identifier_expression.clone(),
basic_value.clone(),
list.clone(),
r#enum.clone(),
2024-02-26 21:27:01 +00:00
expression
.clone()
.delimited_by(just(Token::Control("(")), just(Token::Control(")"))),
2024-02-25 18:49:26 +00:00
));
2024-02-26 21:27:01 +00:00
let logic = atom
.pratt((
prefix(2, just(Token::Operator("!")), |expression| {
Expression::Logic(Box::new(Logic::Not(expression)))
}),
infix(left(1), just(Token::Operator("==")), |left, right| {
Expression::Logic(Box::new(Logic::Equal(left, right)))
}),
infix(left(1), just(Token::Operator("!=")), |left, right| {
Expression::Logic(Box::new(Logic::NotEqual(left, right)))
}),
infix(left(1), just(Token::Operator(">")), |left, right| {
Expression::Logic(Box::new(Logic::Greater(left, right)))
}),
infix(left(1), just(Token::Operator("<")), |left, right| {
Expression::Logic(Box::new(Logic::Less(left, right)))
}),
infix(left(1), just(Token::Operator(">=")), |left, right| {
Expression::Logic(Box::new(Logic::GreaterOrEqual(left, right)))
}),
infix(left(1), just(Token::Operator("<=")), |left, right| {
Expression::Logic(Box::new(Logic::LessOrEqual(left, right)))
}),
infix(left(1), just(Token::Operator("&&")), |left, right| {
Expression::Logic(Box::new(Logic::And(left, right)))
}),
infix(left(1), just(Token::Operator("||")), |left, right| {
Expression::Logic(Box::new(Logic::Or(left, right)))
}),
))
.boxed();
choice((r#enum, logic, identifier_expression, list, basic_value))
2024-02-26 21:27:01 +00:00
});
let statement = recursive(|statement| {
let expression_statement = expression
.map(|expression| Statement::Expression(expression))
.boxed();
let type_specification = just(Token::Control(":")).ignore_then(choice((
just(Token::Keyword("bool")).to(Type::Boolean),
just(Token::Keyword("float")).to(Type::Float),
just(Token::Keyword("int")).to(Type::Integer),
just(Token::Keyword("range")).to(Type::Range),
just(Token::Keyword("str")).to(Type::String),
identifier
.clone()
.map(|identifier| Type::Custom(identifier)),
)));
2024-02-25 18:49:26 +00:00
let assignment = identifier
.then(type_specification.clone().or_not())
2024-02-25 18:49:26 +00:00
.then_ignore(just(Token::Operator("=")))
.then(statement.clone())
.map(|((identifier, r#type), statement)| {
Statement::Assignment(Assignment::new(identifier, r#type, statement))
2024-02-26 21:27:01 +00:00
})
.boxed();
2024-02-25 18:49:26 +00:00
2024-02-28 23:16:25 +00:00
let block = statement
.clone()
.separated_by(just(Token::Control(";")).or_not())
.collect()
.delimited_by(just(Token::Control("{")), just(Token::Control("}")))
.map(|statements| Statement::Block(Block::new(statements)))
.boxed();
choice((assignment, expression_statement, block))
2024-02-26 21:27:01 +00:00
});
2024-02-25 18:49:26 +00:00
2024-02-26 21:27:01 +00:00
statement
.map_with(|item, state| (item, state.span()))
.repeated()
.collect()
2024-02-25 18:49:26 +00:00
}
2024-02-26 21:27:01 +00:00
pub fn parse<'tokens, 'src: 'tokens>(
tokens: &'tokens [(Token<'src>, SimpleSpan)],
) -> Result<Vec<(Statement<'src>, SimpleSpan)>, Error<'tokens>> {
2024-02-25 18:49:26 +00:00
parser()
.parse(tokens.spanned((0..0).into()))
.into_result()
.map_err(|error| Error::Parse(error))
}
#[cfg(test)]
mod tests {
    use crate::{abstract_tree::Logic, lexer::lex};

    use super::*;

    /// Lexes and parses `source` and returns the first statement it produced.
    ///
    /// Panics if lexing or parsing fails, or if no statement was parsed.
    fn first_statement(source: &str) -> Statement<'_> {
        let tokens = lex(source).unwrap();
        let statements = parse(&tokens).unwrap();

        statements
            .into_iter()
            .next()
            .map(|(statement, _span)| statement)
            .expect("source should produce at least one statement")
    }

    /// Blocks accept a single statement, `;`-separated statements, and
    /// statements that follow one another without any separator.
    #[test]
    fn block() {
        assert_eq!(
            first_statement("{ x }"),
            Statement::Block(Block::new(vec![Statement::Expression(
                Expression::Identifier(Identifier::new("x"))
            )]))
        );

        assert_eq!(
            first_statement("\n{\nx;\ny;\nz\n}\n"),
            Statement::Block(Block::new(vec![
                Statement::Expression(Expression::Identifier(Identifier::new("x"))),
                Statement::Expression(Expression::Identifier(Identifier::new("y"))),
                Statement::Expression(Expression::Identifier(Identifier::new("z"))),
            ]))
        );

        assert_eq!(
            first_statement("\n{\n1 == 1\nz\n}\n"),
            Statement::Block(Block::new(vec![
                Statement::Expression(Expression::Logic(Box::new(Logic::Equal(
                    Expression::Value(ValueNode::Integer(1)),
                    Expression::Value(ValueNode::Integer(1))
                )))),
                Statement::Expression(Expression::Identifier(Identifier::new("z"))),
            ]))
        );
    }

    #[test]
    fn identifier() {
        assert_eq!(
            first_statement("x"),
            Statement::Expression(Expression::Identifier(Identifier::new("x")))
        );
        assert_eq!(
            first_statement("foobar"),
            Statement::Expression(Expression::Identifier(Identifier::new("foobar")))
        );
        assert_eq!(
            first_statement("HELLO"),
            Statement::Expression(Expression::Identifier(Identifier::new("HELLO")))
        );
    }

    #[test]
    fn assignment() {
        assert_eq!(
            first_statement("foobar = 1"),
            Statement::Assignment(Assignment::new(
                Identifier::new("foobar"),
                None,
                Statement::Expression(Expression::Value(ValueNode::Integer(1)))
            )),
        );
    }

    #[test]
    fn assignment_with_type() {
        assert_eq!(
            first_statement("foobar: int = 1"),
            Statement::Assignment(Assignment::new(
                Identifier::new("foobar"),
                Some(Type::Integer),
                Statement::Expression(Expression::Value(ValueNode::Integer(1)))
            )),
        );

        assert_eq!(
            first_statement("foobar: Foo = Foo::Bar"),
            Statement::Assignment(Assignment::new(
                Identifier::new("foobar"),
                Some(Type::Custom(Identifier::new("Foo"))),
                Statement::Expression(Expression::Value(ValueNode::Enum(
                    Identifier::new("Foo"),
                    Identifier::new("Bar")
                )))
            )),
        );
    }

    /// Covers a simple comparison, parenthesized operands, and the left
    /// associativity of chained `&&`.
    #[test]
    fn logic() {
        assert_eq!(
            first_statement("x == 1"),
            Statement::Expression(Expression::Logic(Box::new(Logic::Equal(
                Expression::Identifier(Identifier::new("x")),
                Expression::Value(ValueNode::Integer(1))
            ))))
        );

        assert_eq!(
            first_statement("(x == 1) && (y == 2)"),
            Statement::Expression(Expression::Logic(Box::new(Logic::And(
                Expression::Logic(Box::new(Logic::Equal(
                    Expression::Identifier(Identifier::new("x")),
                    Expression::Value(ValueNode::Integer(1))
                ))),
                Expression::Logic(Box::new(Logic::Equal(
                    Expression::Identifier(Identifier::new("y")),
                    Expression::Value(ValueNode::Integer(2))
                ))),
            ))))
        );

        assert_eq!(
            first_statement("(x == 1) && (y == 2) && true"),
            Statement::Expression(Expression::Logic(Box::new(Logic::And(
                Expression::Logic(Box::new(Logic::And(
                    Expression::Logic(Box::new(Logic::Equal(
                        Expression::Identifier(Identifier::new("x")),
                        Expression::Value(ValueNode::Integer(1))
                    ))),
                    Expression::Logic(Box::new(Logic::Equal(
                        Expression::Identifier(Identifier::new("y")),
                        Expression::Value(ValueNode::Integer(2))
                    ))),
                ))),
                Expression::Value(ValueNode::Boolean(true))
            ))))
        );
    }

    #[test]
    fn r#enum() {
        assert_eq!(
            first_statement("Option::None"),
            Statement::Expression(Expression::Value(ValueNode::Enum(
                Identifier::new("Option"),
                Identifier::new("None")
            )))
        );
    }

    /// Covers the empty list, a single element, and nesting with a trailing
    /// comma.
    #[test]
    fn list() {
        assert_eq!(
            first_statement("[]"),
            Statement::Expression(Expression::Value(ValueNode::List(Vec::new())))
        );
        assert_eq!(
            first_statement("[42]"),
            Statement::Expression(Expression::Value(ValueNode::List(vec![Expression::Value(
                ValueNode::Integer(42)
            )])))
        );
        assert_eq!(
            first_statement("[42, 'foo', 'bar', [1, 2, 3,]]"),
            Statement::Expression(Expression::Value(ValueNode::List(vec![
                Expression::Value(ValueNode::Integer(42)),
                Expression::Value(ValueNode::String("foo")),
                Expression::Value(ValueNode::String("bar")),
                Expression::Value(ValueNode::List(vec![
                    Expression::Value(ValueNode::Integer(1)),
                    Expression::Value(ValueNode::Integer(2)),
                    Expression::Value(ValueNode::Integer(3)),
                ]))
            ])))
        );
    }

    #[test]
    fn r#true() {
        assert_eq!(
            first_statement("true"),
            Statement::Expression(Expression::Value(ValueNode::Boolean(true)))
        );
    }

    #[test]
    fn r#false() {
        assert_eq!(
            first_statement("false"),
            Statement::Expression(Expression::Value(ValueNode::Boolean(false)))
        );
    }

    #[test]
    fn positive_float() {
        assert_eq!(
            first_statement("0.0"),
            Statement::Expression(Expression::Value(ValueNode::Float(0.0)))
        );
        assert_eq!(
            first_statement("42.0"),
            Statement::Expression(Expression::Value(ValueNode::Float(42.0)))
        );

        let max_float = f64::MAX.to_string() + ".0";

        assert_eq!(
            first_statement(&max_float),
            Statement::Expression(Expression::Value(ValueNode::Float(f64::MAX)))
        );

        let min_positive_float = f64::MIN_POSITIVE.to_string();

        assert_eq!(
            first_statement(&min_positive_float),
            Statement::Expression(Expression::Value(ValueNode::Float(f64::MIN_POSITIVE)))
        );
    }

    #[test]
    fn negative_float() {
        assert_eq!(
            first_statement("-0.0"),
            Statement::Expression(Expression::Value(ValueNode::Float(-0.0)))
        );
        assert_eq!(
            first_statement("-42.0"),
            Statement::Expression(Expression::Value(ValueNode::Float(-42.0)))
        );

        let min_float = f64::MIN.to_string() + ".0";

        assert_eq!(
            first_statement(&min_float),
            Statement::Expression(Expression::Value(ValueNode::Float(f64::MIN)))
        );

        let max_negative_float = format!("-{}", f64::MIN_POSITIVE);

        assert_eq!(
            first_statement(&max_negative_float),
            Statement::Expression(Expression::Value(ValueNode::Float(-f64::MIN_POSITIVE)))
        );
    }

    #[test]
    fn other_float() {
        assert_eq!(
            first_statement("Infinity"),
            Statement::Expression(Expression::Value(ValueNode::Float(f64::INFINITY)))
        );
        assert_eq!(
            first_statement("-Infinity"),
            Statement::Expression(Expression::Value(ValueNode::Float(f64::NEG_INFINITY)))
        );

        // NaN never compares equal to itself, so inspect the value directly
        // instead of using `assert_eq!`.
        if let Statement::Expression(Expression::Value(ValueNode::Float(float))) =
            &first_statement("NaN")
        {
            assert!(float.is_nan());
        } else {
            panic!("Expected a float.");
        }
    }

    #[test]
    fn positive_integer() {
        for i in 0..10 {
            let source = i.to_string();

            assert_eq!(
                first_statement(&source),
                Statement::Expression(Expression::Value(ValueNode::Integer(i)))
            )
        }

        assert_eq!(
            first_statement("42"),
            Statement::Expression(Expression::Value(ValueNode::Integer(42)))
        );

        let maximum_integer = i64::MAX.to_string();

        assert_eq!(
            first_statement(&maximum_integer),
            Statement::Expression(Expression::Value(ValueNode::Integer(i64::MAX)))
        );
    }

    #[test]
    fn negative_integer() {
        for i in -9..1 {
            let source = i.to_string();

            assert_eq!(
                first_statement(&source),
                Statement::Expression(Expression::Value(ValueNode::Integer(i)))
            )
        }

        assert_eq!(
            first_statement("-42"),
            Statement::Expression(Expression::Value(ValueNode::Integer(-42)))
        );

        let minimum_integer = i64::MIN.to_string();

        assert_eq!(
            first_statement(&minimum_integer),
            Statement::Expression(Expression::Value(ValueNode::Integer(i64::MIN)))
        );
    }

    #[test]
    fn double_quoted_string() {
        assert_eq!(
            first_statement("\"\""),
            Statement::Expression(Expression::Value(ValueNode::String("")))
        );
        assert_eq!(
            first_statement("\"42\""),
            Statement::Expression(Expression::Value(ValueNode::String("42")))
        );
        assert_eq!(
            first_statement("\"foobar\""),
            Statement::Expression(Expression::Value(ValueNode::String("foobar")))
        );
    }

    #[test]
    fn single_quoted_string() {
        assert_eq!(
            first_statement("''"),
            Statement::Expression(Expression::Value(ValueNode::String("")))
        );
        assert_eq!(
            first_statement("'42'"),
            Statement::Expression(Expression::Value(ValueNode::String("42")))
        );
        assert_eq!(
            first_statement("'foobar'"),
            Statement::Expression(Expression::Value(ValueNode::String("foobar")))
        );
    }

    #[test]
    fn grave_quoted_string() {
        assert_eq!(
            first_statement("``"),
            Statement::Expression(Expression::Value(ValueNode::String("")))
        );
        assert_eq!(
            first_statement("`42`"),
            Statement::Expression(Expression::Value(ValueNode::String("42")))
        );
        assert_eq!(
            first_statement("`foobar`"),
            Statement::Expression(Expression::Value(ValueNode::String("foobar")))
        );
    }
}