1
0
dust/src/parser.rs

616 lines
20 KiB
Rust
Raw Normal View History

use std::{cell::RefCell, collections::HashMap};
2024-02-25 18:49:26 +00:00
use chumsky::{input::SpannedInput, pratt::*, prelude::*};
use crate::{abstract_tree::*, error::Error, lexer::Token};
2024-03-06 20:36:58 +00:00
/// Boxed parser type returned by [`parser`].
///
/// It reads a [`ParserInput`] and produces every parsed statement paired with
/// its span; failures are reported as `Rich` errors over `Token`s.
pub type DustParser<'src> = Boxed<
'src,
'src,
ParserInput<'src>,
Vec<(Statement<'src>, SimpleSpan)>,
extra::Err<Rich<'src, Token<'src>, SimpleSpan>>,
>;
/// Input consumed by [`DustParser`]: a borrowed slice of `(Token, SimpleSpan)`
/// pairs wrapped in chumsky's `SpannedInput`.
pub type ParserInput<'src> =
SpannedInput<Token<'src>, SimpleSpan, &'src [(Token<'src>, SimpleSpan)]>;
2024-02-25 18:49:26 +00:00
2024-03-06 20:36:58 +00:00
pub fn parse<'src>(
tokens: &'src [(Token<'src>, SimpleSpan)],
) -> Result<Vec<(Statement<'src>, SimpleSpan)>, Vec<Error>> {
2024-03-02 01:17:55 +00:00
parser()
2024-03-06 20:36:58 +00:00
.parse(tokens.spanned((tokens.len()..tokens.len()).into()))
2024-03-02 01:17:55 +00:00
.into_result()
2024-03-06 20:36:58 +00:00
.map_err(|errors| errors.into_iter().map(|error| error.into()).collect())
2024-03-02 01:17:55 +00:00
}
2024-03-06 20:36:58 +00:00
pub fn parser<'src>() -> DustParser<'src> {
let identifiers: RefCell<HashMap<&str, Identifier>> = RefCell::new(HashMap::new());
2024-02-26 21:27:01 +00:00
let identifier = select! {
Token::Identifier(text) => {
let mut identifiers = identifiers.borrow_mut();
if let Some(identifier) = identifiers.get(&text) {
identifier.clone()
} else {
let new = Identifier::new(text);
identifiers.insert(text, new.clone());
new
}
}
2024-02-26 21:27:01 +00:00
};
2024-02-25 18:49:26 +00:00
2024-02-26 21:27:01 +00:00
let expression = recursive(|expression| {
2024-02-25 18:49:26 +00:00
let basic_value = select! {
2024-02-26 21:27:01 +00:00
Token::Boolean(boolean) => ValueNode::Boolean(boolean),
Token::Integer(integer) => ValueNode::Integer(integer),
Token::Float(float) => ValueNode::Float(float),
Token::String(string) => ValueNode::String(string),
}
.map(|value| Expression::Value(value))
.boxed();
2024-02-25 18:49:26 +00:00
2024-02-26 21:27:01 +00:00
let identifier_expression = identifier
.clone()
2024-02-26 21:27:01 +00:00
.map(|identifier| Expression::Identifier(identifier))
.boxed();
let list = expression
2024-02-25 18:49:26 +00:00
.clone()
.separated_by(just(Token::Control(",")))
2024-02-25 18:49:26 +00:00
.allow_trailing()
.collect()
.delimited_by(just(Token::Control("[")), just(Token::Control("]")))
.map(|list| Expression::Value(ValueNode::List(list)))
.boxed();
2024-02-25 18:49:26 +00:00
let r#enum = identifier
.clone()
.then_ignore(just(Token::Control("::")))
.then(identifier.clone())
.map(|(name, variant)| Expression::Value(ValueNode::Enum(name, variant)))
.boxed();
2024-02-26 21:27:01 +00:00
let atom = choice((
identifier_expression.clone(),
basic_value.clone(),
list.clone(),
r#enum.clone(),
2024-02-26 21:27:01 +00:00
expression
.clone()
.delimited_by(just(Token::Control("(")), just(Token::Control(")"))),
2024-02-25 18:49:26 +00:00
));
2024-02-26 21:27:01 +00:00
let logic = atom
.pratt((
prefix(2, just(Token::Operator("!")), |expression| {
Expression::Logic(Box::new(Logic::Not(expression)))
}),
infix(left(1), just(Token::Operator("==")), |left, right| {
Expression::Logic(Box::new(Logic::Equal(left, right)))
}),
infix(left(1), just(Token::Operator("!=")), |left, right| {
Expression::Logic(Box::new(Logic::NotEqual(left, right)))
}),
infix(left(1), just(Token::Operator(">")), |left, right| {
Expression::Logic(Box::new(Logic::Greater(left, right)))
}),
infix(left(1), just(Token::Operator("<")), |left, right| {
Expression::Logic(Box::new(Logic::Less(left, right)))
}),
infix(left(1), just(Token::Operator(">=")), |left, right| {
Expression::Logic(Box::new(Logic::GreaterOrEqual(left, right)))
}),
infix(left(1), just(Token::Operator("<=")), |left, right| {
Expression::Logic(Box::new(Logic::LessOrEqual(left, right)))
}),
2024-03-07 03:15:35 +00:00
infix(right(1), just(Token::Operator("&&")), |left, right| {
2024-02-26 21:27:01 +00:00
Expression::Logic(Box::new(Logic::And(left, right)))
}),
2024-03-07 03:15:35 +00:00
infix(right(1), just(Token::Operator("||")), |left, right| {
2024-02-26 21:27:01 +00:00
Expression::Logic(Box::new(Logic::Or(left, right)))
}),
))
.boxed();
choice((r#enum, logic, identifier_expression, list, basic_value))
2024-02-26 21:27:01 +00:00
});
let statement = recursive(|statement| {
let expression_statement = expression
.map(|expression| Statement::Expression(expression))
.boxed();
2024-02-29 02:34:14 +00:00
let basic_type = choice((
just(Token::Keyword("bool")).to(Type::Boolean),
just(Token::Keyword("float")).to(Type::Float),
just(Token::Keyword("int")).to(Type::Integer),
just(Token::Keyword("range")).to(Type::Range),
just(Token::Keyword("str")).to(Type::String),
2024-02-29 02:34:14 +00:00
just(Token::Keyword("list")).to(Type::List),
));
let type_arguments = basic_type
.clone()
.delimited_by(just(Token::Control("(")), just(Token::Control(")")));
let type_specification = just(Token::Control(":")).ignore_then(choice((
basic_type
.clone()
.separated_by(just(Token::Control(",")))
.collect()
.delimited_by(just(Token::Control("[")), just(Token::Control("]")))
.map(|types| Type::ListExact(types)),
just(Token::Keyword("list"))
.then(type_arguments)
.map(|(_, item_type)| Type::ListOf(Box::new(item_type))),
basic_type.clone(),
identifier
.clone()
.map(|identifier| Type::Custom(identifier)),
)));
2024-02-25 18:49:26 +00:00
let assignment = identifier
.then(type_specification.clone().or_not())
2024-02-25 18:49:26 +00:00
.then_ignore(just(Token::Operator("=")))
.then(statement.clone())
.map(|((identifier, r#type), statement)| {
Statement::Assignment(Assignment::new(identifier, r#type, statement))
2024-02-26 21:27:01 +00:00
})
.boxed();
2024-02-25 18:49:26 +00:00
2024-02-28 23:16:25 +00:00
let block = statement
.clone()
2024-03-07 00:45:41 +00:00
.repeated()
2024-02-28 23:16:25 +00:00
.collect()
.delimited_by(just(Token::Control("{")), just(Token::Control("}")))
.map(|statements| Statement::Block(Block::new(statements)))
.boxed();
2024-03-02 01:17:55 +00:00
let r#loop = statement
.clone()
2024-03-07 00:45:41 +00:00
.repeated()
2024-03-02 01:17:55 +00:00
.collect()
.delimited_by(
just(Token::Keyword("loop")).then(just(Token::Control("{"))),
just(Token::Control("}")),
)
.map(|statements| Statement::Loop(Loop::new(statements)))
.boxed();
choice((assignment, expression_statement, block, r#loop))
2024-03-07 00:45:41 +00:00
.then_ignore(just(Token::Control(";")).or_not())
2024-02-26 21:27:01 +00:00
});
2024-02-25 18:49:26 +00:00
2024-02-26 21:27:01 +00:00
statement
.map_with(|item, state| (item, state.span()))
.repeated()
.collect()
2024-03-06 20:36:58 +00:00
.boxed()
2024-02-25 18:49:26 +00:00
}
// Unit tests: each one lexes a small source string, parses it and compares the
// first resulting statement with the expected syntax tree.
#[cfg(test)]
mod tests {
    use crate::{abstract_tree::Logic, lexer::lex};

    use super::*;

    #[test]
    fn r#loop() {
        assert_eq!(
            parse(&lex("loop {}").unwrap()).unwrap()[0].0,
            Statement::Loop(Loop::new(vec![]))
        );
    }

    #[test]
    fn block() {
        assert_eq!(
            parse(&lex("{ x }").unwrap()).unwrap()[0].0,
            Statement::Block(Block::new(vec![Statement::Expression(
                Expression::Identifier(Identifier::new("x"))
            ),]))
        );

        // Statements separated by `;`, with the last one unterminated.
        assert_eq!(
            parse(
                &lex("
{
x;
y;
z
}
")
                .unwrap()
            )
            .unwrap()[0]
                .0,
            Statement::Block(Block::new(vec![
                Statement::Expression(Expression::Identifier(Identifier::new("x"))),
                Statement::Expression(Expression::Identifier(Identifier::new("y"))),
                Statement::Expression(Expression::Identifier(Identifier::new("z"))),
            ]))
        );

        // Statements without any `;` between them.
        assert_eq!(
            parse(
                &lex("
{
1 == 1
z
}
")
                .unwrap()
            )
            .unwrap()[0]
                .0,
            Statement::Block(Block::new(vec![
                Statement::Expression(Expression::Logic(Box::new(Logic::Equal(
                    Expression::Value(ValueNode::Integer(1)),
                    Expression::Value(ValueNode::Integer(1))
                )))),
                Statement::Expression(Expression::Identifier(Identifier::new("z"))),
            ]))
        );
    }

    #[test]
    fn identifier() {
        assert_eq!(
            parse(&lex("x").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Identifier(Identifier::new("x")))
        );
        assert_eq!(
            parse(&lex("foobar").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Identifier(Identifier::new("foobar")))
        );
        assert_eq!(
            parse(&lex("HELLO").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Identifier(Identifier::new("HELLO")))
        );
    }

    #[test]
    fn assignment() {
        assert_eq!(
            parse(&lex("foobar = 1").unwrap()).unwrap()[0].0,
            Statement::Assignment(Assignment::new(
                Identifier::new("foobar"),
                None,
                Statement::Expression(Expression::Value(ValueNode::Integer(1)))
            )),
        );
    }

    #[test]
    fn assignment_with_basic_type() {
        assert_eq!(
            parse(&lex("foobar: int = 1").unwrap()).unwrap()[0].0,
            Statement::Assignment(Assignment::new(
                Identifier::new("foobar"),
                Some(Type::Integer),
                Statement::Expression(Expression::Value(ValueNode::Integer(1)))
            )),
        );
    }

    #[test]
    fn assignment_with_custom_type() {
        assert_eq!(
            parse(&lex("foobar: Foo = Foo::Bar").unwrap()).unwrap()[0].0,
            Statement::Assignment(Assignment::new(
                Identifier::new("foobar"),
                Some(Type::Custom(Identifier::new("Foo"))),
                Statement::Expression(Expression::Value(ValueNode::Enum(
                    Identifier::new("Foo"),
                    Identifier::new("Bar")
                )))
            )),
        );
    }

    #[test]
    fn assignment_with_list_types() {
        assert_eq!(
            parse(&lex("foobar: list = []").unwrap()).unwrap()[0].0,
            Statement::Assignment(Assignment::new(
                Identifier::new("foobar"),
                Some(Type::List),
                Statement::Expression(Expression::Value(ValueNode::List(vec![])))
            )),
        );
        assert_eq!(
            parse(&lex("foobar: list(int) = []").unwrap()).unwrap()[0].0,
            Statement::Assignment(Assignment::new(
                Identifier::new("foobar"),
                Some(Type::ListOf(Box::new(Type::Integer))),
                Statement::Expression(Expression::Value(ValueNode::List(vec![])))
            )),
        );
        assert_eq!(
            parse(&lex("foobar: [int, str] = [ 42, 'foo' ]").unwrap()).unwrap()[0].0,
            Statement::Assignment(Assignment::new(
                Identifier::new("foobar"),
                Some(Type::ListExact(vec![Type::Integer, Type::String])),
                Statement::Expression(Expression::Value(ValueNode::List(vec![
                    Expression::Value(ValueNode::Integer(42)),
                    Expression::Value(ValueNode::String("foo"))
                ])))
            )),
        );
    }

    #[test]
    fn logic() {
        assert_eq!(
            parse(&lex("x == 1").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Logic(Box::new(Logic::Equal(
                Expression::Identifier(Identifier::new("x")),
                Expression::Value(ValueNode::Integer(1))
            ))))
        );
        assert_eq!(
            parse(&lex("(x == 1) && (y == 2)").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Logic(Box::new(Logic::And(
                Expression::Logic(Box::new(Logic::Equal(
                    Expression::Identifier(Identifier::new("x")),
                    Expression::Value(ValueNode::Integer(1))
                ))),
                Expression::Logic(Box::new(Logic::Equal(
                    Expression::Identifier(Identifier::new("y")),
                    Expression::Value(ValueNode::Integer(2))
                ))),
            ))))
        );
        // NOTE(review): this expects `a && b && c` to nest to the left, while
        // the parser declares `&&` with `right(1)` - confirm which is intended.
        assert_eq!(
            parse(&lex("(x == 1) && (y == 2) && true").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Logic(Box::new(Logic::And(
                Expression::Logic(Box::new(Logic::And(
                    Expression::Logic(Box::new(Logic::Equal(
                        Expression::Identifier(Identifier::new("x")),
                        Expression::Value(ValueNode::Integer(1))
                    ))),
                    Expression::Logic(Box::new(Logic::Equal(
                        Expression::Identifier(Identifier::new("y")),
                        Expression::Value(ValueNode::Integer(2))
                    ))),
                ))),
                Expression::Value(ValueNode::Boolean(true))
            ))))
        );
    }

    #[test]
    fn r#enum() {
        assert_eq!(
            parse(&lex("Option::None").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Enum(
                Identifier::new("Option"),
                Identifier::new("None")
            )))
        );
    }

    #[test]
    fn list() {
        assert_eq!(
            parse(&lex("[]").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::List(Vec::with_capacity(0))))
        );
        assert_eq!(
            parse(&lex("[42]").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::List(vec![Expression::Value(
                ValueNode::Integer(42)
            )])))
        );
        // Nested list with a trailing comma.
        assert_eq!(
            parse(&lex("[42, 'foo', 'bar', [1, 2, 3,]]").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::List(vec![
                Expression::Value(ValueNode::Integer(42)),
                Expression::Value(ValueNode::String("foo")),
                Expression::Value(ValueNode::String("bar")),
                Expression::Value(ValueNode::List(vec![
                    Expression::Value(ValueNode::Integer(1)),
                    Expression::Value(ValueNode::Integer(2)),
                    Expression::Value(ValueNode::Integer(3)),
                ]))
            ])))
        );
    }

    #[test]
    fn r#true() {
        assert_eq!(
            parse(&lex("true").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Boolean(true)))
        );
    }

    #[test]
    fn r#false() {
        assert_eq!(
            parse(&lex("false").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Boolean(false)))
        );
    }

    #[test]
    fn positive_float() {
        assert_eq!(
            parse(&lex("0.0").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Float(0.0)))
        );
        assert_eq!(
            parse(&lex("42.0").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Float(42.0)))
        );

        let max_float = f64::MAX.to_string() + ".0";

        assert_eq!(
            parse(&lex(&max_float).unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Float(f64::MAX)))
        );

        let min_positive_float = f64::MIN_POSITIVE.to_string();

        assert_eq!(
            parse(&lex(&min_positive_float).unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Float(f64::MIN_POSITIVE)))
        );
    }

    #[test]
    fn negative_float() {
        assert_eq!(
            parse(&lex("-0.0").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Float(-0.0)))
        );
        assert_eq!(
            parse(&lex("-42.0").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Float(-42.0)))
        );

        let min_float = f64::MIN.to_string() + ".0";

        assert_eq!(
            parse(&lex(&min_float).unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Float(f64::MIN)))
        );

        let max_negative_float = format!("-{}", f64::MIN_POSITIVE);

        assert_eq!(
            parse(&lex(&max_negative_float).unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Float(-f64::MIN_POSITIVE)))
        );
    }

    #[test]
    fn other_float() {
        assert_eq!(
            parse(&lex("Infinity").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Float(f64::INFINITY)))
        );
        assert_eq!(
            parse(&lex("-Infinity").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Float(f64::NEG_INFINITY)))
        );

        // NaN never compares equal to itself, so check it with `is_nan`.
        if let Statement::Expression(Expression::Value(ValueNode::Float(float))) =
            &parse(&lex("NaN").unwrap()).unwrap()[0].0
        {
            assert!(float.is_nan());
        } else {
            panic!("Expected a float.");
        }
    }

    #[test]
    fn positive_integer() {
        for i in 0..10 {
            let source = i.to_string();
            let tokens = lex(&source).unwrap();
            let statements = parse(&tokens).unwrap();

            assert_eq!(
                statements[0].0,
                Statement::Expression(Expression::Value(ValueNode::Integer(i)))
            )
        }

        assert_eq!(
            parse(&lex("42").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Integer(42)))
        );

        let maximum_integer = i64::MAX.to_string();

        assert_eq!(
            parse(&lex(&maximum_integer).unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Integer(i64::MAX)))
        );
    }

    #[test]
    fn negative_integer() {
        for i in -9..1 {
            let source = i.to_string();
            let tokens = lex(&source).unwrap();
            let statements = parse(&tokens).unwrap();

            assert_eq!(
                statements[0].0,
                Statement::Expression(Expression::Value(ValueNode::Integer(i)))
            )
        }

        assert_eq!(
            parse(&lex("-42").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Integer(-42)))
        );

        let minimum_integer = i64::MIN.to_string();

        assert_eq!(
            parse(&lex(&minimum_integer).unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::Integer(i64::MIN)))
        );
    }

    #[test]
    fn double_quoted_string() {
        assert_eq!(
            parse(&lex("\"\"").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::String("")))
        );
        assert_eq!(
            parse(&lex("\"42\"").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::String("42")))
        );
        assert_eq!(
            parse(&lex("\"foobar\"").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::String("foobar")))
        );
    }

    #[test]
    fn single_quoted_string() {
        assert_eq!(
            parse(&lex("''").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::String("")))
        );
        assert_eq!(
            parse(&lex("'42'").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::String("42")))
        );
        assert_eq!(
            parse(&lex("'foobar'").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::String("foobar")))
        );
    }

    #[test]
    fn grave_quoted_string() {
        assert_eq!(
            parse(&lex("``").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::String("")))
        );
        assert_eq!(
            parse(&lex("`42`").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::String("42")))
        );
        assert_eq!(
            parse(&lex("`foobar`").unwrap()).unwrap()[0].0,
            Statement::Expression(Expression::Value(ValueNode::String("foobar")))
        );
    }
}