Continue parser experiment

This commit is contained in:
Jeff 2024-02-23 12:14:15 -05:00
parent bdbd1fc412
commit bec6eb5aeb
3 changed files with 420 additions and 114 deletions

59
Cargo.lock generated
View File

@ -14,6 +14,15 @@ dependencies = [
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0"
dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.16"
@ -44,12 +53,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.9.3"
version = "1.0.0-alpha.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9"
checksum = "b9c28d4e5dd9a9262a38b231153591da6ce1471b818233f4727985d3dd0ed93c"
dependencies = [
"hashbrown",
"regex-automata",
"serde",
"stacker",
"unicode-ident",
]
[[package]]
@ -76,6 +88,12 @@ version = "0.2.153"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
[[package]]
name = "memchr"
version = "2.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149"
[[package]]
name = "once_cell"
version = "1.19.0"
@ -109,6 +127,43 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "regex-automata"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]]
name = "serde"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.197"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "stacker"
version = "0.1.15"

View File

@ -14,4 +14,4 @@ opt-level = 3
[dependencies]
ariadne = "0.4.0"
chumsky = "0.9.3"
chumsky = "1.0.0-alpha.6"

View File

@ -6,6 +6,50 @@ use std::{
use chumsky::{prelude::*, Parser};
/// A single statement produced by the parser.
#[derive(Clone, Debug, PartialEq)]
pub enum Statement {
    /// An `identifier = statement` binding. Boxed because `Assignment`
    /// itself contains a `Statement`, which would otherwise make the type
    /// infinitely sized.
    Assignment(Box<Assignment>),
    /// An expression used directly as a statement.
    Expression(Expression),
    /// An ordered list of statements.
    Sequence(Vec<Statement>),
}

impl Statement {
    /// Wraps a bare `Value` as an expression statement, i.e.
    /// `Statement::Expression(Expression::Value(value))`.
    pub fn value(value: Value) -> Self {
        let expression = Expression::Value(value);

        Self::Expression(expression)
    }
}
/// The payload of `Statement::Assignment`: a name paired with the statement
/// assigned to it.
///
/// The parser builds this from input shaped like `identifier = statement`.
#[derive(Clone, Debug, PartialEq)]
pub struct Assignment {
/// Name on the left-hand side of the `=`.
identifier: Identifier,
/// Statement on the right-hand side of the `=`.
statement: Statement,
}
/// An expression: either a binary logic operation or a plain value.
#[derive(Clone, Debug, PartialEq)]
pub enum Expression {
/// A binary operation. Boxed because `Logic` holds two `Expression`s,
/// so the type needs indirection to have a finite size.
Logic(Box<Logic>),
/// A literal value.
Value(Value),
}
/// A binary operation: two operand expressions joined by a `LogicOperator`.
#[derive(Clone, Debug, PartialEq)]
pub struct Logic {
/// Operand written before the operator.
left: Expression,
/// The operator joining the two operands.
operator: LogicOperator,
/// Operand written after the operator.
right: Expression,
}
/// Binary operators that can join the two operands of a `Logic` node.
///
/// The enum carries no data, so in addition to `Clone`, `Debug` and
/// `PartialEq` it derives `Copy`, `Eq` and `Hash`. That lets operators be
/// passed by value without `.clone()` and used as set members or map keys.
///
/// Each variant is documented with the source token the parser maps to it.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LogicOperator {
    /// `==`
    Equal,
    /// `!=`
    NotEqual,
    /// `>`
    Greater,
    /// `<`
    Less,
    /// `>=`
    GreaterOrEqual,
    /// `<=`
    LessOrEqual,
    /// `&&`
    And,
    /// `||`
    Or,
}
#[derive(Clone, Debug, PartialEq)]
pub enum Value {
Boolean(bool),
@ -20,6 +64,12 @@ pub enum Value {
/// A name taken from source text, stored as an owned `String`.
#[derive(Clone, Debug, PartialEq)]
pub struct Identifier(String);

impl Identifier {
    /// Builds an identifier from anything implementing `ToString`; the
    /// stored name is whatever `to_string()` returns for `text`.
    pub fn new(text: impl ToString) -> Self {
        Self(text.to_string())
    }
}
impl Display for Value {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
@ -34,109 +84,221 @@ impl Display for Value {
}
}
pub fn parser() -> impl Parser<char, Value, Error = Simple<char>> {
let boolean = just("true")
.or(just("false"))
.map(|s: &str| Value::Boolean(s.parse().unwrap()));
pub fn parser<'src>() -> impl Parser<'src, &'src str, Statement> {
let value = recursive(|value| {
let boolean = just("true")
.or(just("false"))
.map(|s: &str| Value::Boolean(s.parse().unwrap()));
let float_numeric = just('-')
.or_not()
.then(text::int(10))
.then(just('.').then(text::digits(10)))
.map(|((negative, before), (_, after))| {
let combined = before + "." + &after;
let float_numeric = just('-')
.or_not()
.then(text::int(10))
.then(just('.').then(text::digits(10)))
.to_slice()
.map(|text: &str| Value::Float(text.parse().unwrap()));
if negative.is_some() {
Value::Float(-combined.parse::<f64>().unwrap())
} else {
Value::Float(combined.parse().unwrap())
}
});
let float_other = choice((just("Infinity"), just("-Infinity"), just("NaN")))
.map(|text| Value::Float(text.parse().unwrap()));
let float_other = choice((just("Infinity"), just("-Infinity"), just("NaN")))
.map(|text| Value::Float(text.parse().unwrap()));
let float = choice((float_numeric, float_other));
let float = choice((float_numeric, float_other));
let integer = just('-').or_not().then(text::int(10).padded()).map(
|(negative, integer_text): (Option<char>, &str)| {
let integer = integer_text.parse::<i64>().unwrap();
let integer = just('-')
.or_not()
.then(text::int(10).padded())
.map(|(c, s)| {
if let Some(c) = c {
c.to_string() + &s
} else {
s
}
})
.map(|s: String| Value::Integer(s.parse().unwrap()));
if negative.is_some() {
Value::Integer(-integer)
} else {
Value::Integer(integer)
}
},
);
let delimited_string = |delimiter| {
just(delimiter)
.ignore_then(none_of(delimiter).repeated())
.then_ignore(just(delimiter))
.map(|chars| Value::String(chars.into_iter().collect()))
};
let delimited_string = |delimiter| {
just(delimiter)
.ignore_then(none_of(delimiter).repeated())
.then_ignore(just(delimiter))
.to_slice()
.map(|text: &str| Value::String(text.to_string()))
};
let string = choice((
delimited_string('\''),
delimited_string('"'),
delimited_string('`'),
));
let string = choice((
delimited_string('\''),
delimited_string('"'),
delimited_string('`'),
));
boolean.or(float).or(integer).or(string).then_ignore(end())
let list = value
.clone()
.separated_by(just(',').padded())
.allow_trailing()
.collect()
.padded()
.delimited_by(just('['), just(']'))
.map(|values| Value::List(values));
choice((boolean, float, integer, string, list))
});
let expression = recursive(|expression| {
let logic = expression
.clone()
.then(choice((
just("==").to(LogicOperator::Equal),
just("!=").to(LogicOperator::NotEqual),
just(">").to(LogicOperator::Greater),
just("<").to(LogicOperator::Less),
just(">=").to(LogicOperator::GreaterOrEqual),
just("<=").to(LogicOperator::LessOrEqual),
just("&&").to(LogicOperator::And),
just("||").to(LogicOperator::Or),
)))
.padded()
.then(expression)
.map(|((left, operator), right)| {
Expression::Logic(Box::new(Logic {
left,
operator,
right,
}))
});
let value = value.map(|value| Expression::Value(value));
choice((logic, value))
});
let statement = recursive(|statement| {
let assignment = text::ident()
.map(|text| Identifier::new(text))
.then(just("=").padded())
.then(statement)
.map(|((identifier, _), statement)| {
Statement::Assignment(Box::new(Assignment {
identifier,
statement,
}))
});
let expression = expression.map(|expression| Statement::Expression(expression));
choice((assignment, expression))
});
statement.then_ignore(end())
}
#[cfg(test)]
mod tests {
use super::*;
// List literals: empty, single element, and a mixed list containing both
// quote styles plus a nested list that ends with a trailing comma.
#[test]
fn parse_list() {
    let empty = parser().parse("[]").unwrap();
    assert_eq!(empty, Statement::value(Value::List(vec![])));

    let single = parser().parse("[42]").unwrap();
    assert_eq!(
        single,
        Statement::value(Value::List(vec![Value::Integer(42)]))
    );

    let mixed = parser().parse("[42, 'foo', \"bar\", [1, 2, 3,]]").unwrap();
    let nested = Value::List(vec![
        Value::Integer(1),
        Value::Integer(2),
        Value::Integer(3),
    ]);
    let expected = Statement::value(Value::List(vec![
        Value::Integer(42),
        Value::String("foo".to_string()),
        Value::String("bar".to_string()),
        nested,
    ]));
    assert_eq!(mixed, expected);
}
#[test]
fn parse_true() {
assert_eq!(parser().parse("true"), Ok(Value::Boolean(true)));
assert_eq!(
parser().parse("true").unwrap(),
Statement::value(Value::Boolean(true))
);
}
#[test]
fn parse_false() {
assert_eq!(parser().parse("false"), Ok(Value::Boolean(false)));
assert_eq!(
parser().parse("false").unwrap(),
Statement::value(Value::Boolean(false))
);
}
#[test]
fn parse_positive_float() {
assert_eq!(parser().parse("0.0"), Ok(Value::Float(0.0)));
assert_eq!(parser().parse("42.0"), Ok(Value::Float(42.0)));
assert_eq!(
parser().parse(f64::MAX.to_string() + ".0"),
Ok(Value::Float(f64::MAX))
parser().parse("0.0").unwrap(),
Statement::value(Value::Float(0.0))
);
assert_eq!(
parser().parse(f64::MIN_POSITIVE.to_string()),
Ok(Value::Float(f64::MIN_POSITIVE))
parser().parse("42.0").unwrap(),
Statement::value(Value::Float(42.0))
);
let max_float = f64::MAX.to_string() + ".0";
assert_eq!(
parser().parse(&max_float).unwrap(),
Statement::value(Value::Float(f64::MAX))
);
let min_positive_float = f64::MIN_POSITIVE.to_string();
assert_eq!(
parser().parse(&min_positive_float).unwrap(),
Statement::value(Value::Float(f64::MIN_POSITIVE))
);
}
#[test]
fn parse_negative_float() {
assert_eq!(parser().parse("-0.0"), Ok(Value::Float(-0.0)));
assert_eq!(parser().parse("-42.0"), Ok(Value::Float(-42.0)));
assert_eq!(
parser().parse(f64::MIN.to_string() + ".0"),
Ok(Value::Float(f64::MIN))
parser().parse("-0.0").unwrap(),
Statement::value(Value::Float(-0.0))
);
assert_eq!(
parser().parse("-".to_string() + &f64::MIN_POSITIVE.to_string()),
Ok(Value::Float(-f64::MIN_POSITIVE))
parser().parse("-42.0").unwrap(),
Statement::value(Value::Float(-42.0))
);
let min_float = f64::MIN.to_string() + ".0";
assert_eq!(
parser().parse(&min_float).unwrap(),
Statement::value(Value::Float(f64::MIN))
);
let max_negative_float = f64::MIN_POSITIVE.to_string();
assert_eq!(
parser().parse(&max_negative_float).unwrap(),
Statement::value(Value::Float(-f64::MIN_POSITIVE))
);
}
#[test]
fn parse_other_float() {
assert_eq!(parser().parse("Infinity"), Ok(Value::Float(f64::INFINITY)));
assert_eq!(
parser().parse("-Infinity"),
Ok(Value::Float(f64::NEG_INFINITY))
parser().parse("Infinity").unwrap(),
Statement::value(Value::Float(f64::INFINITY))
);
assert_eq!(
parser().parse("-Infinity").unwrap(),
Statement::value(Value::Float(f64::NEG_INFINITY))
);
if let Value::Float(float) = parser().parse("NaN").unwrap() {
if let Statement::Expression(Expression::Value(Value::Float(float))) =
parser().parse("NaN").unwrap()
{
assert!(float.is_nan())
} else {
panic!("Expected a float.")
@ -145,82 +307,171 @@ mod tests {
#[test]
fn parse_positive_integer() {
let parser = parser();
assert_eq!(parser.parse("0"), Ok(Value::Integer(0)));
assert_eq!(parser.parse("1"), Ok(Value::Integer(1)));
assert_eq!(parser.parse("2"), Ok(Value::Integer(2)));
assert_eq!(parser.parse("3"), Ok(Value::Integer(3)));
assert_eq!(parser.parse("4"), Ok(Value::Integer(4)));
assert_eq!(parser.parse("5"), Ok(Value::Integer(5)));
assert_eq!(parser.parse("6"), Ok(Value::Integer(6)));
assert_eq!(parser.parse("7"), Ok(Value::Integer(7)));
assert_eq!(parser.parse("8"), Ok(Value::Integer(8)));
assert_eq!(parser.parse("9"), Ok(Value::Integer(9)));
assert_eq!(parser.parse("42"), Ok(Value::Integer(42)));
assert_eq!(
parser.parse(i64::MAX.to_string()),
Ok(Value::Integer(i64::MAX))
parser().parse("0").unwrap(),
Statement::value(Value::Integer(0))
);
assert_eq!(
parser().parse("1").unwrap(),
Statement::value(Value::Integer(1))
);
assert_eq!(
parser().parse("2").unwrap(),
Statement::value(Value::Integer(2))
);
assert_eq!(
parser().parse("3").unwrap(),
Statement::value(Value::Integer(3))
);
assert_eq!(
parser().parse("4").unwrap(),
Statement::value(Value::Integer(4))
);
assert_eq!(
parser().parse("5").unwrap(),
Statement::value(Value::Integer(5))
);
assert_eq!(
parser().parse("6").unwrap(),
Statement::value(Value::Integer(6))
);
assert_eq!(
parser().parse("7").unwrap(),
Statement::value(Value::Integer(7))
);
assert_eq!(
parser().parse("8").unwrap(),
Statement::value(Value::Integer(8))
);
assert_eq!(
parser().parse("9").unwrap(),
Statement::value(Value::Integer(9))
);
assert_eq!(
parser().parse("42").unwrap(),
Statement::value(Value::Integer(42))
);
let maximum_integer = i64::MAX.to_string();
assert_eq!(
parser().parse(&maximum_integer).unwrap(),
Statement::value(Value::Integer(i64::MAX))
);
}
#[test]
fn parse_negative_integer() {
let parser = parser();
assert_eq!(parser.parse("-0"), Ok(Value::Integer(-0)));
assert_eq!(parser.parse("-1"), Ok(Value::Integer(-1)));
assert_eq!(parser.parse("-2"), Ok(Value::Integer(-2)));
assert_eq!(parser.parse("-3"), Ok(Value::Integer(-3)));
assert_eq!(parser.parse("-4"), Ok(Value::Integer(-4)));
assert_eq!(parser.parse("-5"), Ok(Value::Integer(-5)));
assert_eq!(parser.parse("-6"), Ok(Value::Integer(-6)));
assert_eq!(parser.parse("-7"), Ok(Value::Integer(-7)));
assert_eq!(parser.parse("-8"), Ok(Value::Integer(-8)));
assert_eq!(parser.parse("-9"), Ok(Value::Integer(-9)));
assert_eq!(parser.parse("-42"), Ok(Value::Integer(-42)));
assert_eq!(
parser.parse(i64::MIN.to_string()),
Ok(Value::Integer(i64::MIN))
parser().parse("-0").unwrap(),
Statement::value(Value::Integer(-0))
);
assert_eq!(
parser().parse("-1").unwrap(),
Statement::value(Value::Integer(-1))
);
assert_eq!(
parser().parse("-2").unwrap(),
Statement::value(Value::Integer(-2))
);
assert_eq!(
parser().parse("-3").unwrap(),
Statement::value(Value::Integer(-3))
);
assert_eq!(
parser().parse("-4").unwrap(),
Statement::value(Value::Integer(-4))
);
assert_eq!(
parser().parse("-5").unwrap(),
Statement::value(Value::Integer(-5))
);
assert_eq!(
parser().parse("-6").unwrap(),
Statement::value(Value::Integer(-6))
);
assert_eq!(
parser().parse("-7").unwrap(),
Statement::value(Value::Integer(-7))
);
assert_eq!(
parser().parse("-8").unwrap(),
Statement::value(Value::Integer(-8))
);
assert_eq!(
parser().parse("-9").unwrap(),
Statement::value(Value::Integer(-9))
);
assert_eq!(
parser().parse("-42").unwrap(),
Statement::value(Value::Integer(-42))
);
let minimum_integer = i64::MIN.to_string();
assert_eq!(
parser().parse(&minimum_integer).unwrap(),
Statement::value(Value::Integer(i64::MIN))
);
}
#[test]
fn double_quoted_string() {
let parser = parser();
assert_eq!(parser.parse("\"\""), Ok(Value::String("".to_string())));
assert_eq!(parser.parse("\"1\""), Ok(Value::String("1".to_string())));
assert_eq!(parser.parse("\"42\""), Ok(Value::String("42".to_string())));
assert_eq!(
parser.parse("\"foobar\""),
Ok(Value::String("foobar".to_string()))
parser().parse("\"\"").unwrap(),
Statement::value(Value::String("".to_string()))
);
assert_eq!(
parser().parse("\"1\"").unwrap(),
Statement::value(Value::String("1".to_string()))
);
assert_eq!(
parser().parse("\"42\"").unwrap(),
Statement::value(Value::String("42".to_string()))
);
assert_eq!(
parser().parse("\"foobar\"").unwrap(),
Statement::value(Value::String("foobar".to_string()))
);
}
#[test]
fn single_quoted_string() {
let parser = parser();
assert_eq!(parser.parse("''"), Ok(Value::String("".to_string())));
assert_eq!(parser.parse("'1'"), Ok(Value::String("1".to_string())));
assert_eq!(parser.parse("'42'"), Ok(Value::String("42".to_string())));
assert_eq!(
parser.parse("'foobar'"),
Ok(Value::String("foobar".to_string()))
parser().parse("''").unwrap(),
Statement::value(Value::String("".to_string()))
);
assert_eq!(
parser().parse("'1'").unwrap(),
Statement::value(Value::String("1".to_string()))
);
assert_eq!(
parser().parse("'42'").unwrap(),
Statement::value(Value::String("42".to_string()))
);
assert_eq!(
parser().parse("'foobar'").unwrap(),
Statement::value(Value::String("foobar".to_string()))
);
}
#[test]
fn grave_quoted_string() {
let parser = parser();
assert_eq!(parser.parse("``"), Ok(Value::String("".to_string())));
assert_eq!(parser.parse("`1`"), Ok(Value::String("1".to_string())));
assert_eq!(parser.parse("`42`"), Ok(Value::String("42".to_string())));
assert_eq!(
parser.parse("`foobar`"),
Ok(Value::String("foobar".to_string()))
parser().parse("``").unwrap(),
Statement::value(Value::String("".to_string()))
);
assert_eq!(
parser().parse("`1`").unwrap(),
Statement::value(Value::String("1".to_string()))
);
assert_eq!(
parser().parse("`42`").unwrap(),
Statement::value(Value::String("42".to_string()))
);
assert_eq!(
parser().parse("`foobar`").unwrap(),
Statement::value(Value::String("foobar".to_string()))
);
}
}