From bec6eb5aeb8aa3cb68b636172a3811b831ad91ca Mon Sep 17 00:00:00 2001 From: Jeff Date: Fri, 23 Feb 2024 12:14:15 -0500 Subject: [PATCH] Continue parser experiment --- Cargo.lock | 59 ++++++- Cargo.toml | 2 +- src/lib.rs | 473 ++++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 420 insertions(+), 114 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 612dc6d..23253d9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -14,6 +14,15 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + [[package]] name = "allocator-api2" version = "0.2.16" @@ -44,12 +53,15 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chumsky" -version = "0.9.3" +version = "1.0.0-alpha.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8eebd66744a15ded14960ab4ccdbfb51ad3b81f51f3f04a80adac98c985396c9" +checksum = "b9c28d4e5dd9a9262a38b231153591da6ce1471b818233f4727985d3dd0ed93c" dependencies = [ "hashbrown", + "regex-automata", + "serde", "stacker", + "unicode-ident", ] [[package]] @@ -76,6 +88,12 @@ version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + [[package]] name = "once_cell" version = "1.19.0" @@ -109,6 +127,43 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "regex-automata" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da" + +[[package]] +name = "serde" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.197" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "stacker" version = "0.1.15" diff --git a/Cargo.toml b/Cargo.toml index a4bf0a9..5906cc8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,4 +14,4 @@ opt-level = 3 [dependencies] ariadne = "0.4.0" -chumsky = "0.9.3" +chumsky = "1.0.0-alpha.6" diff --git a/src/lib.rs b/src/lib.rs index 7481dca..fa90e03 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,50 @@ use std::{ use chumsky::{prelude::*, Parser}; +#[derive(Clone, Debug, PartialEq)] +pub enum Statement { + Assignment(Box), + Expression(Expression), + Sequence(Vec), +} + +impl Statement { + pub fn value(value: Value) -> Statement { + Statement::Expression(Expression::Value(value)) + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Assignment { + identifier: Identifier, + statement: Statement, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Expression { + Logic(Box), + Value(Value), +} + +#[derive(Clone, Debug, PartialEq)] +pub struct Logic { + left: Expression, + operator: LogicOperator, + right: Expression, +} + +#[derive(Clone, Debug, PartialEq)] +pub enum LogicOperator { + Equal, + NotEqual, + Greater, + Less, + GreaterOrEqual, + LessOrEqual, + And, + Or, +} + #[derive(Clone, Debug, PartialEq)] pub enum Value { Boolean(bool), @@ -20,6 +64,12 @@ pub enum Value { #[derive(Clone, Debug, PartialEq)] pub struct Identifier(String); +impl Identifier { + pub fn new(text: impl ToString) -> Self { + Identifier(text.to_string()) + } +} + impl Display for Value { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { @@ -34,109 +84,221 @@ impl Display for Value { } } -pub fn parser() -> impl Parser> { - let boolean = just("true") - .or(just("false")) - .map(|s: &str| Value::Boolean(s.parse().unwrap())); +pub fn parser<'src>() -> impl Parser<'src, &'src str, Statement> { + let value = recursive(|value| { + let boolean = just("true") + .or(just("false")) + .map(|s: &str| Value::Boolean(s.parse().unwrap())); - let float_numeric = just('-') - .or_not() - .then(text::int(10)) - .then(just('.').then(text::digits(10))) - .map(|((negative, before), (_, after))| { - let combined = before + "." + &after; + let float_numeric = just('-') + .or_not() + .then(text::int(10)) + .then(just('.').then(text::digits(10))) + .to_slice() + .map(|text: &str| Value::Float(text.parse().unwrap())); - if negative.is_some() { - Value::Float(-combined.parse::().unwrap()) - } else { - Value::Float(combined.parse().unwrap()) - } - }); + let float_other = choice((just("Infinity"), just("-Infinity"), just("NaN"))) + .map(|text| Value::Float(text.parse().unwrap())); - let float_other = choice((just("Infinity"), just("-Infinity"), just("NaN"))) - .map(|text| Value::Float(text.parse().unwrap())); + let float = choice((float_numeric, float_other)); - let float = choice((float_numeric, float_other)); + let integer = just('-').or_not().then(text::int(10).padded()).map( + |(negative, integer_text): (Option, &str)| { + let integer = integer_text.parse::().unwrap(); - let integer = just('-') - .or_not() - .then(text::int(10).padded()) - .map(|(c, s)| { - if let Some(c) = c { - c.to_string() + &s - } else { - s - } - }) - .map(|s: String| Value::Integer(s.parse().unwrap())); + if negative.is_some() { + Value::Integer(-integer) + } else { + Value::Integer(integer) + } + }, + ); - let delimited_string = |delimiter| { - just(delimiter) - .ignore_then(none_of(delimiter).repeated()) - .then_ignore(just(delimiter)) - .map(|chars| Value::String(chars.into_iter().collect())) - }; + let delimited_string = |delimiter| { + just(delimiter) + .ignore_then(none_of(delimiter).repeated()) + .then_ignore(just(delimiter)) + .to_slice() + .map(|text: &str| Value::String(text.to_string())) + }; - let string = choice(( - delimited_string('\''), - delimited_string('"'), - delimited_string('`'), - )); + let string = choice(( + delimited_string('\''), + delimited_string('"'), + delimited_string('`'), + )); - boolean.or(float).or(integer).or(string).then_ignore(end()) + let list = value + .clone() + .separated_by(just(',').padded()) + .allow_trailing() + .collect() + .padded() + .delimited_by(just('['), just(']')) + .map(|values| Value::List(values)); + + choice((boolean, float, integer, string, list)) + }); + + let expression = recursive(|expression| { + let logic = expression + .clone() + .then(choice(( + just("==").to(LogicOperator::Equal), + just("!=").to(LogicOperator::NotEqual), + just(">").to(LogicOperator::Greater), + just("<").to(LogicOperator::Less), + just(">=").to(LogicOperator::GreaterOrEqual), + just("<=").to(LogicOperator::LessOrEqual), + just("&&").to(LogicOperator::And), + just("||").to(LogicOperator::Or), + ))) + .padded() + .then(expression) + .map(|((left, operator), right)| { + Expression::Logic(Box::new(Logic { + left, + operator, + right, + })) + }); + + let value = value.map(|value| Expression::Value(value)); + + choice((logic, value)) + }); + + let statement = recursive(|statement| { + let assignment = text::ident() + .map(|text| Identifier::new(text)) + .then(just("=").padded()) + .then(statement) + .map(|((identifier, _), statement)| { + Statement::Assignment(Box::new(Assignment { + identifier, + statement, + })) + }); + + let expression = expression.map(|expression| Statement::Expression(expression)); + + choice((assignment, expression)) + }); + + statement.then_ignore(end()) } #[cfg(test)] mod tests { use super::*; + #[test] + fn parse_list() { + assert_eq!( + parser().parse("[]").unwrap(), + Statement::value(Value::List(vec![])) + ); + assert_eq!( + parser().parse("[42]").unwrap(), + Statement::value(Value::List(vec![Value::Integer(42)])) + ); + assert_eq!( + parser().parse("[42, 'foo', \"bar\", [1, 2, 3,]]").unwrap(), + Statement::value(Value::List(vec![ + Value::Integer(42), + Value::String("foo".to_string()), + Value::String("bar".to_string()), + Value::List(vec![ + Value::Integer(1), + Value::Integer(2), + Value::Integer(3), + ]) + ])) + ); + } + #[test] fn parse_true() { - assert_eq!(parser().parse("true"), Ok(Value::Boolean(true))); + assert_eq!( + parser().parse("true").unwrap(), + Statement::value(Value::Boolean(true)) + ); } #[test] fn parse_false() { - assert_eq!(parser().parse("false"), Ok(Value::Boolean(false))); + assert_eq!( + parser().parse("false").unwrap(), + Statement::value(Value::Boolean(false)) + ); } #[test] fn parse_positive_float() { - assert_eq!(parser().parse("0.0"), Ok(Value::Float(0.0))); - assert_eq!(parser().parse("42.0"), Ok(Value::Float(42.0))); assert_eq!( - parser().parse(f64::MAX.to_string() + ".0"), - Ok(Value::Float(f64::MAX)) + parser().parse("0.0").unwrap(), + Statement::value(Value::Float(0.0)) ); assert_eq!( - parser().parse(f64::MIN_POSITIVE.to_string()), - Ok(Value::Float(f64::MIN_POSITIVE)) + parser().parse("42.0").unwrap(), + Statement::value(Value::Float(42.0)) + ); + + let max_float = f64::MAX.to_string() + ".0"; + + assert_eq!( + parser().parse(&max_float).unwrap(), + Statement::value(Value::Float(f64::MAX)) + ); + + let min_positive_float = f64::MIN_POSITIVE.to_string(); + + assert_eq!( + parser().parse(&min_positive_float).unwrap(), + Statement::value(Value::Float(f64::MIN_POSITIVE)) ); } #[test] fn parse_negative_float() { - assert_eq!(parser().parse("-0.0"), Ok(Value::Float(-0.0))); - assert_eq!(parser().parse("-42.0"), Ok(Value::Float(-42.0))); assert_eq!( - parser().parse(f64::MIN.to_string() + ".0"), - Ok(Value::Float(f64::MIN)) + parser().parse("-0.0").unwrap(), + Statement::value(Value::Float(-0.0)) ); assert_eq!( - parser().parse("-".to_string() + &f64::MIN_POSITIVE.to_string()), - Ok(Value::Float(-f64::MIN_POSITIVE)) + parser().parse("-42.0").unwrap(), + Statement::value(Value::Float(-42.0)) + ); + + let min_float = f64::MIN.to_string() + ".0"; + + assert_eq!( + parser().parse(&min_float).unwrap(), + Statement::value(Value::Float(f64::MIN)) + ); + + let max_negative_float = f64::MIN_POSITIVE.to_string(); + + assert_eq!( + parser().parse(&max_negative_float).unwrap(), + Statement::value(Value::Float(-f64::MIN_POSITIVE)) ); } #[test] fn parse_other_float() { - assert_eq!(parser().parse("Infinity"), Ok(Value::Float(f64::INFINITY))); assert_eq!( - parser().parse("-Infinity"), - Ok(Value::Float(f64::NEG_INFINITY)) + parser().parse("Infinity").unwrap(), + Statement::value(Value::Float(f64::INFINITY)) + ); + assert_eq!( + parser().parse("-Infinity").unwrap(), + Statement::value(Value::Float(f64::NEG_INFINITY)) ); - if let Value::Float(float) = parser().parse("NaN").unwrap() { + if let Statement::Expression(Expression::Value(Value::Float(float))) = + parser().parse("NaN").unwrap() + { assert!(float.is_nan()) } else { panic!("Expected a float.") @@ -145,82 +307,171 @@ mod tests { #[test] fn parse_positive_integer() { - let parser = parser(); - - assert_eq!(parser.parse("0"), Ok(Value::Integer(0))); - assert_eq!(parser.parse("1"), Ok(Value::Integer(1))); - assert_eq!(parser.parse("2"), Ok(Value::Integer(2))); - assert_eq!(parser.parse("3"), Ok(Value::Integer(3))); - assert_eq!(parser.parse("4"), Ok(Value::Integer(4))); - assert_eq!(parser.parse("5"), Ok(Value::Integer(5))); - assert_eq!(parser.parse("6"), Ok(Value::Integer(6))); - assert_eq!(parser.parse("7"), Ok(Value::Integer(7))); - assert_eq!(parser.parse("8"), Ok(Value::Integer(8))); - assert_eq!(parser.parse("9"), Ok(Value::Integer(9))); - assert_eq!(parser.parse("42"), Ok(Value::Integer(42))); assert_eq!( - parser.parse(i64::MAX.to_string()), - Ok(Value::Integer(i64::MAX)) + parser().parse("0").unwrap(), + Statement::value(Value::Integer(0)) + ); + assert_eq!( + parser().parse("1").unwrap(), + Statement::value(Value::Integer(1)) + ); + assert_eq!( + parser().parse("2").unwrap(), + Statement::value(Value::Integer(2)) + ); + assert_eq!( + parser().parse("3").unwrap(), + Statement::value(Value::Integer(3)) + ); + assert_eq!( + parser().parse("4").unwrap(), + Statement::value(Value::Integer(4)) + ); + assert_eq!( + parser().parse("5").unwrap(), + Statement::value(Value::Integer(5)) + ); + assert_eq!( + parser().parse("6").unwrap(), + Statement::value(Value::Integer(6)) + ); + assert_eq!( + parser().parse("7").unwrap(), + Statement::value(Value::Integer(7)) + ); + assert_eq!( + parser().parse("8").unwrap(), + Statement::value(Value::Integer(8)) + ); + assert_eq!( + parser().parse("9").unwrap(), + Statement::value(Value::Integer(9)) + ); + assert_eq!( + parser().parse("42").unwrap(), + Statement::value(Value::Integer(42)) + ); + + let maximum_integer = i64::MAX.to_string(); + + assert_eq!( + parser().parse(&maximum_integer).unwrap(), + Statement::value(Value::Integer(i64::MAX)) ); } #[test] fn parse_negative_integer() { - let parser = parser(); - - assert_eq!(parser.parse("-0"), Ok(Value::Integer(-0))); - assert_eq!(parser.parse("-1"), Ok(Value::Integer(-1))); - assert_eq!(parser.parse("-2"), Ok(Value::Integer(-2))); - assert_eq!(parser.parse("-3"), Ok(Value::Integer(-3))); - assert_eq!(parser.parse("-4"), Ok(Value::Integer(-4))); - assert_eq!(parser.parse("-5"), Ok(Value::Integer(-5))); - assert_eq!(parser.parse("-6"), Ok(Value::Integer(-6))); - assert_eq!(parser.parse("-7"), Ok(Value::Integer(-7))); - assert_eq!(parser.parse("-8"), Ok(Value::Integer(-8))); - assert_eq!(parser.parse("-9"), Ok(Value::Integer(-9))); - assert_eq!(parser.parse("-42"), Ok(Value::Integer(-42))); assert_eq!( - parser.parse(i64::MIN.to_string()), - Ok(Value::Integer(i64::MIN)) + parser().parse("-0").unwrap(), + Statement::value(Value::Integer(-0)) + ); + assert_eq!( + parser().parse("-1").unwrap(), + Statement::value(Value::Integer(-1)) + ); + assert_eq!( + parser().parse("-2").unwrap(), + Statement::value(Value::Integer(-2)) + ); + assert_eq!( + parser().parse("-3").unwrap(), + Statement::value(Value::Integer(-3)) + ); + assert_eq!( + parser().parse("-4").unwrap(), + Statement::value(Value::Integer(-4)) + ); + assert_eq!( + parser().parse("-5").unwrap(), + Statement::value(Value::Integer(-5)) + ); + assert_eq!( + parser().parse("-6").unwrap(), + Statement::value(Value::Integer(-6)) + ); + assert_eq!( + parser().parse("-7").unwrap(), + Statement::value(Value::Integer(-7)) + ); + assert_eq!( + parser().parse("-8").unwrap(), + Statement::value(Value::Integer(-8)) + ); + assert_eq!( + parser().parse("-9").unwrap(), + Statement::value(Value::Integer(-9)) + ); + assert_eq!( + parser().parse("-42").unwrap(), + Statement::value(Value::Integer(-42)) + ); + + let minimum_integer = i64::MIN.to_string(); + + assert_eq!( + parser().parse(&minimum_integer).unwrap(), + Statement::value(Value::Integer(i64::MIN)) ); } #[test] fn double_quoted_string() { - let parser = parser(); - - assert_eq!(parser.parse("\"\""), Ok(Value::String("".to_string()))); - assert_eq!(parser.parse("\"1\""), Ok(Value::String("1".to_string()))); - assert_eq!(parser.parse("\"42\""), Ok(Value::String("42".to_string()))); assert_eq!( - parser.parse("\"foobar\""), - Ok(Value::String("foobar".to_string())) + parser().parse("\"\"").unwrap(), + Statement::value(Value::String("".to_string())) + ); + assert_eq!( + parser().parse("\"1\"").unwrap(), + Statement::value(Value::String("1".to_string())) + ); + assert_eq!( + parser().parse("\"42\"").unwrap(), + Statement::value(Value::String("42".to_string())) + ); + assert_eq!( + parser().parse("\"foobar\"").unwrap(), + Statement::value(Value::String("foobar".to_string())) ); } #[test] fn single_quoted_string() { - let parser = parser(); - - assert_eq!(parser.parse("''"), Ok(Value::String("".to_string()))); - assert_eq!(parser.parse("'1'"), Ok(Value::String("1".to_string()))); - assert_eq!(parser.parse("'42'"), Ok(Value::String("42".to_string()))); assert_eq!( - parser.parse("'foobar'"), - Ok(Value::String("foobar".to_string())) + parser().parse("''").unwrap(), + Statement::value(Value::String("".to_string())) + ); + assert_eq!( + parser().parse("'1'").unwrap(), + Statement::value(Value::String("1".to_string())) + ); + assert_eq!( + parser().parse("'42'").unwrap(), + Statement::value(Value::String("42".to_string())) + ); + assert_eq!( + parser().parse("'foobar'").unwrap(), + Statement::value(Value::String("foobar".to_string())) ); } #[test] fn grave_quoted_string() { - let parser = parser(); - - assert_eq!(parser.parse("``"), Ok(Value::String("".to_string()))); - assert_eq!(parser.parse("`1`"), Ok(Value::String("1".to_string()))); - assert_eq!(parser.parse("`42`"), Ok(Value::String("42".to_string()))); assert_eq!( - parser.parse("`foobar`"), - Ok(Value::String("foobar".to_string())) + parser().parse("``").unwrap(), + Statement::value(Value::String("".to_string())) + ); + assert_eq!( + parser().parse("`1`").unwrap(), + Statement::value(Value::String("1".to_string())) + ); + assert_eq!( + parser().parse("`42`").unwrap(), + Statement::value(Value::String("42".to_string())) + ); + assert_eq!( + parser().parse("`foobar`").unwrap(), + Statement::value(Value::String("foobar".to_string())) ); } }