From 67d68cd97457c9628248dc77d009aff7635d72c0 Mon Sep 17 00:00:00 2001 From: Sebastian Schmidt Date: Sat, 30 Mar 2019 11:54:19 +0100 Subject: [PATCH] Add string constants * Implement string parsing in tokenizer * Implement escape sequences `\"` and `\\` * Document changes * Update change log Relates to #35 --- CHANGELOG.md | 2 + README.md | 4 +- src/context/mod.rs | 4 +- src/error/display.rs | 1 + src/error/mod.rs | 5 ++- src/lib.rs | 6 +-- src/token/display.rs | 1 + src/token/mod.rs | 89 ++++++++++++++++++++++++++++++++------------ src/tree/mod.rs | 9 +++-- tests/integration.rs | 16 +++++++- 10 files changed, 100 insertions(+), 37 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 510926b..b47d973 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ ### Added + * String constants + ### Removed ### Changed diff --git a/README.md b/README.md index b75b010..02ddeb4 100644 --- a/README.md +++ b/README.md @@ -221,12 +221,12 @@ They return the result as the type it was passed into the function. ### Values Operators take values as arguments and produce values as results. -Values can be boolean, integer or floating point numbers, tuples or the empty type. -Strings are supported as well, but there are no operations defined for them yet. +Values can be boolean, integer or floating point numbers, strings, tuples or the empty type. Values are denoted as displayed in the following table. | Value type | Example | |------------|---------| +| `Value::String` | `"abc"`, `""`, `"a\"b\\c"` | | `Value::Boolean` | `true`, `false` | | `Value::Int` | `3`, `-9`, `0`, `135412` | | `Value::Float` | `3.`, `.35`, `1.00`, `0.5`, `123.554` | diff --git a/src/context/mod.rs b/src/context/mod.rs index 72694f1..d58199b 100644 --- a/src/context/mod.rs +++ b/src/context/mod.rs @@ -1,9 +1,9 @@ use std::collections::HashMap; -use EvalexprError; -use EvalexprResult; use function::Function; use value::value_type::ValueType; +use EvalexprError; +use EvalexprResult; use crate::value::Value; diff --git a/src/error/display.rs b/src/error/display.rs index 9857dc0..3bfd640 100644 --- a/src/error/display.rs +++ b/src/error/display.rs @@ -82,6 +82,7 @@ impl fmt::Display for EvalexprError { write!(f, "Error modulating {} % {}", dividend, divisor) }, ContextNotManipulable => write!(f, "Cannot manipulate context"), + IllegalEscapeSequence(string) => write!(f, "Illegal escape sequence: {}", string), CustomMessage(message) => write!(f, "Error: {}", message), } } diff --git a/src/error/mod.rs b/src/error/mod.rs index 20ea316..106f0ff 100644 --- a/src/error/mod.rs +++ b/src/error/mod.rs @@ -6,7 +6,7 @@ //! They are meant as shortcuts to not write the same error checking code everywhere. use token::PartialToken; -use value::{TupleType, value_type::ValueType}; +use value::{value_type::ValueType, TupleType}; use crate::value::Value; @@ -163,6 +163,9 @@ pub enum EvalexprError { /// A modification was attempted on a `Context` that does not allow modifications. ContextNotManipulable, + /// An escape sequence within a string literal is illegal. + IllegalEscapeSequence(String), + /// A custom error explained by its message. CustomMessage(String), } diff --git a/src/lib.rs b/src/lib.rs index 51624d5..73cab86 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -208,12 +208,12 @@ //! ### Values //! //! Operators take values as arguments and produce values as results. -//! Values can be boolean, integer or floating point numbers, tuples or the empty type. -//! Strings are supported as well, but there are no operations defined for them yet. +//! Values can be boolean, integer or floating point numbers, strings, tuples or the empty type. //! Values are denoted as displayed in the following table. //! //! | Value type | Example | //! |------------|---------| +//! | `Value::String` | `"abc"`, `""`, `"a\"b\\c"` | //! | `Value::Boolean` | `true`, `false` | //! | `Value::Int` | `3`, `-9`, `0`, `135412` | //! | `Value::Float` | `3.`, `.35`, `1.00`, `0.5`, `123.554` | @@ -361,7 +361,7 @@ pub use function::Function; pub use interface::*; pub use tree::Node; pub use value::{ - EMPTY_VALUE, EmptyType, FloatType, IntType, TupleType, Value, value_type::ValueType, + value_type::ValueType, EmptyType, FloatType, IntType, TupleType, Value, EMPTY_VALUE, }; mod context; diff --git a/src/token/display.rs b/src/token/display.rs index 8d73954..f696e6c 100644 --- a/src/token/display.rs +++ b/src/token/display.rs @@ -38,6 +38,7 @@ impl fmt::Display for Token { Float(float) => float.fmt(f), Int(int) => int.fmt(f), Boolean(boolean) => boolean.fmt(f), + String(string) => fmt::Debug::fmt(string, f), } } } diff --git a/src/token/mod.rs b/src/token/mod.rs index 8d7a99e..d3a3fe4 100644 --- a/src/token/mod.rs +++ b/src/token/mod.rs @@ -38,6 +38,7 @@ pub enum Token { Float(FloatType), Int(IntType), Boolean(bool), + String(String), } #[derive(Clone, Debug, PartialEq)] @@ -63,6 +64,12 @@ fn char_to_partial_token(c: char) -> PartialToken { '%' => PartialToken::Token(Token::Percent), '^' => PartialToken::Token(Token::Hat), + '(' => PartialToken::Token(Token::LBrace), + ')' => PartialToken::Token(Token::RBrace), + + ',' => PartialToken::Token(Token::Comma), + ';' => PartialToken::Token(Token::Semicolon), + '=' => PartialToken::Eq, '!' => PartialToken::ExclamationMark, '>' => PartialToken::Gt, @@ -70,12 +77,6 @@ fn char_to_partial_token(c: char) -> PartialToken { '&' => PartialToken::Ampersand, '|' => PartialToken::VerticalBar, - '(' => PartialToken::Token(Token::LBrace), - ')' => PartialToken::Token(Token::RBrace), - - ',' => PartialToken::Token(Token::Comma), - ';' => PartialToken::Token(Token::Semicolon), - c => { if c.is_whitespace() { PartialToken::Whitespace @@ -118,6 +119,7 @@ impl Token { Token::Float(_) => true, Token::Int(_) => true, Token::Boolean(_) => true, + Token::String(_) => true, } } @@ -152,31 +154,70 @@ impl Token { Token::Float(_) => true, Token::Int(_) => true, Token::Boolean(_) => true, + Token::String(_) => true, } } } -/// Converts a string to a vector of partial tokens. -fn str_to_tokens(string: &str) -> Vec { - let mut result = Vec::new(); - for c in string.chars() { - let partial_token = char_to_partial_token(c); +/// Parses an escape sequence within a string literal. +fn parse_escape_sequence>(iter: &mut Iter) -> EvalexprResult { + match iter.next() { + Some('"') => Ok('"'), + Some('\\') => Ok('\\'), + Some(c) => Err(EvalexprError::IllegalEscapeSequence(format!("\\{}", c))), + None => Err(EvalexprError::IllegalEscapeSequence(format!("\\"))), + } +} - let if_let_successful = - if let (Some(PartialToken::Literal(last)), PartialToken::Literal(literal)) = - (result.last_mut(), &partial_token) - { - last.push_str(literal); - true - } else { - false - }; +/// Parses a string value from the given character iterator. +/// +/// The first character from the iterator is interpreted as first character of the string. +/// The string is terminated by a double quote `"`. +/// Occurrences of `"` within the string can be escaped with `\`. +/// The backslash needs to be escaped with another backslash `\`. +fn parse_string_literal>( + mut iter: &mut Iter, +) -> EvalexprResult { + let mut result = String::new(); - if !if_let_successful { - result.push(partial_token); + while let Some(c) = iter.next() { + match c { + '"' => break, + '\\' => result.push(parse_escape_sequence(&mut iter)?), + c => result.push(c), } } - result + + Ok(PartialToken::Token(Token::String(result))) +} + +/// Converts a string to a vector of partial tokens. +fn str_to_partial_tokens(string: &str) -> EvalexprResult> { + let mut result = Vec::new(); + let mut iter = string.chars().peekable(); + + while let Some(c) = iter.next() { + if c == '"' { + result.push(parse_string_literal(&mut iter)?); + } else { + let partial_token = char_to_partial_token(c); + + let if_let_successful = + if let (Some(PartialToken::Literal(last)), PartialToken::Literal(literal)) = + (result.last_mut(), &partial_token) + { + last.push_str(literal); + true + } else { + false + }; + + if !if_let_successful { + result.push(partial_token); + } + } + } + Ok(result) } /// Resolves all partial tokens by converting them to complex tokens. @@ -255,5 +296,5 @@ fn partial_tokens_to_tokens(mut tokens: &[PartialToken]) -> EvalexprResult EvalexprResult> { - partial_tokens_to_tokens(&str_to_tokens(string)) + partial_tokens_to_tokens(&str_to_partial_tokens(string)?) } diff --git a/src/tree/mod.rs b/src/tree/mod.rs index ee2300e..12b3de7 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -1,9 +1,9 @@ +use token::Token; +use value::{TupleType, EMPTY_VALUE}; use EmptyContext; use EmptyType; use FloatType; use IntType; -use token::Token; -use value::{EMPTY_VALUE, TupleType}; use crate::{ context::Context, @@ -397,9 +397,10 @@ pub(crate) fn tokens_to_operator_tree(tokens: Vec) -> EvalexprResult Some(Node::new(Const::new(Value::Float(number)))), - Token::Int(number) => Some(Node::new(Const::new(Value::Int(number)))), + Token::Float(float) => Some(Node::new(Const::new(Value::Float(float)))), + Token::Int(int) => Some(Node::new(Const::new(Value::Int(int)))), Token::Boolean(boolean) => Some(Node::new(Const::new(Value::Boolean(boolean)))), + Token::String(string) => Some(Node::new(Const::new(Value::String(string)))), }; if let Some(node) = node { diff --git a/tests/integration.rs b/tests/integration.rs index 7f21a31..42fa35c 100644 --- a/tests/integration.rs +++ b/tests/integration.rs @@ -1,6 +1,6 @@ extern crate evalexpr; -use evalexpr::{*, error::*}; +use evalexpr::{error::*, *}; #[test] fn test_unary_examples() { @@ -539,6 +539,20 @@ fn test_expression_chaining() { ); } +#[test] +fn test_strings() { + let mut context = HashMapContext::new(); + assert_eq!(eval("\"string\""), Ok(Value::from("string"))); + assert_eq!( + eval_with_context_mut("a = \"a string\"", &mut context), + Ok(Value::Empty) + ); + assert_eq!( + eval_boolean_with_context("a == \"a string\"", &context), + Ok(true) + ); +} + #[cfg(feature = "serde")] #[test] fn test_serde() {