1
0
dust/src/token.rs
2023-08-22 11:40:50 -04:00

563 lines
17 KiB
Rust

use std::fmt::{self, Display, Formatter};
use crate::error::{Error, Result};
#[derive(Clone, PartialEq, Debug)]
pub enum Token {
// Arithmetic
Plus,
Minus,
Star,
Slash,
Percent,
Hat,
// Logic
Eq,
Neq,
Gt,
Lt,
Geq,
Leq,
And,
Or,
Not,
// Precedence
LBrace,
RBrace,
// Assignment
Assign,
PlusAssign,
MinusAssign,
StarAssign,
SlashAssign,
PercentAssign,
HatAssign,
AndAssign,
OrAssign,
// Special
Comma,
Semicolon,
Yield(String, String),
// Values, Variables and Functions
Identifier(String),
Float(f64),
Int(i64),
Boolean(bool),
String(String),
Function(String),
}
/// A partial token is an input character whose meaning depends on the characters around it.
#[derive(Clone, Debug, PartialEq)]
pub enum PartialToken {
/// A partial token that unambiguously maps to a single token.
Token(Token),
/// A partial token that is a literal.
Literal(String),
/// A plus character '+'.
Plus,
/// A minus character '-'.
Minus,
/// A star character '*'.
Star,
/// A slash character '/'.
Slash,
/// A percent character '%'.
Percent,
/// A hat character '^'.
Hat,
/// A whitespace character, e.g. ' '.
Whitespace,
/// An equal-to character '='.
Eq,
/// An exclamation mark character '!'.
ExclamationMark,
/// A greater-than character '>'.
Gt,
/// A lower-than character '<'.
Lt,
/// An ampersand character '&'.
Ampersand,
/// A vertical bar character '|'.
VerticalBar,
}
// Make this a const fn as soon as is_whitespace and to_string get stable (issue #57563)
fn char_to_partial_token(c: char) -> PartialToken {
match c {
'+' => PartialToken::Plus,
'-' => PartialToken::Minus,
'*' => PartialToken::Star,
'/' => PartialToken::Slash,
'%' => PartialToken::Percent,
'^' => PartialToken::Hat,
'(' => PartialToken::Token(Token::LBrace),
')' => PartialToken::Token(Token::RBrace),
',' => PartialToken::Token(Token::Comma),
';' => PartialToken::Token(Token::Semicolon),
'=' => PartialToken::Eq,
'!' => PartialToken::ExclamationMark,
'>' => PartialToken::Gt,
'<' => PartialToken::Lt,
'&' => PartialToken::Ampersand,
'|' => PartialToken::VerticalBar,
c => {
if c.is_whitespace() {
PartialToken::Whitespace
} else {
PartialToken::Literal(c.to_string())
}
}
}
}
impl Token {
#[cfg(not(tarpaulin_include))]
pub(crate) const fn is_leftsided_value(&self) -> bool {
match self {
Token::Plus => false,
Token::Minus => false,
Token::Star => false,
Token::Slash => false,
Token::Percent => false,
Token::Hat => false,
Token::Eq => false,
Token::Neq => false,
Token::Gt => false,
Token::Lt => false,
Token::Geq => false,
Token::Leq => false,
Token::And => false,
Token::Or => false,
Token::Not => false,
Token::LBrace => true,
Token::RBrace => false,
Token::Comma => false,
Token::Semicolon => false,
Token::Yield(_, _) => false,
Token::Assign => false,
Token::PlusAssign => false,
Token::MinusAssign => false,
Token::StarAssign => false,
Token::SlashAssign => false,
Token::PercentAssign => false,
Token::HatAssign => false,
Token::AndAssign => false,
Token::OrAssign => false,
Token::Identifier(_) => true,
Token::Float(_) => true,
Token::Int(_) => true,
Token::Boolean(_) => true,
Token::String(_) => true,
Token::Function(_) => true,
}
}
#[cfg(not(tarpaulin_include))]
pub(crate) const fn is_rightsided_value(&self) -> bool {
match self {
Token::Plus => false,
Token::Minus => false,
Token::Star => false,
Token::Slash => false,
Token::Percent => false,
Token::Hat => false,
Token::Eq => false,
Token::Neq => false,
Token::Gt => false,
Token::Lt => false,
Token::Geq => false,
Token::Leq => false,
Token::And => false,
Token::Or => false,
Token::Not => false,
Token::LBrace => false,
Token::RBrace => true,
Token::Comma => false,
Token::Semicolon => false,
Token::Yield(_, _) => false,
Token::Assign => false,
Token::PlusAssign => false,
Token::MinusAssign => false,
Token::StarAssign => false,
Token::SlashAssign => false,
Token::PercentAssign => false,
Token::HatAssign => false,
Token::AndAssign => false,
Token::OrAssign => false,
Token::Identifier(_) => true,
Token::Float(_) => true,
Token::Int(_) => true,
Token::Boolean(_) => true,
Token::String(_) => true,
Token::Function(_) => true,
}
}
#[cfg(not(tarpaulin_include))]
pub(crate) fn is_assignment(&self) -> bool {
use Token::*;
matches!(
self,
Assign
| PlusAssign
| MinusAssign
| StarAssign
| SlashAssign
| PercentAssign
| HatAssign
| AndAssign
| OrAssign
)
}
}
/// Parses an escape sequence within a string literal.
fn parse_escape_sequence<Iter: Iterator<Item = char>>(iter: &mut Iter) -> Result<char> {
match iter.next() {
Some('"') => Ok('"'),
Some('\\') => Ok('\\'),
Some(c) => Err(Error::IllegalEscapeSequence(format!("\\{}", c))),
None => Err(Error::IllegalEscapeSequence("\\".to_string())),
}
}
/// Parses a string value from the given character iterator.
///
/// The first character from the iterator is interpreted as first character of the string.
/// The string is terminated by a double quote `"`.
/// Occurrences of `"` within the string can be escaped with `\`.
/// The backslash needs to be escaped with another backslash `\`.
fn parse_string_literal<Iter: Iterator<Item = char>>(mut iter: &mut Iter) -> Result<PartialToken> {
let mut result = String::new();
while let Some(c) = iter.next() {
match c {
'"' => break,
'\\' => result.push(parse_escape_sequence(&mut iter)?),
c => result.push(c),
}
}
Ok(PartialToken::Token(Token::String(result)))
}
fn parse_function<Iter: Iterator<Item = char>>(mut iter: &mut Iter) -> Result<PartialToken> {
let mut result = String::new();
while let Some(c) = iter.next() {
match c {
'\'' => break,
'\\' => result.push(parse_escape_sequence(&mut iter)?),
c => result.push(c),
}
}
Ok(PartialToken::Token(Token::Function(result)))
}
/// Converts a string to a vector of partial tokens.
fn str_to_partial_tokens(string: &str) -> Result<Vec<PartialToken>> {
let mut result = Vec::new();
let mut iter = string.chars().peekable();
while let Some(c) = iter.next() {
if c == '"' {
result.push(parse_string_literal(&mut iter)?);
} else if c == '\'' {
result.push(parse_function(&mut iter)?)
} else {
let partial_token = char_to_partial_token(c);
let if_let_successful =
if let (Some(PartialToken::Literal(last)), PartialToken::Literal(literal)) =
(result.last_mut(), &partial_token)
{
last.push_str(literal);
true
} else {
false
};
if !if_let_successful {
result.push(partial_token);
}
}
}
Ok(result)
}
/// Resolves all partial tokens by converting them to complex tokens.
fn partial_tokens_to_tokens(mut tokens: &[PartialToken]) -> Result<Vec<Token>> {
let mut result = Vec::new();
while !tokens.is_empty() {
let first = tokens[0].clone();
let second = tokens.get(1).cloned();
let third = tokens.get(2).cloned();
let mut cutoff = 2;
result.extend(
match first {
PartialToken::Token(token) => {
cutoff = 1;
Some(token)
}
PartialToken::Plus => match second {
Some(PartialToken::Eq) => Some(Token::PlusAssign),
_ => {
cutoff = 1;
Some(Token::Plus)
}
},
PartialToken::Minus => match second {
Some(PartialToken::Eq) => Some(Token::MinusAssign),
_ => {
cutoff = 1;
Some(Token::Minus)
}
},
PartialToken::Star => match second {
Some(PartialToken::Eq) => Some(Token::StarAssign),
_ => {
cutoff = 1;
Some(Token::Star)
}
},
PartialToken::Slash => match second {
Some(PartialToken::Eq) => Some(Token::SlashAssign),
_ => {
cutoff = 1;
Some(Token::Slash)
}
},
PartialToken::Percent => match second {
Some(PartialToken::Eq) => Some(Token::PercentAssign),
_ => {
cutoff = 1;
Some(Token::Percent)
}
},
PartialToken::Hat => match second {
Some(PartialToken::Eq) => Some(Token::HatAssign),
_ => {
cutoff = 1;
Some(Token::Hat)
}
},
PartialToken::Literal(literal) => {
cutoff = 1;
if let Ok(number) = parse_dec_or_hex(&literal) {
Some(Token::Int(number))
} else if let Ok(number) = literal.parse::<f64>() {
Some(Token::Float(number))
} else if let Ok(boolean) = literal.parse::<bool>() {
Some(Token::Boolean(boolean))
} else {
// If there are two tokens following this one, check if the next one is
// a plus or a minus. If so, then attempt to parse all three tokens as a
// scientific notation number of the form `<coefficient>e{+,-}<exponent>`,
// for example [Literal("10e"), Minus, Literal("3")] => "1e-3".parse().
match (second, third) {
(Some(second), Some(third))
if second == PartialToken::Minus
|| second == PartialToken::Plus =>
{
if let Ok(number) =
format!("{}{}{}", literal, second, third).parse::<f64>()
{
cutoff = 3;
Some(Token::Float(number))
} else {
Some(Token::Identifier(literal.to_string()))
}
}
_ => Some(Token::Identifier(literal.to_string())),
}
}
}
PartialToken::Whitespace => {
cutoff = 1;
None
}
PartialToken::Eq => match second {
Some(PartialToken::Eq) => Some(Token::Eq),
_ => {
cutoff = 1;
Some(Token::Assign)
}
},
PartialToken::ExclamationMark => match second {
Some(PartialToken::Eq) => Some(Token::Neq),
_ => {
cutoff = 1;
Some(Token::Not)
}
},
PartialToken::Gt => match second {
Some(PartialToken::Eq) => Some(Token::Geq),
_ => {
cutoff = 1;
Some(Token::Gt)
}
},
PartialToken::Lt => match second {
Some(PartialToken::Eq) => Some(Token::Leq),
_ => {
cutoff = 1;
Some(Token::Lt)
}
},
PartialToken::Ampersand => match second {
Some(PartialToken::Ampersand) => match third {
Some(PartialToken::Eq) => {
cutoff = 3;
Some(Token::AndAssign)
}
_ => Some(Token::And),
},
_ => return Err(Error::unmatched_partial_token(first, second)),
},
PartialToken::VerticalBar => match second {
Some(PartialToken::VerticalBar) => match third {
Some(PartialToken::Eq) => {
cutoff = 3;
Some(Token::OrAssign)
}
_ => Some(Token::Or),
},
_ => return Err(Error::unmatched_partial_token(first, second)),
},
}
.into_iter(),
);
tokens = &tokens[cutoff..];
}
Ok(result)
}
impl Display for Token {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use self::Token::*;
match self {
Plus => write!(f, "+"),
Minus => write!(f, "-"),
Star => write!(f, "*"),
Slash => write!(f, "/"),
Percent => write!(f, "%"),
Hat => write!(f, "^"),
// Logic
Eq => write!(f, "=="),
Neq => write!(f, "!="),
Gt => write!(f, ">"),
Lt => write!(f, "<"),
Geq => write!(f, ">="),
Leq => write!(f, "<="),
And => write!(f, "&&"),
Or => write!(f, "||"),
Not => write!(f, "!"),
// Precedence
LBrace => write!(f, "("),
RBrace => write!(f, ")"),
// Assignment
Assign => write!(f, "="),
PlusAssign => write!(f, "+="),
MinusAssign => write!(f, "-="),
StarAssign => write!(f, "*="),
SlashAssign => write!(f, "/="),
PercentAssign => write!(f, "%="),
HatAssign => write!(f, "^="),
AndAssign => write!(f, "&&="),
OrAssign => write!(f, "||="),
// Special
Comma => write!(f, ","),
Semicolon => write!(f, ";"),
// Values => write!(f, ""), Variables and Functions
Identifier(identifier) => identifier.fmt(f),
Float(float) => float.fmt(f),
Int(int) => int.fmt(f),
Boolean(boolean) => boolean.fmt(f),
String(string) => fmt::Debug::fmt(string, f),
Function(string) => write!(f, "'{string}'"),
Yield(_, _) => todo!(),
}
}
}
impl Display for PartialToken {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
use self::PartialToken::*;
match self {
Token(token) => token.fmt(f),
Literal(literal) => literal.fmt(f),
Whitespace => write!(f, " "),
Plus => write!(f, "+"),
Minus => write!(f, "-"),
Star => write!(f, "*"),
Slash => write!(f, "/"),
Percent => write!(f, "%"),
Hat => write!(f, "^"),
Eq => write!(f, "="),
ExclamationMark => write!(f, "!"),
Gt => write!(f, ">"),
Lt => write!(f, "<"),
Ampersand => write!(f, "&"),
VerticalBar => write!(f, "|"),
}
}
}
pub(crate) fn tokenize(string: &str) -> Result<Vec<Token>> {
partial_tokens_to_tokens(&str_to_partial_tokens(string)?)
}
fn parse_dec_or_hex(literal: &str) -> std::result::Result<i64, std::num::ParseIntError> {
if let Some(literal) = literal.strip_prefix("0x") {
literal.parse()
} else {
literal.parse()
}
}
#[cfg(test)]
mod tests {
use crate::token::{tokenize, Token};
#[test]
fn assignment_lhs_is_identifier() {
let tokens = tokenize("a = 1").unwrap();
assert_eq!(
tokens.as_slice(),
[
Token::Identifier("a".to_string()),
Token::Assign,
Token::Int(1)
]
);
}
}