Add character literal tokens; Change strings to double quotes only

This commit is contained in:
Jeff 2024-08-23 05:24:48 -04:00
parent 23782c37a3
commit 0d0a2d2237
4 changed files with 203 additions and 121 deletions

View File

@ -132,7 +132,29 @@ impl<'src> Lexer<'src> {
}
'a'..='z' | 'A'..='Z' => self.lex_alphanumeric()?,
'"' => self.lex_string('"')?,
'\'' => self.lex_string('\'')?,
'\'' => {
    // Lexes a character literal of the form `'c'`.
    //
    // Remember where the literal starts so the span is derived from the
    // actual start instead of a hard-coded width.
    let start = self.position;

    // Step over the opening quote.
    self.position += 1;

    let character = match self.peek_char() {
        Some(character) => character,
        None => {
            // Input ended right after the opening quote.
            return Err(LexError::UnexpectedEndOfFile {
                position: self.position,
            });
        }
    };

    // Step over the character itself.
    // NOTE(review): this advances by one position regardless of the
    // character's encoded width; confirm how `peek_char` indexes the source.
    self.position += 1;

    match self.peek_char() {
        Some('\'') => {
            // Step over the closing quote.
            self.position += 1;

            (Token::Character(character), (start, self.position))
        }
        Some(actual) => {
            // Report the character that is really sitting where the closing
            // quote should be, not the literal's own character.
            return Err(LexError::ExpectedCharacter {
                expected: '\'',
                actual,
                position: self.position,
            });
        }
        None => {
            // Input ended before the closing quote.
            return Err(LexError::UnexpectedEndOfFile {
                position: self.position,
            });
        }
    }
}
'+' => {
if let Some('=') = self.peek_second_char() {
self.position += 2;
@ -247,7 +269,7 @@ impl<'src> Lexer<'src> {
self.position += 1;
return Err(LexError::UnexpectedCharacter {
character: c,
actual: c,
position: self.position,
});
}
@ -266,7 +288,7 @@ impl<'src> Lexer<'src> {
self.position += 1;
return Err(LexError::UnexpectedCharacter {
character: c,
actual: c,
position: self.position,
});
}
@ -285,7 +307,7 @@ impl<'src> Lexer<'src> {
self.position += 1;
return Err(LexError::UnexpectedCharacter {
character: c,
actual: c,
position: self.position,
});
}
@ -301,7 +323,7 @@ impl<'src> Lexer<'src> {
/// Returns the next token and its span without consuming it.
///
/// The token is produced by `next_token`; the lexer is then moved back to the
/// start of the returned span so the same token is produced again by the next
/// call.
///
/// # Errors
///
/// Propagates any `LexError` from `next_token`. In that case the position is
/// left wherever `next_token` left it.
pub fn peek_token(&mut self) -> Result<(Token<'src>, Span), LexError> {
    let (token, span) = self.next_token()?;

    // Rewind to the span start reported by the lexer itself. This avoids
    // relying on a separately maintained token length, which would corrupt
    // the position whenever it disagrees with what was actually consumed.
    self.position = span.0;

    Ok((token, span))
}
@ -465,21 +487,26 @@ impl<'src> Lexer<'src> {
/// Errors produced while lexing; every variant records the `position` it refers to.
#[derive(Debug, PartialEq, Clone)]
pub enum LexError {
UnexpectedCharacter { character: char, position: usize },
/// The character `expected` was required at `position`, but `actual` was found.
ExpectedCharacter {
expected: char,
actual: char,
position: usize,
},
/// The character `actual` was found at `position` where it is not accepted.
UnexpectedCharacter {
actual: char,
position: usize,
},
/// The input ended at `position` while more input was still required.
UnexpectedEndOfFile {
position: usize,
},
}
impl LexError {
/// Returns the span of source text this error refers to.
pub fn position(&self) -> Span {
match self {
Self::UnexpectedCharacter { position, .. } => (*position, *position),
}
}
}
impl Error for LexError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
Self::UnexpectedCharacter { .. } => None,
// Character errors span from `position` to `position + 1`; the
// end-of-file error has an empty span at `position`.
Self::ExpectedCharacter { position, .. } => (*position, *position + 1),
Self::UnexpectedCharacter { position, .. } => (*position, *position + 1),
Self::UnexpectedEndOfFile { position } => (*position, *position),
}
}
}
@ -487,8 +514,20 @@ impl Error for LexError {
impl Display for LexError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
Self::UnexpectedCharacter { character, .. } => {
write!(f, "Unexpected character: '{}'", character)
Self::ExpectedCharacter {
expected,
actual,
position,
} => write!(
f,
"Expected character '{}' at {:?}, found '{}'",
expected, position, actual
),
Self::UnexpectedCharacter { actual, position } => {
write!(f, "Unexpected character at {:?}: '{}'", position, actual)
}
Self::UnexpectedEndOfFile { position } => {
write!(f, "Unexpected end of file at {:?}", position)
}
}
}
@ -498,9 +537,19 @@ impl Display for LexError {
mod tests {
use super::*;
/// A single-quoted character lexes to one `Character` token followed by `Eof`.
#[test]
fn character() {
    let expected = vec![(Token::Character('a'), (0, 3)), (Token::Eof, (3, 3))];
    let actual = lex("'a'");

    assert_eq!(actual, Ok(expected));
}
#[test]
fn map_expression() {
let input = "map { x = '1', y = 2, z = 3.0 }";
let input = "map { x = \"1\", y = 2, z = 3.0 }";
assert_eq!(
lex(input),
@ -791,7 +840,7 @@ mod tests {
#[test]
fn block() {
let input = "{ x = 42; y = 'foobar' }";
let input = "{ x = 42; y = \"foobar\" }";
assert_eq!(
lex(input),
@ -855,27 +904,6 @@ mod tests {
)
}
/// Lexes a brace-delimited, comma-separated pair of assignments whose second
/// value is a single-quoted string.
#[test]
fn map() {
    let expected = vec![
        (Token::LeftCurlyBrace, (0, 1)),
        (Token::Identifier("x"), (2, 3)),
        (Token::Equal, (4, 5)),
        (Token::Integer("42"), (6, 8)),
        (Token::Comma, (8, 9)),
        (Token::Identifier("y"), (10, 11)),
        (Token::Equal, (12, 13)),
        (Token::String("foobar"), (14, 22)),
        (Token::RightCurlyBrace, (23, 24)),
        (Token::Eof, (24, 24)),
    ];

    assert_eq!(lex("{ x = 42, y = 'foobar' }"), Ok(expected));
}
#[test]
fn greater_than() {
let input = ">";
@ -1030,7 +1058,7 @@ mod tests {
#[test]
fn write_line() {
let input = "write_line('Hello, world!')";
let input = "write_line(\"Hello, world!\")";
assert_eq!(
lex(input),
@ -1046,7 +1074,7 @@ mod tests {
#[test]
fn string_concatenation() {
let input = "'Hello, ' + 'world!'";
let input = "\"Hello, \" + \"world!\"";
assert_eq!(
lex(input),
@ -1061,7 +1089,7 @@ mod tests {
#[test]
fn string() {
let input = "'Hello, world!'";
let input = "\"Hello, world!\"";
assert_eq!(
lex(input),

View File

@ -1116,15 +1116,6 @@ impl ParseError {
}
}
impl Error for ParseError {
    /// Exposes the error carried by the `Lex` variant as the underlying cause;
    /// every other variant reports no source.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        if let Self::Lex(lex_error) = self {
            Some(lex_error)
        } else {
            None
        }
    }
}
impl Display for ParseError {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
match self {
@ -1215,7 +1206,7 @@ mod tests {
#[test]
fn map_expression() {
let source = "map { x = '1', y = 2, z = 3.0 }";
let source = "map { x = \"1\", y = 2, z = 3.0 }";
assert_eq!(
parse(source),
@ -1361,7 +1352,7 @@ mod tests {
#[test]
fn tuple_struct_access() {
let source = "Foo(42, 'bar').0";
let source = "Foo(42, \"bar\").0";
assert_eq!(
parse(source),
@ -1827,7 +1818,7 @@ mod tests {
#[test]
fn block_with_assignment() {
let source = "{ foo = 42; bar = 42; baz = '42' }";
let source = "{ foo = 42; bar = 42; baz = \"42\" }";
assert_eq!(
parse(source),
@ -2000,7 +1991,7 @@ mod tests {
#[test]
fn string_concatenation() {
let source = "'Hello, ' + 'World!'";
let source = "\"Hello, \" + \"World!\"";
assert_eq!(
parse(source),

View File

@ -1,5 +1,8 @@
//! Token and TokenOwned types.
use std::fmt::{self, Display, Formatter};
use std::{
borrow::Borrow,
fmt::{self, Display, Formatter},
};
use serde::{Deserialize, Serialize};
@ -11,6 +14,7 @@ pub enum Token<'src> {
// Hard-coded values
Boolean(&'src str),
Character(char),
Float(&'src str),
Identifier(&'src str),
Integer(&'src str),
@ -64,6 +68,61 @@ pub enum Token<'src> {
}
impl<'src> Token<'src> {
/// Returns the width of the token as written in source.
///
/// Tokens that borrow their text report that text's length, strings add two
/// for the surrounding quotes, keywords and punctuation report the length of
/// their fixed spelling, and `Eof` has no width.
///
/// NOTE(review): `Character` is fixed at 3 (two quotes plus one position),
/// which presumes the quoted character occupies a single position — confirm
/// against the lexer before relying on it for non-ASCII characters.
// `len` is a source width rather than a collection size, so no `is_empty`
// counterpart is provided.
#[allow(clippy::len_without_is_empty)]
pub fn len(&self) -> usize {
    match self {
        // Nothing is written in source for end-of-file.
        Token::Eof => 0,

        // Tokens that carry their own source text.
        Token::Boolean(text)
        | Token::Float(text)
        | Token::Identifier(text)
        | Token::Integer(text) => text.len(),

        // The payload plus the two enclosing quotes.
        Token::String(text) => text.len() + 2,

        // Fixed spellings, grouped by width.
        Token::Bang
        | Token::Colon
        | Token::Comma
        | Token::Dot
        | Token::Equal
        | Token::Greater
        | Token::LeftCurlyBrace
        | Token::LeftParenthesis
        | Token::LeftSquareBrace
        | Token::Less
        | Token::Minus
        | Token::Percent
        | Token::Plus
        | Token::RightCurlyBrace
        | Token::RightParenthesis
        | Token::RightSquareBrace
        | Token::Semicolon
        | Token::Slash
        | Token::Star => 1,

        Token::If
        | Token::BangEqual
        | Token::DoubleAmpersand
        | Token::DoubleDot
        | Token::DoubleEqual
        | Token::DoublePipe
        | Token::GreaterEqual
        | Token::LessEqual
        | Token::MinusEqual
        | Token::PlusEqual => 2,

        Token::Character(_)
        | Token::Int
        | Token::Let
        | Token::Map
        | Token::Mut
        | Token::Str => 3,

        Token::Bool | Token::Else | Token::Loop => 4,

        Token::Async | Token::Break | Token::FloatKeyword | Token::While => 5,

        Token::Struct => 6,
    }
}
pub fn to_owned(&self) -> TokenOwned {
match self {
Token::Async => TokenOwned::Async,
@ -72,6 +131,7 @@ impl<'src> Token<'src> {
Token::Bool => TokenOwned::Bool,
Token::Boolean(boolean) => TokenOwned::Boolean(boolean.to_string()),
Token::Break => TokenOwned::Break,
Token::Character(character) => TokenOwned::Character(*character),
Token::Colon => TokenOwned::Colon,
Token::Comma => TokenOwned::Comma,
Token::Dot => TokenOwned::Dot,
@ -117,60 +177,6 @@ impl<'src> Token<'src> {
}
}
/// Returns the token's text as a string slice.
///
/// Value tokens (`Boolean`, `Float`, `Identifier`, `Integer`, `String`) return
/// the slice they carry unchanged; keywords and punctuation return their fixed
/// spelling; `Eof` returns the placeholder `"EOF"`.
pub fn as_str(&self) -> &str {
match self {
// Tokens that carry their own text.
Token::Boolean(boolean_text) => boolean_text,
Token::Float(float_text) => float_text,
Token::Identifier(text) => text,
Token::Integer(integer_text) => integer_text,
Token::String(text) => text,
// Keywords and punctuation with a fixed spelling.
Token::Async => "async",
Token::BangEqual => "!=",
Token::Bang => "!",
Token::Bool => "bool",
Token::Break => "break",
Token::Colon => ":",
Token::Comma => ",",
Token::Dot => ".",
Token::DoubleAmpersand => "&&",
Token::DoubleDot => "..",
Token::DoubleEqual => "==",
Token::DoublePipe => "||",
Token::Else => "else",
Token::Eof => "EOF",
Token::Equal => "=",
Token::FloatKeyword => "float",
Token::Greater => ">",
Token::GreaterEqual => ">=",
Token::If => "if",
Token::Int => "int",
Token::LeftCurlyBrace => "{",
Token::LeftParenthesis => "(",
Token::LeftSquareBrace => "[",
Token::Let => "let",
Token::Less => "<",
Token::LessEqual => "<=",
Token::Loop => "loop",
Token::Map => "map",
Token::Minus => "-",
Token::MinusEqual => "-=",
Token::Mut => "mut",
Token::Percent => "%",
Token::Plus => "+",
Token::PlusEqual => "+=",
Token::RightCurlyBrace => "}",
Token::RightParenthesis => ")",
Token::RightSquareBrace => "]",
Token::Semicolon => ";",
Token::Star => "*",
Token::Slash => "/",
Token::Str => "str",
Token::Struct => "struct",
Token::While => "while",
}
}
pub fn kind(&self) -> TokenKind {
match self {
Token::Async => TokenKind::Async,
@ -179,6 +185,7 @@ impl<'src> Token<'src> {
Token::Bool => TokenKind::Bool,
Token::Boolean(_) => TokenKind::Boolean,
Token::Break => TokenKind::Break,
Token::Character(_) => TokenKind::Character,
Token::Colon => TokenKind::Colon,
Token::Comma => TokenKind::Comma,
Token::Dot => TokenKind::Dot,
@ -279,7 +286,57 @@ impl<'src> Token<'src> {
impl<'src> Display for Token<'src> {
/// Writes the token's textual form.
///
/// Value tokens print their payload, with characters wrapped in single
/// quotes and strings wrapped in double quotes; keywords and punctuation
/// print their fixed spelling; `Eof` prints `EOF`.
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "{}", self.as_str())
match self {
Token::Async => write!(f, "async"),
Token::BangEqual => write!(f, "!="),
Token::Bang => write!(f, "!"),
Token::Bool => write!(f, "bool"),
Token::Boolean(value) => write!(f, "{}", value),
Token::Break => write!(f, "break"),
// Characters are shown with their surrounding single quotes.
Token::Character(value) => write!(f, "'{}'", value),
Token::Colon => write!(f, ":"),
Token::Comma => write!(f, ","),
Token::Dot => write!(f, "."),
Token::DoubleAmpersand => write!(f, "&&"),
Token::DoubleDot => write!(f, ".."),
Token::DoubleEqual => write!(f, "=="),
Token::DoublePipe => write!(f, "||"),
Token::Else => write!(f, "else"),
Token::Eof => write!(f, "EOF"),
Token::Equal => write!(f, "="),
Token::Float(value) => write!(f, "{}", value),
Token::FloatKeyword => write!(f, "float"),
Token::Greater => write!(f, ">"),
Token::GreaterEqual => write!(f, ">="),
Token::Identifier(value) => write!(f, "{}", value),
Token::If => write!(f, "if"),
Token::Int => write!(f, "int"),
Token::Integer(value) => write!(f, "{}", value),
// `{{` is the format-string escape for a literal `{`.
Token::LeftCurlyBrace => write!(f, "{{"),
Token::LeftParenthesis => write!(f, "("),
Token::LeftSquareBrace => write!(f, "["),
Token::Let => write!(f, "let"),
Token::Less => write!(f, "<"),
Token::LessEqual => write!(f, "<="),
Token::Loop => write!(f, "loop"),
Token::Map => write!(f, "map"),
Token::Minus => write!(f, "-"),
Token::MinusEqual => write!(f, "-="),
Token::Mut => write!(f, "mut"),
Token::Percent => write!(f, "%"),
Token::Plus => write!(f, "+"),
Token::PlusEqual => write!(f, "+="),
// `}}` is the format-string escape for a literal `}`.
Token::RightCurlyBrace => write!(f, "}}"),
Token::RightParenthesis => write!(f, ")"),
Token::RightSquareBrace => write!(f, "]"),
Token::Semicolon => write!(f, ";"),
Token::Slash => write!(f, "/"),
Token::Star => write!(f, "*"),
Token::Str => write!(f, "str"),
// Strings are shown with their surrounding double quotes.
Token::String(value) => write!(f, "\"{}\"", value),
Token::Struct => write!(f, "struct"),
Token::While => write!(f, "while"),
}
}
}
@ -294,6 +351,7 @@ pub enum TokenOwned {
// Hard-coded values
Boolean(String),
Character(char),
Float(String),
Integer(String),
String(String),
@ -354,6 +412,7 @@ impl Display for TokenOwned {
TokenOwned::Bool => Token::Bool.fmt(f),
TokenOwned::Boolean(boolean) => Token::Boolean(boolean).fmt(f),
TokenOwned::Break => Token::Break.fmt(f),
TokenOwned::Character(character) => Token::Character(*character).fmt(f),
TokenOwned::Colon => Token::Colon.fmt(f),
TokenOwned::Comma => Token::Comma.fmt(f),
TokenOwned::Dot => Token::Dot.fmt(f),
@ -409,6 +468,7 @@ pub enum TokenKind {
// Hard-coded values
Boolean,
Character,
Float,
Integer,
String,
@ -469,6 +529,7 @@ impl Display for TokenKind {
TokenKind::Bool => Token::Bool.fmt(f),
TokenKind::Boolean => write!(f, "boolean value"),
TokenKind::Break => Token::Break.fmt(f),
TokenKind::Character => write!(f, "character value"),
TokenKind::Colon => Token::Colon.fmt(f),
TokenKind::Comma => Token::Comma.fmt(f),
TokenKind::Dot => Token::Dot.fmt(f),

View File

@ -113,7 +113,11 @@ impl Vm {
statement: Statement,
collect_garbage: bool,
) -> Result<Option<Evaluation>, RuntimeError> {
log::debug!("Running statement: {}", statement);
log::trace!(
"Running statement at {:?}: {}",
statement.position(),
statement
);
let position = statement.position();
let result = match statement {
@ -232,7 +236,11 @@ impl Vm {
expression: Expression,
collect_garbage: bool,
) -> Result<Evaluation, RuntimeError> {
log::debug!("Running expression: {}", expression);
log::trace!(
"Running expression at {:?}: {}",
expression.position(),
expression
);
let position = expression.position();
let evaluation_result = match expression {
@ -291,8 +299,6 @@ impl Vm {
}
fn run_identifier(&self, identifier: Node<Identifier>) -> Result<Evaluation, RuntimeError> {
log::debug!("Running identifier: {}", identifier);
let get_data = self.context.get_data(&identifier.inner).map_err(|error| {
RuntimeError::ContextError {
error,
@ -323,8 +329,6 @@ impl Vm {
struct_expression: StructExpression,
collect_garbage: bool,
) -> Result<Evaluation, RuntimeError> {
log::debug!("Running struct expression: {struct_expression}");
let StructExpression::Fields { name, fields } = struct_expression;
let position = name.position;
@ -723,8 +727,6 @@ impl Vm {
call_expression: CallExpression,
collect_garbage: bool,
) -> Result<Evaluation, RuntimeError> {
log::debug!("Running call expression: {call_expression}");
let CallExpression { invoker, arguments } = call_expression;
let invoker_position = invoker.position();
@ -1409,7 +1411,7 @@ mod tests {
// Indexing a string at 0 evaluates to its first character as a character value.
#[test]
fn string_index() {
let input = "'foo'[0]";
let input = "\"foo\"[0]";
assert_eq!(run(input), Ok(Some(Value::character('f'))));
}