dust/dust-lang/src/lex.rs

952 lines
25 KiB
Rust
Raw Normal View History

2024-08-05 04:54:12 +00:00
//! Lexing tools.
//!
//! This module provides two lexing options:
//! - [`lex`], which lexes the entire input and returns a vector of tokens and their positions
//! - [`Lexer`], which lexes the input a token at a time
2024-08-09 00:58:56 +00:00
use std::{
error::Error,
fmt::{self, Display, Formatter},
};
2024-08-04 23:41:00 +00:00
use crate::{Span, Token};
2024-08-04 00:23:52 +00:00
2024-08-07 16:32:18 +00:00
/// Lexes the input and return a vector of tokens and their positions.
2024-08-07 16:13:49 +00:00
///
/// # Examples
/// ```
/// # use dust_lang::*;
/// let input = "x = 1 + 2";
/// let tokens = lex(input).unwrap();
///
/// assert_eq!(
/// tokens,
/// [
/// (Token::Identifier("x"), (0, 1)),
2024-08-07 16:13:49 +00:00
/// (Token::Equal, (2, 3)),
2024-08-09 18:01:01 +00:00
/// (Token::Integer("1"), (4, 5)),
2024-08-07 16:13:49 +00:00
/// (Token::Plus, (6, 7)),
2024-08-09 18:01:01 +00:00
/// (Token::Integer("2"), (8, 9)),
2024-08-07 16:13:49 +00:00
/// (Token::Eof, (9, 9)),
/// ]
/// );
/// ```
pub fn lex<'chars, 'src: 'chars>(input: &'src str) -> Result<Vec<(Token<'chars>, Span)>, LexError> {
let mut lexer = Lexer::new();
2024-08-04 00:23:52 +00:00
let mut tokens = Vec::new();
loop {
let (token, span) = lexer.next_token(input)?;
2024-08-04 00:23:52 +00:00
let is_eof = matches!(token, Token::Eof);
tokens.push((token, span));
if is_eof {
break;
}
}
Ok(tokens)
}
/// Low-level tool for lexing a single token at a time.
///
/// The lexer does not store the source; it only remembers how far it has advanced, and the
/// source is passed to every call.
///
/// **Note**: It is a logic error to call `next_token` with different inputs.
///
/// # Examples
/// ```
/// # use dust_lang::*;
/// let input = "x = 1 + 2";
/// let mut lexer = Lexer::new();
/// let mut tokens = Vec::new();
///
/// loop {
///     let (token, span) = lexer.next_token(input).unwrap();
///     let is_eof = matches!(token, Token::Eof);
///
///     tokens.push((token, span));
///
///     if is_eof {
///         break;
///     }
/// }
///
/// assert_eq!(
///     tokens,
///     [
///         (Token::Identifier("x"), (0, 1)),
///         (Token::Equal, (2, 3)),
///         (Token::Integer("1"), (4, 5)),
///         (Token::Plus, (6, 7)),
///         (Token::Integer("2"), (8, 9)),
///         (Token::Eof, (9, 9)),
///     ]
/// )
/// ```
#[derive(Debug, Clone)]
pub struct Lexer {
    /// Byte offset into the source at which lexing resumes.
    position: usize,
}
impl Lexer {
2024-08-05 04:54:12 +00:00
/// Create a new lexer for the given input.
pub fn new() -> Self {
Lexer { position: 0 }
2024-08-04 00:23:52 +00:00
}
2024-08-05 04:54:12 +00:00
/// Produce the next token.
2024-08-09 00:19:07 +00:00
///
/// It is a logic error to call this method with different inputs.
pub fn next_token<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> {
self.skip_whitespace(source);
2024-08-04 00:23:52 +00:00
let (token, span) = if let Some(c) = self.peek_char(source) {
2024-08-04 00:23:52 +00:00
match c {
'0'..='9' => self.lex_number(source)?,
'-' => {
if let Some('0'..='9') = self.peek_second_char(source) {
self.lex_number(source)?
2024-08-09 04:31:38 +00:00
} else if "-Infinity" == self.peek_chars(source, 9) {
self.position += 9;
2024-08-09 05:43:58 +00:00
2024-08-09 04:31:38 +00:00
(
2024-08-09 18:01:01 +00:00
Token::Float("-Infinity"),
2024-08-09 04:31:38 +00:00
(self.position - 9, self.position),
)
} else {
self.position += 1;
2024-08-09 05:43:58 +00:00
(Token::Minus, (self.position - 1, self.position))
}
}
2024-08-09 11:02:55 +00:00
'a'..='z' | 'A'..='Z' => self.lex_alphanumeric(source)?,
'"' => self.lex_string('"', source)?,
'\'' => self.lex_string('\'', source)?,
2024-08-04 00:23:52 +00:00
'+' => {
2024-08-09 22:14:46 +00:00
if let Some('=') = self.peek_second_char(source) {
self.position += 2;
2024-08-09 05:43:58 +00:00
2024-08-09 22:14:46 +00:00
(Token::PlusEqual, (self.position - 2, self.position))
} else {
self.position += 1;
(Token::Plus, (self.position - 1, self.position))
}
2024-08-04 00:23:52 +00:00
}
'*' => {
self.position += 1;
2024-08-09 05:43:58 +00:00
2024-08-04 00:23:52 +00:00
(Token::Star, (self.position - 1, self.position))
}
'(' => {
self.position += 1;
2024-08-09 05:43:58 +00:00
2024-08-04 00:23:52 +00:00
(Token::LeftParenthesis, (self.position - 1, self.position))
}
')' => {
self.position += 1;
2024-08-09 05:43:58 +00:00
2024-08-04 00:23:52 +00:00
(Token::RightParenthesis, (self.position - 1, self.position))
}
'=' => {
2024-08-09 11:15:09 +00:00
if let Some('=') = self.peek_second_char(source) {
self.position += 2;
2024-08-09 05:43:58 +00:00
2024-08-09 11:15:09 +00:00
(Token::DoubleEqual, (self.position - 2, self.position))
} else {
self.position += 1;
(Token::Equal, (self.position - 1, self.position))
}
2024-08-04 00:23:52 +00:00
}
2024-08-05 01:31:18 +00:00
'[' => {
self.position += 1;
2024-08-09 05:43:58 +00:00
2024-08-05 01:31:18 +00:00
(Token::LeftSquareBrace, (self.position - 1, self.position))
}
']' => {
self.position += 1;
2024-08-09 05:43:58 +00:00
2024-08-05 01:31:18 +00:00
(Token::RightSquareBrace, (self.position - 1, self.position))
}
',' => {
self.position += 1;
2024-08-09 05:43:58 +00:00
2024-08-05 01:31:18 +00:00
(Token::Comma, (self.position - 1, self.position))
}
2024-08-05 18:31:08 +00:00
'.' => {
self.position += 1;
2024-08-09 05:43:58 +00:00
2024-08-05 18:31:08 +00:00
(Token::Dot, (self.position - 1, self.position))
}
'>' => {
if let Some('=') = self.peek_second_char(source) {
self.position += 2;
(Token::GreaterEqual, (self.position - 2, self.position))
} else {
self.position += 1;
(Token::Greater, (self.position - 1, self.position))
}
}
'<' => {
if let Some('=') = self.peek_second_char(source) {
self.position += 2;
(Token::LessEqual, (self.position - 2, self.position))
} else {
self.position += 1;
(Token::Less, (self.position - 1, self.position))
}
}
2024-08-09 09:18:39 +00:00
'{' => {
self.position += 1;
(Token::LeftCurlyBrace, (self.position - 1, self.position))
}
'}' => {
self.position += 1;
(Token::RightCurlyBrace, (self.position - 1, self.position))
}
2024-08-09 10:46:24 +00:00
'/' => {
self.position += 1;
(Token::Slash, (self.position - 1, self.position))
}
2024-08-09 11:02:55 +00:00
'%' => {
self.position += 1;
(Token::Percent, (self.position - 1, self.position))
}
2024-08-09 15:41:23 +00:00
'&' => {
if let Some('&') = self.peek_second_char(source) {
self.position += 2;
(Token::DoubleAmpersand, (self.position - 2, self.position))
} else {
self.position += 1;
return Err(LexError::UnexpectedCharacter {
character: c,
position: self.position,
});
2024-08-09 15:41:23 +00:00
}
}
';' => {
self.position += 1;
(Token::Semicolon, (self.position - 1, self.position))
}
2024-08-09 18:01:01 +00:00
'|' => {
if let Some('|') = self.peek_second_char(source) {
self.position += 2;
(Token::DoublePipe, (self.position - 2, self.position))
} else {
self.position += 1;
return Err(LexError::UnexpectedCharacter {
character: c,
position: self.position,
});
2024-08-09 18:01:01 +00:00
}
}
2024-08-09 05:43:58 +00:00
_ => {
self.position += 1;
return Err(LexError::UnexpectedCharacter {
character: c,
position: self.position,
});
2024-08-09 05:43:58 +00:00
}
2024-08-04 00:23:52 +00:00
}
} else {
(Token::Eof, (self.position, self.position))
};
Ok((token, span))
}
2024-08-09 18:01:01 +00:00
/// Peek at the next token without consuming the source.
///
/// The lexer position is saved before lexing and restored afterwards, so the position is
/// left untouched whether lexing succeeds or fails, regardless of how many bytes the token
/// (including any skipped whitespace or string delimiters) occupies in the source.
///
/// # Errors
///
/// Returns the same [`LexError`] that [`Lexer::next_token`] would return.
pub fn peek_token<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> {
    let checkpoint = self.position;
    let peeked = self.next_token(source);

    // Rewind unconditionally: the textual length of a token is not a reliable measure of
    // how far `next_token` advanced.
    self.position = checkpoint;

    peeked
}
/// Progress to the next character.
fn next_char(&mut self, source: &str) -> Option<char> {
2024-08-09 05:55:34 +00:00
if let Some(c) = source[self.position..].chars().next() {
self.position += c.len_utf8();
2024-08-09 05:55:34 +00:00
Some(c)
} else {
None
}
}
2024-08-05 04:54:12 +00:00
/// Skip whitespace characters.
fn skip_whitespace(&mut self, source: &str) {
while let Some(c) = self.peek_char(source) {
2024-08-04 00:23:52 +00:00
if c.is_whitespace() {
self.next_char(source);
2024-08-04 00:23:52 +00:00
} else {
break;
}
}
}
2024-08-05 04:54:12 +00:00
/// Peek at the next character without consuming it.
fn peek_char(&self, source: &str) -> Option<char> {
source[self.position..].chars().next()
2024-08-04 00:23:52 +00:00
}
2024-08-05 22:34:20 +00:00
/// Peek at the second-to-next character without consuming it.
fn peek_second_char(&self, source: &str) -> Option<char> {
source[self.position..].chars().nth(1)
2024-08-07 14:41:27 +00:00
}
2024-08-09 04:31:38 +00:00
/// Peek the next `n` characters without consuming them.
///
/// Returns the whole remaining source if it holds fewer than `n` characters. The count is in
/// characters rather than bytes, so the returned slice always ends on a character boundary
/// and this never panics on multi-byte input.
fn peek_chars<'src>(&self, source: &'src str, n: usize) -> &'src str {
    let remaining_source = &source[self.position..];

    // The byte index of the character *after* the first `n` is where the slice must end.
    match remaining_source.char_indices().nth(n) {
        Some((end, _)) => &remaining_source[..end],
        None => remaining_source,
    }
}
2024-08-05 04:54:12 +00:00
/// Lex an integer or float token.
fn lex_number<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> {
2024-08-04 00:23:52 +00:00
let start_pos = self.position;
2024-08-04 23:41:00 +00:00
let mut is_float = false;
2024-08-04 00:23:52 +00:00
if let Some('-') = self.peek_char(source) {
self.next_char(source);
}
while let Some(c) = self.peek_char(source) {
2024-08-04 23:41:00 +00:00
if c == '.' {
if let Some('0'..='9') = self.peek_second_char(source) {
2024-08-05 22:34:20 +00:00
if !is_float {
self.next_char(source);
2024-08-05 22:34:20 +00:00
}
2024-08-04 23:41:00 +00:00
self.next_char(source);
2024-08-05 18:31:08 +00:00
2024-08-09 04:31:38 +00:00
loop {
let peek_char = self.peek_char(source);
if let Some('0'..='9') = peek_char {
self.next_char(source);
} else if let Some('e') = peek_char {
if let Some('0'..='9') = self.peek_second_char(source) {
self.next_char(source);
self.next_char(source);
} else {
break;
}
} else {
break;
}
2024-08-04 23:41:00 +00:00
}
2024-08-05 22:34:20 +00:00
is_float = true;
} else {
break;
2024-08-04 23:41:00 +00:00
}
}
2024-08-04 00:23:52 +00:00
if c.is_ascii_digit() {
self.next_char(source);
2024-08-04 00:23:52 +00:00
} else {
break;
}
}
2024-08-09 18:01:01 +00:00
let text = &source[start_pos..self.position];
2024-08-04 00:23:52 +00:00
2024-08-09 18:01:01 +00:00
if is_float {
Ok((Token::Float(text), (start_pos, self.position)))
2024-08-04 23:41:00 +00:00
} else {
2024-08-09 18:01:01 +00:00
Ok((Token::Integer(text), (start_pos, self.position)))
2024-08-04 23:41:00 +00:00
}
2024-08-04 00:23:52 +00:00
}
2024-08-05 04:54:12 +00:00
/// Lex an identifier token.
2024-08-09 11:02:55 +00:00
fn lex_alphanumeric<'src>(
&mut self,
source: &'src str,
) -> Result<(Token<'src>, Span), LexError> {
2024-08-04 00:23:52 +00:00
let start_pos = self.position;
while let Some(c) = self.peek_char(source) {
2024-08-09 11:02:55 +00:00
if c.is_ascii_alphanumeric() || c == '_' {
self.next_char(source);
2024-08-04 00:23:52 +00:00
} else {
break;
}
}
let string = &source[start_pos..self.position];
let token = match string {
2024-08-09 18:01:01 +00:00
"true" => Token::Boolean("true"),
"false" => Token::Boolean("false"),
"Infinity" => Token::Float("Infinity"),
"is_even" => Token::IsEven,
"is_odd" => Token::IsOdd,
"length" => Token::Length,
2024-08-09 18:01:01 +00:00
"NaN" => Token::Float("NaN"),
"read_line" => Token::ReadLine,
"write_line" => Token::WriteLine,
_ => Token::Identifier(string),
};
2024-08-04 00:23:52 +00:00
Ok((token, (start_pos, self.position)))
}
2024-08-08 17:08:53 +00:00
fn lex_string<'src>(
&mut self,
delimiter: char,
source: &'src str,
) -> Result<(Token<'src>, Span), LexError> {
2024-08-08 17:08:53 +00:00
let start_pos = self.position;
self.next_char(source);
2024-08-08 17:08:53 +00:00
while let Some(c) = self.peek_char(source) {
2024-08-08 17:08:53 +00:00
if c == delimiter {
self.next_char(source);
2024-08-08 17:08:53 +00:00
break;
} else {
self.next_char(source);
2024-08-08 17:08:53 +00:00
}
}
let text = &source[start_pos + 1..self.position - 1];
Ok((Token::String(text), (start_pos, self.position)))
}
}
impl Default for Lexer {
fn default() -> Self {
Self::new()
2024-08-08 17:08:53 +00:00
}
2024-08-04 00:23:52 +00:00
}
2024-08-04 23:25:44 +00:00
/// Errors that can occur while lexing.
#[derive(Clone, Debug, PartialEq)]
pub enum LexError {
    /// The lexer rejected a character in the source.
    UnexpectedCharacter {
        /// The rejected character.
        character: char,
        /// Byte offset recorded by the lexer when it rejected the character.
        position: usize,
    },
}
impl LexError {
    /// Returns the location of the error as a span whose start and end are both the recorded
    /// position.
    pub fn position(&self) -> Span {
        let Self::UnexpectedCharacter { position, .. } = self;

        (*position, *position)
    }
}
2024-08-09 00:58:56 +00:00
impl Error for LexError {
    /// Lexing errors never wrap another error, so there is no underlying source.
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        // Destructuring keeps this exhaustive: adding a variant forces a revisit here.
        let Self::UnexpectedCharacter { .. } = self;

        None
    }
}
impl Display for LexError {
    /// Writes a human-readable description; the position is not included in the message.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        match self {
            Self::UnexpectedCharacter { character, .. } => {
                write!(f, "Unexpected character: '{character}'")
            }
        }
    }
}
2024-08-04 23:25:44 +00:00
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that lexing `input` succeeds and yields exactly `expected`.
    #[track_caller]
    fn assert_lexes_to<'src>(input: &'src str, expected: Vec<(Token<'src>, Span)>) {
        assert_eq!(lex(input), Ok(expected));
    }

    #[test]
    fn add_assign() {
        assert_lexes_to(
            "x += 42",
            vec![
                (Token::Identifier("x"), (0, 1)),
                (Token::PlusEqual, (2, 4)),
                (Token::Integer("42"), (5, 7)),
                (Token::Eof, (7, 7)),
            ],
        );
    }

    #[test]
    fn or() {
        assert_lexes_to(
            "true || false",
            vec![
                (Token::Boolean("true"), (0, 4)),
                (Token::DoublePipe, (5, 7)),
                (Token::Boolean("false"), (8, 13)),
                (Token::Eof, (13, 13)),
            ],
        );
    }

    #[test]
    fn block() {
        assert_lexes_to(
            "{ x = 42; y = 'foobar' }",
            vec![
                (Token::LeftCurlyBrace, (0, 1)),
                (Token::Identifier("x"), (2, 3)),
                (Token::Equal, (4, 5)),
                (Token::Integer("42"), (6, 8)),
                (Token::Semicolon, (8, 9)),
                (Token::Identifier("y"), (10, 11)),
                (Token::Equal, (12, 13)),
                (Token::String("foobar"), (14, 22)),
                (Token::RightCurlyBrace, (23, 24)),
                (Token::Eof, (24, 24)),
            ],
        );
    }

    #[test]
    fn equal() {
        assert_lexes_to(
            "42 == 42",
            vec![
                (Token::Integer("42"), (0, 2)),
                (Token::DoubleEqual, (3, 5)),
                (Token::Integer("42"), (6, 8)),
                (Token::Eof, (8, 8)),
            ],
        );
    }

    #[test]
    fn modulo() {
        assert_lexes_to(
            "42 % 2",
            vec![
                (Token::Integer("42"), (0, 2)),
                (Token::Percent, (3, 4)),
                (Token::Integer("2"), (5, 6)),
                (Token::Eof, (6, 6)),
            ],
        );
    }

    #[test]
    fn divide() {
        assert_lexes_to(
            "42 / 2",
            vec![
                (Token::Integer("42"), (0, 2)),
                (Token::Slash, (3, 4)),
                (Token::Integer("2"), (5, 6)),
                (Token::Eof, (6, 6)),
            ],
        );
    }

    #[test]
    fn map() {
        assert_lexes_to(
            "{ x = 42, y = 'foobar' }",
            vec![
                (Token::LeftCurlyBrace, (0, 1)),
                (Token::Identifier("x"), (2, 3)),
                (Token::Equal, (4, 5)),
                (Token::Integer("42"), (6, 8)),
                (Token::Comma, (8, 9)),
                (Token::Identifier("y"), (10, 11)),
                (Token::Equal, (12, 13)),
                (Token::String("foobar"), (14, 22)),
                (Token::RightCurlyBrace, (23, 24)),
                (Token::Eof, (24, 24)),
            ],
        );
    }

    #[test]
    fn greater_than() {
        assert_lexes_to(">", vec![(Token::Greater, (0, 1)), (Token::Eof, (1, 1))]);
    }

    #[test]
    fn greater_than_or_equal() {
        assert_lexes_to(
            ">=",
            vec![(Token::GreaterEqual, (0, 2)), (Token::Eof, (2, 2))],
        );
    }

    #[test]
    fn less_than() {
        assert_lexes_to("<", vec![(Token::Less, (0, 1)), (Token::Eof, (1, 1))]);
    }

    #[test]
    fn less_than_or_equal() {
        assert_lexes_to(
            "<=",
            vec![(Token::LessEqual, (0, 2)), (Token::Eof, (2, 2))],
        );
    }

    #[test]
    fn infinity() {
        assert_lexes_to(
            "Infinity",
            vec![(Token::Float("Infinity"), (0, 8)), (Token::Eof, (8, 8))],
        );
    }

    #[test]
    fn negative_infinity() {
        assert_lexes_to(
            "-Infinity",
            vec![(Token::Float("-Infinity"), (0, 9)), (Token::Eof, (9, 9))],
        );
    }

    #[test]
    fn nan() {
        // Only the first token is checked here.
        let first_is_nan = match lex("NaN") {
            Ok(tokens) => tokens[0].0 == Token::Float("NaN"),
            Err(_) => false,
        };

        assert!(first_is_nan);
    }

    #[test]
    fn complex_float() {
        assert_lexes_to(
            "42.42e42",
            vec![(Token::Float("42.42e42"), (0, 8)), (Token::Eof, (8, 8))],
        );
    }

    #[test]
    fn max_integer() {
        assert_lexes_to(
            "9223372036854775807",
            vec![
                (Token::Integer("9223372036854775807"), (0, 19)),
                (Token::Eof, (19, 19)),
            ],
        );
    }

    #[test]
    fn min_integer() {
        assert_lexes_to(
            "-9223372036854775808",
            vec![
                (Token::Integer("-9223372036854775808"), (0, 20)),
                (Token::Eof, (20, 20)),
            ],
        );
    }

    #[test]
    fn subtract_negative_integers() {
        assert_lexes_to(
            "-42 - -42",
            vec![
                (Token::Integer("-42"), (0, 3)),
                (Token::Minus, (4, 5)),
                (Token::Integer("-42"), (6, 9)),
                (Token::Eof, (9, 9)),
            ],
        );
    }

    #[test]
    fn negative_integer() {
        assert_lexes_to(
            "-42",
            vec![(Token::Integer("-42"), (0, 3)), (Token::Eof, (3, 3))],
        );
    }

    #[test]
    fn read_line() {
        assert_lexes_to(
            "read_line()",
            vec![
                (Token::ReadLine, (0, 9)),
                (Token::LeftParenthesis, (9, 10)),
                (Token::RightParenthesis, (10, 11)),
                (Token::Eof, (11, 11)),
            ],
        );
    }

    #[test]
    fn write_line() {
        assert_lexes_to(
            "write_line('Hello, world!')",
            vec![
                (Token::WriteLine, (0, 10)),
                (Token::LeftParenthesis, (10, 11)),
                (Token::String("Hello, world!"), (11, 26)),
                (Token::RightParenthesis, (26, 27)),
                (Token::Eof, (27, 27)),
            ],
        );
    }

    #[test]
    fn string_concatenation() {
        assert_lexes_to(
            "'Hello, ' + 'world!'",
            vec![
                (Token::String("Hello, "), (0, 9)),
                (Token::Plus, (10, 11)),
                (Token::String("world!"), (12, 20)),
                (Token::Eof, (20, 20)),
            ],
        );
    }

    #[test]
    fn string() {
        assert_lexes_to(
            "'Hello, world!'",
            vec![
                (Token::String("Hello, world!"), (0, 15)),
                (Token::Eof, (15, 15)),
            ],
        );
    }

    #[test]
    fn r#true() {
        assert_lexes_to(
            "true",
            vec![(Token::Boolean("true"), (0, 4)), (Token::Eof, (4, 4))],
        );
    }

    #[test]
    fn r#false() {
        assert_lexes_to(
            "false",
            vec![(Token::Boolean("false"), (0, 5)), (Token::Eof, (5, 5))],
        );
    }

    #[test]
    fn property_access_function_call() {
        assert_lexes_to(
            "42.is_even()",
            vec![
                (Token::Integer("42"), (0, 2)),
                (Token::Dot, (2, 3)),
                (Token::IsEven, (3, 10)),
                (Token::LeftParenthesis, (10, 11)),
                (Token::RightParenthesis, (11, 12)),
                (Token::Eof, (12, 12)),
            ],
        );
    }

    #[test]
    fn empty() {
        assert_lexes_to("", vec![(Token::Eof, (0, 0))]);
    }

    #[test]
    fn reserved_identifier() {
        assert_lexes_to(
            "length",
            vec![(Token::Length, (0, 6)), (Token::Eof, (6, 6))],
        );
    }

    #[test]
    fn square_braces() {
        assert_lexes_to(
            "[]",
            vec![
                (Token::LeftSquareBrace, (0, 1)),
                (Token::RightSquareBrace, (1, 2)),
                (Token::Eof, (2, 2)),
            ],
        );
    }

    #[test]
    fn small_float() {
        assert_lexes_to(
            "1.23",
            vec![(Token::Float("1.23"), (0, 4)), (Token::Eof, (4, 4))],
        );
    }

    #[test]
    #[allow(clippy::excessive_precision)]
    fn big_float() {
        assert_lexes_to(
            "123456789.123456789",
            vec![
                (Token::Float("123456789.123456789"), (0, 19)),
                (Token::Eof, (19, 19)),
            ],
        );
    }

    #[test]
    fn add() {
        assert_lexes_to(
            "1 + 2",
            vec![
                (Token::Integer("1"), (0, 1)),
                (Token::Plus, (2, 3)),
                (Token::Integer("2"), (4, 5)),
                (Token::Eof, (5, 5)),
            ],
        );
    }

    #[test]
    fn multiply() {
        assert_lexes_to(
            "1 * 2",
            vec![
                (Token::Integer("1"), (0, 1)),
                (Token::Star, (2, 3)),
                (Token::Integer("2"), (4, 5)),
                (Token::Eof, (5, 5)),
            ],
        );
    }

    #[test]
    fn add_and_multiply() {
        assert_lexes_to(
            "1 + 2 * 3",
            vec![
                (Token::Integer("1"), (0, 1)),
                (Token::Plus, (2, 3)),
                (Token::Integer("2"), (4, 5)),
                (Token::Star, (6, 7)),
                (Token::Integer("3"), (8, 9)),
                (Token::Eof, (9, 9)),
            ],
        );
    }

    #[test]
    fn assignment() {
        assert_lexes_to(
            "a = 1 + 2 * 3",
            vec![
                (Token::Identifier("a"), (0, 1)),
                (Token::Equal, (2, 3)),
                (Token::Integer("1"), (4, 5)),
                (Token::Plus, (6, 7)),
                (Token::Integer("2"), (8, 9)),
                (Token::Star, (10, 11)),
                (Token::Integer("3"), (12, 13)),
                (Token::Eof, (13, 13)),
            ],
        );
    }
}