dust/dust-lang/src/lex.rs

//! Lexing tools.
//!
//! This module provides two lexing options:
//! - [`lex`], which lexes the entire input and returns a vector of tokens and their positions
//! - [`Lexer`], which lexes the input a token at a time
use std::num::{ParseFloatError, ParseIntError};

use crate::{Span, Token};

/// Lexes the whole input and returns every token paired with its span.
///
/// Tokens are produced by repeatedly calling [`Lexer::next_token`] on a fresh [`Lexer`]. The
/// returned vector always ends with a [`Token::Eof`] entry, which is included in the output.
///
/// # Errors
///
/// Returns the first [`LexError`] reported by the lexer; tokens lexed before the error are
/// discarded.
///
/// # Examples
/// ```
/// # use dust_lang::*;
/// let input = "x = 1 + 2";
/// let tokens = lex(input).unwrap();
///
/// assert_eq!(
///     tokens,
///     [
///         (Token::Identifier("x"), (0, 1)),
///         (Token::Equal, (2, 3)),
///         (Token::Integer(1), (4, 5)),
///         (Token::Plus, (6, 7)),
///         (Token::Integer(2), (8, 9)),
///         (Token::Eof, (9, 9)),
///     ]
/// );
/// ```
pub fn lex<'chars, 'src: 'chars>(input: &'src str) -> Result<Vec<(Token<'chars>, Span)>, LexError> {
    let mut lexer = Lexer::new();
    let mut tokens = Vec::new();

    loop {
        let lexed = lexer.next_token(input)?;
        // Check before pushing: the push moves `lexed` into the vector.
        let reached_end = matches!(lexed.0, Token::Eof);

        tokens.push(lexed);

        if reached_end {
            return Ok(tokens);
        }
    }
}

#[derive(Debug, Clone)]
/// Low-level tool for lexing a single token at a time.
///
/// The lexer only stores its current position; the source text is passed to every call of
/// `next_token` rather than being held by the lexer.
///
/// **Note**: It is a logic error to call `next_token` with different inputs.
///
/// # Examples
/// ```
/// # use dust_lang::*;
/// let input = "x = 1 + 2";
/// let mut lexer = Lexer::new();
/// let mut tokens = Vec::new();
///
/// loop {
///     let (token, span) = lexer.next_token(input).unwrap();
///     let is_eof = matches!(token, Token::Eof);
///
///     tokens.push((token, span));
///
///     if is_eof {
///         break;
///     }
/// }
///
/// assert_eq!(
///     tokens,
///     [
///         (Token::Identifier("x"), (0, 1)),
///         (Token::Equal, (2, 3)),
///         (Token::Integer(1), (4, 5)),
///         (Token::Plus, (6, 7)),
///         (Token::Integer(2), (8, 9)),
///         (Token::Eof, (9, 9)),
///     ]
/// )
/// ```
pub struct Lexer {
    /// Byte offset into the source of the next character to be read.
    position: usize,
}

impl Lexer {
    /// Create a new lexer for the given input.
    pub fn new() -> Self {
        Lexer { position: 0 }
    }

    /// Produce the next token.
    ///
    /// It is a logic error to call this method with different inputs.
    pub fn next_token<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> {
        self.skip_whitespace(source);

        let (token, span) = if let Some(c) = self.peek_char(source) {
            match c {
                '0'..='9' => self.lex_number(source)?,
                'a'..='z' | 'A'..='Z' => self.lex_alphabetical(source)?,
                '"' => self.lex_string('"', source)?,
                '\'' => self.lex_string('\'', source)?,
                '+' => {
                    self.position += 1;
                    (Token::Plus, (self.position - 1, self.position))
                }
                '*' => {
                    self.position += 1;
                    (Token::Star, (self.position - 1, self.position))
                }
                '(' => {
                    self.position += 1;
                    (Token::LeftParenthesis, (self.position - 1, self.position))
                }
                ')' => {
                    self.position += 1;
                    (Token::RightParenthesis, (self.position - 1, self.position))
                }
                '=' => {
                    self.position += 1;
                    (Token::Equal, (self.position - 1, self.position))
                }
                '[' => {
                    self.position += 1;
                    (Token::LeftSquareBrace, (self.position - 1, self.position))
                }
                ']' => {
                    self.position += 1;
                    (Token::RightSquareBrace, (self.position - 1, self.position))
                }
                ',' => {
                    self.position += 1;
                    (Token::Comma, (self.position - 1, self.position))
                }
                '.' => {
                    self.position += 1;
                    (Token::Dot, (self.position - 1, self.position))
                }
                _ => (Token::Eof, (self.position, self.position)),
            }
        } else {
            (Token::Eof, (self.position, self.position))
        };

        Ok((token, span))
    }

    /// Progress to the next character.
    fn next_char(&mut self, source: &str) -> Option<char> {
        source[self.position..].chars().next().map(|c| {
            self.position += c.len_utf8();
            c
        })
    }

    /// Skip whitespace characters.
    fn skip_whitespace(&mut self, source: &str) {
        while let Some(c) = self.peek_char(source) {
            if c.is_whitespace() {
                self.next_char(source);
            } else {
                break;
            }
        }
    }

    /// Peek at the next character without consuming it.
    fn peek_char(&self, source: &str) -> Option<char> {
        source[self.position..].chars().next()
    }

    /// Peek at the second-to-next character without consuming it.
    fn peek_second_char(&self, source: &str) -> Option<char> {
        source[self.position..].chars().nth(1)
    }

    /// Lex an integer or float token.
    fn lex_number<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> {
        let start_pos = self.position;
        let mut is_float = false;

        while let Some(c) = self.peek_char(source) {
            if c == '.' {
                if let Some('0'..='9') = self.peek_second_char(source) {
                    if !is_float {
                        self.next_char(source);
                    }

                    self.next_char(source);

                    while let Some('0'..='9') = self.peek_char(source) {
                        self.next_char(source);
                    }

                    is_float = true;
                } else {
                    break;
                }
            }

            if c.is_ascii_digit() {
                self.next_char(source);
            } else {
                break;
            }
        }

        if is_float {
            let float = source[start_pos..self.position].parse::<f64>()?;

            Ok((Token::Float(float), (start_pos, self.position)))
        } else {
            let integer = source[start_pos..self.position].parse::<i64>()?;

            Ok((Token::Integer(integer), (start_pos, self.position)))
        }
    }

    /// Lex an identifier token.
    fn lex_alphabetical<'src>(
        &mut self,
        source: &'src str,
    ) -> Result<(Token<'src>, Span), LexError> {
        let start_pos = self.position;

        while let Some(c) = self.peek_char(source) {
            if c.is_ascii_alphanumeric() || c == '_' {
                self.next_char(source);
            } else {
                break;
            }
        }

        let string = &source[start_pos..self.position];
        let token = match string {
            "true" => Token::Boolean(true),
            "false" => Token::Boolean(false),
            "is_even" => Token::IsEven,
            "is_odd" => Token::IsOdd,
            "length" => Token::Length,
            "read_line" => Token::ReadLine,
            "write_line" => Token::WriteLine,
            _ => Token::Identifier(string),
        };

        Ok((token, (start_pos, self.position)))
    }

    fn lex_string<'src>(
        &mut self,
        delimiter: char,
        source: &'src str,
    ) -> Result<(Token<'src>, Span), LexError> {
        let start_pos = self.position;

        self.next_char(source);

        while let Some(c) = self.peek_char(source) {
            if c == delimiter {
                self.next_char(source);
                break;
            } else {
                self.next_char(source);
            }
        }

        let text = &source[start_pos + 1..self.position - 1];

        Ok((Token::String(text), (start_pos, self.position)))
    }
}

impl Default for Lexer {
    fn default() -> Self {
        Self::new()
    }
}

/// Errors that can occur while lexing.
///
/// Both variants wrap the standard-library error produced when the text of a numeric literal
/// fails to parse.
#[derive(Debug, PartialEq, Clone)]
pub enum LexError {
    /// The text of a float literal could not be parsed as an `f64`.
    FloatError(ParseFloatError),
    /// The text of an integer literal could not be parsed as an `i64`.
    IntegerError(ParseIntError),
}

impl From<ParseFloatError> for LexError {
    fn from(error: std::num::ParseFloatError) -> Self {
        Self::FloatError(error)
    }
}

impl From<ParseIntError> for LexError {
    fn from(error: std::num::ParseIntError) -> Self {
        Self::IntegerError(error)
    }
}

// Unit tests for `lex`. Each test lexes a small input and compares the full token list,
// including the trailing `Eof`, against the expected tokens and their byte-offset spans.
#[cfg(test)]
mod tests {
    use super::*;

    // `read_line` is a reserved name and gets its own token rather than `Identifier`.
    #[test]
    fn read_line() {
        let input = "read_line()";

        assert_eq!(
            lex(input),
            Ok(vec![
                (Token::ReadLine, (0, 9)),
                (Token::LeftParenthesis, (9, 10)),
                (Token::RightParenthesis, (10, 11)),
                (Token::Eof, (11, 11)),
            ])
        )
    }

    // A string argument: the span (11, 26) includes both quotes, the text excludes them.
    #[test]
    fn write_line() {
        let input = "write_line('Hello, world!')";

        assert_eq!(
            lex(input),
            Ok(vec![
                (Token::WriteLine, (0, 10)),
                (Token::LeftParenthesis, (10, 11)),
                (Token::String("Hello, world!"), (11, 26)),
                (Token::RightParenthesis, (26, 27)),
                (Token::Eof, (27, 27)),
            ])
        )
    }

    // Whitespace between tokens is skipped and is not part of any span.
    #[test]
    fn string_concatenation() {
        let input = "'Hello, ' + 'world!'";

        assert_eq!(
            lex(input),
            Ok(vec![
                (Token::String("Hello, "), (0, 9)),
                (Token::Plus, (10, 11)),
                (Token::String("world!"), (12, 20)),
                (Token::Eof, (20, 20)),
            ])
        )
    }

    // A single-quoted string literal on its own.
    #[test]
    fn string() {
        let input = "'Hello, world!'";

        assert_eq!(
            lex(input),
            Ok(vec![
                (Token::String("Hello, world!"), (0, 15)),
                (Token::Eof, (15, 15)),
            ])
        )
    }

    // `true` lexes as a boolean literal, not as an identifier.
    #[test]
    fn r#true() {
        let input = "true";

        assert_eq!(
            lex(input),
            Ok(vec![(Token::Boolean(true), (0, 4)), (Token::Eof, (4, 4)),])
        )
    }

    // `false` lexes as a boolean literal, not as an identifier.
    #[test]
    fn r#false() {
        let input = "false";

        assert_eq!(
            lex(input),
            Ok(vec![(Token::Boolean(false), (0, 5)), (Token::Eof, (5, 5))])
        )
    }

    // A '.' that is not followed by a digit ends the number and becomes a `Dot` token.
    #[test]
    fn property_access_function_call() {
        let input = "42.is_even()";

        assert_eq!(
            lex(input),
            Ok(vec![
                (Token::Integer(42), (0, 2)),
                (Token::Dot, (2, 3)),
                (Token::IsEven, (3, 10)),
                (Token::LeftParenthesis, (10, 11)),
                (Token::RightParenthesis, (11, 12)),
                (Token::Eof, (12, 12)),
            ])
        )
    }

    // Empty input still yields a single `Eof` token with an empty span.
    #[test]
    fn empty() {
        let input = "";

        assert_eq!(lex(input), Ok(vec![(Token::Eof, (0, 0))]))
    }

    // `length` is a reserved name and gets its own token rather than `Identifier`.
    #[test]
    fn reserved_identifier() {
        let input = "length";

        assert_eq!(
            lex(input),
            Ok(vec![(Token::Length, (0, 6)), (Token::Eof, (6, 6)),])
        )
    }

    // Adjacent single-character tokens with no whitespace between them.
    #[test]
    fn square_braces() {
        let input = "[]";

        assert_eq!(
            lex(input),
            Ok(vec![
                (Token::LeftSquareBrace, (0, 1)),
                (Token::RightSquareBrace, (1, 2)),
                (Token::Eof, (2, 2)),
            ])
        )
    }

    // A '.' followed by a digit makes the literal a float.
    #[test]
    fn small_float() {
        let input = "1.23";

        assert_eq!(
            lex(input),
            Ok(vec![(Token::Float(1.23), (0, 4)), (Token::Eof, (4, 4)),])
        )
    }

    // The literal below has more digits than clippy's `excessive_precision` lint accepts
    // for an f64, hence the `allow`.
    #[test]
    #[allow(clippy::excessive_precision)]
    fn big_float() {
        let input = "123456789.123456789";

        assert_eq!(
            lex(input),
            Ok(vec![
                (Token::Float(123456789.123456789), (0, 19)),
                (Token::Eof, (19, 19)),
            ])
        )
    }

    // Integers separated by a `+` operator.
    #[test]
    fn add() {
        let input = "1 + 2";

        assert_eq!(
            lex(input),
            Ok(vec![
                (Token::Integer(1), (0, 1)),
                (Token::Plus, (2, 3)),
                (Token::Integer(2), (4, 5)),
                (Token::Eof, (5, 5)),
            ])
        )
    }

    // Integers separated by a `*` operator.
    #[test]
    fn multiply() {
        let input = "1 * 2";

        assert_eq!(
            lex(input),
            Ok(vec![
                (Token::Integer(1), (0, 1)),
                (Token::Star, (2, 3)),
                (Token::Integer(2), (4, 5)),
                (Token::Eof, (5, 5)),
            ])
        )
    }

    // The lexer emits tokens in source order; it applies no operator precedence.
    #[test]
    fn add_and_multiply() {
        let input = "1 + 2 * 3";

        assert_eq!(
            lex(input),
            Ok(vec![
                (Token::Integer(1), (0, 1)),
                (Token::Plus, (2, 3)),
                (Token::Integer(2), (4, 5)),
                (Token::Star, (6, 7)),
                (Token::Integer(3), (8, 9)),
                (Token::Eof, (9, 9)),
            ])
        );
    }

    // A non-reserved name lexes as `Identifier`, borrowing its text from the input.
    #[test]
    fn assignment() {
        let input = "a = 1 + 2 * 3";

        assert_eq!(
            lex(input,),
            Ok(vec![
                (Token::Identifier("a"), (0, 1)),
                (Token::Equal, (2, 3)),
                (Token::Integer(1), (4, 5)),
                (Token::Plus, (6, 7)),
                (Token::Integer(2), (8, 9)),
                (Token::Star, (10, 11)),
                (Token::Integer(3), (12, 13)),
                (Token::Eof, (13, 13)),
            ])
        );
    }
}
Add docs 2024-08-05 04:54:12 +00:00			`//! Lexing tools.`
			`//!`
			`//! This module provides two lexing options:`
			//! - [`lex`], which lexes the entire input and returns a vector of tokens and their positions
			//! - [`Lexer`], which lexes the input a token at a time
Add float lexing 2024-08-04 23:41:00 +00:00			`use std::num::{ParseFloatError, ParseIntError};`

Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`use crate::{Span, Token};`
Overhaul project structure 2024-08-04 00:23:52 +00:00
Add parse examples 2024-08-07 16:32:18 +00:00			`/// Lexes the input and return a vector of tokens and their positions.`
Add examples 2024-08-07 16:13:49 +00:00			`///`
			`/// # Examples`
			/// ```
			`/// # use dust_lang::*;`
			`/// let input = "x = 1 + 2";`
			`/// let tokens = lex(input).unwrap();`
			`///`
			`/// assert_eq!(`
			`/// tokens,`
			`/// [`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`/// (Token::Identifier("x"), (0, 1)),`
Add examples 2024-08-07 16:13:49 +00:00			`/// (Token::Equal, (2, 3)),`
			`/// (Token::Integer(1), (4, 5)),`
			`/// (Token::Plus, (6, 7)),`
			`/// (Token::Integer(2), (8, 9)),`
			`/// (Token::Eof, (9, 9)),`
			`/// ]`
			`/// );`
			/// ```
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`pub fn lex<'chars, 'src: 'chars>(input: &'src str) -> Result<Vec<(Token<'chars>, Span)>, LexError> {`
			`let mut lexer = Lexer::new();`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`let mut tokens = Vec::new();`

			`loop {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`let (token, span) = lexer.next_token(input)?;`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`let is_eof = matches!(token, Token::Eof);`

			`tokens.push((token, span));`

			`if is_eof {`
			`break;`
			`}`
			`}`

			`Ok(tokens)`
			`}`

			`#[derive(Debug, Clone)]`
Add docs 2024-08-05 04:54:12 +00:00			`/// Low-level tool for lexing a single token at a time.`
Add examples 2024-08-07 16:13:49 +00:00			`///`
Refactor TokenOwned and add some docs 2024-08-09 00:19:07 +00:00			/// Note: It is a logic error to call `next_token` with different inputs.
			`///`
Add examples 2024-08-07 16:13:49 +00:00			`/// # Examples`
			/// ```
			`/// # use dust_lang::*;`
			`/// let input = "x = 1 + 2";`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`/// let mut lexer = Lexer::new();`
Add examples 2024-08-07 16:13:49 +00:00			`/// let mut tokens = Vec::new();`
			`///`
			`/// loop {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`/// let (token, span) = lexer.next_token(input).unwrap();`
Add examples 2024-08-07 16:13:49 +00:00			`/// let is_eof = matches!(token, Token::Eof);`
			`///`
			`/// tokens.push((token, span));`
			`///`
			`/// if is_eof {`
			`/// break;`
			`/// }`
			`/// }`
			`///`
			`/// assert_eq!(`
			`/// tokens,`
			`/// [`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`/// (Token::Identifier("x"), (0, 1)),`
Add examples 2024-08-07 16:13:49 +00:00			`/// (Token::Equal, (2, 3)),`
			`/// (Token::Integer(1), (4, 5)),`
			`/// (Token::Plus, (6, 7)),`
			`/// (Token::Integer(2), (8, 9)),`
			`/// (Token::Eof, (9, 9)),`
			`/// ]`
			`/// )`
			/// ```
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`pub struct Lexer {`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`position: usize,`
			`}`

Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`impl Lexer {`
Add docs 2024-08-05 04:54:12 +00:00			`/// Create a new lexer for the given input.`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`pub fn new() -> Self {`
			`Lexer { position: 0 }`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`}`

Add docs 2024-08-05 04:54:12 +00:00			`/// Produce the next token.`
Refactor TokenOwned and add some docs 2024-08-09 00:19:07 +00:00			`///`
			`/// It is a logic error to call this method with different inputs.`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`pub fn next_token<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> {`
			`self.skip_whitespace(source);`
Overhaul project structure 2024-08-04 00:23:52 +00:00
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`let (token, span) = if let Some(c) = self.peek_char(source) {`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`match c {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`'0'..='9' => self.lex_number(source)?,`
			`'a'..='z' \| 'A'..='Z' => self.lex_alphabetical(source)?,`
			`'"' => self.lex_string('"', source)?,`
			`'\'' => self.lex_string('\'', source)?,`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`'+' => {`
			`self.position += 1;`
			`(Token::Plus, (self.position - 1, self.position))`
			`}`
			`'*' => {`
			`self.position += 1;`
			`(Token::Star, (self.position - 1, self.position))`
			`}`
			`'(' => {`
			`self.position += 1;`
			`(Token::LeftParenthesis, (self.position - 1, self.position))`
			`}`
			`')' => {`
			`self.position += 1;`
			`(Token::RightParenthesis, (self.position - 1, self.position))`
			`}`
			`'=' => {`
			`self.position += 1;`
			`(Token::Equal, (self.position - 1, self.position))`
			`}`
Add list parsing 2024-08-05 01:31:18 +00:00			`'[' => {`
			`self.position += 1;`
			`(Token::LeftSquareBrace, (self.position - 1, self.position))`
			`}`
			`']' => {`
			`self.position += 1;`
			`(Token::RightSquareBrace, (self.position - 1, self.position))`
			`}`
			`',' => {`
			`self.position += 1;`
			`(Token::Comma, (self.position - 1, self.position))`
			`}`
Implement property access 2024-08-05 18:31:08 +00:00			`'.' => {`
			`self.position += 1;`
			`(Token::Dot, (self.position - 1, self.position))`
			`}`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`_ => (Token::Eof, (self.position, self.position)),`
			`}`
			`} else {`
			`(Token::Eof, (self.position, self.position))`
			`};`

			`Ok((token, span))`
			`}`

Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`/// Progress to the next character.`
			`fn next_char(&mut self, source: &str) -> Option<char> {`
			`source[self.position..].chars().next().map(\|c\| {`
			`self.position += c.len_utf8();`
			`c`
			`})`
			`}`

Add docs 2024-08-05 04:54:12 +00:00			`/// Skip whitespace characters.`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`fn skip_whitespace(&mut self, source: &str) {`
			`while let Some(c) = self.peek_char(source) {`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`if c.is_whitespace() {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`self.next_char(source);`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`} else {`
			`break;`
			`}`
			`}`
			`}`

Add docs 2024-08-05 04:54:12 +00:00			`/// Peek at the next character without consuming it.`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`fn peek_char(&self, source: &str) -> Option<char> {`
			`source[self.position..].chars().next()`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`}`

Implement is_even and is_odd 2024-08-05 22:34:20 +00:00			`/// Peek at the second-to-next character without consuming it.`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`fn peek_second_char(&self, source: &str) -> Option<char> {`
			`source[self.position..].chars().nth(1)`
Implement lexing for boolean values 2024-08-07 14:41:27 +00:00			`}`

Add docs 2024-08-05 04:54:12 +00:00			`/// Lex an integer or float token.`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`fn lex_number<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> {`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`let start_pos = self.position;`
Add float lexing 2024-08-04 23:41:00 +00:00			`let mut is_float = false;`
Overhaul project structure 2024-08-04 00:23:52 +00:00
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`while let Some(c) = self.peek_char(source) {`
Add float lexing 2024-08-04 23:41:00 +00:00			`if c == '.' {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`if let Some('0'..='9') = self.peek_second_char(source) {`
Implement is_even and is_odd 2024-08-05 22:34:20 +00:00			`if !is_float {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`self.next_char(source);`
Implement is_even and is_odd 2024-08-05 22:34:20 +00:00			`}`
Add float lexing 2024-08-04 23:41:00 +00:00
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`self.next_char(source);`
Implement property access 2024-08-05 18:31:08 +00:00
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`while let Some('0'..='9') = self.peek_char(source) {`
			`self.next_char(source);`
Add float lexing 2024-08-04 23:41:00 +00:00			`}`
Implement is_even and is_odd 2024-08-05 22:34:20 +00:00
			`is_float = true;`
			`} else {`
			`break;`
Add float lexing 2024-08-04 23:41:00 +00:00			`}`
			`}`

Overhaul project structure 2024-08-04 00:23:52 +00:00			`if c.is_ascii_digit() {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`self.next_char(source);`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`} else {`
			`break;`
			`}`
			`}`

Add float lexing 2024-08-04 23:41:00 +00:00			`if is_float {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`let float = source[start_pos..self.position].parse::<f64>()?;`
Overhaul project structure 2024-08-04 00:23:52 +00:00
Add float lexing 2024-08-04 23:41:00 +00:00			`Ok((Token::Float(float), (start_pos, self.position)))`
			`} else {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`let integer = source[start_pos..self.position].parse::<i64>()?;`
Add float lexing 2024-08-04 23:41:00 +00:00
Build spans into instructions 2024-08-05 00:08:43 +00:00			`Ok((Token::Integer(integer), (start_pos, self.position)))`
Add float lexing 2024-08-04 23:41:00 +00:00			`}`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`}`

Add docs 2024-08-05 04:54:12 +00:00			`/// Lex an identifier token.`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`fn lex_alphabetical<'src>(`
			`&mut self,`
			`source: &'src str,`
			`) -> Result<(Token<'src>, Span), LexError> {`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`let start_pos = self.position;`

Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`while let Some(c) = self.peek_char(source) {`
Implement is_even and is_odd 2024-08-05 22:34:20 +00:00			`if c.is_ascii_alphanumeric() \|\| c == '_' {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`self.next_char(source);`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`} else {`
			`break;`
			`}`
			`}`

Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`let string = &source[start_pos..self.position];`
Begin adding support for more built-in properties 2024-08-05 19:54:48 +00:00			`let token = match string {`
Implement lexing for boolean values 2024-08-07 14:41:27 +00:00			`"true" => Token::Boolean(true),`
			`"false" => Token::Boolean(false),`
Begin implementing better built-in features 2024-08-07 22:24:25 +00:00			`"is_even" => Token::IsEven,`
			`"is_odd" => Token::IsOdd,`
			`"length" => Token::Length,`
Add lexing and parsing for I/O built-in functions; Refactor built-in function parsing 2024-08-08 17:49:40 +00:00			`"read_line" => Token::ReadLine,`
			`"write_line" => Token::WriteLine,`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`_ => Token::Identifier(string),`
Begin adding support for more built-in properties 2024-08-05 19:54:48 +00:00			`};`
Overhaul project structure 2024-08-04 00:23:52 +00:00
			`Ok((token, (start_pos, self.position)))`
			`}`
Lex strings and string concatenation 2024-08-08 17:08:53 +00:00
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`fn lex_string<'src>(`
			`&mut self,`
			`delimiter: char,`
			`source: &'src str,`
			`) -> Result<(Token<'src>, Span), LexError> {`
Lex strings and string concatenation 2024-08-08 17:08:53 +00:00			`let start_pos = self.position;`

Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`self.next_char(source);`
Lex strings and string concatenation 2024-08-08 17:08:53 +00:00
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`while let Some(c) = self.peek_char(source) {`
Lex strings and string concatenation 2024-08-08 17:08:53 +00:00			`if c == delimiter {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`self.next_char(source);`
Lex strings and string concatenation 2024-08-08 17:08:53 +00:00			`break;`
			`} else {`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`self.next_char(source);`
Lex strings and string concatenation 2024-08-08 17:08:53 +00:00			`}`
			`}`

Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`let text = &source[start_pos + 1..self.position - 1];`

			`Ok((Token::String(text), (start_pos, self.position)))`
			`}`
			`}`

			`impl Default for Lexer {`
			`fn default() -> Self {`
			`Self::new()`
Lex strings and string concatenation 2024-08-08 17:08:53 +00:00			`}`
Overhaul project structure 2024-08-04 00:23:52 +00:00			`}`
Add tests and clean up 2024-08-04 23:25:44 +00:00
			`#[derive(Debug, PartialEq, Clone)]`
			`pub enum LexError {`
Add float lexing 2024-08-04 23:41:00 +00:00			`FloatError(ParseFloatError),`
			`IntegerError(ParseIntError),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`}`

Add float lexing 2024-08-04 23:41:00 +00:00			`impl From<ParseFloatError> for LexError {`
			`fn from(error: std::num::ParseFloatError) -> Self {`
			`Self::FloatError(error)`
			`}`
			`}`

			`impl From<ParseIntError> for LexError {`
			`fn from(error: std::num::ParseIntError) -> Self {`
			`Self::IntegerError(error)`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`}`
			`}`

			`#[cfg(test)]`
			`mod tests {`
			`use super::*;`

Clean up 2024-08-08 17:57:53 +00:00			`#[test]`
			`fn read_line() {`
			`let input = "read_line()";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![`
			`(Token::ReadLine, (0, 9)),`
			`(Token::LeftParenthesis, (9, 10)),`
			`(Token::RightParenthesis, (10, 11)),`
			`(Token::Eof, (11, 11)),`
			`])`
			`)`
			`}`

			`#[test]`
			`fn write_line() {`
			`let input = "write_line('Hello, world!')";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![`
			`(Token::WriteLine, (0, 10)),`
			`(Token::LeftParenthesis, (10, 11)),`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`(Token::String("Hello, world!"), (11, 26)),`
Clean up 2024-08-08 17:57:53 +00:00			`(Token::RightParenthesis, (26, 27)),`
			`(Token::Eof, (27, 27)),`
			`])`
			`)`
			`}`

Lex strings and string concatenation 2024-08-08 17:08:53 +00:00			`#[test]`
			`fn string_concatenation() {`
			`let input = "'Hello, ' + 'world!'";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`(Token::String("Hello, "), (0, 9)),`
Lex strings and string concatenation 2024-08-08 17:08:53 +00:00			`(Token::Plus, (10, 11)),`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`(Token::String("world!"), (12, 20)),`
Lex strings and string concatenation 2024-08-08 17:08:53 +00:00			`(Token::Eof, (20, 20)),`
			`])`
			`)`
			`}`

			`#[test]`
			`fn string() {`
			`let input = "'Hello, world!'";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`(Token::String("Hello, world!"), (0, 15)),`
Lex strings and string concatenation 2024-08-08 17:08:53 +00:00			`(Token::Eof, (15, 15)),`
			`])`
			`)`
			`}`

Implement lexing for boolean values 2024-08-07 14:41:27 +00:00			`#[test]`
			`fn r#true() {`
			`let input = "true";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![(Token::Boolean(true), (0, 4)), (Token::Eof, (4, 4)),])`
			`)`
			`}`

			`#[test]`
			`fn r#false() {`
			`let input = "false";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![(Token::Boolean(false), (0, 5)), (Token::Eof, (5, 5))])`
			`)`
			`}`

Implement is_even and is_odd 2024-08-05 22:34:20 +00:00			`#[test]`
Begin implementing better built-in features 2024-08-07 22:24:25 +00:00			`fn property_access_function_call() {`
			`let input = "42.is_even()";`
Implement is_even and is_odd 2024-08-05 22:34:20 +00:00
			`assert_eq!(`
			`lex(input),`
			`Ok(vec![`
			`(Token::Integer(42), (0, 2)),`
			`(Token::Dot, (2, 3)),`
Begin implementing better built-in features 2024-08-07 22:24:25 +00:00			`(Token::IsEven, (3, 10)),`
			`(Token::LeftParenthesis, (10, 11)),`
			`(Token::RightParenthesis, (11, 12)),`
			`(Token::Eof, (12, 12)),`
Implement is_even and is_odd 2024-08-05 22:34:20 +00:00			`])`
			`)`
			`}`

			`#[test]`
			`fn empty() {`
			`let input = "";`

			`assert_eq!(lex(input), Ok(vec![(Token::Eof, (0, 0))]))`
			`}`

Begin adding support for more built-in properties 2024-08-05 19:54:48 +00:00			`#[test]`
			`fn reserved_identifier() {`
			`let input = "length";`

			`assert_eq!(`
			`lex(input),`
Begin implementing better built-in features 2024-08-07 22:24:25 +00:00			`Ok(vec![(Token::Length, (0, 6)), (Token::Eof, (6, 6)),])`
Begin adding support for more built-in properties 2024-08-05 19:54:48 +00:00			`)`
			`}`

Add list parsing 2024-08-05 01:31:18 +00:00			`#[test]`
			`fn square_braces() {`
			`let input = "[]";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![`
			`(Token::LeftSquareBrace, (0, 1)),`
			`(Token::RightSquareBrace, (1, 2)),`
			`(Token::Eof, (2, 2)),`
			`])`
			`)`
			`}`

Add float lexing 2024-08-04 23:41:00 +00:00			`#[test]`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`fn small_float() {`
Add float lexing 2024-08-04 23:41:00 +00:00			`let input = "1.23";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![(Token::Float(1.23), (0, 4)), (Token::Eof, (4, 4)),])`
			`)`
			`}`

Build spans into instructions 2024-08-05 00:08:43 +00:00			`#[test]`
			`#[allow(clippy::excessive_precision)]`
			`fn big_float() {`
			`let input = "123456789.123456789";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![`
			`(Token::Float(123456789.123456789), (0, 19)),`
			`(Token::Eof, (19, 19)),`
			`])`
			`)`
			`}`

Add tests and clean up 2024-08-04 23:25:44 +00:00			`#[test]`
			`fn add() {`
			`let input = "1 + 2";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`(Token::Integer(1), (0, 1)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Plus, (2, 3)),`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`(Token::Integer(2), (4, 5)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Eof, (5, 5)),`
			`])`
			`)`
			`}`

			`#[test]`
			`fn multiply() {`
			`let input = "1 * 2";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`(Token::Integer(1), (0, 1)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Star, (2, 3)),`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`(Token::Integer(2), (4, 5)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Eof, (5, 5)),`
			`])`
			`)`
			`}`

			`#[test]`
			`fn add_and_multiply() {`
			`let input = "1 + 2 * 3";`

			`assert_eq!(`
			`lex(input),`
			`Ok(vec![`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`(Token::Integer(1), (0, 1)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Plus, (2, 3)),`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`(Token::Integer(2), (4, 5)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Star, (6, 7)),`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`(Token::Integer(3), (8, 9)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Eof, (9, 9)),`
			`])`
			`);`
			`}`

			`#[test]`
			`fn assignment() {`
			`let input = "a = 1 + 2 * 3";`

			`assert_eq!(`
			`lex(input,),`
			`Ok(vec![`
Borrow string slices instead of copying them in lexing 2024-08-08 20:19:14 +00:00			`(Token::Identifier("a"), (0, 1)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Equal, (2, 3)),`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`(Token::Integer(1), (4, 5)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Plus, (6, 7)),`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`(Token::Integer(2), (8, 9)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Star, (10, 11)),`
Build spans into instructions 2024-08-05 00:08:43 +00:00			`(Token::Integer(3), (12, 13)),`
Add tests and clean up 2024-08-04 23:25:44 +00:00			`(Token::Eof, (13, 13)),`
			`])`
			`);`
			`}`
			`}`