Compare commits
4 Commits
a52e78150e
...
fa2ce8a0bf
Author | SHA1 | Date | |
---|---|---|---|
fa2ce8a0bf | |||
bf519ec087 | |||
1c24286696 | |||
d5d51e9849 |
@ -1,5 +1,5 @@
|
|||||||
use std::{
|
use std::{
|
||||||
collections::HashMap,
|
collections::HashSet,
|
||||||
fmt::{self, Display, Formatter},
|
fmt::{self, Display, Formatter},
|
||||||
hash::Hash,
|
hash::Hash,
|
||||||
sync::{Arc, OnceLock, RwLock},
|
sync::{Arc, OnceLock, RwLock},
|
||||||
@ -7,26 +7,26 @@ use std::{
|
|||||||
|
|
||||||
use serde::{de::Visitor, Deserialize, Serialize};
|
use serde::{de::Visitor, Deserialize, Serialize};
|
||||||
|
|
||||||
static IDENTIFIER_CACHE: OnceLock<RwLock<HashMap<String, Identifier>>> = OnceLock::new();
|
static IDENTIFIER_CACHE: OnceLock<RwLock<HashSet<Identifier>>> = OnceLock::new();
|
||||||
|
|
||||||
fn identifier_cache<'a>() -> &'a RwLock<HashMap<String, Identifier>> {
|
fn identifier_cache<'a>() -> &'a RwLock<HashSet<Identifier>> {
|
||||||
IDENTIFIER_CACHE.get_or_init(|| RwLock::new(HashMap::new()))
|
IDENTIFIER_CACHE.get_or_init(|| RwLock::new(HashSet::new()))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
||||||
pub struct Identifier(Arc<String>);
|
pub struct Identifier(Arc<String>);
|
||||||
|
|
||||||
impl Identifier {
|
impl Identifier {
|
||||||
pub fn new(text: &str) -> Self {
|
pub fn new<T: ToString>(text: T) -> Self {
|
||||||
let cache = identifier_cache();
|
let cache = identifier_cache();
|
||||||
|
|
||||||
if let Some(identifier) = cache.read().unwrap().get(text).cloned() {
|
|
||||||
return identifier;
|
|
||||||
}
|
|
||||||
|
|
||||||
let new = Identifier(Arc::new(text.to_string()));
|
let new = Identifier(Arc::new(text.to_string()));
|
||||||
|
|
||||||
cache.write().unwrap().insert(text.to_string(), new.clone());
|
if let Some(identifier) = cache.read().unwrap().get(&new).cloned() {
|
||||||
|
return identifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
cache.write().unwrap().insert(new.clone());
|
||||||
|
|
||||||
new
|
new
|
||||||
}
|
}
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
//! - [`Lexer`], which lexes the input a token at a time
|
//! - [`Lexer`], which lexes the input a token at a time
|
||||||
use std::num::{ParseFloatError, ParseIntError};
|
use std::num::{ParseFloatError, ParseIntError};
|
||||||
|
|
||||||
use crate::{Identifier, Span, Token};
|
use crate::{Span, Token};
|
||||||
|
|
||||||
/// Lexes the input and returns a vector of tokens and their positions.
|
/// Lexes the input and returns a vector of tokens and their positions.
|
||||||
///
|
///
|
||||||
@ -18,7 +18,7 @@ use crate::{Identifier, Span, Token};
|
|||||||
/// assert_eq!(
|
/// assert_eq!(
|
||||||
/// tokens,
|
/// tokens,
|
||||||
/// [
|
/// [
|
||||||
/// (Token::Identifier(Identifier::new("x")), (0, 1)),
|
/// (Token::Identifier("x"), (0, 1)),
|
||||||
/// (Token::Equal, (2, 3)),
|
/// (Token::Equal, (2, 3)),
|
||||||
/// (Token::Integer(1), (4, 5)),
|
/// (Token::Integer(1), (4, 5)),
|
||||||
/// (Token::Plus, (6, 7)),
|
/// (Token::Plus, (6, 7)),
|
||||||
@ -27,12 +27,12 @@ use crate::{Identifier, Span, Token};
|
|||||||
/// ]
|
/// ]
|
||||||
/// );
|
/// );
|
||||||
/// ```
|
/// ```
|
||||||
pub fn lex(input: &str) -> Result<Vec<(Token, Span)>, LexError> {
|
pub fn lex<'chars, 'src: 'chars>(input: &'src str) -> Result<Vec<(Token<'chars>, Span)>, LexError> {
|
||||||
let mut lexer = Lexer::new(input);
|
let mut lexer = Lexer::new();
|
||||||
let mut tokens = Vec::new();
|
let mut tokens = Vec::new();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
let (token, span) = lexer.next_token()?;
|
let (token, span) = lexer.next_token(input)?;
|
||||||
let is_eof = matches!(token, Token::Eof);
|
let is_eof = matches!(token, Token::Eof);
|
||||||
|
|
||||||
tokens.push((token, span));
|
tokens.push((token, span));
|
||||||
@ -48,15 +48,17 @@ pub fn lex(input: &str) -> Result<Vec<(Token, Span)>, LexError> {
|
|||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
/// Low-level tool for lexing a single token at a time.
|
/// Low-level tool for lexing a single token at a time.
|
||||||
///
|
///
|
||||||
|
/// **Note**: It is a logic error to call `next_token` with different inputs.
|
||||||
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
/// ```
|
/// ```
|
||||||
/// # use dust_lang::*;
|
/// # use dust_lang::*;
|
||||||
/// let input = "x = 1 + 2";
|
/// let input = "x = 1 + 2";
|
||||||
/// let mut lexer = Lexer::new(input);
|
/// let mut lexer = Lexer::new();
|
||||||
/// let mut tokens = Vec::new();
|
/// let mut tokens = Vec::new();
|
||||||
///
|
///
|
||||||
/// loop {
|
/// loop {
|
||||||
/// let (token, span) = lexer.next_token().unwrap();
|
/// let (token, span) = lexer.next_token(input).unwrap();
|
||||||
/// let is_eof = matches!(token, Token::Eof);
|
/// let is_eof = matches!(token, Token::Eof);
|
||||||
///
|
///
|
||||||
/// tokens.push((token, span));
|
/// tokens.push((token, span));
|
||||||
@ -69,7 +71,7 @@ pub fn lex(input: &str) -> Result<Vec<(Token, Span)>, LexError> {
|
|||||||
/// assert_eq!(
|
/// assert_eq!(
|
||||||
/// tokens,
|
/// tokens,
|
||||||
/// [
|
/// [
|
||||||
/// (Token::Identifier(Identifier::new("x")), (0, 1)),
|
/// (Token::Identifier("x"), (0, 1)),
|
||||||
/// (Token::Equal, (2, 3)),
|
/// (Token::Equal, (2, 3)),
|
||||||
/// (Token::Integer(1), (4, 5)),
|
/// (Token::Integer(1), (4, 5)),
|
||||||
/// (Token::Plus, (6, 7)),
|
/// (Token::Plus, (6, 7)),
|
||||||
@ -78,38 +80,28 @@ pub fn lex(input: &str) -> Result<Vec<(Token, Span)>, LexError> {
|
|||||||
/// ]
|
/// ]
|
||||||
/// )
|
/// )
|
||||||
/// ```
|
/// ```
|
||||||
pub struct Lexer<'a> {
|
pub struct Lexer {
|
||||||
source: &'a str,
|
|
||||||
position: usize,
|
position: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Lexer<'a> {
|
impl Lexer {
|
||||||
/// Create a new lexer for the given input.
|
/// Create a new lexer positioned at the start of the source.
|
||||||
pub fn new(input: &'a str) -> Self {
|
pub fn new() -> Self {
|
||||||
Lexer {
|
Lexer { position: 0 }
|
||||||
source: input,
|
|
||||||
position: 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Progress to the next character.
|
|
||||||
fn next_char(&mut self) -> Option<char> {
|
|
||||||
self.source[self.position..].chars().next().map(|c| {
|
|
||||||
self.position += c.len_utf8();
|
|
||||||
c
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Produce the next token.
|
/// Produce the next token.
|
||||||
pub fn next_token(&mut self) -> Result<(Token, Span), LexError> {
|
///
|
||||||
self.skip_whitespace();
|
/// It is a logic error to call this method with different inputs.
|
||||||
|
pub fn next_token<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> {
|
||||||
|
self.skip_whitespace(source);
|
||||||
|
|
||||||
let (token, span) = if let Some(c) = self.peek_char() {
|
let (token, span) = if let Some(c) = self.peek_char(source) {
|
||||||
match c {
|
match c {
|
||||||
'0'..='9' => self.lex_number()?,
|
'0'..='9' => self.lex_number(source)?,
|
||||||
'a'..='z' | 'A'..='Z' => self.lex_alphabetical()?,
|
'a'..='z' | 'A'..='Z' => self.lex_alphabetical(source)?,
|
||||||
'"' => self.lex_string('"')?,
|
'"' => self.lex_string('"', source)?,
|
||||||
'\'' => self.lex_string('\'')?,
|
'\'' => self.lex_string('\'', source)?,
|
||||||
'+' => {
|
'+' => {
|
||||||
self.position += 1;
|
self.position += 1;
|
||||||
(Token::Plus, (self.position - 1, self.position))
|
(Token::Plus, (self.position - 1, self.position))
|
||||||
@ -155,11 +147,19 @@ impl<'a> Lexer<'a> {
|
|||||||
Ok((token, span))
|
Ok((token, span))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Progress to the next character.
|
||||||
|
fn next_char(&mut self, source: &str) -> Option<char> {
|
||||||
|
source[self.position..].chars().next().map(|c| {
|
||||||
|
self.position += c.len_utf8();
|
||||||
|
c
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// Skip whitespace characters.
|
/// Skip whitespace characters.
|
||||||
fn skip_whitespace(&mut self) {
|
fn skip_whitespace(&mut self, source: &str) {
|
||||||
while let Some(c) = self.peek_char() {
|
while let Some(c) = self.peek_char(source) {
|
||||||
if c.is_whitespace() {
|
if c.is_whitespace() {
|
||||||
self.next_char();
|
self.next_char(source);
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -167,44 +167,31 @@ impl<'a> Lexer<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Peek at the next character without consuming it.
|
/// Peek at the next character without consuming it.
|
||||||
fn peek_char(&self) -> Option<char> {
|
fn peek_char(&self, source: &str) -> Option<char> {
|
||||||
self.source[self.position..].chars().next()
|
source[self.position..].chars().next()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Peek at the character after the next one without consuming it.
|
/// Peek at the character after the next one without consuming it.
|
||||||
fn peek_second_char(&self) -> Option<char> {
|
fn peek_second_char(&self, source: &str) -> Option<char> {
|
||||||
self.source[self.position..].chars().nth(1)
|
source[self.position..].chars().nth(1)
|
||||||
}
|
|
||||||
|
|
||||||
fn _peek_until_whitespace(&self) -> Option<&str> {
|
|
||||||
let start = self.position;
|
|
||||||
let end = self.source[self.position..]
|
|
||||||
.find(char::is_whitespace)
|
|
||||||
.map(|i| i + start);
|
|
||||||
|
|
||||||
if let Some(end) = end {
|
|
||||||
Some(&self.source[start..end])
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lex an integer or float token.
|
/// Lex an integer or float token.
|
||||||
fn lex_number(&mut self) -> Result<(Token, Span), LexError> {
|
fn lex_number<'src>(&mut self, source: &'src str) -> Result<(Token<'src>, Span), LexError> {
|
||||||
let start_pos = self.position;
|
let start_pos = self.position;
|
||||||
let mut is_float = false;
|
let mut is_float = false;
|
||||||
|
|
||||||
while let Some(c) = self.peek_char() {
|
while let Some(c) = self.peek_char(source) {
|
||||||
if c == '.' {
|
if c == '.' {
|
||||||
if let Some('0'..='9') = self.peek_second_char() {
|
if let Some('0'..='9') = self.peek_second_char(source) {
|
||||||
if !is_float {
|
if !is_float {
|
||||||
self.next_char();
|
self.next_char(source);
|
||||||
}
|
}
|
||||||
|
|
||||||
self.next_char();
|
self.next_char(source);
|
||||||
|
|
||||||
while let Some('0'..='9') = self.peek_char() {
|
while let Some('0'..='9') = self.peek_char(source) {
|
||||||
self.next_char();
|
self.next_char(source);
|
||||||
}
|
}
|
||||||
|
|
||||||
is_float = true;
|
is_float = true;
|
||||||
@ -214,36 +201,39 @@ impl<'a> Lexer<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if c.is_ascii_digit() {
|
if c.is_ascii_digit() {
|
||||||
self.next_char();
|
self.next_char(source);
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_float {
|
if is_float {
|
||||||
let float = self.source[start_pos..self.position].parse::<f64>()?;
|
let float = source[start_pos..self.position].parse::<f64>()?;
|
||||||
|
|
||||||
Ok((Token::Float(float), (start_pos, self.position)))
|
Ok((Token::Float(float), (start_pos, self.position)))
|
||||||
} else {
|
} else {
|
||||||
let integer = self.source[start_pos..self.position].parse::<i64>()?;
|
let integer = source[start_pos..self.position].parse::<i64>()?;
|
||||||
|
|
||||||
Ok((Token::Integer(integer), (start_pos, self.position)))
|
Ok((Token::Integer(integer), (start_pos, self.position)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Lex an identifier, keyword, or boolean token.
|
/// Lex an identifier, keyword, or boolean token.
|
||||||
fn lex_alphabetical(&mut self) -> Result<(Token, Span), LexError> {
|
fn lex_alphabetical<'src>(
|
||||||
|
&mut self,
|
||||||
|
source: &'src str,
|
||||||
|
) -> Result<(Token<'src>, Span), LexError> {
|
||||||
let start_pos = self.position;
|
let start_pos = self.position;
|
||||||
|
|
||||||
while let Some(c) = self.peek_char() {
|
while let Some(c) = self.peek_char(source) {
|
||||||
if c.is_ascii_alphanumeric() || c == '_' {
|
if c.is_ascii_alphanumeric() || c == '_' {
|
||||||
self.next_char();
|
self.next_char(source);
|
||||||
} else {
|
} else {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let string = &self.source[start_pos..self.position];
|
let string = &source[start_pos..self.position];
|
||||||
let token = match string {
|
let token = match string {
|
||||||
"true" => Token::Boolean(true),
|
"true" => Token::Boolean(true),
|
||||||
"false" => Token::Boolean(false),
|
"false" => Token::Boolean(false),
|
||||||
@ -252,31 +242,39 @@ impl<'a> Lexer<'a> {
|
|||||||
"length" => Token::Length,
|
"length" => Token::Length,
|
||||||
"read_line" => Token::ReadLine,
|
"read_line" => Token::ReadLine,
|
||||||
"write_line" => Token::WriteLine,
|
"write_line" => Token::WriteLine,
|
||||||
_ => Token::Identifier(Identifier::new(string)),
|
_ => Token::Identifier(string),
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok((token, (start_pos, self.position)))
|
Ok((token, (start_pos, self.position)))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lex_string(&mut self, delimiter: char) -> Result<(Token, Span), LexError> {
|
fn lex_string<'src>(
|
||||||
|
&mut self,
|
||||||
|
delimiter: char,
|
||||||
|
source: &'src str,
|
||||||
|
) -> Result<(Token<'src>, Span), LexError> {
|
||||||
let start_pos = self.position;
|
let start_pos = self.position;
|
||||||
|
|
||||||
self.next_char();
|
self.next_char(source);
|
||||||
|
|
||||||
while let Some(c) = self.peek_char() {
|
while let Some(c) = self.peek_char(source) {
|
||||||
if c == delimiter {
|
if c == delimiter {
|
||||||
self.next_char();
|
self.next_char(source);
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
self.next_char();
|
self.next_char(source);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let string = &self.source[start_pos + 1..self.position - 1];
|
let text = &source[start_pos + 1..self.position - 1];
|
||||||
Ok((
|
|
||||||
Token::String(string.to_string()),
|
Ok((Token::String(text), (start_pos, self.position)))
|
||||||
(start_pos, self.position),
|
}
|
||||||
))
|
}
|
||||||
|
|
||||||
|
impl Default for Lexer {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::new()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -302,6 +300,37 @@ impl From<ParseIntError> for LexError {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn read_line() {
|
||||||
|
let input = "read_line()";
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
lex(input),
|
||||||
|
Ok(vec![
|
||||||
|
(Token::ReadLine, (0, 9)),
|
||||||
|
(Token::LeftParenthesis, (9, 10)),
|
||||||
|
(Token::RightParenthesis, (10, 11)),
|
||||||
|
(Token::Eof, (11, 11)),
|
||||||
|
])
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn write_line() {
|
||||||
|
let input = "write_line('Hello, world!')";
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
lex(input),
|
||||||
|
Ok(vec![
|
||||||
|
(Token::WriteLine, (0, 10)),
|
||||||
|
(Token::LeftParenthesis, (10, 11)),
|
||||||
|
(Token::String("Hello, world!"), (11, 26)),
|
||||||
|
(Token::RightParenthesis, (26, 27)),
|
||||||
|
(Token::Eof, (27, 27)),
|
||||||
|
])
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn string_concatenation() {
|
fn string_concatenation() {
|
||||||
let input = "'Hello, ' + 'world!'";
|
let input = "'Hello, ' + 'world!'";
|
||||||
@ -309,9 +338,9 @@ mod tests {
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
lex(input),
|
lex(input),
|
||||||
Ok(vec![
|
Ok(vec![
|
||||||
(Token::String("Hello, ".to_string()), (0, 9)),
|
(Token::String("Hello, "), (0, 9)),
|
||||||
(Token::Plus, (10, 11)),
|
(Token::Plus, (10, 11)),
|
||||||
(Token::String("world!".to_string()), (12, 20)),
|
(Token::String("world!"), (12, 20)),
|
||||||
(Token::Eof, (20, 20)),
|
(Token::Eof, (20, 20)),
|
||||||
])
|
])
|
||||||
)
|
)
|
||||||
@ -324,7 +353,7 @@ mod tests {
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
lex(input),
|
lex(input),
|
||||||
Ok(vec![
|
Ok(vec![
|
||||||
(Token::String("Hello, world!".to_string()), (0, 15)),
|
(Token::String("Hello, world!"), (0, 15)),
|
||||||
(Token::Eof, (15, 15)),
|
(Token::Eof, (15, 15)),
|
||||||
])
|
])
|
||||||
)
|
)
|
||||||
@ -476,7 +505,7 @@ mod tests {
|
|||||||
assert_eq!(
|
assert_eq!(
|
||||||
lex(input,),
|
lex(input,),
|
||||||
Ok(vec![
|
Ok(vec![
|
||||||
(Token::Identifier(Identifier::new("a")), (0, 1)),
|
(Token::Identifier("a"), (0, 1)),
|
||||||
(Token::Equal, (2, 3)),
|
(Token::Equal, (2, 3)),
|
||||||
(Token::Integer(1), (4, 5)),
|
(Token::Integer(1), (4, 5)),
|
||||||
(Token::Plus, (6, 7)),
|
(Token::Plus, (6, 7)),
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
built_in_function::BuiltInFunction, AbstractSyntaxTree, LexError, Lexer, Node, Span, Statement,
|
built_in_function::BuiltInFunction, token::TokenOwned, AbstractSyntaxTree, Identifier,
|
||||||
Token, Value,
|
LexError, Lexer, Node, Span, Statement, Token, Value,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Parses the input into an abstract syntax tree.
|
/// Parses the input into an abstract syntax tree.
|
||||||
@ -40,8 +40,8 @@ use crate::{
|
|||||||
/// );
|
/// );
|
||||||
/// ```
|
/// ```
|
||||||
pub fn parse(input: &str) -> Result<AbstractSyntaxTree, ParseError> {
|
pub fn parse(input: &str) -> Result<AbstractSyntaxTree, ParseError> {
|
||||||
let lexer = Lexer::new(input);
|
let lexer = Lexer::new();
|
||||||
let mut parser = Parser::new(lexer);
|
let mut parser = Parser::new(input, lexer);
|
||||||
let mut nodes = VecDeque::new();
|
let mut nodes = VecDeque::new();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
@ -64,8 +64,8 @@ pub fn parse(input: &str) -> Result<AbstractSyntaxTree, ParseError> {
|
|||||||
/// # use std::collections::VecDeque;
|
/// # use std::collections::VecDeque;
|
||||||
/// # use dust_lang::*;
|
/// # use dust_lang::*;
|
||||||
/// let input = "x = 42";
|
/// let input = "x = 42";
|
||||||
/// let lexer = Lexer::new(input);
|
/// let lexer = Lexer::new();
|
||||||
/// let mut parser = Parser::new(lexer);
|
/// let mut parser = Parser::new(input, lexer);
|
||||||
/// let mut nodes = VecDeque::new();
|
/// let mut nodes = VecDeque::new();
|
||||||
///
|
///
|
||||||
/// loop {
|
/// loop {
|
||||||
@ -98,16 +98,21 @@ pub fn parse(input: &str) -> Result<AbstractSyntaxTree, ParseError> {
|
|||||||
/// );
|
/// );
|
||||||
/// ```
|
/// ```
|
||||||
pub struct Parser<'src> {
|
pub struct Parser<'src> {
|
||||||
lexer: Lexer<'src>,
|
source: &'src str,
|
||||||
current: (Token, Span),
|
lexer: Lexer,
|
||||||
|
current: (Token<'src>, Span),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'src> Parser<'src> {
|
impl<'src> Parser<'src> {
|
||||||
pub fn new(lexer: Lexer<'src>) -> Self {
|
pub fn new(source: &'src str, lexer: Lexer) -> Self {
|
||||||
let mut lexer = lexer;
|
let mut lexer = lexer;
|
||||||
let current = lexer.next_token().unwrap_or((Token::Eof, (0, 0)));
|
let current = lexer.next_token(source).unwrap_or((Token::Eof, (0, 0)));
|
||||||
|
|
||||||
Parser { lexer, current }
|
Parser {
|
||||||
|
source,
|
||||||
|
lexer,
|
||||||
|
current,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse(&mut self) -> Result<Node, ParseError> {
|
pub fn parse(&mut self) -> Result<Node, ParseError> {
|
||||||
@ -119,7 +124,7 @@ impl<'src> Parser<'src> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn next_token(&mut self) -> Result<(), ParseError> {
|
fn next_token(&mut self) -> Result<(), ParseError> {
|
||||||
self.current = self.lexer.next_token()?;
|
self.current = self.lexer.next_token(self.source)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -182,7 +187,7 @@ impl<'src> Parser<'src> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn parse_primary(&mut self) -> Result<Node, ParseError> {
|
fn parse_primary(&mut self) -> Result<Node, ParseError> {
|
||||||
match self.current.clone() {
|
match self.current {
|
||||||
(Token::Boolean(boolean), span) => {
|
(Token::Boolean(boolean), span) => {
|
||||||
self.next_token()?;
|
self.next_token()?;
|
||||||
|
|
||||||
@ -201,10 +206,13 @@ impl<'src> Parser<'src> {
|
|||||||
|
|
||||||
Ok(Node::new(Statement::Constant(Value::integer(int)), span))
|
Ok(Node::new(Statement::Constant(Value::integer(int)), span))
|
||||||
}
|
}
|
||||||
(Token::Identifier(identifier), span) => {
|
(Token::Identifier(text), span) => {
|
||||||
self.next_token()?;
|
self.next_token()?;
|
||||||
|
|
||||||
Ok(Node::new(Statement::Identifier(identifier), span))
|
Ok(Node::new(
|
||||||
|
Statement::Identifier(Identifier::new(text)),
|
||||||
|
span,
|
||||||
|
))
|
||||||
}
|
}
|
||||||
(Token::String(string), span) => {
|
(Token::String(string), span) => {
|
||||||
self.next_token()?;
|
self.next_token()?;
|
||||||
@ -222,7 +230,7 @@ impl<'src> Parser<'src> {
|
|||||||
Ok(Node::new(node.statement, (left_span.0, right_span.1)))
|
Ok(Node::new(node.statement, (left_span.0, right_span.1)))
|
||||||
} else {
|
} else {
|
||||||
Err(ParseError::ExpectedClosingParenthesis {
|
Err(ParseError::ExpectedClosingParenthesis {
|
||||||
actual: self.current.0.clone(),
|
actual: self.current.0.to_owned(),
|
||||||
span: self.current.1,
|
span: self.current.1,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -252,7 +260,7 @@ impl<'src> Parser<'src> {
|
|||||||
nodes.push(instruction);
|
nodes.push(instruction);
|
||||||
} else {
|
} else {
|
||||||
return Err(ParseError::ExpectedClosingSquareBrace {
|
return Err(ParseError::ExpectedClosingSquareBrace {
|
||||||
actual: self.current.0.clone(),
|
actual: self.current.0.to_owned(),
|
||||||
span: self.current.1,
|
span: self.current.1,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -277,7 +285,7 @@ impl<'src> Parser<'src> {
|
|||||||
self.next_token()?;
|
self.next_token()?;
|
||||||
} else {
|
} else {
|
||||||
return Err(ParseError::ExpectedOpeningParenthesis {
|
return Err(ParseError::ExpectedOpeningParenthesis {
|
||||||
actual: self.current.0.clone(),
|
actual: self.current.0.to_owned(),
|
||||||
span: self.current.1,
|
span: self.current.1,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -303,7 +311,7 @@ impl<'src> Parser<'src> {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
return Err(ParseError::ExpectedClosingParenthesis {
|
return Err(ParseError::ExpectedClosingParenthesis {
|
||||||
actual: self.current.0.clone(),
|
actual: self.current.0.to_owned(),
|
||||||
span: self.current.1,
|
span: self.current.1,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -318,7 +326,7 @@ impl<'src> Parser<'src> {
|
|||||||
left_span,
|
left_span,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
_ => Err(ParseError::UnexpectedToken(self.current.0.clone())),
|
_ => Err(ParseError::UnexpectedToken(self.current.0.to_owned())),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -335,11 +343,12 @@ impl<'src> Parser<'src> {
|
|||||||
|
|
||||||
#[derive(Debug, PartialEq, Clone)]
|
#[derive(Debug, PartialEq, Clone)]
|
||||||
pub enum ParseError {
|
pub enum ParseError {
|
||||||
ExpectedClosingParenthesis { actual: Token, span: Span },
|
|
||||||
ExpectedClosingSquareBrace { actual: Token, span: Span },
|
|
||||||
ExpectedOpeningParenthesis { actual: Token, span: Span },
|
|
||||||
LexError(LexError),
|
LexError(LexError),
|
||||||
UnexpectedToken(Token),
|
|
||||||
|
ExpectedClosingParenthesis { actual: TokenOwned, span: Span },
|
||||||
|
ExpectedClosingSquareBrace { actual: TokenOwned, span: Span },
|
||||||
|
ExpectedOpeningParenthesis { actual: TokenOwned, span: Span },
|
||||||
|
UnexpectedToken(TokenOwned),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<LexError> for ParseError {
|
impl From<LexError> for ParseError {
|
||||||
|
@ -2,13 +2,100 @@ use std::fmt::{self, Display, Formatter};
|
|||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::Identifier;
|
/// Source code token.
|
||||||
|
#[derive(Debug, PartialEq, Serialize, Deserialize)]
|
||||||
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
|
pub enum Token<'src> {
|
||||||
pub enum Token {
|
|
||||||
Eof,
|
Eof,
|
||||||
|
|
||||||
Identifier(Identifier),
|
Identifier(&'src str),
|
||||||
|
|
||||||
|
// Hard-coded values
|
||||||
|
Boolean(bool),
|
||||||
|
Float(f64),
|
||||||
|
Integer(i64),
|
||||||
|
String(&'src str),
|
||||||
|
|
||||||
|
// Keywords
|
||||||
|
IsEven,
|
||||||
|
IsOdd,
|
||||||
|
Length,
|
||||||
|
ReadLine,
|
||||||
|
WriteLine,
|
||||||
|
|
||||||
|
// Symbols
|
||||||
|
Comma,
|
||||||
|
Dot,
|
||||||
|
Equal,
|
||||||
|
LeftParenthesis,
|
||||||
|
LeftSquareBrace,
|
||||||
|
Plus,
|
||||||
|
RightParenthesis,
|
||||||
|
RightSquareBrace,
|
||||||
|
Star,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src> Token<'src> {
|
||||||
|
pub fn to_owned(&self) -> TokenOwned {
|
||||||
|
match self {
|
||||||
|
Token::Eof => TokenOwned::Eof,
|
||||||
|
Token::Identifier(text) => TokenOwned::Identifier(text.to_string()),
|
||||||
|
Token::Boolean(boolean) => TokenOwned::Boolean(*boolean),
|
||||||
|
Token::Float(float) => TokenOwned::Float(*float),
|
||||||
|
Token::Integer(integer) => TokenOwned::Integer(*integer),
|
||||||
|
Token::String(text) => TokenOwned::String(text.to_string()),
|
||||||
|
Token::IsEven => TokenOwned::IsEven,
|
||||||
|
Token::IsOdd => TokenOwned::IsOdd,
|
||||||
|
Token::Length => TokenOwned::Length,
|
||||||
|
Token::ReadLine => TokenOwned::ReadLine,
|
||||||
|
Token::WriteLine => TokenOwned::WriteLine,
|
||||||
|
Token::Comma => TokenOwned::Comma,
|
||||||
|
Token::Dot => TokenOwned::Dot,
|
||||||
|
Token::Equal => TokenOwned::Equal,
|
||||||
|
Token::Plus => TokenOwned::Plus,
|
||||||
|
Token::Star => TokenOwned::Star,
|
||||||
|
Token::LeftParenthesis => TokenOwned::LeftParenthesis,
|
||||||
|
Token::RightParenthesis => TokenOwned::RightParenthesis,
|
||||||
|
Token::LeftSquareBrace => TokenOwned::LeftSquareBrace,
|
||||||
|
Token::RightSquareBrace => TokenOwned::RightSquareBrace,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'src> Display for Token<'src> {
|
||||||
|
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
||||||
|
match self {
|
||||||
|
Token::Eof => write!(f, "EOF"),
|
||||||
|
Token::Identifier(text) => write!(f, "{text}"),
|
||||||
|
Token::Boolean(boolean) => write!(f, "{boolean}"),
|
||||||
|
Token::Float(float) => write!(f, "{float}"),
|
||||||
|
Token::Integer(integer) => write!(f, "{integer}"),
|
||||||
|
Token::String(string) => write!(f, "{string}"),
|
||||||
|
Token::IsEven => write!(f, "is_even"),
|
||||||
|
Token::IsOdd => write!(f, "is_odd"),
|
||||||
|
Token::Length => write!(f, "length"),
|
||||||
|
Token::ReadLine => write!(f, "read_line"),
|
||||||
|
Token::WriteLine => write!(f, "write_line"),
|
||||||
|
Token::Comma => write!(f, ","),
|
||||||
|
Token::Dot => write!(f, "."),
|
||||||
|
Token::Equal => write!(f, "="),
|
||||||
|
Token::Plus => write!(f, "+"),
|
||||||
|
Token::Star => write!(f, "*"),
|
||||||
|
Token::LeftParenthesis => write!(f, "("),
|
||||||
|
Token::RightParenthesis => write!(f, ")"),
|
||||||
|
Token::LeftSquareBrace => write!(f, "["),
|
||||||
|
Token::RightSquareBrace => write!(f, "]"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Owned version of `Token`, which owns all the strings.
|
||||||
|
///
|
||||||
|
/// This is used for errors.
|
||||||
|
#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)]
|
||||||
|
pub enum TokenOwned {
|
||||||
|
Eof,
|
||||||
|
|
||||||
|
Identifier(String),
|
||||||
|
|
||||||
// Hard-coded values
|
// Hard-coded values
|
||||||
Boolean(bool),
|
Boolean(bool),
|
||||||
@ -34,30 +121,3 @@ pub enum Token {
|
|||||||
RightSquareBrace,
|
RightSquareBrace,
|
||||||
Star,
|
Star,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for Token {
|
|
||||||
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
|
|
||||||
match self {
|
|
||||||
Token::Eof => write!(f, "EOF"),
|
|
||||||
Token::Identifier(identifier) => write!(f, "{identifier}"),
|
|
||||||
Token::Boolean(boolean) => write!(f, "{boolean}"),
|
|
||||||
Token::Float(float) => write!(f, "{float}"),
|
|
||||||
Token::Integer(integer) => write!(f, "{integer}"),
|
|
||||||
Token::String(string) => write!(f, "{string}"),
|
|
||||||
Token::IsEven => write!(f, "is_even"),
|
|
||||||
Token::IsOdd => write!(f, "is_odd"),
|
|
||||||
Token::Length => write!(f, "length"),
|
|
||||||
Token::ReadLine => write!(f, "read_line"),
|
|
||||||
Token::WriteLine => write!(f, "write_line"),
|
|
||||||
Token::Comma => write!(f, ","),
|
|
||||||
Token::Dot => write!(f, "."),
|
|
||||||
Token::Equal => write!(f, "="),
|
|
||||||
Token::Plus => write!(f, "+"),
|
|
||||||
Token::Star => write!(f, "*"),
|
|
||||||
Token::LeftParenthesis => write!(f, "("),
|
|
||||||
Token::RightParenthesis => write!(f, ")"),
|
|
||||||
Token::LeftSquareBrace => write!(f, "["),
|
|
||||||
Token::RightSquareBrace => write!(f, "]"),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user