From 158457c18805bec4a0df223691bea47032c13336 Mon Sep 17 00:00:00 2001 From: Akshay Date: Sat, 20 Mar 2021 21:34:59 +0530 Subject: begin work on parser --- src/lisp/error.rs | 2 + src/lisp/lex.rs | 14 +-- src/lisp/parse.rs | 253 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 262 insertions(+), 7 deletions(-) create mode 100644 src/lisp/parse.rs diff --git a/src/lisp/error.rs b/src/lisp/error.rs index cde1e31..b789f7f 100644 --- a/src/lisp/error.rs +++ b/src/lisp/error.rs @@ -1,3 +1,5 @@ +use crate::lisp::lex::Span; + #[derive(Debug, PartialEq, Copy, Clone)] pub enum LispError { ParseError, diff --git a/src/lisp/lex.rs b/src/lisp/lex.rs index a1bea5f..3b1389d 100644 --- a/src/lisp/lex.rs +++ b/src/lisp/lex.rs @@ -6,13 +6,12 @@ use super::error::LispError; pub enum Token<'a> { LeftParen, RightParen, - Comment(&'a str), Float(&'a str), Integer(&'a str), - Char(&'a str), + // Char(&'a str), String(&'a str), Name(&'a str), - Keyword(&'a str), + // Keyword(&'a str), BackQuote, Comma, CommaAt, @@ -25,13 +24,12 @@ impl<'a> fmt::Display for Token<'a> { match self { Token::LeftParen => write!(f, "("), Token::RightParen => write!(f, ")"), - Token::Comment(_) => write!(f, "comment"), Token::Float(_) => write!(f, "float"), Token::Integer(_) => write!(f, "integer"), - Token::Char(_) => write!(f, "char"), + // Token::Char(_) => write!(f, "char"), Token::String(_) => write!(f, "string"), Token::Name(_) => write!(f, "name"), - Token::Keyword(_) => write!(f, "keyword"), + // Token::Keyword(_) => write!(f, "keyword"), Token::BackQuote => write!(f, "`"), Token::Comma => write!(f, ","), Token::CommaAt => write!(f, ",@"), @@ -48,7 +46,7 @@ pub struct Span { } impl Span { - fn empty(pos: u32) -> Self { + pub fn empty(pos: u32) -> Self { Self { low: pos, high: pos, @@ -299,5 +297,7 @@ mod tests { assert_eq!(tokens("; foo"), []); assert_eq!(tokens("1; foo"), [(sp(0, 1), Token::Integer("1"))]); + assert_eq!(tokens(">="), [(sp(0, 2), Token::Name(">="))]); + assert_eq!(tokens("&&"), [(sp(0, 2), Token::Name("&&"))]); } } diff --git a/src/lisp/parse.rs b/src/lisp/parse.rs new file mode 100644 index 0000000..13a9eff --- /dev/null +++ b/src/lisp/parse.rs @@ -0,0 +1,253 @@ +use crate::lisp::{ + error::LispError, + lex::{Lexer, Span, Token}, + number::LispNumber, + LispExpr, +}; + +pub struct Parser<'lex> { + lexer: Lexer<'lex>, + cur_token: Option<(Span, Token<'lex>)>, +} + +// pub struct ParseError { +// pub span: Span, +// pub kind: ParseErrorKind, +// } +// +// pub enum ParseErrorKind { +// InvalidLiteral, +// InvalidToken, +// LiteralParseError, +// MissingCloseParen, +// UnbalancedComma, +// UnexpectedEof, +// UnexpectedToken { +// expected: &'static str, +// found: &'static str, +// }, +// UnmatchedParen, +// UnterminatedString, +// } + +enum Group { + Backticks(i32), + CommaAt, + Quotes(u32), + Parens(Vec), +} + +impl<'lex> Parser<'lex> { + pub fn new(lexer: Lexer<'lex>) -> Self { + Self { + lexer, + cur_token: None, + } + } + + pub fn parse_expr(&mut self) -> Result { + let mut stack = Vec::new(); + let mut total_backticks = 0; + loop { + let (span, token) = self.next()?; + let r: Result = match token { + Token::LeftParen => { + stack.push(Group::Parens(Vec::new())); + continue; + } + Token::RightParen => { + let group = stack.pop().ok_or_else( + || LispError::ParseError, // unmatched paren here + )?; + match group { + Group::Parens(v) => Ok(LispExpr::List(v)), + _ => Err(LispError::ParseError), + } + } + Token::Float(f) => f + .parse::() + .map(|n| LispExpr::Number(LispNumber::Float(n))) + .map_err(|_| LispError::ParseError), + Token::Integer(i) => i + .parse::() + .map(|n| LispExpr::Number(LispNumber::Integer(n))) + .map_err(|_| LispError::ParseError), + Token::String(s) => Ok(LispExpr::StringLit(s.into())), + Token::Name(n) => Ok(name_expr(n)), + Token::BackQuote => { + total_backticks += 1; + if let Some(&mut Group::Backticks(ref mut n)) = stack.last_mut() { + *n += 1; + continue; + } + stack.push(Group::Backticks(1)); + continue; + } + Token::Comma => { + if total_backticks <= 0 { + return Err(LispError::ParseError); // unbalanced comma + } + total_backticks -= 1; + if let Some(&mut Group::Backticks(ref mut n)) = stack.last_mut() { + *n -= 1; + continue; + } + stack.push(Group::Backticks(-1)); + continue; + } + Token::CommaAt => { + if total_backticks <= 0 { + return Err(LispError::ParseError); // unbalanced comma + } + total_backticks -= 1; + stack.push(Group::CommaAt); + continue; + } + Token::Quote => { + if let Some(&mut Group::Quotes(ref mut n)) = stack.last_mut() { + *n += 1; + continue; + } + stack.push(Group::Quotes(1)); + continue; + } + Token::End => { + let any_paren = stack.iter().any(|group| match *group { + Group::Parens(_) => true, + _ => false, + }); + + if any_paren { + Err(LispError::ParseError) // unbalanced paren + } else { + Err(LispError::ParseError) // unexpected eof + } + } + }; + let mut v = r?; + loop { + match stack.last_mut() { + None => return Ok(v), + Some(&mut Group::Parens(ref mut values)) => { + values.push(v); + break; + } + _ => (), + } + + let group = stack.pop().unwrap(); + + match group { + Group::Backticks(n) if n > 0 => { + total_backticks -= n; + v = v.quasiquote(n as u32); + } + Group::Backticks(n) if n < 0 => { + total_backticks -= n; + v = v.comma((-n) as u32); + } + Group::CommaAt => { + total_backticks += 1; + v = v.comma_at(1); + } + Group::Quotes(n) => v = v.quote(n), + _ => (), + } + } + } + } + + fn next(&mut self) -> Result<(Span, Token<'lex>), LispError> { + let r = self.peek()?; + self.cur_token = None; + Ok(r) + } + + fn peek(&mut self) -> Result<(Span, Token<'lex>), LispError> { + if let Some(tok) = self.cur_token { + Ok(tok) + } else { + let tok = self.lexer.next_token()?; + self.cur_token = Some(tok); + Ok(tok) + } + } + + pub fn parse_single_expr(&mut self) -> Result { + let expr = self.parse_expr()?; + + match self.next()? { + (_, Token::End) => Ok(expr), + _ => Err(LispError::ParseError), // too many tokens + } + } + + pub fn parse_exprs(&mut self) -> Result, LispError> { + let mut res = Vec::new(); + loop { + match self.peek()? { + (_, Token::End) => break, + _ => res.push(self.parse_expr()?), + } + } + Ok(res) + } +} + +fn name_expr(input: &str) -> LispExpr { + match input { + "#t" => LispExpr::BoolLit(true), + "#f" => LispExpr::BoolLit(false), + _ => LispExpr::Ident(input.into()), + } +} + +#[cfg(test)] +mod tests { + use super::*; + fn parse(input: &str) -> Result { + let mut parser = Parser::new(Lexer::new(input, 0)); + + parser.parse_single_expr() + } + + #[test] + fn parse_lisp_expr() { + assert_eq!( + parse("1.5").unwrap(), + LispExpr::Number(LispNumber::Float(1.5)) + ); + + assert_eq!( + parse(r#""hello""#).unwrap(), + LispExpr::StringLit(r#""hello""#.into()) + ); + + assert_eq!(parse("foo").unwrap(), LispExpr::Ident("foo".into())); + + let items = (1..=5) + .map(LispNumber::Integer) + .map(LispExpr::Number) + .collect::>(); + assert_eq!(parse("(1 2 3 4 5)").unwrap(), LispExpr::List(items)); + + let foo = LispExpr::Ident("foo".into()); + let bar = LispExpr::Comma(Box::new(LispExpr::Ident("bar".into())), 1); + assert_eq!( + parse("`(foo ,bar)").unwrap(), + LispExpr::Quasiquote(Box::new(LispExpr::List(vec![foo, bar])), 1) + ) + } + + #[should_panic] + #[test] + fn unbalanced_comma() { + parse("`(foo ,,bar)").unwrap(); + } + + #[should_panic] + #[test] + fn unbalanced_paren() { + parse("(foo").unwrap(); + } +} -- cgit v1.2.3