aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAkshay <[email protected]>2021-03-20 16:04:59 +0000
committerAkshay <[email protected]>2021-03-20 16:04:59 +0000
commit158457c18805bec4a0df223691bea47032c13336 (patch)
treee33a8b29fa1af4238fe10657ca9bdfc24990b4d3
parent07432dabd3fbc7fcaaec01d50f4036868dca88dd (diff)
begin work on parser
-rw-r--r--src/lisp/error.rs2
-rw-r--r--src/lisp/lex.rs14
-rw-r--r--src/lisp/parse.rs253
3 files changed, 262 insertions, 7 deletions
diff --git a/src/lisp/error.rs b/src/lisp/error.rs
index cde1e31..b789f7f 100644
--- a/src/lisp/error.rs
+++ b/src/lisp/error.rs
@@ -1,3 +1,5 @@
1use crate::lisp::lex::Span;
2
1#[derive(Debug, PartialEq, Copy, Clone)] 3#[derive(Debug, PartialEq, Copy, Clone)]
2pub enum LispError { 4pub enum LispError {
3 ParseError, 5 ParseError,
diff --git a/src/lisp/lex.rs b/src/lisp/lex.rs
index a1bea5f..3b1389d 100644
--- a/src/lisp/lex.rs
+++ b/src/lisp/lex.rs
@@ -6,13 +6,12 @@ use super::error::LispError;
6pub enum Token<'a> { 6pub enum Token<'a> {
7 LeftParen, 7 LeftParen,
8 RightParen, 8 RightParen,
9 Comment(&'a str),
10 Float(&'a str), 9 Float(&'a str),
11 Integer(&'a str), 10 Integer(&'a str),
12 Char(&'a str), 11 // Char(&'a str),
13 String(&'a str), 12 String(&'a str),
14 Name(&'a str), 13 Name(&'a str),
15 Keyword(&'a str), 14 // Keyword(&'a str),
16 BackQuote, 15 BackQuote,
17 Comma, 16 Comma,
18 CommaAt, 17 CommaAt,
@@ -25,13 +24,12 @@ impl<'a> fmt::Display for Token<'a> {
25 match self { 24 match self {
26 Token::LeftParen => write!(f, "("), 25 Token::LeftParen => write!(f, "("),
27 Token::RightParen => write!(f, ")"), 26 Token::RightParen => write!(f, ")"),
28 Token::Comment(_) => write!(f, "comment"),
29 Token::Float(_) => write!(f, "float"), 27 Token::Float(_) => write!(f, "float"),
30 Token::Integer(_) => write!(f, "integer"), 28 Token::Integer(_) => write!(f, "integer"),
31 Token::Char(_) => write!(f, "char"), 29 // Token::Char(_) => write!(f, "char"),
32 Token::String(_) => write!(f, "string"), 30 Token::String(_) => write!(f, "string"),
33 Token::Name(_) => write!(f, "name"), 31 Token::Name(_) => write!(f, "name"),
34 Token::Keyword(_) => write!(f, "keyword"), 32 // Token::Keyword(_) => write!(f, "keyword"),
35 Token::BackQuote => write!(f, "`"), 33 Token::BackQuote => write!(f, "`"),
36 Token::Comma => write!(f, ","), 34 Token::Comma => write!(f, ","),
37 Token::CommaAt => write!(f, ",@"), 35 Token::CommaAt => write!(f, ",@"),
@@ -48,7 +46,7 @@ pub struct Span {
48} 46}
49 47
50impl Span { 48impl Span {
51 fn empty(pos: u32) -> Self { 49 pub fn empty(pos: u32) -> Self {
52 Self { 50 Self {
53 low: pos, 51 low: pos,
54 high: pos, 52 high: pos,
@@ -299,5 +297,7 @@ mod tests {
299 297
300 assert_eq!(tokens("; foo"), []); 298 assert_eq!(tokens("; foo"), []);
301 assert_eq!(tokens("1; foo"), [(sp(0, 1), Token::Integer("1"))]); 299 assert_eq!(tokens("1; foo"), [(sp(0, 1), Token::Integer("1"))]);
300 assert_eq!(tokens(">="), [(sp(0, 2), Token::Name(">="))]);
301 assert_eq!(tokens("&&"), [(sp(0, 2), Token::Name("&&"))]);
302 } 302 }
303} 303}
diff --git a/src/lisp/parse.rs b/src/lisp/parse.rs
new file mode 100644
index 0000000..13a9eff
--- /dev/null
+++ b/src/lisp/parse.rs
@@ -0,0 +1,253 @@
1use crate::lisp::{
2 error::LispError,
3 lex::{Lexer, Span, Token},
4 number::LispNumber,
5 LispExpr,
6};
7
8pub struct Parser<'lex> {
9 lexer: Lexer<'lex>,
10 cur_token: Option<(Span, Token<'lex>)>,
11}
12
13// pub struct ParseError {
14// pub span: Span,
15// pub kind: ParseErrorKind,
16// }
17//
18// pub enum ParseErrorKind {
19// InvalidLiteral,
20// InvalidToken,
21// LiteralParseError,
22// MissingCloseParen,
23// UnbalancedComma,
24// UnexpectedEof,
25// UnexpectedToken {
26// expected: &'static str,
27// found: &'static str,
28// },
29// UnmatchedParen,
30// UnterminatedString,
31// }
32
33enum Group {
34 Backticks(i32),
35 CommaAt,
36 Quotes(u32),
37 Parens(Vec<LispExpr>),
38}
39
40impl<'lex> Parser<'lex> {
41 pub fn new(lexer: Lexer<'lex>) -> Self {
42 Self {
43 lexer,
44 cur_token: None,
45 }
46 }
47
48 pub fn parse_expr(&mut self) -> Result<LispExpr, LispError> {
49 let mut stack = Vec::new();
50 let mut total_backticks = 0;
51 loop {
52 let (span, token) = self.next()?;
53 let r: Result<LispExpr, LispError> = match token {
54 Token::LeftParen => {
55 stack.push(Group::Parens(Vec::new()));
56 continue;
57 }
58 Token::RightParen => {
59 let group = stack.pop().ok_or_else(
60 || LispError::ParseError, // unmatched paren here
61 )?;
62 match group {
63 Group::Parens(v) => Ok(LispExpr::List(v)),
64 _ => Err(LispError::ParseError),
65 }
66 }
67 Token::Float(f) => f
68 .parse::<f64>()
69 .map(|n| LispExpr::Number(LispNumber::Float(n)))
70 .map_err(|_| LispError::ParseError),
71 Token::Integer(i) => i
72 .parse::<i64>()
73 .map(|n| LispExpr::Number(LispNumber::Integer(n)))
74 .map_err(|_| LispError::ParseError),
75 Token::String(s) => Ok(LispExpr::StringLit(s.into())),
76 Token::Name(n) => Ok(name_expr(n)),
77 Token::BackQuote => {
78 total_backticks += 1;
79 if let Some(&mut Group::Backticks(ref mut n)) = stack.last_mut() {
80 *n += 1;
81 continue;
82 }
83 stack.push(Group::Backticks(1));
84 continue;
85 }
86 Token::Comma => {
87 if total_backticks <= 0 {
88 return Err(LispError::ParseError); // unbalanced comma
89 }
90 total_backticks -= 1;
91 if let Some(&mut Group::Backticks(ref mut n)) = stack.last_mut() {
92 *n -= 1;
93 continue;
94 }
95 stack.push(Group::Backticks(-1));
96 continue;
97 }
98 Token::CommaAt => {
99 if total_backticks <= 0 {
100 return Err(LispError::ParseError); // unbalanced comma
101 }
102 total_backticks -= 1;
103 stack.push(Group::CommaAt);
104 continue;
105 }
106 Token::Quote => {
107 if let Some(&mut Group::Quotes(ref mut n)) = stack.last_mut() {
108 *n += 1;
109 continue;
110 }
111 stack.push(Group::Quotes(1));
112 continue;
113 }
114 Token::End => {
115 let any_paren = stack.iter().any(|group| match *group {
116 Group::Parens(_) => true,
117 _ => false,
118 });
119
120 if any_paren {
121 Err(LispError::ParseError) // unbalanced paren
122 } else {
123 Err(LispError::ParseError) // unexpected eof
124 }
125 }
126 };
127 let mut v = r?;
128 loop {
129 match stack.last_mut() {
130 None => return Ok(v),
131 Some(&mut Group::Parens(ref mut values)) => {
132 values.push(v);
133 break;
134 }
135 _ => (),
136 }
137
138 let group = stack.pop().unwrap();
139
140 match group {
141 Group::Backticks(n) if n > 0 => {
142 total_backticks -= n;
143 v = v.quasiquote(n as u32);
144 }
145 Group::Backticks(n) if n < 0 => {
146 total_backticks -= n;
147 v = v.comma((-n) as u32);
148 }
149 Group::CommaAt => {
150 total_backticks += 1;
151 v = v.comma_at(1);
152 }
153 Group::Quotes(n) => v = v.quote(n),
154 _ => (),
155 }
156 }
157 }
158 }
159
160 fn next(&mut self) -> Result<(Span, Token<'lex>), LispError> {
161 let r = self.peek()?;
162 self.cur_token = None;
163 Ok(r)
164 }
165
166 fn peek(&mut self) -> Result<(Span, Token<'lex>), LispError> {
167 if let Some(tok) = self.cur_token {
168 Ok(tok)
169 } else {
170 let tok = self.lexer.next_token()?;
171 self.cur_token = Some(tok);
172 Ok(tok)
173 }
174 }
175
176 pub fn parse_single_expr(&mut self) -> Result<LispExpr, LispError> {
177 let expr = self.parse_expr()?;
178
179 match self.next()? {
180 (_, Token::End) => Ok(expr),
181 _ => Err(LispError::ParseError), // too many tokens
182 }
183 }
184
185 pub fn parse_exprs(&mut self) -> Result<Vec<LispExpr>, LispError> {
186 let mut res = Vec::new();
187 loop {
188 match self.peek()? {
189 (_, Token::End) => break,
190 _ => res.push(self.parse_expr()?),
191 }
192 }
193 Ok(res)
194 }
195}
196
197fn name_expr(input: &str) -> LispExpr {
198 match input {
199 "#t" => LispExpr::BoolLit(true),
200 "#f" => LispExpr::BoolLit(false),
201 _ => LispExpr::Ident(input.into()),
202 }
203}
204
205#[cfg(test)]
206mod tests {
207 use super::*;
208 fn parse(input: &str) -> Result<LispExpr, LispError> {
209 let mut parser = Parser::new(Lexer::new(input, 0));
210
211 parser.parse_single_expr()
212 }
213
214 #[test]
215 fn parse_lisp_expr() {
216 assert_eq!(
217 parse("1.5").unwrap(),
218 LispExpr::Number(LispNumber::Float(1.5))
219 );
220
221 assert_eq!(
222 parse(r#""hello""#).unwrap(),
223 LispExpr::StringLit(r#""hello""#.into())
224 );
225
226 assert_eq!(parse("foo").unwrap(), LispExpr::Ident("foo".into()));
227
228 let items = (1..=5)
229 .map(LispNumber::Integer)
230 .map(LispExpr::Number)
231 .collect::<Vec<_>>();
232 assert_eq!(parse("(1 2 3 4 5)").unwrap(), LispExpr::List(items));
233
234 let foo = LispExpr::Ident("foo".into());
235 let bar = LispExpr::Comma(Box::new(LispExpr::Ident("bar".into())), 1);
236 assert_eq!(
237 parse("`(foo ,bar)").unwrap(),
238 LispExpr::Quasiquote(Box::new(LispExpr::List(vec![foo, bar])), 1)
239 )
240 }
241
242 #[should_panic]
243 #[test]
244 fn unbalanced_comma() {
245 parse("`(foo ,,bar)").unwrap();
246 }
247
248 #[should_panic]
249 #[test]
250 fn unbalanced_paren() {
251 parse("(foo").unwrap();
252 }
253}