diff options
-rw-r--r-- | src/lisp/error.rs | 2 | ||||
-rw-r--r-- | src/lisp/lex.rs | 14 | ||||
-rw-r--r-- | src/lisp/parse.rs | 253 |
3 files changed, 262 insertions, 7 deletions
diff --git a/src/lisp/error.rs b/src/lisp/error.rs index cde1e31..b789f7f 100644 --- a/src/lisp/error.rs +++ b/src/lisp/error.rs | |||
@@ -1,3 +1,5 @@ | |||
1 | use crate::lisp::lex::Span; | ||
2 | |||
1 | #[derive(Debug, PartialEq, Copy, Clone)] | 3 | #[derive(Debug, PartialEq, Copy, Clone)] |
2 | pub enum LispError { | 4 | pub enum LispError { |
3 | ParseError, | 5 | ParseError, |
diff --git a/src/lisp/lex.rs b/src/lisp/lex.rs index a1bea5f..3b1389d 100644 --- a/src/lisp/lex.rs +++ b/src/lisp/lex.rs | |||
@@ -6,13 +6,12 @@ use super::error::LispError; | |||
6 | pub enum Token<'a> { | 6 | pub enum Token<'a> { |
7 | LeftParen, | 7 | LeftParen, |
8 | RightParen, | 8 | RightParen, |
9 | Comment(&'a str), | ||
10 | Float(&'a str), | 9 | Float(&'a str), |
11 | Integer(&'a str), | 10 | Integer(&'a str), |
12 | Char(&'a str), | 11 | // Char(&'a str), |
13 | String(&'a str), | 12 | String(&'a str), |
14 | Name(&'a str), | 13 | Name(&'a str), |
15 | Keyword(&'a str), | 14 | // Keyword(&'a str), |
16 | BackQuote, | 15 | BackQuote, |
17 | Comma, | 16 | Comma, |
18 | CommaAt, | 17 | CommaAt, |
@@ -25,13 +24,12 @@ impl<'a> fmt::Display for Token<'a> { | |||
25 | match self { | 24 | match self { |
26 | Token::LeftParen => write!(f, "("), | 25 | Token::LeftParen => write!(f, "("), |
27 | Token::RightParen => write!(f, ")"), | 26 | Token::RightParen => write!(f, ")"), |
28 | Token::Comment(_) => write!(f, "comment"), | ||
29 | Token::Float(_) => write!(f, "float"), | 27 | Token::Float(_) => write!(f, "float"), |
30 | Token::Integer(_) => write!(f, "integer"), | 28 | Token::Integer(_) => write!(f, "integer"), |
31 | Token::Char(_) => write!(f, "char"), | 29 | // Token::Char(_) => write!(f, "char"), |
32 | Token::String(_) => write!(f, "string"), | 30 | Token::String(_) => write!(f, "string"), |
33 | Token::Name(_) => write!(f, "name"), | 31 | Token::Name(_) => write!(f, "name"), |
34 | Token::Keyword(_) => write!(f, "keyword"), | 32 | // Token::Keyword(_) => write!(f, "keyword"), |
35 | Token::BackQuote => write!(f, "`"), | 33 | Token::BackQuote => write!(f, "`"), |
36 | Token::Comma => write!(f, ","), | 34 | Token::Comma => write!(f, ","), |
37 | Token::CommaAt => write!(f, ",@"), | 35 | Token::CommaAt => write!(f, ",@"), |
@@ -48,7 +46,7 @@ pub struct Span { | |||
48 | } | 46 | } |
49 | 47 | ||
50 | impl Span { | 48 | impl Span { |
51 | fn empty(pos: u32) -> Self { | 49 | pub fn empty(pos: u32) -> Self { |
52 | Self { | 50 | Self { |
53 | low: pos, | 51 | low: pos, |
54 | high: pos, | 52 | high: pos, |
@@ -299,5 +297,7 @@ mod tests { | |||
299 | 297 | ||
300 | assert_eq!(tokens("; foo"), []); | 298 | assert_eq!(tokens("; foo"), []); |
301 | assert_eq!(tokens("1; foo"), [(sp(0, 1), Token::Integer("1"))]); | 299 | assert_eq!(tokens("1; foo"), [(sp(0, 1), Token::Integer("1"))]); |
300 | assert_eq!(tokens(">="), [(sp(0, 2), Token::Name(">="))]); | ||
301 | assert_eq!(tokens("&&"), [(sp(0, 2), Token::Name("&&"))]); | ||
302 | } | 302 | } |
303 | } | 303 | } |
diff --git a/src/lisp/parse.rs b/src/lisp/parse.rs new file mode 100644 index 0000000..13a9eff --- /dev/null +++ b/src/lisp/parse.rs | |||
@@ -0,0 +1,253 @@ | |||
1 | use crate::lisp::{ | ||
2 | error::LispError, | ||
3 | lex::{Lexer, Span, Token}, | ||
4 | number::LispNumber, | ||
5 | LispExpr, | ||
6 | }; | ||
7 | |||
8 | pub struct Parser<'lex> { | ||
9 | lexer: Lexer<'lex>, | ||
10 | cur_token: Option<(Span, Token<'lex>)>, | ||
11 | } | ||
12 | |||
13 | // pub struct ParseError { | ||
14 | // pub span: Span, | ||
15 | // pub kind: ParseErrorKind, | ||
16 | // } | ||
17 | // | ||
18 | // pub enum ParseErrorKind { | ||
19 | // InvalidLiteral, | ||
20 | // InvalidToken, | ||
21 | // LiteralParseError, | ||
22 | // MissingCloseParen, | ||
23 | // UnbalancedComma, | ||
24 | // UnexpectedEof, | ||
25 | // UnexpectedToken { | ||
26 | // expected: &'static str, | ||
27 | // found: &'static str, | ||
28 | // }, | ||
29 | // UnmatchedParen, | ||
30 | // UnterminatedString, | ||
31 | // } | ||
32 | |||
33 | enum Group { | ||
34 | Backticks(i32), | ||
35 | CommaAt, | ||
36 | Quotes(u32), | ||
37 | Parens(Vec<LispExpr>), | ||
38 | } | ||
39 | |||
40 | impl<'lex> Parser<'lex> { | ||
41 | pub fn new(lexer: Lexer<'lex>) -> Self { | ||
42 | Self { | ||
43 | lexer, | ||
44 | cur_token: None, | ||
45 | } | ||
46 | } | ||
47 | |||
48 | pub fn parse_expr(&mut self) -> Result<LispExpr, LispError> { | ||
49 | let mut stack = Vec::new(); | ||
50 | let mut total_backticks = 0; | ||
51 | loop { | ||
52 | let (span, token) = self.next()?; | ||
53 | let r: Result<LispExpr, LispError> = match token { | ||
54 | Token::LeftParen => { | ||
55 | stack.push(Group::Parens(Vec::new())); | ||
56 | continue; | ||
57 | } | ||
58 | Token::RightParen => { | ||
59 | let group = stack.pop().ok_or_else( | ||
60 | || LispError::ParseError, // unmatched paren here | ||
61 | )?; | ||
62 | match group { | ||
63 | Group::Parens(v) => Ok(LispExpr::List(v)), | ||
64 | _ => Err(LispError::ParseError), | ||
65 | } | ||
66 | } | ||
67 | Token::Float(f) => f | ||
68 | .parse::<f64>() | ||
69 | .map(|n| LispExpr::Number(LispNumber::Float(n))) | ||
70 | .map_err(|_| LispError::ParseError), | ||
71 | Token::Integer(i) => i | ||
72 | .parse::<i64>() | ||
73 | .map(|n| LispExpr::Number(LispNumber::Integer(n))) | ||
74 | .map_err(|_| LispError::ParseError), | ||
75 | Token::String(s) => Ok(LispExpr::StringLit(s.into())), | ||
76 | Token::Name(n) => Ok(name_expr(n)), | ||
77 | Token::BackQuote => { | ||
78 | total_backticks += 1; | ||
79 | if let Some(&mut Group::Backticks(ref mut n)) = stack.last_mut() { | ||
80 | *n += 1; | ||
81 | continue; | ||
82 | } | ||
83 | stack.push(Group::Backticks(1)); | ||
84 | continue; | ||
85 | } | ||
86 | Token::Comma => { | ||
87 | if total_backticks <= 0 { | ||
88 | return Err(LispError::ParseError); // unbalanced comma | ||
89 | } | ||
90 | total_backticks -= 1; | ||
91 | if let Some(&mut Group::Backticks(ref mut n)) = stack.last_mut() { | ||
92 | *n -= 1; | ||
93 | continue; | ||
94 | } | ||
95 | stack.push(Group::Backticks(-1)); | ||
96 | continue; | ||
97 | } | ||
98 | Token::CommaAt => { | ||
99 | if total_backticks <= 0 { | ||
100 | return Err(LispError::ParseError); // unbalanced comma | ||
101 | } | ||
102 | total_backticks -= 1; | ||
103 | stack.push(Group::CommaAt); | ||
104 | continue; | ||
105 | } | ||
106 | Token::Quote => { | ||
107 | if let Some(&mut Group::Quotes(ref mut n)) = stack.last_mut() { | ||
108 | *n += 1; | ||
109 | continue; | ||
110 | } | ||
111 | stack.push(Group::Quotes(1)); | ||
112 | continue; | ||
113 | } | ||
114 | Token::End => { | ||
115 | let any_paren = stack.iter().any(|group| match *group { | ||
116 | Group::Parens(_) => true, | ||
117 | _ => false, | ||
118 | }); | ||
119 | |||
120 | if any_paren { | ||
121 | Err(LispError::ParseError) // unbalanced paren | ||
122 | } else { | ||
123 | Err(LispError::ParseError) // unexpected eof | ||
124 | } | ||
125 | } | ||
126 | }; | ||
127 | let mut v = r?; | ||
128 | loop { | ||
129 | match stack.last_mut() { | ||
130 | None => return Ok(v), | ||
131 | Some(&mut Group::Parens(ref mut values)) => { | ||
132 | values.push(v); | ||
133 | break; | ||
134 | } | ||
135 | _ => (), | ||
136 | } | ||
137 | |||
138 | let group = stack.pop().unwrap(); | ||
139 | |||
140 | match group { | ||
141 | Group::Backticks(n) if n > 0 => { | ||
142 | total_backticks -= n; | ||
143 | v = v.quasiquote(n as u32); | ||
144 | } | ||
145 | Group::Backticks(n) if n < 0 => { | ||
146 | total_backticks -= n; | ||
147 | v = v.comma((-n) as u32); | ||
148 | } | ||
149 | Group::CommaAt => { | ||
150 | total_backticks += 1; | ||
151 | v = v.comma_at(1); | ||
152 | } | ||
153 | Group::Quotes(n) => v = v.quote(n), | ||
154 | _ => (), | ||
155 | } | ||
156 | } | ||
157 | } | ||
158 | } | ||
159 | |||
160 | fn next(&mut self) -> Result<(Span, Token<'lex>), LispError> { | ||
161 | let r = self.peek()?; | ||
162 | self.cur_token = None; | ||
163 | Ok(r) | ||
164 | } | ||
165 | |||
166 | fn peek(&mut self) -> Result<(Span, Token<'lex>), LispError> { | ||
167 | if let Some(tok) = self.cur_token { | ||
168 | Ok(tok) | ||
169 | } else { | ||
170 | let tok = self.lexer.next_token()?; | ||
171 | self.cur_token = Some(tok); | ||
172 | Ok(tok) | ||
173 | } | ||
174 | } | ||
175 | |||
176 | pub fn parse_single_expr(&mut self) -> Result<LispExpr, LispError> { | ||
177 | let expr = self.parse_expr()?; | ||
178 | |||
179 | match self.next()? { | ||
180 | (_, Token::End) => Ok(expr), | ||
181 | _ => Err(LispError::ParseError), // too many tokens | ||
182 | } | ||
183 | } | ||
184 | |||
185 | pub fn parse_exprs(&mut self) -> Result<Vec<LispExpr>, LispError> { | ||
186 | let mut res = Vec::new(); | ||
187 | loop { | ||
188 | match self.peek()? { | ||
189 | (_, Token::End) => break, | ||
190 | _ => res.push(self.parse_expr()?), | ||
191 | } | ||
192 | } | ||
193 | Ok(res) | ||
194 | } | ||
195 | } | ||
196 | |||
197 | fn name_expr(input: &str) -> LispExpr { | ||
198 | match input { | ||
199 | "#t" => LispExpr::BoolLit(true), | ||
200 | "#f" => LispExpr::BoolLit(false), | ||
201 | _ => LispExpr::Ident(input.into()), | ||
202 | } | ||
203 | } | ||
204 | |||
205 | #[cfg(test)] | ||
206 | mod tests { | ||
207 | use super::*; | ||
208 | fn parse(input: &str) -> Result<LispExpr, LispError> { | ||
209 | let mut parser = Parser::new(Lexer::new(input, 0)); | ||
210 | |||
211 | parser.parse_single_expr() | ||
212 | } | ||
213 | |||
214 | #[test] | ||
215 | fn parse_lisp_expr() { | ||
216 | assert_eq!( | ||
217 | parse("1.5").unwrap(), | ||
218 | LispExpr::Number(LispNumber::Float(1.5)) | ||
219 | ); | ||
220 | |||
221 | assert_eq!( | ||
222 | parse(r#""hello""#).unwrap(), | ||
223 | LispExpr::StringLit(r#""hello""#.into()) | ||
224 | ); | ||
225 | |||
226 | assert_eq!(parse("foo").unwrap(), LispExpr::Ident("foo".into())); | ||
227 | |||
228 | let items = (1..=5) | ||
229 | .map(LispNumber::Integer) | ||
230 | .map(LispExpr::Number) | ||
231 | .collect::<Vec<_>>(); | ||
232 | assert_eq!(parse("(1 2 3 4 5)").unwrap(), LispExpr::List(items)); | ||
233 | |||
234 | let foo = LispExpr::Ident("foo".into()); | ||
235 | let bar = LispExpr::Comma(Box::new(LispExpr::Ident("bar".into())), 1); | ||
236 | assert_eq!( | ||
237 | parse("`(foo ,bar)").unwrap(), | ||
238 | LispExpr::Quasiquote(Box::new(LispExpr::List(vec![foo, bar])), 1) | ||
239 | ) | ||
240 | } | ||
241 | |||
242 | #[should_panic] | ||
243 | #[test] | ||
244 | fn unbalanced_comma() { | ||
245 | parse("`(foo ,,bar)").unwrap(); | ||
246 | } | ||
247 | |||
248 | #[should_panic] | ||
249 | #[test] | ||
250 | fn unbalanced_paren() { | ||
251 | parse("(foo").unwrap(); | ||
252 | } | ||
253 | } | ||