diff options
-rw-r--r-- | src/lisp/error.rs | 4 | ||||
-rw-r--r-- | src/lisp/lex.rs | 303 | ||||
-rw-r--r-- | src/lisp/mod.rs | 46 | ||||
-rw-r--r-- | src/lisp/number.rs | 70 |
4 files changed, 423 insertions, 0 deletions
diff --git a/src/lisp/error.rs b/src/lisp/error.rs new file mode 100644 index 0000000..cde1e31 --- /dev/null +++ b/src/lisp/error.rs | |||
@@ -0,0 +1,4 @@ | |||
/// Errors reported by the Lisp front end.
///
/// Implements [`std::error::Error`] so it can be boxed or propagated with `?`
/// alongside other error types.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub enum LispError {
    /// The input text could not be tokenized.
    ParseError,
}

impl std::fmt::Display for LispError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            LispError::ParseError => f.write_str("parse error"),
        }
    }
}

impl std::error::Error for LispError {}
diff --git a/src/lisp/lex.rs b/src/lisp/lex.rs new file mode 100644 index 0000000..a1bea5f --- /dev/null +++ b/src/lisp/lex.rs | |||
@@ -0,0 +1,303 @@ | |||
1 | use std::{fmt, str::CharIndices}; | ||
2 | |||
3 | use super::error::LispError; | ||
4 | |||
/// A lexical token. Variants that carry a `&str` borrow their text from the
/// source string being tokenized.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Token<'a> {
    /// `(`
    LeftParen,
    /// `)`
    RightParen,
    /// Text of a comment.
    Comment(&'a str),
    /// Source text of a number that contains a decimal point.
    Float(&'a str),
    /// Source text of a number without a decimal point.
    Integer(&'a str),
    /// Source text of a character literal.
    Char(&'a str),
    /// Source text of a string literal.
    String(&'a str),
    /// An identifier.
    Name(&'a str),
    /// A keyword.
    Keyword(&'a str),
    /// `` ` ``
    BackQuote,
    /// `,`
    Comma,
    /// `,@`
    CommaAt,
    /// `'`
    Quote,
    /// End of input.
    End,
}

impl<'a> fmt::Display for Token<'a> {
    /// Writes a short label for the token: the literal text for punctuation,
    /// and the kind name (not the payload) for tokens that carry text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match *self {
            Token::LeftParen => "(",
            Token::RightParen => ")",
            Token::Comment(_) => "comment",
            Token::Float(_) => "float",
            Token::Integer(_) => "integer",
            Token::Char(_) => "char",
            Token::String(_) => "string",
            Token::Name(_) => "name",
            Token::Keyword(_) => "keyword",
            Token::BackQuote => "`",
            Token::Comma => ",",
            Token::CommaAt => ",@",
            Token::Quote => "'",
            Token::End => "EOF",
        };
        f.write_str(label)
    }
}
43 | |||
/// A pair of positions delimiting a token: `low` is where it starts and
/// `high` is `low` plus the token's size.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Span {
    low: u32,
    high: u32,
}

impl Span {
    /// Builds a zero-width span whose both ends sit at `pos`.
    fn empty(pos: u32) -> Self {
        Span { low: pos, high: pos }
    }
}
58 | |||
59 | pub struct Lexer<'input> { | ||
60 | input: &'input str, | ||
61 | cur_pos: u32, | ||
62 | offset: u32, | ||
63 | } | ||
64 | |||
65 | impl<'a> Lexer<'a> { | ||
66 | pub fn new(input: &'a str, offset: u32) -> Self { | ||
67 | Self { | ||
68 | input, | ||
69 | cur_pos: 0, | ||
70 | offset, | ||
71 | } | ||
72 | } | ||
73 | pub fn next_token(&mut self) -> Result<(Span, Token<'a>), LispError> { | ||
74 | let mut chars = self.input.char_indices(); | ||
75 | |||
76 | while let Some((ind, chr)) = chars.next() { | ||
77 | let low = self.cur_pos; | ||
78 | let res = match chr { | ||
79 | '(' => Ok((1, Token::LeftParen)), | ||
80 | ')' => Ok((1, Token::RightParen)), | ||
81 | '\'' => Ok((1, Token::Quote)), | ||
82 | '`' => Ok((1, Token::BackQuote)), | ||
83 | ',' => match chars.next() { | ||
84 | Some((_, '@')) => Ok((2, Token::CommaAt)), | ||
85 | _ => Ok((1, Token::Comma)), | ||
86 | }, | ||
87 | '#' => parse_name(&self.input[ind..]), | ||
88 | '-' | '0'..='9' => parse_number(&self.input[ind..]), | ||
89 | '"' => parse_string(&self.input[ind..]), | ||
90 | ';' => { | ||
91 | self.cur_pos += consume_comment(ind, &mut chars) as u32; | ||
92 | continue; | ||
93 | } | ||
94 | _ if is_ident(chr) => parse_name(&self.input[ind..]), | ||
95 | ch if ch.is_whitespace() => { | ||
96 | self.cur_pos += ch.len_utf8() as u32; | ||
97 | continue; | ||
98 | } | ||
99 | ch => { | ||
100 | eprintln!("some unexpected character: {}", ch); | ||
101 | Err(LispError::ParseError) | ||
102 | } | ||
103 | }; | ||
104 | let (size, token) = match res { | ||
105 | Ok(v) => v, | ||
106 | Err(_) => return Err(LispError::ParseError), | ||
107 | }; | ||
108 | self.cur_pos += size as u32; | ||
109 | self.input = &self.input[ind + size..]; | ||
110 | let sp = Span { | ||
111 | low, | ||
112 | high: low + size as u32, | ||
113 | }; | ||
114 | return Ok((sp, token)); | ||
115 | } | ||
116 | self.input = &self.input[..0]; | ||
117 | return Ok((Span::empty(self.cur_pos), Token::End)); | ||
118 | } | ||
119 | } | ||
120 | |||
121 | fn parse_number<'a>(mut input: &'a str) -> Result<(usize, Token<'a>), LispError> { | ||
122 | let mut dot = false; | ||
123 | let mut size = 0; | ||
124 | let mut chars = input.chars(); | ||
125 | |||
126 | if let Some(v) = chars.next() { | ||
127 | if v == '-' { | ||
128 | size += 1; | ||
129 | input = &input[1..]; | ||
130 | } else if v.is_digit(10) { | ||
131 | size += 1; | ||
132 | } | ||
133 | } | ||
134 | |||
135 | while let Some(chr) = chars.next() { | ||
136 | if chr.is_digit(10) { | ||
137 | size += 1; | ||
138 | } else if chr == '.' { | ||
139 | if !dot { | ||
140 | dot = true; | ||
141 | size += 1; | ||
142 | } else { | ||
143 | return Err(LispError::ParseError); | ||
144 | } | ||
145 | } else if !is_ident(chr) { | ||
146 | break; | ||
147 | } else { | ||
148 | return Err(LispError::ParseError); | ||
149 | } | ||
150 | } | ||
151 | let tok = if dot { | ||
152 | Token::Float(&input[..size]) | ||
153 | } else { | ||
154 | Token::Integer(&input[..size]) | ||
155 | }; | ||
156 | return Ok((size, tok)); | ||
157 | } | ||
158 | |||
159 | fn parse_string<'a>(input: &'a str) -> Result<(usize, Token<'a>), LispError> { | ||
160 | // count opening quote | ||
161 | let mut size = 1; | ||
162 | let mut chars = input.char_indices().skip(1); | ||
163 | while let Some((ind, chr)) = chars.next() { | ||
164 | match chr { | ||
165 | '\\' => { | ||
166 | let _ = chars.next(); | ||
167 | } | ||
168 | '"' => { | ||
169 | size += ind; | ||
170 | break; | ||
171 | } | ||
172 | _ => (), | ||
173 | } | ||
174 | } | ||
175 | return Ok((size, Token::String(&input[..size]))); | ||
176 | } | ||
177 | |||
/// Reports whether `ch` may appear in a name: any alphanumeric character or
/// one of the punctuation characters `! $ % & * + - . / < = > ? ^ _ | #`.
fn is_ident(ch: char) -> bool {
    const PUNCTUATION: &str = "!$%&*+-./<=>?^_|#";
    PUNCTUATION.contains(ch) || ch.is_alphanumeric()
}
186 | |||
/// Consumes the rest of a comment from `chars` and returns its length in bytes.
///
/// `start` is the byte index of the character that opened the comment; the
/// caller has already taken it from `chars`, and it is counted as one byte
/// (the lexer calls this for `;`). Characters are consumed up to and
/// including the first `'\n'`, or to the end of the iterator.
///
/// The length is measured to the end of the last consumed character, so a
/// comment ending on a multi-byte character is counted in full.
fn consume_comment(start: usize, chars: &mut CharIndices) -> usize {
    // Exclusive end of the consumed text; begins just past the opening char.
    let mut end = start + 1;

    for (ind, ch) in chars {
        end = ind + ch.len_utf8();
        if ch == '\n' {
            break;
        }
    }

    end - start
}
199 | |||
200 | fn parse_name<'a>(input: &'a str) -> Result<(usize, Token<'a>), LispError> { | ||
201 | for (ind, chr) in input.char_indices() { | ||
202 | if !is_ident(chr) { | ||
203 | return Ok((ind, Token::Name(&input[..ind]))); | ||
204 | } | ||
205 | } | ||
206 | return Ok((input.len(), Token::Name(input))); | ||
207 | } | ||
208 | |||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected span of a token.
    fn sp(low: u32, high: u32) -> Span {
        Span { low, high }
    }

    /// Lexes `input` to the end and returns every token before `Token::End`.
    /// Panics if the lexer reports an error.
    fn tokens(input: &str) -> Vec<(Span, Token)> {
        let mut lexer = Lexer::new(input, 0);
        let mut collected = Vec::new();
        loop {
            let item = lexer.next_token().unwrap();
            if item.1 == Token::End {
                return collected;
            }
            collected.push(item);
        }
    }

    #[test]
    fn string_parsing() {
        let source = r#""hello there""#;
        let (size, token) = parse_string(source).unwrap();
        assert_eq!(size, 13);
        assert_eq!(token, Token::String(r#""hello there""#));
    }

    #[test]
    fn integer_parsing() {
        let (size, token) = parse_number("12345").unwrap();
        assert_eq!(size, 5);
        assert_eq!(token, Token::Integer("12345"));
    }

    #[test]
    fn float_parsing() {
        let (size, token) = parse_number("12.345").unwrap();
        assert_eq!(size, 6);
        assert_eq!(token, Token::Float("12.345"));
    }

    #[test]
    fn lexer() {
        // Plain integers separated by whitespace.
        assert_eq!(
            tokens("1 2 3"),
            [
                (sp(0, 1), Token::Integer("1")),
                (sp(2, 3), Token::Integer("2")),
                (sp(4, 5), Token::Integer("3"))
            ]
        );

        // Names between numbers.
        assert_eq!(
            tokens("1 foo 3"),
            [
                (sp(0, 1), Token::Integer("1")),
                (sp(2, 5), Token::Name("foo")),
                (sp(6, 7), Token::Integer("3"))
            ]
        );

        // A single name, with and without a leading '#'.
        assert_eq!(tokens("foo"), [(sp(0, 3), Token::Name("foo")),]);
        assert_eq!(tokens("#t"), [(sp(0, 2), Token::Name("#t")),]);

        // String literals keep their quotes.
        assert_eq!(
            tokens("1 \"foo\" 3"),
            [
                (sp(0, 1), Token::Integer("1")),
                (sp(2, 7), Token::String(r#""foo""#)),
                (sp(8, 9), Token::Integer("3"))
            ]
        );

        // Nested lists.
        assert_eq!(
            tokens("(* 1 (+ 2 3))"),
            [
                (sp(0, 1), Token::LeftParen),
                (sp(1, 2), Token::Name("*")),
                (sp(3, 4), Token::Integer("1")),
                (sp(5, 6), Token::LeftParen),
                (sp(6, 7), Token::Name("+")),
                (sp(8, 9), Token::Integer("2")),
                (sp(10, 11), Token::Integer("3")),
                (sp(11, 12), Token::RightParen),
                (sp(12, 13), Token::RightParen),
            ]
        );

        // Comments produce no tokens.
        assert_eq!(tokens("; foo"), []);
        assert_eq!(tokens("1; foo"), [(sp(0, 1), Token::Integer("1"))]);
    }
}
diff --git a/src/lisp/mod.rs b/src/lisp/mod.rs new file mode 100644 index 0000000..5d8965f --- /dev/null +++ b/src/lisp/mod.rs | |||
@@ -0,0 +1,46 @@ | |||
1 | use std::fmt; | ||
2 | |||
3 | use number::LispNumber; | ||
4 | |||
5 | mod error; | ||
6 | mod lex; | ||
7 | mod number; | ||
8 | |||
9 | #[derive(Debug, PartialEq)] | ||
10 | pub enum LispExpr { | ||
11 | Number(LispNumber), | ||
12 | List(Vec<LispExpr>), | ||
13 | StringLit(String), | ||
14 | BoolLit(bool), | ||
15 | Ident(String), | ||
16 | Function(LispFunction), | ||
17 | } | ||
18 | |||
19 | impl fmt::Display for LispExpr { | ||
20 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
21 | match self { | ||
22 | LispExpr::Number(n) => write!(f, "{}", n)?, | ||
23 | LispExpr::List(l) => { | ||
24 | for expr in l.iter() { | ||
25 | write!(f, " {} ", expr)? | ||
26 | } | ||
27 | } | ||
28 | LispExpr::StringLit(s) => write!(f, "{:?}", s)?, | ||
29 | LispExpr::BoolLit(b) => { | ||
30 | if *b { | ||
31 | write!(f, "#t")? | ||
32 | } else { | ||
33 | write!(f, "#f")? | ||
34 | } | ||
35 | } | ||
36 | LispExpr::Ident(s) => write!(f, "{}", s)?, | ||
37 | LispExpr::Function(_) => write!(f, "<#procedure>")?, | ||
38 | }; | ||
39 | Ok(()) | ||
40 | } | ||
41 | } | ||
42 | |||
43 | pub type Environment = Vec<(String, LispExpr)>; | ||
44 | |||
45 | #[derive(Debug, PartialEq)] | ||
46 | struct LispFunction {} | ||
diff --git a/src/lisp/number.rs b/src/lisp/number.rs new file mode 100644 index 0000000..ddadbe6 --- /dev/null +++ b/src/lisp/number.rs | |||
@@ -0,0 +1,70 @@ | |||
1 | use std::{ | ||
2 | fmt::*, | ||
3 | ops::{Add, Div, Mul, Sub}, | ||
4 | }; | ||
5 | |||
/// A numeric value: either a 64-bit signed integer or a 64-bit float.
#[derive(Clone, Copy, Debug)]
pub enum LispNumber {
    /// An exact integer.
    Integer(i64),
    /// A floating-point number.
    Float(f64),
}
11 | |||
12 | impl Add for LispNumber { | ||
13 | type Output = Self; | ||
14 | fn add(self, rhs: Self) -> Self::Output { | ||
15 | use LispNumber::*; | ||
16 | match (self, rhs) { | ||
17 | (Integer(a), Integer(b)) => Integer(a + b), | ||
18 | (Float(a), Integer(b)) => Float(a + b as f64), | ||
19 | (Integer(a), Float(b)) => Float(a as f64 + b), | ||
20 | (Float(a), Float(b)) => Float(a + b), | ||
21 | } | ||
22 | } | ||
23 | } | ||
24 | |||
25 | impl Sub for LispNumber { | ||
26 | type Output = Self; | ||
27 | fn sub(self, rhs: Self) -> Self::Output { | ||
28 | use LispNumber::*; | ||
29 | match (self, rhs) { | ||
30 | (Integer(a), Integer(b)) => Integer(a - b), | ||
31 | (Float(a), Integer(b)) => Float(a - b as f64), | ||
32 | (Integer(a), Float(b)) => Float(a as f64 - b), | ||
33 | (Float(a), Float(b)) => Float(a - b), | ||
34 | } | ||
35 | } | ||
36 | } | ||
37 | |||
38 | impl Mul for LispNumber { | ||
39 | type Output = Self; | ||
40 | fn mul(self, rhs: Self) -> Self::Output { | ||
41 | use LispNumber::*; | ||
42 | match (self, rhs) { | ||
43 | (Integer(a), Integer(b)) => Integer(a * b), | ||
44 | (Float(a), Integer(b)) => Float(a * b as f64), | ||
45 | (Integer(a), Float(b)) => Float(a as f64 * b), | ||
46 | (Float(a), Float(b)) => Float(a * b), | ||
47 | } | ||
48 | } | ||
49 | } | ||
50 | |||
51 | impl PartialEq for LispNumber { | ||
52 | fn eq(&self, other: &Self) -> bool { | ||
53 | use LispNumber::*; | ||
54 | match (self, other) { | ||
55 | (Integer(a), Integer(b)) => *a == *b, | ||
56 | (Float(a), Integer(b)) => *a == *b as f64, | ||
57 | (Integer(a), Float(b)) => *a as f64 == *b, | ||
58 | (Float(a), Float(b)) => *a == *b, | ||
59 | } | ||
60 | } | ||
61 | } | ||
62 | |||
63 | impl Display for LispNumber { | ||
64 | fn fmt(&self, f: &mut Formatter<'_>) -> Result { | ||
65 | match self { | ||
66 | LispNumber::Integer(v) => write!(f, "{}", v), | ||
67 | LispNumber::Float(v) => write!(f, "{}", v), | ||
68 | } | ||
69 | } | ||
70 | } | ||