diff options
author | Akshay <[email protected]> | 2024-07-13 18:32:41 +0100 |
---|---|---|
committer | Akshay <[email protected]> | 2024-07-13 18:32:41 +0100 |
commit | 8eb38033e0c615983c4490354dad4abb00031042 (patch) | |
tree | 78d35946d2d14dd015eab53f6bf7b92153518b6f |
init trawk
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Cargo.toml | 16 | ||||
-rw-r--r-- | src/ast.rs | 186 | ||||
-rw-r--r-- | src/eval.rs | 764 | ||||
-rw-r--r-- | src/lib.rs | 7 | ||||
-rw-r--r-- | src/main.rs | 47 | ||||
-rw-r--r-- | src/parser.rs | 689 | ||||
-rw-r--r-- | src/string.rs | 152 |
8 files changed, 1862 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/.gitignore | |||
@@ -0,0 +1 @@ | |||
target | |||
diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..147096b --- /dev/null +++ b/Cargo.toml | |||
@@ -0,0 +1,16 @@ | |||
1 | [package] | ||
2 | name = "trawk" | ||
3 | version = "0.1.0" | ||
4 | edition = "2021" | ||
5 | |||
6 | [lib] | ||
7 | |||
8 | [dependencies] | ||
9 | tree-sitter = "0.20" | ||
10 | regex = "1.3" | ||
11 | thiserror = "1.0.61" | ||
12 | serde = "1.0.204" | ||
13 | nom = "7.1.3" | ||
14 | tree-sitter-python = "0.20" | ||
15 | |||
16 | [dev-dependencies] | ||
diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..07b5c39 --- /dev/null +++ b/src/ast.rs | |||
@@ -0,0 +1,186 @@ | |||
1 | #[derive(Debug)] | ||
2 | pub struct Program { | ||
3 | pub stanzas: Vec<Stanza>, | ||
4 | } | ||
5 | |||
6 | impl Program { | ||
7 | pub fn new() -> Self { | ||
8 | Self { | ||
9 | stanzas: Vec::new(), | ||
10 | } | ||
11 | } | ||
12 | |||
13 | pub fn from_str(mut self, i: &str) -> Result<Self, nom::error::Error<&str>> { | ||
14 | use nom::Finish; | ||
15 | let (remaining_input, stanzas) = crate::parser::parse_file(i).finish()?; | ||
16 | assert!(remaining_input.trim().is_empty(), "{remaining_input}"); | ||
17 | self.stanzas = stanzas; | ||
18 | Ok(self) | ||
19 | } | ||
20 | } | ||
21 | |||
22 | #[derive(Debug, PartialEq, Eq)] | ||
23 | pub struct Stanza { | ||
24 | pub pattern: Pattern, | ||
25 | pub statements: Block, | ||
26 | } | ||
27 | |||
/// Selects when a stanza is evaluated.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Pattern {
    /// The stanza is the program's begin block.
    Begin,
    /// The stanza is the program's end block.
    End,
    /// The stanza is attached to syntax nodes of a particular kind.
    Node(NodePattern),
}

/// A node-kind pattern together with the traversal phase it applies to.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NodePattern {
    /// Whether the stanza fires on entering or on leaving the node.
    pub modifier: Modifier,
    /// Name of the node kind to match.
    pub kind: String,
}

/// Traversal phase of a node pattern; `Enter` is the default.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum Modifier {
    #[default]
    Enter,
    Leave,
}
47 | |||
48 | #[derive(Debug, Default, Eq, PartialEq, Clone)] | ||
49 | pub struct Block { | ||
50 | pub body: Vec<Statement>, | ||
51 | } | ||
52 | |||
53 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
54 | pub enum Statement { | ||
55 | Bare(Expr), | ||
56 | Declaration(Declaration), | ||
57 | } | ||
58 | |||
59 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
60 | pub enum Expr { | ||
61 | Node, | ||
62 | Unit, | ||
63 | Lit(Literal), | ||
64 | Ident(Identifier), | ||
65 | // List(Vec<Expr>), | ||
66 | Bin(Box<Expr>, BinOp, Box<Expr>), | ||
67 | Unary(Box<Expr>, UnaryOp), | ||
68 | Call(Call), | ||
69 | IfExpr(If), | ||
70 | Block(Block), | ||
71 | } | ||
72 | |||
73 | impl Expr { | ||
74 | pub fn int(int: i128) -> Expr { | ||
75 | Self::Lit(Literal::Int(int)) | ||
76 | } | ||
77 | |||
78 | pub fn str(s: &str) -> Expr { | ||
79 | Self::Lit(Literal::Str(s.to_owned())) | ||
80 | } | ||
81 | |||
82 | pub const fn false_() -> Expr { | ||
83 | Self::Lit(Literal::Bool(false)) | ||
84 | } | ||
85 | |||
86 | pub const fn true_() -> Expr { | ||
87 | Self::Lit(Literal::Bool(true)) | ||
88 | } | ||
89 | |||
90 | pub fn boxed(self) -> Box<Expr> { | ||
91 | Box::new(self) | ||
92 | } | ||
93 | } | ||
94 | |||
/// Operators that take a single operand.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UnaryOp {
    /// Boolean negation.
    Not,
}

/// Operators that take two operands, grouped by family.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BinOp {
    /// Arithmetic.
    Arith(ArithOp),
    /// Comparison.
    Cmp(CmpOp),
    /// Boolean logic.
    Logic(LogicOp),
    /// Assignment, plain (`=`) or compound.
    Assign(AssignOp),
}

/// Arithmetic operators: `+ - * /` and remainder.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArithOp {
    Add,
    Sub,
    Mul,
    Div,
    Mod,
}

/// Logical operators: `&& ||`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LogicOp {
    And,
    Or,
}

/// Comparison operators: `== != > < >= <=`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CmpOp {
    Eq,
    Neq,
    Gt,
    Lt,
    Gte,
    Lte,
}

/// An assignment operator such as `=`, `+=`, `-=`, `*=` or `/=`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct AssignOp {
    /// `None` for plain `=`; otherwise the arithmetic operator combined with
    /// the assignment.
    pub op: Option<ArithOp>,
}
142 | |||
/// Name of a variable or function.
pub type Identifier = String;

/// A literal value written directly in a script.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Literal {
    /// A string literal.
    Str(String),
    /// An integer literal.
    Int(i128),
    /// A boolean literal.
    Bool(bool),
}
151 | |||
152 | /// A function call | ||
153 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
154 | pub struct Call { | ||
155 | pub function: Identifier, | ||
156 | pub parameters: Vec<Expr>, | ||
157 | } | ||
158 | |||
159 | impl From<Call> for Expr { | ||
160 | fn from(expr: Call) -> Expr { | ||
161 | Expr::Call(expr) | ||
162 | } | ||
163 | } | ||
164 | |||
/// The types a value or variable can have.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Type {
    Unit,
    Integer,
    String,
    Boolean,
    Node,
}
173 | |||
174 | #[derive(Debug, PartialEq, Eq, Clone)] | ||
175 | pub struct Declaration { | ||
176 | pub ty: Type, | ||
177 | pub name: Identifier, | ||
178 | pub init: Option<Box<Expr>>, | ||
179 | } | ||
180 | |||
181 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
182 | pub struct If { | ||
183 | pub condition: Box<Expr>, | ||
184 | pub then: Block, | ||
185 | pub else_: Block, | ||
186 | } | ||
diff --git a/src/eval.rs b/src/eval.rs new file mode 100644 index 0000000..859979d --- /dev/null +++ b/src/eval.rs | |||
@@ -0,0 +1,764 @@ | |||
1 | //! tree walking interpreter for trawk | ||
2 | |||
3 | use crate::ast; | ||
4 | use std::{collections::HashMap, fmt}; | ||
5 | |||
6 | #[derive(Debug, PartialEq, Eq, Clone)] | ||
7 | pub struct Variable { | ||
8 | pub ty: ast::Type, | ||
9 | pub name: ast::Identifier, | ||
10 | pub value: Value, | ||
11 | } | ||
12 | |||
13 | impl Variable { | ||
14 | fn value(&self) -> &Value { | ||
15 | &self.value | ||
16 | } | ||
17 | |||
18 | fn ty(&self) -> ast::Type { | ||
19 | self.ty | ||
20 | } | ||
21 | |||
22 | fn assign(&mut self, value: Value) -> Result { | ||
23 | if self.ty() == value.ty() { | ||
24 | self.value = value; | ||
25 | Ok(self.value.clone()) | ||
26 | } else { | ||
27 | Err(Error::TypeMismatch { | ||
28 | expected: self.ty(), | ||
29 | got: value.ty(), | ||
30 | }) | ||
31 | } | ||
32 | } | ||
33 | } | ||
34 | |||
/// A runtime value produced by evaluating an expression.
// Variant order is significant: the derived `Ord` compares variants by position.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Value {
    /// The unit value.
    Unit,
    /// An integer.
    Integer(i128),
    /// A string.
    String(String),
    /// A boolean.
    Boolean(bool),
    /// Marker for the current syntax node.
    Node,
}
43 | |||
44 | impl Value { | ||
45 | fn ty(&self) -> ast::Type { | ||
46 | match self { | ||
47 | Self::Unit => ast::Type::Unit, | ||
48 | Self::Integer(_) => ast::Type::Integer, | ||
49 | Self::String(_) => ast::Type::String, | ||
50 | Self::Boolean(_) => ast::Type::Boolean, | ||
51 | Self::Node => ast::Type::Node, | ||
52 | } | ||
53 | } | ||
54 | |||
55 | fn default(ty: ast::Type) -> Self { | ||
56 | match ty { | ||
57 | ast::Type::Unit => Self::Unit, | ||
58 | ast::Type::Integer => Self::default_int(), | ||
59 | ast::Type::String => Self::default_string(), | ||
60 | ast::Type::Boolean => Self::default_bool(), | ||
61 | ast::Type::Node => unreachable!(), | ||
62 | } | ||
63 | } | ||
64 | |||
65 | fn default_int() -> Self { | ||
66 | Self::Integer(0) | ||
67 | } | ||
68 | |||
69 | fn default_bool() -> Self { | ||
70 | Self::Boolean(false) | ||
71 | } | ||
72 | |||
73 | fn default_string() -> Self { | ||
74 | Self::String(String::default()) | ||
75 | } | ||
76 | |||
77 | fn as_boolean(&self) -> Option<bool> { | ||
78 | match self { | ||
79 | Self::Boolean(b) => Some(*b), | ||
80 | _ => None, | ||
81 | } | ||
82 | } | ||
83 | |||
84 | fn add(&self, other: &Self) -> Result { | ||
85 | match (self, other) { | ||
86 | (Self::Integer(s), Self::Integer(o)) => Ok(Self::Integer(*s + *o)), | ||
87 | (Self::String(s), Self::String(o)) => Ok(Self::String(format!("{s}{o}"))), | ||
88 | _ => Err(Error::UndefinedBinOp( | ||
89 | ast::BinOp::Arith(ast::ArithOp::Add), | ||
90 | self.ty(), | ||
91 | other.ty(), | ||
92 | )), | ||
93 | } | ||
94 | } | ||
95 | |||
96 | fn sub(&self, other: &Self) -> Result { | ||
97 | match (self, other) { | ||
98 | (Self::Integer(s), Self::Integer(o)) => Ok(Self::Integer(*s - *o)), | ||
99 | (Self::String(s), Self::String(o)) => { | ||
100 | Ok(Self::String(s.strip_suffix(o).unwrap_or(s).to_owned())) | ||
101 | } | ||
102 | _ => Err(Error::UndefinedBinOp( | ||
103 | ast::BinOp::Arith(ast::ArithOp::Sub), | ||
104 | self.ty(), | ||
105 | other.ty(), | ||
106 | )), | ||
107 | } | ||
108 | } | ||
109 | |||
110 | fn mul(&self, other: &Self) -> Result { | ||
111 | match (self, other) { | ||
112 | (Self::Integer(s), Self::Integer(o)) => Ok(Self::Integer(*s * *o)), | ||
113 | _ => Err(Error::UndefinedBinOp( | ||
114 | ast::BinOp::Arith(ast::ArithOp::Mul), | ||
115 | self.ty(), | ||
116 | other.ty(), | ||
117 | )), | ||
118 | } | ||
119 | } | ||
120 | |||
121 | fn div(&self, other: &Self) -> Result { | ||
122 | match (self, other) { | ||
123 | (Self::Integer(s), Self::Integer(o)) => Ok(Self::Integer(*s / *o)), | ||
124 | _ => Err(Error::UndefinedBinOp( | ||
125 | ast::BinOp::Arith(ast::ArithOp::Div), | ||
126 | self.ty(), | ||
127 | other.ty(), | ||
128 | )), | ||
129 | } | ||
130 | } | ||
131 | |||
132 | fn mod_(&self, other: &Self) -> Result { | ||
133 | match (self, other) { | ||
134 | (Self::Integer(s), Self::Integer(o)) => Ok(Self::Integer(*s % *o)), | ||
135 | _ => Err(Error::UndefinedBinOp( | ||
136 | ast::BinOp::Arith(ast::ArithOp::Mod), | ||
137 | self.ty(), | ||
138 | other.ty(), | ||
139 | )), | ||
140 | } | ||
141 | } | ||
142 | |||
143 | fn equals(&self, other: &Self) -> Result { | ||
144 | match (self, other) { | ||
145 | (Self::Integer(s), Self::Integer(o)) => Ok(Self::Boolean(s == o)), | ||
146 | (Self::String(s), Self::String(o)) => Ok(Self::Boolean(s == o)), | ||
147 | (Self::Boolean(s), Self::Boolean(o)) => Ok(Self::Boolean(s == o)), | ||
148 | _ => Err(Error::UndefinedBinOp( | ||
149 | ast::BinOp::Cmp(ast::CmpOp::Eq), | ||
150 | self.ty(), | ||
151 | other.ty(), | ||
152 | )), | ||
153 | } | ||
154 | } | ||
155 | |||
156 | fn greater_than(&self, other: &Self) -> Result { | ||
157 | match (self, other) { | ||
158 | (Self::Integer(s), Self::Integer(o)) => Ok(Self::Boolean(s > o)), | ||
159 | (Self::String(s), Self::String(o)) => Ok(Self::Boolean(s.cmp(o).is_gt())), | ||
160 | _ => Err(Error::UndefinedBinOp( | ||
161 | ast::BinOp::Cmp(ast::CmpOp::Gt), | ||
162 | self.ty(), | ||
163 | other.ty(), | ||
164 | )), | ||
165 | } | ||
166 | } | ||
167 | |||
168 | fn less_than(&self, other: &Self) -> Result { | ||
169 | match (self, other) { | ||
170 | (Self::Integer(s), Self::Integer(o)) => Ok(Self::Boolean(s < o)), | ||
171 | (Self::String(s), Self::String(o)) => Ok(Self::Boolean(s.cmp(o).is_lt())), | ||
172 | _ => Err(Error::UndefinedBinOp( | ||
173 | ast::BinOp::Cmp(ast::CmpOp::Lt), | ||
174 | self.ty(), | ||
175 | other.ty(), | ||
176 | )), | ||
177 | } | ||
178 | } | ||
179 | |||
180 | fn greater_than_equals(&self, other: &Self) -> Result { | ||
181 | match (self, other) { | ||
182 | (Self::Integer(s), Self::Integer(o)) => Ok(Self::Boolean(s >= o)), | ||
183 | (Self::String(s), Self::String(o)) => Ok(Self::Boolean(s.cmp(o).is_ge())), | ||
184 | (Self::Boolean(s), Self::Boolean(o)) => Ok(Self::Boolean(s == o)), | ||
185 | _ => Err(Error::UndefinedBinOp( | ||
186 | ast::BinOp::Cmp(ast::CmpOp::Gte), | ||
187 | self.ty(), | ||
188 | other.ty(), | ||
189 | )), | ||
190 | } | ||
191 | } | ||
192 | |||
193 | fn less_than_equals(&self, other: &Self) -> Result { | ||
194 | match (self, other) { | ||
195 | (Self::Integer(s), Self::Integer(o)) => Ok(Self::Boolean(s <= o)), | ||
196 | (Self::String(s), Self::String(o)) => Ok(Self::Boolean(s.cmp(o).is_le())), | ||
197 | (Self::Boolean(s), Self::Boolean(o)) => Ok(Self::Boolean(s == o)), | ||
198 | _ => Err(Error::UndefinedBinOp( | ||
199 | ast::BinOp::Cmp(ast::CmpOp::Lte), | ||
200 | self.ty(), | ||
201 | other.ty(), | ||
202 | )), | ||
203 | } | ||
204 | } | ||
205 | |||
206 | fn not(&self) -> Result { | ||
207 | match self { | ||
208 | Self::Boolean(s) => Ok(Self::Boolean(!s)), | ||
209 | _ => Err(Error::UndefinedUnaryOp(ast::UnaryOp::Not, self.ty())), | ||
210 | } | ||
211 | } | ||
212 | |||
213 | fn and(&self, other: &Self) -> Result { | ||
214 | match (self, other) { | ||
215 | (Self::Boolean(s), Self::Boolean(o)) => Ok(Self::Boolean(*s && *o)), | ||
216 | _ => Err(Error::UndefinedBinOp( | ||
217 | ast::BinOp::Logic(ast::LogicOp::And), | ||
218 | self.ty(), | ||
219 | other.ty(), | ||
220 | )), | ||
221 | } | ||
222 | } | ||
223 | |||
224 | fn or(&self, other: &Self) -> Result { | ||
225 | match (self, other) { | ||
226 | (Self::Boolean(s), Self::Boolean(o)) => Ok(Self::Boolean(*s || *o)), | ||
227 | _ => Err(Error::UndefinedBinOp( | ||
228 | ast::BinOp::Logic(ast::LogicOp::Or), | ||
229 | self.ty(), | ||
230 | other.ty(), | ||
231 | )), | ||
232 | } | ||
233 | } | ||
234 | } | ||
235 | |||
236 | impl fmt::Display for Value { | ||
237 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
238 | match self { | ||
239 | Self::Unit => write!(f, "()"), | ||
240 | Self::Integer(i) => write!(f, "{i}"), | ||
241 | Self::String(s) => write!(f, "{s}"), | ||
242 | Self::Boolean(b) => write!(f, "{b}"), | ||
243 | Self::Node => write!(f, "<node>"), | ||
244 | } | ||
245 | } | ||
246 | } | ||
247 | |||
248 | type NodeKind = u16; | ||
249 | |||
250 | #[derive(Debug, Default)] | ||
251 | struct Visitor { | ||
252 | enter: ast::Block, | ||
253 | leave: ast::Block, | ||
254 | } | ||
255 | |||
256 | #[derive(Debug)] | ||
257 | struct Visitors { | ||
258 | visitors: HashMap<NodeKind, Visitor>, | ||
259 | begin: ast::Block, | ||
260 | end: ast::Block, | ||
261 | } | ||
262 | |||
263 | impl Default for Visitors { | ||
264 | fn default() -> Self { | ||
265 | Self::new() | ||
266 | } | ||
267 | } | ||
268 | |||
269 | impl Visitors { | ||
270 | pub fn new() -> Self { | ||
271 | Self { | ||
272 | visitors: HashMap::new(), | ||
273 | begin: ast::Block { body: vec![] }, | ||
274 | end: ast::Block { body: vec![] }, | ||
275 | } | ||
276 | } | ||
277 | |||
278 | pub fn insert( | ||
279 | &mut self, | ||
280 | stanza: ast::Stanza, | ||
281 | language: &tree_sitter::Language, | ||
282 | ) -> std::result::Result<(), Error> { | ||
283 | match &stanza.pattern { | ||
284 | ast::Pattern::Begin => self.begin = stanza.statements, | ||
285 | ast::Pattern::End => self.end = stanza.statements, | ||
286 | ast::Pattern::Node(ast::NodePattern { modifier, kind }) => { | ||
287 | let id = language.id_for_node_kind(&kind, true); | ||
288 | if id == 0 { | ||
289 | return Err(Error::InvalidNodeKind(kind.to_owned())); | ||
290 | } | ||
291 | let v = self.visitors.entry(id).or_default(); | ||
292 | match modifier { | ||
293 | ast::Modifier::Enter => v.enter = stanza.statements.clone(), | ||
294 | ast::Modifier::Leave => v.leave = stanza.statements.clone(), | ||
295 | }; | ||
296 | } | ||
297 | } | ||
298 | Ok(()) | ||
299 | } | ||
300 | |||
301 | pub fn get_by_node(&self, node: tree_sitter::Node) -> Option<&Visitor> { | ||
302 | let node_id = node.kind_id(); | ||
303 | self.visitors.get(&node_id) | ||
304 | } | ||
305 | } | ||
306 | |||
307 | #[derive(Debug, PartialEq, Eq)] | ||
308 | pub enum Error { | ||
309 | FailedLookup(ast::Identifier), | ||
310 | TypeMismatch { expected: ast::Type, got: ast::Type }, | ||
311 | UndefinedBinOp(ast::BinOp, ast::Type, ast::Type), | ||
312 | UndefinedUnaryOp(ast::UnaryOp, ast::Type), | ||
313 | AlreadyBound(ast::Identifier), | ||
314 | MalformedExpr(String), | ||
315 | InvalidNodeKind(String), | ||
316 | // current node is only set in visitors, not in BEGIN or END blocks | ||
317 | CurrentNodeNotPresent, | ||
318 | } | ||
319 | |||
320 | type Result = std::result::Result<Value, Error>; | ||
321 | |||
322 | pub struct Context<'a> { | ||
323 | variables: HashMap<ast::Identifier, Variable>, | ||
324 | language: tree_sitter::Language, | ||
325 | visitors: Visitors, | ||
326 | input_src: Option<String>, | ||
327 | cursor: Option<tree_sitter::TreeCursor<'a>>, | ||
328 | } | ||
329 | |||
330 | impl<'a> fmt::Debug for Context<'a> { | ||
331 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||
332 | f.debug_struct("Context") | ||
333 | .field("variables", &self.variables) | ||
334 | .field("language", &self.language) | ||
335 | .field("visitors", &self.visitors) | ||
336 | .field("input_src", &self.input_src) | ||
337 | .field( | ||
338 | "cursor", | ||
339 | if self.cursor.is_some() { | ||
340 | &"Some(<cursor>)" | ||
341 | } else { | ||
342 | &"None" | ||
343 | }, | ||
344 | ) | ||
345 | .finish() | ||
346 | } | ||
347 | } | ||
348 | |||
349 | impl<'a> Context<'a> { | ||
350 | pub fn new(language: tree_sitter::Language) -> Self { | ||
351 | Self { | ||
352 | visitors: Default::default(), | ||
353 | variables: Default::default(), | ||
354 | language, | ||
355 | input_src: None, | ||
356 | cursor: None, | ||
357 | } | ||
358 | } | ||
359 | |||
360 | pub fn with_program(mut self, program: ast::Program) -> std::result::Result<Self, Error> { | ||
361 | for stanza in program.stanzas.into_iter() { | ||
362 | self.visitors.insert(stanza, &self.language)?; | ||
363 | } | ||
364 | Ok(self) | ||
365 | } | ||
366 | |||
367 | pub fn with_input(mut self, src: String) -> Self { | ||
368 | self.input_src = Some(src); | ||
369 | self | ||
370 | } | ||
371 | |||
372 | pub fn with_cursor(mut self, cursor: tree_sitter::TreeCursor<'a>) -> Self { | ||
373 | self.cursor = Some(cursor); | ||
374 | self | ||
375 | } | ||
376 | |||
377 | fn eval_expr(&mut self, expr: &ast::Expr) -> Result { | ||
378 | match expr { | ||
379 | ast::Expr::Unit => Ok(Value::Unit), | ||
380 | ast::Expr::Lit(lit) => self.eval_lit(lit), | ||
381 | ast::Expr::Ident(ident) => self.lookup(ident).map(Variable::value).cloned(), | ||
382 | ast::Expr::Bin(lhs, op, rhs) => self.eval_bin(&*lhs, *op, &*rhs), | ||
383 | ast::Expr::Unary(expr, op) => self.eval_unary(&*expr, *op), | ||
384 | ast::Expr::Call(call) => self.eval_call(&*call), | ||
385 | ast::Expr::IfExpr(if_expr) => self.eval_if(if_expr), | ||
386 | ast::Expr::Block(block) => self.eval_block(block), | ||
387 | ast::Expr::Node => Ok(Value::Node), | ||
388 | } | ||
389 | } | ||
390 | |||
391 | fn eval_lit(&mut self, lit: &ast::Literal) -> Result { | ||
392 | match lit { | ||
393 | ast::Literal::Str(s) => Ok(Value::String(s.to_owned())), | ||
394 | ast::Literal::Int(i) => Ok(Value::Integer(*i)), | ||
395 | ast::Literal::Bool(b) => Ok(Value::Boolean(*b)), | ||
396 | } | ||
397 | } | ||
398 | |||
399 | fn lookup(&mut self, ident: &ast::Identifier) -> std::result::Result<&Variable, Error> { | ||
400 | self.variables | ||
401 | .get(ident) | ||
402 | .ok_or_else(|| Error::FailedLookup(ident.to_owned())) | ||
403 | } | ||
404 | |||
405 | fn lookup_mut(&mut self, ident: &ast::Identifier) -> std::result::Result<&mut Variable, Error> { | ||
406 | self.variables | ||
407 | .get_mut(ident) | ||
408 | .ok_or_else(|| Error::FailedLookup(ident.to_owned())) | ||
409 | } | ||
410 | |||
411 | fn bind( | ||
412 | &mut self, | ||
413 | ident: &ast::Identifier, | ||
414 | ty: ast::Type, | ||
415 | ) -> std::result::Result<&mut Variable, Error> { | ||
416 | if self.lookup(ident).is_err() { | ||
417 | Ok(self | ||
418 | .variables | ||
419 | .entry(ident.to_owned()) | ||
420 | .or_insert_with(|| Variable { | ||
421 | name: ident.to_owned(), | ||
422 | value: Value::default(ty), | ||
423 | ty, | ||
424 | })) | ||
425 | } else { | ||
426 | Err(Error::AlreadyBound(ident.to_owned())) | ||
427 | } | ||
428 | } | ||
429 | |||
430 | fn eval_bin(&mut self, lhs: &ast::Expr, op: ast::BinOp, rhs: &ast::Expr) -> Result { | ||
431 | match op { | ||
432 | ast::BinOp::Assign(op) => self.eval_assign(lhs, op, rhs), | ||
433 | ast::BinOp::Arith(op) => self.eval_arith(lhs, op, rhs), | ||
434 | ast::BinOp::Cmp(op) => self.eval_cmp(lhs, op, rhs), | ||
435 | ast::BinOp::Logic(op) => self.eval_logic(lhs, op, rhs), | ||
436 | } | ||
437 | } | ||
438 | |||
439 | fn eval_assign( | ||
440 | &mut self, | ||
441 | lhs: &ast::Expr, | ||
442 | ast::AssignOp { op }: ast::AssignOp, | ||
443 | rhs: &ast::Expr, | ||
444 | ) -> Result { | ||
445 | let ast::Expr::Ident(ident) = lhs else { | ||
446 | return Err(Error::MalformedExpr(format!( | ||
447 | "malformed assigment, lhs: {:?}", | ||
448 | lhs | ||
449 | ))); | ||
450 | }; | ||
451 | let value = self.eval_expr(rhs)?; | ||
452 | let variable = self.lookup_mut(ident)?; | ||
453 | match op { | ||
454 | None => variable.assign(value), | ||
455 | Some(ast::ArithOp::Add) => variable.assign(variable.value().add(&value)?), | ||
456 | Some(ast::ArithOp::Sub) => variable.assign(variable.value().sub(&value)?), | ||
457 | Some(ast::ArithOp::Mul) => variable.assign(variable.value().mul(&value)?), | ||
458 | Some(ast::ArithOp::Div) => variable.assign(variable.value().div(&value)?), | ||
459 | Some(ast::ArithOp::Mod) => variable.assign(variable.value().mod_(&value)?), | ||
460 | } | ||
461 | } | ||
462 | |||
463 | fn eval_arith(&mut self, lhs: &ast::Expr, op: ast::ArithOp, rhs: &ast::Expr) -> Result { | ||
464 | let l = self.eval_expr(lhs)?; | ||
465 | let r = self.eval_expr(rhs)?; | ||
466 | match op { | ||
467 | ast::ArithOp::Add => l.add(&r), | ||
468 | ast::ArithOp::Sub => l.sub(&r), | ||
469 | ast::ArithOp::Mul => l.mul(&r), | ||
470 | ast::ArithOp::Div => l.div(&r), | ||
471 | ast::ArithOp::Mod => l.mod_(&r), | ||
472 | } | ||
473 | } | ||
474 | |||
475 | fn eval_cmp(&mut self, lhs: &ast::Expr, op: ast::CmpOp, rhs: &ast::Expr) -> Result { | ||
476 | let l = self.eval_expr(lhs)?; | ||
477 | let r = self.eval_expr(rhs)?; | ||
478 | |||
479 | match op { | ||
480 | ast::CmpOp::Eq => l.equals(&r), | ||
481 | ast::CmpOp::Gt => l.greater_than(&r), | ||
482 | ast::CmpOp::Lt => l.less_than(&r), | ||
483 | ast::CmpOp::Neq => l.equals(&r).and_then(|v| v.not()), | ||
484 | ast::CmpOp::Gte => l.greater_than_equals(&r), | ||
485 | ast::CmpOp::Lte => l.less_than_equals(&r), | ||
486 | } | ||
487 | } | ||
488 | |||
489 | fn eval_logic(&mut self, lhs: &ast::Expr, op: ast::LogicOp, rhs: &ast::Expr) -> Result { | ||
490 | let l = self.eval_expr(lhs)?; | ||
491 | |||
492 | // short-circuit | ||
493 | let l_value = l.as_boolean().ok_or_else(|| Error::TypeMismatch { | ||
494 | expected: ast::Type::Boolean, | ||
495 | got: l.ty(), | ||
496 | })?; | ||
497 | |||
498 | match op { | ||
499 | ast::LogicOp::Or => { | ||
500 | if l_value { | ||
501 | return Ok(l); | ||
502 | } else { | ||
503 | let r = self.eval_expr(rhs)?; | ||
504 | l.or(&r) | ||
505 | } | ||
506 | } | ||
507 | ast::LogicOp::And => { | ||
508 | if !l_value { | ||
509 | return Ok(l); | ||
510 | } else { | ||
511 | let r = self.eval_expr(rhs)?; | ||
512 | l.and(&r) | ||
513 | } | ||
514 | } | ||
515 | } | ||
516 | } | ||
517 | |||
518 | fn eval_unary(&mut self, expr: &ast::Expr, op: ast::UnaryOp) -> Result { | ||
519 | let val = self.eval_expr(expr)?; | ||
520 | match op { | ||
521 | ast::UnaryOp::Not => val.not(), | ||
522 | } | ||
523 | } | ||
524 | |||
525 | fn eval_if(&mut self, if_expr: &ast::If) -> Result { | ||
526 | let cond = self.eval_expr(&if_expr.condition)?; | ||
527 | |||
528 | if cond.as_boolean().ok_or_else(|| Error::TypeMismatch { | ||
529 | expected: ast::Type::Boolean, | ||
530 | got: cond.ty(), | ||
531 | })? { | ||
532 | self.eval_block(&if_expr.then) | ||
533 | } else { | ||
534 | self.eval_block(&if_expr.else_) | ||
535 | } | ||
536 | } | ||
537 | |||
538 | fn eval_call(&mut self, call: &ast::Call) -> Result { | ||
539 | match (call.function.as_str(), call.parameters.as_slice()) { | ||
540 | ("print", args) => { | ||
541 | for arg in args { | ||
542 | let val = self.eval_expr(arg)?; | ||
543 | print!("{val}"); | ||
544 | } | ||
545 | Ok(Value::Unit) | ||
546 | } | ||
547 | ("text", [arg]) if self.eval_expr(arg)? == Value::Node => { | ||
548 | let node = self | ||
549 | .cursor | ||
550 | .as_ref() | ||
551 | .ok_or(Error::CurrentNodeNotPresent)? | ||
552 | .node(); | ||
553 | let text = node | ||
554 | .utf8_text(self.input_src.as_ref().unwrap().as_bytes()) | ||
555 | .unwrap(); | ||
556 | Ok(Value::String(text.to_owned())) | ||
557 | } | ||
558 | (s, _) => Err(Error::FailedLookup(s.to_owned())), | ||
559 | } | ||
560 | } | ||
561 | |||
562 | fn eval_declaration(&mut self, decl: &ast::Declaration) -> Result { | ||
563 | let initial_value = match decl.init.as_ref() { | ||
564 | Some(init) => Some(self.eval_expr(&*init)?), | ||
565 | None => None, | ||
566 | }; | ||
567 | let variable = self.bind(&decl.name, decl.ty)?; | ||
568 | |||
569 | if let Some(init) = initial_value { | ||
570 | variable.assign(init)?; | ||
571 | } | ||
572 | |||
573 | Ok(Value::Unit) | ||
574 | } | ||
575 | |||
576 | fn eval_statement(&mut self, stmt: &ast::Statement) -> Result { | ||
577 | match stmt { | ||
578 | ast::Statement::Bare(expr) => self.eval_expr(expr).map(|_| Value::Unit), | ||
579 | ast::Statement::Declaration(decl) => self.eval_declaration(decl), | ||
580 | } | ||
581 | } | ||
582 | |||
583 | fn eval_block(&mut self, block: &ast::Block) -> Result { | ||
584 | for stmt in block.body.iter() { | ||
585 | self.eval_statement(stmt)?; | ||
586 | } | ||
587 | Ok(Value::Unit) | ||
588 | } | ||
589 | |||
590 | pub fn eval(&mut self) -> Result { | ||
591 | let visitors = std::mem::take(&mut self.visitors); | ||
592 | let mut has_next = true; | ||
593 | let mut postorder = Vec::new(); | ||
594 | |||
595 | // BEGIN block | ||
596 | self.eval_block(&visitors.begin)?; | ||
597 | |||
598 | while has_next { | ||
599 | let current_node = self.cursor.as_mut().unwrap().node(); | ||
600 | postorder.push(current_node); | ||
601 | |||
602 | let visitor = visitors.get_by_node(current_node); | ||
603 | |||
604 | visitor.map(|v| self.eval_block(&v.enter)); | ||
605 | |||
606 | has_next = self.cursor.as_mut().unwrap().goto_first_child(); | ||
607 | |||
608 | if !has_next { | ||
609 | has_next = self.cursor.as_mut().unwrap().goto_next_sibling(); | ||
610 | postorder | ||
611 | .pop() | ||
612 | .and_then(|n| visitors.get_by_node(n)) | ||
613 | .map(|v| self.eval_block(&v.leave)); | ||
614 | } | ||
615 | |||
616 | while !has_next && self.cursor.as_mut().unwrap().goto_parent() { | ||
617 | has_next = self.cursor.as_mut().unwrap().goto_next_sibling(); | ||
618 | postorder | ||
619 | .pop() | ||
620 | .and_then(|n| visitors.get_by_node(n)) | ||
621 | .map(|v| self.eval_block(&v.leave)); | ||
622 | } | ||
623 | } | ||
624 | |||
625 | // END block | ||
626 | self.eval_block(&visitors.end)?; | ||
627 | |||
628 | Ok(Value::Unit) | ||
629 | } | ||
630 | } | ||
631 | |||
#[cfg(test)]
mod test {
    use super::*;

    /// Builds a context for the Python grammar with an empty program loaded.
    fn empty_context() -> Context<'static> {
        Context::new(tree_sitter_python::language())
            .with_program(ast::Program::new())
            .unwrap()
    }

    /// Builds the binary expression `lhs op rhs`.
    fn binary(lhs: ast::Expr, op: ast::BinOp, rhs: ast::Expr) -> ast::Expr {
        ast::Expr::Bin(lhs.boxed(), op, rhs.boxed())
    }

    /// Builds the expression `5 < 10`.
    fn five_lt_ten() -> ast::Expr {
        binary(
            ast::Expr::int(5),
            ast::BinOp::Cmp(ast::CmpOp::Lt),
            ast::Expr::int(10),
        )
    }

    /// Builds the statement `name += amount`.
    fn add_assign(name: &str, amount: i128) -> ast::Statement {
        ast::Statement::Bare(binary(
            ast::Expr::Ident(name.to_owned()),
            ast::BinOp::Assign(ast::AssignOp {
                op: Some(ast::ArithOp::Add),
            }),
            ast::Expr::int(amount),
        ))
    }

    /// Builds an integer declaration of `name` with an optional initial value.
    fn declare_int(name: &str, init: Option<i128>) -> ast::Statement {
        ast::Statement::Declaration(ast::Declaration {
            ty: ast::Type::Integer,
            name: name.to_owned(),
            init: init.map(|value| ast::Expr::int(value).boxed()),
        })
    }

    /// The integer variable `name` holding `value`.
    fn int_variable(name: &str, value: i128) -> Variable {
        Variable {
            ty: ast::Type::Integer,
            name: name.to_owned(),
            value: Value::Integer(value),
        }
    }

    #[test]
    fn bin() {
        let mut ctx = empty_context();

        let sum = binary(
            ast::Expr::int(5),
            ast::BinOp::Arith(ast::ArithOp::Add),
            ast::Expr::int(10),
        );
        assert_eq!(ctx.eval_expr(&sum), Ok(Value::Integer(15)));

        let equal = binary(
            ast::Expr::int(5),
            ast::BinOp::Cmp(ast::CmpOp::Eq),
            ast::Expr::int(10),
        );
        assert_eq!(ctx.eval_expr(&equal), Ok(Value::Boolean(false)));

        assert_eq!(ctx.eval_expr(&five_lt_ten()), Ok(Value::Boolean(true)));

        let conjunction = binary(
            five_lt_ten(),
            ast::BinOp::Logic(ast::LogicOp::And),
            ast::Expr::false_(),
        );
        assert_eq!(ctx.eval_expr(&conjunction), Ok(Value::Boolean(false)));
    }

    #[test]
    fn test_evaluate_blocks() {
        let mut ctx = empty_context();
        let block = ast::Block {
            body: vec![declare_int("a", None), add_assign("a", 5)],
        };

        assert_eq!(ctx.eval_block(&block), Ok(Value::Unit));

        let name = String::from("a");
        assert_eq!(ctx.lookup(&name).unwrap().clone(), int_variable("a", 5));
    }

    #[test]
    fn test_evaluate_if() {
        let mut ctx = empty_context();
        let conditional = ast::Statement::Bare(ast::Expr::IfExpr(ast::If {
            condition: ast::Expr::true_().boxed(),
            then: ast::Block {
                body: vec![add_assign("a", 5)],
            },
            else_: ast::Block {
                body: vec![add_assign("a", 10)],
            },
        }));
        let block = ast::Block {
            body: vec![declare_int("a", Some(1)), conditional],
        };

        assert_eq!(ctx.eval_block(&block), Ok(Value::Unit));

        let name = String::from("a");
        assert_eq!(ctx.lookup(&name).unwrap().clone(), int_variable("a", 6));
    }
}
diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..8780b74 --- /dev/null +++ b/src/lib.rs | |||
@@ -0,0 +1,7 @@ | |||
1 | mod ast; | ||
2 | mod eval; | ||
3 | mod parser; | ||
4 | mod string; | ||
5 | |||
6 | pub use ast::Program; | ||
7 | pub use eval::Context; | ||
diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..09a15ef --- /dev/null +++ b/src/main.rs | |||
@@ -0,0 +1,47 @@ | |||
1 | use trawk::{Context, Program}; | ||
2 | |||
3 | fn main() { | ||
4 | let src = r#" | ||
5 | bar = 0 | ||
6 | def foo(): | ||
7 | baz = 5 | ||
8 | "# | ||
9 | .to_owned(); | ||
10 | |||
11 | let program = Program::new() | ||
12 | .from_str( | ||
13 | r#" | ||
14 | BEGIN { | ||
15 | bool in_def = false; | ||
16 | } | ||
17 | pre function_definition { | ||
18 | in_def = true; | ||
19 | } | ||
20 | post function_definition { | ||
21 | in_def = false; | ||
22 | } | ||
23 | pre identifier { | ||
24 | if (in_def) { | ||
25 | print(text(node)); | ||
26 | print(" "); | ||
27 | print("in def\n"); | ||
28 | } else { | ||
29 | }; | ||
30 | }"#, | ||
31 | ) | ||
32 | .unwrap(); | ||
33 | |||
34 | let mut parser = tree_sitter::Parser::new(); | ||
35 | let _ = parser.set_language(tree_sitter_python::language()); | ||
36 | |||
37 | let tree = parser.parse(&src, None).unwrap(); | ||
38 | let cursor = tree.walk(); | ||
39 | |||
40 | let mut ctx = Context::new(tree_sitter_python::language()) | ||
41 | .with_input(src) | ||
42 | .with_cursor(cursor) | ||
43 | .with_program(program) | ||
44 | .unwrap(); | ||
45 | |||
46 | let _ = ctx.eval(); | ||
47 | } | ||
diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000..3a020dc --- /dev/null +++ b/src/parser.rs | |||
@@ -0,0 +1,689 @@ | |||
1 | use nom::{ | ||
2 | branch::alt, | ||
3 | bytes::complete::tag, | ||
4 | character::complete::{alpha1, alphanumeric1, char, multispace0, multispace1, one_of}, | ||
5 | combinator::{map, opt, recognize, value}, | ||
6 | error::ParseError, | ||
7 | multi::{many0, many0_count, many1, separated_list0}, | ||
8 | sequence::{delimited, pair, preceded, terminated, tuple}, | ||
9 | IResult, Parser, | ||
10 | }; | ||
11 | // use tree_sitter::Query; | ||
12 | |||
13 | use crate::ast::*; | ||
14 | use crate::string::parse_string; | ||
15 | |||
16 | fn ws<'a, F: 'a, O, E>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> | ||
17 | where | ||
18 | F: FnMut(&'a str) -> IResult<&'a str, O, E>, | ||
19 | E: ParseError<&'a str>, | ||
20 | { | ||
21 | delimited(multispace0, inner, multispace0) | ||
22 | } | ||
23 | |||
24 | fn parse_unit<'a>(i: &'a str) -> IResult<&'a str, ()> { | ||
25 | let open = char('('); | ||
26 | let close = char(')'); | ||
27 | let unit = tuple((open, close)); | ||
28 | value((), unit)(i) | ||
29 | } | ||
30 | |||
31 | fn parse_bool(i: &str) -> IResult<&str, bool> { | ||
32 | let t = value(true, tag("true")); | ||
33 | let f = value(false, tag("false")); | ||
34 | alt((t, f)).parse(i) | ||
35 | } | ||
36 | |||
37 | fn parse_int<'a>(i: &'a str) -> IResult<&'a str, i128> { | ||
38 | map(recognize(many1(one_of("0123456789"))), |s: &str| { | ||
39 | s.parse::<i128>().unwrap() | ||
40 | })(i) | ||
41 | } | ||
42 | |||
43 | fn parse_name(i: &str) -> IResult<&str, &str> { | ||
44 | recognize(pair( | ||
45 | alt((alpha1, tag("_"))), | ||
46 | many0_count(alt((alphanumeric1, tag("_")))), | ||
47 | )) | ||
48 | .parse(i) | ||
49 | } | ||
50 | |||
51 | fn parse_ident(i: &str) -> IResult<&str, Identifier> { | ||
52 | map(parse_name, str::to_owned)(i) | ||
53 | } | ||
54 | |||
55 | fn parse_lit<'a>(i: &'a str) -> IResult<&'a str, Literal> { | ||
56 | alt(( | ||
57 | map(parse_string, Literal::Str), | ||
58 | map(parse_int, Literal::Int), | ||
59 | map(parse_bool, Literal::Bool), | ||
60 | )) | ||
61 | .parse(i) | ||
62 | } | ||
63 | |||
64 | fn parse_cmp_op(i: &str) -> IResult<&str, CmpOp> { | ||
65 | alt(( | ||
66 | value(CmpOp::Eq, tag("==")), | ||
67 | value(CmpOp::Neq, tag("!=")), | ||
68 | value(CmpOp::Gte, tag(">=")), | ||
69 | value(CmpOp::Lte, tag("<=")), | ||
70 | value(CmpOp::Gt, tag(">")), | ||
71 | value(CmpOp::Lt, tag("<")), | ||
72 | )) | ||
73 | .parse(i) | ||
74 | } | ||
75 | |||
76 | fn parse_assign_op(i: &str) -> IResult<&str, AssignOp> { | ||
77 | let parse_arith_op = alt(( | ||
78 | value(ArithOp::Add, char('+')), | ||
79 | value(ArithOp::Sub, char('-')), | ||
80 | value(ArithOp::Mul, char('*')), | ||
81 | value(ArithOp::Div, char('/')), | ||
82 | value(ArithOp::Mod, char('%')), | ||
83 | )); | ||
84 | map(tuple((opt(parse_arith_op), char('='))), |(op, _)| { | ||
85 | AssignOp { op } | ||
86 | })(i) | ||
87 | } | ||
88 | |||
89 | fn parse_op<'a, E, T>( | ||
90 | op_str: &'static str, | ||
91 | op: T, | ||
92 | ) -> impl FnMut(&'a str) -> Result<(&'a str, T), nom::Err<E>> | ||
93 | where | ||
94 | E: ParseError<&'a str>, | ||
95 | T: Copy, | ||
96 | { | ||
97 | value(op, tag(op_str)) | ||
98 | } | ||
99 | |||
100 | fn parse_binary<'a, P1, P2, P3, E>( | ||
101 | lhs: P1, | ||
102 | op: P2, | ||
103 | rhs: P3, | ||
104 | ) -> impl FnMut(&'a str) -> Result<(&'a str, Expr), nom::Err<E>> | ||
105 | where | ||
106 | P1: Parser<&'a str, Expr, E>, | ||
107 | P2: Parser<&'a str, BinOp, E>, | ||
108 | P3: Parser<&'a str, Expr, E>, | ||
109 | E: ParseError<&'a str>, | ||
110 | { | ||
111 | map(tuple((lhs, op, rhs)), |(l, o, r)| { | ||
112 | Expr::Bin(l.boxed(), o, r.boxed()) | ||
113 | }) | ||
114 | } | ||
115 | |||
116 | fn parse_assign<'a>(i: &'a str) -> IResult<&'a str, Expr> { | ||
117 | let op = map(parse_assign_op, BinOp::Assign); | ||
118 | let recursive = parse_binary(parse_atom, op, parse_assign); | ||
119 | let base = parse_union; | ||
120 | alt((recursive, base)).parse(i) | ||
121 | } | ||
122 | |||
123 | fn parse_union<'a>(i: &'a str) -> IResult<&'a str, Expr> { | ||
124 | let op = parse_op("||", BinOp::Logic(LogicOp::Or)); | ||
125 | let recursive = parse_binary(parse_intersection, op, parse_union); | ||
126 | let base = parse_intersection; | ||
127 | alt((recursive, base)).parse(i) | ||
128 | } | ||
129 | |||
130 | fn parse_intersection<'a>(i: &'a str) -> IResult<&'a str, Expr> { | ||
131 | let op = parse_op("&&", BinOp::Logic(LogicOp::And)); | ||
132 | let recursive = parse_binary(parse_negated, op, parse_intersection); | ||
133 | let base = parse_negated; | ||
134 | alt((recursive, base)).parse(i) | ||
135 | } | ||
136 | |||
137 | fn parse_negated<'a>(i: &'a str) -> IResult<&'a str, Expr> { | ||
138 | let op = parse_op("!", UnaryOp::Not); | ||
139 | let recursive = map(tuple((op, parse_rel)), |(op, expr)| { | ||
140 | Expr::Unary(expr.boxed(), op) | ||
141 | }); | ||
142 | let base = parse_rel; | ||
143 | alt((recursive, base)).parse(i) | ||
144 | } | ||
145 | |||
146 | fn parse_rel<'a>(i: &'a str) -> IResult<&'a str, Expr> { | ||
147 | let op = map(parse_cmp_op, BinOp::Cmp); | ||
148 | let recursive = parse_binary(parse_sum, op, parse_rel); | ||
149 | let base = parse_sum; | ||
150 | alt((recursive, base)).parse(i) | ||
151 | } | ||
152 | |||
153 | fn parse_sum<'a>(i: &'a str) -> IResult<&'a str, Expr> { | ||
154 | let add = parse_op("+", BinOp::Arith(ArithOp::Add)); | ||
155 | let sub = parse_op("-", BinOp::Arith(ArithOp::Sub)); | ||
156 | let op = alt((add, sub)); | ||
157 | let recursive = parse_binary(parse_mul, op, parse_sum); | ||
158 | let base = parse_mul; | ||
159 | alt((recursive, base)).parse(i) | ||
160 | } | ||
161 | |||
162 | fn parse_mul<'a>(i: &'a str) -> IResult<&'a str, Expr> { | ||
163 | let mul = parse_op("*", BinOp::Arith(ArithOp::Mul)); | ||
164 | let div = parse_op("/", BinOp::Arith(ArithOp::Div)); | ||
165 | let mod_ = parse_op("%", BinOp::Arith(ArithOp::Mod)); | ||
166 | let op = alt((mul, div, mod_)); | ||
167 | let recursive = parse_binary(parse_atom, op, parse_mul); | ||
168 | let base = parse_atom; | ||
169 | alt((recursive, base)).parse(i) | ||
170 | } | ||
171 | |||
172 | fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Expr> { | ||
173 | let inner = alt(( | ||
174 | map(tag("node"), |_| Expr::Node), | ||
175 | map(parse_block, Expr::Block), | ||
176 | map(parse_if, Expr::IfExpr), | ||
177 | map(parse_call, Expr::Call), | ||
178 | map(parse_lit, Expr::Lit), | ||
179 | map(parse_ident, Expr::Ident), | ||
180 | map(parse_unit, |_| Expr::Unit), | ||
181 | )); | ||
182 | ws(inner).parse(i) | ||
183 | } | ||
184 | |||
185 | fn parse_call<'a>(i: &'a str) -> IResult<&'a str, Call> { | ||
186 | let ident = parse_ident; | ||
187 | let open = ws(char('(')); | ||
188 | let args = separated_list0(char(','), parse_expr); | ||
189 | let close = ws(char(')')); | ||
190 | map( | ||
191 | tuple((ident, open, args, close)), | ||
192 | |(function, _, parameters, _)| Call { | ||
193 | function, | ||
194 | parameters, | ||
195 | }, | ||
196 | ) | ||
197 | .parse(i) | ||
198 | } | ||
199 | |||
200 | fn parse_block<'a>(i: &'a str) -> IResult<&'a str, Block> { | ||
201 | let open = ws(char('{')); | ||
202 | let statements = map(many0(parse_statement), |body| Block { body }); | ||
203 | let close = ws(char('}')); | ||
204 | delimited(open, statements, close).parse(i) | ||
205 | } | ||
206 | |||
207 | fn parse_if<'a>(i: &'a str) -> IResult<&'a str, If> { | ||
208 | let if_ = delimited(multispace0, tag("if"), multispace1); | ||
209 | |||
210 | let open = char('('); | ||
211 | let condition = ws(parse_expr); | ||
212 | let close = terminated(char(')'), multispace0); | ||
213 | |||
214 | let then = parse_block; | ||
215 | |||
216 | let else_kw = ws(tag("else")); | ||
217 | let else_ = opt(preceded(else_kw, parse_block)); | ||
218 | |||
219 | map( | ||
220 | tuple((if_, open, condition, close, then, else_)), | ||
221 | |(_, _, condition, _, then, else_)| If { | ||
222 | condition: condition.boxed(), | ||
223 | then, | ||
224 | else_: else_.unwrap_or_default(), | ||
225 | }, | ||
226 | )(i) | ||
227 | } | ||
228 | |||
229 | fn parse_expr<'a>(i: &'a str) -> IResult<&'a str, Expr> { | ||
230 | parse_assign.parse(i) | ||
231 | } | ||
232 | |||
233 | fn parse_bare<'a>(i: &'a str) -> IResult<&'a str, Expr> { | ||
234 | parse_expr(i) | ||
235 | } | ||
236 | |||
237 | fn parse_type<'a>(i: &'a str) -> IResult<&'a str, Type> { | ||
238 | let int = value(Type::Integer, tag("int")); | ||
239 | let string = value(Type::String, tag("string")); | ||
240 | let bool_ = value(Type::Boolean, tag("bool")); | ||
241 | alt((int, string, bool_)).parse(i) | ||
242 | } | ||
243 | |||
244 | fn parse_declaration<'a>(i: &'a str) -> IResult<&'a str, Declaration> { | ||
245 | let ty = parse_type; | ||
246 | let name = parse_ident; | ||
247 | let op = ws(char('=')); | ||
248 | let init = opt(preceded(op, map(parse_expr, Expr::boxed))); | ||
249 | map( | ||
250 | tuple((ty, multispace0, name, init)), | ||
251 | |(ty, _, name, init)| Declaration { ty, name, init }, | ||
252 | )(i) | ||
253 | } | ||
254 | |||
255 | fn parse_statement<'a>(i: &'a str) -> IResult<&'a str, Statement> { | ||
256 | let semicolon = ws(char(';')); | ||
257 | let inner = alt(( | ||
258 | map(parse_declaration, Statement::Declaration), | ||
259 | map(parse_bare, Statement::Bare), | ||
260 | )); | ||
261 | terminated(inner, semicolon).parse(i) | ||
262 | } | ||
263 | |||
264 | // pub fn skip_query(mut i: &str) -> IResult<&str, ()> { | ||
265 | // let mut paren_depth = 0; | ||
266 | // let mut in_string = false; | ||
267 | // let mut in_escape = false; | ||
268 | // let mut in_comment = false; | ||
269 | // loop { | ||
270 | // let ch = i | ||
271 | // .chars() | ||
272 | // .next() | ||
273 | // .ok_or(nom::Err::Error(nom::error::Error::new( | ||
274 | // i, | ||
275 | // nom::error::ErrorKind::Eof, | ||
276 | // )))?; | ||
277 | // if in_escape { | ||
278 | // in_escape = false; | ||
279 | // } else if in_string { | ||
280 | // match ch { | ||
281 | // '\\' => { | ||
282 | // in_escape = true; | ||
283 | // } | ||
284 | // '"' | '\n' => { | ||
285 | // in_string = false; | ||
286 | // } | ||
287 | // _ => {} | ||
288 | // } | ||
289 | // } else if in_comment { | ||
290 | // if ch == '\n' { | ||
291 | // in_comment = false; | ||
292 | // } | ||
293 | // } else { | ||
294 | // match ch { | ||
295 | // '"' => in_string = true, | ||
296 | // '(' => paren_depth += 1, | ||
297 | // ')' => { | ||
298 | // if paren_depth > 0 { | ||
299 | // paren_depth -= 1; | ||
300 | // } | ||
301 | // } | ||
302 | // '{' => return Ok((i, ())), | ||
303 | // ';' => in_comment = true, | ||
304 | // _ => {} | ||
305 | // } | ||
306 | // } | ||
307 | // i = &i[1..]; | ||
308 | // } | ||
309 | // } | ||
310 | |||
311 | // fn parse_query<'a>( | ||
312 | // language: tree_sitter::Language, | ||
313 | // ) -> impl FnMut(&'a str) -> IResult<&'a str, Query> { | ||
314 | // return move |initial: &'a str| { | ||
315 | // let query_start = 0; | ||
316 | // let (skipped, _) = skip_query(initial)?; | ||
317 | // let query_end = initial.len() - skipped.len(); | ||
318 | // let query_source = &initial[query_start..query_end].to_owned(); | ||
319 | // | ||
320 | // let query = Query::new(language, &query_source).map_err(|mut _e| { | ||
321 | // nom::Err::Error(nom::error::Error::new(initial, nom::error::ErrorKind::Fail)) | ||
322 | // })?; | ||
323 | // Ok((skipped, query)) | ||
324 | // }; | ||
325 | // } | ||
326 | |||
327 | fn parse_modifier<'a>(i: &str) -> IResult<&str, Modifier> { | ||
328 | let pre = value(Modifier::Enter, tag("enter")); | ||
329 | let post = value(Modifier::Leave, tag("leave")); | ||
330 | map(opt(alt((pre, post))), Option::unwrap_or_default)(i) | ||
331 | } | ||
332 | |||
333 | fn parse_pattern<'a>(i: &str) -> IResult<&str, Pattern> { | ||
334 | let begin = value(Pattern::Begin, ws(tag("BEGIN"))); | ||
335 | let end = value(Pattern::End, ws(tag("END"))); | ||
336 | let node = map( | ||
337 | tuple((parse_modifier, multispace0, parse_ident)), | ||
338 | |(modifier, _, kind)| Pattern::Node(NodePattern { modifier, kind }), | ||
339 | ); | ||
340 | alt((begin, end, node)).parse(i) | ||
341 | } | ||
342 | |||
343 | pub fn parse_stanza<'a>(i: &str) -> IResult<&str, Stanza> { | ||
344 | map( | ||
345 | tuple((parse_pattern, parse_block)), | ||
346 | |(pattern, statements)| Stanza { | ||
347 | pattern, | ||
348 | statements, | ||
349 | }, | ||
350 | )(i) | ||
351 | } | ||
352 | |||
353 | pub fn parse_file(i: &str) -> IResult<&str, Vec<Stanza>> { | ||
354 | many0(parse_stanza).parse(i) | ||
355 | } | ||
356 | |||
#[cfg(test)]
mod test {
    use super::*;

    /// Builds an identifier expression.
    fn ident(name: &str) -> Expr {
        Expr::Ident(name.to_owned())
    }

    /// Builds a binary expression, boxing both operands.
    fn bin(lhs: Expr, op: BinOp, rhs: Expr) -> Expr {
        Expr::Bin(lhs.boxed(), op, rhs.boxed())
    }

    /// Wraps a list of statements in a block.
    fn block(body: Vec<Statement>) -> Block {
        Block { body }
    }

    /// The bare statement `true;`.
    fn bare_true() -> Statement {
        Statement::Bare(Expr::true_())
    }

    /// Builds a node pattern for `kind` with the given modifier.
    fn node_pattern(modifier: Modifier, kind: &str) -> Pattern {
        Pattern::Node(NodePattern {
            modifier,
            kind: kind.to_owned(),
        })
    }

    #[test]
    fn test_parse_unit() {
        let parsed = parse_unit("()");
        assert_eq!(parsed, Ok(("", ())))
    }

    #[test]
    fn test_parse_int() {
        for (input, expected) in [("123456", 123456), ("00123456", 123456)] {
            assert_eq!(parse_int(input), Ok(("", expected)));
        }
    }

    #[test]
    fn test_parse_bool() {
        for (input, expected) in [("true", true), ("false", false)] {
            assert_eq!(parse_bool(input), Ok(("", expected)));
        }
    }

    #[test]
    fn test_parse_name() {
        for name in ["true", "_abc"] {
            assert_eq!(parse_name(name), Ok(("", name)));
        }
    }

    #[test]
    fn test_parse_literal() {
        let string = Literal::Str("foobarbaz".to_owned());
        assert_eq!(parse_lit(r#""foobarbaz""#), Ok(("", string)));
        assert_eq!(parse_lit("123"), Ok(("", Literal::Int(123))));
        assert_eq!(parse_lit("true"), Ok(("", Literal::Bool(true))));
    }

    #[test]
    fn test_parse_expr() {
        let or = BinOp::Logic(LogicOp::Or);
        let and = BinOp::Logic(LogicOp::And);
        let eq = BinOp::Cmp(CmpOp::Eq);

        assert_eq!(parse_expr(" () "), Ok(("", Expr::Unit)));
        assert_eq!(parse_expr(" 55 "), Ok(("", Expr::int(55))));

        let expected = bin(Expr::true_(), or, Expr::true_());
        assert_eq!(parse_expr(" true || true "), Ok(("", expected)));

        // `||` binds loosest, then `&&`, then `==`.
        let comparison = bin(Expr::int(5), eq, Expr::int(5));
        let conjunction = bin(Expr::false_(), and, comparison);
        let expected = bin(Expr::true_(), or, conjunction);
        assert_eq!(parse_expr("true || false && 5 == 5 "), Ok(("", expected)));

        let expected = Expr::Call(Call {
            function: "foo".to_owned(),
            parameters: vec![
                Expr::int(1),
                Expr::int(2),
                Expr::int(3),
                bin(Expr::int(1), eq, Expr::int(1)),
            ],
        });
        assert_eq!(parse_expr(" foo ( 1, 2,3 , 1 == 1)"), Ok(("", expected)));

        let expected = bin(ident("a"), BinOp::Assign(AssignOp { op: None }), ident("b"));
        assert_eq!(parse_expr("a = b"), Ok(("", expected)));

        let add_assign = BinOp::Assign(AssignOp {
            op: Some(ArithOp::Add),
        });
        let sum = bin(Expr::int(4), BinOp::Arith(ArithOp::Add), Expr::int(5));
        let expected = bin(ident("a"), add_assign, sum);
        assert_eq!(parse_expr(" a += 4 + 5"), Ok(("", expected)));
    }

    #[test]
    fn test_parse_statement() {
        assert_eq!(parse_statement("true;"), Ok(("", bare_true())));
        assert_eq!(parse_statement("true ; "), Ok(("", bare_true())));

        let uninitialised = Statement::Declaration(Declaration {
            ty: Type::Integer,
            name: "a".to_owned(),
            init: None,
        });
        assert_eq!(parse_statement("int a ; "), Ok(("", uninitialised)));

        let initialised = Statement::Declaration(Declaration {
            ty: Type::Integer,
            name: "a".to_owned(),
            init: Some(Expr::int(5).boxed()),
        });
        assert_eq!(parse_statement("int a =5 ; "), Ok(("", initialised)));
    }

    #[test]
    fn test_parse_block() {
        let source = r#"
        {
            true;
            1;
        }
        "#;
        let expected = Expr::Block(block(vec![bare_true(), Statement::Bare(Expr::int(1))]));
        assert_eq!(parse_expr(source), Ok(("", expected)));
    }

    #[test]
    fn test_parse_if() {
        let source = r#"
        if (1 == true) {
            5;
        } else {
            10;
        }
        "#;
        let expected = Expr::IfExpr(If {
            condition: bin(Expr::int(1), BinOp::Cmp(CmpOp::Eq), Expr::true_()).boxed(),
            then: block(vec![Statement::Bare(Expr::int(5))]),
            else_: block(vec![Statement::Bare(Expr::int(10))]),
        });
        assert_eq!(parse_expr(source), Ok(("", expected)));
    }

    // #[test]
    // fn test_skip_query() {
    //     assert_eq!(
    //         skip_query(
    //             r#"(heading
    //             (paragraph) @foo) {}"#
    //         ),
    //         Ok(("{}", ()))
    //     );
    // }

    #[test]
    fn test_parse_pattern() {
        // A missing modifier defaults to `Enter`.
        let cases = [
            ("enter function_definition", Modifier::Enter),
            ("function_definition", Modifier::Enter),
            ("leave function_definition", Modifier::Leave),
        ];
        for (input, modifier) in cases {
            assert_eq!(
                parse_pattern(input),
                Ok(("", node_pattern(modifier, "function_definition")))
            );
        }
    }

    #[test]
    fn test_parse_stanza() {
        let node_stanza = Stanza {
            pattern: node_pattern(Modifier::Enter, "function_definition"),
            statements: block(vec![bare_true()]),
        };
        assert_eq!(
            parse_stanza("enter function_definition { true; }"),
            Ok(("", node_stanza))
        );

        let begin_stanza = Stanza {
            pattern: Pattern::Begin,
            statements: block(vec![bare_true()]),
        };
        assert_eq!(parse_stanza("BEGIN { true; }"), Ok(("", begin_stanza)));

        let multiline = " {
            true;
        }";
        assert_eq!(parse_block(multiline), Ok(("", block(vec![bare_true()]))));
    }

    #[test]
    fn test_parse_if_statement_regression() {
        let without_else = Statement::Bare(Expr::IfExpr(If {
            condition: Expr::true_().boxed(),
            then: block(vec![bare_true()]),
            else_: Block::default(),
        }));
        assert_eq!(
            parse_statement("if (true) { true; };"),
            Ok(("", without_else))
        );

        let with_else = Expr::IfExpr(If {
            condition: Expr::true_().boxed(),
            then: block(vec![bare_true()]),
            else_: block(vec![bare_true()]),
        });
        assert_eq!(
            parse_expr("if (true) { true; } else { true; }"),
            Ok(("", with_else))
        );
    }
}
diff --git a/src/string.rs b/src/string.rs new file mode 100644 index 0000000..820f9ff --- /dev/null +++ b/src/string.rs | |||
@@ -0,0 +1,152 @@ | |||
1 | use nom::branch::alt; | ||
2 | use nom::bytes::streaming::{is_not, take_while_m_n}; | ||
3 | use nom::character::streaming::{char, multispace1}; | ||
4 | use nom::combinator::{map, map_opt, map_res, value, verify}; | ||
5 | use nom::error::{FromExternalError, ParseError}; | ||
6 | use nom::multi::fold_many0; | ||
7 | use nom::sequence::{delimited, preceded}; | ||
8 | use nom::{IResult, Parser}; | ||
9 | |||
10 | // parser combinators are constructed from the bottom up: | ||
11 | // first we write parsers for the smallest elements (escaped characters), | ||
12 | // then combine them into larger parsers. | ||
13 | |||
14 | /// Parse a unicode sequence, of the form u{XXXX}, where XXXX is 1 to 6 | ||
15 | /// hexadecimal numerals. We will combine this later with parse_escaped_char | ||
16 | /// to parse sequences like \u{00AC}. | ||
17 | fn parse_unicode<'a, E>(input: &'a str) -> IResult<&'a str, char, E> | ||
18 | where | ||
19 | E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, | ||
20 | { | ||
21 | // `take_while_m_n` parses between `m` and `n` bytes (inclusive) that match | ||
22 | // a predicate. `parse_hex` here parses between 1 and 6 hexadecimal numerals. | ||
23 | let parse_hex = take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()); | ||
24 | |||
25 | // `preceded` takes a prefix parser, and if it succeeds, returns the result | ||
26 | // of the body parser. In this case, it parses u{XXXX}. | ||
27 | let parse_delimited_hex = preceded( | ||
28 | char('u'), | ||
29 | // `delimited` is like `preceded`, but it parses both a prefix and a suffix. | ||
30 | // It returns the result of the middle parser. In this case, it parses | ||
31 | // {XXXX}, where XXXX is 1 to 6 hex numerals, and returns XXXX | ||
32 | delimited(char('{'), parse_hex, char('}')), | ||
33 | ); | ||
34 | |||
35 | // `map_res` takes the result of a parser and applies a function that returns | ||
36 | // a Result. In this case we take the hex bytes from parse_hex and attempt to | ||
37 | // convert them to a u32. | ||
38 | let parse_u32 = map_res(parse_delimited_hex, move |hex| u32::from_str_radix(hex, 16)); | ||
39 | |||
40 | // map_opt is like map_res, but it takes an Option instead of a Result. If | ||
41 | // the function returns None, map_opt returns an error. In this case, because | ||
42 | // not all u32 values are valid unicode code points, we have to fallibly | ||
43 | // convert to char with from_u32. | ||
44 | map_opt(parse_u32, std::char::from_u32).parse(input) | ||
45 | } | ||
46 | |||
47 | /// Parse an escaped character: \n, \t, \r, \u{00AC}, etc. | ||
48 | fn parse_escaped_char<'a, E>(input: &'a str) -> IResult<&'a str, char, E> | ||
49 | where | ||
50 | E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, | ||
51 | { | ||
52 | preceded( | ||
53 | char('\\'), | ||
54 | // `alt` tries each parser in sequence, returning the result of | ||
55 | // the first successful match | ||
56 | alt(( | ||
57 | parse_unicode, | ||
58 | // The `value` parser returns a fixed value (the first argument) if its | ||
59 | // parser (the second argument) succeeds. In these cases, it looks for | ||
60 | // the marker characters (n, r, t, etc) and returns the matching | ||
61 | // character (\n, \r, \t, etc). | ||
62 | value('\n', char('n')), | ||
63 | value('\r', char('r')), | ||
64 | value('\t', char('t')), | ||
65 | value('\u{08}', char('b')), | ||
66 | value('\u{0C}', char('f')), | ||
67 | value('\\', char('\\')), | ||
68 | value('/', char('/')), | ||
69 | value('"', char('"')), | ||
70 | )), | ||
71 | ) | ||
72 | .parse(input) | ||
73 | } | ||
74 | |||
75 | /// Parse a backslash, followed by any amount of whitespace. This is used later | ||
76 | /// to discard any escaped whitespace. | ||
77 | fn parse_escaped_whitespace<'a, E: ParseError<&'a str>>( | ||
78 | input: &'a str, | ||
79 | ) -> IResult<&'a str, &'a str, E> { | ||
80 | preceded(char('\\'), multispace1).parse(input) | ||
81 | } | ||
82 | |||
83 | /// Parse a non-empty block of text that doesn't include \ or " | ||
84 | fn parse_literal<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> { | ||
85 | // `is_not` parses a string of 0 or more characters that aren't one of the | ||
86 | // given characters. | ||
87 | let not_quote_slash = is_not("\"\\"); | ||
88 | |||
89 | // `verify` runs a parser, then runs a verification function on the output of | ||
90 | // the parser. The verification function accepts out output only if it | ||
91 | // returns true. In this case, we want to ensure that the output of is_not | ||
92 | // is non-empty. | ||
93 | verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input) | ||
94 | } | ||
95 | |||
/// A string fragment contains a fragment of a string being parsed: either
/// a non-empty Literal (a series of non-escaped characters), a single
/// parsed escaped character, or a block of escaped whitespace.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<'a> {
    /// A run of unescaped characters, borrowed from the input.
    Literal(&'a str),
    /// The single character an escape sequence stands for.
    EscapedChar(char),
    /// A backslash followed by whitespace; contributes nothing to the output.
    EscapedWS,
}
105 | |||
106 | /// Combine parse_literal, parse_escaped_whitespace, and parse_escaped_char | ||
107 | /// into a StringFragment. | ||
108 | fn parse_fragment<'a, E>(input: &'a str) -> IResult<&'a str, StringFragment<'a>, E> | ||
109 | where | ||
110 | E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, | ||
111 | { | ||
112 | alt(( | ||
113 | // The `map` combinator runs a parser, then applies a function to the output | ||
114 | // of that parser. | ||
115 | map(parse_literal, StringFragment::Literal), | ||
116 | map(parse_escaped_char, StringFragment::EscapedChar), | ||
117 | value(StringFragment::EscapedWS, parse_escaped_whitespace), | ||
118 | )) | ||
119 | .parse(input) | ||
120 | } | ||
121 | |||
122 | /// Parse a string. Use a loop of parse_fragment and push all of the fragments | ||
123 | /// into an output string. | ||
124 | pub fn parse_string<'a, E>(input: &'a str) -> IResult<&'a str, String, E> | ||
125 | where | ||
126 | E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>, | ||
127 | { | ||
128 | // fold is the equivalent of iterator::fold. It runs a parser in a loop, | ||
129 | // and for each output value, calls a folding function on each output value. | ||
130 | let build_string = fold_many0( | ||
131 | // Our parser function – parses a single string fragment | ||
132 | parse_fragment, | ||
133 | // Our init value, an empty string | ||
134 | String::new, | ||
135 | // Our folding function. For each fragment, append the fragment to the | ||
136 | // string. | ||
137 | |mut string, fragment| { | ||
138 | match fragment { | ||
139 | StringFragment::Literal(s) => string.push_str(s), | ||
140 | StringFragment::EscapedChar(c) => string.push(c), | ||
141 | StringFragment::EscapedWS => {} | ||
142 | } | ||
143 | string | ||
144 | }, | ||
145 | ); | ||
146 | |||
147 | // Finally, parse the string. Note that, if `build_string` could accept a raw | ||
148 | // " character, the closing delimiter " would never match. When using | ||
149 | // `delimited` with a looping parser (like fold), be sure that the | ||
150 | // loop won't accidentally match your closing delimiter! | ||
151 | delimited(char('"'), build_string, char('"')).parse(input) | ||
152 | } | ||