aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAkshay <[email protected]>2024-07-13 18:32:41 +0100
committerAkshay <[email protected]>2024-07-13 18:32:41 +0100
commit8eb38033e0c615983c4490354dad4abb00031042 (patch)
tree78d35946d2d14dd015eab53f6bf7b92153518b6f /src
init trawk
Diffstat (limited to 'src')
-rw-r--r--src/ast.rs186
-rw-r--r--src/eval.rs764
-rw-r--r--src/lib.rs7
-rw-r--r--src/main.rs47
-rw-r--r--src/parser.rs689
-rw-r--r--src/string.rs152
6 files changed, 1845 insertions, 0 deletions
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..07b5c39
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,186 @@
1#[derive(Debug)]
2pub struct Program {
3 pub stanzas: Vec<Stanza>,
4}
5
6impl Program {
7 pub fn new() -> Self {
8 Self {
9 stanzas: Vec::new(),
10 }
11 }
12
13 pub fn from_str(mut self, i: &str) -> Result<Self, nom::error::Error<&str>> {
14 use nom::Finish;
15 let (remaining_input, stanzas) = crate::parser::parse_file(i).finish()?;
16 assert!(remaining_input.trim().is_empty(), "{remaining_input}");
17 self.stanzas = stanzas;
18 Ok(self)
19 }
20}
21
22#[derive(Debug, PartialEq, Eq)]
23pub struct Stanza {
24 pub pattern: Pattern,
25 pub statements: Block,
26}
27
28#[derive(Debug, Eq, PartialEq, Clone)]
29pub enum Pattern {
30 Begin,
31 End,
32 Node(NodePattern),
33}
34
35#[derive(Debug, Eq, PartialEq, Clone)]
36pub struct NodePattern {
37 pub modifier: Modifier,
38 pub kind: String,
39}
40
/// Point of the traversal at which a node stanza fires.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum Modifier {
    /// Fire when the traversal reaches the node (the default).
    #[default]
    Enter,
    /// Fire when the traversal is done with the node and its children.
    Leave,
}
47
48#[derive(Debug, Default, Eq, PartialEq, Clone)]
49pub struct Block {
50 pub body: Vec<Statement>,
51}
52
53#[derive(Debug, Eq, PartialEq, Clone)]
54pub enum Statement {
55 Bare(Expr),
56 Declaration(Declaration),
57}
58
59#[derive(Debug, Eq, PartialEq, Clone)]
60pub enum Expr {
61 Node,
62 Unit,
63 Lit(Literal),
64 Ident(Identifier),
65 // List(Vec<Expr>),
66 Bin(Box<Expr>, BinOp, Box<Expr>),
67 Unary(Box<Expr>, UnaryOp),
68 Call(Call),
69 IfExpr(If),
70 Block(Block),
71}
72
73impl Expr {
74 pub fn int(int: i128) -> Expr {
75 Self::Lit(Literal::Int(int))
76 }
77
78 pub fn str(s: &str) -> Expr {
79 Self::Lit(Literal::Str(s.to_owned()))
80 }
81
82 pub const fn false_() -> Expr {
83 Self::Lit(Literal::Bool(false))
84 }
85
86 pub const fn true_() -> Expr {
87 Self::Lit(Literal::Bool(true))
88 }
89
90 pub fn boxed(self) -> Box<Expr> {
91 Box::new(self)
92 }
93}
94
/// Unary operators.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum UnaryOp {
    /// Boolean negation.
    Not,
}
99
100#[derive(Debug, Eq, PartialEq, Clone, Copy)]
101pub enum BinOp {
102 Arith(ArithOp),
103 Cmp(CmpOp),
104 Logic(LogicOp),
105 // =
106 Assign(AssignOp),
107}
108
/// Arithmetic operators: `+ - * / %`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ArithOp {
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
    /// `%`
    Mod,
}
118
/// Boolean connectives: `&&` and `||`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LogicOp {
    /// `&&`
    And,
    /// `||`
    Or,
}
125
/// Comparison operators: `== != > < >= <=`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CmpOp {
    /// `==`
    Eq,
    /// `!=`
    Neq,
    /// `>`
    Gt,
    /// `<`
    Lt,
    /// `>=`
    Gte,
    /// `<=`
    Lte,
}
136
137// =, +=, -=, *=, /=
138#[derive(Debug, Eq, PartialEq, Clone, Copy)]
139pub struct AssignOp {
140 pub op: Option<ArithOp>,
141}
142
/// Name of a variable or function, stored as an owned string.
pub type Identifier = String;
144
/// A literal value written directly in the program text.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Literal {
    /// A string literal.
    Str(String),
    /// An integer literal.
    Int(i128),
    /// A boolean literal.
    Bool(bool),
}
151
152/// A function call
153#[derive(Debug, Eq, PartialEq, Clone)]
154pub struct Call {
155 pub function: Identifier,
156 pub parameters: Vec<Expr>,
157}
158
159impl From<Call> for Expr {
160 fn from(expr: Call) -> Expr {
161 Expr::Call(expr)
162 }
163}
164
/// Static types a variable or value can have.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Type {
    /// The unit type.
    Unit,
    /// Integers.
    Integer,
    /// Strings.
    String,
    /// Booleans.
    Boolean,
    /// Tree nodes.
    Node,
}
173
174#[derive(Debug, PartialEq, Eq, Clone)]
175pub struct Declaration {
176 pub ty: Type,
177 pub name: Identifier,
178 pub init: Option<Box<Expr>>,
179}
180
181#[derive(Debug, Eq, PartialEq, Clone)]
182pub struct If {
183 pub condition: Box<Expr>,
184 pub then: Block,
185 pub else_: Block,
186}
diff --git a/src/eval.rs b/src/eval.rs
new file mode 100644
index 0000000..859979d
--- /dev/null
+++ b/src/eval.rs
@@ -0,0 +1,764 @@
1//! tree walking interpreter for trawk
2
3use crate::ast;
4use std::{collections::HashMap, fmt};
5
6#[derive(Debug, PartialEq, Eq, Clone)]
7pub struct Variable {
8 pub ty: ast::Type,
9 pub name: ast::Identifier,
10 pub value: Value,
11}
12
13impl Variable {
14 fn value(&self) -> &Value {
15 &self.value
16 }
17
18 fn ty(&self) -> ast::Type {
19 self.ty
20 }
21
22 fn assign(&mut self, value: Value) -> Result {
23 if self.ty() == value.ty() {
24 self.value = value;
25 Ok(self.value.clone())
26 } else {
27 Err(Error::TypeMismatch {
28 expected: self.ty(),
29 got: value.ty(),
30 })
31 }
32 }
33}
34
/// A runtime value produced by evaluating an expression.
///
/// The derived ordering follows the declaration order of the variants.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Value {
    /// The unit value.
    Unit,
    /// An integer.
    Integer(i128),
    /// A string.
    String(String),
    /// A boolean.
    Boolean(bool),
    /// Marker value produced by the `node` expression.
    Node,
}
43
44impl Value {
45 fn ty(&self) -> ast::Type {
46 match self {
47 Self::Unit => ast::Type::Unit,
48 Self::Integer(_) => ast::Type::Integer,
49 Self::String(_) => ast::Type::String,
50 Self::Boolean(_) => ast::Type::Boolean,
51 Self::Node => ast::Type::Node,
52 }
53 }
54
55 fn default(ty: ast::Type) -> Self {
56 match ty {
57 ast::Type::Unit => Self::Unit,
58 ast::Type::Integer => Self::default_int(),
59 ast::Type::String => Self::default_string(),
60 ast::Type::Boolean => Self::default_bool(),
61 ast::Type::Node => unreachable!(),
62 }
63 }
64
65 fn default_int() -> Self {
66 Self::Integer(0)
67 }
68
69 fn default_bool() -> Self {
70 Self::Boolean(false)
71 }
72
73 fn default_string() -> Self {
74 Self::String(String::default())
75 }
76
77 fn as_boolean(&self) -> Option<bool> {
78 match self {
79 Self::Boolean(b) => Some(*b),
80 _ => None,
81 }
82 }
83
84 fn add(&self, other: &Self) -> Result {
85 match (self, other) {
86 (Self::Integer(s), Self::Integer(o)) => Ok(Self::Integer(*s + *o)),
87 (Self::String(s), Self::String(o)) => Ok(Self::String(format!("{s}{o}"))),
88 _ => Err(Error::UndefinedBinOp(
89 ast::BinOp::Arith(ast::ArithOp::Add),
90 self.ty(),
91 other.ty(),
92 )),
93 }
94 }
95
96 fn sub(&self, other: &Self) -> Result {
97 match (self, other) {
98 (Self::Integer(s), Self::Integer(o)) => Ok(Self::Integer(*s - *o)),
99 (Self::String(s), Self::String(o)) => {
100 Ok(Self::String(s.strip_suffix(o).unwrap_or(s).to_owned()))
101 }
102 _ => Err(Error::UndefinedBinOp(
103 ast::BinOp::Arith(ast::ArithOp::Sub),
104 self.ty(),
105 other.ty(),
106 )),
107 }
108 }
109
110 fn mul(&self, other: &Self) -> Result {
111 match (self, other) {
112 (Self::Integer(s), Self::Integer(o)) => Ok(Self::Integer(*s * *o)),
113 _ => Err(Error::UndefinedBinOp(
114 ast::BinOp::Arith(ast::ArithOp::Mul),
115 self.ty(),
116 other.ty(),
117 )),
118 }
119 }
120
121 fn div(&self, other: &Self) -> Result {
122 match (self, other) {
123 (Self::Integer(s), Self::Integer(o)) => Ok(Self::Integer(*s / *o)),
124 _ => Err(Error::UndefinedBinOp(
125 ast::BinOp::Arith(ast::ArithOp::Div),
126 self.ty(),
127 other.ty(),
128 )),
129 }
130 }
131
132 fn mod_(&self, other: &Self) -> Result {
133 match (self, other) {
134 (Self::Integer(s), Self::Integer(o)) => Ok(Self::Integer(*s % *o)),
135 _ => Err(Error::UndefinedBinOp(
136 ast::BinOp::Arith(ast::ArithOp::Mod),
137 self.ty(),
138 other.ty(),
139 )),
140 }
141 }
142
143 fn equals(&self, other: &Self) -> Result {
144 match (self, other) {
145 (Self::Integer(s), Self::Integer(o)) => Ok(Self::Boolean(s == o)),
146 (Self::String(s), Self::String(o)) => Ok(Self::Boolean(s == o)),
147 (Self::Boolean(s), Self::Boolean(o)) => Ok(Self::Boolean(s == o)),
148 _ => Err(Error::UndefinedBinOp(
149 ast::BinOp::Cmp(ast::CmpOp::Eq),
150 self.ty(),
151 other.ty(),
152 )),
153 }
154 }
155
156 fn greater_than(&self, other: &Self) -> Result {
157 match (self, other) {
158 (Self::Integer(s), Self::Integer(o)) => Ok(Self::Boolean(s > o)),
159 (Self::String(s), Self::String(o)) => Ok(Self::Boolean(s.cmp(o).is_gt())),
160 _ => Err(Error::UndefinedBinOp(
161 ast::BinOp::Cmp(ast::CmpOp::Gt),
162 self.ty(),
163 other.ty(),
164 )),
165 }
166 }
167
168 fn less_than(&self, other: &Self) -> Result {
169 match (self, other) {
170 (Self::Integer(s), Self::Integer(o)) => Ok(Self::Boolean(s < o)),
171 (Self::String(s), Self::String(o)) => Ok(Self::Boolean(s.cmp(o).is_lt())),
172 _ => Err(Error::UndefinedBinOp(
173 ast::BinOp::Cmp(ast::CmpOp::Lt),
174 self.ty(),
175 other.ty(),
176 )),
177 }
178 }
179
180 fn greater_than_equals(&self, other: &Self) -> Result {
181 match (self, other) {
182 (Self::Integer(s), Self::Integer(o)) => Ok(Self::Boolean(s >= o)),
183 (Self::String(s), Self::String(o)) => Ok(Self::Boolean(s.cmp(o).is_ge())),
184 (Self::Boolean(s), Self::Boolean(o)) => Ok(Self::Boolean(s == o)),
185 _ => Err(Error::UndefinedBinOp(
186 ast::BinOp::Cmp(ast::CmpOp::Gte),
187 self.ty(),
188 other.ty(),
189 )),
190 }
191 }
192
193 fn less_than_equals(&self, other: &Self) -> Result {
194 match (self, other) {
195 (Self::Integer(s), Self::Integer(o)) => Ok(Self::Boolean(s <= o)),
196 (Self::String(s), Self::String(o)) => Ok(Self::Boolean(s.cmp(o).is_le())),
197 (Self::Boolean(s), Self::Boolean(o)) => Ok(Self::Boolean(s == o)),
198 _ => Err(Error::UndefinedBinOp(
199 ast::BinOp::Cmp(ast::CmpOp::Lte),
200 self.ty(),
201 other.ty(),
202 )),
203 }
204 }
205
206 fn not(&self) -> Result {
207 match self {
208 Self::Boolean(s) => Ok(Self::Boolean(!s)),
209 _ => Err(Error::UndefinedUnaryOp(ast::UnaryOp::Not, self.ty())),
210 }
211 }
212
213 fn and(&self, other: &Self) -> Result {
214 match (self, other) {
215 (Self::Boolean(s), Self::Boolean(o)) => Ok(Self::Boolean(*s && *o)),
216 _ => Err(Error::UndefinedBinOp(
217 ast::BinOp::Logic(ast::LogicOp::And),
218 self.ty(),
219 other.ty(),
220 )),
221 }
222 }
223
224 fn or(&self, other: &Self) -> Result {
225 match (self, other) {
226 (Self::Boolean(s), Self::Boolean(o)) => Ok(Self::Boolean(*s || *o)),
227 _ => Err(Error::UndefinedBinOp(
228 ast::BinOp::Logic(ast::LogicOp::Or),
229 self.ty(),
230 other.ty(),
231 )),
232 }
233 }
234}
235
236impl fmt::Display for Value {
237 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
238 match self {
239 Self::Unit => write!(f, "()"),
240 Self::Integer(i) => write!(f, "{i}"),
241 Self::String(s) => write!(f, "{s}"),
242 Self::Boolean(b) => write!(f, "{b}"),
243 Self::Node => write!(f, "<node>"),
244 }
245 }
246}
247
/// Numeric id of a node kind, used as the key for visitor lookup.
type NodeKind = u16;
249
250#[derive(Debug, Default)]
251struct Visitor {
252 enter: ast::Block,
253 leave: ast::Block,
254}
255
256#[derive(Debug)]
257struct Visitors {
258 visitors: HashMap<NodeKind, Visitor>,
259 begin: ast::Block,
260 end: ast::Block,
261}
262
263impl Default for Visitors {
264 fn default() -> Self {
265 Self::new()
266 }
267}
268
269impl Visitors {
270 pub fn new() -> Self {
271 Self {
272 visitors: HashMap::new(),
273 begin: ast::Block { body: vec![] },
274 end: ast::Block { body: vec![] },
275 }
276 }
277
278 pub fn insert(
279 &mut self,
280 stanza: ast::Stanza,
281 language: &tree_sitter::Language,
282 ) -> std::result::Result<(), Error> {
283 match &stanza.pattern {
284 ast::Pattern::Begin => self.begin = stanza.statements,
285 ast::Pattern::End => self.end = stanza.statements,
286 ast::Pattern::Node(ast::NodePattern { modifier, kind }) => {
287 let id = language.id_for_node_kind(&kind, true);
288 if id == 0 {
289 return Err(Error::InvalidNodeKind(kind.to_owned()));
290 }
291 let v = self.visitors.entry(id).or_default();
292 match modifier {
293 ast::Modifier::Enter => v.enter = stanza.statements.clone(),
294 ast::Modifier::Leave => v.leave = stanza.statements.clone(),
295 };
296 }
297 }
298 Ok(())
299 }
300
301 pub fn get_by_node(&self, node: tree_sitter::Node) -> Option<&Visitor> {
302 let node_id = node.kind_id();
303 self.visitors.get(&node_id)
304 }
305}
306
307#[derive(Debug, PartialEq, Eq)]
308pub enum Error {
309 FailedLookup(ast::Identifier),
310 TypeMismatch { expected: ast::Type, got: ast::Type },
311 UndefinedBinOp(ast::BinOp, ast::Type, ast::Type),
312 UndefinedUnaryOp(ast::UnaryOp, ast::Type),
313 AlreadyBound(ast::Identifier),
314 MalformedExpr(String),
315 InvalidNodeKind(String),
316 // current node is only set in visitors, not in BEGIN or END blocks
317 CurrentNodeNotPresent,
318}
319
320type Result = std::result::Result<Value, Error>;
321
322pub struct Context<'a> {
323 variables: HashMap<ast::Identifier, Variable>,
324 language: tree_sitter::Language,
325 visitors: Visitors,
326 input_src: Option<String>,
327 cursor: Option<tree_sitter::TreeCursor<'a>>,
328}
329
330impl<'a> fmt::Debug for Context<'a> {
331 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
332 f.debug_struct("Context")
333 .field("variables", &self.variables)
334 .field("language", &self.language)
335 .field("visitors", &self.visitors)
336 .field("input_src", &self.input_src)
337 .field(
338 "cursor",
339 if self.cursor.is_some() {
340 &"Some(<cursor>)"
341 } else {
342 &"None"
343 },
344 )
345 .finish()
346 }
347}
348
349impl<'a> Context<'a> {
350 pub fn new(language: tree_sitter::Language) -> Self {
351 Self {
352 visitors: Default::default(),
353 variables: Default::default(),
354 language,
355 input_src: None,
356 cursor: None,
357 }
358 }
359
360 pub fn with_program(mut self, program: ast::Program) -> std::result::Result<Self, Error> {
361 for stanza in program.stanzas.into_iter() {
362 self.visitors.insert(stanza, &self.language)?;
363 }
364 Ok(self)
365 }
366
367 pub fn with_input(mut self, src: String) -> Self {
368 self.input_src = Some(src);
369 self
370 }
371
372 pub fn with_cursor(mut self, cursor: tree_sitter::TreeCursor<'a>) -> Self {
373 self.cursor = Some(cursor);
374 self
375 }
376
377 fn eval_expr(&mut self, expr: &ast::Expr) -> Result {
378 match expr {
379 ast::Expr::Unit => Ok(Value::Unit),
380 ast::Expr::Lit(lit) => self.eval_lit(lit),
381 ast::Expr::Ident(ident) => self.lookup(ident).map(Variable::value).cloned(),
382 ast::Expr::Bin(lhs, op, rhs) => self.eval_bin(&*lhs, *op, &*rhs),
383 ast::Expr::Unary(expr, op) => self.eval_unary(&*expr, *op),
384 ast::Expr::Call(call) => self.eval_call(&*call),
385 ast::Expr::IfExpr(if_expr) => self.eval_if(if_expr),
386 ast::Expr::Block(block) => self.eval_block(block),
387 ast::Expr::Node => Ok(Value::Node),
388 }
389 }
390
391 fn eval_lit(&mut self, lit: &ast::Literal) -> Result {
392 match lit {
393 ast::Literal::Str(s) => Ok(Value::String(s.to_owned())),
394 ast::Literal::Int(i) => Ok(Value::Integer(*i)),
395 ast::Literal::Bool(b) => Ok(Value::Boolean(*b)),
396 }
397 }
398
399 fn lookup(&mut self, ident: &ast::Identifier) -> std::result::Result<&Variable, Error> {
400 self.variables
401 .get(ident)
402 .ok_or_else(|| Error::FailedLookup(ident.to_owned()))
403 }
404
405 fn lookup_mut(&mut self, ident: &ast::Identifier) -> std::result::Result<&mut Variable, Error> {
406 self.variables
407 .get_mut(ident)
408 .ok_or_else(|| Error::FailedLookup(ident.to_owned()))
409 }
410
411 fn bind(
412 &mut self,
413 ident: &ast::Identifier,
414 ty: ast::Type,
415 ) -> std::result::Result<&mut Variable, Error> {
416 if self.lookup(ident).is_err() {
417 Ok(self
418 .variables
419 .entry(ident.to_owned())
420 .or_insert_with(|| Variable {
421 name: ident.to_owned(),
422 value: Value::default(ty),
423 ty,
424 }))
425 } else {
426 Err(Error::AlreadyBound(ident.to_owned()))
427 }
428 }
429
430 fn eval_bin(&mut self, lhs: &ast::Expr, op: ast::BinOp, rhs: &ast::Expr) -> Result {
431 match op {
432 ast::BinOp::Assign(op) => self.eval_assign(lhs, op, rhs),
433 ast::BinOp::Arith(op) => self.eval_arith(lhs, op, rhs),
434 ast::BinOp::Cmp(op) => self.eval_cmp(lhs, op, rhs),
435 ast::BinOp::Logic(op) => self.eval_logic(lhs, op, rhs),
436 }
437 }
438
439 fn eval_assign(
440 &mut self,
441 lhs: &ast::Expr,
442 ast::AssignOp { op }: ast::AssignOp,
443 rhs: &ast::Expr,
444 ) -> Result {
445 let ast::Expr::Ident(ident) = lhs else {
446 return Err(Error::MalformedExpr(format!(
447 "malformed assigment, lhs: {:?}",
448 lhs
449 )));
450 };
451 let value = self.eval_expr(rhs)?;
452 let variable = self.lookup_mut(ident)?;
453 match op {
454 None => variable.assign(value),
455 Some(ast::ArithOp::Add) => variable.assign(variable.value().add(&value)?),
456 Some(ast::ArithOp::Sub) => variable.assign(variable.value().sub(&value)?),
457 Some(ast::ArithOp::Mul) => variable.assign(variable.value().mul(&value)?),
458 Some(ast::ArithOp::Div) => variable.assign(variable.value().div(&value)?),
459 Some(ast::ArithOp::Mod) => variable.assign(variable.value().mod_(&value)?),
460 }
461 }
462
463 fn eval_arith(&mut self, lhs: &ast::Expr, op: ast::ArithOp, rhs: &ast::Expr) -> Result {
464 let l = self.eval_expr(lhs)?;
465 let r = self.eval_expr(rhs)?;
466 match op {
467 ast::ArithOp::Add => l.add(&r),
468 ast::ArithOp::Sub => l.sub(&r),
469 ast::ArithOp::Mul => l.mul(&r),
470 ast::ArithOp::Div => l.div(&r),
471 ast::ArithOp::Mod => l.mod_(&r),
472 }
473 }
474
475 fn eval_cmp(&mut self, lhs: &ast::Expr, op: ast::CmpOp, rhs: &ast::Expr) -> Result {
476 let l = self.eval_expr(lhs)?;
477 let r = self.eval_expr(rhs)?;
478
479 match op {
480 ast::CmpOp::Eq => l.equals(&r),
481 ast::CmpOp::Gt => l.greater_than(&r),
482 ast::CmpOp::Lt => l.less_than(&r),
483 ast::CmpOp::Neq => l.equals(&r).and_then(|v| v.not()),
484 ast::CmpOp::Gte => l.greater_than_equals(&r),
485 ast::CmpOp::Lte => l.less_than_equals(&r),
486 }
487 }
488
489 fn eval_logic(&mut self, lhs: &ast::Expr, op: ast::LogicOp, rhs: &ast::Expr) -> Result {
490 let l = self.eval_expr(lhs)?;
491
492 // short-circuit
493 let l_value = l.as_boolean().ok_or_else(|| Error::TypeMismatch {
494 expected: ast::Type::Boolean,
495 got: l.ty(),
496 })?;
497
498 match op {
499 ast::LogicOp::Or => {
500 if l_value {
501 return Ok(l);
502 } else {
503 let r = self.eval_expr(rhs)?;
504 l.or(&r)
505 }
506 }
507 ast::LogicOp::And => {
508 if !l_value {
509 return Ok(l);
510 } else {
511 let r = self.eval_expr(rhs)?;
512 l.and(&r)
513 }
514 }
515 }
516 }
517
518 fn eval_unary(&mut self, expr: &ast::Expr, op: ast::UnaryOp) -> Result {
519 let val = self.eval_expr(expr)?;
520 match op {
521 ast::UnaryOp::Not => val.not(),
522 }
523 }
524
525 fn eval_if(&mut self, if_expr: &ast::If) -> Result {
526 let cond = self.eval_expr(&if_expr.condition)?;
527
528 if cond.as_boolean().ok_or_else(|| Error::TypeMismatch {
529 expected: ast::Type::Boolean,
530 got: cond.ty(),
531 })? {
532 self.eval_block(&if_expr.then)
533 } else {
534 self.eval_block(&if_expr.else_)
535 }
536 }
537
538 fn eval_call(&mut self, call: &ast::Call) -> Result {
539 match (call.function.as_str(), call.parameters.as_slice()) {
540 ("print", args) => {
541 for arg in args {
542 let val = self.eval_expr(arg)?;
543 print!("{val}");
544 }
545 Ok(Value::Unit)
546 }
547 ("text", [arg]) if self.eval_expr(arg)? == Value::Node => {
548 let node = self
549 .cursor
550 .as_ref()
551 .ok_or(Error::CurrentNodeNotPresent)?
552 .node();
553 let text = node
554 .utf8_text(self.input_src.as_ref().unwrap().as_bytes())
555 .unwrap();
556 Ok(Value::String(text.to_owned()))
557 }
558 (s, _) => Err(Error::FailedLookup(s.to_owned())),
559 }
560 }
561
562 fn eval_declaration(&mut self, decl: &ast::Declaration) -> Result {
563 let initial_value = match decl.init.as_ref() {
564 Some(init) => Some(self.eval_expr(&*init)?),
565 None => None,
566 };
567 let variable = self.bind(&decl.name, decl.ty)?;
568
569 if let Some(init) = initial_value {
570 variable.assign(init)?;
571 }
572
573 Ok(Value::Unit)
574 }
575
576 fn eval_statement(&mut self, stmt: &ast::Statement) -> Result {
577 match stmt {
578 ast::Statement::Bare(expr) => self.eval_expr(expr).map(|_| Value::Unit),
579 ast::Statement::Declaration(decl) => self.eval_declaration(decl),
580 }
581 }
582
583 fn eval_block(&mut self, block: &ast::Block) -> Result {
584 for stmt in block.body.iter() {
585 self.eval_statement(stmt)?;
586 }
587 Ok(Value::Unit)
588 }
589
590 pub fn eval(&mut self) -> Result {
591 let visitors = std::mem::take(&mut self.visitors);
592 let mut has_next = true;
593 let mut postorder = Vec::new();
594
595 // BEGIN block
596 self.eval_block(&visitors.begin)?;
597
598 while has_next {
599 let current_node = self.cursor.as_mut().unwrap().node();
600 postorder.push(current_node);
601
602 let visitor = visitors.get_by_node(current_node);
603
604 visitor.map(|v| self.eval_block(&v.enter));
605
606 has_next = self.cursor.as_mut().unwrap().goto_first_child();
607
608 if !has_next {
609 has_next = self.cursor.as_mut().unwrap().goto_next_sibling();
610 postorder
611 .pop()
612 .and_then(|n| visitors.get_by_node(n))
613 .map(|v| self.eval_block(&v.leave));
614 }
615
616 while !has_next && self.cursor.as_mut().unwrap().goto_parent() {
617 has_next = self.cursor.as_mut().unwrap().goto_next_sibling();
618 postorder
619 .pop()
620 .and_then(|n| visitors.get_by_node(n))
621 .map(|v| self.eval_block(&v.leave));
622 }
623 }
624
625 // END block
626 self.eval_block(&visitors.end)?;
627
628 Ok(Value::Unit)
629 }
630}
631
#[cfg(test)]
mod test {
    use super::*;

    /// Context for the Python grammar with an empty program loaded.
    fn python_context() -> Context<'static> {
        Context::new(tree_sitter_python::language())
            .with_program(ast::Program::new())
            .unwrap()
    }

    /// `lhs <op> rhs` over two integer literals.
    fn int_bin(lhs: i128, op: ast::BinOp, rhs: i128) -> ast::Expr {
        ast::Expr::Bin(
            ast::Expr::int(lhs).boxed(),
            op,
            ast::Expr::int(rhs).boxed(),
        )
    }

    /// The statement `name += amount;`.
    fn add_assign(name: &str, amount: i128) -> ast::Statement {
        ast::Statement::Bare(ast::Expr::Bin(
            ast::Expr::Ident(name.to_owned()).boxed(),
            ast::BinOp::Assign(ast::AssignOp {
                op: Some(ast::ArithOp::Add),
            }),
            ast::Expr::int(amount).boxed(),
        ))
    }

    /// The statement `int name;` or `int name = init;`.
    fn declare_int(name: &str, init: Option<i128>) -> ast::Statement {
        ast::Statement::Declaration(ast::Declaration {
            ty: ast::Type::Integer,
            name: name.to_owned(),
            init: init.map(|value| ast::Expr::int(value).boxed()),
        })
    }

    /// The integer variable expected to be found after evaluation.
    fn int_variable(name: &str, value: i128) -> Variable {
        Variable {
            ty: ast::Type::Integer,
            name: name.to_owned(),
            value: Value::Integer(value),
        }
    }

    #[test]
    fn bin() {
        let mut ctx = python_context();
        let add = ast::BinOp::Arith(ast::ArithOp::Add);
        let eq = ast::BinOp::Cmp(ast::CmpOp::Eq);
        let lt = ast::BinOp::Cmp(ast::CmpOp::Lt);

        assert_eq!(ctx.eval_expr(&int_bin(5, add, 10)), Ok(Value::Integer(15)));
        assert_eq!(ctx.eval_expr(&int_bin(5, eq, 10)), Ok(Value::Boolean(false)));
        assert_eq!(ctx.eval_expr(&int_bin(5, lt, 10)), Ok(Value::Boolean(true)));

        let conjunction = ast::Expr::Bin(
            int_bin(5, lt, 10).boxed(),
            ast::BinOp::Logic(ast::LogicOp::And),
            ast::Expr::false_().boxed(),
        );
        assert_eq!(ctx.eval_expr(&conjunction), Ok(Value::Boolean(false)));
    }

    #[test]
    fn test_evaluate_blocks() {
        let mut ctx = python_context();
        let block = ast::Block {
            body: vec![declare_int("a", None), add_assign("a", 5)],
        };

        assert_eq!(ctx.eval_block(&block), Ok(Value::Unit));
        assert_eq!(
            ctx.lookup(&String::from("a")).unwrap().clone(),
            int_variable("a", 5)
        );
    }

    #[test]
    fn test_evaluate_if() {
        let mut ctx = python_context();
        let conditional = ast::Expr::IfExpr(ast::If {
            condition: ast::Expr::true_().boxed(),
            then: ast::Block {
                body: vec![add_assign("a", 5)],
            },
            else_: ast::Block {
                body: vec![add_assign("a", 10)],
            },
        });
        let block = ast::Block {
            body: vec![declare_int("a", Some(1)), ast::Statement::Bare(conditional)],
        };

        assert_eq!(ctx.eval_block(&block), Ok(Value::Unit));
        assert_eq!(
            ctx.lookup(&String::from("a")).unwrap().clone(),
            int_variable("a", 6)
        );
    }
}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..8780b74
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,7 @@
1mod ast;
2mod eval;
3mod parser;
4mod string;
5
6pub use ast::Program;
7pub use eval::Context;
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..09a15ef
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,47 @@
1use trawk::{Context, Program};
2
3fn main() {
4 let src = r#"
5bar = 0
6def foo():
7 baz = 5
8 "#
9 .to_owned();
10
11 let program = Program::new()
12 .from_str(
13 r#"
14 BEGIN {
15 bool in_def = false;
16 }
17 pre function_definition {
18 in_def = true;
19 }
20 post function_definition {
21 in_def = false;
22 }
23 pre identifier {
24 if (in_def) {
25 print(text(node));
26 print(" ");
27 print("in def\n");
28 } else {
29 };
30 }"#,
31 )
32 .unwrap();
33
34 let mut parser = tree_sitter::Parser::new();
35 let _ = parser.set_language(tree_sitter_python::language());
36
37 let tree = parser.parse(&src, None).unwrap();
38 let cursor = tree.walk();
39
40 let mut ctx = Context::new(tree_sitter_python::language())
41 .with_input(src)
42 .with_cursor(cursor)
43 .with_program(program)
44 .unwrap();
45
46 let _ = ctx.eval();
47}
diff --git a/src/parser.rs b/src/parser.rs
new file mode 100644
index 0000000..3a020dc
--- /dev/null
+++ b/src/parser.rs
@@ -0,0 +1,689 @@
1use nom::{
2 branch::alt,
3 bytes::complete::tag,
4 character::complete::{alpha1, alphanumeric1, char, multispace0, multispace1, one_of},
5 combinator::{map, opt, recognize, value},
6 error::ParseError,
7 multi::{many0, many0_count, many1, separated_list0},
8 sequence::{delimited, pair, preceded, terminated, tuple},
9 IResult, Parser,
10};
11// use tree_sitter::Query;
12
13use crate::ast::*;
14use crate::string::parse_string;
15
16fn ws<'a, F: 'a, O, E>(inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E>
17where
18 F: FnMut(&'a str) -> IResult<&'a str, O, E>,
19 E: ParseError<&'a str>,
20{
21 delimited(multispace0, inner, multispace0)
22}
23
24fn parse_unit<'a>(i: &'a str) -> IResult<&'a str, ()> {
25 let open = char('(');
26 let close = char(')');
27 let unit = tuple((open, close));
28 value((), unit)(i)
29}
30
31fn parse_bool(i: &str) -> IResult<&str, bool> {
32 let t = value(true, tag("true"));
33 let f = value(false, tag("false"));
34 alt((t, f)).parse(i)
35}
36
37fn parse_int<'a>(i: &'a str) -> IResult<&'a str, i128> {
38 map(recognize(many1(one_of("0123456789"))), |s: &str| {
39 s.parse::<i128>().unwrap()
40 })(i)
41}
42
43fn parse_name(i: &str) -> IResult<&str, &str> {
44 recognize(pair(
45 alt((alpha1, tag("_"))),
46 many0_count(alt((alphanumeric1, tag("_")))),
47 ))
48 .parse(i)
49}
50
51fn parse_ident(i: &str) -> IResult<&str, Identifier> {
52 map(parse_name, str::to_owned)(i)
53}
54
55fn parse_lit<'a>(i: &'a str) -> IResult<&'a str, Literal> {
56 alt((
57 map(parse_string, Literal::Str),
58 map(parse_int, Literal::Int),
59 map(parse_bool, Literal::Bool),
60 ))
61 .parse(i)
62}
63
64fn parse_cmp_op(i: &str) -> IResult<&str, CmpOp> {
65 alt((
66 value(CmpOp::Eq, tag("==")),
67 value(CmpOp::Neq, tag("!=")),
68 value(CmpOp::Gte, tag(">=")),
69 value(CmpOp::Lte, tag("<=")),
70 value(CmpOp::Gt, tag(">")),
71 value(CmpOp::Lt, tag("<")),
72 ))
73 .parse(i)
74}
75
76fn parse_assign_op(i: &str) -> IResult<&str, AssignOp> {
77 let parse_arith_op = alt((
78 value(ArithOp::Add, char('+')),
79 value(ArithOp::Sub, char('-')),
80 value(ArithOp::Mul, char('*')),
81 value(ArithOp::Div, char('/')),
82 value(ArithOp::Mod, char('%')),
83 ));
84 map(tuple((opt(parse_arith_op), char('='))), |(op, _)| {
85 AssignOp { op }
86 })(i)
87}
88
89fn parse_op<'a, E, T>(
90 op_str: &'static str,
91 op: T,
92) -> impl FnMut(&'a str) -> Result<(&'a str, T), nom::Err<E>>
93where
94 E: ParseError<&'a str>,
95 T: Copy,
96{
97 value(op, tag(op_str))
98}
99
100fn parse_binary<'a, P1, P2, P3, E>(
101 lhs: P1,
102 op: P2,
103 rhs: P3,
104) -> impl FnMut(&'a str) -> Result<(&'a str, Expr), nom::Err<E>>
105where
106 P1: Parser<&'a str, Expr, E>,
107 P2: Parser<&'a str, BinOp, E>,
108 P3: Parser<&'a str, Expr, E>,
109 E: ParseError<&'a str>,
110{
111 map(tuple((lhs, op, rhs)), |(l, o, r)| {
112 Expr::Bin(l.boxed(), o, r.boxed())
113 })
114}
115
116fn parse_assign<'a>(i: &'a str) -> IResult<&'a str, Expr> {
117 let op = map(parse_assign_op, BinOp::Assign);
118 let recursive = parse_binary(parse_atom, op, parse_assign);
119 let base = parse_union;
120 alt((recursive, base)).parse(i)
121}
122
123fn parse_union<'a>(i: &'a str) -> IResult<&'a str, Expr> {
124 let op = parse_op("||", BinOp::Logic(LogicOp::Or));
125 let recursive = parse_binary(parse_intersection, op, parse_union);
126 let base = parse_intersection;
127 alt((recursive, base)).parse(i)
128}
129
130fn parse_intersection<'a>(i: &'a str) -> IResult<&'a str, Expr> {
131 let op = parse_op("&&", BinOp::Logic(LogicOp::And));
132 let recursive = parse_binary(parse_negated, op, parse_intersection);
133 let base = parse_negated;
134 alt((recursive, base)).parse(i)
135}
136
137fn parse_negated<'a>(i: &'a str) -> IResult<&'a str, Expr> {
138 let op = parse_op("!", UnaryOp::Not);
139 let recursive = map(tuple((op, parse_rel)), |(op, expr)| {
140 Expr::Unary(expr.boxed(), op)
141 });
142 let base = parse_rel;
143 alt((recursive, base)).parse(i)
144}
145
146fn parse_rel<'a>(i: &'a str) -> IResult<&'a str, Expr> {
147 let op = map(parse_cmp_op, BinOp::Cmp);
148 let recursive = parse_binary(parse_sum, op, parse_rel);
149 let base = parse_sum;
150 alt((recursive, base)).parse(i)
151}
152
153fn parse_sum<'a>(i: &'a str) -> IResult<&'a str, Expr> {
154 let add = parse_op("+", BinOp::Arith(ArithOp::Add));
155 let sub = parse_op("-", BinOp::Arith(ArithOp::Sub));
156 let op = alt((add, sub));
157 let recursive = parse_binary(parse_mul, op, parse_sum);
158 let base = parse_mul;
159 alt((recursive, base)).parse(i)
160}
161
162fn parse_mul<'a>(i: &'a str) -> IResult<&'a str, Expr> {
163 let mul = parse_op("*", BinOp::Arith(ArithOp::Mul));
164 let div = parse_op("/", BinOp::Arith(ArithOp::Div));
165 let mod_ = parse_op("%", BinOp::Arith(ArithOp::Mod));
166 let op = alt((mul, div, mod_));
167 let recursive = parse_binary(parse_atom, op, parse_mul);
168 let base = parse_atom;
169 alt((recursive, base)).parse(i)
170}
171
172fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Expr> {
173 let inner = alt((
174 map(tag("node"), |_| Expr::Node),
175 map(parse_block, Expr::Block),
176 map(parse_if, Expr::IfExpr),
177 map(parse_call, Expr::Call),
178 map(parse_lit, Expr::Lit),
179 map(parse_ident, Expr::Ident),
180 map(parse_unit, |_| Expr::Unit),
181 ));
182 ws(inner).parse(i)
183}
184
185fn parse_call<'a>(i: &'a str) -> IResult<&'a str, Call> {
186 let ident = parse_ident;
187 let open = ws(char('('));
188 let args = separated_list0(char(','), parse_expr);
189 let close = ws(char(')'));
190 map(
191 tuple((ident, open, args, close)),
192 |(function, _, parameters, _)| Call {
193 function,
194 parameters,
195 },
196 )
197 .parse(i)
198}
199
200fn parse_block<'a>(i: &'a str) -> IResult<&'a str, Block> {
201 let open = ws(char('{'));
202 let statements = map(many0(parse_statement), |body| Block { body });
203 let close = ws(char('}'));
204 delimited(open, statements, close).parse(i)
205}
206
207fn parse_if<'a>(i: &'a str) -> IResult<&'a str, If> {
208 let if_ = delimited(multispace0, tag("if"), multispace1);
209
210 let open = char('(');
211 let condition = ws(parse_expr);
212 let close = terminated(char(')'), multispace0);
213
214 let then = parse_block;
215
216 let else_kw = ws(tag("else"));
217 let else_ = opt(preceded(else_kw, parse_block));
218
219 map(
220 tuple((if_, open, condition, close, then, else_)),
221 |(_, _, condition, _, then, else_)| If {
222 condition: condition.boxed(),
223 then,
224 else_: else_.unwrap_or_default(),
225 },
226 )(i)
227}
228
229fn parse_expr<'a>(i: &'a str) -> IResult<&'a str, Expr> {
230 parse_assign.parse(i)
231}
232
233fn parse_bare<'a>(i: &'a str) -> IResult<&'a str, Expr> {
234 parse_expr(i)
235}
236
237fn parse_type<'a>(i: &'a str) -> IResult<&'a str, Type> {
238 let int = value(Type::Integer, tag("int"));
239 let string = value(Type::String, tag("string"));
240 let bool_ = value(Type::Boolean, tag("bool"));
241 alt((int, string, bool_)).parse(i)
242}
243
244fn parse_declaration<'a>(i: &'a str) -> IResult<&'a str, Declaration> {
245 let ty = parse_type;
246 let name = parse_ident;
247 let op = ws(char('='));
248 let init = opt(preceded(op, map(parse_expr, Expr::boxed)));
249 map(
250 tuple((ty, multispace0, name, init)),
251 |(ty, _, name, init)| Declaration { ty, name, init },
252 )(i)
253}
254
255fn parse_statement<'a>(i: &'a str) -> IResult<&'a str, Statement> {
256 let semicolon = ws(char(';'));
257 let inner = alt((
258 map(parse_declaration, Statement::Declaration),
259 map(parse_bare, Statement::Bare),
260 ));
261 terminated(inner, semicolon).parse(i)
262}
263
264// pub fn skip_query(mut i: &str) -> IResult<&str, ()> {
265// let mut paren_depth = 0;
266// let mut in_string = false;
267// let mut in_escape = false;
268// let mut in_comment = false;
269// loop {
270// let ch = i
271// .chars()
272// .next()
273// .ok_or(nom::Err::Error(nom::error::Error::new(
274// i,
275// nom::error::ErrorKind::Eof,
276// )))?;
277// if in_escape {
278// in_escape = false;
279// } else if in_string {
280// match ch {
281// '\\' => {
282// in_escape = true;
283// }
284// '"' | '\n' => {
285// in_string = false;
286// }
287// _ => {}
288// }
289// } else if in_comment {
290// if ch == '\n' {
291// in_comment = false;
292// }
293// } else {
294// match ch {
295// '"' => in_string = true,
296// '(' => paren_depth += 1,
297// ')' => {
298// if paren_depth > 0 {
299// paren_depth -= 1;
300// }
301// }
302// '{' => return Ok((i, ())),
303// ';' => in_comment = true,
304// _ => {}
305// }
306// }
307// i = &i[1..];
308// }
309// }
310
311// fn parse_query<'a>(
312// language: tree_sitter::Language,
313// ) -> impl FnMut(&'a str) -> IResult<&'a str, Query> {
314// return move |initial: &'a str| {
315// let query_start = 0;
316// let (skipped, _) = skip_query(initial)?;
317// let query_end = initial.len() - skipped.len();
318// let query_source = &initial[query_start..query_end].to_owned();
319//
320// let query = Query::new(language, &query_source).map_err(|mut _e| {
321// nom::Err::Error(nom::error::Error::new(initial, nom::error::ErrorKind::Fail))
322// })?;
323// Ok((skipped, query))
324// };
325// }
326
327fn parse_modifier<'a>(i: &str) -> IResult<&str, Modifier> {
328 let pre = value(Modifier::Enter, tag("enter"));
329 let post = value(Modifier::Leave, tag("leave"));
330 map(opt(alt((pre, post))), Option::unwrap_or_default)(i)
331}
332
333fn parse_pattern<'a>(i: &str) -> IResult<&str, Pattern> {
334 let begin = value(Pattern::Begin, ws(tag("BEGIN")));
335 let end = value(Pattern::End, ws(tag("END")));
336 let node = map(
337 tuple((parse_modifier, multispace0, parse_ident)),
338 |(modifier, _, kind)| Pattern::Node(NodePattern { modifier, kind }),
339 );
340 alt((begin, end, node)).parse(i)
341}
342
343pub fn parse_stanza<'a>(i: &str) -> IResult<&str, Stanza> {
344 map(
345 tuple((parse_pattern, parse_block)),
346 |(pattern, statements)| Stanza {
347 pattern,
348 statements,
349 },
350 )(i)
351}
352
353pub fn parse_file(i: &str) -> IResult<&str, Vec<Stanza>> {
354 many0(parse_stanza).parse(i)
355}
356
#[cfg(test)]
mod test {
    use super::*;

    /// A bare statement consisting of the literal `true`.
    fn bare_true() -> Statement {
        Statement::Bare(Expr::true_())
    }

    /// A block whose only statement is the bare literal `true`.
    fn block_of_true() -> Block {
        Block {
            body: vec![bare_true()],
        }
    }

    /// An identifier expression with the given name.
    fn ident(name: &str) -> Expr {
        Expr::Ident(name.to_owned())
    }

    /// A node pattern matching `function_definition` with the given modifier.
    fn function_definition(modifier: Modifier) -> Pattern {
        Pattern::Node(NodePattern {
            modifier,
            kind: "function_definition".to_owned(),
        })
    }

    #[test]
    fn test_parse_unit() {
        let parsed = parse_unit("()");
        assert_eq!(parsed, Ok(("", ())))
    }

    #[test]
    fn test_parse_int() {
        // Leading zeros do not change the parsed value.
        for input in ["123456", "00123456"] {
            assert_eq!(parse_int(input), Ok(("", 123456)));
        }
    }

    #[test]
    fn test_parse_bool() {
        for (input, expected) in [("true", true), ("false", false)] {
            assert_eq!(parse_bool(input), Ok(("", expected)));
        }
    }

    #[test]
    fn test_parse_name() {
        for name in ["true", "_abc"] {
            assert_eq!(parse_name(name), Ok(("", name)));
        }
    }

    #[test]
    fn test_parse_literal() {
        let string = Literal::Str("foobarbaz".to_owned());
        assert_eq!(parse_lit(r#""foobarbaz""#), Ok(("", string)));
        assert_eq!(parse_lit("123"), Ok(("", Literal::Int(123))));
        assert_eq!(parse_lit("true"), Ok(("", Literal::Bool(true))));
    }

    #[test]
    fn test_parse_expr() {
        assert_eq!(parse_expr(" () "), Ok(("", Expr::Unit)));
        assert_eq!(parse_expr(" 55 "), Ok(("", Expr::int(55))));

        let or_expr = Expr::Bin(
            Expr::true_().boxed(),
            BinOp::Logic(LogicOp::Or),
            Expr::true_().boxed(),
        );
        assert_eq!(parse_expr(" true || true "), Ok(("", or_expr)));

        // `==` binds tighter than `&&`, which binds tighter than `||`.
        let five_eq_five = Expr::Bin(
            Expr::int(5).boxed(),
            BinOp::Cmp(CmpOp::Eq),
            Expr::int(5).boxed(),
        );
        let and_expr = Expr::Bin(
            Expr::false_().boxed(),
            BinOp::Logic(LogicOp::And),
            five_eq_five.boxed(),
        );
        let mixed = Expr::Bin(
            Expr::true_().boxed(),
            BinOp::Logic(LogicOp::Or),
            and_expr.boxed(),
        );
        assert_eq!(parse_expr("true || false && 5 == 5 "), Ok(("", mixed)));

        let one_eq_one = Expr::Bin(
            Expr::int(1).boxed(),
            BinOp::Cmp(CmpOp::Eq),
            Expr::int(1).boxed(),
        );
        let call = Expr::Call(Call {
            function: "foo".to_owned(),
            parameters: vec![Expr::int(1), Expr::int(2), Expr::int(3), one_eq_one],
        });
        assert_eq!(parse_expr(" foo ( 1, 2,3 , 1 == 1)"), Ok(("", call)));

        let plain_assign = Expr::Bin(
            ident("a").boxed(),
            BinOp::Assign(AssignOp { op: None }),
            ident("b").boxed(),
        );
        assert_eq!(parse_expr("a = b"), Ok(("", plain_assign)));

        let four_plus_five = Expr::Bin(
            Expr::int(4).boxed(),
            BinOp::Arith(ArithOp::Add),
            Expr::int(5).boxed(),
        );
        let compound_assign = Expr::Bin(
            ident("a").boxed(),
            BinOp::Assign(AssignOp {
                op: Some(ArithOp::Add),
            }),
            four_plus_five.boxed(),
        );
        assert_eq!(parse_expr(" a += 4 + 5"), Ok(("", compound_assign)));
    }

    #[test]
    fn test_parse_statement() {
        for input in ["true;", "true ; "] {
            assert_eq!(parse_statement(input), Ok(("", bare_true())));
        }

        let uninitialised = Statement::Declaration(Declaration {
            ty: Type::Integer,
            name: "a".to_owned(),
            init: None,
        });
        assert_eq!(parse_statement("int a ; "), Ok(("", uninitialised)));

        let initialised = Statement::Declaration(Declaration {
            ty: Type::Integer,
            name: "a".to_owned(),
            init: Some(Expr::int(5).boxed()),
        });
        assert_eq!(parse_statement("int a =5 ; "), Ok(("", initialised)));
    }

    #[test]
    fn test_parse_block() {
        let source = r#"
        {
            true;
            1;
        }
        "#;
        let expected = Expr::Block(Block {
            body: vec![bare_true(), Statement::Bare(Expr::int(1))],
        });
        assert_eq!(parse_expr(source), Ok(("", expected)));
    }

    #[test]
    fn test_parse_if() {
        let source = r#"
        if (1 == true) {
            5;
        } else {
            10;
        }
        "#;
        let condition = Expr::Bin(
            Expr::int(1).boxed(),
            BinOp::Cmp(CmpOp::Eq),
            Expr::true_().boxed(),
        );
        let expected = Expr::IfExpr(If {
            condition: condition.boxed(),
            then: Block {
                body: vec![Statement::Bare(Expr::int(5))],
            },
            else_: Block {
                body: vec![Statement::Bare(Expr::int(10))],
            },
        });
        assert_eq!(parse_expr(source), Ok(("", expected)));
    }

    // #[test]
    // fn test_skip_query() {
    //     assert_eq!(
    //         skip_query(
    //             r#"(heading
    //                 (paragraph) @foo) {}"#
    //         ),
    //         Ok(("{}", ()))
    //     );
    // }

    #[test]
    fn test_parse_pattern() {
        // An omitted modifier defaults to `Enter`.
        let cases = [
            ("enter function_definition", Modifier::Enter),
            ("function_definition", Modifier::Enter),
            ("leave function_definition", Modifier::Leave),
        ];
        for (input, modifier) in cases {
            assert_eq!(
                parse_pattern(input),
                Ok(("", function_definition(modifier)))
            );
        }
    }

    #[test]
    fn test_parse_stanza() {
        let node_stanza = Stanza {
            pattern: function_definition(Modifier::Enter),
            statements: block_of_true(),
        };
        assert_eq!(
            parse_stanza("enter function_definition { true; }"),
            Ok(("", node_stanza))
        );

        let begin_stanza = Stanza {
            pattern: Pattern::Begin,
            statements: block_of_true(),
        };
        assert_eq!(parse_stanza("BEGIN { true; }"), Ok(("", begin_stanza)));

        let multiline_block = " {
            true;
        }";
        assert_eq!(parse_block(multiline_block), Ok(("", block_of_true())));
    }

    #[test]
    fn test_parse_if_statement_regression() {
        let without_else = Statement::Bare(Expr::IfExpr(If {
            condition: Expr::true_().boxed(),
            then: block_of_true(),
            else_: Block::default(),
        }));
        assert_eq!(
            parse_statement("if (true) { true; };"),
            Ok(("", without_else))
        );

        let with_else = Expr::IfExpr(If {
            condition: Expr::true_().boxed(),
            then: block_of_true(),
            else_: block_of_true(),
        });
        assert_eq!(
            parse_expr("if (true) { true; } else { true; }"),
            Ok(("", with_else))
        );
    }
}
diff --git a/src/string.rs b/src/string.rs
new file mode 100644
index 0000000..820f9ff
--- /dev/null
+++ b/src/string.rs
@@ -0,0 +1,152 @@
1use nom::branch::alt;
2use nom::bytes::streaming::{is_not, take_while_m_n};
3use nom::character::streaming::{char, multispace1};
4use nom::combinator::{map, map_opt, map_res, value, verify};
5use nom::error::{FromExternalError, ParseError};
6use nom::multi::fold_many0;
7use nom::sequence::{delimited, preceded};
8use nom::{IResult, Parser};
9
10// parser combinators are constructed from the bottom up:
11// first we write parsers for the smallest elements (escaped characters),
12// then combine them into larger parsers.
13
14/// Parse a unicode sequence, of the form u{XXXX}, where XXXX is 1 to 6
15/// hexadecimal numerals. We will combine this later with parse_escaped_char
16/// to parse sequences like \u{00AC}.
17fn parse_unicode<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
18where
19 E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
20{
21 // `take_while_m_n` parses between `m` and `n` bytes (inclusive) that match
22 // a predicate. `parse_hex` here parses between 1 and 6 hexadecimal numerals.
23 let parse_hex = take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit());
24
25 // `preceded` takes a prefix parser, and if it succeeds, returns the result
26 // of the body parser. In this case, it parses u{XXXX}.
27 let parse_delimited_hex = preceded(
28 char('u'),
29 // `delimited` is like `preceded`, but it parses both a prefix and a suffix.
30 // It returns the result of the middle parser. In this case, it parses
31 // {XXXX}, where XXXX is 1 to 6 hex numerals, and returns XXXX
32 delimited(char('{'), parse_hex, char('}')),
33 );
34
35 // `map_res` takes the result of a parser and applies a function that returns
36 // a Result. In this case we take the hex bytes from parse_hex and attempt to
37 // convert them to a u32.
38 let parse_u32 = map_res(parse_delimited_hex, move |hex| u32::from_str_radix(hex, 16));
39
40 // map_opt is like map_res, but it takes an Option instead of a Result. If
41 // the function returns None, map_opt returns an error. In this case, because
42 // not all u32 values are valid unicode code points, we have to fallibly
43 // convert to char with from_u32.
44 map_opt(parse_u32, std::char::from_u32).parse(input)
45}
46
47/// Parse an escaped character: \n, \t, \r, \u{00AC}, etc.
48fn parse_escaped_char<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
49where
50 E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
51{
52 preceded(
53 char('\\'),
54 // `alt` tries each parser in sequence, returning the result of
55 // the first successful match
56 alt((
57 parse_unicode,
58 // The `value` parser returns a fixed value (the first argument) if its
59 // parser (the second argument) succeeds. In these cases, it looks for
60 // the marker characters (n, r, t, etc) and returns the matching
61 // character (\n, \r, \t, etc).
62 value('\n', char('n')),
63 value('\r', char('r')),
64 value('\t', char('t')),
65 value('\u{08}', char('b')),
66 value('\u{0C}', char('f')),
67 value('\\', char('\\')),
68 value('/', char('/')),
69 value('"', char('"')),
70 )),
71 )
72 .parse(input)
73}
74
75/// Parse a backslash, followed by any amount of whitespace. This is used later
76/// to discard any escaped whitespace.
77fn parse_escaped_whitespace<'a, E: ParseError<&'a str>>(
78 input: &'a str,
79) -> IResult<&'a str, &'a str, E> {
80 preceded(char('\\'), multispace1).parse(input)
81}
82
83/// Parse a non-empty block of text that doesn't include \ or "
84fn parse_literal<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
85 // `is_not` parses a string of 0 or more characters that aren't one of the
86 // given characters.
87 let not_quote_slash = is_not("\"\\");
88
89 // `verify` runs a parser, then runs a verification function on the output of
90 // the parser. The verification function accepts out output only if it
91 // returns true. In this case, we want to ensure that the output of is_not
92 // is non-empty.
93 verify(not_quote_slash, |s: &str| !s.is_empty()).parse(input)
94}
95
/// One piece of a string literal as produced by `parse_fragment`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum StringFragment<'a> {
    /// A non-empty run of characters that needed no unescaping.
    Literal(&'a str),
    /// A single character produced by a backslash escape.
    EscapedChar(char),
    /// A backslash followed by whitespace; contributes nothing to the string.
    EscapedWS,
}
105
106/// Combine parse_literal, parse_escaped_whitespace, and parse_escaped_char
107/// into a StringFragment.
108fn parse_fragment<'a, E>(input: &'a str) -> IResult<&'a str, StringFragment<'a>, E>
109where
110 E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
111{
112 alt((
113 // The `map` combinator runs a parser, then applies a function to the output
114 // of that parser.
115 map(parse_literal, StringFragment::Literal),
116 map(parse_escaped_char, StringFragment::EscapedChar),
117 value(StringFragment::EscapedWS, parse_escaped_whitespace),
118 ))
119 .parse(input)
120}
121
122/// Parse a string. Use a loop of parse_fragment and push all of the fragments
123/// into an output string.
124pub fn parse_string<'a, E>(input: &'a str) -> IResult<&'a str, String, E>
125where
126 E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
127{
128 // fold is the equivalent of iterator::fold. It runs a parser in a loop,
129 // and for each output value, calls a folding function on each output value.
130 let build_string = fold_many0(
131 // Our parser function – parses a single string fragment
132 parse_fragment,
133 // Our init value, an empty string
134 String::new,
135 // Our folding function. For each fragment, append the fragment to the
136 // string.
137 |mut string, fragment| {
138 match fragment {
139 StringFragment::Literal(s) => string.push_str(s),
140 StringFragment::EscapedChar(c) => string.push(c),
141 StringFragment::EscapedWS => {}
142 }
143 string
144 },
145 );
146
147 // Finally, parse the string. Note that, if `build_string` could accept a raw
148 // " character, the closing delimiter " would never match. When using
149 // `delimited` with a looping parser (like fold), be sure that the
150 // loop won't accidentally match your closing delimiter!
151 delimited(char('"'), build_string, char('"')).parse(input)
152}