diff options
author | Akshay <[email protected]> | 2024-07-14 10:16:15 +0100 |
---|---|---|
committer | Akshay <[email protected]> | 2024-07-14 10:16:15 +0100 |
commit | 7ae7e42eb1eb981483cc4183368bec4932b8f1c2 (patch) | |
tree | 65862de0b1017bcbfb558c0e803c2c4622d61bb9 | |
parent | 8eb38033e0c615983c4490354dad4abb00031042 (diff) |
add trawk cli
-rw-r--r-- | Cargo.toml | 16 | ||||
-rw-r--r-- | src/ast.rs | 17 | ||||
-rw-r--r-- | src/eval.rs | 55 | ||||
-rw-r--r-- | src/lib.rs | 3 | ||||
-rw-r--r-- | src/main.rs | 219 | ||||
-rw-r--r-- | src/parser.rs | 58 |
6 files changed, 297 insertions, 71 deletions
@@ -11,6 +11,20 @@ regex = "1.3" | |||
11 | thiserror = "1.0.61" | 11 | thiserror = "1.0.61" |
12 | serde = "1.0.204" | 12 | serde = "1.0.204" |
13 | nom = "7.1.3" | 13 | nom = "7.1.3" |
14 | tree-sitter-python = "0.20" | 14 | |
15 | tree-sitter-md = {version = "0.1", optional = true} | ||
16 | tree-sitter-typescript = {version = "0.20.1", optional = true} | ||
17 | tree-sitter-javascript = {version = "0.20.0", optional = true} | ||
18 | tree-sitter-python = {version = "0.20.4", optional = true} | ||
19 | tree-sitter-rust = {version = "0.20.3", optional = true} | ||
20 | argh = "0.1.12" | ||
21 | |||
22 | [features] | ||
23 | default = ["md", "typescript", "javascript", "rust", "python"] | ||
24 | md = ["tree-sitter-md"] | ||
25 | typescript = ["tree-sitter-typescript"] | ||
26 | javascript = ["tree-sitter-javascript"] | ||
27 | python = ["tree-sitter-python"] | ||
28 | rust = ["tree-sitter-rust"] | ||
15 | 29 | ||
16 | [dev-dependencies] | 30 | [dev-dependencies] |
@@ -59,6 +59,7 @@ pub enum Statement { | |||
59 | #[derive(Debug, Eq, PartialEq, Clone)] | 59 | #[derive(Debug, Eq, PartialEq, Clone)] |
60 | pub enum Expr { | 60 | pub enum Expr { |
61 | Node, | 61 | Node, |
62 | FieldAccess(Vec<Identifier>), | ||
62 | Unit, | 63 | Unit, |
63 | Lit(Literal), | 64 | Lit(Literal), |
64 | Ident(Identifier), | 65 | Ident(Identifier), |
@@ -71,22 +72,6 @@ pub enum Expr { | |||
71 | } | 72 | } |
72 | 73 | ||
73 | impl Expr { | 74 | impl Expr { |
74 | pub fn int(int: i128) -> Expr { | ||
75 | Self::Lit(Literal::Int(int)) | ||
76 | } | ||
77 | |||
78 | pub fn str(s: &str) -> Expr { | ||
79 | Self::Lit(Literal::Str(s.to_owned())) | ||
80 | } | ||
81 | |||
82 | pub const fn false_() -> Expr { | ||
83 | Self::Lit(Literal::Bool(false)) | ||
84 | } | ||
85 | |||
86 | pub const fn true_() -> Expr { | ||
87 | Self::Lit(Literal::Bool(true)) | ||
88 | } | ||
89 | |||
90 | pub fn boxed(self) -> Box<Expr> { | 75 | pub fn boxed(self) -> Box<Expr> { |
91 | Box::new(self) | 76 | Box::new(self) |
92 | } | 77 | } |
diff --git a/src/eval.rs b/src/eval.rs index 859979d..e13cec3 100644 --- a/src/eval.rs +++ b/src/eval.rs | |||
@@ -39,6 +39,7 @@ pub enum Value { | |||
39 | String(String), | 39 | String(String), |
40 | Boolean(bool), | 40 | Boolean(bool), |
41 | Node, | 41 | Node, |
42 | FieldAccess(Vec<String>), | ||
42 | } | 43 | } |
43 | 44 | ||
44 | impl Value { | 45 | impl Value { |
@@ -49,6 +50,7 @@ impl Value { | |||
49 | Self::String(_) => ast::Type::String, | 50 | Self::String(_) => ast::Type::String, |
50 | Self::Boolean(_) => ast::Type::Boolean, | 51 | Self::Boolean(_) => ast::Type::Boolean, |
51 | Self::Node => ast::Type::Node, | 52 | Self::Node => ast::Type::Node, |
53 | Self::FieldAccess(_) => ast::Type::Node, | ||
52 | } | 54 | } |
53 | } | 55 | } |
54 | 56 | ||
@@ -241,6 +243,7 @@ impl fmt::Display for Value { | |||
241 | Self::String(s) => write!(f, "{s}"), | 243 | Self::String(s) => write!(f, "{s}"), |
242 | Self::Boolean(b) => write!(f, "{b}"), | 244 | Self::Boolean(b) => write!(f, "{b}"), |
243 | Self::Node => write!(f, "<node>"), | 245 | Self::Node => write!(f, "<node>"), |
246 | Self::FieldAccess(items) => write!(f, "<node>.{}", items.join(".")), | ||
244 | } | 247 | } |
245 | } | 248 | } |
246 | } | 249 | } |
@@ -317,7 +320,7 @@ pub enum Error { | |||
317 | CurrentNodeNotPresent, | 320 | CurrentNodeNotPresent, |
318 | } | 321 | } |
319 | 322 | ||
320 | type Result = std::result::Result<Value, Error>; | 323 | pub type Result = std::result::Result<Value, Error>; |
321 | 324 | ||
322 | pub struct Context<'a> { | 325 | pub struct Context<'a> { |
323 | variables: HashMap<ast::Identifier, Variable>, | 326 | variables: HashMap<ast::Identifier, Variable>, |
@@ -385,6 +388,7 @@ impl<'a> Context<'a> { | |||
385 | ast::Expr::IfExpr(if_expr) => self.eval_if(if_expr), | 388 | ast::Expr::IfExpr(if_expr) => self.eval_if(if_expr), |
386 | ast::Expr::Block(block) => self.eval_block(block), | 389 | ast::Expr::Block(block) => self.eval_block(block), |
387 | ast::Expr::Node => Ok(Value::Node), | 390 | ast::Expr::Node => Ok(Value::Node), |
391 | ast::Expr::FieldAccess(items) => Ok(Value::FieldAccess(items.to_owned())), | ||
388 | } | 392 | } |
389 | } | 393 | } |
390 | 394 | ||
@@ -544,12 +548,33 @@ impl<'a> Context<'a> { | |||
544 | } | 548 | } |
545 | Ok(Value::Unit) | 549 | Ok(Value::Unit) |
546 | } | 550 | } |
547 | ("text", [arg]) if self.eval_expr(arg)? == Value::Node => { | 551 | ("text", [arg]) => { |
548 | let node = self | 552 | let node = match self.eval_expr(arg)? { |
549 | .cursor | 553 | Value::Node => self |
550 | .as_ref() | 554 | .cursor |
551 | .ok_or(Error::CurrentNodeNotPresent)? | 555 | .as_ref() |
552 | .node(); | 556 | .ok_or(Error::CurrentNodeNotPresent)? |
557 | .node(), | ||
558 | Value::FieldAccess(fields) => { | ||
559 | let mut node = self | ||
560 | .cursor | ||
561 | .as_ref() | ||
562 | .ok_or(Error::CurrentNodeNotPresent)? | ||
563 | .node(); | ||
564 | for field in &fields { | ||
565 | node = node | ||
566 | .child_by_field_name(field.as_bytes()) | ||
567 | .ok_or_else(|| Error::FailedLookup(field.to_owned()))?; | ||
568 | } | ||
569 | node | ||
570 | } | ||
571 | v => { | ||
572 | return Err(Error::TypeMismatch { | ||
573 | expected: ast::Type::Node, | ||
574 | got: v.ty(), | ||
575 | }) | ||
576 | } | ||
577 | }; | ||
553 | let text = node | 578 | let text = node |
554 | .utf8_text(self.input_src.as_ref().unwrap().as_bytes()) | 579 | .utf8_text(self.input_src.as_ref().unwrap().as_bytes()) |
555 | .unwrap(); | 580 | .unwrap(); |
@@ -629,6 +654,22 @@ impl<'a> Context<'a> { | |||
629 | } | 654 | } |
630 | } | 655 | } |
631 | 656 | ||
657 | pub fn evaluate(file: &str, program: &str, language: tree_sitter::Language) -> Result { | ||
658 | let mut parser = tree_sitter::Parser::new(); | ||
659 | let _ = parser.set_language(language); | ||
660 | |||
661 | let tree = parser.parse(file, None).unwrap(); | ||
662 | let cursor = tree.walk(); | ||
663 | |||
664 | let program = ast::Program::new().from_str(program).unwrap(); | ||
665 | let mut ctx = Context::new(tree_sitter_md::language()) | ||
666 | .with_input(file.to_owned()) | ||
667 | .with_cursor(cursor) | ||
668 | .with_program(program)?; | ||
669 | |||
670 | ctx.eval() | ||
671 | } | ||
672 | |||
632 | #[cfg(test)] | 673 | #[cfg(test)] |
633 | mod test { | 674 | mod test { |
634 | use super::*; | 675 | use super::*; |
@@ -3,5 +3,4 @@ mod eval; | |||
3 | mod parser; | 3 | mod parser; |
4 | mod string; | 4 | mod string; |
5 | 5 | ||
6 | pub use ast::Program; | 6 | pub use eval::evaluate; |
7 | pub use eval::Context; | ||
diff --git a/src/main.rs b/src/main.rs index 09a15ef..6196a32 100644 --- a/src/main.rs +++ b/src/main.rs | |||
@@ -1,47 +1,182 @@ | |||
1 | use trawk::{Context, Program}; | 1 | /// TBSP: tree-based source processor |
2 | #[derive(argh::FromArgs)] | ||
3 | struct Cli { | ||
4 | /// read the TBSP program source from a file | ||
5 | #[argh(option, short = 'f')] | ||
6 | program_file: std::path::PathBuf, | ||
7 | |||
8 | /// set the language that the file is written in | ||
9 | #[argh(option, short = 'l')] | ||
10 | language: String, | ||
11 | |||
12 | /// input file to process | ||
13 | #[argh(positional)] | ||
14 | file: Option<std::path::PathBuf>, | ||
15 | } | ||
2 | 16 | ||
3 | fn main() { | 17 | fn main() { |
4 | let src = r#" | 18 | let cli: Cli = argh::from_env(); |
5 | bar = 0 | 19 | |
6 | def foo(): | 20 | let program = std::fs::read_to_string(&cli.program_file).unwrap_or_else(|e| { |
7 | baz = 5 | 21 | eprintln!( |
8 | "# | 22 | "failed to read program-file from `{}`: {e}", |
9 | .to_owned(); | 23 | cli.program_file.display() |
10 | 24 | ); | |
11 | let program = Program::new() | 25 | std::process::exit(-1); |
12 | .from_str( | 26 | }); |
13 | r#" | 27 | |
14 | BEGIN { | 28 | let language = match cli.language.as_str() { |
15 | bool in_def = false; | 29 | "md" => tree_sitter_md::language(), |
16 | } | 30 | "typescript" => tree_sitter_typescript::language_typescript(), |
17 | pre function_definition { | 31 | "javascript" => tree_sitter_javascript::language(), |
18 | in_def = true; | 32 | "python" => tree_sitter_python::language(), |
33 | "rust" => tree_sitter_rust::language(), | ||
34 | lang => { | ||
35 | eprintln!("unknown language `{lang}`"); | ||
36 | std::process::exit(-1); | ||
37 | } | ||
38 | }; | ||
39 | |||
40 | let file = cli | ||
41 | .file | ||
42 | .map(std::fs::read_to_string) | ||
43 | .unwrap_or_else(try_consume_stdin) | ||
44 | .unwrap_or_else(|e| { | ||
45 | eprintln!("{e}"); | ||
46 | std::process::exit(-1) | ||
47 | }); | ||
48 | |||
49 | trawk::evaluate(&file, &program, language).unwrap_or_else(|e| { | ||
50 | eprintln!("{e:?}"); | ||
51 | std::process::exit(-1); | ||
52 | }); | ||
53 | } | ||
54 | |||
55 | fn try_consume_stdin() -> std::io::Result<String> { | ||
56 | let mut buffer = String::new(); | ||
57 | let mut lock = std::io::stdin().lock(); | ||
58 | |||
59 | while let Ok(n) = std::io::Read::read_to_string(&mut lock, &mut buffer) { | ||
60 | if n == 0 { | ||
61 | break; | ||
62 | } | ||
19 | } | 63 | } |
20 | post function_definition { | 64 | |
21 | in_def = false; | 65 | if buffer.is_empty() { |
66 | Err(std::io::Error::other("empty stdin")) | ||
67 | } else { | ||
68 | Ok(buffer) | ||
22 | } | 69 | } |
23 | pre identifier { | ||
24 | if (in_def) { | ||
25 | print(text(node)); | ||
26 | print(" "); | ||
27 | print("in def\n"); | ||
28 | } else { | ||
29 | }; | ||
30 | }"#, | ||
31 | ) | ||
32 | .unwrap(); | ||
33 | |||
34 | let mut parser = tree_sitter::Parser::new(); | ||
35 | let _ = parser.set_language(tree_sitter_python::language()); | ||
36 | |||
37 | let tree = parser.parse(&src, None).unwrap(); | ||
38 | let cursor = tree.walk(); | ||
39 | |||
40 | let mut ctx = Context::new(tree_sitter_python::language()) | ||
41 | .with_input(src) | ||
42 | .with_cursor(cursor) | ||
43 | .with_program(program) | ||
44 | .unwrap(); | ||
45 | |||
46 | let _ = ctx.eval(); | ||
47 | } | 70 | } |
71 | |||
72 | // fn main() { | ||
73 | // let src = r#" | ||
74 | // # foo1 | ||
75 | // | ||
76 | // bar | ||
77 | // | ||
78 | // ## foo1.1 | ||
79 | // | ||
80 | // bar baz | ||
81 | // | ||
82 | // # foo2 | ||
83 | // | ||
84 | // bar baz | ||
85 | // | ||
86 | // ``` | ||
87 | // fn main() { | ||
88 | // } | ||
89 | // ``` | ||
90 | // | ||
91 | // - foo | ||
92 | // - bar | ||
93 | // - baz | ||
94 | // | ||
95 | // "# | ||
96 | // .to_owned(); | ||
97 | // | ||
98 | // let program = Program::new() | ||
99 | // .from_str( | ||
100 | // r#" | ||
101 | // BEGIN { | ||
102 | // int depth = 0; | ||
103 | // | ||
104 | // print("<html>\n"); | ||
105 | // print("<body>\n"); | ||
106 | // } | ||
107 | // | ||
108 | // enter section { | ||
109 | // depth += 1; | ||
110 | // } | ||
111 | // leave section { | ||
112 | // depth -= 1; | ||
113 | // } | ||
114 | // | ||
115 | // enter atx_heading { | ||
116 | // print("<h"); | ||
117 | // print(depth); | ||
118 | // print(">"); | ||
119 | // } | ||
120 | // leave atx_heading { | ||
121 | // print("</h"); | ||
122 | // print(depth); | ||
123 | // print(">\n"); | ||
124 | // } | ||
125 | // | ||
126 | // enter paragraph { | ||
127 | // print("<p>"); | ||
128 | // } | ||
129 | // leave paragraph { | ||
130 | // print("</p>\n"); | ||
131 | // } | ||
132 | // | ||
133 | // enter list { | ||
134 | // print("<ol>"); | ||
135 | // } | ||
136 | // leave list { | ||
137 | // print("</ol>\n"); | ||
138 | // } | ||
139 | // | ||
140 | // enter list_item { | ||
141 | // print("<li>"); | ||
142 | // } | ||
143 | // leave list_item { | ||
144 | // print("</li>\n"); | ||
145 | // } | ||
146 | // | ||
147 | // enter fenced_code_block { | ||
148 | // print("<pre>"); | ||
149 | // } | ||
150 | // leave fenced_code_block { | ||
151 | // print("</pre>\n"); | ||
152 | // } | ||
153 | // | ||
154 | // enter inline { | ||
155 | // print(text(node)); | ||
156 | // } | ||
157 | // enter code_fence_content { | ||
158 | // print(text(node)); | ||
159 | // } | ||
160 | // | ||
161 | // END { | ||
162 | // print("</body>\n"); | ||
163 | // print("</html>\n"); | ||
164 | // } | ||
165 | // "#, | ||
166 | // ) | ||
167 | // .unwrap(); | ||
168 | // | ||
169 | // let mut parser = tree_sitter::Parser::new(); | ||
170 | // let _ = parser.set_language(&tree_sitter_md::language()); | ||
171 | // | ||
172 | // let tree = parser.parse(&src, None).unwrap(); | ||
173 | // let cursor = tree.walk(); | ||
174 | // | ||
175 | // let mut ctx = Context::new(tree_sitter_md::language()) | ||
176 | // .with_input(src) | ||
177 | // .with_cursor(cursor) | ||
178 | // .with_program(program) | ||
179 | // .unwrap(); | ||
180 | // | ||
181 | // let _ = ctx.eval(); | ||
182 | // } | ||
diff --git a/src/parser.rs b/src/parser.rs index 3a020dc..d705a11 100644 --- a/src/parser.rs +++ b/src/parser.rs | |||
@@ -1,10 +1,10 @@ | |||
1 | use nom::{ | 1 | use nom::{ |
2 | branch::alt, | 2 | branch::alt, |
3 | bytes::complete::tag, | 3 | bytes::complete::{is_not, tag}, |
4 | character::complete::{alpha1, alphanumeric1, char, multispace0, multispace1, one_of}, | 4 | character::complete::{alpha1, alphanumeric1, char, multispace0, multispace1, one_of}, |
5 | combinator::{map, opt, recognize, value}, | 5 | combinator::{map, opt, recognize, value}, |
6 | error::ParseError, | 6 | error::ParseError, |
7 | multi::{many0, many0_count, many1, separated_list0}, | 7 | multi::{many0, many0_count, many1, separated_list0, separated_list1}, |
8 | sequence::{delimited, pair, preceded, terminated, tuple}, | 8 | sequence::{delimited, pair, preceded, terminated, tuple}, |
9 | IResult, Parser, | 9 | IResult, Parser, |
10 | }; | 10 | }; |
@@ -21,6 +21,11 @@ where | |||
21 | delimited(multispace0, inner, multispace0) | 21 | delimited(multispace0, inner, multispace0) |
22 | } | 22 | } |
23 | 23 | ||
24 | // TODO use this | ||
25 | fn _parse_comment<'a>(i: &'a str) -> IResult<&'a str, ()> { | ||
26 | value((), pair(tag("//"), is_not("\n\r")))(i) | ||
27 | } | ||
28 | |||
24 | fn parse_unit<'a>(i: &'a str) -> IResult<&'a str, ()> { | 29 | fn parse_unit<'a>(i: &'a str) -> IResult<&'a str, ()> { |
25 | let open = char('('); | 30 | let open = char('('); |
26 | let close = char(')'); | 31 | let close = char(')'); |
@@ -169,8 +174,16 @@ fn parse_mul<'a>(i: &'a str) -> IResult<&'a str, Expr> { | |||
169 | alt((recursive, base)).parse(i) | 174 | alt((recursive, base)).parse(i) |
170 | } | 175 | } |
171 | 176 | ||
177 | fn parse_field_access<'a>(i: &'a str) -> IResult<&'a str, Vec<Identifier>> { | ||
178 | let node = tag("node"); | ||
179 | let dot = ws(char('.')); | ||
180 | let fields = separated_list1(ws(char('.')), map(parse_name, str::to_owned)); | ||
181 | map(tuple((node, dot, fields)), |(_, _, fields)| fields)(i) | ||
182 | } | ||
183 | |||
172 | fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Expr> { | 184 | fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Expr> { |
173 | let inner = alt(( | 185 | let inner = alt(( |
186 | map(parse_field_access, Expr::FieldAccess), | ||
174 | map(tag("node"), |_| Expr::Node), | 187 | map(tag("node"), |_| Expr::Node), |
175 | map(parse_block, Expr::Block), | 188 | map(parse_block, Expr::Block), |
176 | map(parse_if, Expr::IfExpr), | 189 | map(parse_if, Expr::IfExpr), |
@@ -337,7 +350,7 @@ fn parse_pattern<'a>(i: &str) -> IResult<&str, Pattern> { | |||
337 | tuple((parse_modifier, multispace0, parse_ident)), | 350 | tuple((parse_modifier, multispace0, parse_ident)), |
338 | |(modifier, _, kind)| Pattern::Node(NodePattern { modifier, kind }), | 351 | |(modifier, _, kind)| Pattern::Node(NodePattern { modifier, kind }), |
339 | ); | 352 | ); |
340 | alt((begin, end, node)).parse(i) | 353 | ws(alt((begin, end, node))).parse(i) |
341 | } | 354 | } |
342 | 355 | ||
343 | pub fn parse_stanza<'a>(i: &str) -> IResult<&str, Stanza> { | 356 | pub fn parse_stanza<'a>(i: &str) -> IResult<&str, Stanza> { |
@@ -358,6 +371,25 @@ pub fn parse_file(i: &str) -> IResult<&str, Vec<Stanza>> { | |||
358 | mod test { | 371 | mod test { |
359 | use super::*; | 372 | use super::*; |
360 | 373 | ||
374 | // test helpers | ||
375 | impl Expr { | ||
376 | pub fn int(int: i128) -> Expr { | ||
377 | Self::Lit(Literal::Int(int)) | ||
378 | } | ||
379 | |||
380 | pub fn str(s: &str) -> Expr { | ||
381 | Self::Lit(Literal::Str(s.to_owned())) | ||
382 | } | ||
383 | |||
384 | pub const fn false_() -> Expr { | ||
385 | Self::Lit(Literal::Bool(false)) | ||
386 | } | ||
387 | |||
388 | pub const fn true_() -> Expr { | ||
389 | Self::Lit(Literal::Bool(true)) | ||
390 | } | ||
391 | } | ||
392 | |||
361 | #[test] | 393 | #[test] |
362 | fn test_parse_unit() { | 394 | fn test_parse_unit() { |
363 | assert_eq!(parse_unit("()"), Ok(("", ()))) | 395 | assert_eq!(parse_unit("()"), Ok(("", ()))) |
@@ -535,6 +567,26 @@ mod test { | |||
535 | } | 567 | } |
536 | 568 | ||
537 | #[test] | 569 | #[test] |
570 | fn test_parse_node() { | ||
571 | assert_eq!(parse_expr(r#" node "#), Ok(("", Expr::Node))); | ||
572 | assert_eq!( | ||
573 | parse_expr(r#" node.foo "#), | ||
574 | Ok(("", Expr::FieldAccess(vec!["foo".to_owned()]))) | ||
575 | ); | ||
576 | assert_eq!( | ||
577 | parse_expr( | ||
578 | r#" node | ||
579 | .foo | ||
580 | .bar"# | ||
581 | ), | ||
582 | Ok(( | ||
583 | "", | ||
584 | Expr::FieldAccess(vec!["foo".to_owned(), "bar".to_owned()]) | ||
585 | )) | ||
586 | ); | ||
587 | } | ||
588 | |||
589 | #[test] | ||
538 | fn test_parse_if() { | 590 | fn test_parse_if() { |
539 | assert_eq!( | 591 | assert_eq!( |
540 | parse_expr( | 592 | parse_expr( |