aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.toml16
-rw-r--r--src/ast.rs17
-rw-r--r--src/eval.rs55
-rw-r--r--src/lib.rs3
-rw-r--r--src/main.rs219
-rw-r--r--src/parser.rs58
6 files changed, 297 insertions, 71 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 147096b..570ec22 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,6 +11,20 @@ regex = "1.3"
11thiserror = "1.0.61" 11thiserror = "1.0.61"
12serde = "1.0.204" 12serde = "1.0.204"
13nom = "7.1.3" 13nom = "7.1.3"
14tree-sitter-python = "0.20" 14
15tree-sitter-md = {version = "0.1", optional = true}
16tree-sitter-typescript = {version = "0.20.1", optional = true}
17tree-sitter-javascript = {version = "0.20.0", optional = true}
18tree-sitter-python = {version = "0.20.4", optional = true}
19tree-sitter-rust = {version = "0.20.3", optional = true}
20argh = "0.1.12"
21
22[features]
23default = ["md", "typescript", "javascript", "rust", "python"]
24md = ["tree-sitter-md"]
25typescript = ["tree-sitter-typescript"]
26javascript = ["tree-sitter-javascript"]
27python = ["tree-sitter-python"]
28rust = ["tree-sitter-rust"]
15 29
16[dev-dependencies] 30[dev-dependencies]
diff --git a/src/ast.rs b/src/ast.rs
index 07b5c39..96fe8ab 100644
--- a/src/ast.rs
+++ b/src/ast.rs
@@ -59,6 +59,7 @@ pub enum Statement {
59#[derive(Debug, Eq, PartialEq, Clone)] 59#[derive(Debug, Eq, PartialEq, Clone)]
60pub enum Expr { 60pub enum Expr {
61 Node, 61 Node,
62 FieldAccess(Vec<Identifier>),
62 Unit, 63 Unit,
63 Lit(Literal), 64 Lit(Literal),
64 Ident(Identifier), 65 Ident(Identifier),
@@ -71,22 +72,6 @@ pub enum Expr {
71} 72}
72 73
73impl Expr { 74impl Expr {
74 pub fn int(int: i128) -> Expr {
75 Self::Lit(Literal::Int(int))
76 }
77
78 pub fn str(s: &str) -> Expr {
79 Self::Lit(Literal::Str(s.to_owned()))
80 }
81
82 pub const fn false_() -> Expr {
83 Self::Lit(Literal::Bool(false))
84 }
85
86 pub const fn true_() -> Expr {
87 Self::Lit(Literal::Bool(true))
88 }
89
90 pub fn boxed(self) -> Box<Expr> { 75 pub fn boxed(self) -> Box<Expr> {
91 Box::new(self) 76 Box::new(self)
92 } 77 }
diff --git a/src/eval.rs b/src/eval.rs
index 859979d..e13cec3 100644
--- a/src/eval.rs
+++ b/src/eval.rs
@@ -39,6 +39,7 @@ pub enum Value {
39 String(String), 39 String(String),
40 Boolean(bool), 40 Boolean(bool),
41 Node, 41 Node,
42 FieldAccess(Vec<String>),
42} 43}
43 44
44impl Value { 45impl Value {
@@ -49,6 +50,7 @@ impl Value {
49 Self::String(_) => ast::Type::String, 50 Self::String(_) => ast::Type::String,
50 Self::Boolean(_) => ast::Type::Boolean, 51 Self::Boolean(_) => ast::Type::Boolean,
51 Self::Node => ast::Type::Node, 52 Self::Node => ast::Type::Node,
53 Self::FieldAccess(_) => ast::Type::Node,
52 } 54 }
53 } 55 }
54 56
@@ -241,6 +243,7 @@ impl fmt::Display for Value {
241 Self::String(s) => write!(f, "{s}"), 243 Self::String(s) => write!(f, "{s}"),
242 Self::Boolean(b) => write!(f, "{b}"), 244 Self::Boolean(b) => write!(f, "{b}"),
243 Self::Node => write!(f, "<node>"), 245 Self::Node => write!(f, "<node>"),
246 Self::FieldAccess(items) => write!(f, "<node>.{}", items.join(".")),
244 } 247 }
245 } 248 }
246} 249}
@@ -317,7 +320,7 @@ pub enum Error {
317 CurrentNodeNotPresent, 320 CurrentNodeNotPresent,
318} 321}
319 322
320type Result = std::result::Result<Value, Error>; 323pub type Result = std::result::Result<Value, Error>;
321 324
322pub struct Context<'a> { 325pub struct Context<'a> {
323 variables: HashMap<ast::Identifier, Variable>, 326 variables: HashMap<ast::Identifier, Variable>,
@@ -385,6 +388,7 @@ impl<'a> Context<'a> {
385 ast::Expr::IfExpr(if_expr) => self.eval_if(if_expr), 388 ast::Expr::IfExpr(if_expr) => self.eval_if(if_expr),
386 ast::Expr::Block(block) => self.eval_block(block), 389 ast::Expr::Block(block) => self.eval_block(block),
387 ast::Expr::Node => Ok(Value::Node), 390 ast::Expr::Node => Ok(Value::Node),
391 ast::Expr::FieldAccess(items) => Ok(Value::FieldAccess(items.to_owned())),
388 } 392 }
389 } 393 }
390 394
@@ -544,12 +548,33 @@ impl<'a> Context<'a> {
544 } 548 }
545 Ok(Value::Unit) 549 Ok(Value::Unit)
546 } 550 }
547 ("text", [arg]) if self.eval_expr(arg)? == Value::Node => { 551 ("text", [arg]) => {
548 let node = self 552 let node = match self.eval_expr(arg)? {
549 .cursor 553 Value::Node => self
550 .as_ref() 554 .cursor
551 .ok_or(Error::CurrentNodeNotPresent)? 555 .as_ref()
552 .node(); 556 .ok_or(Error::CurrentNodeNotPresent)?
557 .node(),
558 Value::FieldAccess(fields) => {
559 let mut node = self
560 .cursor
561 .as_ref()
562 .ok_or(Error::CurrentNodeNotPresent)?
563 .node();
564 for field in &fields {
565 node = node
566 .child_by_field_name(field.as_bytes())
567 .ok_or_else(|| Error::FailedLookup(field.to_owned()))?;
568 }
569 node
570 }
571 v => {
572 return Err(Error::TypeMismatch {
573 expected: ast::Type::Node,
574 got: v.ty(),
575 })
576 }
577 };
553 let text = node 578 let text = node
554 .utf8_text(self.input_src.as_ref().unwrap().as_bytes()) 579 .utf8_text(self.input_src.as_ref().unwrap().as_bytes())
555 .unwrap(); 580 .unwrap();
@@ -629,6 +654,22 @@ impl<'a> Context<'a> {
629 } 654 }
630} 655}
631 656
657pub fn evaluate(file: &str, program: &str, language: tree_sitter::Language) -> Result {
658 let mut parser = tree_sitter::Parser::new();
659 let _ = parser.set_language(language);
660
661 let tree = parser.parse(file, None).unwrap();
662 let cursor = tree.walk();
663
664 let program = ast::Program::new().from_str(program).unwrap();
665 let mut ctx = Context::new(tree_sitter_md::language())
666 .with_input(file.to_owned())
667 .with_cursor(cursor)
668 .with_program(program)?;
669
670 ctx.eval()
671}
672
632#[cfg(test)] 673#[cfg(test)]
633mod test { 674mod test {
634 use super::*; 675 use super::*;
diff --git a/src/lib.rs b/src/lib.rs
index 8780b74..fca5dd5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,5 +3,4 @@ mod eval;
3mod parser; 3mod parser;
4mod string; 4mod string;
5 5
6pub use ast::Program; 6pub use eval::evaluate;
7pub use eval::Context;
diff --git a/src/main.rs b/src/main.rs
index 09a15ef..6196a32 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,47 +1,182 @@
1use trawk::{Context, Program}; 1/// TBSP: tree-based source processor
2#[derive(argh::FromArgs)]
3struct Cli {
4 /// read the TBSP program source from a file
5 #[argh(option, short = 'f')]
6 program_file: std::path::PathBuf,
7
8 /// set the language that the file is written in
9 #[argh(option, short = 'l')]
10 language: String,
11
12 /// input file to process
13 #[argh(positional)]
14 file: Option<std::path::PathBuf>,
15}
2 16
3fn main() { 17fn main() {
4 let src = r#" 18 let cli: Cli = argh::from_env();
5bar = 0 19
6def foo(): 20 let program = std::fs::read_to_string(&cli.program_file).unwrap_or_else(|e| {
7 baz = 5 21 eprintln!(
8 "# 22 "failed to read program-file from `{}`: {e}",
9 .to_owned(); 23 cli.program_file.display()
10 24 );
11 let program = Program::new() 25 std::process::exit(-1);
12 .from_str( 26 });
13 r#" 27
14 BEGIN { 28 let language = match cli.language.as_str() {
15 bool in_def = false; 29 "md" => tree_sitter_md::language(),
16 } 30 "typescript" => tree_sitter_typescript::language_typescript(),
17 pre function_definition { 31 "javascript" => tree_sitter_javascript::language(),
18 in_def = true; 32 "python" => tree_sitter_python::language(),
33 "rust" => tree_sitter_rust::language(),
34 lang => {
35 eprintln!("unknown language `{lang}`");
36 std::process::exit(-1);
37 }
38 };
39
40 let file = cli
41 .file
42 .map(std::fs::read_to_string)
43 .unwrap_or_else(try_consume_stdin)
44 .unwrap_or_else(|e| {
45 eprintln!("{e}");
46 std::process::exit(-1)
47 });
48
49 trawk::evaluate(&file, &program, language).unwrap_or_else(|e| {
50 eprintln!("{e:?}");
51 std::process::exit(-1);
52 });
53}
54
/// Reads all of standard input into a `String`.
///
/// # Errors
///
/// Propagates any error reported by the read (for example input that is not
/// valid UTF-8), and returns an error with the message `empty stdin` when
/// the read succeeds but yields no data.
fn try_consume_stdin() -> std::io::Result<String> {
    let mut buffer = String::new();
    let mut lock = std::io::stdin().lock();

    // `read_to_string` already reads until EOF, so a single call is enough.
    // Using `?` surfaces the real I/O error instead of silently dropping it
    // and misreporting the failure as empty input.
    std::io::Read::read_to_string(&mut lock, &mut buffer)?;

    if buffer.is_empty() {
        Err(std::io::Error::other("empty stdin"))
    } else {
        Ok(buffer)
    }
}
71
72// fn main() {
73// let src = r#"
74// # foo1
75//
76// bar
77//
78// ## foo1.1
79//
80// bar baz
81//
82// # foo2
83//
84// bar baz
85//
86// ```
87// fn main() {
88// }
89// ```
90//
91// - foo
92// - bar
93// - baz
94//
95// "#
96// .to_owned();
97//
98// let program = Program::new()
99// .from_str(
100// r#"
101// BEGIN {
102// int depth = 0;
103//
104// print("<html>\n");
105// print("<body>\n");
106// }
107//
108// enter section {
109// depth += 1;
110// }
111// leave section {
112// depth -= 1;
113// }
114//
115// enter atx_heading {
116// print("<h");
117// print(depth);
118// print(">");
119// }
120// leave atx_heading {
121// print("</h");
122// print(depth);
123// print(">\n");
124// }
125//
126// enter paragraph {
127// print("<p>");
128// }
129// leave paragraph {
130// print("</p>\n");
131// }
132//
133// enter list {
134// print("<ol>");
135// }
136// leave list {
137// print("</ol>\n");
138// }
139//
140// enter list_item {
141// print("<li>");
142// }
143// leave list_item {
144// print("</li>\n");
145// }
146//
147// enter fenced_code_block {
148// print("<pre>");
149// }
150// leave fenced_code_block {
151// print("</pre>\n");
152// }
153//
154// enter inline {
155// print(text(node));
156// }
157// enter code_fence_content {
158// print(text(node));
159// }
160//
161// END {
162// print("</body>\n");
163// print("</html>\n");
164// }
165// "#,
166// )
167// .unwrap();
168//
169// let mut parser = tree_sitter::Parser::new();
170// let _ = parser.set_language(&tree_sitter_md::language());
171//
172// let tree = parser.parse(&src, None).unwrap();
173// let cursor = tree.walk();
174//
175// let mut ctx = Context::new(tree_sitter_md::language())
176// .with_input(src)
177// .with_cursor(cursor)
178// .with_program(program)
179// .unwrap();
180//
181// let _ = ctx.eval();
182// }
diff --git a/src/parser.rs b/src/parser.rs
index 3a020dc..d705a11 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -1,10 +1,10 @@
1use nom::{ 1use nom::{
2 branch::alt, 2 branch::alt,
3 bytes::complete::tag, 3 bytes::complete::{is_not, tag},
4 character::complete::{alpha1, alphanumeric1, char, multispace0, multispace1, one_of}, 4 character::complete::{alpha1, alphanumeric1, char, multispace0, multispace1, one_of},
5 combinator::{map, opt, recognize, value}, 5 combinator::{map, opt, recognize, value},
6 error::ParseError, 6 error::ParseError,
7 multi::{many0, many0_count, many1, separated_list0}, 7 multi::{many0, many0_count, many1, separated_list0, separated_list1},
8 sequence::{delimited, pair, preceded, terminated, tuple}, 8 sequence::{delimited, pair, preceded, terminated, tuple},
9 IResult, Parser, 9 IResult, Parser,
10}; 10};
@@ -21,6 +21,11 @@ where
21 delimited(multispace0, inner, multispace0) 21 delimited(multispace0, inner, multispace0)
22} 22}
23 23
24// TODO use this
25fn _parse_comment<'a>(i: &'a str) -> IResult<&'a str, ()> {
26 value((), pair(tag("//"), is_not("\n\r")))(i)
27}
28
24fn parse_unit<'a>(i: &'a str) -> IResult<&'a str, ()> { 29fn parse_unit<'a>(i: &'a str) -> IResult<&'a str, ()> {
25 let open = char('('); 30 let open = char('(');
26 let close = char(')'); 31 let close = char(')');
@@ -169,8 +174,16 @@ fn parse_mul<'a>(i: &'a str) -> IResult<&'a str, Expr> {
169 alt((recursive, base)).parse(i) 174 alt((recursive, base)).parse(i)
170} 175}
171 176
177fn parse_field_access<'a>(i: &'a str) -> IResult<&'a str, Vec<Identifier>> {
178 let node = tag("node");
179 let dot = ws(char('.'));
180 let fields = separated_list1(ws(char('.')), map(parse_name, str::to_owned));
181 map(tuple((node, dot, fields)), |(_, _, fields)| fields)(i)
182}
183
172fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Expr> { 184fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Expr> {
173 let inner = alt(( 185 let inner = alt((
186 map(parse_field_access, Expr::FieldAccess),
174 map(tag("node"), |_| Expr::Node), 187 map(tag("node"), |_| Expr::Node),
175 map(parse_block, Expr::Block), 188 map(parse_block, Expr::Block),
176 map(parse_if, Expr::IfExpr), 189 map(parse_if, Expr::IfExpr),
@@ -337,7 +350,7 @@ fn parse_pattern<'a>(i: &str) -> IResult<&str, Pattern> {
337 tuple((parse_modifier, multispace0, parse_ident)), 350 tuple((parse_modifier, multispace0, parse_ident)),
338 |(modifier, _, kind)| Pattern::Node(NodePattern { modifier, kind }), 351 |(modifier, _, kind)| Pattern::Node(NodePattern { modifier, kind }),
339 ); 352 );
340 alt((begin, end, node)).parse(i) 353 ws(alt((begin, end, node))).parse(i)
341} 354}
342 355
343pub fn parse_stanza<'a>(i: &str) -> IResult<&str, Stanza> { 356pub fn parse_stanza<'a>(i: &str) -> IResult<&str, Stanza> {
@@ -358,6 +371,25 @@ pub fn parse_file(i: &str) -> IResult<&str, Vec<Stanza>> {
358mod test { 371mod test {
359 use super::*; 372 use super::*;
360 373
374 // test helpers
375 impl Expr {
376 pub fn int(int: i128) -> Expr {
377 Self::Lit(Literal::Int(int))
378 }
379
380 pub fn str(s: &str) -> Expr {
381 Self::Lit(Literal::Str(s.to_owned()))
382 }
383
384 pub const fn false_() -> Expr {
385 Self::Lit(Literal::Bool(false))
386 }
387
388 pub const fn true_() -> Expr {
389 Self::Lit(Literal::Bool(true))
390 }
391 }
392
361 #[test] 393 #[test]
362 fn test_parse_unit() { 394 fn test_parse_unit() {
363 assert_eq!(parse_unit("()"), Ok(("", ()))) 395 assert_eq!(parse_unit("()"), Ok(("", ())))
@@ -535,6 +567,26 @@ mod test {
535 } 567 }
536 568
537 #[test] 569 #[test]
570 fn test_parse_node() {
571 assert_eq!(parse_expr(r#" node "#), Ok(("", Expr::Node)));
572 assert_eq!(
573 parse_expr(r#" node.foo "#),
574 Ok(("", Expr::FieldAccess(vec!["foo".to_owned()])))
575 );
576 assert_eq!(
577 parse_expr(
578 r#" node
579 .foo
580 .bar"#
581 ),
582 Ok((
583 "",
584 Expr::FieldAccess(vec!["foo".to_owned(), "bar".to_owned()])
585 ))
586 );
587 }
588
589 #[test]
538 fn test_parse_if() { 590 fn test_parse_if() {
539 assert_eq!( 591 assert_eq!(
540 parse_expr( 592 parse_expr(