From 7ae7e42eb1eb981483cc4183368bec4932b8f1c2 Mon Sep 17 00:00:00 2001 From: Akshay Date: Sun, 14 Jul 2024 10:16:15 +0100 Subject: add trawk cli --- Cargo.toml | 16 ++++- src/ast.rs | 17 +---- src/eval.rs | 55 +++++++++++++-- src/lib.rs | 3 +- src/main.rs | 219 +++++++++++++++++++++++++++++++++++++++++++++++----------- src/parser.rs | 58 +++++++++++++++- 6 files changed, 297 insertions(+), 71 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 147096b..570ec22 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,6 +11,20 @@ regex = "1.3" thiserror = "1.0.61" serde = "1.0.204" nom = "7.1.3" -tree-sitter-python = "0.20" + +tree-sitter-md = {version = "0.1", optional = true} +tree-sitter-typescript = {version = "0.20.1", optional = true} +tree-sitter-javascript = {version = "0.20.0", optional = true} +tree-sitter-python = {version = "0.20.4", optional = true} +tree-sitter-rust = {version = "0.20.3", optional = true} +argh = "0.1.12" + +[features] +default = ["md", "typescript", "javascript", "rust", "python"] +md = ["tree-sitter-md"] +typescript = ["tree-sitter-typescript"] +javascript = ["tree-sitter-javascript"] +python = ["tree-sitter-python"] +rust = ["tree-sitter-rust"] [dev-dependencies] diff --git a/src/ast.rs b/src/ast.rs index 07b5c39..96fe8ab 100644 --- a/src/ast.rs +++ b/src/ast.rs @@ -59,6 +59,7 @@ pub enum Statement { #[derive(Debug, Eq, PartialEq, Clone)] pub enum Expr { Node, + FieldAccess(Vec), Unit, Lit(Literal), Ident(Identifier), @@ -71,22 +72,6 @@ pub enum Expr { } impl Expr { - pub fn int(int: i128) -> Expr { - Self::Lit(Literal::Int(int)) - } - - pub fn str(s: &str) -> Expr { - Self::Lit(Literal::Str(s.to_owned())) - } - - pub const fn false_() -> Expr { - Self::Lit(Literal::Bool(false)) - } - - pub const fn true_() -> Expr { - Self::Lit(Literal::Bool(true)) - } - pub fn boxed(self) -> Box { Box::new(self) } diff --git a/src/eval.rs b/src/eval.rs index 859979d..e13cec3 100644 --- a/src/eval.rs +++ b/src/eval.rs @@ -39,6 +39,7 @@ pub enum Value { String(String), Boolean(bool), Node, + FieldAccess(Vec), } impl Value { @@ -49,6 +50,7 @@ impl Value { Self::String(_) => ast::Type::String, Self::Boolean(_) => ast::Type::Boolean, Self::Node => ast::Type::Node, + Self::FieldAccess(_) => ast::Type::Node, } } @@ -241,6 +243,7 @@ impl fmt::Display for Value { Self::String(s) => write!(f, "{s}"), Self::Boolean(b) => write!(f, "{b}"), Self::Node => write!(f, ""), + Self::FieldAccess(items) => write!(f, ".{}", items.join(".")), } } } @@ -317,7 +320,7 @@ pub enum Error { CurrentNodeNotPresent, } -type Result = std::result::Result; +pub type Result = std::result::Result; pub struct Context<'a> { variables: HashMap, @@ -385,6 +388,7 @@ impl<'a> Context<'a> { ast::Expr::IfExpr(if_expr) => self.eval_if(if_expr), ast::Expr::Block(block) => self.eval_block(block), ast::Expr::Node => Ok(Value::Node), + ast::Expr::FieldAccess(items) => Ok(Value::FieldAccess(items.to_owned())), } } @@ -544,12 +548,33 @@ impl<'a> Context<'a> { } Ok(Value::Unit) } - ("text", [arg]) if self.eval_expr(arg)? == Value::Node => { - let node = self - .cursor - .as_ref() - .ok_or(Error::CurrentNodeNotPresent)? - .node(); + ("text", [arg]) => { + let node = match self.eval_expr(arg)? { + Value::Node => self + .cursor + .as_ref() + .ok_or(Error::CurrentNodeNotPresent)? + .node(), + Value::FieldAccess(fields) => { + let mut node = self + .cursor + .as_ref() + .ok_or(Error::CurrentNodeNotPresent)? + .node(); + for field in &fields { + node = node + .child_by_field_name(field.as_bytes()) + .ok_or_else(|| Error::FailedLookup(field.to_owned()))?; + } + node + } + v => { + return Err(Error::TypeMismatch { + expected: ast::Type::Node, + got: v.ty(), + }) + } + }; let text = node .utf8_text(self.input_src.as_ref().unwrap().as_bytes()) .unwrap(); @@ -629,6 +654,22 @@ impl<'a> Context<'a> { } } +pub fn evaluate(file: &str, program: &str, language: tree_sitter::Language) -> Result { + let mut parser = tree_sitter::Parser::new(); + let _ = parser.set_language(language); + + let tree = parser.parse(file, None).unwrap(); + let cursor = tree.walk(); + + let program = ast::Program::new().from_str(program).unwrap(); + let mut ctx = Context::new(tree_sitter_md::language()) + .with_input(file.to_owned()) + .with_cursor(cursor) + .with_program(program)?; + + ctx.eval() +} + #[cfg(test)] mod test { use super::*; diff --git a/src/lib.rs b/src/lib.rs index 8780b74..fca5dd5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,5 +3,4 @@ mod eval; mod parser; mod string; -pub use ast::Program; -pub use eval::Context; +pub use eval::evaluate; diff --git a/src/main.rs b/src/main.rs index 09a15ef..6196a32 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,47 +1,182 @@ -use trawk::{Context, Program}; +/// TBSP: tree-based source processor +#[derive(argh::FromArgs)] +struct Cli { + /// read the TBSP program source from a file + #[argh(option, short = 'f')] + program_file: std::path::PathBuf, + + /// set the language that the file is written in + #[argh(option, short = 'l')] + language: String, + + /// input file to process + #[argh(positional)] + file: Option, +} fn main() { - let src = r#" -bar = 0 -def foo(): - baz = 5 - "# - .to_owned(); - - let program = Program::new() - .from_str( - r#" - BEGIN { - bool in_def = false; - } - pre function_definition { - in_def = true; + let cli: Cli = argh::from_env(); + + let program = std::fs::read_to_string(&cli.program_file).unwrap_or_else(|e| { + eprintln!( + "failed to read program-file from `{}`: {e}", + cli.program_file.display() + ); + std::process::exit(-1); + }); + + let language = match cli.language.as_str() { + "md" => tree_sitter_md::language(), + "typescript" => tree_sitter_typescript::language_typescript(), + "javascript" => tree_sitter_javascript::language(), + "python" => tree_sitter_python::language(), + "rust" => tree_sitter_rust::language(), + lang => { + eprintln!("unknown language `{lang}`"); + std::process::exit(-1); + } + }; + + let file = cli + .file + .map(std::fs::read_to_string) + .unwrap_or_else(try_consume_stdin) + .unwrap_or_else(|e| { + eprintln!("{e}"); + std::process::exit(-1) + }); + + trawk::evaluate(&file, &program, language).unwrap_or_else(|e| { + eprintln!("{e:?}"); + std::process::exit(-1); + }); +} + +fn try_consume_stdin() -> std::io::Result { + let mut buffer = String::new(); + let mut lock = std::io::stdin().lock(); + + while let Ok(n) = std::io::Read::read_to_string(&mut lock, &mut buffer) { + if n == 0 { + break; + } } - post function_definition { - in_def = false; + + if buffer.is_empty() { + Err(std::io::Error::other("empty stdin")) + } else { + Ok(buffer) } - pre identifier { - if (in_def) { - print(text(node)); - print(" "); - print("in def\n"); - } else { - }; - }"#, - ) - .unwrap(); - - let mut parser = tree_sitter::Parser::new(); - let _ = parser.set_language(tree_sitter_python::language()); - - let tree = parser.parse(&src, None).unwrap(); - let cursor = tree.walk(); - - let mut ctx = Context::new(tree_sitter_python::language()) - .with_input(src) - .with_cursor(cursor) - .with_program(program) - .unwrap(); - - let _ = ctx.eval(); } + +// fn main() { +// let src = r#" +// # foo1 +// +// bar +// +// ## foo1.1 +// +// bar baz +// +// # foo2 +// +// bar baz +// +// ``` +// fn main() { +// } +// ``` +// +// - foo +// - bar +// - baz +// +// "# +// .to_owned(); +// +// let program = Program::new() +// .from_str( +// r#" +// BEGIN { +// int depth = 0; +// +// print("\n"); +// print("\n"); +// } +// +// enter section { +// depth += 1; +// } +// leave section { +// depth -= 1; +// } +// +// enter atx_heading { +// print(""); +// } +// leave atx_heading { +// print("\n"); +// } +// +// enter paragraph { +// print("

"); +// } +// leave paragraph { +// print("

\n"); +// } +// +// enter list { +// print("
    "); +// } +// leave list { +// print("
\n"); +// } +// +// enter list_item { +// print("
  • "); +// } +// leave list_item { +// print("
  • \n"); +// } +// +// enter fenced_code_block { +// print("
    ");
    +//     }
    +//     leave fenced_code_block {
    +//         print("
    \n"); +// } +// +// enter inline { +// print(text(node)); +// } +// enter code_fence_content { +// print(text(node)); +// } +// +// END { +// print("\n"); +// print("\n"); +// } +// "#, +// ) +// .unwrap(); +// +// let mut parser = tree_sitter::Parser::new(); +// let _ = parser.set_language(&tree_sitter_md::language()); +// +// let tree = parser.parse(&src, None).unwrap(); +// let cursor = tree.walk(); +// +// let mut ctx = Context::new(tree_sitter_md::language()) +// .with_input(src) +// .with_cursor(cursor) +// .with_program(program) +// .unwrap(); +// +// let _ = ctx.eval(); +// } diff --git a/src/parser.rs b/src/parser.rs index 3a020dc..d705a11 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,10 +1,10 @@ use nom::{ branch::alt, - bytes::complete::tag, + bytes::complete::{is_not, tag}, character::complete::{alpha1, alphanumeric1, char, multispace0, multispace1, one_of}, combinator::{map, opt, recognize, value}, error::ParseError, - multi::{many0, many0_count, many1, separated_list0}, + multi::{many0, many0_count, many1, separated_list0, separated_list1}, sequence::{delimited, pair, preceded, terminated, tuple}, IResult, Parser, }; @@ -21,6 +21,11 @@ where delimited(multispace0, inner, multispace0) } +// TODO use this +fn _parse_comment<'a>(i: &'a str) -> IResult<&'a str, ()> { + value((), pair(tag("//"), is_not("\n\r")))(i) +} + fn parse_unit<'a>(i: &'a str) -> IResult<&'a str, ()> { let open = char('('); let close = char(')'); @@ -169,8 +174,16 @@ fn parse_mul<'a>(i: &'a str) -> IResult<&'a str, Expr> { alt((recursive, base)).parse(i) } +fn parse_field_access<'a>(i: &'a str) -> IResult<&'a str, Vec> { + let node = tag("node"); + let dot = ws(char('.')); + let fields = separated_list1(ws(char('.')), map(parse_name, str::to_owned)); + map(tuple((node, dot, fields)), |(_, _, fields)| fields)(i) +} + fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Expr> { let inner = alt(( + map(parse_field_access, Expr::FieldAccess), map(tag("node"), |_| Expr::Node), map(parse_block, Expr::Block), map(parse_if, Expr::IfExpr), @@ -337,7 +350,7 @@ fn parse_pattern<'a>(i: &str) -> IResult<&str, Pattern> { tuple((parse_modifier, multispace0, parse_ident)), |(modifier, _, kind)| Pattern::Node(NodePattern { modifier, kind }), ); - alt((begin, end, node)).parse(i) + ws(alt((begin, end, node))).parse(i) } pub fn parse_stanza<'a>(i: &str) -> IResult<&str, Stanza> { @@ -358,6 +371,25 @@ pub fn parse_file(i: &str) -> IResult<&str, Vec> { mod test { use super::*; + // test helpers + impl Expr { + pub fn int(int: i128) -> Expr { + Self::Lit(Literal::Int(int)) + } + + pub fn str(s: &str) -> Expr { + Self::Lit(Literal::Str(s.to_owned())) + } + + pub const fn false_() -> Expr { + Self::Lit(Literal::Bool(false)) + } + + pub const fn true_() -> Expr { + Self::Lit(Literal::Bool(true)) + } + } + #[test] fn test_parse_unit() { assert_eq!(parse_unit("()"), Ok(("", ()))) @@ -534,6 +566,26 @@ mod test { ); } + #[test] + fn test_parse_node() { + assert_eq!(parse_expr(r#" node "#), Ok(("", Expr::Node))); + assert_eq!( + parse_expr(r#" node.foo "#), + Ok(("", Expr::FieldAccess(vec!["foo".to_owned()]))) + ); + assert_eq!( + parse_expr( + r#" node + .foo + .bar"# + ), + Ok(( + "", + Expr::FieldAccess(vec!["foo".to_owned(), "bar".to_owned()]) + )) + ); + } + #[test] fn test_parse_if() { assert_eq!( -- cgit v1.2.3