From 58b651418ee59eb576595718439352f244b23ddc Mon Sep 17 00:00:00 2001 From: Akshay Date: Sun, 14 Jul 2024 13:58:11 +0100 Subject: add readme --- readme.txt | 148 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/main.rs | 2 +- 2 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 readme.txt diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..c2bcdea --- /dev/null +++ b/readme.txt @@ -0,0 +1,148 @@ +tbsp - tree-based source-processing language + + +tbsp is an awk-like language that operates on tree-sitter +syntax trees. to motivate the need for such a program, we +could begin by writing a markdown-to-html converter using +tbsp and tree-sitter-md [0]. we need some markdown to begin +with: + + + # 1 heading + + content of first paragraph + + ## 1.1 heading + + content of nested paragraph + + +for future reference, this markdown is parsed like so by +tree-sitter-md (visualization generated by tree-viz [1]): + + + document + | section + | | atx_heading + | | | atx_h1_marker "#" + | | | heading_content inline "1 heading" + | | paragraph + | | | inline "content of first paragraph" + | | section + | | | atx_heading + | | | | atx_h2_marker "##" + | | | | heading_content inline "1.1 heading" + | | | paragraph + | | | | inline "content of nested paragraph" + + +onto the converter itself. every tbsp program is written as +a collection of stanzas. typically, we start with a stanza +like so: + + + BEGIN { + int depth = 0; + + print("<html>\n"); + print("<body>\n"); + } + + +the stanza begins with a "pattern", in this case, "BEGIN", +and is followed by a block of code. this block specifically, is +executed right at the beginning, before traversing the parse +tree. in this stanza, we set a "depth" variable to keep +track of nesting of markdown headers, and begin our html +document by printing the "<html>" and "<body>" tags. 
+ +we can follow this stanza with an "END" stanza, that is +executed after the traversal: + + + END { + print("</body>\n"); + print("</html>\n"); + } + + +in this stanza, we close off the tags we opened at the start +of the document. we can move onto the interesting bits of +the conversion now: + + + enter section { + depth += 1; + } + leave section { + depth -= 1; + } + + +the above stanzas begin with "enter" and "leave" clauses, +followed by the name of a tree-sitter node kind: "section". +the "section" identifier is visible in the +tree-visualization above, it encompasses a markdown-section, +and is created for every markdown header. to understand how +tbsp executes the above stanzas: + + + document ... depth = 0 + | section <-------- enter section (1) ... depth = 1 + | | atx_heading + | | | inline + | | paragraph + | | | inline + | | section <----- enter section (2) ... depth = 2 + | | | atx_heading + | | | | inline + | | | paragraph + | | | | inline + | | | <----------- leave section (2) ... depth = 1 + | | <-------------- leave section (1) ... depth = 0 + + +the following stanzas should be self-explanatory now: + + + enter atx_heading { + print("<h"); + print(depth); + print(">"); + } + leave atx_heading { + print("</h"); + print(depth); + print(">\n"); + } + + enter inline { + print(text(node)); + } + + +but an explanation is included nonetheless: + + + document ... depth = 0 + | section <-------- enter section (1) ... depth = 1 + | | atx_heading <- enter atx_heading ... print "<h1>" + | | | inline <--- enter inline ... print .. + | | | <----------- leave atx_heading ... print "</h1>" + | | paragraph + | | | inline <--- enter inline ... print .. + | | section <----- enter section (2) ... depth = 2 + | | | atx_heading enter atx_heading ... print "<h2>" + | | | | inline <- enter inline ... print .. + | | | | <-------- leave atx_heading ... print "</h2>" + | | | paragraph + | | | | inline <- enter inline ... print .. + | | | <----------- leave section (2) ... depth = 1 + | | <-------------- leave section (1) ... depth = 0 + + +the examples directory contains a complete markdown-to-html +converter, along with a few other motivating examples. + +[0]: https://github.com/tree-sitter-grammars/tree-sitter-markdown +[1]: https://git.peppe.rs/languages/scope-tools/tree/tree-viz diff --git a/src/main.rs b/src/main.rs index ed024fc..8f63ca2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,7 @@ /// tree-based source processor #[derive(argh::FromArgs)] struct Cli { - /// read the TBSP program source from a file + /// read the tbsp program source from a file #[argh(option, short = 'f')] program_file: std::path::PathBuf, -- cgit v1.2.3