diff options
author | Akshay <[email protected]> | 2024-07-14 13:58:11 +0100 |
---|---|---|
committer | Akshay <[email protected]> | 2024-07-14 14:00:55 +0100 |
commit | 58b651418ee59eb576595718439352f244b23ddc (patch) | |
tree | 0b0a5c095f52b4307450f7cbc570c3b7c5c49fa4 | |
parent | 9ad06d721c1e481c82b4f43df819d76e35757282 (diff) |
add readme
-rw-r--r-- | readme.txt | 148 | ||||
-rw-r--r-- | src/main.rs | 2 |
2 files changed, 149 insertions, 1 deletions
diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..c2bcdea --- /dev/null +++ b/readme.txt | |||
@@ -0,0 +1,148 @@ | |||
1 | tbsp - tree-based source-processing language | ||
2 | |||
3 | |||
4 | tbsp is an awk-like language that operates on tree-sitter | ||
5 | syntax trees. to motivate the need for such a program, we | ||
6 | could begin by writing a markdown-to-html converter using | ||
7 | tbsp and tree-sitter-md [0]. we need some markdown to begin | ||
8 | with: | ||
9 | |||
10 | |||
11 | # 1 heading | ||
12 | |||
13 | content of first paragraph | ||
14 | |||
15 | ## 1.1 heading | ||
16 | |||
17 | content of nested paragraph | ||
18 | |||
19 | |||
20 | for future reference, this markdown is parsed like so by | ||
21 | tree-sitter-md (visualization generated by tree-viz [1]): | ||
22 | |||
23 | |||
24 | document | ||
25 | | section | ||
26 | | | atx_heading | ||
27 | | | | atx_h1_marker "#" | ||
28 | | | | heading_content inline "1 heading" | ||
29 | | | paragraph | ||
30 | | | | inline "content of first paragraph" | ||
31 | | | section | ||
32 | | | | atx_heading | ||
33 | | | | | atx_h2_marker "##" | ||
34 | | | | | heading_content inline "1.1 heading" | ||
35 | | | | paragraph | ||
36 | | | | | inline "content of nested paragraph" | ||
37 | |||
38 | |||
39 | onto the converter itself. every tbsp program is written as | ||
40 | a collection of stanzas. typically, we start with a stanza | ||
41 | like so: | ||
42 | |||
43 | |||
44 | BEGIN { | ||
45 | int depth = 0; | ||
46 | |||
47 | print("<html>\n"); | ||
48 | print("<body>\n"); | ||
49 | } | ||
50 | |||
51 | |||
52 | the stanza begins with a "pattern", in this case, "BEGIN", | ||
53 | and is followed by a block of code. this block specifically, is | ||
54 | executed right at the beginning, before traversing the parse | ||
55 | tree. in this stanza, we set a "depth" variable to keep | ||
56 | track of nesting of markdown headers, and begin our html | ||
57 | document by printing the "<html>" and "<body>" tags. | ||
58 | |||
59 | we can follow this stanza with an "END" stanza, that is | ||
60 | executed after the traversal: | ||
61 | |||
62 | |||
63 | END { | ||
64 | print("</body>\n"); | ||
65 | print("</html>\n"); | ||
66 | } | ||
67 | |||
68 | |||
69 | in this stanza, we close off the tags we opened at the start | ||
70 | of the document. we can move on to the interesting bits of | ||
71 | the conversion now: | ||
72 | |||
73 | |||
74 | enter section { | ||
75 | depth += 1; | ||
76 | } | ||
77 | leave section { | ||
78 | depth -= 1; | ||
79 | } | ||
80 | |||
81 | |||
82 | the above stanzas begin with "enter" and "leave" clauses, | ||
83 | followed by the name of a tree-sitter node kind: "section". | ||
84 | the "section" identifier is visible in the | ||
85 | tree-visualization above; it encompasses a markdown-section, | ||
86 | and is created for every markdown header. to understand how | ||
87 | tbsp executes the above stanzas, follow the traversal below: | ||
88 | |||
89 | |||
90 | document ... depth = 0 | ||
91 | | section <-------- enter section (1) ... depth = 1 | ||
92 | | | atx_heading | ||
93 | | | | inline | ||
94 | | | paragraph | ||
95 | | | | inline | ||
96 | | | section <----- enter section (2) ... depth = 2 | ||
97 | | | | atx_heading | ||
98 | | | | | inline | ||
99 | | | | paragraph | ||
100 | | | | | inline | ||
101 | | | | <----------- leave section (2) ... depth = 1 | ||
102 | | | <-------------- leave section (1) ... depth = 0 | ||
103 | |||
104 | |||
105 | the following stanzas should be self-explanatory now: | ||
106 | |||
107 | |||
108 | enter atx_heading { | ||
109 | print("<h"); | ||
110 | print(depth); | ||
111 | print(">"); | ||
112 | } | ||
113 | leave atx_heading { | ||
114 | print("</h"); | ||
115 | print(depth); | ||
116 | print(">\n"); | ||
117 | } | ||
118 | |||
119 | enter inline { | ||
120 | print(text(node)); | ||
121 | } | ||
122 | |||
123 | |||
124 | but an explanation is included nonetheless: | ||
125 | |||
126 | |||
127 | document ... depth = 0 | ||
128 | | section <-------- enter section (1) ... depth = 1 | ||
129 | | | atx_heading <- enter atx_heading ... print "<h1>" | ||
130 | | | | inline <--- enter inline ... print .. | ||
131 | | | | <----------- leave atx_heading ... print "</h1>" | ||
132 | | | paragraph | ||
133 | | | | inline <--- enter inline ... print .. | ||
134 | | | section <----- enter section (2) ... depth = 2 | ||
135 | | | | atx_heading <- enter atx_heading ... print "<h2>" | ||
136 | | | | | inline <- enter inline ... print .. | ||
137 | | | | | <-------- leave atx_heading ... print "</h2>" | ||
138 | | | | paragraph | ||
139 | | | | | inline <- enter inline ... print .. | ||
140 | | | | <----------- leave section (2) ... depth = 1 | ||
141 | | | <-------------- leave section (1) ... depth = 0 | ||
142 | |||
143 | |||
144 | the examples directory contains a complete markdown-to-html | ||
145 | converter, along with a few other motivating examples. | ||
146 | |||
147 | [0]: https://github.com/tree-sitter-grammars/tree-sitter-markdown | ||
148 | [1]: https://git.peppe.rs/languages/scope-tools/tree/tree-viz | ||
diff --git a/src/main.rs b/src/main.rs index ed024fc..8f63ca2 100644 --- a/src/main.rs +++ b/src/main.rs | |||
@@ -1,7 +1,7 @@ | |||
1 | /// tree-based source processor | 1 | /// tree-based source processor |
2 | #[derive(argh::FromArgs)] | 2 | #[derive(argh::FromArgs)] |
3 | struct Cli { | 3 | struct Cli { |
4 | /// read the TBSP program source from a file | 4 | /// read the tbsp program source from a file |
5 | #[argh(option, short = 'f')] | 5 | #[argh(option, short = 'f')] |
6 | program_file: std::path::PathBuf, | 6 | program_file: std::path::PathBuf, |
7 | 7 | ||