add readme

author: Akshay <[email protected]> 2024-07-14 13:58:11 +0100
committer: Akshay <[email protected]> 2024-07-14 14:00:55 +0100
commit: 58b651418ee59eb576595718439352f244b23ddc (patch)
tree: 0b0a5c095f52b4307450f7cbc570c3b7c5c49fa4
parent: 9ad06d721c1e481c82b4f43df819d76e35757282 (diff)
2 files changed, 149 insertions, 1 deletions
diff --git a/readme.txt b/readme.txt
new file mode 100644
index 0000000..c2bcdea
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,148 @@
+tbsp - tree-based source-processing language
+tbsp is an awk-like language that operates on tree-sitter
+syntax trees. to motivate the need for such a program, we
+could begin by writing a markdown-to-html converter using
+tbsp and tree-sitter-md [0]. we need some markdown to begin
+with:
+    # 1 heading
+    content of first paragraph
+    ## 1.1 heading
+    content of nested paragraph
+for future reference, this markdown is parsed like so by
+tree-sitter-md (visualization generated by tree-viz [1]):
+    document
+    |  section
+    |  |  atx_heading
+    |  |  |  atx_h1_marker "#"
+    |  |  |  heading_content inline "1 heading"
+    |  |  paragraph
+    |  |  |  inline "content of first paragraph"
+    |  |  section
+    |  |  |  atx_heading
+    |  |  |  |  atx_h2_marker "##"
+    |  |  |  |  heading_content inline "1.1 heading"
+    |  |  |  paragraph
+    |  |  |  |  inline "content of nested paragraph"
+onto the converter itself. every tbsp program is written as
+a collection of stanzas. typically, we start with a stanza
+like so:
+    BEGIN {
+        int depth = 0;
+        print("<html>\n");
+        print("<body>\n");
+    }
+the stanza begins with a "pattern", in this case, "BEGIN",
+and is followed by a block of code. this block specifically, is
+executed right at the beginning, before traversing the parse
+tree. in this stanza, we set a "depth" variable to keep
+track of nesting of markdown headers, and begin our html
+document by printing the "<html>" and "<body>" tags.
+we can follow this stanza with an "END" stanza, that is
+executed after the traversal:
+    END {
+        print("</body>\n");
+        print("</html>\n");
+    }
+in this stanza, we close off the tags we opened at the start
+of the document. we can move onto the interesting bits of
+the conversion now:
+    enter section {
+        depth += 1;
+    }
+    leave section {
+        depth -= 1;
+    }
+the above stanzas begin with "enter" and "leave" clauses,
+followed by the name of a tree-sitter node kind: "section".
+the "section" identifier is visible in the
+tree-visualization above; it encompasses a markdown-section,
+and is created for every markdown header. to understand how
+tbsp executes the above stanzas:
+    document                                 ...  depth = 0 
+    |  section <-------- enter section (1)   ...  depth = 1 
+    |  |  atx_heading
+    |  |  |  inline
+    |  |  paragraph
+    |  |  |  inline
+    |  |  section <----- enter section (2)   ...  depth = 2 
+    |  |  |  atx_heading
+    |  |  |  | inline
+    |  |  |  paragraph
+    |  |  |  | inline
+    |  |  | <----------- leave section (2)   ...  depth = 1 
+    |  | <-------------- leave section (1)   ...  depth = 0 
+the following stanzas should be self-explanatory now:
+    enter atx_heading {
+        print("<h");
+        print(depth);
+        print(">");
+    }
+    leave atx_heading {
+        print("</h");
+        print(depth);
+        print(">\n");
+    }
+    enter inline {
+        print(text(node));
+    }
+but an explanation is included nonetheless:
+    document                                 ...  depth = 0 
+    |  section <-------- enter section (1)   ...  depth = 1 
+    |  |  atx_heading <- enter atx_heading   ...  print "<h1>"
+    |  |  |  inline <--- enter inline        ...  print ..
+    |  |  | <----------- leave atx_heading   ...  print "</h1>"
+    |  |  paragraph
+    |  |  |  inline <--- enter inline        ...  print ..
+    |  |  section <----- enter section (2)   ...  depth = 2 
+    |  |  |  atx_heading enter atx_heading   ...  print "<h2>"
+    |  |  |  | inline <- enter inline        ...  print ..
+    |  |  |  | <-------- leave atx_heading   ...  print "</h2>"
+    |  |  |  paragraph
+    |  |  |  | inline <- enter inline        ...  print ..
+    |  |  | <----------- leave section (2)   ...  depth = 1 
+    |  | <-------------- leave section (1)   ...  depth = 0 
+the examples directory contains a complete markdown-to-html
+converter, along with a few other motivating examples.
+[0]: https://github.com/tree-sitter-grammars/tree-sitter-markdown
+[1]: https://git.peppe.rs/languages/scope-tools/tree/tree-viz
diff --git a/src/main.rs b/src/main.rs
index ed024fc..8f63ca2 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,7 @@
 /// tree-based source processor
 #[derive(argh::FromArgs)]
 struct Cli {
-    /// read the TBSP program source from a file
+    /// read the tbsp program source from a file
    #[argh(option, short = 'f')]
    program_file: std::path::PathBuf,
author	Akshay <[email protected]>	2024-07-14 13:58:11 +0100
committer	Akshay <[email protected]>	2024-07-14 14:00:55 +0100
commit	58b651418ee59eb576595718439352f244b23ddc (patch)
tree	0b0a5c095f52b4307450f7cbc570c3b7c5c49fa4
parent	9ad06d721c1e481c82b4f43df819d76e35757282 (diff)

diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..c2bcdea --- /dev/null +++ b/readme.txt
@@ -0,0 +1,148 @@
		1	tbsp - tree-based source-processing language
		2
		3
		4	tbsp is an awk-like language that operates on tree-sitter
		5	syntax trees. to motivate the need for such a program, we
		6	could begin by writing a markdown-to-html converter using
		7	tbsp and tree-sitter-md [0]. we need some markdown to begin
		8	with:
		9
		10
		11	# 1 heading
		12
		13	content of first paragraph
		14
		15	## 1.1 heading
		16
		17	content of nested paragraph
		18
		19
		20	for future reference, this markdown is parsed like so by
		21	tree-sitter-md (visualization generated by tree-viz [1]):
		22
		23
		24	document
		25	\| section
		26	\| \| atx_heading
		27	\| \| \| atx_h1_marker "#"
		28	\| \| \| heading_content inline "1 heading"
		29	\| \| paragraph
		30	\| \| \| inline "content of first paragraph"
		31	\| \| section
		32	\| \| \| atx_heading
		33	\| \| \| \| atx_h2_marker "##"
		34	\| \| \| \| heading_content inline "1.1 heading"
		35	\| \| \| paragraph
		36	\| \| \| \| inline "content of nested paragraph"
		37
		38
		39	onto the converter itself. every tbsp program is written as
		40	a collection of stanzas. typically, we start with a stanza
		41	like so:
		42
		43
		44	BEGIN {
		45	int depth = 0;
		46
		47	print("<html>\n");
		48	print("<body>\n");
		49	}
		50
		51
		52	the stanza begins with a "pattern", in this case, "BEGIN",
		53	and is followed by a block of code. this block specifically, is
		54	executed right at the beginning, before traversing the parse
		55	tree. in this stanza, we set a "depth" variable to keep
		56	track of nesting of markdown headers, and begin our html
		57	document by printing the "<html>" and "<body>" tags.
		58
		59	we can follow this stanza with an "END" stanza, that is
		60	executed after the traversal:
		61
		62
		63	END {
		64	print("</body>\n");
		65	print("</html>\n");
		66	}
		67
		68
		69	in this stanza, we close off the tags we opened at the start
		70	of the document. we can move onto the interesting bits of
		71	the conversion now:
		72
		73
		74	enter section {
		75	depth += 1;
		76	}
		77	leave section {
		78	depth -= 1;
		79	}
		80
		81
		82	the above stanzas begin with "enter" and "leave" clauses,
		83	followed by the name of a tree-sitter node kind: "section".
		84	the "section" identifier is visible in the
		85	tree-visualization above; it encompasses a markdown-section,
		86	and is created for every markdown header. to understand how
		87	tbsp executes the above stanzas:
		88
		89
		90	document ... depth = 0
		91	\| section <-------- enter section (1) ... depth = 1
		92	\| \| atx_heading
		93	\| \| \| inline
		94	\| \| paragraph
		95	\| \| \| inline
		96	\| \| section <----- enter section (2) ... depth = 2
		97	\| \| \| atx_heading
		98	\| \| \| \| inline
		99	\| \| \| paragraph
		100	\| \| \| \| inline
		101	\| \| \| <----------- leave section (2) ... depth = 1
		102	\| \| <-------------- leave section (1) ... depth = 0
		103
		104
		105	the following stanzas should be self-explanatory now:
		106
		107
		108	enter atx_heading {
		109	print("<h");
		110	print(depth);
		111	print(">");
		112	}
		113	leave atx_heading {
		114	print("</h");
		115	print(depth);
		116	print(">\n");
		117	}
		118
		119	enter inline {
		120	print(text(node));
		121	}
		122
		123
		124	but an explanation is included nonetheless:
		125
		126
		127	document ... depth = 0
		128	\| section <-------- enter section (1) ... depth = 1
		129	\| \| atx_heading <- enter atx_heading ... print "<h1>"
		130	\| \| \| inline <--- enter inline ... print ..
		131	\| \| \| <----------- leave atx_heading ... print "</h1>"
		132	\| \| paragraph
		133	\| \| \| inline <--- enter inline ... print ..
		134	\| \| section <----- enter section (2) ... depth = 2
		135	\| \| \| atx_heading enter atx_heading ... print "<h2>"
		136	\| \| \| \| inline <- enter inline ... print ..
		137	\| \| \| \| <-------- leave atx_heading ... print "</h2>"
		138	\| \| \| paragraph
		139	\| \| \| \| inline <- enter inline ... print ..
		140	\| \| \| <----------- leave section (2) ... depth = 1
		141	\| \| <-------------- leave section (1) ... depth = 0
		142
		143
		144	the examples directory contains a complete markdown-to-html
		145	converter, along with a few other motivating examples.
		146
		147	[0]: https://github.com/tree-sitter-grammars/tree-sitter-markdown
		148	[1]: https://git.peppe.rs/languages/scope-tools/tree/tree-viz


diff --git a/src/main.rs b/src/main.rs index ed024fc..8f63ca2 100644 --- a/src/main.rs +++ b/src/main.rs
@@ -1,7 +1,7 @@
1	/// tree-based source processor	1	/// tree-based source processor
2	#[derive(argh::FromArgs)]	2	#[derive(argh::FromArgs)]
3	struct Cli {	3	struct Cli {
4	/// read the TBSP program source from a file	4	/// read the tbsp program source from a file
5	#[argh(option, short = 'f')]	5	#[argh(option, short = 'f')]
6	program_file: std::path::PathBuf,	6	program_file: std::path::PathBuf,
7		7