aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--Cargo.toml6
-rw-r--r--minirust.rs152
-rw-r--r--rfc.md133
-rw-r--r--src/lib.rs7
5 files changed, 112 insertions, 189 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..6aa106405
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
1/target/
2**/*.rs.bk
3Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 000000000..c94b99fad
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,6 @@
1[package]
2name = "libsyntax2"
3version = "0.1.0"
4authors = ["Aleksey Kladov <[email protected]>"]
5
6[dependencies]
diff --git a/minirust.rs b/minirust.rs
deleted file mode 100644
index 9baa96039..000000000
--- a/minirust.rs
+++ /dev/null
@@ -1,152 +0,0 @@
1#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
2pub struct NodeKind(u16);
3
4pub struct File {
5 text: String,
6 nodes: Vec<NodeData>,
7}
8
9struct NodeData {
10 kind: NodeKind,
11 range: (u32, u32),
12 parent: Option<u32>,
13 first_child: Option<u32>,
14 next_sibling: Option<u32>,
15}
16
17#[derive(Clone, Copy)]
18pub struct Node<'f> {
19 file: &'f File,
20 idx: u32,
21}
22
23pub struct Children<'f> {
24 next: Option<Node<'f>>,
25}
26
27impl File {
28 pub fn root<'f>(&'f self) -> Node<'f> {
29 assert!(!self.nodes.is_empty());
30 Node { file: self, idx: 0 }
31 }
32}
33
34impl<'f> Node<'f> {
35 pub fn kind(&self) -> NodeKind {
36 self.data().kind
37 }
38
39 pub fn text(&self) -> &'f str {
40 let (start, end) = self.data().range;
41 &self.file.text[start as usize..end as usize]
42 }
43
44 pub fn parent(&self) -> Option<Node<'f>> {
45 self.as_node(self.data().parent)
46 }
47
48 pub fn children(&self) -> Children<'f> {
49 Children { next: self.as_node(self.data().first_child) }
50 }
51
52 fn data(&self) -> &'f NodeData {
53 &self.file.nodes[self.idx as usize]
54 }
55
56 fn as_node(&self, idx: Option<u32>) -> Option<Node<'f>> {
57 idx.map(|idx| Node { file: self.file, idx })
58 }
59}
60
61impl<'f> Iterator for Children<'f> {
62 type Item = Node<'f>;
63
64 fn next(&mut self) -> Option<Node<'f>> {
65 let next = self.next;
66 self.next = next.and_then(|node| node.as_node(node.data().next_sibling));
67 next
68 }
69}
70
71pub const ERROR: NodeKind = NodeKind(0);
72pub const WHITESPACE: NodeKind = NodeKind(1);
73pub const STRUCT_KW: NodeKind = NodeKind(2);
74pub const IDENT: NodeKind = NodeKind(3);
75pub const L_CURLY: NodeKind = NodeKind(4);
76pub const R_CURLY: NodeKind = NodeKind(5);
77pub const COLON: NodeKind = NodeKind(6);
78pub const COMMA: NodeKind = NodeKind(7);
79pub const AMP: NodeKind = NodeKind(8);
80pub const LINE_COMMENT: NodeKind = NodeKind(9);
81pub const FILE: NodeKind = NodeKind(10);
82pub const STRUCT_DEF: NodeKind = NodeKind(11);
83pub const FIELD_DEF: NodeKind = NodeKind(12);
84pub const TYPE_REF: NodeKind = NodeKind(13);
85
86
87pub trait AstNode<'f>: Copy + 'f {
88 fn new(node: Node<'f>) -> Option<Self>;
89 fn node(&self) -> Node<'f>;
90}
91
92pub fn child_of_kind<'f>(node: Node<'f>, kind: NodeKind) -> Option<Node<'f>> {
93 node.children().find(|child| child.kind() == kind)
94}
95
96pub fn ast_children<'f, A: AstNode<'f>>(node: Node<'f>) -> Box<Iterator<Item=A> + 'f> {
97 Box::new(node.children().filter_map(A::new))
98}
99
100#[derive(Clone, Copy)]
101pub struct StructDef<'f>(Node<'f>);
102
103#[derive(Clone, Copy)]
104pub struct FieldDef<'f>(Node<'f>);
105
106#[derive(Clone, Copy)]
107pub struct TypeRef<'f>(Node<'f>);
108
109pub trait NameOwner<'f>: AstNode<'f> {
110 fn name_ident(&self) -> Node<'f> {
111 child_of_kind(self.node(), IDENT).unwrap()
112 }
113
114 fn name(&self) -> &'f str { self.name_ident().text() }
115}
116
117
118impl<'f> AstNode<'f> for StructDef<'f> {
119 fn new(node: Node<'f>) -> Option<Self> {
120 if node.kind() == STRUCT_DEF { Some(StructDef(node)) } else { None }
121 }
122 fn node(&self) -> Node<'f> { self.0 }
123}
124
125impl<'f> AstNode<'f> for FieldDef<'f> {
126 fn new(node: Node<'f>) -> Option<Self> {
127 if node.kind() == FIELD_DEF { Some(FieldDef(node)) } else { None }
128 }
129 fn node(&self) -> Node<'f> { self.0 }
130}
131
132impl<'f> AstNode<'f> for TypeRef<'f> {
133 fn new(node: Node<'f>) -> Option<Self> {
134 if node.kind() == TYPE_REF { Some(TypeRef(node)) } else { None }
135 }
136 fn node(&self) -> Node<'f> { self.0 }
137}
138
139impl<'f> NameOwner<'f> for StructDef<'f> {}
140impl<'f> NameOwner<'f> for FieldDef<'f> {}
141
142impl<'f> StructDef<'f> {
143 pub fn fields(&self) -> Box<Iterator<Item=FieldDef<'f>> + 'f> {
144 ast_children(self.node())
145 }
146}
147
148impl<'f> FieldDef<'f> {
149 pub fn type_ref(&self) -> Option<TypeRef<'f>> {
150 ast_children(self.node()).next()
151 }
152} \ No newline at end of file
diff --git a/rfc.md b/rfc.md
index c6cb4320f..2bd9f18f1 100644
--- a/rfc.md
+++ b/rfc.md
@@ -30,12 +30,66 @@ other tools, and eventual libsyntax removal.
30 30
31Note that this RFC does not propose to stabilize any API for working 31Note that this RFC does not propose to stabilize any API for working
32with rust syntax: the semver version of the hypothetical library would 32with rust syntax: the semver version of the hypothetical library would
33be `0.1.0`. 33be `0.1.0`. It is intended to be used by tools, which are currently
34closely related to the compiler: `rustc`, `rustfmt`, `clippy`, `rls`
35and hypothetical `rustfix`. While it would be possible to create
36third-party tools on top of the new libsyntax, the burden of adopting
37to breaking changes would be on authors of such tools.
34 38
35 39
36# Motivation 40# Motivation
37[motivation]: #motivation 41[motivation]: #motivation
38 42
43There are two main drawbacks with the current version of libsyntax:
44
45* It is tightly integrated with the compiler and hard to use
46 independently
47
48* The AST representation is not well-suited for use inside IDEs
49
50
51## IDE support
52
53There are several differences in how IDEs and compilers typically
54treat source code.
55
56In the compiler, it is convenient to transform the source
57code into Abstract Syntax Tree form, which is independent of the
58surface syntax. For example, it's convenient to discard comments,
59whitespaces and desugar some syntactic constructs in terms of the
60simpler ones.
61
62In contrast, IDEs work much closer to the source code, so it is
63crucial to preserve full information about the original text. For
64example, IDE may adjust indentation after typing a `}` which closes a
65block, and to do this correctly, IDE must be aware of syntax (that is,
66that `}` indeed closes some block, and is not a syntax error) and of
67all whitespaces and comments. So, IDE suitable AST should explicitly
68account for syntactic elements, not considered important by the
69compiler.
70
71Another difference is that IDEs typically work with incomplete and
72syntactically invalid code. This boils down to two parser properties.
73First, the parser must produce syntax tree even if some required input
74is missing. For example, for input `fn foo` the function node should
75be present in the parse, despite the fact that there is no parameters
76or body. Second, the parser must be able to skip over parts of input
77it can't recognize and aggressively recover from errors. That is, the
78syntax tree data structure should be able to handle both missing and
79extra nodes.
80
81IDEs also need the ability to incrementally reparse and relex source
82code after the user types. A smart IDE would use syntax tree structure
83to handle editing commands (for example, to add/remove trailing commas
84after join/split lines actions), so parsing time can be very
85noticeable.
86
87
88Currently rustc uses the classical AST approach, and preserves some of
89the source code information in the form of spans in the AST. It is not
90clear if this structure can full fill all IDE requirements.
91
92
39## Reusability 93## Reusability
40 94
41In theory, the parser can be a pure function, which takes a `&str` as 95In theory, the parser can be a pure function, which takes a `&str` as
@@ -67,43 +121,42 @@ files. As a data point, it turned out to be easier to move `rustfmt`
67into the main `rustc` repository than to move libsyntax outside! 121into the main `rustc` repository than to move libsyntax outside!
68 122
69 123
70## IDE support 124# Guide-level explanation
125[guide-level-explanation]: #guide-level-explanation
71 126
72There is one big difference in how IDEs and compilers typically treat 127Not applicable.
73source code.
74 128
75In the compiler, it is convenient to transform the source
76code into Abstract Syntax Tree form, which is independent of the
77surface syntax. For example, it's convenient to discard comments,
78whitespaces and desugar some syntactic constructs in terms of the
79simpler ones.
80 129
81In contrast, for IDEs it is crucial to have a lossless view of the 130# Reference-level explanation
82source code because, for example, it's important to preserve comments 131[reference-level-explanation]: #reference-level-explanation
83during refactorings. Ideally, IDEs should be able to incrementally
84relex and reparse the file as the user types, because syntax tree is
85necessary to correctly handle certain code-editing actions like
86autoindentation or joining lines. IDE also must be able to produce
87partial parse trees when some input is missing or invalid.
88 132
89Currently rustc uses the AST approach, and preserves some of the 133It is not clear if a single parser can accommodate the needs of the
90source code information in the form of spans in the AST. 134compiler and the IDE, but there is hope that it is possible. The RFC
135proposes to develop libsynax2.0 as an experimental crates.io crate. If
136the experiment turns out to be a success, the second RFC will propose
137to integrate it with all existing tools and `rustc`.
91 138
139Next, a syntax tree data structure is proposed for libsyntax2.0. It
140seems to have the following important properties:
92 141
93# Guide-level explanation 142* It is lossless and faithfully represents the original source code,
94[guide-level-explanation]: #guide-level-explanation 143 including explicit nodes for comments and whitespace.
95 144
96Not applicable. 145* It is flexible and allows to encode arbitrary node structure,
146 even for invalid syntax.
97 147
148* It is minimal: it stores small amount of data and has no
149 dependencies. For instance, it does not need compiler's string
150 interner or literal data representation.
98 151
99# Reference-level explanation 152* While the tree itself is minimal, it is extensible in a sense that
100[reference-level-explanation]: #reference-level-explanation 153 it possible to associate arbitrary data with certain nodes in a
154 type-safe way.
101 155
102This section proposes a new syntax tree data structure, which should
103be suitable for both compiler and IDE. It is heavily inspired by [PSI]
104data structure which used in [IntelliJ] based IDEs and in the [Kotlin]
105compiler.
106 156
157It is not clear if this representation is the best one. It is heavily
158inspired by [PSI] data structure which used in [IntelliJ] based IDEs
159and in the [Kotlin] compiler.
107 160
108[PSI]: http://www.jetbrains.org/intellij/sdk/docs/reference_guide/custom_language_support/implementing_parser_and_psi.html 161[PSI]: http://www.jetbrains.org/intellij/sdk/docs/reference_guide/custom_language_support/implementing_parser_and_psi.html
109[IntelliJ]: https://github.com/JetBrains/intellij-community/ 162[IntelliJ]: https://github.com/JetBrains/intellij-community/
@@ -351,6 +404,11 @@ impl<'f> AstNode<'f> for TypeRef<'f> {
351} 404}
352``` 405```
353 406
407Note that although AST wrappers provide a type-safe access to the
408tree, they are still represented as indexes, so clients of the syntax
409tree can easily associated additional data with AST nodes by storing
410it in a side-table.
411
354 412
355## Missing Source Code 413## Missing Source Code
356 414
@@ -374,7 +432,8 @@ This RFC proposes huge changes to the internals of the compiler, so
374it's important to proceed carefully and incrementally. The following 432it's important to proceed carefully and incrementally. The following
375plan is suggested: 433plan is suggested:
376 434
377* RFC discussion about the theoretical feasibility of the proposal. 435* RFC discussion about the theoretical feasibility of the proposal,
436 and the best representation representation for the syntax tree.
378 437
379* Implementation of the proposal as a completely separate crates.io 438* Implementation of the proposal as a completely separate crates.io
380 crate, by refactoring existing libsyntax source code to produce a 439 crate, by refactoring existing libsyntax source code to produce a
@@ -393,11 +452,11 @@ plan is suggested:
393- No harm will be done as long as the new libsyntax exists as an 452- No harm will be done as long as the new libsyntax exists as an
394 experiemt on crates.io. However, actually using it in the compiler 453 experiemt on crates.io. However, actually using it in the compiler
395 and other tools would require massive refactorings. 454 and other tools would require massive refactorings.
396 455
397- Proposed syntax tree requires to keep the original source code 456- It's difficult to know upfront if the proposed syntax tree would
398 available, which might increase memory usage of the 457 actually work well in both the compiler and IDE. It may be possible
399 compiler. However, it should be possible to throw the original tree 458 that some drawbacks will be discovered during implementation.
400 and source code away after conversion to HIR. 459
401 460
402# Rationale and alternatives 461# Rationale and alternatives
403[alternatives]: #alternatives 462[alternatives]: #alternatives
@@ -422,14 +481,14 @@ plan is suggested:
422 the source code? It seems like the answer is yes, because the 481 the source code? It seems like the answer is yes, because the
423 language and especially macros were cleverly designed with this 482 language and especially macros were cleverly designed with this
424 use-case in mind. 483 use-case in mind.
425 484
426 485
427- Is it possible to implement macro expansion using the proposed 486- Is it possible to implement macro expansion using the proposed
428 framework? This is the main question of this RFC. The proposed 487 framework? This is the main question of this RFC. The proposed
429 solution of synthesizing source code on the fly seems workable: it's 488 solution of synthesizing source code on the fly seems workable: it's
430 not that different from the current implementation, which 489 not that different from the current implementation, which
431 synthesizes token trees. 490 synthesizes token trees.
432 491
433 492
434- How to actually phase out current libsyntax, if libsyntax2.0 turns 493- How to actually phase out current libsyntax, if libsyntax2.0 turns
435 out to be a success? 494 out to be a success?
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 000000000..31e1bb209
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,7 @@
1#[cfg(test)]
2mod tests {
3 #[test]
4 fn it_works() {
5 assert_eq!(2 + 2, 4);
6 }
7}