diff options
-rw-r--r-- | rfc.md | 217 |
1 files changed, 209 insertions, 8 deletions
@@ -1,4 +1,4 @@ | |||
1 | - Feature Name: libsyntax2 | 1 | - Feature Name: libsyntax2.0 |
2 | - Start Date: 2017-12-30 | 2 | - Start Date: 2017-12-30 |
3 | - RFC PR: (leave this empty) | 3 | - RFC PR: (leave this empty) |
4 | - Rust Issue: (leave this empty) | 4 | - Rust Issue: (leave this empty) |
@@ -111,6 +111,8 @@ compiler. | |||
111 | [Kotlin]: https://kotlinlang.org/ | 111 | [Kotlin]: https://kotlinlang.org/ |
112 | 112 | ||
113 | 113 | ||
114 | ## Untyped Tree | ||
115 | |||
114 | The main idea is to store the minimal amount of information in the | 116 | The main idea is to store the minimal amount of information in the |
115 | tree itself, and instead lean heavily on the source code string for | 117 | tree itself, and instead lean heavily on the source code string for |
116 | the actual data about identifier names, constant values etc. | 118 | the actual data about identifier names, constant values etc. |
@@ -123,6 +125,90 @@ syntactic categories | |||
123 | 125 | ||
124 | 126 | ||
125 | ```rust | 127 | ```rust |
128 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] | ||
129 | pub struct NodeKind(u16); | ||
130 | |||
131 | pub struct File { | ||
132 | text: String, | ||
133 | nodes: Vec<NodeData>, | ||
134 | } | ||
135 | |||
136 | struct NodeData { | ||
137 | kind: NodeKind, | ||
138 | range: (u32, u32), | ||
139 | parent: Option<u32>, | ||
140 | first_child: Option<u32>, | ||
141 | next_sibling: Option<u32>, | ||
142 | } | ||
143 | |||
144 | #[derive(Clone, Copy)] | ||
145 | pub struct Node<'f> { | ||
146 | file: &'f File, | ||
147 | idx: u32, | ||
148 | } | ||
149 | |||
150 | pub struct Children<'f> { | ||
151 | next: Option<Node<'f>>, | ||
152 | } | ||
153 | |||
154 | impl File { | ||
155 | pub fn root<'f>(&'f self) -> Node<'f> { | ||
156 | assert!(!self.nodes.is_empty()); | ||
157 | Node { file: self, idx: 0 } | ||
158 | } | ||
159 | } | ||
160 | |||
161 | impl<'f> Node<'f> { | ||
162 | pub fn kind(&self) -> NodeKind { | ||
163 | self.data().kind | ||
164 | } | ||
165 | |||
166 | pub fn text(&self) -> &'f str { | ||
167 | let (start, end) = self.data().range; | ||
168 | &self.file.text[start as usize..end as usize] | ||
169 | } | ||
170 | |||
171 | pub fn parent(&self) -> Option<Node<'f>> { | ||
172 | self.as_node(self.data().parent) | ||
173 | } | ||
174 | |||
175 | pub fn children(&self) -> Children<'f> { | ||
176 | Children { next: self.as_node(self.data().first_child) } | ||
177 | } | ||
178 | |||
179 | fn data(&self) -> &'f NodeData { | ||
180 | &self.file.nodes[self.idx as usize] | ||
181 | } | ||
182 | |||
183 | fn as_node(&self, idx: Option<u32>) -> Option<Node<'f>> { | ||
184 | idx.map(|idx| Node { file: self.file, idx }) | ||
185 | } | ||
186 | } | ||
187 | |||
188 | impl<'f> Iterator for Children<'f> { | ||
189 | type Item = Node<'f>; | ||
190 | |||
191 | fn next(&mut self) -> Option<Node<'f>> { | ||
192 | let next = self.next; | ||
193 | self.next = next.and_then(|node| node.as_node(node.data().next_sibling)); | ||
194 | next | ||
195 | } | ||
196 | } | ||
197 | |||
198 | pub const ERROR: NodeKind = NodeKind(0); | ||
199 | pub const WHITESPACE: NodeKind = NodeKind(1); | ||
200 | pub const STRUCT_KW: NodeKind = NodeKind(2); | ||
201 | pub const IDENT: NodeKind = NodeKind(3); | ||
202 | pub const L_CURLY: NodeKind = NodeKind(4); | ||
203 | pub const R_CURLY: NodeKind = NodeKind(5); | ||
204 | pub const COLON: NodeKind = NodeKind(6); | ||
205 | pub const COMMA: NodeKind = NodeKind(7); | ||
206 | pub const AMP: NodeKind = NodeKind(8); | ||
207 | pub const LINE_COMMENT: NodeKind = NodeKind(9); | ||
208 | pub const FILE: NodeKind = NodeKind(10); | ||
209 | pub const STRUCT_DEF: NodeKind = NodeKind(11); | ||
210 | pub const FIELD_DEF: NodeKind = NodeKind(12); | ||
211 | pub const TYPE_REF: NodeKind = NodeKind(13); | ||
126 | ``` | 212 | ``` |
127 | 213 | ||
128 | Here is a rust snippet and the corresponding parse tree: | 214 | Here is a rust snippet and the corresponding parse tree: |
@@ -182,22 +268,137 @@ Note several features of the tree: | |||
182 | field. | 268 | field. |
183 | 269 | ||
184 | 270 | ||
271 | ## Typed Tree | ||
272 | |||
273 | It's hard to work with this raw parse tree, because it is untyped: | ||
274 | node containing a struct definition has the same API as the node for | ||
275 | the struct field. But it's possible to add a strongly typed layer on | ||
276 | top of this raw tree, and get a zero-cost typed AST. Here is an | ||
277 | example which adds type-safe wrappers for structs and fields: | ||
278 | |||
279 | ```rust | ||
280 | pub trait AstNode<'f>: Copy + 'f { | ||
281 | fn new(node: Node<'f>) -> Option<Self>; | ||
282 | fn node(&self) -> Node<'f>; | ||
283 | } | ||
284 | |||
285 | pub fn child_of_kind<'f>(node: Node<'f>, kind: NodeKind) -> Option<Node<'f>> { | ||
286 | node.children().find(|child| child.kind() == kind) | ||
287 | } | ||
288 | |||
289 | pub fn ast_children<'f, A: AstNode<'f>>(node: Node<'f>) -> Box<Iterator<Item=A> + 'f> { | ||
290 | Box::new(node.children().filter_map(A::new)) | ||
291 | } | ||
292 | |||
293 | #[derive(Clone, Copy)] | ||
294 | pub struct StructDef<'f>(Node<'f>); | ||
295 | |||
296 | #[derive(Clone, Copy)] | ||
297 | pub struct FieldDef<'f>(Node<'f>); | ||
298 | |||
299 | #[derive(Clone, Copy)] | ||
300 | pub struct TypeRef<'f>(Node<'f>); | ||
301 | |||
302 | pub trait NameOwner<'f>: AstNode<'f> { | ||
303 | fn name_ident(&self) -> Node<'f> { | ||
304 | child_of_kind(self.node(), IDENT).unwrap() | ||
305 | } | ||
306 | |||
307 | fn name(&self) -> &'f str { self.name_ident().text() } | ||
308 | } | ||
309 | |||
310 | |||
311 | impl<'f> AstNode<'f> for StructDef<'f> { | ||
312 | fn new(node: Node<'f>) -> Option<Self> { | ||
313 | if node.kind() == STRUCT_DEF { Some(StructDef(node)) } else { None } | ||
314 | } | ||
315 | fn node(&self) -> Node<'f> { self.0 } | ||
316 | } | ||
317 | |||
318 | impl<'f> AstNode<'f> for FieldDef<'f> { | ||
319 | fn new(node: Node<'f>) -> Option<Self> { | ||
320 | if node.kind() == FIELD_DEF { Some(FieldDef(node)) } else { None } | ||
321 | } | ||
322 | fn node(&self) -> Node<'f> { self.0 } | ||
323 | } | ||
324 | |||
325 | impl<'f> AstNode<'f> for TypeRef<'f> { | ||
326 | fn new(node: Node<'f>) -> Option<Self> { | ||
327 | if node.kind() == TYPE_REF { Some(TypeRef(node)) } else { None } | ||
328 | } | ||
329 | fn node(&self) -> Node<'f> { self.0 } | ||
330 | } | ||
331 | |||
332 | impl<'f> NameOwner<'f> for StructDef<'f> {} | ||
333 | impl<'f> NameOwner<'f> for FieldDef<'f> {} | ||
334 | |||
335 | impl<'f> StructDef<'f> { | ||
336 | pub fn fields(&self) -> Box<Iterator<Item=FieldDef<'f>> + 'f> { | ||
337 | ast_children(self.node()) | ||
338 | } | ||
339 | } | ||
340 | |||
341 | impl<'f> FieldDef<'f> { | ||
342 | pub fn type_ref(&self) -> Option<TypeRef<'f>> { | ||
343 | ast_children(self.node()).next() | ||
344 | } | ||
345 | } | ||
346 | ``` | ||
347 | |||
348 | |||
349 | ## Missing Source Code | ||
350 | |||
351 | The crucial feature of this syntax tree is that it is just a view into | ||
352 | the original source code. And this poses a problem for the Rust | ||
353 | language, because not all compiled Rust code is represented in the | ||
354 | form of source code! Specifically, Rust has a powerful macro system, | ||
355 | which effectively allows to create and parse additional source code at | ||
356 | compile time. It is not entirely clear that the proposed parsing | ||
357 | framework is able to handle this use case, and it's the main purpose | ||
358 | of this RFC to figure it out. The current idea for handling macros is | ||
359 | to make each macro expansion produce a triple of (expansion text, | ||
360 | syntax tree, hygiene information), where hygiene information is a side | ||
361 | table, which colors different ranges of the expansion text according | ||
362 | to the original syntactic context. | ||
363 | |||
364 | |||
365 | ## Implementation plan | ||
366 | |||
367 | This RFC proposes huge changes to the internals of the compiler, so | ||
368 | it's important to proceed carefully and incrementally. The following | ||
369 | plan is suggested: | ||
370 | |||
371 | * RFC discussion about the theoretical feasibility of the proposal. | ||
372 | |||
373 | * Implementation of the proposal as a completely separate crates.io | ||
374 | crate. | ||
375 | |||
376 | * A prototype implementation of the macro expansion on top of the new sytnax tree. | ||
377 | |||
378 | * Additional round of discussion/RFC about merging with the mainline | ||
379 | compiler. | ||
380 | |||
185 | 381 | ||
186 | # Drawbacks | 382 | # Drawbacks |
187 | [drawbacks]: #drawbacks | 383 | [drawbacks]: #drawbacks |
188 | 384 | ||
189 | Why should we *not* do this? | 385 | - No harm will be done as long as the new libsyntax exists as an |
386 | experiemt on crates.io. However, actually using it in the compiler | ||
387 | and other tools would require massive refactorings. | ||
190 | 388 | ||
191 | # Rationale and alternatives | 389 | # Rationale and alternatives |
192 | [alternatives]: #alternatives | 390 | [alternatives]: #alternatives |
193 | 391 | ||
194 | - Why is this design the best in the space of possible designs? | 392 | - Incrementally add more information about source code to the current AST. |
195 | - What other designs have been considered and what is the rationale for not choosing them? | 393 | - Move the current libsyntax to crates.io as is. |
196 | - What is the impact of not doing this? | 394 | - Explore alternative representations for the parse tree. |
197 | 395 | ||
198 | # Unresolved questions | 396 | # Unresolved questions |
199 | [unresolved]: #unresolved-questions | 397 | [unresolved]: #unresolved-questions |
200 | 398 | ||
201 | - What parts of the design do you expect to resolve through the RFC process before this gets merged? | 399 | - Is it at all possible to represent Rust parser as a pure function of |
202 | - What parts of the design do you expect to resolve through the implementation of this feature before stabilization? | 400 | the source code? |
203 | - What related issues do you consider out of scope for this RFC that could be addressed in the future independently of the solution that comes out of this RFC? | 401 | - Is it possible to implement macro expansion using the proposed |
402 | framework? | ||
403 | - How to actually phase out current libsyntax, if libsyntax2.0 turns | ||
404 | out to be a success? | ||