diff options
-rw-r--r-- | rfc.md | 253 |
1 files changed, 131 insertions, 122 deletions
@@ -38,8 +38,8 @@ be `0.1.0`. | |||
38 | 38 | ||
39 | ## Reusability | 39 | ## Reusability |
40 | 40 | ||
41 | In theory, parsing can be a pure function, which takes a `&str` as an | 41 | In theory, the parser can be a pure function, which takes a `&str` as |
42 | input, and produces a `ParseTree` as an output. | 42 | an input, and produces a `ParseTree` as an output. |
43 | 43 | ||
44 | This is great for reusability: for example, you can compile this | 44 | This is great for reusability: for example, you can compile this |
45 | function to WASM and use it for fast client-side validation of syntax | 45 | function to WASM and use it for fast client-side validation of syntax |
@@ -64,13 +64,13 @@ Unfortunately, the current libsyntax is far from this ideal. For | |||
64 | example, even the lexer makes use of the `FileMap` which is | 64 | example, even the lexer makes use of the `FileMap` which is |
65 | essentially a global state of the compiler which represents all know | 65 | essentially a global state of the compiler which represents all know |
66 | files. As a data point, it turned out to be easier to move `rustfmt` | 66 | files. As a data point, it turned out to be easier to move `rustfmt` |
67 | inside of main `rustc` repository than to move libsyntax outside! | 67 | into the main `rustc` repository than to move libsyntax outside! |
68 | 68 | ||
69 | 69 | ||
70 | ## IDE support | 70 | ## IDE support |
71 | 71 | ||
72 | There is one big difference in how IDEs and compilers typically treat | 72 | There is one big difference in how IDEs and compilers typically treat |
73 | source code. | 73 | source code. |
74 | 74 | ||
75 | In the compiler, it is convenient to transform the source | 75 | In the compiler, it is convenient to transform the source |
76 | code into Abstract Syntax Tree form, which is independent of the | 76 | code into Abstract Syntax Tree form, which is independent of the |
@@ -86,9 +86,8 @@ necessary to correctly handle certain code-editing actions like | |||
86 | autoindentation or joining lines. IDE also must be able to produce | 86 | autoindentation or joining lines. IDE also must be able to produce |
87 | partial parse trees when some input is missing or invalid. | 87 | partial parse trees when some input is missing or invalid. |
88 | 88 | ||
89 | Currently rustc uses the AST approach, which preserves the source code | 89 | Currently rustc uses the AST approach, and preserves some of the |
90 | information to some extent by storing spans in the AST. | 90 | source code information in the form of spans in the AST. |
91 | |||
92 | 91 | ||
93 | 92 | ||
94 | # Guide-level explanation | 93 | # Guide-level explanation |
@@ -114,8 +113,8 @@ compiler. | |||
114 | ## Untyped Tree | 113 | ## Untyped Tree |
115 | 114 | ||
116 | The main idea is to store the minimal amount of information in the | 115 | The main idea is to store the minimal amount of information in the |
117 | tree itself, and instead lean heavily on the source code string for | 116 | tree itself, and instead lean heavily on the source code for the |
118 | the actual data about identifier names, constant values etc. | 117 | actual data about identifier names, constant values etc. |
119 | 118 | ||
120 | All nodes in the tree are of the same type and store a constant for | 119 | All nodes in the tree are of the same type and store a constant for |
121 | the syntactic category of the element and a range in the source code. | 120 | the syntactic category of the element and a range in the source code. |
@@ -129,70 +128,70 @@ syntactic categories | |||
129 | pub struct NodeKind(u16); | 128 | pub struct NodeKind(u16); |
130 | 129 | ||
131 | pub struct File { | 130 | pub struct File { |
132 | text: String, | 131 | text: String, |
133 | nodes: Vec<NodeData>, | 132 | nodes: Vec<NodeData>, |
134 | } | 133 | } |
135 | 134 | ||
136 | struct NodeData { | 135 | struct NodeData { |
137 | kind: NodeKind, | 136 | kind: NodeKind, |
138 | range: (u32, u32), | 137 | range: (u32, u32), |
139 | parent: Option<u32>, | 138 | parent: Option<u32>, |
140 | first_child: Option<u32>, | 139 | first_child: Option<u32>, |
141 | next_sibling: Option<u32>, | 140 | next_sibling: Option<u32>, |
142 | } | 141 | } |
143 | 142 | ||
144 | #[derive(Clone, Copy)] | 143 | #[derive(Clone, Copy)] |
145 | pub struct Node<'f> { | 144 | pub struct Node<'f> { |
146 | file: &'f File, | 145 | file: &'f File, |
147 | idx: u32, | 146 | idx: u32, |
148 | } | 147 | } |
149 | 148 | ||
150 | pub struct Children<'f> { | 149 | pub struct Children<'f> { |
151 | next: Option<Node<'f>>, | 150 | next: Option<Node<'f>>, |
152 | } | 151 | } |
153 | 152 | ||
154 | impl File { | 153 | impl File { |
155 | pub fn root<'f>(&'f self) -> Node<'f> { | 154 | pub fn root<'f>(&'f self) -> Node<'f> { |
156 | assert!(!self.nodes.is_empty()); | 155 | assert!(!self.nodes.is_empty()); |
157 | Node { file: self, idx: 0 } | 156 | Node { file: self, idx: 0 } |
158 | } | 157 | } |
159 | } | 158 | } |
160 | 159 | ||
161 | impl<'f> Node<'f> { | 160 | impl<'f> Node<'f> { |
162 | pub fn kind(&self) -> NodeKind { | 161 | pub fn kind(&self) -> NodeKind { |
163 | self.data().kind | 162 | self.data().kind |
164 | } | 163 | } |
165 | 164 | ||
166 | pub fn text(&self) -> &'f str { | 165 | pub fn text(&self) -> &'f str { |
167 | let (start, end) = self.data().range; | 166 | let (start, end) = self.data().range; |
168 | &self.file.text[start as usize..end as usize] | 167 | &self.file.text[start as usize..end as usize] |
169 | } | 168 | } |
170 | 169 | ||
171 | pub fn parent(&self) -> Option<Node<'f>> { | 170 | pub fn parent(&self) -> Option<Node<'f>> { |
172 | self.as_node(self.data().parent) | 171 | self.as_node(self.data().parent) |
173 | } | 172 | } |
174 | 173 | ||
175 | pub fn children(&self) -> Children<'f> { | 174 | pub fn children(&self) -> Children<'f> { |
176 | Children { next: self.as_node(self.data().first_child) } | 175 | Children { next: self.as_node(self.data().first_child) } |
177 | } | 176 | } |
178 | 177 | ||
179 | fn data(&self) -> &'f NodeData { | 178 | fn data(&self) -> &'f NodeData { |
180 | &self.file.nodes[self.idx as usize] | 179 | &self.file.nodes[self.idx as usize] |
181 | } | 180 | } |
182 | 181 | ||
183 | fn as_node(&self, idx: Option<u32>) -> Option<Node<'f>> { | 182 | fn as_node(&self, idx: Option<u32>) -> Option<Node<'f>> { |
184 | idx.map(|idx| Node { file: self.file, idx }) | 183 | idx.map(|idx| Node { file: self.file, idx }) |
185 | } | 184 | } |
186 | } | 185 | } |
187 | 186 | ||
188 | impl<'f> Iterator for Children<'f> { | 187 | impl<'f> Iterator for Children<'f> { |
189 | type Item = Node<'f>; | 188 | type Item = Node<'f>; |
190 | 189 | ||
191 | fn next(&mut self) -> Option<Node<'f>> { | 190 | fn next(&mut self) -> Option<Node<'f>> { |
192 | let next = self.next; | 191 | let next = self.next; |
193 | self.next = next.and_then(|node| node.as_node(node.data().next_sibling)); | 192 | self.next = next.and_then(|node| node.as_node(node.data().next_sibling)); |
194 | next | 193 | next |
195 | } | 194 | } |
196 | } | 195 | } |
197 | 196 | ||
198 | pub const ERROR: NodeKind = NodeKind(0); | 197 | pub const ERROR: NodeKind = NodeKind(0); |
@@ -215,10 +214,10 @@ Here is a rust snippet and the corresponding parse tree: | |||
215 | 214 | ||
216 | ```rust | 215 | ```rust |
217 | struct Foo { | 216 | struct Foo { |
218 | field1: u32, | 217 | field1: u32, |
219 | & | 218 | & |
220 | // non-doc comment | 219 | // non-doc comment |
221 | field2: | 220 | field2: |
222 | } | 221 | } |
223 | ``` | 222 | ``` |
224 | 223 | ||
@@ -227,30 +226,30 @@ struct Foo { | |||
227 | FILE | 226 | FILE |
228 | STRUCT_DEF | 227 | STRUCT_DEF |
229 | STRUCT_KW | 228 | STRUCT_KW |
230 | WHITESPACE | 229 | WHITESPACE |
231 | IDENT | 230 | IDENT |
232 | WHITESPACE | 231 | WHITESPACE |
233 | L_CURLY | 232 | L_CURLY |
234 | WHITESPACE | 233 | WHITESPACE |
235 | FIELD_DEF | 234 | FIELD_DEF |
236 | IDENT | 235 | IDENT |
237 | COLON | 236 | COLON |
238 | WHITESPACE | 237 | WHITESPACE |
239 | TYPE_REF | 238 | TYPE_REF |
240 | IDENT | 239 | IDENT |
241 | COMMA | 240 | COMMA |
242 | WHITESPACE | 241 | WHITESPACE |
243 | ERROR | 242 | ERROR |
244 | AMP | 243 | AMP |
245 | WHITESPACE | 244 | WHITESPACE |
246 | FIELD_DEF | 245 | FIELD_DEF |
247 | LINE_COMMENT | 246 | LINE_COMMENT |
248 | WHITESPACE | 247 | WHITESPACE |
249 | IDENT | 248 | IDENT |
250 | COLON | 249 | COLON |
251 | ERROR | 250 | ERROR |
252 | WHITESPACE | 251 | WHITESPACE |
253 | R_CURLY | 252 | R_CURLY |
254 | ``` | 253 | ``` |
255 | 254 | ||
256 | Note several features of the tree: | 255 | Note several features of the tree: |
@@ -259,37 +258,41 @@ Note several features of the tree: | |||
259 | 258 | ||
260 | * The node for `STRUCT_DEF` contains the error element for `&`, but | 259 | * The node for `STRUCT_DEF` contains the error element for `&`, but |
261 | still represents the following field correctly. | 260 | still represents the following field correctly. |
262 | 261 | ||
263 | * The second field of the struct is incomplete: `FIELD_DEF` node for | 262 | * The second field of the struct is incomplete: `FIELD_DEF` node for |
264 | it contains an `ERROR` element, but nevertheless has the correct | 263 | it contains an `ERROR` element, but nevertheless has the correct |
265 | `NodeKind`. | 264 | `NodeKind`. |
266 | 265 | ||
267 | * The non-documenting comment is correctly attached to the following | 266 | * The non-documenting comment is correctly attached to the following |
268 | field. | 267 | field. |
269 | 268 | ||
270 | 269 | ||
271 | ## Typed Tree | 270 | ## Typed Tree |
272 | 271 | ||
273 | It's hard to work with this raw parse tree, because it is untyped: | 272 | It's hard to work with this raw parse tree, because it is untyped: |
274 | node containing a struct definition has the same API as the node for | 273 | node containing a struct definition has the same API as the node for |
275 | the struct field. But it's possible to add a strongly typed layer on | 274 | the struct field. But it's possible to add a strongly typed layer on |
276 | top of this raw tree, and get a zero-cost typed AST. Here is an | 275 | top of this raw tree, and get a zero-cost AST. Here is an example |
277 | example which adds type-safe wrappers for structs and fields: | 276 | which adds type-safe wrappers for structs and fields: |
278 | 277 | ||
279 | ```rust | 278 | ```rust |
279 | // generic infrastructure | ||
280 | |||
280 | pub trait AstNode<'f>: Copy + 'f { | 281 | pub trait AstNode<'f>: Copy + 'f { |
281 | fn new(node: Node<'f>) -> Option<Self>; | 282 | fn new(node: Node<'f>) -> Option<Self>; |
282 | fn node(&self) -> Node<'f>; | 283 | fn node(&self) -> Node<'f>; |
283 | } | 284 | } |
284 | 285 | ||
285 | pub fn child_of_kind<'f>(node: Node<'f>, kind: NodeKind) -> Option<Node<'f>> { | 286 | pub fn child_of_kind<'f>(node: Node<'f>, kind: NodeKind) -> Option<Node<'f>> { |
286 | node.children().find(|child| child.kind() == kind) | 287 | node.children().find(|child| child.kind() == kind) |
287 | } | 288 | } |
288 | 289 | ||
289 | pub fn ast_children<'f, A: AstNode<'f>>(node: Node<'f>) -> Box<Iterator<Item=A> + 'f> { | 290 | pub fn ast_children<'f, A: AstNode<'f>>(node: Node<'f>) -> Box<Iterator<Item=A> + 'f> { |
290 | Box::new(node.children().filter_map(A::new)) | 291 | Box::new(node.children().filter_map(A::new)) |
291 | } | 292 | } |
292 | 293 | ||
294 | // AST elements, specific to Rust | ||
295 | |||
293 | #[derive(Clone, Copy)] | 296 | #[derive(Clone, Copy)] |
294 | pub struct StructDef<'f>(Node<'f>); | 297 | pub struct StructDef<'f>(Node<'f>); |
295 | 298 | ||
@@ -300,48 +303,51 @@ pub struct FieldDef<'f>(Node<'f>); | |||
300 | pub struct TypeRef<'f>(Node<'f>); | 303 | pub struct TypeRef<'f>(Node<'f>); |
301 | 304 | ||
302 | pub trait NameOwner<'f>: AstNode<'f> { | 305 | pub trait NameOwner<'f>: AstNode<'f> { |
303 | fn name_ident(&self) -> Node<'f> { | 306 | fn name_ident(&self) -> Node<'f> { |
304 | child_of_kind(self.node(), IDENT).unwrap() | 307 | child_of_kind(self.node(), IDENT).unwrap() |
305 | } | 308 | } |
306 | 309 | ||
307 | fn name(&self) -> &'f str { self.name_ident().text() } | 310 | fn name(&self) -> &'f str { self.name_ident().text() } |
308 | } | 311 | } |
309 | 312 | ||
310 | 313 | ||
311 | impl<'f> AstNode<'f> for StructDef<'f> { | 314 | impl<'f> AstNode<'f> for StructDef<'f> { |
312 | fn new(node: Node<'f>) -> Option<Self> { | 315 | fn new(node: Node<'f>) -> Option<Self> { |
313 | if node.kind() == STRUCT_DEF { Some(StructDef(node)) } else { None } | 316 | if node.kind() == STRUCT_DEF { Some(StructDef(node)) } else { None } |
314 | } | 317 | } |
315 | fn node(&self) -> Node<'f> { self.0 } | 318 | fn node(&self) -> Node<'f> { self.0 } |
316 | } | 319 | } |
317 | 320 | ||
321 | impl<'f> NameOwner<'f> for StructDef<'f> {} | ||
322 | |||
323 | impl<'f> StructDef<'f> { | ||
324 | pub fn fields(&self) -> Box<Iterator<Item=FieldDef<'f>> + 'f> { | ||
325 | ast_children(self.node()) | ||
326 | } | ||
327 | } | ||
328 | |||
329 | |||
318 | impl<'f> AstNode<'f> for FieldDef<'f> { | 330 | impl<'f> AstNode<'f> for FieldDef<'f> { |
319 | fn new(node: Node<'f>) -> Option<Self> { | 331 | fn new(node: Node<'f>) -> Option<Self> { |
320 | if node.kind() == FIELD_DEF { Some(FieldDef(node)) } else { None } | 332 | if node.kind() == FIELD_DEF { Some(FieldDef(node)) } else { None } |
321 | } | 333 | } |
322 | fn node(&self) -> Node<'f> { self.0 } | 334 | fn node(&self) -> Node<'f> { self.0 } |
323 | } | 335 | } |
324 | 336 | ||
325 | impl<'f> AstNode<'f> for TypeRef<'f> { | 337 | impl<'f> FieldDef<'f> { |
326 | fn new(node: Node<'f>) -> Option<Self> { | 338 | pub fn type_ref(&self) -> Option<TypeRef<'f>> { |
327 | if node.kind() == TYPE_REF { Some(TypeRef(node)) } else { None } | 339 | ast_children(self.node()).next() |
328 | } | 340 | } |
329 | fn node(&self) -> Node<'f> { self.0 } | ||
330 | } | 341 | } |
331 | 342 | ||
332 | impl<'f> NameOwner<'f> for StructDef<'f> {} | ||
333 | impl<'f> NameOwner<'f> for FieldDef<'f> {} | 343 | impl<'f> NameOwner<'f> for FieldDef<'f> {} |
334 | 344 | ||
335 | impl<'f> StructDef<'f> { | ||
336 | pub fn fields(&self) -> Box<Iterator<Item=FieldDef<'f>> + 'f> { | ||
337 | ast_children(self.node()) | ||
338 | } | ||
339 | } | ||
340 | 345 | ||
341 | impl<'f> FieldDef<'f> { | 346 | impl<'f> AstNode<'f> for TypeRef<'f> { |
342 | pub fn type_ref(&self) -> Option<TypeRef<'f>> { | 347 | fn new(node: Node<'f>) -> Option<Self> { |
343 | ast_children(self.node()).next() | 348 | if node.kind() == TYPE_REF { Some(TypeRef(node)) } else { None } |
344 | } | 349 | } |
350 | fn node(&self) -> Node<'f> { self.0 } | ||
345 | } | 351 | } |
346 | ``` | 352 | ``` |
347 | 353 | ||
@@ -371,9 +377,11 @@ plan is suggested: | |||
371 | * RFC discussion about the theoretical feasibility of the proposal. | 377 | * RFC discussion about the theoretical feasibility of the proposal. |
372 | 378 | ||
373 | * Implementation of the proposal as a completely separate crates.io | 379 | * Implementation of the proposal as a completely separate crates.io |
374 | crate. | 380 | crate, by refactoring existing libsyntax source code to produce a |
375 | 381 | new tree. | |
376 | * A prototype implementation of the macro expansion on top of the new sytnax tree. | 382 | |
383 | * A prototype implementation of the macro expansion on top of the new | ||
384 | sytnax tree. | ||
377 | 385 | ||
378 | * Additional round of discussion/RFC about merging with the mainline | 386 | * Additional round of discussion/RFC about merging with the mainline |
379 | compiler. | 387 | compiler. |
@@ -390,8 +398,9 @@ plan is suggested: | |||
390 | [alternatives]: #alternatives | 398 | [alternatives]: #alternatives |
391 | 399 | ||
392 | - Incrementally add more information about source code to the current AST. | 400 | - Incrementally add more information about source code to the current AST. |
393 | - Move the current libsyntax to crates.io as is. | 401 | - Move the current libsyntax to crates.io as is. |
394 | - Explore alternative representations for the parse tree. | 402 | - Explore alternative representations for the parse tree. |
403 | - Use parser generator instead of hand written parser. | ||
395 | 404 | ||
396 | # Unresolved questions | 405 | # Unresolved questions |
397 | [unresolved]: #unresolved-questions | 406 | [unresolved]: #unresolved-questions |