diff options
Diffstat (limited to 'crates/ra_syntax/src/lib.rs')
-rw-r--r-- | crates/ra_syntax/src/lib.rs | 214 |
1 files changed, 196 insertions, 18 deletions
diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs index b12282b39..e7d402446 100644 --- a/crates/ra_syntax/src/lib.rs +++ b/crates/ra_syntax/src/lib.rs | |||
@@ -1,22 +1,24 @@ | |||
1 | //! An experimental implementation of [Rust RFC#2256 libsyntax2.0][rfc#2256]. | 1 | //! Syntax Tree library used throughout the rust analyzer. |
2 | //! | 2 | //! |
3 | //! The intent is to be an IDE-ready parser, i.e. one that offers | 3 | //! Properties: |
4 | //! - easy and fast incremental re-parsing | ||
5 | //! - graceful handling of errors | ||
6 | //! - full-fidelity representation (*any* text can be precisely represented as | ||
7 | //! a syntax tree) | ||
4 | //! | 8 | //! |
5 | //! - easy and fast incremental re-parsing, | 9 | //! For more information, see the [RFC]. Current implementation is inspired by |
6 | //! - graceful handling of errors, and | 10 | //! the [Swift] one. |
7 | //! - maintains all information in the source file. | ||
8 | //! | 11 | //! |
9 | //! For more information, see [the RFC][rfc#2265], or [the working draft][RFC.md]. | 12 | //! The most interesting modules here are `syntax_node` (which defines concrete |
13 | //! syntax tree) and `ast` (which defines abstract syntax tree on top of the | ||
14 | //! CST). The actual parser lives in a separate `ra_parser` crate, though the | |
15 | //! lexer lives in this crate. | ||
10 | //! | 16 | //! |
11 | //! [rfc#2256]: <https://github.com/rust-lang/rfcs/pull/2256> | 17 | //! See `api_walkthrough` test in this file for a quick API tour! |
12 | //! [RFC.md]: <https://github.com/matklad/libsyntax2/blob/master/docs/RFC.md> | 18 | //! |
13 | 19 | //! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256> | |
14 | #![forbid(missing_debug_implementations, unconditional_recursion, future_incompatible)] | 20 | //! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md> |
15 | #![deny(bad_style, missing_docs)] | ||
16 | #![allow(missing_docs)] | ||
17 | //#![warn(unreachable_pub)] // rust-lang/rust#47816 | ||
18 | 21 | ||
19 | mod syntax_kinds; | ||
20 | mod syntax_node; | 22 | mod syntax_node; |
21 | mod syntax_text; | 23 | mod syntax_text; |
22 | mod syntax_error; | 24 | mod syntax_error; |
@@ -27,13 +29,11 @@ mod ptr; | |||
27 | 29 | ||
28 | pub mod algo; | 30 | pub mod algo; |
29 | pub mod ast; | 31 | pub mod ast; |
30 | /// Utilities for simple uses of the parser. | ||
31 | pub mod utils; | ||
32 | 32 | ||
33 | pub use rowan::{SmolStr, TextRange, TextUnit}; | 33 | pub use rowan::{SmolStr, TextRange, TextUnit}; |
34 | pub use ra_parser::SyntaxKind; | ||
34 | pub use crate::{ | 35 | pub use crate::{ |
35 | ast::AstNode, | 36 | ast::AstNode, |
36 | syntax_kinds::SyntaxKind, | ||
37 | syntax_error::{SyntaxError, SyntaxErrorKind, Location}, | 37 | syntax_error::{SyntaxError, SyntaxErrorKind, Location}, |
38 | syntax_text::SyntaxText, | 38 | syntax_text::SyntaxText, |
39 | syntax_node::{Direction, SyntaxNode, WalkEvent, TreeArc}, | 39 | syntax_node::{Direction, SyntaxNode, WalkEvent, TreeArc}, |
@@ -51,7 +51,7 @@ impl SourceFile { | |||
51 | fn new(green: GreenNode, errors: Vec<SyntaxError>) -> TreeArc<SourceFile> { | 51 | fn new(green: GreenNode, errors: Vec<SyntaxError>) -> TreeArc<SourceFile> { |
52 | let root = SyntaxNode::new(green, errors); | 52 | let root = SyntaxNode::new(green, errors); |
53 | if cfg!(debug_assertions) { | 53 | if cfg!(debug_assertions) { |
54 | utils::validate_block_structure(&root); | 54 | validation::validate_block_structure(&root); |
55 | } | 55 | } |
56 | assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE); | 56 | assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE); |
57 | TreeArc::cast(root) | 57 | TreeArc::cast(root) |
@@ -82,3 +82,181 @@ impl SourceFile { | |||
82 | errors | 82 | errors |
83 | } | 83 | } |
84 | } | 84 | } |
85 | |||
86 | pub fn check_fuzz_invariants(text: &str) { | ||
87 | let file = SourceFile::parse(text); | ||
88 | let root = file.syntax(); | ||
89 | validation::validate_block_structure(root); | ||
90 | let _ = file.errors(); | ||
91 | } | ||
92 | |||
93 | /// This test is not meant to verify any particular behavior; instead it just | |
94 | /// shows off the crate's API. | |
95 | #[test] | ||
96 | fn api_walkthrough() { | ||
97 | use ast::{ModuleItemOwner, NameOwner}; | ||
98 | |||
99 | let source_code = " | ||
100 | fn foo() { | ||
101 | 1 + 1 | ||
102 | } | ||
103 | "; | ||
104 | // `SourceFile` is the main entry point. | ||
105 | // | ||
106 | // Note how `parse` does not return a `Result`: even completely invalid | ||
107 | // source code might be parsed. | ||
108 | let file = SourceFile::parse(source_code); | ||
109 | |||
110 | // Due to the way ownership is set up, owned syntax nodes always live behind | |
111 | // a `TreeArc` smart pointer. `TreeArc` is roughly an `std::sync::Arc` which | ||
112 | // points to the whole file instead of an individual node. | ||
113 | let file: TreeArc<SourceFile> = file; | ||
114 | |||
115 | // `SourceFile` is the root of the syntax tree. We can iterate over the file's items: | |
116 | let mut func = None; | ||
117 | for item in file.items() { | ||
118 | match item.kind() { | ||
119 | ast::ModuleItemKind::FnDef(f) => func = Some(f), | ||
120 | _ => unreachable!(), | ||
121 | } | ||
122 | } | ||
123 | // The returned items are always references. | ||
124 | let func: &ast::FnDef = func.unwrap(); | ||
125 | |||
126 | // All nodes implement `ToOwned` trait, with `Owned = TreeArc<Self>`. | ||
127 | // `to_owned` is a cheap operation: atomic increment. | ||
128 | let _owned_func: TreeArc<ast::FnDef> = func.to_owned(); | ||
129 | |||
130 | // Each AST node has a bunch of getters for children. All getters return | ||
131 | // `Option`s though, to account for incomplete code. Some getters are common | ||
132 | // for several kinds of node. In this case, a trait like `ast::NameOwner` | ||
133 | // usually exists. By convention, all ast types should be used with `ast::` | ||
134 | // qualifier. | ||
135 | let name: Option<&ast::Name> = func.name(); | ||
136 | let name = name.unwrap(); | ||
137 | assert_eq!(name.text(), "foo"); | ||
138 | |||
139 | // Let's get the `1 + 1` expression! | ||
140 | let block: &ast::Block = func.body().unwrap(); | ||
141 | let expr: &ast::Expr = block.expr().unwrap(); | ||
142 | |||
143 | // "Enum"-like nodes are represented using the "kind" pattern. It allows us | ||
144 | // to match exhaustively against all flavors of nodes, while maintaining | ||
145 | // internal representation flexibility. The drawback is that one can't write | ||
146 | // nested matches as one pattern. | ||
147 | let bin_expr: &ast::BinExpr = match expr.kind() { | ||
148 | ast::ExprKind::BinExpr(e) => e, | ||
149 | _ => unreachable!(), | ||
150 | }; | ||
151 | |||
152 | // Besides the "typed" AST API, there's an untyped CST one as well. | ||
153 | // To switch from AST to CST, call `.syntax()` method: | ||
154 | let expr_syntax: &SyntaxNode = expr.syntax(); | ||
155 | |||
156 | // Note how `expr` and `bin_expr` are in fact the same node underneath: | ||
157 | assert!(std::ptr::eq(expr_syntax, bin_expr.syntax())); | ||
158 | |||
159 | // To go from CST to AST, `AstNode::cast` function is used: | ||
160 | let expr = match ast::Expr::cast(expr_syntax) { | ||
161 | Some(e) => e, | ||
162 | None => unreachable!(), | ||
163 | }; | ||
164 | |||
165 | // Note how expr is also a reference! | ||
166 | let expr: &ast::Expr = expr; | ||
167 | |||
168 | // This is possible because the underlying representation is the same: | ||
169 | assert_eq!( | ||
170 | expr as *const ast::Expr as *const u8, | ||
171 | expr_syntax as *const SyntaxNode as *const u8 | ||
172 | ); | ||
173 | |||
174 | // Each syntax node has two properties. The first is its `SyntaxKind`: | |
175 | assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR); | ||
176 | |||
177 | // And text range: | ||
178 | assert_eq!(expr_syntax.range(), TextRange::from_to(32.into(), 37.into())); | ||
179 | |||
180 | // You can get a node's text as a `SyntaxText` object, which will traverse the | |
181 | // tree, collecting the tokens' text: | |
182 | let text: SyntaxText<'_> = expr_syntax.text(); | ||
183 | assert_eq!(text.to_string(), "1 + 1"); | ||
184 | |||
185 | // There's a bunch of traversal methods on `SyntaxNode`: | ||
186 | assert_eq!(expr_syntax.parent(), Some(block.syntax())); | ||
187 | assert_eq!(block.syntax().first_child().map(|it| it.kind()), Some(SyntaxKind::L_CURLY)); | ||
188 | assert_eq!(expr_syntax.next_sibling().map(|it| it.kind()), Some(SyntaxKind::WHITESPACE)); | ||
189 | |||
190 | // As well as some iterator helpers: | ||
191 | let f = expr_syntax.ancestors().find_map(ast::FnDef::cast); | ||
192 | assert_eq!(f, Some(&*func)); | ||
193 | assert!(expr_syntax.siblings(Direction::Next).any(|it| it.kind() == SyntaxKind::R_CURLY)); | ||
194 | assert_eq!( | ||
195 | expr_syntax.descendants().count(), | ||
196 | 8, // 5 tokens `1`, ` `, `+`, ` `, `1` | |
197 | // 2 child literal expressions: `1`, `1` | ||
198 | // 1 the node itself: `1 + 1` | ||
199 | ); | ||
200 | |||
201 | // There's also a `preorder` method with a more fine-grained iteration control: | ||
202 | let mut buf = String::new(); | ||
203 | let mut indent = 0; | ||
204 | for event in expr_syntax.preorder() { | ||
205 | match event { | ||
206 | WalkEvent::Enter(node) => { | ||
207 | buf += &format!( | ||
208 | "{:indent$}{:?} {:?}\n", | ||
209 | " ", | ||
210 | node.text(), | ||
211 | node.kind(), | ||
212 | indent = indent | ||
213 | ); | ||
214 | indent += 2; | ||
215 | } | ||
216 | WalkEvent::Leave(_) => indent -= 2, | ||
217 | } | ||
218 | } | ||
219 | assert_eq!(indent, 0); | ||
220 | assert_eq!( | ||
221 | buf.trim(), | ||
222 | r#" | ||
223 | "1 + 1" BIN_EXPR | ||
224 | "1" LITERAL | ||
225 | "1" INT_NUMBER | ||
226 | " " WHITESPACE | ||
227 | "+" PLUS | ||
228 | " " WHITESPACE | ||
229 | "1" LITERAL | ||
230 | "1" INT_NUMBER | ||
231 | "# | ||
232 | .trim() | ||
233 | ); | ||
234 | |||
235 | // To recursively process the tree, there are three approaches: | ||
236 | // 1. explicitly call getter methods on AST nodes. | ||
237 | // 2. use descendants and `AstNode::cast`. | ||
238 | // 3. use descendants and the visitor. | ||
239 | // | ||
240 | // Here's what the second one looks like: | |
241 | let exprs_cast: Vec<String> = file | ||
242 | .syntax() | ||
243 | .descendants() | ||
244 | .filter_map(ast::Expr::cast) | ||
245 | .map(|expr| expr.syntax().text().to_string()) | ||
246 | .collect(); | ||
247 | |||
248 | // An alternative is to use a visitor. The visitor does not do traversal | ||
249 | // automatically (so it's more akin to a generic lambda) and is constructed | ||
250 | // from closures. This seems more flexible than a single generated visitor | ||
251 | // trait. | ||
252 | use algo::visit::{visitor, Visitor}; | ||
253 | let mut exprs_visit = Vec::new(); | ||
254 | for node in file.syntax().descendants() { | ||
255 | if let Some(result) = | ||
256 | visitor().visit::<ast::Expr, _>(|expr| expr.syntax().text().to_string()).accept(node) | ||
257 | { | ||
258 | exprs_visit.push(result); | ||
259 | } | ||
260 | } | ||
261 | assert_eq!(exprs_cast, exprs_visit); | ||
262 | } | ||