diff options
Diffstat (limited to 'crates/syntax/src/lib.rs')
-rw-r--r-- | crates/syntax/src/lib.rs | 388 |
1 files changed, 388 insertions, 0 deletions
diff --git a/crates/syntax/src/lib.rs b/crates/syntax/src/lib.rs new file mode 100644 index 000000000..7f8da66af --- /dev/null +++ b/crates/syntax/src/lib.rs | |||
@@ -0,0 +1,388 @@ | |||
1 | //! Syntax Tree library used throughout the rust analyzer. | ||
2 | //! | ||
3 | //! Properties: | ||
4 | //! - easy and fast incremental re-parsing | ||
5 | //! - graceful handling of errors | ||
6 | //! - full-fidelity representation (*any* text can be precisely represented as | ||
7 | //! a syntax tree) | ||
8 | //! | ||
9 | //! For more information, see the [RFC]. Current implementation is inspired by | ||
10 | //! the [Swift] one. | ||
11 | //! | ||
12 | //! The most interesting modules here are `syntax_node` (which defines concrete | ||
13 | //! syntax tree) and `ast` (which defines abstract syntax tree on top of the | ||
14 | //! CST). The actual parser live in a separate `parser` crate, though the | ||
15 | //! lexer lives in this crate. | ||
16 | //! | ||
17 | //! See `api_walkthrough` test in this file for a quick API tour! | ||
18 | //! | ||
19 | //! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256> | ||
20 | //! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md> | ||
21 | |||
22 | #[allow(unused)] | ||
23 | macro_rules! eprintln { | ||
24 | ($($tt:tt)*) => { stdx::eprintln!($($tt)*) }; | ||
25 | } | ||
26 | |||
27 | mod syntax_node; | ||
28 | mod syntax_error; | ||
29 | mod parsing; | ||
30 | mod validation; | ||
31 | mod ptr; | ||
32 | #[cfg(test)] | ||
33 | mod tests; | ||
34 | |||
35 | pub mod algo; | ||
36 | pub mod ast; | ||
37 | #[doc(hidden)] | ||
38 | pub mod fuzz; | ||
39 | |||
40 | use std::{marker::PhantomData, sync::Arc}; | ||
41 | |||
42 | use stdx::format_to; | ||
43 | use text_edit::Indel; | ||
44 | |||
45 | pub use crate::{ | ||
46 | algo::InsertPosition, | ||
47 | ast::{AstNode, AstToken}, | ||
48 | parsing::{lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token}, | ||
49 | ptr::{AstPtr, SyntaxNodePtr}, | ||
50 | syntax_error::SyntaxError, | ||
51 | syntax_node::{ | ||
52 | Direction, GreenNode, NodeOrToken, SyntaxElement, SyntaxElementChildren, SyntaxNode, | ||
53 | SyntaxNodeChildren, SyntaxToken, SyntaxTreeBuilder, | ||
54 | }, | ||
55 | }; | ||
56 | pub use parser::{SyntaxKind, T}; | ||
57 | pub use rowan::{SmolStr, SyntaxText, TextRange, TextSize, TokenAtOffset, WalkEvent}; | ||
58 | |||
59 | /// `Parse` is the result of the parsing: a syntax tree and a collection of | ||
60 | /// errors. | ||
61 | /// | ||
62 | /// Note that we always produce a syntax tree, even for completely invalid | ||
63 | /// files. | ||
64 | #[derive(Debug, PartialEq, Eq)] | ||
65 | pub struct Parse<T> { | ||
66 | green: GreenNode, | ||
67 | errors: Arc<Vec<SyntaxError>>, | ||
68 | _ty: PhantomData<fn() -> T>, | ||
69 | } | ||
70 | |||
71 | impl<T> Clone for Parse<T> { | ||
72 | fn clone(&self) -> Parse<T> { | ||
73 | Parse { green: self.green.clone(), errors: self.errors.clone(), _ty: PhantomData } | ||
74 | } | ||
75 | } | ||
76 | |||
77 | impl<T> Parse<T> { | ||
78 | fn new(green: GreenNode, errors: Vec<SyntaxError>) -> Parse<T> { | ||
79 | Parse { green, errors: Arc::new(errors), _ty: PhantomData } | ||
80 | } | ||
81 | |||
82 | pub fn syntax_node(&self) -> SyntaxNode { | ||
83 | SyntaxNode::new_root(self.green.clone()) | ||
84 | } | ||
85 | } | ||
86 | |||
87 | impl<T: AstNode> Parse<T> { | ||
88 | pub fn to_syntax(self) -> Parse<SyntaxNode> { | ||
89 | Parse { green: self.green, errors: self.errors, _ty: PhantomData } | ||
90 | } | ||
91 | |||
92 | pub fn tree(&self) -> T { | ||
93 | T::cast(self.syntax_node()).unwrap() | ||
94 | } | ||
95 | |||
96 | pub fn errors(&self) -> &[SyntaxError] { | ||
97 | &*self.errors | ||
98 | } | ||
99 | |||
100 | pub fn ok(self) -> Result<T, Arc<Vec<SyntaxError>>> { | ||
101 | if self.errors.is_empty() { | ||
102 | Ok(self.tree()) | ||
103 | } else { | ||
104 | Err(self.errors) | ||
105 | } | ||
106 | } | ||
107 | } | ||
108 | |||
109 | impl Parse<SyntaxNode> { | ||
110 | pub fn cast<N: AstNode>(self) -> Option<Parse<N>> { | ||
111 | if N::cast(self.syntax_node()).is_some() { | ||
112 | Some(Parse { green: self.green, errors: self.errors, _ty: PhantomData }) | ||
113 | } else { | ||
114 | None | ||
115 | } | ||
116 | } | ||
117 | } | ||
118 | |||
119 | impl Parse<SourceFile> { | ||
120 | pub fn debug_dump(&self) -> String { | ||
121 | let mut buf = format!("{:#?}", self.tree().syntax()); | ||
122 | for err in self.errors.iter() { | ||
123 | format_to!(buf, "error {:?}: {}\n", err.range(), err); | ||
124 | } | ||
125 | buf | ||
126 | } | ||
127 | |||
128 | pub fn reparse(&self, indel: &Indel) -> Parse<SourceFile> { | ||
129 | self.incremental_reparse(indel).unwrap_or_else(|| self.full_reparse(indel)) | ||
130 | } | ||
131 | |||
132 | fn incremental_reparse(&self, indel: &Indel) -> Option<Parse<SourceFile>> { | ||
133 | // FIXME: validation errors are not handled here | ||
134 | parsing::incremental_reparse(self.tree().syntax(), indel, self.errors.to_vec()).map( | ||
135 | |(green_node, errors, _reparsed_range)| Parse { | ||
136 | green: green_node, | ||
137 | errors: Arc::new(errors), | ||
138 | _ty: PhantomData, | ||
139 | }, | ||
140 | ) | ||
141 | } | ||
142 | |||
143 | fn full_reparse(&self, indel: &Indel) -> Parse<SourceFile> { | ||
144 | let mut text = self.tree().syntax().text().to_string(); | ||
145 | indel.apply(&mut text); | ||
146 | SourceFile::parse(&text) | ||
147 | } | ||
148 | } | ||
149 | |||
150 | /// `SourceFile` represents a parse tree for a single Rust file. | ||
151 | pub use crate::ast::SourceFile; | ||
152 | |||
153 | impl SourceFile { | ||
154 | pub fn parse(text: &str) -> Parse<SourceFile> { | ||
155 | let (green, mut errors) = parsing::parse_text(text); | ||
156 | let root = SyntaxNode::new_root(green.clone()); | ||
157 | |||
158 | if cfg!(debug_assertions) { | ||
159 | validation::validate_block_structure(&root); | ||
160 | } | ||
161 | |||
162 | errors.extend(validation::validate(&root)); | ||
163 | |||
164 | assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE); | ||
165 | Parse { green, errors: Arc::new(errors), _ty: PhantomData } | ||
166 | } | ||
167 | } | ||
168 | |||
169 | impl ast::Path { | ||
170 | /// Returns `text`, parsed as a path, but only if it has no errors. | ||
171 | pub fn parse(text: &str) -> Result<Self, ()> { | ||
172 | parsing::parse_text_fragment(text, parser::FragmentKind::Path) | ||
173 | } | ||
174 | } | ||
175 | |||
176 | impl ast::Pat { | ||
177 | /// Returns `text`, parsed as a pattern, but only if it has no errors. | ||
178 | pub fn parse(text: &str) -> Result<Self, ()> { | ||
179 | parsing::parse_text_fragment(text, parser::FragmentKind::Pattern) | ||
180 | } | ||
181 | } | ||
182 | |||
183 | impl ast::Expr { | ||
184 | /// Returns `text`, parsed as an expression, but only if it has no errors. | ||
185 | pub fn parse(text: &str) -> Result<Self, ()> { | ||
186 | parsing::parse_text_fragment(text, parser::FragmentKind::Expr) | ||
187 | } | ||
188 | } | ||
189 | |||
190 | impl ast::Item { | ||
191 | /// Returns `text`, parsed as an item, but only if it has no errors. | ||
192 | pub fn parse(text: &str) -> Result<Self, ()> { | ||
193 | parsing::parse_text_fragment(text, parser::FragmentKind::Item) | ||
194 | } | ||
195 | } | ||
196 | |||
197 | impl ast::Type { | ||
198 | /// Returns `text`, parsed as an type reference, but only if it has no errors. | ||
199 | pub fn parse(text: &str) -> Result<Self, ()> { | ||
200 | parsing::parse_text_fragment(text, parser::FragmentKind::Type) | ||
201 | } | ||
202 | } | ||
203 | |||
204 | /// Matches a `SyntaxNode` against an `ast` type. | ||
205 | /// | ||
206 | /// # Example: | ||
207 | /// | ||
208 | /// ```ignore | ||
209 | /// match_ast! { | ||
210 | /// match node { | ||
211 | /// ast::CallExpr(it) => { ... }, | ||
212 | /// ast::MethodCallExpr(it) => { ... }, | ||
213 | /// ast::MacroCall(it) => { ... }, | ||
214 | /// _ => None, | ||
215 | /// } | ||
216 | /// } | ||
217 | /// ``` | ||
218 | #[macro_export] | ||
219 | macro_rules! match_ast { | ||
220 | (match $node:ident { $($tt:tt)* }) => { match_ast!(match ($node) { $($tt)* }) }; | ||
221 | |||
222 | (match ($node:expr) { | ||
223 | $( ast::$ast:ident($it:ident) => $res:expr, )* | ||
224 | _ => $catch_all:expr $(,)? | ||
225 | }) => {{ | ||
226 | $( if let Some($it) = ast::$ast::cast($node.clone()) { $res } else )* | ||
227 | { $catch_all } | ||
228 | }}; | ||
229 | } | ||
230 | |||
231 | /// This test does not assert anything and instead just shows off the crate's | ||
232 | /// API. | ||
233 | #[test] | ||
234 | fn api_walkthrough() { | ||
235 | use ast::{ModuleItemOwner, NameOwner}; | ||
236 | |||
237 | let source_code = " | ||
238 | fn foo() { | ||
239 | 1 + 1 | ||
240 | } | ||
241 | "; | ||
242 | // `SourceFile` is the main entry point. | ||
243 | // | ||
244 | // The `parse` method returns a `Parse` -- a pair of syntax tree and a list | ||
245 | // of errors. That is, syntax tree is constructed even in presence of errors. | ||
246 | let parse = SourceFile::parse(source_code); | ||
247 | assert!(parse.errors().is_empty()); | ||
248 | |||
249 | // The `tree` method returns an owned syntax node of type `SourceFile`. | ||
250 | // Owned nodes are cheap: inside, they are `Rc` handles to the underling data. | ||
251 | let file: SourceFile = parse.tree(); | ||
252 | |||
253 | // `SourceFile` is the root of the syntax tree. We can iterate file's items. | ||
254 | // Let's fetch the `foo` function. | ||
255 | let mut func = None; | ||
256 | for item in file.items() { | ||
257 | match item { | ||
258 | ast::Item::Fn(f) => func = Some(f), | ||
259 | _ => unreachable!(), | ||
260 | } | ||
261 | } | ||
262 | let func: ast::Fn = func.unwrap(); | ||
263 | |||
264 | // Each AST node has a bunch of getters for children. All getters return | ||
265 | // `Option`s though, to account for incomplete code. Some getters are common | ||
266 | // for several kinds of node. In this case, a trait like `ast::NameOwner` | ||
267 | // usually exists. By convention, all ast types should be used with `ast::` | ||
268 | // qualifier. | ||
269 | let name: Option<ast::Name> = func.name(); | ||
270 | let name = name.unwrap(); | ||
271 | assert_eq!(name.text(), "foo"); | ||
272 | |||
273 | // Let's get the `1 + 1` expression! | ||
274 | let body: ast::BlockExpr = func.body().unwrap(); | ||
275 | let expr: ast::Expr = body.expr().unwrap(); | ||
276 | |||
277 | // Enums are used to group related ast nodes together, and can be used for | ||
278 | // matching. However, because there are no public fields, it's possible to | ||
279 | // match only the top level enum: that is the price we pay for increased API | ||
280 | // flexibility | ||
281 | let bin_expr: &ast::BinExpr = match &expr { | ||
282 | ast::Expr::BinExpr(e) => e, | ||
283 | _ => unreachable!(), | ||
284 | }; | ||
285 | |||
286 | // Besides the "typed" AST API, there's an untyped CST one as well. | ||
287 | // To switch from AST to CST, call `.syntax()` method: | ||
288 | let expr_syntax: &SyntaxNode = expr.syntax(); | ||
289 | |||
290 | // Note how `expr` and `bin_expr` are in fact the same node underneath: | ||
291 | assert!(expr_syntax == bin_expr.syntax()); | ||
292 | |||
293 | // To go from CST to AST, `AstNode::cast` function is used: | ||
294 | let _expr: ast::Expr = match ast::Expr::cast(expr_syntax.clone()) { | ||
295 | Some(e) => e, | ||
296 | None => unreachable!(), | ||
297 | }; | ||
298 | |||
299 | // The two properties each syntax node has is a `SyntaxKind`: | ||
300 | assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR); | ||
301 | |||
302 | // And text range: | ||
303 | assert_eq!(expr_syntax.text_range(), TextRange::new(32.into(), 37.into())); | ||
304 | |||
305 | // You can get node's text as a `SyntaxText` object, which will traverse the | ||
306 | // tree collecting token's text: | ||
307 | let text: SyntaxText = expr_syntax.text(); | ||
308 | assert_eq!(text.to_string(), "1 + 1"); | ||
309 | |||
310 | // There's a bunch of traversal methods on `SyntaxNode`: | ||
311 | assert_eq!(expr_syntax.parent().as_ref(), Some(body.syntax())); | ||
312 | assert_eq!(body.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{'])); | ||
313 | assert_eq!( | ||
314 | expr_syntax.next_sibling_or_token().map(|it| it.kind()), | ||
315 | Some(SyntaxKind::WHITESPACE) | ||
316 | ); | ||
317 | |||
318 | // As well as some iterator helpers: | ||
319 | let f = expr_syntax.ancestors().find_map(ast::Fn::cast); | ||
320 | assert_eq!(f, Some(func)); | ||
321 | assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}'])); | ||
322 | assert_eq!( | ||
323 | expr_syntax.descendants_with_tokens().count(), | ||
324 | 8, // 5 tokens `1`, ` `, `+`, ` `, `!` | ||
325 | // 2 child literal expressions: `1`, `1` | ||
326 | // 1 the node itself: `1 + 1` | ||
327 | ); | ||
328 | |||
329 | // There's also a `preorder` method with a more fine-grained iteration control: | ||
330 | let mut buf = String::new(); | ||
331 | let mut indent = 0; | ||
332 | for event in expr_syntax.preorder_with_tokens() { | ||
333 | match event { | ||
334 | WalkEvent::Enter(node) => { | ||
335 | let text = match &node { | ||
336 | NodeOrToken::Node(it) => it.text().to_string(), | ||
337 | NodeOrToken::Token(it) => it.text().to_string(), | ||
338 | }; | ||
339 | format_to!(buf, "{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent); | ||
340 | indent += 2; | ||
341 | } | ||
342 | WalkEvent::Leave(_) => indent -= 2, | ||
343 | } | ||
344 | } | ||
345 | assert_eq!(indent, 0); | ||
346 | assert_eq!( | ||
347 | buf.trim(), | ||
348 | r#" | ||
349 | "1 + 1" BIN_EXPR | ||
350 | "1" LITERAL | ||
351 | "1" INT_NUMBER | ||
352 | " " WHITESPACE | ||
353 | "+" PLUS | ||
354 | " " WHITESPACE | ||
355 | "1" LITERAL | ||
356 | "1" INT_NUMBER | ||
357 | "# | ||
358 | .trim() | ||
359 | ); | ||
360 | |||
361 | // To recursively process the tree, there are three approaches: | ||
362 | // 1. explicitly call getter methods on AST nodes. | ||
363 | // 2. use descendants and `AstNode::cast`. | ||
364 | // 3. use descendants and `match_ast!`. | ||
365 | // | ||
366 | // Here's how the first one looks like: | ||
367 | let exprs_cast: Vec<String> = file | ||
368 | .syntax() | ||
369 | .descendants() | ||
370 | .filter_map(ast::Expr::cast) | ||
371 | .map(|expr| expr.syntax().text().to_string()) | ||
372 | .collect(); | ||
373 | |||
374 | // An alternative is to use a macro. | ||
375 | let mut exprs_visit = Vec::new(); | ||
376 | for node in file.syntax().descendants() { | ||
377 | match_ast! { | ||
378 | match node { | ||
379 | ast::Expr(it) => { | ||
380 | let res = it.syntax().text().to_string(); | ||
381 | exprs_visit.push(res); | ||
382 | }, | ||
383 | _ => (), | ||
384 | } | ||
385 | } | ||
386 | } | ||
387 | assert_eq!(exprs_cast, exprs_visit); | ||
388 | } | ||