aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/lib.rs
blob: 06d3ea7275bba6778ff408b86ba49d71462b6a5d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
//! Syntax Tree library used throughout the rust analyzer.
//!
//! Properties:
//!   - easy and fast incremental re-parsing
//!   - graceful handling of errors
//!   - full-fidelity representation (*any* text can be precisely represented as
//!     a syntax tree)
//!
//! For more information, see the [RFC]. Current implementation is inspired by
//! the [Swift] one.
//!
//! The most interesting modules here are `syntax_node` (which defines concrete
//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
//! CST). The actual parser live in a separate `ra_parser` crate, though the
//! lexer lives in this crate.
//!
//! See `api_walkthrough` test in this file for a quick API tour!
//!
//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>

mod syntax_node;
mod syntax_text;
mod syntax_error;
mod parsing;
mod validation;
mod ptr;

pub mod algo;
pub mod ast;
#[doc(hidden)]
pub mod fuzz;

use std::{fmt::Write, sync::Arc};

use ra_text_edit::AtomTextEdit;

use crate::syntax_node::GreenNode;

pub use crate::{
    ast::AstNode,
    parsing::{classify_literal, tokenize, Token},
    ptr::{AstPtr, SyntaxNodePtr},
    syntax_error::{Location, SyntaxError, SyntaxErrorKind},
    syntax_node::{
        Direction, InsertPosition, SyntaxElement, SyntaxNode, SyntaxToken, SyntaxTreeBuilder,
        TreeArc, WalkEvent,
    },
    syntax_text::SyntaxText,
};
pub use ra_parser::SyntaxKind;
pub use ra_parser::T;
pub use rowan::{SmolStr, TextRange, TextUnit};

/// `Parse` is the result of the parsing: a syntax tree and a collection of
/// errors.
///
/// Note that we always produce a syntax tree, even for completely invalid
/// files.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parse {
    tree: TreeArc<SourceFile>,
    errors: Arc<Vec<SyntaxError>>,
}

impl Parse {
    pub fn tree(&self) -> &SourceFile {
        &*self.tree
    }

    pub fn errors(&self) -> &[SyntaxError] {
        &*self.errors
    }

    pub fn ok(self) -> Result<TreeArc<SourceFile>, Arc<Vec<SyntaxError>>> {
        if self.errors.is_empty() {
            Ok(self.tree)
        } else {
            Err(self.errors)
        }
    }

    pub fn reparse(&self, edit: &AtomTextEdit) -> Parse {
        self.incremental_reparse(edit).unwrap_or_else(|| self.full_reparse(edit))
    }

    pub fn debug_dump(&self) -> String {
        let mut buf = self.tree.syntax().debug_dump();
        for err in self.errors.iter() {
            writeln!(buf, "error {:?}: {}", err.location(), err.kind()).unwrap();
        }
        buf
    }

    fn incremental_reparse(&self, edit: &AtomTextEdit) -> Option<Parse> {
        // FIXME: validation errors are not handled here
        parsing::incremental_reparse(self.tree.syntax(), edit, self.errors.to_vec()).map(
            |(green_node, errors, _reparsed_range)| Parse {
                tree: SourceFile::new(green_node),
                errors: Arc::new(errors),
            },
        )
    }

    fn full_reparse(&self, edit: &AtomTextEdit) -> Parse {
        let text = edit.apply(self.tree.syntax().text().to_string());
        SourceFile::parse(&text)
    }
}

/// `SourceFile` represents a parse tree for a single Rust file.
pub use crate::ast::SourceFile;

impl SourceFile {
    fn new(green: GreenNode) -> TreeArc<SourceFile> {
        let root = SyntaxNode::new(green);
        if cfg!(debug_assertions) {
            validation::validate_block_structure(&root);
        }
        assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
        TreeArc::cast(root)
    }

    pub fn parse(text: &str) -> Parse {
        let (green, mut errors) = parsing::parse_text(text);
        let tree = SourceFile::new(green);
        errors.extend(validation::validate(&tree));
        Parse { tree, errors: Arc::new(errors) }
    }
}

/// This test does not assert anything and instead just shows off the crate's
/// API.
#[test]
fn api_walkthrough() {
    use ast::{ModuleItemOwner, NameOwner};

    let source_code = "
        fn foo() {
            1 + 1
        }
    ";
    // `SourceFile` is the main entry point.
    //
    // The `parse` method returns a `Parse` -- a pair of syntax tree and a list
    // of errors. That is, syntax tree is constructed even in presence of errors.
    let parse = SourceFile::parse(source_code);
    assert!(parse.errors.is_empty());

    // Due to the way ownership is set up, owned syntax Nodes always live behind
    // a `TreeArc` smart pointer. `TreeArc` is roughly an `std::sync::Arc` which
    // points to the whole file instead of an individual node.
    let file: TreeArc<SourceFile> = parse.tree;

    // `SourceFile` is the root of the syntax tree. We can iterate file's items:
    let mut func = None;
    for item in file.items() {
        match item.kind() {
            ast::ModuleItemKind::FnDef(f) => func = Some(f),
            _ => unreachable!(),
        }
    }
    // The returned items are always references.
    let func: &ast::FnDef = func.unwrap();

    // All nodes implement `ToOwned` trait, with `Owned = TreeArc<Self>`.
    // `to_owned` is a cheap operation: atomic increment.
    let _owned_func: TreeArc<ast::FnDef> = func.to_owned();

    // Each AST node has a bunch of getters for children. All getters return
    // `Option`s though, to account for incomplete code. Some getters are common
    // for several kinds of node. In this case, a trait like `ast::NameOwner`
    // usually exists. By convention, all ast types should be used with `ast::`
    // qualifier.
    let name: Option<&ast::Name> = func.name();
    let name = name.unwrap();
    assert_eq!(name.text(), "foo");

    // Let's get the `1 + 1` expression!
    let block: &ast::Block = func.body().unwrap();
    let expr: &ast::Expr = block.expr().unwrap();

    // "Enum"-like nodes are represented using the "kind" pattern. It allows us
    // to match exhaustively against all flavors of nodes, while maintaining
    // internal representation flexibility. The drawback is that one can't write
    // nested matches as one pattern.
    let bin_expr: &ast::BinExpr = match expr.kind() {
        ast::ExprKind::BinExpr(e) => e,
        _ => unreachable!(),
    };

    // Besides the "typed" AST API, there's an untyped CST one as well.
    // To switch from AST to CST, call `.syntax()` method:
    let expr_syntax: &SyntaxNode = expr.syntax();

    // Note how `expr` and `bin_expr` are in fact the same node underneath:
    assert!(std::ptr::eq(expr_syntax, bin_expr.syntax()));

    // To go from CST to AST, `AstNode::cast` function is used:
    let expr = match ast::Expr::cast(expr_syntax) {
        Some(e) => e,
        None => unreachable!(),
    };

    // Note how expr is also a reference!
    let expr: &ast::Expr = expr;

    // This is possible because the underlying representation is the same:
    assert_eq!(
        expr as *const ast::Expr as *const u8,
        expr_syntax as *const SyntaxNode as *const u8
    );

    // The two properties each syntax node has is a `SyntaxKind`:
    assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);

    // And text range:
    assert_eq!(expr_syntax.range(), TextRange::from_to(32.into(), 37.into()));

    // You can get node's text as a `SyntaxText` object, which will traverse the
    // tree collecting token's text:
    let text: SyntaxText<'_> = expr_syntax.text();
    assert_eq!(text.to_string(), "1 + 1");

    // There's a bunch of traversal methods on `SyntaxNode`:
    assert_eq!(expr_syntax.parent(), Some(block.syntax()));
    assert_eq!(block.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{']));
    assert_eq!(
        expr_syntax.next_sibling_or_token().map(|it| it.kind()),
        Some(SyntaxKind::WHITESPACE)
    );

    // As well as some iterator helpers:
    let f = expr_syntax.ancestors().find_map(ast::FnDef::cast);
    assert_eq!(f, Some(&*func));
    assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}']));
    assert_eq!(
        expr_syntax.descendants_with_tokens().count(),
        8, // 5 tokens `1`, ` `, `+`, ` `, `!`
           // 2 child literal expressions: `1`, `1`
           // 1 the node itself: `1 + 1`
    );

    // There's also a `preorder` method with a more fine-grained iteration control:
    let mut buf = String::new();
    let mut indent = 0;
    for event in expr_syntax.preorder_with_tokens() {
        match event {
            WalkEvent::Enter(node) => {
                let text = match node {
                    SyntaxElement::Node(it) => it.text().to_string(),
                    SyntaxElement::Token(it) => it.text().to_string(),
                };
                buf += &format!("{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent);
                indent += 2;
            }
            WalkEvent::Leave(_) => indent -= 2,
        }
    }
    assert_eq!(indent, 0);
    assert_eq!(
        buf.trim(),
        r#"
"1 + 1" BIN_EXPR
  "1" LITERAL
    "1" INT_NUMBER
  " " WHITESPACE
  "+" PLUS
  " " WHITESPACE
  "1" LITERAL
    "1" INT_NUMBER
"#
        .trim()
    );

    // To recursively process the tree, there are three approaches:
    // 1. explicitly call getter methods on AST nodes.
    // 2. use descendants and `AstNode::cast`.
    // 3. use descendants and the visitor.
    //
    // Here's how the first one looks like:
    let exprs_cast: Vec<String> = file
        .syntax()
        .descendants()
        .filter_map(ast::Expr::cast)
        .map(|expr| expr.syntax().text().to_string())
        .collect();

    // An alternative is to use a visitor. The visitor does not do traversal
    // automatically (so it's more akin to a generic lambda) and is constructed
    // from closures. This seems more flexible than a single generated visitor
    // trait.
    use algo::visit::{visitor, Visitor};
    let mut exprs_visit = Vec::new();
    for node in file.syntax().descendants() {
        if let Some(result) =
            visitor().visit::<ast::Expr, _>(|expr| expr.syntax().text().to_string()).accept(node)
        {
            exprs_visit.push(result);
        }
    }
    assert_eq!(exprs_cast, exprs_visit);
}