From a1c187eef3ba08076aedb5154929f7eda8d1b424 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 12 Aug 2020 18:26:51 +0200
Subject: Rename ra_syntax -> syntax

---
 crates/syntax/src/lib.rs | 388 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 388 insertions(+)
 create mode 100644 crates/syntax/src/lib.rs

(limited to 'crates/syntax/src/lib.rs')

diff --git a/crates/syntax/src/lib.rs b/crates/syntax/src/lib.rs
new file mode 100644
index 000000000..7f8da66af
--- /dev/null
+++ b/crates/syntax/src/lib.rs
@@ -0,0 +1,388 @@
+//! Syntax Tree library used throughout rust-analyzer.
+//!
+//! Properties:
+//!   - easy and fast incremental re-parsing
+//!   - graceful handling of errors
+//!   - full-fidelity representation (*any* text can be precisely represented as
+//!     a syntax tree)
+//!
+//! For more information, see the [RFC]. The current implementation is inspired
+//! by the [Swift] one.
+//!
+//! The most interesting modules here are `syntax_node` (which defines the
+//! concrete syntax tree) and `ast` (which defines the abstract syntax tree on
+//! top of the CST). The actual parser lives in a separate `parser` crate,
+//! though the lexer lives in this crate.
+//!
+//! See the `api_walkthrough` test in this file for a quick API tour!
+//!
+//! [RFC]:
+//! [Swift]:
+
+#[allow(unused)]
+macro_rules! eprintln {
+    ($($tt:tt)*) => { stdx::eprintln!($($tt)*) };
+}
+
+mod syntax_node;
+mod syntax_error;
+mod parsing;
+mod validation;
+mod ptr;
+#[cfg(test)]
+mod tests;
+
+pub mod algo;
+pub mod ast;
+#[doc(hidden)]
+pub mod fuzz;
+
+use std::{marker::PhantomData, sync::Arc};
+
+use stdx::format_to;
+use text_edit::Indel;
+
+pub use crate::{
+    algo::InsertPosition,
+    ast::{AstNode, AstToken},
+    parsing::{lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token},
+    ptr::{AstPtr, SyntaxNodePtr},
+    syntax_error::SyntaxError,
+    syntax_node::{
+        Direction, GreenNode, NodeOrToken, SyntaxElement, SyntaxElementChildren, SyntaxNode,
+        SyntaxNodeChildren, SyntaxToken, SyntaxTreeBuilder,
+    },
+};
+pub use parser::{SyntaxKind, T};
+pub use rowan::{SmolStr, SyntaxText, TextRange, TextSize, TokenAtOffset, WalkEvent};
+
+/// `Parse` is the result of parsing: a syntax tree together with a collection
+/// of errors.
+///
+/// Note that we always produce a syntax tree, even for completely invalid
+/// files.
+#[derive(Debug, PartialEq, Eq)]
+pub struct Parse<T> {
+    green: GreenNode,
+    errors: Arc<Vec<SyntaxError>>,
+    _ty: PhantomData<fn() -> T>,
+}
+
+impl<T> Clone for Parse<T> {
+    fn clone(&self) -> Parse<T> {
+        Parse { green: self.green.clone(), errors: self.errors.clone(), _ty: PhantomData }
+    }
+}
+
+impl<T> Parse<T> {
+    fn new(green: GreenNode, errors: Vec<SyntaxError>) -> Parse<T> {
+        Parse { green, errors: Arc::new(errors), _ty: PhantomData }
+    }
+
+    pub fn syntax_node(&self) -> SyntaxNode {
+        SyntaxNode::new_root(self.green.clone())
+    }
+}
+
+impl<T: AstNode> Parse<T> {
+    pub fn to_syntax(self) -> Parse<SyntaxNode> {
+        Parse { green: self.green, errors: self.errors, _ty: PhantomData }
+    }
+
+    pub fn tree(&self) -> T {
+        T::cast(self.syntax_node()).unwrap()
+    }
+
+    pub fn errors(&self) -> &[SyntaxError] {
+        &*self.errors
+    }
+
+    pub fn ok(self) -> Result<T, Arc<Vec<SyntaxError>>> {
+        if self.errors.is_empty() {
+            Ok(self.tree())
+        } else {
+            Err(self.errors)
+        }
+    }
+}
+
+impl Parse<SyntaxNode> {
+    pub fn cast<N: AstNode>(self) -> Option<Parse<N>> {
+        if N::cast(self.syntax_node()).is_some() {
+            Some(Parse { green: self.green, errors: self.errors, _ty: PhantomData })
+        } else {
+            None
+        }
+    }
+}
+
+impl Parse<SourceFile> {
+    pub fn debug_dump(&self) -> String {
+        let mut buf = format!("{:#?}", self.tree().syntax());
+        for err in self.errors.iter() {
+            format_to!(buf, "error {:?}: {}\n", err.range(), err);
+        }
+        buf
+    }
+
+    pub fn reparse(&self, indel: &Indel) -> Parse<SourceFile> {
+        self.incremental_reparse(indel).unwrap_or_else(|| self.full_reparse(indel))
+    }
+
+    fn incremental_reparse(&self, indel: &Indel) -> Option<Parse<SourceFile>> {
+        // FIXME: validation errors are not handled here
+        parsing::incremental_reparse(self.tree().syntax(), indel, self.errors.to_vec()).map(
+            |(green_node, errors, _reparsed_range)| Parse {
+                green: green_node,
+                errors: Arc::new(errors),
+                _ty: PhantomData,
+            },
+        )
+    }
+
+    fn full_reparse(&self, indel: &Indel) -> Parse<SourceFile> {
+        let mut text = self.tree().syntax().text().to_string();
+        indel.apply(&mut text);
+        SourceFile::parse(&text)
+    }
+}
+
+/// `SourceFile` represents a parse tree for a single Rust file.
+pub use crate::ast::SourceFile;
+
+impl SourceFile {
+    pub fn parse(text: &str) -> Parse<SourceFile> {
+        let (green, mut errors) = parsing::parse_text(text);
+        let root = SyntaxNode::new_root(green.clone());
+
+        if cfg!(debug_assertions) {
+            validation::validate_block_structure(&root);
+        }
+
+        errors.extend(validation::validate(&root));
+
+        assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
+        Parse { green, errors: Arc::new(errors), _ty: PhantomData }
+    }
+}
+
+impl ast::Path {
+    /// Returns `text`, parsed as a path, but only if it has no errors.
+    pub fn parse(text: &str) -> Result<Self, ()> {
+        parsing::parse_text_fragment(text, parser::FragmentKind::Path)
+    }
+}
+
+impl ast::Pat {
+    /// Returns `text`, parsed as a pattern, but only if it has no errors.
+    pub fn parse(text: &str) -> Result<Self, ()> {
+        parsing::parse_text_fragment(text, parser::FragmentKind::Pattern)
+    }
+}
+
+impl ast::Expr {
+    /// Returns `text`, parsed as an expression, but only if it has no errors.
+    pub fn parse(text: &str) -> Result<Self, ()> {
+        parsing::parse_text_fragment(text, parser::FragmentKind::Expr)
+    }
+}
+
+impl ast::Item {
+    /// Returns `text`, parsed as an item, but only if it has no errors.
+    pub fn parse(text: &str) -> Result<Self, ()> {
+        parsing::parse_text_fragment(text, parser::FragmentKind::Item)
+    }
+}
+
+impl ast::Type {
+    /// Returns `text`, parsed as a type reference, but only if it has no errors.
+    pub fn parse(text: &str) -> Result<Self, ()> {
+        parsing::parse_text_fragment(text, parser::FragmentKind::Type)
+    }
+}
+
+/// Matches a `SyntaxNode` against an `ast` type.
+///
+/// # Example:
+///
+/// ```ignore
+/// match_ast! {
+///     match node {
+///         ast::CallExpr(it) => { ... },
+///         ast::MethodCallExpr(it) => { ... },
+///         ast::MacroCall(it) => { ... },
+///         _ => None,
+///     }
+/// }
+/// ```
+#[macro_export]
+macro_rules! match_ast {
+    (match $node:ident { $($tt:tt)* }) => { match_ast!(match ($node) { $($tt)* }) };
+
+    (match ($node:expr) {
+        $( ast::$ast:ident($it:ident) => $res:expr, )*
+        _ => $catch_all:expr $(,)?
+    }) => {{
+        $( if let Some($it) = ast::$ast::cast($node.clone()) { $res } else )*
+        { $catch_all }
+    }};
+}
+
+/// This test does not assert anything and instead just shows off the crate's
+/// API.
+#[test]
+fn api_walkthrough() {
+    use ast::{ModuleItemOwner, NameOwner};
+
+    let source_code = "
+        fn foo() {
+            1 + 1
+        }
+        ";
+    // `SourceFile` is the main entry point.
+    //
+    // The `parse` method returns a `Parse` -- a pair of a syntax tree and a
+    // list of errors. That is, a syntax tree is constructed even in the
+    // presence of errors.
+    let parse = SourceFile::parse(source_code);
+    assert!(parse.errors().is_empty());
+
+    // The `tree` method returns an owned syntax node of type `SourceFile`.
+    // Owned nodes are cheap: inside, they are `Rc` handles to the underlying data.
+    let file: SourceFile = parse.tree();
+
+    // `SourceFile` is the root of the syntax tree. We can iterate the file's
+    // items. Let's fetch the `foo` function.
+    let mut func = None;
+    for item in file.items() {
+        match item {
+            ast::Item::Fn(f) => func = Some(f),
+            _ => unreachable!(),
+        }
+    }
+    let func: ast::Fn = func.unwrap();
+
+    // Each AST node has a bunch of getters for children. All getters return
+    // `Option`s though, to account for incomplete code. Some getters are common
+    // for several kinds of node. In this case, a trait like `ast::NameOwner`
+    // usually exists. By convention, all ast types should be used with the
+    // `ast::` qualifier.
+    let name: Option<ast::Name> = func.name();
+    let name = name.unwrap();
+    assert_eq!(name.text(), "foo");
+
+    // Let's get the `1 + 1` expression!
+    let body: ast::BlockExpr = func.body().unwrap();
+    let expr: ast::Expr = body.expr().unwrap();
+
+    // Enums are used to group related ast nodes together, and can be used for
+    // matching. However, because there are no public fields, it's possible to
+    // match only the top level enum: that is the price we pay for increased API
+    // flexibility.
+    let bin_expr: &ast::BinExpr = match &expr {
+        ast::Expr::BinExpr(e) => e,
+        _ => unreachable!(),
+    };
+
+    // Besides the "typed" AST API, there's an untyped CST one as well.
+    // To switch from AST to CST, call the `.syntax()` method:
+    let expr_syntax: &SyntaxNode = expr.syntax();
+
+    // Note how `expr` and `bin_expr` are in fact the same node underneath:
+    assert!(expr_syntax == bin_expr.syntax());
+
+    // To go from CST to AST, the `AstNode::cast` function is used:
+    let _expr: ast::Expr = match ast::Expr::cast(expr_syntax.clone()) {
+        Some(e) => e,
+        None => unreachable!(),
+    };
+
+    // The two basic properties each syntax node has are a `SyntaxKind`:
+    assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);
+
+    // ... and a text range:
+    assert_eq!(expr_syntax.text_range(), TextRange::new(32.into(), 37.into()));
+
+    // You can get a node's text as a `SyntaxText` object, which will traverse
+    // the tree collecting tokens' text:
+    let text: SyntaxText = expr_syntax.text();
+    assert_eq!(text.to_string(), "1 + 1");
+
+    // There's a bunch of traversal methods on `SyntaxNode`:
+    assert_eq!(expr_syntax.parent().as_ref(), Some(body.syntax()));
+    assert_eq!(body.syntax().first_child_or_token().map(|it| it.kind()), Some(T!['{']));
+    assert_eq!(
+        expr_syntax.next_sibling_or_token().map(|it| it.kind()),
+        Some(SyntaxKind::WHITESPACE)
+    );
+
+    // As well as some iterator helpers:
+    let f = expr_syntax.ancestors().find_map(ast::Fn::cast);
+    assert_eq!(f, Some(func));
+    assert!(expr_syntax.siblings_with_tokens(Direction::Next).any(|it| it.kind() == T!['}']));
+    assert_eq!(
+        expr_syntax.descendants_with_tokens().count(),
+        8, // 5 tokens `1`, ` `, `+`, ` `, `1`
+           // 2 child literal expressions: `1`, `1`
+           // 1 the node itself: `1 + 1`
+    );
+
+    // There's also a `preorder` method with more fine-grained iteration control:
+    let mut buf = String::new();
+    let mut indent = 0;
+    for event in expr_syntax.preorder_with_tokens() {
+        match event {
+            WalkEvent::Enter(node) => {
+                let text = match &node {
+                    NodeOrToken::Node(it) => it.text().to_string(),
+                    NodeOrToken::Token(it) => it.text().to_string(),
+                };
+                format_to!(buf, "{:indent$}{:?} {:?}\n", " ", text, node.kind(), indent = indent);
+                indent += 2;
+            }
+            WalkEvent::Leave(_) => indent -= 2,
+        }
+    }
+    assert_eq!(indent, 0);
+    assert_eq!(
+        buf.trim(),
+        r#"
+"1 + 1" BIN_EXPR
+  "1" LITERAL
+    "1" INT_NUMBER
+  " " WHITESPACE
+  "+" PLUS
+  " " WHITESPACE
+  "1" LITERAL
+    "1" INT_NUMBER
+"#
+        .trim()
+    );
+
+    // To recursively process the tree, there are three approaches:
+    // 1. explicitly call getter methods on AST nodes.
+    // 2. use descendants and `AstNode::cast`.
+    // 3. use descendants and `match_ast!`.
+    //
+    // Here's what the second approach looks like:
+    let exprs_cast: Vec<String> = file
+        .syntax()
+        .descendants()
+        .filter_map(ast::Expr::cast)
+        .map(|expr| expr.syntax().text().to_string())
+        .collect();
+
+    // An alternative is to use the `match_ast!` macro.
+    let mut exprs_visit = Vec::new();
+    for node in file.syntax().descendants() {
+        match_ast! {
+            match node {
+                ast::Expr(it) => {
+                    let res = it.syntax().text().to_string();
+                    exprs_visit.push(res);
+                },
+                _ => (),
+            }
+        }
+    }
+    assert_eq!(exprs_cast, exprs_visit);
+}
--
cgit v1.2.3
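
For readers skimming the patch, below is a minimal consumer-side sketch of the API this file exposes, using only calls visible above (`SourceFile::parse`, `Parse::tree`, `Parse::errors`, `SyntaxError::range`, `ast::Expr::parse`). The dependency name `syntax` follows this rename; the Cargo setup, the input snippets, and the exact fragment-parse error type are illustrative assumptions, not part of the commit.

```rust
// Usage sketch: assumes a crate that depends on the renamed `syntax` crate.
use syntax::{ast, AstNode, SourceFile, SyntaxKind};

fn main() {
    // Parsing never fails: even invalid input produces a full-fidelity tree
    // plus a list of errors (here, `1 +` is missing its right-hand operand).
    let text = "fn main() { 1 + ; }";
    let parse = SourceFile::parse(text);
    for err in parse.errors() {
        eprintln!("syntax error at {:?}: {}", err.range(), err);
    }

    // The tree is still produced and round-trips the original text exactly.
    let file: SourceFile = parse.tree();
    assert_eq!(file.syntax().text().to_string(), text);

    // Fragment parsing returns `Ok` only for error-free input.
    let expr = ast::Expr::parse("1 + 1").expect("valid expression");
    assert_eq!(expr.syntax().kind(), SyntaxKind::BIN_EXPR);
}
```

The round-trip guarantee shown in the middle block is what `reparse`/`full_reparse` above rely on: an edit (`Indel`) can always be applied to the old text to reproduce the new one before re-parsing.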