1 files changed, 196 insertions, 18 deletions
diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs
index b12282b39..e7d402446 100644
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@@ -1,22 +1,24 @@
-//! An experimental implementation of [Rust RFC#2256 libsyntax2.0][rfc#2256].
+//! Syntax Tree library used throughout the rust analyzer.
 //!
-//! The intent is to be an IDE-ready parser, i.e. one that offers
+//! Properties:
+//!   - easy and fast incremental re-parsing
+//!   - graceful handling of errors
+//!   - full-fidelity representation (*any* text can be precisely represented as
+//!     a syntax tree)
 //!
-//! - easy and fast incremental re-parsing,
+//! For more information, see the [RFC]. Current implementation is inspired by
-//! - graceful handling of errors, and
+//! the [Swift] one.
-//! - maintains all information in the source file.
 //!
-//! For more information, see [the RFC][rfc#2265], or [the working draft][RFC.md].
+//! The most interesting modules here are `syntax_node` (which defines concrete
+//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
+//! CST). The actual parser lives in a separate `ra_parser` crate, though the
+//! lexer lives in this crate.
 //!
-//!   [rfc#2256]: <https://github.com/rust-lang/rfcs/pull/2256>
+//! See `api_walkthrough` test in this file for a quick API tour!
-//!   [RFC.md]: <https://github.com/matklad/libsyntax2/blob/master/docs/RFC.md>
+//!
+//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
-#![forbid(missing_debug_implementations, unconditional_recursion, future_incompatible)]
+//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
-#![deny(bad_style, missing_docs)]
-#![allow(missing_docs)]
-//#![warn(unreachable_pub)] // rust-lang/rust#47816
-mod syntax_kinds;
 mod syntax_node;
 mod syntax_text;
 mod syntax_error;
@@ -27,13 +29,11 @@ mod ptr;
 pub mod algo;
 pub mod ast;
-/// Utilities for simple uses of the parser.
-pub mod utils;
 pub use rowan::{SmolStr, TextRange, TextUnit};
+pub use ra_parser::SyntaxKind;
 pub use crate::{
    ast::AstNode,
-    syntax_kinds::SyntaxKind,
    syntax_error::{SyntaxError, SyntaxErrorKind, Location},
    syntax_text::SyntaxText,
    syntax_node::{Direction,  SyntaxNode, WalkEvent, TreeArc},
@@ -51,7 +51,7 @@ impl SourceFile {
    fn new(green: GreenNode, errors: Vec<SyntaxError>) -> TreeArc<SourceFile> {
        let root = SyntaxNode::new(green, errors);
        if cfg!(debug_assertions) {
-            utils::validate_block_structure(&root);
+            validation::validate_block_structure(&root);
        }
        assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
        TreeArc::cast(root)
@@ -82,3 +82,181 @@ impl SourceFile {
        errors
    }
 }
+pub fn check_fuzz_invariants(text: &str) {
+    let file = SourceFile::parse(text);
+    let root = file.syntax();
+    validation::validate_block_structure(root);
+    let _ = file.errors();
+}
+/// This test is not meant to catch regressions; its assertions just show off
+/// the crate's API.
+#[test]
+fn api_walkthrough() {
+    use ast::{ModuleItemOwner, NameOwner};
+    let source_code = "
+        fn foo() {
+            1 + 1
+        }
+    ";
+    // `SourceFile` is the main entry point.
+    //
+    // Note how `parse` does not return a `Result`: even completely invalid
+    // source code might be parsed.
+    let file = SourceFile::parse(source_code);
+    // Due to the way ownership is set up, owned syntax Nodes always live behind
+    // a `TreeArc` smart pointer. `TreeArc` is roughly an `std::sync::Arc` which
+    // points to the whole file instead of an individual node.
+    let file: TreeArc<SourceFile> = file;
+    // `SourceFile` is the root of the syntax tree. We can iterate file's items:
+    let mut func = None;
+    for item in file.items() {
+        match item.kind() {
+            ast::ModuleItemKind::FnDef(f) => func = Some(f),
+            _ => unreachable!(),
+        }
+    }
+    // The returned items are always references.
+    let func: &ast::FnDef = func.unwrap();
+    // All nodes implement `ToOwned` trait, with `Owned = TreeArc<Self>`.
+    // `to_owned` is a cheap operation: atomic increment.
+    let _owned_func: TreeArc<ast::FnDef> = func.to_owned();
+    // Each AST node has a bunch of getters for children. All getters return
+    // `Option`s though, to account for incomplete code. Some getters are common
+    // for several kinds of node. In this case, a trait like `ast::NameOwner`
+    // usually exists. By convention, all ast types should be used with `ast::`
+    // qualifier.
+    let name: Option<&ast::Name> = func.name();
+    let name = name.unwrap();
+    assert_eq!(name.text(), "foo");
+    // Let's get the `1 + 1` expression!
+    let block: &ast::Block = func.body().unwrap();
+    let expr: &ast::Expr = block.expr().unwrap();
+    // "Enum"-like nodes are represented using the "kind" pattern. It allows us
+    // to match exhaustively against all flavors of nodes, while maintaining
+    // internal representation flexibility. The drawback is that one can't write
+    // nested matches as one pattern.
+    let bin_expr: &ast::BinExpr = match expr.kind() {
+        ast::ExprKind::BinExpr(e) => e,
+        _ => unreachable!(),
+    };
+    // Besides the "typed" AST API, there's an untyped CST one as well.
+    // To switch from AST to CST, call `.syntax()` method:
+    let expr_syntax: &SyntaxNode = expr.syntax();
+    // Note how `expr` and `bin_expr` are in fact the same node underneath:
+    assert!(std::ptr::eq(expr_syntax, bin_expr.syntax()));
+    // To go from CST to AST, `AstNode::cast` function is used:
+    let expr = match ast::Expr::cast(expr_syntax) {
+        Some(e) => e,
+        None => unreachable!(),
+    };
+    // Note how expr is also a reference!
+    let expr: &ast::Expr = expr;
+    // This is possible because the underlying representation is the same:
+    assert_eq!(
+        expr as *const ast::Expr as *const u8,
+        expr_syntax as *const SyntaxNode as *const u8
+    );
+    // Each syntax node has two properties. The first is a `SyntaxKind`:
+    assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);
+    // And text range:
+    assert_eq!(expr_syntax.range(), TextRange::from_to(32.into(), 37.into()));
+    // You can get node's text as a `SyntaxText` object, which will traverse the
+    // tree collecting the tokens' text:
+    let text: SyntaxText<'_> = expr_syntax.text();
+    assert_eq!(text.to_string(), "1 + 1");
+    // There's a bunch of traversal methods on `SyntaxNode`:
+    assert_eq!(expr_syntax.parent(), Some(block.syntax()));
+    assert_eq!(block.syntax().first_child().map(|it| it.kind()), Some(SyntaxKind::L_CURLY));
+    assert_eq!(expr_syntax.next_sibling().map(|it| it.kind()), Some(SyntaxKind::WHITESPACE));
+    // As well as some iterator helpers:
+    let f = expr_syntax.ancestors().find_map(ast::FnDef::cast);
+    assert_eq!(f, Some(&*func));
+    assert!(expr_syntax.siblings(Direction::Next).any(|it| it.kind() == SyntaxKind::R_CURLY));
+    assert_eq!(
+        expr_syntax.descendants().count(),
+        8, // 5 tokens `1`, ` `, `+`, ` `, `1`
+           // 2 child literal expressions: `1`, `1`
+           // 1 the node itself: `1 + 1`
+    );
+    // There's also a `preorder` method with a more fine-grained iteration control:
+    let mut buf = String::new();
+    let mut indent = 0;
+    for event in expr_syntax.preorder() {
+        match event {
+            WalkEvent::Enter(node) => {
+                buf += &format!(
+                    "{:indent$}{:?} {:?}\n",
+                    " ",
+                    node.text(),
+                    node.kind(),
+                    indent = indent
+                );
+                indent += 2;
+            }
+            WalkEvent::Leave(_) => indent -= 2,
+        }
+    }
+    assert_eq!(indent, 0);
+    assert_eq!(
+        buf.trim(),
+        r#"
+"1 + 1" BIN_EXPR
+  "1" LITERAL
+    "1" INT_NUMBER
+  " " WHITESPACE
+  "+" PLUS
+  " " WHITESPACE
+  "1" LITERAL
+    "1" INT_NUMBER
+"#
+        .trim()
+    );
+    // To recursively process the tree, there are three approaches:
+    // 1. explicitly call getter methods on AST nodes.
+    // 2. use descendants and `AstNode::cast`.
+    // 3. use descendants and the visitor.
+    //
+    // Here's what the second one looks like:
+    let exprs_cast: Vec<String> = file
+        .syntax()
+        .descendants()
+        .filter_map(ast::Expr::cast)
+        .map(|expr| expr.syntax().text().to_string())
+        .collect();
+    // An alternative is to use a visitor. The visitor does not do traversal
+    // automatically (so it's more akin to a generic lambda) and is constructed
+    // from closures. This seems more flexible than a single generated visitor
+    // trait.
+    use algo::visit::{visitor, Visitor};
+    let mut exprs_visit = Vec::new();
+    for node in file.syntax().descendants() {
+        if let Some(result) =
+            visitor().visit::<ast::Expr, _>(|expr| expr.syntax().text().to_string()).accept(node)
+        {
+            exprs_visit.push(result);
+        }
+    }
+    assert_eq!(exprs_cast, exprs_visit);
+}

diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs index b12282b39..e7d402446 100644 --- a/crates/ra_syntax/src/lib.rs +++ b/crates/ra_syntax/src/lib.rs
@@ -1,22 +1,24 @@
1	//! An experimental implementation of [Rust RFC#2256 libsyntax2.0][rfc#2256].	1	//! Syntax Tree library used throughout the rust analyzer.
2	//!	2	//!
3	//! The intent is to be an IDE-ready parser, i.e. one that offers	3	//! Properties:
		4	//! - easy and fast incremental re-parsing
		5	//! - graceful handling of errors
		6	//! - full-fidelity representation (any text can be precisely represented as
		7	//! a syntax tree)
4	//!	8	//!
5	//! - easy and fast incremental re-parsing,	9	//! For more information, see the [RFC]. Current implementation is inspired by
6	//! - graceful handling of errors, and	10	//! the [Swift] one.
7	//! - maintains all information in the source file.
8	//!	11	//!
9	//! For more information, see [the RFC][rfc#2265], or [the working draft][RFC.md].	12	//! The most interesting modules here are `syntax_node` (which defines concrete
		13	//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
		14	//! CST). The actual parser lives in a separate `ra_parser` crate, though the
		15	//! lexer lives in this crate.
10	//!	16	//!
11	//! [rfc#2256]: <https://github.com/rust-lang/rfcs/pull/2256>	17	//! See `api_walkthrough` test in this file for a quick API tour!
12	//! [RFC.md]: <https://github.com/matklad/libsyntax2/blob/master/docs/RFC.md>	18	//!
13		19	//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
14	#![forbid(missing_debug_implementations, unconditional_recursion, future_incompatible)]	20	//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
15	#![deny(bad_style, missing_docs)]
16	#![allow(missing_docs)]
17	//#![warn(unreachable_pub)] // rust-lang/rust#47816
18		21
19	mod syntax_kinds;
20	mod syntax_node;	22	mod syntax_node;
21	mod syntax_text;	23	mod syntax_text;
22	mod syntax_error;	24	mod syntax_error;
@@ -27,13 +29,11 @@ mod ptr;
27		29
28	pub mod algo;	30	pub mod algo;
29	pub mod ast;	31	pub mod ast;
30	/// Utilities for simple uses of the parser.
31	pub mod utils;
32		32
33	pub use rowan::{SmolStr, TextRange, TextUnit};	33	pub use rowan::{SmolStr, TextRange, TextUnit};
		34	pub use ra_parser::SyntaxKind;
34	pub use crate::{	35	pub use crate::{
35	ast::AstNode,	36	ast::AstNode,
36	syntax_kinds::SyntaxKind,
37	syntax_error::{SyntaxError, SyntaxErrorKind, Location},	37	syntax_error::{SyntaxError, SyntaxErrorKind, Location},
38	syntax_text::SyntaxText,	38	syntax_text::SyntaxText,
39	syntax_node::{Direction, SyntaxNode, WalkEvent, TreeArc},	39	syntax_node::{Direction, SyntaxNode, WalkEvent, TreeArc},
@@ -51,7 +51,7 @@ impl SourceFile {
51	fn new(green: GreenNode, errors: Vec<SyntaxError>) -> TreeArc<SourceFile> {	51	fn new(green: GreenNode, errors: Vec<SyntaxError>) -> TreeArc<SourceFile> {
52	let root = SyntaxNode::new(green, errors);	52	let root = SyntaxNode::new(green, errors);
53	if cfg!(debug_assertions) {	53	if cfg!(debug_assertions) {
54	utils::validate_block_structure(&root);	54	validation::validate_block_structure(&root);
55	}	55	}
56	assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);	56	assert_eq!(root.kind(), SyntaxKind::SOURCE_FILE);
57	TreeArc::cast(root)	57	TreeArc::cast(root)
@@ -82,3 +82,181 @@ impl SourceFile {
82	errors	82	errors
83	}	83	}
84	}	84	}
		85
		86	pub fn check_fuzz_invariants(text: &str) {
		87	let file = SourceFile::parse(text);
		88	let root = file.syntax();
		89	validation::validate_block_structure(root);
		90	let _ = file.errors();
		91	}
		92
		93	/// This test is not meant to catch regressions; its assertions just show off
		94	/// the crate's API.
		95	#[test]
		96	fn api_walkthrough() {
		97	use ast::{ModuleItemOwner, NameOwner};
		98
		99	let source_code = "
		100	fn foo() {
		101	1 + 1
		102	}
		103	";
		104	// `SourceFile` is the main entry point.
		105	//
		106	// Note how `parse` does not return a `Result`: even completely invalid
		107	// source code might be parsed.
		108	let file = SourceFile::parse(source_code);
		109
		110	// Due to the way ownership is set up, owned syntax Nodes always live behind
		111	// a `TreeArc` smart pointer. `TreeArc` is roughly an `std::sync::Arc` which
		112	// points to the whole file instead of an individual node.
		113	let file: TreeArc<SourceFile> = file;
		114
		115	// `SourceFile` is the root of the syntax tree. We can iterate file's items:
		116	let mut func = None;
		117	for item in file.items() {
		118	match item.kind() {
		119	ast::ModuleItemKind::FnDef(f) => func = Some(f),
		120	_ => unreachable!(),
		121	}
		122	}
		123	// The returned items are always references.
		124	let func: &ast::FnDef = func.unwrap();
		125
		126	// All nodes implement `ToOwned` trait, with `Owned = TreeArc<Self>`.
		127	// `to_owned` is a cheap operation: atomic increment.
		128	let _owned_func: TreeArc<ast::FnDef> = func.to_owned();
		129
		130	// Each AST node has a bunch of getters for children. All getters return
		131	// `Option`s though, to account for incomplete code. Some getters are common
		132	// for several kinds of node. In this case, a trait like `ast::NameOwner`
		133	// usually exists. By convention, all ast types should be used with `ast::`
		134	// qualifier.
		135	let name: Option<&ast::Name> = func.name();
		136	let name = name.unwrap();
		137	assert_eq!(name.text(), "foo");
		138
		139	// Let's get the `1 + 1` expression!
		140	let block: &ast::Block = func.body().unwrap();
		141	let expr: &ast::Expr = block.expr().unwrap();
		142
		143	// "Enum"-like nodes are represented using the "kind" pattern. It allows us
		144	// to match exhaustively against all flavors of nodes, while maintaining
		145	// internal representation flexibility. The drawback is that one can't write
		146	// nested matches as one pattern.
		147	let bin_expr: &ast::BinExpr = match expr.kind() {
		148	ast::ExprKind::BinExpr(e) => e,
		149	_ => unreachable!(),
		150	};
		151
		152	// Besides the "typed" AST API, there's an untyped CST one as well.
		153	// To switch from AST to CST, call `.syntax()` method:
		154	let expr_syntax: &SyntaxNode = expr.syntax();
		155
		156	// Note how `expr` and `bin_expr` are in fact the same node underneath:
		157	assert!(std::ptr::eq(expr_syntax, bin_expr.syntax()));
		158
		159	// To go from CST to AST, `AstNode::cast` function is used:
		160	let expr = match ast::Expr::cast(expr_syntax) {
		161	Some(e) => e,
		162	None => unreachable!(),
		163	};
		164
		165	// Note how expr is also a reference!
		166	let expr: &ast::Expr = expr;
		167
		168	// This is possible because the underlying representation is the same:
		169	assert_eq!(
		170	expr as *const ast::Expr as *const u8,
		171	expr_syntax as *const SyntaxNode as *const u8
		172	);
		173
		174	// Each syntax node has two properties. The first is a `SyntaxKind`:
		175	assert_eq!(expr_syntax.kind(), SyntaxKind::BIN_EXPR);
		176
		177	// And text range:
		178	assert_eq!(expr_syntax.range(), TextRange::from_to(32.into(), 37.into()));
		179
		180	// You can get node's text as a `SyntaxText` object, which will traverse the
		181	// tree collecting the tokens' text:
		182	let text: SyntaxText<'_> = expr_syntax.text();
		183	assert_eq!(text.to_string(), "1 + 1");
		184
		185	// There's a bunch of traversal methods on `SyntaxNode`:
		186	assert_eq!(expr_syntax.parent(), Some(block.syntax()));
		187	assert_eq!(block.syntax().first_child().map(|it| it.kind()), Some(SyntaxKind::L_CURLY));
		188	assert_eq!(expr_syntax.next_sibling().map(|it| it.kind()), Some(SyntaxKind::WHITESPACE));
		189
		190	// As well as some iterator helpers:
		191	let f = expr_syntax.ancestors().find_map(ast::FnDef::cast);
		192	assert_eq!(f, Some(&*func));
		193	assert!(expr_syntax.siblings(Direction::Next).any(|it| it.kind() == SyntaxKind::R_CURLY));
		194	assert_eq!(
		195	expr_syntax.descendants().count(),
		196	8, // 5 tokens `1`, ` `, `+`, ` `, `1`
		197	// 2 child literal expressions: `1`, `1`
		198	// 1 the node itself: `1 + 1`
		199	);
		200
		201	// There's also a `preorder` method with a more fine-grained iteration control:
		202	let mut buf = String::new();
		203	let mut indent = 0;
		204	for event in expr_syntax.preorder() {
		205	match event {
		206	WalkEvent::Enter(node) => {
		207	buf += &format!(
		208	"{:indent$}{:?} {:?}\n",
		209	" ",
		210	node.text(),
		211	node.kind(),
		212	indent = indent
		213	);
		214	indent += 2;
		215	}
		216	WalkEvent::Leave(_) => indent -= 2,
		217	}
		218	}
		219	assert_eq!(indent, 0);
		220	assert_eq!(
		221	buf.trim(),
		222	r#"
		223	"1 + 1" BIN_EXPR
		224	"1" LITERAL
		225	"1" INT_NUMBER
		226	" " WHITESPACE
		227	"+" PLUS
		228	" " WHITESPACE
		229	"1" LITERAL
		230	"1" INT_NUMBER
		231	"#
		232	.trim()
		233	);
		234
		235	// To recursively process the tree, there are three approaches:
		236	// 1. explicitly call getter methods on AST nodes.
		237	// 2. use descendants and `AstNode::cast`.
		238	// 3. use descendants and the visitor.
		239	//
		240	// Here's what the second one looks like:
		241	let exprs_cast: Vec<String> = file
		242	.syntax()
		243	.descendants()
		244	.filter_map(ast::Expr::cast)
		245	.map(|expr| expr.syntax().text().to_string())
		246	.collect();
		247
		248	// An alternative is to use a visitor. The visitor does not do traversal
		249	// automatically (so it's more akin to a generic lambda) and is constructed
		250	// from closures. This seems more flexible than a single generated visitor
		251	// trait.
		252	use algo::visit::{visitor, Visitor};
		253	let mut exprs_visit = Vec::new();
		254	for node in file.syntax().descendants() {
		255	if let Some(result) =
		256	visitor().visit::<ast::Expr, _>(|expr| expr.syntax().text().to_string()).accept(node)
		257	{
		258	exprs_visit.push(result);
		259	}
		260	}
		261	assert_eq!(exprs_cast, exprs_visit);
		262	}