From 412ac63ff517c7eab5e1cfe0bf239616bd2c13a1 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov <aleksey.kladov@gmail.com>
Date: Thu, 21 Feb 2019 15:24:42 +0300
Subject: docs

---
 crates/ra_parser/src/event.rs             |  7 ++--
 crates/ra_parser/src/lib.rs               | 46 +++++++++++++++++++-------
 crates/ra_parser/src/token_set.rs         |  1 +
 crates/ra_syntax/src/algo.rs              |  1 +
 crates/ra_syntax/src/ast.rs               |  1 +
 crates/ra_syntax/src/lib.rs               | 27 +++++++--------
 crates/ra_syntax/src/parsing.rs           | 44 ++++---------------------
 crates/ra_syntax/src/parsing/builder.rs   | 55 ++++++++++++++++++++++++-------
 crates/ra_syntax/src/parsing/input.rs     | 40 +++++++++++-----------
 crates/ra_syntax/src/parsing/reparsing.rs | 11 +++++--
 crates/ra_syntax/src/syntax_node.rs       |  8 +++++
 11 files changed, 143 insertions(+), 98 deletions(-)

(limited to 'crates')
diff --git a/crates/ra_parser/src/event.rs b/crates/ra_parser/src/event.rs
index d6e8454d4..6361d5d86 100644
--- a/crates/ra_parser/src/event.rs
+++ b/crates/ra_parser/src/event.rs
@@ -113,12 +113,11 @@ pub(super) fn process(sink: &mut dyn TreeSink, mut events: Vec<Event>) {
                     // append `B`'s forward_parent `C` in the next stage.
                 }
 
-                for (j, kind) in forward_parents.drain(..).rev().enumerate() {
-                    let is_root_node = i == 0 && j == 0;
-                    sink.start_branch(kind, is_root_node);
+                for kind in forward_parents.drain(..).rev() {
+                    sink.start_branch(kind);
                 }
             }
-            Event::Finish => sink.finish_branch(i == events.len() - 1),
+            Event::Finish => sink.finish_branch(),
             Event::Token { kind, n_raw_tokens } => {
                 sink.leaf(kind, n_raw_tokens);
             }
diff --git a/crates/ra_parser/src/lib.rs b/crates/ra_parser/src/lib.rs
index 7931b5189..ddc08e462 100644
--- a/crates/ra_parser/src/lib.rs
+++ b/crates/ra_parser/src/lib.rs
@@ -1,3 +1,17 @@
+//! The Rust parser.
+//!
+//! The parser doesn't know about concrete representation of tokens and syntax
+//! trees. Abstract `TokenSource` and `TreeSink` traits are used instead. As a
+//! consequence, this crates does not contain a lexer.
+//!
+//! The `Parser` struct from the `parser` module is a cursor into the sequence
+//! of tokens. Parsing routines use `Parser` to inspect current state and
+//! advance the parsing.
+//!
+//! The actual parsing happens in the `grammar` module.
+//!
+//! Tests for this crate live in `ra_syntax` crate.
+
 #[macro_use]
 mod token_set;
 mod syntax_kind;
@@ -12,30 +26,34 @@ pub use syntax_kind::SyntaxKind;
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub struct ParseError(pub String);
 
+/// `TokenSource` abstracts the source of the tokens parser operates one.
+///
+/// Hopefully this will allow us to treat text and token trees in the same way!
+pub trait TokenSource {
+    /// What is the current token?
+    fn token_kind(&self, pos: usize) -> SyntaxKind;
+    /// Is the current token joined to the next one (`> >` vs `>>`).
+    fn is_token_joint_to_next(&self, pos: usize) -> bool;
+    /// Is the current token a specified keyword?
+    fn is_keyword(&self, pos: usize, kw: &str) -> bool;
+}
+
 /// `TreeSink` abstracts details of a particular syntax tree implementation.
 pub trait TreeSink {
     /// Adds new leaf to the current branch.
     fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8);
 
     /// Start new branch and make it current.
-    fn start_branch(&mut self, kind: SyntaxKind, root: bool);
+    fn start_branch(&mut self, kind: SyntaxKind);
 
     /// Finish current branch and restore previous
     /// branch as current.
-    fn finish_branch(&mut self, root: bool);
+    fn finish_branch(&mut self);
 
     fn error(&mut self, error: ParseError);
 }
 
-/// `TokenSource` abstracts the source of the tokens parser operates one.
-///
-/// Hopefully this will allow us to treat text and token trees in the same way!
-pub trait TokenSource {
-    fn token_kind(&self, pos: usize) -> SyntaxKind;
-    fn is_token_joint_to_next(&self, pos: usize) -> bool;
-    fn is_keyword(&self, pos: usize, kw: &str) -> bool;
-}
-
+/// Parse given tokens into the given sink as a rust file.
 pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
     let mut p = parser::Parser::new(token_source);
     grammar::root(&mut p);
@@ -43,9 +61,11 @@ pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
     event::process(tree_sink, events);
 }
 
+/// A parsing function for a specific braced-block.
 pub struct Reparser(fn(&mut parser::Parser));
 
 impl Reparser {
+    /// If the node is a braced block, return the corresponding `Reparser`.
     pub fn for_node(
         node: SyntaxKind,
         first_child: Option<SyntaxKind>,
@@ -54,6 +74,10 @@ impl Reparser {
         grammar::reparser(node, first_child, parent).map(Reparser)
     }
 
+    /// Re-parse given tokens using this `Reparser`.
+    ///
+    /// Tokens must start with `{`, end with `}` and form a valid brace
+    /// sequence.
     pub fn parse(self, token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
         let Reparser(r) = self;
         let mut p = parser::Parser::new(token_source);
diff --git a/crates/ra_parser/src/token_set.rs b/crates/ra_parser/src/token_set.rs
index 24152a38a..79121b35f 100644
--- a/crates/ra_parser/src/token_set.rs
+++ b/crates/ra_parser/src/token_set.rs
@@ -1,5 +1,6 @@
 use crate::SyntaxKind;
 
+/// A bit-set of `SyntaxKind`s
 #[derive(Clone, Copy)]
 pub(crate) struct TokenSet(u128);
 
diff --git a/crates/ra_syntax/src/algo.rs b/crates/ra_syntax/src/algo.rs
index 99b0983b0..e8cf0d4b5 100644
--- a/crates/ra_syntax/src/algo.rs
+++ b/crates/ra_syntax/src/algo.rs
@@ -33,6 +33,7 @@ pub fn find_covering_node(root: &SyntaxNode, range: TextRange) -> &SyntaxNode {
     SyntaxNode::from_repr(root.0.covering_node(range))
 }
 
+// Replace with `std::iter::successors` in `1.34.0`
 pub fn generate<T>(seed: Option<T>, step: impl Fn(&T) -> Option<T>) -> impl Iterator<Item = T> {
     ::itertools::unfold(seed, move |slot| {
         slot.take().map(|curr| {
diff --git a/crates/ra_syntax/src/ast.rs b/crates/ra_syntax/src/ast.rs
index 62641c9fe..20e0a6856 100644
--- a/crates/ra_syntax/src/ast.rs
+++ b/crates/ra_syntax/src/ast.rs
@@ -1,3 +1,4 @@
+//! Abstract Syntax Tree, layered on top of untyped `SyntaxNode`s
 mod generated;
 
 use std::marker::PhantomData;
diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs
index c788bddec..6982b9815 100644
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@@ -1,20 +1,21 @@
-//! An experimental implementation of [Rust RFC#2256 libsyntax2.0][rfc#2256].
+//! Syntax Tree library used throughout the rust analyzer.
 //!
-//! The intent is to be an IDE-ready parser, i.e. one that offers
+//! Properties:
+//!   - easy and fast incremental re-parsing
+//!   - graceful handling of errors
+//!   - full-fidelity representation (*any* text can be precisely represented as
+//!     a syntax tree)
 //!
-//! - easy and fast incremental re-parsing,
-//! - graceful handling of errors, and
-//! - maintains all information in the source file.
+//! For more information, see the [RFC]. Current implementation is inspired by
+//! the [Swift] one.
 //!
-//! For more information, see [the RFC][rfc#2265], or [the working draft][RFC.md].
+//! The most interesting modules here are `syntax_node` (which defines concrete
+//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
+//! CST). The actual parser live in a separate `ra_parser` crate, thought the
+//! lexer lives in this crate.
 //!
-//!   [rfc#2256]: <https://github.com/rust-lang/rfcs/pull/2256>
-//!   [RFC.md]: <https://github.com/matklad/libsyntax2/blob/master/docs/RFC.md>
-
-#![forbid(missing_debug_implementations, unconditional_recursion, future_incompatible)]
-#![deny(bad_style, missing_docs)]
-#![allow(missing_docs)]
-//#![warn(unreachable_pub)] // rust-lang/rust#47816
+//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
+//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
 
 mod syntax_node;
 mod syntax_text;
diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs
index 0a11600e1..cf573801c 100644
--- a/crates/ra_syntax/src/parsing.rs
+++ b/crates/ra_syntax/src/parsing.rs
@@ -1,17 +1,18 @@
-mod builder;
+//! Lexing, bridging to ra_parser (which does the actual parsing) and
+//! incremental reparsing.
+
 mod lexer;
 mod input;
+mod builder;
 mod reparsing;
 
-use ra_parser::{parse, ParseError};
-
 use crate::{
-    SyntaxKind, SyntaxError,
+    SyntaxError,
+    syntax_node::GreenNode,
     parsing::{
         builder::TreeBuilder,
         input::ParserInput,
     },
-    syntax_node::GreenNode,
 };
 
 pub use self::lexer::{tokenize, Token};
@@ -22,37 +23,6 @@ pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
     let tokens = tokenize(&text);
     let token_source = ParserInput::new(text, &tokens);
     let mut tree_sink = TreeBuilder::new(text, &tokens);
-    parse(&token_source, &mut tree_sink);
+    ra_parser::parse(&token_source, &mut tree_sink);
     tree_sink.finish()
 }
-
-/// `TreeSink` abstracts details of a particular syntax tree implementation.
-trait TreeSink {
-    type Tree;
-
-    /// Adds new leaf to the current branch.
-    fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8);
-
-    /// Start new branch and make it current.
-    fn start_branch(&mut self, kind: SyntaxKind, root: bool);
-
-    /// Finish current branch and restore previous
-    /// branch as current.
-    fn finish_branch(&mut self, root: bool);
-
-    fn error(&mut self, error: ParseError);
-
-    /// Complete tree building. Make sure that
-    /// `start_branch` and `finish_branch` calls
-    /// are paired!
-    fn finish(self) -> Self::Tree;
-}
-
-/// `TokenSource` abstracts the source of the tokens parser operates one.
-///
-/// Hopefully this will allow us to treat text and token trees in the same way!
-trait TokenSource {
-    fn token_kind(&self, pos: usize) -> SyntaxKind;
-    fn is_token_joint_to_next(&self, pos: usize) -> bool;
-    fn is_keyword(&self, pos: usize, kw: &str) -> bool;
-}
diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs
index 0775b0900..cfe3139b8 100644
--- a/crates/ra_syntax/src/parsing/builder.rs
+++ b/crates/ra_syntax/src/parsing/builder.rs
@@ -1,4 +1,7 @@
+use std::mem;
+
 use ra_parser::{TreeSink, ParseError};
+use rowan::GreenNodeBuilder;
 
 use crate::{
     SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange,
@@ -7,19 +10,32 @@ use crate::{
     syntax_node::{GreenNode, RaTypes},
 };
 
-use rowan::GreenNodeBuilder;
-
+/// Bridges the parser with our specific syntax tree representation.
+///
+/// `TreeBuilder` also handles attachment of trivia (whitespace) to nodes.
 pub(crate) struct TreeBuilder<'a> {
     text: &'a str,
     tokens: &'a [Token],
     text_pos: TextUnit,
     token_pos: usize,
+    state: State,
     errors: Vec<SyntaxError>,
     inner: GreenNodeBuilder<RaTypes>,
 }
 
+enum State {
+    PendingStart,
+    Normal,
+    PendingFinish,
+}
+
 impl<'a> TreeSink for TreeBuilder<'a> {
     fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) {
+        match mem::replace(&mut self.state, State::Normal) {
+            State::PendingStart => unreachable!(),
+            State::PendingFinish => self.inner.finish_internal(),
+            State::Normal => (),
+        }
         self.eat_trivias();
         let n_tokens = n_tokens as usize;
         let len = self.tokens[self.token_pos..self.token_pos + n_tokens]
@@ -29,11 +45,18 @@ impl<'a> TreeSink for TreeBuilder<'a> {
         self.do_leaf(kind, len, n_tokens);
     }
 
-    fn start_branch(&mut self, kind: SyntaxKind, root: bool) {
-        if root {
-            self.inner.start_internal(kind);
-            return;
+    fn start_branch(&mut self, kind: SyntaxKind) {
+        match mem::replace(&mut self.state, State::Normal) {
+            State::PendingStart => {
+                self.inner.start_internal(kind);
+                // No need to attach trivias to previous node: there is no
+                // previous node.
+                return;
+            }
+            State::PendingFinish => self.inner.finish_internal(),
+            State::Normal => (),
         }
+
         let n_trivias =
             self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
         let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
@@ -54,11 +77,12 @@ impl<'a> TreeSink for TreeBuilder<'a> {
         self.eat_n_trivias(n_attached_trivias);
     }
 
-    fn finish_branch(&mut self, root: bool) {
-        if root {
-            self.eat_trivias()
+    fn finish_branch(&mut self) {
+        match mem::replace(&mut self.state, State::PendingFinish) {
+            State::PendingStart => unreachable!(),
+            State::PendingFinish => self.inner.finish_internal(),
+            State::Normal => (),
         }
-        self.inner.finish_internal();
     }
 
     fn error(&mut self, error: ParseError) {
@@ -74,12 +98,21 @@ impl<'a> TreeBuilder<'a> {
             tokens,
             text_pos: 0.into(),
             token_pos: 0,
+            state: State::PendingStart,
             errors: Vec::new(),
             inner: GreenNodeBuilder::new(),
         }
     }
 
-    pub(super) fn finish(self) -> (GreenNode, Vec<SyntaxError>) {
+    pub(super) fn finish(mut self) -> (GreenNode, Vec<SyntaxError>) {
+        match mem::replace(&mut self.state, State::Normal) {
+            State::PendingFinish => {
+                self.eat_trivias();
+                self.inner.finish_internal()
+            }
+            State::PendingStart | State::Normal => unreachable!(),
+        }
+
         (self.inner.finish(), self.errors)
     }
 
diff --git a/crates/ra_syntax/src/parsing/input.rs b/crates/ra_syntax/src/parsing/input.rs
index 58be795bc..31c6a3b9b 100644
--- a/crates/ra_syntax/src/parsing/input.rs
+++ b/crates/ra_syntax/src/parsing/input.rs
@@ -5,6 +5,26 @@ use crate::{
     parsing::lexer::Token,
 };
 
+pub(crate) struct ParserInput<'t> {
+    text: &'t str,
+    /// start position of each token(expect whitespace and comment)
+    /// ```non-rust
+    ///  struct Foo;
+    /// ^------^---
+    /// |      |  ^-
+    /// 0      7  10
+    /// ```
+    /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
+    start_offsets: Vec<TextUnit>,
+    /// non-whitespace/comment tokens
+    /// ```non-rust
+    /// struct Foo {}
+    /// ^^^^^^ ^^^ ^^
+    /// ```
+    /// tokens: `[struct, Foo, {, }]`
+    tokens: Vec<Token>,
+}
+
 impl<'t> TokenSource for ParserInput<'t> {
     fn token_kind(&self, pos: usize) -> SyntaxKind {
         if !(pos < self.tokens.len()) {
@@ -28,26 +48,6 @@ impl<'t> TokenSource for ParserInput<'t> {
     }
 }
 
-pub(crate) struct ParserInput<'t> {
-    text: &'t str,
-    /// start position of each token(expect whitespace and comment)
-    /// ```non-rust
-    ///  struct Foo;
-    /// ^------^---
-    /// |      |  ^-
-    /// 0      7  10
-    /// ```
-    /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
-    start_offsets: Vec<TextUnit>,
-    /// non-whitespace/comment tokens
-    /// ```non-rust
-    /// struct Foo {}
-    /// ^^^^^^ ^^^ ^^
-    /// ```
-    /// tokens: `[struct, Foo, {, }]`
-    tokens: Vec<Token>,
-}
-
 impl<'t> ParserInput<'t> {
     /// Generate input from tokens(expect comment and whitespace).
     pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index ffcb512ad..6957c26c0 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -1,11 +1,18 @@
+//! Implementation of incremental re-parsing.
+//!
+//! We use two simple strategies for this:
+//!   - if the edit modifies only a single token (like changing an identifier's
+//!     letter), we replace only this token.
+//!   - otherwise, we search for the nearest `{}` block which contains the edit
+//!     and try to parse only this block.
+
 use ra_text_edit::AtomTextEdit;
 use ra_parser::Reparser;
 
 use crate::{
-    SyntaxKind::*, TextRange, TextUnit,
+    SyntaxKind::*, TextRange, TextUnit, SyntaxError,
     algo,
     syntax_node::{GreenNode, SyntaxNode},
-    syntax_error::SyntaxError,
     parsing::{
         input::ParserInput,
         builder::TreeBuilder,
diff --git a/crates/ra_syntax/src/syntax_node.rs b/crates/ra_syntax/src/syntax_node.rs
index aa627398d..a1bc0b499 100644
--- a/crates/ra_syntax/src/syntax_node.rs
+++ b/crates/ra_syntax/src/syntax_node.rs
@@ -1,3 +1,11 @@
+//! This module defines Concrete Syntax Tree (CST), used by rust-analyzer.
+//!
+//! The CST includes comments and whitespace, provides a single node type,
+//! `SyntaxNode`, and a basic traversal API (parent, children, siblings).
+//!
+//! The *real* implementation is in the (language-agnostic) `rowan` crate, this
+//! modules just wraps its API.
+
 use std::{fmt, borrow::Borrow};
 
 use rowan::{Types, TransparentNewType};
-- 
cgit v1.2.3