From 412ac63ff517c7eab5e1cfe0bf239616bd2c13a1 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 21 Feb 2019 15:24:42 +0300 Subject: docs --- crates/ra_parser/src/event.rs | 7 ++-- crates/ra_parser/src/lib.rs | 46 +++++++++++++++++++------- crates/ra_parser/src/token_set.rs | 1 + crates/ra_syntax/src/algo.rs | 1 + crates/ra_syntax/src/ast.rs | 1 + crates/ra_syntax/src/lib.rs | 27 +++++++-------- crates/ra_syntax/src/parsing.rs | 44 ++++--------------------- crates/ra_syntax/src/parsing/builder.rs | 55 ++++++++++++++++++++++++------- crates/ra_syntax/src/parsing/input.rs | 40 +++++++++++----------- crates/ra_syntax/src/parsing/reparsing.rs | 11 +++++-- crates/ra_syntax/src/syntax_node.rs | 8 +++++ 11 files changed, 143 insertions(+), 98 deletions(-) (limited to 'crates') diff --git a/crates/ra_parser/src/event.rs b/crates/ra_parser/src/event.rs index d6e8454d4..6361d5d86 100644 --- a/crates/ra_parser/src/event.rs +++ b/crates/ra_parser/src/event.rs @@ -113,12 +113,11 @@ pub(super) fn process(sink: &mut dyn TreeSink, mut events: Vec) { // append `B`'s forward_parent `C` in the next stage. } - for (j, kind) in forward_parents.drain(..).rev().enumerate() { - let is_root_node = i == 0 && j == 0; - sink.start_branch(kind, is_root_node); + for kind in forward_parents.drain(..).rev() { + sink.start_branch(kind); } } - Event::Finish => sink.finish_branch(i == events.len() - 1), + Event::Finish => sink.finish_branch(), Event::Token { kind, n_raw_tokens } => { sink.leaf(kind, n_raw_tokens); } diff --git a/crates/ra_parser/src/lib.rs b/crates/ra_parser/src/lib.rs index 7931b5189..ddc08e462 100644 --- a/crates/ra_parser/src/lib.rs +++ b/crates/ra_parser/src/lib.rs @@ -1,3 +1,17 @@ +//! The Rust parser. +//! +//! The parser doesn't know about concrete representation of tokens and syntax +//! trees. Abstract `TokenSource` and `TreeSink` traits are used instead. As a +//! consequence, this crates does not contain a lexer. +//! +//! The `Parser` struct from the `parser` module is a cursor into the sequence +//! of tokens. Parsing routines use `Parser` to inspect current state and +//! advance the parsing. +//! +//! The actual parsing happens in the `grammar` module. +//! +//! Tests for this crate live in `ra_syntax` crate. + #[macro_use] mod token_set; mod syntax_kind; @@ -12,30 +26,34 @@ pub use syntax_kind::SyntaxKind; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ParseError(pub String); +/// `TokenSource` abstracts the source of the tokens parser operates one. +/// +/// Hopefully this will allow us to treat text and token trees in the same way! +pub trait TokenSource { + /// What is the current token? + fn token_kind(&self, pos: usize) -> SyntaxKind; + /// Is the current token joined to the next one (`> >` vs `>>`). + fn is_token_joint_to_next(&self, pos: usize) -> bool; + /// Is the current token a specified keyword? + fn is_keyword(&self, pos: usize, kw: &str) -> bool; +} + /// `TreeSink` abstracts details of a particular syntax tree implementation. pub trait TreeSink { /// Adds new leaf to the current branch. fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8); /// Start new branch and make it current. - fn start_branch(&mut self, kind: SyntaxKind, root: bool); + fn start_branch(&mut self, kind: SyntaxKind); /// Finish current branch and restore previous /// branch as current. - fn finish_branch(&mut self, root: bool); + fn finish_branch(&mut self); fn error(&mut self, error: ParseError); } -/// `TokenSource` abstracts the source of the tokens parser operates one. -/// -/// Hopefully this will allow us to treat text and token trees in the same way! -pub trait TokenSource { - fn token_kind(&self, pos: usize) -> SyntaxKind; - fn is_token_joint_to_next(&self, pos: usize) -> bool; - fn is_keyword(&self, pos: usize, kw: &str) -> bool; -} - +/// Parse given tokens into the given sink as a rust file. pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { let mut p = parser::Parser::new(token_source); grammar::root(&mut p); @@ -43,9 +61,11 @@ pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { event::process(tree_sink, events); } +/// A parsing function for a specific braced-block. pub struct Reparser(fn(&mut parser::Parser)); impl Reparser { + /// If the node is a braced block, return the corresponding `Reparser`. pub fn for_node( node: SyntaxKind, first_child: Option, @@ -54,6 +74,10 @@ impl Reparser { grammar::reparser(node, first_child, parent).map(Reparser) } + /// Re-parse given tokens using this `Reparser`. + /// + /// Tokens must start with `{`, end with `}` and form a valid brace + /// sequence. pub fn parse(self, token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { let Reparser(r) = self; let mut p = parser::Parser::new(token_source); diff --git a/crates/ra_parser/src/token_set.rs b/crates/ra_parser/src/token_set.rs index 24152a38a..79121b35f 100644 --- a/crates/ra_parser/src/token_set.rs +++ b/crates/ra_parser/src/token_set.rs @@ -1,5 +1,6 @@ use crate::SyntaxKind; +/// A bit-set of `SyntaxKind`s #[derive(Clone, Copy)] pub(crate) struct TokenSet(u128); diff --git a/crates/ra_syntax/src/algo.rs b/crates/ra_syntax/src/algo.rs index 99b0983b0..e8cf0d4b5 100644 --- a/crates/ra_syntax/src/algo.rs +++ b/crates/ra_syntax/src/algo.rs @@ -33,6 +33,7 @@ pub fn find_covering_node(root: &SyntaxNode, range: TextRange) -> &SyntaxNode { SyntaxNode::from_repr(root.0.covering_node(range)) } +// Replace with `std::iter::successors` in `1.34.0` pub fn generate(seed: Option, step: impl Fn(&T) -> Option) -> impl Iterator { ::itertools::unfold(seed, move |slot| { slot.take().map(|curr| { diff --git a/crates/ra_syntax/src/ast.rs b/crates/ra_syntax/src/ast.rs index 62641c9fe..20e0a6856 100644 --- a/crates/ra_syntax/src/ast.rs +++ b/crates/ra_syntax/src/ast.rs @@ -1,3 +1,4 @@ +//! Abstract Syntax Tree, layered on top of untyped `SyntaxNode`s mod generated; use std::marker::PhantomData; diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs index c788bddec..6982b9815 100644 --- a/crates/ra_syntax/src/lib.rs +++ b/crates/ra_syntax/src/lib.rs @@ -1,20 +1,21 @@ -//! An experimental implementation of [Rust RFC#2256 libsyntax2.0][rfc#2256]. +//! Syntax Tree library used throughout the rust analyzer. //! -//! The intent is to be an IDE-ready parser, i.e. one that offers +//! Properties: +//! - easy and fast incremental re-parsing +//! - graceful handling of errors +//! - full-fidelity representation (*any* text can be precisely represented as +//! a syntax tree) //! -//! - easy and fast incremental re-parsing, -//! - graceful handling of errors, and -//! - maintains all information in the source file. +//! For more information, see the [RFC]. Current implementation is inspired by +//! the [Swift] one. //! -//! For more information, see [the RFC][rfc#2265], or [the working draft][RFC.md]. +//! The most interesting modules here are `syntax_node` (which defines concrete +//! syntax tree) and `ast` (which defines abstract syntax tree on top of the +//! CST). The actual parser live in a separate `ra_parser` crate, thought the +//! lexer lives in this crate. //! -//! [rfc#2256]: -//! [RFC.md]: - -#![forbid(missing_debug_implementations, unconditional_recursion, future_incompatible)] -#![deny(bad_style, missing_docs)] -#![allow(missing_docs)] -//#![warn(unreachable_pub)] // rust-lang/rust#47816 +//! [RFC]: +//! [Swift]: mod syntax_node; mod syntax_text; diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 0a11600e1..cf573801c 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -1,17 +1,18 @@ -mod builder; +//! Lexing, bridging to ra_parser (which does the actual parsing) and +//! incremental reparsing. + mod lexer; mod input; +mod builder; mod reparsing; -use ra_parser::{parse, ParseError}; - use crate::{ - SyntaxKind, SyntaxError, + SyntaxError, + syntax_node::GreenNode, parsing::{ builder::TreeBuilder, input::ParserInput, }, - syntax_node::GreenNode, }; pub use self::lexer::{tokenize, Token}; @@ -22,37 +23,6 @@ pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec) { let tokens = tokenize(&text); let token_source = ParserInput::new(text, &tokens); let mut tree_sink = TreeBuilder::new(text, &tokens); - parse(&token_source, &mut tree_sink); + ra_parser::parse(&token_source, &mut tree_sink); tree_sink.finish() } - -/// `TreeSink` abstracts details of a particular syntax tree implementation. -trait TreeSink { - type Tree; - - /// Adds new leaf to the current branch. - fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8); - - /// Start new branch and make it current. - fn start_branch(&mut self, kind: SyntaxKind, root: bool); - - /// Finish current branch and restore previous - /// branch as current. - fn finish_branch(&mut self, root: bool); - - fn error(&mut self, error: ParseError); - - /// Complete tree building. Make sure that - /// `start_branch` and `finish_branch` calls - /// are paired! - fn finish(self) -> Self::Tree; -} - -/// `TokenSource` abstracts the source of the tokens parser operates one. -/// -/// Hopefully this will allow us to treat text and token trees in the same way! -trait TokenSource { - fn token_kind(&self, pos: usize) -> SyntaxKind; - fn is_token_joint_to_next(&self, pos: usize) -> bool; - fn is_keyword(&self, pos: usize, kw: &str) -> bool; -} diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs index 0775b0900..cfe3139b8 100644 --- a/crates/ra_syntax/src/parsing/builder.rs +++ b/crates/ra_syntax/src/parsing/builder.rs @@ -1,4 +1,7 @@ +use std::mem; + use ra_parser::{TreeSink, ParseError}; +use rowan::GreenNodeBuilder; use crate::{ SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange, @@ -7,19 +10,32 @@ use crate::{ syntax_node::{GreenNode, RaTypes}, }; -use rowan::GreenNodeBuilder; - +/// Bridges the parser with our specific syntax tree representation. +/// +/// `TreeBuilder` also handles attachment of trivia (whitespace) to nodes. pub(crate) struct TreeBuilder<'a> { text: &'a str, tokens: &'a [Token], text_pos: TextUnit, token_pos: usize, + state: State, errors: Vec, inner: GreenNodeBuilder, } +enum State { + PendingStart, + Normal, + PendingFinish, +} + impl<'a> TreeSink for TreeBuilder<'a> { fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) { + match mem::replace(&mut self.state, State::Normal) { + State::PendingStart => unreachable!(), + State::PendingFinish => self.inner.finish_internal(), + State::Normal => (), + } self.eat_trivias(); let n_tokens = n_tokens as usize; let len = self.tokens[self.token_pos..self.token_pos + n_tokens] @@ -29,11 +45,18 @@ impl<'a> TreeSink for TreeBuilder<'a> { self.do_leaf(kind, len, n_tokens); } - fn start_branch(&mut self, kind: SyntaxKind, root: bool) { - if root { - self.inner.start_internal(kind); - return; + fn start_branch(&mut self, kind: SyntaxKind) { + match mem::replace(&mut self.state, State::Normal) { + State::PendingStart => { + self.inner.start_internal(kind); + // No need to attach trivias to previous node: there is no + // previous node. + return; + } + State::PendingFinish => self.inner.finish_internal(), + State::Normal => (), } + let n_trivias = self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; @@ -54,11 +77,12 @@ impl<'a> TreeSink for TreeBuilder<'a> { self.eat_n_trivias(n_attached_trivias); } - fn finish_branch(&mut self, root: bool) { - if root { - self.eat_trivias() + fn finish_branch(&mut self) { + match mem::replace(&mut self.state, State::PendingFinish) { + State::PendingStart => unreachable!(), + State::PendingFinish => self.inner.finish_internal(), + State::Normal => (), } - self.inner.finish_internal(); } fn error(&mut self, error: ParseError) { @@ -74,12 +98,21 @@ impl<'a> TreeBuilder<'a> { tokens, text_pos: 0.into(), token_pos: 0, + state: State::PendingStart, errors: Vec::new(), inner: GreenNodeBuilder::new(), } } - pub(super) fn finish(self) -> (GreenNode, Vec) { + pub(super) fn finish(mut self) -> (GreenNode, Vec) { + match mem::replace(&mut self.state, State::Normal) { + State::PendingFinish => { + self.eat_trivias(); + self.inner.finish_internal() + } + State::PendingStart | State::Normal => unreachable!(), + } + (self.inner.finish(), self.errors) } diff --git a/crates/ra_syntax/src/parsing/input.rs b/crates/ra_syntax/src/parsing/input.rs index 58be795bc..31c6a3b9b 100644 --- a/crates/ra_syntax/src/parsing/input.rs +++ b/crates/ra_syntax/src/parsing/input.rs @@ -5,6 +5,26 @@ use crate::{ parsing::lexer::Token, }; +pub(crate) struct ParserInput<'t> { + text: &'t str, + /// start position of each token(expect whitespace and comment) + /// ```non-rust + /// struct Foo; + /// ^------^--- + /// | | ^- + /// 0 7 10 + /// ``` + /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` + start_offsets: Vec, + /// non-whitespace/comment tokens + /// ```non-rust + /// struct Foo {} + /// ^^^^^^ ^^^ ^^ + /// ``` + /// tokens: `[struct, Foo, {, }]` + tokens: Vec, +} + impl<'t> TokenSource for ParserInput<'t> { fn token_kind(&self, pos: usize) -> SyntaxKind { if !(pos < self.tokens.len()) { @@ -28,26 +48,6 @@ impl<'t> TokenSource for ParserInput<'t> { } } -pub(crate) struct ParserInput<'t> { - text: &'t str, - /// start position of each token(expect whitespace and comment) - /// ```non-rust - /// struct Foo; - /// ^------^--- - /// | | ^- - /// 0 7 10 - /// ``` - /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` - start_offsets: Vec, - /// non-whitespace/comment tokens - /// ```non-rust - /// struct Foo {} - /// ^^^^^^ ^^^ ^^ - /// ``` - /// tokens: `[struct, Foo, {, }]` - tokens: Vec, -} - impl<'t> ParserInput<'t> { /// Generate input from tokens(expect comment and whitespace). pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index ffcb512ad..6957c26c0 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -1,11 +1,18 @@ +//! Implementation of incremental re-parsing. +//! +//! We use two simple strategies for this: +//! - if the edit modifies only a single token (like changing an identifier's +//! letter), we replace only this token. +//! - otherwise, we search for the nearest `{}` block which contains the edit +//! and try to parse only this block. + use ra_text_edit::AtomTextEdit; use ra_parser::Reparser; use crate::{ - SyntaxKind::*, TextRange, TextUnit, + SyntaxKind::*, TextRange, TextUnit, SyntaxError, algo, syntax_node::{GreenNode, SyntaxNode}, - syntax_error::SyntaxError, parsing::{ input::ParserInput, builder::TreeBuilder, diff --git a/crates/ra_syntax/src/syntax_node.rs b/crates/ra_syntax/src/syntax_node.rs index aa627398d..a1bc0b499 100644 --- a/crates/ra_syntax/src/syntax_node.rs +++ b/crates/ra_syntax/src/syntax_node.rs @@ -1,3 +1,11 @@ +//! This module defines Concrete Syntax Tree (CST), used by rust-analyzer. +//! +//! The CST includes comments and whitespace, provides a single node type, +//! `SyntaxNode`, and a basic traversal API (parent, children, siblings). +//! +//! The *real* implementation is in the (language-agnostic) `rowan` crate, this +//! modules just wraps its API. + use std::{fmt, borrow::Borrow}; use rowan::{Types, TransparentNewType}; -- cgit v1.2.3