aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--crates/ra_parser/src/event.rs7
-rw-r--r--crates/ra_parser/src/lib.rs46
-rw-r--r--crates/ra_parser/src/token_set.rs1
-rw-r--r--crates/ra_syntax/src/algo.rs1
-rw-r--r--crates/ra_syntax/src/ast.rs1
-rw-r--r--crates/ra_syntax/src/lib.rs27
-rw-r--r--crates/ra_syntax/src/parsing.rs44
-rw-r--r--crates/ra_syntax/src/parsing/builder.rs55
-rw-r--r--crates/ra_syntax/src/parsing/input.rs40
-rw-r--r--crates/ra_syntax/src/parsing/reparsing.rs11
-rw-r--r--crates/ra_syntax/src/syntax_node.rs8
11 files changed, 143 insertions, 98 deletions
diff --git a/crates/ra_parser/src/event.rs b/crates/ra_parser/src/event.rs
index d6e8454d4..6361d5d86 100644
--- a/crates/ra_parser/src/event.rs
+++ b/crates/ra_parser/src/event.rs
@@ -113,12 +113,11 @@ pub(super) fn process(sink: &mut dyn TreeSink, mut events: Vec<Event>) {
113 // append `B`'s forward_parent `C` in the next stage. 113 // append `B`'s forward_parent `C` in the next stage.
114 } 114 }
115 115
116 for (j, kind) in forward_parents.drain(..).rev().enumerate() { 116 for kind in forward_parents.drain(..).rev() {
117 let is_root_node = i == 0 && j == 0; 117 sink.start_branch(kind);
118 sink.start_branch(kind, is_root_node);
119 } 118 }
120 } 119 }
121 Event::Finish => sink.finish_branch(i == events.len() - 1), 120 Event::Finish => sink.finish_branch(),
122 Event::Token { kind, n_raw_tokens } => { 121 Event::Token { kind, n_raw_tokens } => {
123 sink.leaf(kind, n_raw_tokens); 122 sink.leaf(kind, n_raw_tokens);
124 } 123 }
diff --git a/crates/ra_parser/src/lib.rs b/crates/ra_parser/src/lib.rs
index 7931b5189..ddc08e462 100644
--- a/crates/ra_parser/src/lib.rs
+++ b/crates/ra_parser/src/lib.rs
@@ -1,3 +1,17 @@
1//! The Rust parser.
2//!
3//! The parser doesn't know about concrete representation of tokens and syntax
4//! trees. Abstract `TokenSource` and `TreeSink` traits are used instead. As a
5//! consequence, this crate does not contain a lexer.
6//!
7//! The `Parser` struct from the `parser` module is a cursor into the sequence
8//! of tokens. Parsing routines use `Parser` to inspect current state and
9//! advance the parsing.
10//!
11//! The actual parsing happens in the `grammar` module.
12//!
13//! Tests for this crate live in `ra_syntax` crate.
14
1#[macro_use] 15#[macro_use]
2mod token_set; 16mod token_set;
3mod syntax_kind; 17mod syntax_kind;
@@ -12,30 +26,34 @@ pub use syntax_kind::SyntaxKind;
12#[derive(Debug, Clone, PartialEq, Eq, Hash)] 26#[derive(Debug, Clone, PartialEq, Eq, Hash)]
13pub struct ParseError(pub String); 27pub struct ParseError(pub String);
14 28
29/// `TokenSource` abstracts the source of the tokens parser operates on.
30///
31/// Hopefully this will allow us to treat text and token trees in the same way!
32pub trait TokenSource {
33 /// What is the current token?
34 fn token_kind(&self, pos: usize) -> SyntaxKind;
35 /// Is the current token joined to the next one (`> >` vs `>>`).
36 fn is_token_joint_to_next(&self, pos: usize) -> bool;
37 /// Is the current token a specified keyword?
38 fn is_keyword(&self, pos: usize, kw: &str) -> bool;
39}
40
15/// `TreeSink` abstracts details of a particular syntax tree implementation. 41/// `TreeSink` abstracts details of a particular syntax tree implementation.
16pub trait TreeSink { 42pub trait TreeSink {
17 /// Adds new leaf to the current branch. 43 /// Adds new leaf to the current branch.
18 fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8); 44 fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8);
19 45
20 /// Start new branch and make it current. 46 /// Start new branch and make it current.
21 fn start_branch(&mut self, kind: SyntaxKind, root: bool); 47 fn start_branch(&mut self, kind: SyntaxKind);
22 48
23 /// Finish current branch and restore previous 49 /// Finish current branch and restore previous
24 /// branch as current. 50 /// branch as current.
25 fn finish_branch(&mut self, root: bool); 51 fn finish_branch(&mut self);
26 52
27 fn error(&mut self, error: ParseError); 53 fn error(&mut self, error: ParseError);
28} 54}
29 55
30/// `TokenSource` abstracts the source of the tokens parser operates one. 56/// Parse given tokens into the given sink as a rust file.
31///
32/// Hopefully this will allow us to treat text and token trees in the same way!
33pub trait TokenSource {
34 fn token_kind(&self, pos: usize) -> SyntaxKind;
35 fn is_token_joint_to_next(&self, pos: usize) -> bool;
36 fn is_keyword(&self, pos: usize, kw: &str) -> bool;
37}
38
39pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 57pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
40 let mut p = parser::Parser::new(token_source); 58 let mut p = parser::Parser::new(token_source);
41 grammar::root(&mut p); 59 grammar::root(&mut p);
@@ -43,9 +61,11 @@ pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
43 event::process(tree_sink, events); 61 event::process(tree_sink, events);
44} 62}
45 63
64/// A parsing function for a specific braced-block.
46pub struct Reparser(fn(&mut parser::Parser)); 65pub struct Reparser(fn(&mut parser::Parser));
47 66
48impl Reparser { 67impl Reparser {
68 /// If the node is a braced block, return the corresponding `Reparser`.
49 pub fn for_node( 69 pub fn for_node(
50 node: SyntaxKind, 70 node: SyntaxKind,
51 first_child: Option<SyntaxKind>, 71 first_child: Option<SyntaxKind>,
@@ -54,6 +74,10 @@ impl Reparser {
54 grammar::reparser(node, first_child, parent).map(Reparser) 74 grammar::reparser(node, first_child, parent).map(Reparser)
55 } 75 }
56 76
77 /// Re-parse given tokens using this `Reparser`.
78 ///
79 /// Tokens must start with `{`, end with `}` and form a valid brace
80 /// sequence.
57 pub fn parse(self, token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 81 pub fn parse(self, token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
58 let Reparser(r) = self; 82 let Reparser(r) = self;
59 let mut p = parser::Parser::new(token_source); 83 let mut p = parser::Parser::new(token_source);
diff --git a/crates/ra_parser/src/token_set.rs b/crates/ra_parser/src/token_set.rs
index 24152a38a..79121b35f 100644
--- a/crates/ra_parser/src/token_set.rs
+++ b/crates/ra_parser/src/token_set.rs
@@ -1,5 +1,6 @@
1use crate::SyntaxKind; 1use crate::SyntaxKind;
2 2
3/// A bit-set of `SyntaxKind`s
3#[derive(Clone, Copy)] 4#[derive(Clone, Copy)]
4pub(crate) struct TokenSet(u128); 5pub(crate) struct TokenSet(u128);
5 6
diff --git a/crates/ra_syntax/src/algo.rs b/crates/ra_syntax/src/algo.rs
index 99b0983b0..e8cf0d4b5 100644
--- a/crates/ra_syntax/src/algo.rs
+++ b/crates/ra_syntax/src/algo.rs
@@ -33,6 +33,7 @@ pub fn find_covering_node(root: &SyntaxNode, range: TextRange) -> &SyntaxNode {
33 SyntaxNode::from_repr(root.0.covering_node(range)) 33 SyntaxNode::from_repr(root.0.covering_node(range))
34} 34}
35 35
36// Replace with `std::iter::successors` in `1.34.0`
36pub fn generate<T>(seed: Option<T>, step: impl Fn(&T) -> Option<T>) -> impl Iterator<Item = T> { 37pub fn generate<T>(seed: Option<T>, step: impl Fn(&T) -> Option<T>) -> impl Iterator<Item = T> {
37 ::itertools::unfold(seed, move |slot| { 38 ::itertools::unfold(seed, move |slot| {
38 slot.take().map(|curr| { 39 slot.take().map(|curr| {
diff --git a/crates/ra_syntax/src/ast.rs b/crates/ra_syntax/src/ast.rs
index 62641c9fe..20e0a6856 100644
--- a/crates/ra_syntax/src/ast.rs
+++ b/crates/ra_syntax/src/ast.rs
@@ -1,3 +1,4 @@
1//! Abstract Syntax Tree, layered on top of untyped `SyntaxNode`s
1mod generated; 2mod generated;
2 3
3use std::marker::PhantomData; 4use std::marker::PhantomData;
diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs
index c788bddec..6982b9815 100644
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@@ -1,20 +1,21 @@
1//! An experimental implementation of [Rust RFC#2256 libsyntax2.0][rfc#2256]. 1//! Syntax Tree library used throughout the rust analyzer.
2//! 2//!
3//! The intent is to be an IDE-ready parser, i.e. one that offers 3//! Properties:
4//! - easy and fast incremental re-parsing
5//! - graceful handling of errors
6//! - full-fidelity representation (*any* text can be precisely represented as
7//! a syntax tree)
4//! 8//!
5//! - easy and fast incremental re-parsing, 9//! For more information, see the [RFC]. Current implementation is inspired by
6//! - graceful handling of errors, and 10//! the [Swift] one.
7//! - maintains all information in the source file.
8//! 11//!
9//! For more information, see [the RFC][rfc#2265], or [the working draft][RFC.md]. 12//! The most interesting modules here are `syntax_node` (which defines concrete
13//! syntax tree) and `ast` (which defines abstract syntax tree on top of the
14//! CST). The actual parser lives in a separate `ra_parser` crate, though the
15//! lexer lives in this crate.
10//! 16//!
11//! [rfc#2256]: <https://github.com/rust-lang/rfcs/pull/2256> 17//! [RFC]: <https://github.com/rust-lang/rfcs/pull/2256>
12//! [RFC.md]: <https://github.com/matklad/libsyntax2/blob/master/docs/RFC.md> 18//! [Swift]: <https://github.com/apple/swift/blob/13d593df6f359d0cb2fc81cfaac273297c539455/lib/Syntax/README.md>
13
14#![forbid(missing_debug_implementations, unconditional_recursion, future_incompatible)]
15#![deny(bad_style, missing_docs)]
16#![allow(missing_docs)]
17//#![warn(unreachable_pub)] // rust-lang/rust#47816
18 19
19mod syntax_node; 20mod syntax_node;
20mod syntax_text; 21mod syntax_text;
diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs
index 0a11600e1..cf573801c 100644
--- a/crates/ra_syntax/src/parsing.rs
+++ b/crates/ra_syntax/src/parsing.rs
@@ -1,17 +1,18 @@
1mod builder; 1//! Lexing, bridging to ra_parser (which does the actual parsing) and
2//! incremental reparsing.
3
2mod lexer; 4mod lexer;
3mod input; 5mod input;
6mod builder;
4mod reparsing; 7mod reparsing;
5 8
6use ra_parser::{parse, ParseError};
7
8use crate::{ 9use crate::{
9 SyntaxKind, SyntaxError, 10 SyntaxError,
11 syntax_node::GreenNode,
10 parsing::{ 12 parsing::{
11 builder::TreeBuilder, 13 builder::TreeBuilder,
12 input::ParserInput, 14 input::ParserInput,
13 }, 15 },
14 syntax_node::GreenNode,
15}; 16};
16 17
17pub use self::lexer::{tokenize, Token}; 18pub use self::lexer::{tokenize, Token};
@@ -22,37 +23,6 @@ pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
22 let tokens = tokenize(&text); 23 let tokens = tokenize(&text);
23 let token_source = ParserInput::new(text, &tokens); 24 let token_source = ParserInput::new(text, &tokens);
24 let mut tree_sink = TreeBuilder::new(text, &tokens); 25 let mut tree_sink = TreeBuilder::new(text, &tokens);
25 parse(&token_source, &mut tree_sink); 26 ra_parser::parse(&token_source, &mut tree_sink);
26 tree_sink.finish() 27 tree_sink.finish()
27} 28}
28
29/// `TreeSink` abstracts details of a particular syntax tree implementation.
30trait TreeSink {
31 type Tree;
32
33 /// Adds new leaf to the current branch.
34 fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8);
35
36 /// Start new branch and make it current.
37 fn start_branch(&mut self, kind: SyntaxKind, root: bool);
38
39 /// Finish current branch and restore previous
40 /// branch as current.
41 fn finish_branch(&mut self, root: bool);
42
43 fn error(&mut self, error: ParseError);
44
45 /// Complete tree building. Make sure that
46 /// `start_branch` and `finish_branch` calls
47 /// are paired!
48 fn finish(self) -> Self::Tree;
49}
50
51/// `TokenSource` abstracts the source of the tokens parser operates one.
52///
53/// Hopefully this will allow us to treat text and token trees in the same way!
54trait TokenSource {
55 fn token_kind(&self, pos: usize) -> SyntaxKind;
56 fn is_token_joint_to_next(&self, pos: usize) -> bool;
57 fn is_keyword(&self, pos: usize, kw: &str) -> bool;
58}
diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs
index 0775b0900..cfe3139b8 100644
--- a/crates/ra_syntax/src/parsing/builder.rs
+++ b/crates/ra_syntax/src/parsing/builder.rs
@@ -1,4 +1,7 @@
1use std::mem;
2
1use ra_parser::{TreeSink, ParseError}; 3use ra_parser::{TreeSink, ParseError};
4use rowan::GreenNodeBuilder;
2 5
3use crate::{ 6use crate::{
4 SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange, 7 SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange,
@@ -7,19 +10,32 @@ use crate::{
7 syntax_node::{GreenNode, RaTypes}, 10 syntax_node::{GreenNode, RaTypes},
8}; 11};
9 12
10use rowan::GreenNodeBuilder; 13/// Bridges the parser with our specific syntax tree representation.
11 14///
15/// `TreeBuilder` also handles attachment of trivia (whitespace) to nodes.
12pub(crate) struct TreeBuilder<'a> { 16pub(crate) struct TreeBuilder<'a> {
13 text: &'a str, 17 text: &'a str,
14 tokens: &'a [Token], 18 tokens: &'a [Token],
15 text_pos: TextUnit, 19 text_pos: TextUnit,
16 token_pos: usize, 20 token_pos: usize,
21 state: State,
17 errors: Vec<SyntaxError>, 22 errors: Vec<SyntaxError>,
18 inner: GreenNodeBuilder<RaTypes>, 23 inner: GreenNodeBuilder<RaTypes>,
19} 24}
20 25
26enum State {
27 PendingStart,
28 Normal,
29 PendingFinish,
30}
31
21impl<'a> TreeSink for TreeBuilder<'a> { 32impl<'a> TreeSink for TreeBuilder<'a> {
22 fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) { 33 fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) {
34 match mem::replace(&mut self.state, State::Normal) {
35 State::PendingStart => unreachable!(),
36 State::PendingFinish => self.inner.finish_internal(),
37 State::Normal => (),
38 }
23 self.eat_trivias(); 39 self.eat_trivias();
24 let n_tokens = n_tokens as usize; 40 let n_tokens = n_tokens as usize;
25 let len = self.tokens[self.token_pos..self.token_pos + n_tokens] 41 let len = self.tokens[self.token_pos..self.token_pos + n_tokens]
@@ -29,11 +45,18 @@ impl<'a> TreeSink for TreeBuilder<'a> {
29 self.do_leaf(kind, len, n_tokens); 45 self.do_leaf(kind, len, n_tokens);
30 } 46 }
31 47
32 fn start_branch(&mut self, kind: SyntaxKind, root: bool) { 48 fn start_branch(&mut self, kind: SyntaxKind) {
33 if root { 49 match mem::replace(&mut self.state, State::Normal) {
34 self.inner.start_internal(kind); 50 State::PendingStart => {
35 return; 51 self.inner.start_internal(kind);
52 // No need to attach trivias to previous node: there is no
53 // previous node.
54 return;
55 }
56 State::PendingFinish => self.inner.finish_internal(),
57 State::Normal => (),
36 } 58 }
59
37 let n_trivias = 60 let n_trivias =
38 self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); 61 self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
39 let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; 62 let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
@@ -54,11 +77,12 @@ impl<'a> TreeSink for TreeBuilder<'a> {
54 self.eat_n_trivias(n_attached_trivias); 77 self.eat_n_trivias(n_attached_trivias);
55 } 78 }
56 79
57 fn finish_branch(&mut self, root: bool) { 80 fn finish_branch(&mut self) {
58 if root { 81 match mem::replace(&mut self.state, State::PendingFinish) {
59 self.eat_trivias() 82 State::PendingStart => unreachable!(),
83 State::PendingFinish => self.inner.finish_internal(),
84 State::Normal => (),
60 } 85 }
61 self.inner.finish_internal();
62 } 86 }
63 87
64 fn error(&mut self, error: ParseError) { 88 fn error(&mut self, error: ParseError) {
@@ -74,12 +98,21 @@ impl<'a> TreeBuilder<'a> {
74 tokens, 98 tokens,
75 text_pos: 0.into(), 99 text_pos: 0.into(),
76 token_pos: 0, 100 token_pos: 0,
101 state: State::PendingStart,
77 errors: Vec::new(), 102 errors: Vec::new(),
78 inner: GreenNodeBuilder::new(), 103 inner: GreenNodeBuilder::new(),
79 } 104 }
80 } 105 }
81 106
82 pub(super) fn finish(self) -> (GreenNode, Vec<SyntaxError>) { 107 pub(super) fn finish(mut self) -> (GreenNode, Vec<SyntaxError>) {
108 match mem::replace(&mut self.state, State::Normal) {
109 State::PendingFinish => {
110 self.eat_trivias();
111 self.inner.finish_internal()
112 }
113 State::PendingStart | State::Normal => unreachable!(),
114 }
115
83 (self.inner.finish(), self.errors) 116 (self.inner.finish(), self.errors)
84 } 117 }
85 118
diff --git a/crates/ra_syntax/src/parsing/input.rs b/crates/ra_syntax/src/parsing/input.rs
index 58be795bc..31c6a3b9b 100644
--- a/crates/ra_syntax/src/parsing/input.rs
+++ b/crates/ra_syntax/src/parsing/input.rs
@@ -5,6 +5,26 @@ use crate::{
5 parsing::lexer::Token, 5 parsing::lexer::Token,
6}; 6};
7 7
8pub(crate) struct ParserInput<'t> {
9 text: &'t str,
10 /// start position of each token (except whitespace and comments)
11 /// ```non-rust
12 /// struct Foo;
13 /// ^------^---
14 /// | | ^-
15 /// 0 7 10
16 /// ```
17 /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
18 start_offsets: Vec<TextUnit>,
19 /// non-whitespace/comment tokens
20 /// ```non-rust
21 /// struct Foo {}
22 /// ^^^^^^ ^^^ ^^
23 /// ```
24 /// tokens: `[struct, Foo, {, }]`
25 tokens: Vec<Token>,
26}
27
8impl<'t> TokenSource for ParserInput<'t> { 28impl<'t> TokenSource for ParserInput<'t> {
9 fn token_kind(&self, pos: usize) -> SyntaxKind { 29 fn token_kind(&self, pos: usize) -> SyntaxKind {
10 if !(pos < self.tokens.len()) { 30 if !(pos < self.tokens.len()) {
@@ -28,26 +48,6 @@ impl<'t> TokenSource for ParserInput<'t> {
28 } 48 }
29} 49}
30 50
31pub(crate) struct ParserInput<'t> {
32 text: &'t str,
33 /// start position of each token(expect whitespace and comment)
34 /// ```non-rust
35 /// struct Foo;
36 /// ^------^---
37 /// | | ^-
38 /// 0 7 10
39 /// ```
40 /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
41 start_offsets: Vec<TextUnit>,
42 /// non-whitespace/comment tokens
43 /// ```non-rust
44 /// struct Foo {}
45 /// ^^^^^^ ^^^ ^^
46 /// ```
47 /// tokens: `[struct, Foo, {, }]`
48 tokens: Vec<Token>,
49}
50
51impl<'t> ParserInput<'t> { 51impl<'t> ParserInput<'t> {
52 /// Generate input from tokens (except comments and whitespace). 52 /// Generate input from tokens (except comments and whitespace).
53 pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { 53 pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index ffcb512ad..6957c26c0 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -1,11 +1,18 @@
1//! Implementation of incremental re-parsing.
2//!
3//! We use two simple strategies for this:
4//! - if the edit modifies only a single token (like changing an identifier's
5//! letter), we replace only this token.
6//! - otherwise, we search for the nearest `{}` block which contains the edit
7//! and try to parse only this block.
8
1use ra_text_edit::AtomTextEdit; 9use ra_text_edit::AtomTextEdit;
2use ra_parser::Reparser; 10use ra_parser::Reparser;
3 11
4use crate::{ 12use crate::{
5 SyntaxKind::*, TextRange, TextUnit, 13 SyntaxKind::*, TextRange, TextUnit, SyntaxError,
6 algo, 14 algo,
7 syntax_node::{GreenNode, SyntaxNode}, 15 syntax_node::{GreenNode, SyntaxNode},
8 syntax_error::SyntaxError,
9 parsing::{ 16 parsing::{
10 input::ParserInput, 17 input::ParserInput,
11 builder::TreeBuilder, 18 builder::TreeBuilder,
diff --git a/crates/ra_syntax/src/syntax_node.rs b/crates/ra_syntax/src/syntax_node.rs
index aa627398d..a1bc0b499 100644
--- a/crates/ra_syntax/src/syntax_node.rs
+++ b/crates/ra_syntax/src/syntax_node.rs
@@ -1,3 +1,11 @@
1//! This module defines Concrete Syntax Tree (CST), used by rust-analyzer.
2//!
3//! The CST includes comments and whitespace, provides a single node type,
4//! `SyntaxNode`, and a basic traversal API (parent, children, siblings).
5//!
6//! The *real* implementation is in the (language-agnostic) `rowan` crate, this
7//! modules just wraps its API.
8
1use std::{fmt, borrow::Borrow}; 9use std::{fmt, borrow::Borrow};
2 10
3use rowan::{Types, TransparentNewType}; 11use rowan::{Types, TransparentNewType};