diff options
author | Aleksey Kladov <[email protected]> | 2019-02-21 09:03:42 +0000 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2019-02-21 09:03:42 +0000 |
commit | 79ce0fa8d753ea00b5a98c7835be2f7dec7704f9 (patch) | |
tree | a979b22ec18e0c0f464f93a99e61c2f9387fd4e9 | |
parent | c84561bb624280b84eb2fe6c6b2a6b9fe3f1dbf7 (diff) |
move whitespace handling to tree builder
-rw-r--r-- | crates/ra_syntax/src/parsing.rs | 22 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/builder.rs | 127 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/event.rs | 193 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/reparsing.rs | 5 |
4 files changed, 159 insertions, 188 deletions
diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 138d1394a..7e1b32035 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs | |||
@@ -9,11 +9,11 @@ mod grammar; | |||
9 | mod reparsing; | 9 | mod reparsing; |
10 | 10 | ||
11 | use crate::{ | 11 | use crate::{ |
12 | SyntaxKind, SmolStr, SyntaxError, | 12 | SyntaxKind, SyntaxError, |
13 | parsing::{ | 13 | parsing::{ |
14 | builder::GreenBuilder, | 14 | builder::TreeBuilder, |
15 | input::ParserInput, | 15 | input::ParserInput, |
16 | event::EventProcessor, | 16 | event::process, |
17 | parser::Parser, | 17 | parser::Parser, |
18 | }, | 18 | }, |
19 | syntax_node::GreenNode, | 19 | syntax_node::GreenNode, |
@@ -28,22 +28,24 @@ pub(crate) use self::reparsing::incremental_reparse; | |||
28 | 28 | ||
29 | pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { | 29 | pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { |
30 | let tokens = tokenize(&text); | 30 | let tokens = tokenize(&text); |
31 | parse_with(GreenBuilder::default(), text, &tokens, grammar::root) | 31 | let tree_sink = TreeBuilder::new(text, &tokens); |
32 | parse_with(tree_sink, text, &tokens, grammar::root) | ||
32 | } | 33 | } |
33 | 34 | ||
34 | fn parse_with<S: TreeSink>( | 35 | fn parse_with<S: TreeSink>( |
35 | tree_sink: S, | 36 | mut tree_sink: S, |
36 | text: &str, | 37 | text: &str, |
37 | tokens: &[Token], | 38 | tokens: &[Token], |
38 | f: fn(&mut Parser), | 39 | f: fn(&mut Parser), |
39 | ) -> S::Tree { | 40 | ) -> S::Tree { |
40 | let mut events = { | 41 | let events = { |
41 | let input = ParserInput::new(text, &tokens); | 42 | let input = ParserInput::new(text, &tokens); |
42 | let mut p = Parser::new(&input); | 43 | let mut p = Parser::new(&input); |
43 | f(&mut p); | 44 | f(&mut p); |
44 | p.finish() | 45 | p.finish() |
45 | }; | 46 | }; |
46 | EventProcessor::new(tree_sink, text, tokens, &mut events).process().finish() | 47 | process(&mut tree_sink, events); |
48 | tree_sink.finish() | ||
47 | } | 49 | } |
48 | 50 | ||
49 | /// `TreeSink` abstracts details of a particular syntax tree implementation. | 51 | /// `TreeSink` abstracts details of a particular syntax tree implementation. |
@@ -51,14 +53,14 @@ trait TreeSink { | |||
51 | type Tree; | 53 | type Tree; |
52 | 54 | ||
53 | /// Adds new leaf to the current branch. | 55 | /// Adds new leaf to the current branch. |
54 | fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); | 56 | fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8); |
55 | 57 | ||
56 | /// Start new branch and make it current. | 58 | /// Start new branch and make it current. |
57 | fn start_branch(&mut self, kind: SyntaxKind); | 59 | fn start_branch(&mut self, kind: SyntaxKind, root: bool); |
58 | 60 | ||
59 | /// Finish current branch and restore previous | 61 | /// Finish current branch and restore previous |
60 | /// branch as current. | 62 | /// branch as current. |
61 | fn finish_branch(&mut self); | 63 | fn finish_branch(&mut self, root: bool); |
62 | 64 | ||
63 | fn error(&mut self, error: ParseError); | 65 | fn error(&mut self, error: ParseError); |
64 | 66 | ||
diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs index ee0e2cce7..1041c6a7b 100644 --- a/crates/ra_syntax/src/parsing/builder.rs +++ b/crates/ra_syntax/src/parsing/builder.rs | |||
@@ -1,40 +1,63 @@ | |||
1 | use crate::{ | 1 | use crate::{ |
2 | SmolStr, SyntaxKind, SyntaxError, SyntaxErrorKind, TextUnit, | 2 | SmolStr, SyntaxError, SyntaxErrorKind, TextUnit, TextRange, |
3 | parsing::{TreeSink, ParseError}, | 3 | SyntaxKind::{self, *}, |
4 | parsing::{TreeSink, ParseError, Token}, | ||
4 | syntax_node::{GreenNode, RaTypes}, | 5 | syntax_node::{GreenNode, RaTypes}, |
5 | }; | 6 | }; |
6 | 7 | ||
7 | use rowan::GreenNodeBuilder; | 8 | use rowan::GreenNodeBuilder; |
8 | 9 | ||
9 | pub(crate) struct GreenBuilder { | 10 | pub(crate) struct TreeBuilder<'a> { |
11 | text: &'a str, | ||
12 | tokens: &'a [Token], | ||
10 | text_pos: TextUnit, | 13 | text_pos: TextUnit, |
14 | token_pos: usize, | ||
11 | errors: Vec<SyntaxError>, | 15 | errors: Vec<SyntaxError>, |
12 | inner: GreenNodeBuilder<RaTypes>, | 16 | inner: GreenNodeBuilder<RaTypes>, |
13 | } | 17 | } |
14 | 18 | ||
15 | impl Default for GreenBuilder { | 19 | impl<'a> TreeSink for TreeBuilder<'a> { |
16 | fn default() -> GreenBuilder { | ||
17 | GreenBuilder { | ||
18 | text_pos: TextUnit::default(), | ||
19 | errors: Vec::new(), | ||
20 | inner: GreenNodeBuilder::new(), | ||
21 | } | ||
22 | } | ||
23 | } | ||
24 | |||
25 | impl TreeSink for GreenBuilder { | ||
26 | type Tree = (GreenNode, Vec<SyntaxError>); | 20 | type Tree = (GreenNode, Vec<SyntaxError>); |
27 | 21 | ||
28 | fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { | 22 | fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) { |
29 | self.text_pos += TextUnit::of_str(text.as_str()); | 23 | self.eat_trivias(); |
30 | self.inner.leaf(kind, text); | 24 | let n_tokens = n_tokens as usize; |
25 | let len = self.tokens[self.token_pos..self.token_pos + n_tokens] | ||
26 | .iter() | ||
27 | .map(|it| it.len) | ||
28 | .sum::<TextUnit>(); | ||
29 | self.do_leaf(kind, len, n_tokens); | ||
31 | } | 30 | } |
32 | 31 | ||
33 | fn start_branch(&mut self, kind: SyntaxKind) { | 32 | fn start_branch(&mut self, kind: SyntaxKind, root: bool) { |
34 | self.inner.start_internal(kind) | 33 | if root { |
34 | self.inner.start_internal(kind); | ||
35 | return; | ||
36 | } | ||
37 | let n_trivias = | ||
38 | self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); | ||
39 | let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; | ||
40 | let mut trivia_end = | ||
41 | self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>(); | ||
42 | |||
43 | let n_attached_trivias = { | ||
44 | let leading_trivias = leading_trivias.iter().rev().map(|it| { | ||
45 | let next_end = trivia_end - it.len; | ||
46 | let range = TextRange::from_to(next_end, trivia_end); | ||
47 | trivia_end = next_end; | ||
48 | (it.kind, &self.text[range]) | ||
49 | }); | ||
50 | n_attached_trivias(kind, leading_trivias) | ||
51 | }; | ||
52 | self.eat_n_trivias(n_trivias - n_attached_trivias); | ||
53 | self.inner.start_internal(kind); | ||
54 | self.eat_n_trivias(n_attached_trivias); | ||
35 | } | 55 | } |
36 | 56 | ||
37 | fn finish_branch(&mut self) { | 57 | fn finish_branch(&mut self, root: bool) { |
58 | if root { | ||
59 | self.eat_trivias() | ||
60 | } | ||
38 | self.inner.finish_internal(); | 61 | self.inner.finish_internal(); |
39 | } | 62 | } |
40 | 63 | ||
@@ -47,3 +70,67 @@ impl TreeSink for GreenBuilder { | |||
47 | (self.inner.finish(), self.errors) | 70 | (self.inner.finish(), self.errors) |
48 | } | 71 | } |
49 | } | 72 | } |
73 | |||
74 | impl<'a> TreeBuilder<'a> { | ||
75 | pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> TreeBuilder<'a> { | ||
76 | TreeBuilder { | ||
77 | text, | ||
78 | tokens, | ||
79 | text_pos: 0.into(), | ||
80 | token_pos: 0, | ||
81 | errors: Vec::new(), | ||
82 | inner: GreenNodeBuilder::new(), | ||
83 | } | ||
84 | } | ||
85 | fn eat_trivias(&mut self) { | ||
86 | while let Some(&token) = self.tokens.get(self.token_pos) { | ||
87 | if !token.kind.is_trivia() { | ||
88 | break; | ||
89 | } | ||
90 | self.do_leaf(token.kind, token.len, 1); | ||
91 | } | ||
92 | } | ||
93 | |||
94 | fn eat_n_trivias(&mut self, n: usize) { | ||
95 | for _ in 0..n { | ||
96 | let token = self.tokens[self.token_pos]; | ||
97 | assert!(token.kind.is_trivia()); | ||
98 | self.do_leaf(token.kind, token.len, 1); | ||
99 | } | ||
100 | } | ||
101 | |||
102 | fn do_leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { | ||
103 | let range = TextRange::offset_len(self.text_pos, len); | ||
104 | let text: SmolStr = self.text[range].into(); | ||
105 | self.text_pos += len; | ||
106 | self.token_pos += n_tokens; | ||
107 | self.inner.leaf(kind, text); | ||
108 | } | ||
109 | } | ||
110 | |||
111 | fn n_attached_trivias<'a>( | ||
112 | kind: SyntaxKind, | ||
113 | trivias: impl Iterator<Item = (SyntaxKind, &'a str)>, | ||
114 | ) -> usize { | ||
115 | match kind { | ||
116 | CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF | ||
117 | | MODULE | NAMED_FIELD_DEF => { | ||
118 | let mut res = 0; | ||
119 | for (i, (kind, text)) in trivias.enumerate() { | ||
120 | match kind { | ||
121 | WHITESPACE => { | ||
122 | if text.contains("\n\n") { | ||
123 | break; | ||
124 | } | ||
125 | } | ||
126 | COMMENT => { | ||
127 | res = i + 1; | ||
128 | } | ||
129 | _ => (), | ||
130 | } | ||
131 | } | ||
132 | res | ||
133 | } | ||
134 | _ => 0, | ||
135 | } | ||
136 | } | ||
diff --git a/crates/ra_syntax/src/parsing/event.rs b/crates/ra_syntax/src/parsing/event.rs index f6f020eab..d6cbdffe0 100644 --- a/crates/ra_syntax/src/parsing/event.rs +++ b/crates/ra_syntax/src/parsing/event.rs | |||
@@ -10,13 +10,8 @@ | |||
10 | use std::mem; | 10 | use std::mem; |
11 | 11 | ||
12 | use crate::{ | 12 | use crate::{ |
13 | SmolStr, | ||
14 | SyntaxKind::{self, *}, | 13 | SyntaxKind::{self, *}, |
15 | TextRange, TextUnit, | 14 | parsing::{ParseError, TreeSink}, |
16 | parsing::{ | ||
17 | ParseError, TreeSink, | ||
18 | lexer::Token, | ||
19 | }, | ||
20 | }; | 15 | }; |
21 | 16 | ||
22 | /// `Parser` produces a flat list of `Event`s. | 17 | /// `Parser` produces a flat list of `Event`s. |
@@ -88,160 +83,46 @@ impl Event { | |||
88 | } | 83 | } |
89 | } | 84 | } |
90 | 85 | ||
91 | pub(super) struct EventProcessor<'a, S: TreeSink> { | 86 | /// Generate the syntax tree with the control of events. |
92 | sink: S, | 87 | pub(super) fn process(sink: &mut impl TreeSink, mut events: Vec<Event>) { |
93 | text_pos: TextUnit, | 88 | let mut forward_parents = Vec::new(); |
94 | text: &'a str, | 89 | |
95 | token_pos: usize, | 90 | for i in 0..events.len() { |
96 | tokens: &'a [Token], | 91 | match mem::replace(&mut events[i], Event::tombstone()) { |
97 | events: &'a mut [Event], | 92 | Event::Start { kind: TOMBSTONE, .. } => (), |
98 | } | 93 | |
99 | 94 | Event::Start { kind, forward_parent } => { | |
100 | impl<'a, S: TreeSink> EventProcessor<'a, S> { | 95 | // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, |
101 | pub(super) fn new( | 96 | // in the normal control flow, the parent-child relation: `A -> B -> C`, |
102 | sink: S, | 97 | // while with the magic forward_parent, it writes: `C <- B <- A`. |
103 | text: &'a str, | 98 | |
104 | tokens: &'a [Token], | 99 | // append `A` into parents. |
105 | events: &'a mut [Event], | 100 | forward_parents.push(kind); |
106 | ) -> EventProcessor<'a, S> { | 101 | let mut idx = i; |
107 | EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events } | 102 | let mut fp = forward_parent; |
108 | } | 103 | while let Some(fwd) = fp { |
109 | 104 | idx += fwd as usize; | |
110 | /// Generate the syntax tree with the control of events. | 105 | // append `A`'s forward_parent `B` |
111 | pub(crate) fn process(mut self) -> S { | 106 | fp = match mem::replace(&mut events[idx], Event::tombstone()) { |
112 | let mut forward_parents = Vec::new(); | 107 | Event::Start { kind, forward_parent } => { |
113 | 108 | forward_parents.push(kind); | |
114 | for i in 0..self.events.len() { | 109 | forward_parent |
115 | match mem::replace(&mut self.events[i], Event::tombstone()) { | 110 | } |
116 | Event::Start { kind: TOMBSTONE, .. } => (), | 111 | _ => unreachable!(), |
117 | 112 | }; | |
118 | Event::Start { kind, forward_parent } => { | 113 | // append `B`'s forward_parent `C` in the next stage. |
119 | // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, | ||
120 | // in the normal control flow, the parent-child relation: `A -> B -> C`, | ||
121 | // while with the magic forward_parent, it writes: `C <- B <- A`. | ||
122 | |||
123 | // append `A` into parents. | ||
124 | forward_parents.push(kind); | ||
125 | let mut idx = i; | ||
126 | let mut fp = forward_parent; | ||
127 | while let Some(fwd) = fp { | ||
128 | idx += fwd as usize; | ||
129 | // append `A`'s forward_parent `B` | ||
130 | fp = match mem::replace(&mut self.events[idx], Event::tombstone()) { | ||
131 | Event::Start { kind, forward_parent } => { | ||
132 | forward_parents.push(kind); | ||
133 | forward_parent | ||
134 | } | ||
135 | _ => unreachable!(), | ||
136 | }; | ||
137 | // append `B`'s forward_parent `C` in the next stage. | ||
138 | } | ||
139 | |||
140 | for kind in forward_parents.drain(..).rev() { | ||
141 | self.start(kind); | ||
142 | } | ||
143 | } | ||
144 | Event::Finish => { | ||
145 | let is_last = i == self.events.len() - 1; | ||
146 | self.finish(is_last); | ||
147 | } | ||
148 | Event::Token { kind, n_raw_tokens } => { | ||
149 | self.eat_trivias(); | ||
150 | let n_raw_tokens = n_raw_tokens as usize; | ||
151 | let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens] | ||
152 | .iter() | ||
153 | .map(|it| it.len) | ||
154 | .sum::<TextUnit>(); | ||
155 | self.leaf(kind, len, n_raw_tokens); | ||
156 | } | 114 | } |
157 | Event::Error { msg } => self.sink.error(msg), | ||
158 | } | ||
159 | } | ||
160 | self.sink | ||
161 | } | ||
162 | |||
163 | /// Add the node into syntax tree but discard the comments/whitespaces. | ||
164 | fn start(&mut self, kind: SyntaxKind) { | ||
165 | if kind == SOURCE_FILE { | ||
166 | self.sink.start_branch(kind); | ||
167 | return; | ||
168 | } | ||
169 | let n_trivias = | ||
170 | self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); | ||
171 | let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; | ||
172 | let mut trivia_end = | ||
173 | self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>(); | ||
174 | 115 | ||
175 | let n_attached_trivias = { | 116 | for (j, kind) in forward_parents.drain(..).rev().enumerate() { |
176 | let leading_trivias = leading_trivias.iter().rev().map(|it| { | 117 | let is_root_node = i == 0 && j == 0; |
177 | let next_end = trivia_end - it.len; | 118 | sink.start_branch(kind, is_root_node); |
178 | let range = TextRange::from_to(next_end, trivia_end); | ||
179 | trivia_end = next_end; | ||
180 | (it.kind, &self.text[range]) | ||
181 | }); | ||
182 | n_attached_trivias(kind, leading_trivias) | ||
183 | }; | ||
184 | self.eat_n_trivias(n_trivias - n_attached_trivias); | ||
185 | self.sink.start_branch(kind); | ||
186 | self.eat_n_trivias(n_attached_trivias); | ||
187 | } | ||
188 | |||
189 | fn finish(&mut self, is_last: bool) { | ||
190 | if is_last { | ||
191 | self.eat_trivias() | ||
192 | } | ||
193 | self.sink.finish_branch(); | ||
194 | } | ||
195 | |||
196 | fn eat_trivias(&mut self) { | ||
197 | while let Some(&token) = self.tokens.get(self.token_pos) { | ||
198 | if !token.kind.is_trivia() { | ||
199 | break; | ||
200 | } | ||
201 | self.leaf(token.kind, token.len, 1); | ||
202 | } | ||
203 | } | ||
204 | |||
205 | fn eat_n_trivias(&mut self, n: usize) { | ||
206 | for _ in 0..n { | ||
207 | let token = self.tokens[self.token_pos]; | ||
208 | assert!(token.kind.is_trivia()); | ||
209 | self.leaf(token.kind, token.len, 1); | ||
210 | } | ||
211 | } | ||
212 | |||
213 | fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { | ||
214 | let range = TextRange::offset_len(self.text_pos, len); | ||
215 | let text: SmolStr = self.text[range].into(); | ||
216 | self.text_pos += len; | ||
217 | self.token_pos += n_tokens; | ||
218 | self.sink.leaf(kind, text); | ||
219 | } | ||
220 | } | ||
221 | |||
222 | fn n_attached_trivias<'a>( | ||
223 | kind: SyntaxKind, | ||
224 | trivias: impl Iterator<Item = (SyntaxKind, &'a str)>, | ||
225 | ) -> usize { | ||
226 | match kind { | ||
227 | CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF | ||
228 | | MODULE | NAMED_FIELD_DEF => { | ||
229 | let mut res = 0; | ||
230 | for (i, (kind, text)) in trivias.enumerate() { | ||
231 | match kind { | ||
232 | WHITESPACE => { | ||
233 | if text.contains("\n\n") { | ||
234 | break; | ||
235 | } | ||
236 | } | ||
237 | COMMENT => { | ||
238 | res = i + 1; | ||
239 | } | ||
240 | _ => (), | ||
241 | } | 119 | } |
242 | } | 120 | } |
243 | res | 121 | Event::Finish => sink.finish_branch(i == events.len() - 1), |
122 | Event::Token { kind, n_raw_tokens } => { | ||
123 | sink.leaf(kind, n_raw_tokens); | ||
124 | } | ||
125 | Event::Error { msg } => sink.error(msg), | ||
244 | } | 126 | } |
245 | _ => 0, | ||
246 | } | 127 | } |
247 | } | 128 | } |
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index f2d218ab9..f4c2251d7 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs | |||
@@ -5,7 +5,7 @@ use crate::{ | |||
5 | syntax_error::SyntaxError, | 5 | syntax_error::SyntaxError, |
6 | parsing::{ | 6 | parsing::{ |
7 | grammar, parse_with, | 7 | grammar, parse_with, |
8 | builder::GreenBuilder, | 8 | builder::TreeBuilder, |
9 | parser::Parser, | 9 | parser::Parser, |
10 | lexer::{tokenize, Token}, | 10 | lexer::{tokenize, Token}, |
11 | } | 11 | } |
@@ -61,7 +61,8 @@ fn reparse_block<'node>( | |||
61 | if !is_balanced(&tokens) { | 61 | if !is_balanced(&tokens) { |
62 | return None; | 62 | return None; |
63 | } | 63 | } |
64 | let (green, new_errors) = parse_with(GreenBuilder::default(), &text, &tokens, reparser); | 64 | let tree_sink = TreeBuilder::new(&text, &tokens); |
65 | let (green, new_errors) = parse_with(tree_sink, &text, &tokens, reparser); | ||
65 | Some((node, green, new_errors)) | 66 | Some((node, green, new_errors)) |
66 | } | 67 | } |
67 | 68 | ||