diff options
author | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-02-20 13:50:29 +0000 |
---|---|---|
committer | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-02-20 13:50:29 +0000 |
commit | 96899f8278b787280bd07d9ac9dce29a610ce40d (patch) | |
tree | d02a22f02908fd3c89e50845a06a89b997220fc2 /crates/ra_syntax/src/parsing/parser_impl | |
parent | 5b617e3bf8252887a3eb1ce76d4b62cbee74e551 (diff) | |
parent | 86a67dce25f11ba9803a5727f77c02fd1f49e2c0 (diff) |
Merge #861
861: Move parsing to a separate module r=matklad a=matklad
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_syntax/src/parsing/parser_impl')
-rw-r--r-- | crates/ra_syntax/src/parsing/parser_impl/event.rs | 254 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/parser_impl/input.rs | 104 |
2 files changed, 358 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/parser_impl/event.rs new file mode 100644 index 000000000..2ddbdd34d --- /dev/null +++ b/crates/ra_syntax/src/parsing/parser_impl/event.rs | |||
@@ -0,0 +1,254 @@ | |||
1 | //! This module provides a way to construct a `File`. | ||
2 | //! It is intended to be completely decoupled from the | ||
3 | //! parser, so as to allow to evolve the tree representation | ||
4 | //! and the parser algorithm independently. | ||
5 | //! | ||
6 | //! The `Sink` trait is the bridge between the parser and the | ||
7 | //! tree builder: the parser produces a stream of events like | ||
8 | //! `start node`, `finish node`, and `FileBuilder` converts | ||
9 | //! this stream to a real tree. | ||
10 | use std::mem; | ||
11 | |||
12 | use crate::{ | ||
13 | SmolStr, | ||
14 | SyntaxKind::{self, *}, | ||
15 | TextRange, TextUnit, | ||
16 | syntax_error::{ | ||
17 | ParseError, | ||
18 | SyntaxError, | ||
19 | SyntaxErrorKind, | ||
20 | }, | ||
21 | parsing::{ | ||
22 | lexer::Token, | ||
23 | parser_impl::Sink, | ||
24 | }, | ||
25 | }; | ||
26 | |||
27 | /// `Parser` produces a flat list of `Event`s. | ||
28 | /// They are converted to a tree-structure in | ||
29 | /// a separate pass, via `TreeBuilder`. | ||
30 | #[derive(Debug)] | ||
31 | pub(crate) enum Event { | ||
32 | /// This event signifies the start of the node. | ||
33 | /// It should be either abandoned (in which case the | ||
34 | /// `kind` is `TOMBSTONE`, and the event is ignored), | ||
35 | /// or completed via a `Finish` event. | ||
36 | /// | ||
37 | /// All tokens between a `Start` and a `Finish` would | ||
38 | /// become the children of the respective node. | ||
39 | /// | ||
40 | /// For left-recursive syntactic constructs, the parser produces | ||
41 | /// a child node before it sees a parent. `forward_parent` | ||
42 | /// saves the position of current event's parent. | ||
43 | /// | ||
44 | /// Consider this path | ||
45 | /// | ||
46 | /// foo::bar | ||
47 | /// | ||
48 | /// The events for it would look like this: | ||
49 | /// | ||
50 | /// | ||
51 | /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH | ||
52 | /// | /\ | ||
53 | /// | | | ||
54 | /// +------forward-parent------+ | ||
55 | /// | ||
56 | /// And the tree would look like this | ||
57 | /// | ||
58 | /// +--PATH---------+ | ||
59 | /// | | | | ||
60 | /// | | | | ||
61 | /// | '::' 'bar' | ||
62 | /// | | ||
63 | /// PATH | ||
64 | /// | | ||
65 | /// 'foo' | ||
66 | /// | ||
67 | /// See also `CompletedMarker::precede`. | ||
68 | Start { | ||
69 | kind: SyntaxKind, | ||
70 | forward_parent: Option<u32>, | ||
71 | }, | ||
72 | |||
73 | /// Complete the previous `Start` event | ||
74 | Finish, | ||
75 | |||
76 | /// Produce a single leaf-element. | ||
77 | /// `n_raw_tokens` is used to glue complex contextual tokens. | ||
78 | /// For example, lexer tokenizes `>>` as `>`, `>`, and | ||
79 | /// `n_raw_tokens = 2` is used to produced a single `>>`. | ||
80 | Token { | ||
81 | kind: SyntaxKind, | ||
82 | n_raw_tokens: u8, | ||
83 | }, | ||
84 | |||
85 | Error { | ||
86 | msg: ParseError, | ||
87 | }, | ||
88 | } | ||
89 | |||
90 | impl Event { | ||
91 | pub(crate) fn tombstone() -> Self { | ||
92 | Event::Start { kind: TOMBSTONE, forward_parent: None } | ||
93 | } | ||
94 | } | ||
95 | |||
96 | pub(super) struct EventProcessor<'a, S: Sink> { | ||
97 | sink: S, | ||
98 | text_pos: TextUnit, | ||
99 | text: &'a str, | ||
100 | token_pos: usize, | ||
101 | tokens: &'a [Token], | ||
102 | events: &'a mut [Event], | ||
103 | } | ||
104 | |||
105 | impl<'a, S: Sink> EventProcessor<'a, S> { | ||
106 | pub(super) fn new( | ||
107 | sink: S, | ||
108 | text: &'a str, | ||
109 | tokens: &'a [Token], | ||
110 | events: &'a mut [Event], | ||
111 | ) -> EventProcessor<'a, S> { | ||
112 | EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events } | ||
113 | } | ||
114 | |||
115 | /// Generate the syntax tree with the control of events. | ||
116 | pub(super) fn process(mut self) -> S { | ||
117 | let mut forward_parents = Vec::new(); | ||
118 | |||
119 | for i in 0..self.events.len() { | ||
120 | match mem::replace(&mut self.events[i], Event::tombstone()) { | ||
121 | Event::Start { kind: TOMBSTONE, .. } => (), | ||
122 | |||
123 | Event::Start { kind, forward_parent } => { | ||
124 | // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, | ||
125 | // in the normal control flow, the parent-child relation: `A -> B -> C`, | ||
126 | // while with the magic forward_parent, it writes: `C <- B <- A`. | ||
127 | |||
128 | // append `A` into parents. | ||
129 | forward_parents.push(kind); | ||
130 | let mut idx = i; | ||
131 | let mut fp = forward_parent; | ||
132 | while let Some(fwd) = fp { | ||
133 | idx += fwd as usize; | ||
134 | // append `A`'s forward_parent `B` | ||
135 | fp = match mem::replace(&mut self.events[idx], Event::tombstone()) { | ||
136 | Event::Start { kind, forward_parent } => { | ||
137 | forward_parents.push(kind); | ||
138 | forward_parent | ||
139 | } | ||
140 | _ => unreachable!(), | ||
141 | }; | ||
142 | // append `B`'s forward_parent `C` in the next stage. | ||
143 | } | ||
144 | |||
145 | for kind in forward_parents.drain(..).rev() { | ||
146 | self.start(kind); | ||
147 | } | ||
148 | } | ||
149 | Event::Finish => { | ||
150 | let is_last = i == self.events.len() - 1; | ||
151 | self.finish(is_last); | ||
152 | } | ||
153 | Event::Token { kind, n_raw_tokens } => { | ||
154 | self.eat_trivias(); | ||
155 | let n_raw_tokens = n_raw_tokens as usize; | ||
156 | let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens] | ||
157 | .iter() | ||
158 | .map(|it| it.len) | ||
159 | .sum::<TextUnit>(); | ||
160 | self.leaf(kind, len, n_raw_tokens); | ||
161 | } | ||
162 | Event::Error { msg } => self | ||
163 | .sink | ||
164 | .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)), | ||
165 | } | ||
166 | } | ||
167 | self.sink | ||
168 | } | ||
169 | |||
170 | /// Add the node into syntax tree but discard the comments/whitespaces. | ||
171 | fn start(&mut self, kind: SyntaxKind) { | ||
172 | if kind == SOURCE_FILE { | ||
173 | self.sink.start_branch(kind); | ||
174 | return; | ||
175 | } | ||
176 | let n_trivias = | ||
177 | self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); | ||
178 | let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; | ||
179 | let mut trivia_end = | ||
180 | self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>(); | ||
181 | |||
182 | let n_attached_trivias = { | ||
183 | let leading_trivias = leading_trivias.iter().rev().map(|it| { | ||
184 | let next_end = trivia_end - it.len; | ||
185 | let range = TextRange::from_to(next_end, trivia_end); | ||
186 | trivia_end = next_end; | ||
187 | (it.kind, &self.text[range]) | ||
188 | }); | ||
189 | n_attached_trivias(kind, leading_trivias) | ||
190 | }; | ||
191 | self.eat_n_trivias(n_trivias - n_attached_trivias); | ||
192 | self.sink.start_branch(kind); | ||
193 | self.eat_n_trivias(n_attached_trivias); | ||
194 | } | ||
195 | |||
196 | fn finish(&mut self, is_last: bool) { | ||
197 | if is_last { | ||
198 | self.eat_trivias() | ||
199 | } | ||
200 | self.sink.finish_branch(); | ||
201 | } | ||
202 | |||
203 | fn eat_trivias(&mut self) { | ||
204 | while let Some(&token) = self.tokens.get(self.token_pos) { | ||
205 | if !token.kind.is_trivia() { | ||
206 | break; | ||
207 | } | ||
208 | self.leaf(token.kind, token.len, 1); | ||
209 | } | ||
210 | } | ||
211 | |||
212 | fn eat_n_trivias(&mut self, n: usize) { | ||
213 | for _ in 0..n { | ||
214 | let token = self.tokens[self.token_pos]; | ||
215 | assert!(token.kind.is_trivia()); | ||
216 | self.leaf(token.kind, token.len, 1); | ||
217 | } | ||
218 | } | ||
219 | |||
220 | fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { | ||
221 | let range = TextRange::offset_len(self.text_pos, len); | ||
222 | let text: SmolStr = self.text[range].into(); | ||
223 | self.text_pos += len; | ||
224 | self.token_pos += n_tokens; | ||
225 | self.sink.leaf(kind, text); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | fn n_attached_trivias<'a>( | ||
230 | kind: SyntaxKind, | ||
231 | trivias: impl Iterator<Item = (SyntaxKind, &'a str)>, | ||
232 | ) -> usize { | ||
233 | match kind { | ||
234 | CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF | ||
235 | | MODULE | NAMED_FIELD_DEF => { | ||
236 | let mut res = 0; | ||
237 | for (i, (kind, text)) in trivias.enumerate() { | ||
238 | match kind { | ||
239 | WHITESPACE => { | ||
240 | if text.contains("\n\n") { | ||
241 | break; | ||
242 | } | ||
243 | } | ||
244 | COMMENT => { | ||
245 | res = i + 1; | ||
246 | } | ||
247 | _ => (), | ||
248 | } | ||
249 | } | ||
250 | res | ||
251 | } | ||
252 | _ => 0, | ||
253 | } | ||
254 | } | ||
diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs new file mode 100644 index 000000000..275d94918 --- /dev/null +++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs | |||
@@ -0,0 +1,104 @@ | |||
1 | use crate::{ | ||
2 | SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit, | ||
3 | parsing::lexer::Token, | ||
4 | }; | ||
5 | |||
6 | use std::ops::{Add, AddAssign}; | ||
7 | |||
8 | pub(crate) struct ParserInput<'t> { | ||
9 | text: &'t str, | ||
10 | /// start position of each token(expect whitespace and comment) | ||
11 | /// ```non-rust | ||
12 | /// struct Foo; | ||
13 | /// ^------^--- | ||
14 | /// | | ^- | ||
15 | /// 0 7 10 | ||
16 | /// ``` | ||
17 | /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]` | ||
18 | start_offsets: Vec<TextUnit>, | ||
19 | /// non-whitespace/comment tokens | ||
20 | /// ```non-rust | ||
21 | /// struct Foo {} | ||
22 | /// ^^^^^^ ^^^ ^^ | ||
23 | /// ``` | ||
24 | /// tokens: `[struct, Foo, {, }]` | ||
25 | tokens: Vec<Token>, | ||
26 | } | ||
27 | |||
28 | impl<'t> ParserInput<'t> { | ||
29 | /// Generate input from tokens(expect comment and whitespace). | ||
30 | pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { | ||
31 | let mut tokens = Vec::new(); | ||
32 | let mut start_offsets = Vec::new(); | ||
33 | let mut len = 0.into(); | ||
34 | for &token in raw_tokens.iter() { | ||
35 | if !token.kind.is_trivia() { | ||
36 | tokens.push(token); | ||
37 | start_offsets.push(len); | ||
38 | } | ||
39 | len += token.len; | ||
40 | } | ||
41 | |||
42 | ParserInput { text, start_offsets, tokens } | ||
43 | } | ||
44 | |||
45 | /// Get the syntax kind of token at given input position. | ||
46 | pub fn kind(&self, pos: InputPosition) -> SyntaxKind { | ||
47 | let idx = pos.0 as usize; | ||
48 | if !(idx < self.tokens.len()) { | ||
49 | return EOF; | ||
50 | } | ||
51 | self.tokens[idx].kind | ||
52 | } | ||
53 | |||
54 | /// Get the length of a token at given input position. | ||
55 | pub fn token_len(&self, pos: InputPosition) -> TextUnit { | ||
56 | let idx = pos.0 as usize; | ||
57 | if !(idx < self.tokens.len()) { | ||
58 | return 0.into(); | ||
59 | } | ||
60 | self.tokens[idx].len | ||
61 | } | ||
62 | |||
63 | /// Get the start position of a taken at given input position. | ||
64 | pub fn token_start_at(&self, pos: InputPosition) -> TextUnit { | ||
65 | let idx = pos.0 as usize; | ||
66 | if !(idx < self.tokens.len()) { | ||
67 | return 0.into(); | ||
68 | } | ||
69 | self.start_offsets[idx] | ||
70 | } | ||
71 | |||
72 | /// Get the raw text of a token at given input position. | ||
73 | pub fn token_text(&self, pos: InputPosition) -> &'t str { | ||
74 | let idx = pos.0 as usize; | ||
75 | if !(idx < self.tokens.len()) { | ||
76 | return ""; | ||
77 | } | ||
78 | let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); | ||
79 | &self.text[range] | ||
80 | } | ||
81 | } | ||
82 | |||
83 | #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] | ||
84 | pub(crate) struct InputPosition(u32); | ||
85 | |||
86 | impl InputPosition { | ||
87 | pub fn new() -> Self { | ||
88 | InputPosition(0) | ||
89 | } | ||
90 | } | ||
91 | |||
92 | impl Add<u32> for InputPosition { | ||
93 | type Output = InputPosition; | ||
94 | |||
95 | fn add(self, rhs: u32) -> InputPosition { | ||
96 | InputPosition(self.0 + rhs) | ||
97 | } | ||
98 | } | ||
99 | |||
100 | impl AddAssign<u32> for InputPosition { | ||
101 | fn add_assign(&mut self, rhs: u32) { | ||
102 | self.0 += rhs | ||
103 | } | ||
104 | } | ||