author     Aleksey Kladov <[email protected]>   2019-02-20 12:47:32 +0000
committer  Aleksey Kladov <[email protected]>   2019-02-20 12:47:32 +0000
commit     5222b8aba3b1c2c68706aacf6869423a8e4fe6d5 (patch)
tree       c8a6e999b8ac5f1f29bde86a2e0b3a53466bb369 /crates/ra_syntax/src/parser_impl
parent     9d0cda4bc84350961f3884e75a1c20e62c449ede (diff)
move all parsing related bits to a separate module
Diffstat (limited to 'crates/ra_syntax/src/parser_impl')
-rw-r--r--  crates/ra_syntax/src/parser_impl/event.rs  251
-rw-r--r--  crates/ra_syntax/src/parser_impl/input.rs  101
2 files changed, 0 insertions, 352 deletions
diff --git a/crates/ra_syntax/src/parser_impl/event.rs b/crates/ra_syntax/src/parser_impl/event.rs
deleted file mode 100644
index b45830c61..000000000
--- a/crates/ra_syntax/src/parser_impl/event.rs
+++ /dev/null
@@ -1,251 +0,0 @@
//! This module provides a way to construct a `File`.
//! It is intended to be completely decoupled from the
//! parser, so as to allow the tree representation and the
//! parser algorithm to evolve independently.
//!
//! The `Sink` trait is the bridge between the parser and the
//! tree builder: the parser produces a stream of events like
//! `start node`, `finish node`, and `FileBuilder` converts
//! this stream to a real tree.
use crate::{
    lexer::Token,
    parser_impl::Sink,
    SmolStr,
    SyntaxKind::{self, *},
    TextRange, TextUnit,
    syntax_node::syntax_error::{
        ParseError,
        SyntaxError,
        SyntaxErrorKind,
    },
};
use std::mem;

/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree structure in
/// a separate pass, via `TreeBuilder`.
#[derive(Debug)]
pub(crate) enum Event {
    /// This event signifies the start of the node.
    /// It should be either abandoned (in which case the
    /// `kind` is `TOMBSTONE`, and the event is ignored),
    /// or completed via a `Finish` event.
    ///
    /// All tokens between a `Start` and a `Finish` would
    /// become the children of the respective node.
    ///
    /// For left-recursive syntactic constructs, the parser produces
    /// a child node before it sees a parent. `forward_parent`
    /// saves the position of the current event's parent.
    ///
    /// Consider this path
    ///
    /// foo::bar
    ///
    /// The events for it would look like this:
    ///
    ///
    /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
    ///       |                          /\
    ///       |                          |
    ///       +------forward-parent------+
    ///
    /// And the tree would look like this
    ///
    ///    +--PATH---------+
    ///    |   |           |
    ///    |   |           |
    ///    |  '::'       'bar'
    ///    |
    ///   PATH
    ///    |
    ///   'foo'
    ///
    /// See also `CompletedMarker::precede`.
    Start {
        kind: SyntaxKind,
        forward_parent: Option<u32>,
    },

    /// Complete the previous `Start` event
    Finish,

    /// Produce a single leaf-element.
    /// `n_raw_tokens` is used to glue complex contextual tokens.
    /// For example, the lexer tokenizes `>>` as `>`, `>`, and
    /// `n_raw_tokens = 2` is used to produce a single `>>`.
    Token {
        kind: SyntaxKind,
        n_raw_tokens: u8,
    },

    Error {
        msg: ParseError,
    },
}

impl Event {
    pub(crate) fn tombstone() -> Self {
        Event::Start { kind: TOMBSTONE, forward_parent: None }
    }
}
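
// A minimal illustration (not part of the original file) of the event buffer
// that the doc comment on `Event::Start` describes for `foo::bar`.
// `forward_parent` is a relative offset, so the inner path's `Start` points
// 3 events ahead at the outer path's `Start`, which `EventProcessor::process`
// will open first. `PATH`, `IDENT` and `COLONCOLON` are assumed to be the
// relevant `SyntaxKind`s, as in the diagram above.
#[allow(dead_code)]
fn example_events_for_foo_bar_path() -> Vec<Event> {
    vec![
        // inner `PATH` node for `foo`; its real parent starts 3 events later
        Event::Start { kind: PATH, forward_parent: Some(3) },
        Event::Token { kind: IDENT, n_raw_tokens: 1 },      // 'foo'
        Event::Finish,
        // outer `PATH` node covering the whole `foo::bar`
        Event::Start { kind: PATH, forward_parent: None },
        Event::Token { kind: COLONCOLON, n_raw_tokens: 1 }, // '::'
        Event::Token { kind: IDENT, n_raw_tokens: 1 },      // 'bar'
        Event::Finish,
    ]
}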
92 | |||
93 | pub(super) struct EventProcessor<'a, S: Sink> { | ||
94 | sink: S, | ||
95 | text_pos: TextUnit, | ||
96 | text: &'a str, | ||
97 | token_pos: usize, | ||
98 | tokens: &'a [Token], | ||
99 | events: &'a mut [Event], | ||
100 | } | ||
101 | |||
102 | impl<'a, S: Sink> EventProcessor<'a, S> { | ||
103 | pub(super) fn new( | ||
104 | sink: S, | ||
105 | text: &'a str, | ||
106 | tokens: &'a [Token], | ||
107 | events: &'a mut [Event], | ||
108 | ) -> EventProcessor<'a, S> { | ||
109 | EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events } | ||
110 | } | ||
111 | |||
112 | /// Generate the syntax tree with the control of events. | ||
113 | pub(super) fn process(mut self) -> S { | ||
114 | let mut forward_parents = Vec::new(); | ||
115 | |||
116 | for i in 0..self.events.len() { | ||
117 | match mem::replace(&mut self.events[i], Event::tombstone()) { | ||
118 | Event::Start { kind: TOMBSTONE, .. } => (), | ||
119 | |||
120 | Event::Start { kind, forward_parent } => { | ||
121 | // For events[A, B, C], B is A's forward_parent, C is B's forward_parent, | ||
122 | // in the normal control flow, the parent-child relation: `A -> B -> C`, | ||
123 | // while with the magic forward_parent, it writes: `C <- B <- A`. | ||
124 | |||
125 | // append `A` into parents. | ||
126 | forward_parents.push(kind); | ||
127 | let mut idx = i; | ||
128 | let mut fp = forward_parent; | ||
129 | while let Some(fwd) = fp { | ||
130 | idx += fwd as usize; | ||
131 | // append `A`'s forward_parent `B` | ||
132 | fp = match mem::replace(&mut self.events[idx], Event::tombstone()) { | ||
133 | Event::Start { kind, forward_parent } => { | ||
134 | forward_parents.push(kind); | ||
135 | forward_parent | ||
136 | } | ||
137 | _ => unreachable!(), | ||
138 | }; | ||
139 | // append `B`'s forward_parent `C` in the next stage. | ||
140 | } | ||
141 | |||
142 | for kind in forward_parents.drain(..).rev() { | ||
143 | self.start(kind); | ||
144 | } | ||
145 | } | ||
146 | Event::Finish => { | ||
147 | let is_last = i == self.events.len() - 1; | ||
148 | self.finish(is_last); | ||
149 | } | ||
150 | Event::Token { kind, n_raw_tokens } => { | ||
151 | self.eat_trivias(); | ||
152 | let n_raw_tokens = n_raw_tokens as usize; | ||
153 | let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens] | ||
154 | .iter() | ||
155 | .map(|it| it.len) | ||
156 | .sum::<TextUnit>(); | ||
157 | self.leaf(kind, len, n_raw_tokens); | ||
158 | } | ||
159 | Event::Error { msg } => self | ||
160 | .sink | ||
161 | .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)), | ||
162 | } | ||
163 | } | ||
164 | self.sink | ||
165 | } | ||
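
    // Illustrative trace (not part of the original file) of `process` on the
    // `foo::bar` event buffer sketched after `impl Event` above:
    //   i = 0: Start(PATH, forward_parent: Some(3)) -- follow the chain to
    //          index 3, collect [PATH(inner), PATH(outer)], start the branches
    //          in reverse: outer PATH first, then inner PATH; index 3 becomes
    //          a tombstone.
    //   i = 1: Token(IDENT 'foo')   -- leaf inside the inner PATH.
    //   i = 2: Finish               -- closes the inner PATH.
    //   i = 3: tombstone            -- skipped.
    //   i = 4, 5: Token('::'), Token('bar') -- leaves inside the outer PATH.
    //   i = 6: Finish               -- closes the outer PATH; trailing trivia
    //          is eaten here because this is the last event.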
166 | |||
167 | /// Add the node into syntax tree but discard the comments/whitespaces. | ||
168 | fn start(&mut self, kind: SyntaxKind) { | ||
169 | if kind == SOURCE_FILE { | ||
170 | self.sink.start_branch(kind); | ||
171 | return; | ||
172 | } | ||
173 | let n_trivias = | ||
174 | self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count(); | ||
175 | let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias]; | ||
176 | let mut trivia_end = | ||
177 | self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>(); | ||
178 | |||
179 | let n_attached_trivias = { | ||
180 | let leading_trivias = leading_trivias.iter().rev().map(|it| { | ||
181 | let next_end = trivia_end - it.len; | ||
182 | let range = TextRange::from_to(next_end, trivia_end); | ||
183 | trivia_end = next_end; | ||
184 | (it.kind, &self.text[range]) | ||
185 | }); | ||
186 | n_attached_trivias(kind, leading_trivias) | ||
187 | }; | ||
188 | self.eat_n_trivias(n_trivias - n_attached_trivias); | ||
189 | self.sink.start_branch(kind); | ||
190 | self.eat_n_trivias(n_attached_trivias); | ||
191 | } | ||
192 | |||
193 | fn finish(&mut self, is_last: bool) { | ||
194 | if is_last { | ||
195 | self.eat_trivias() | ||
196 | } | ||
197 | self.sink.finish_branch(); | ||
198 | } | ||
199 | |||
200 | fn eat_trivias(&mut self) { | ||
201 | while let Some(&token) = self.tokens.get(self.token_pos) { | ||
202 | if !token.kind.is_trivia() { | ||
203 | break; | ||
204 | } | ||
205 | self.leaf(token.kind, token.len, 1); | ||
206 | } | ||
207 | } | ||
208 | |||
209 | fn eat_n_trivias(&mut self, n: usize) { | ||
210 | for _ in 0..n { | ||
211 | let token = self.tokens[self.token_pos]; | ||
212 | assert!(token.kind.is_trivia()); | ||
213 | self.leaf(token.kind, token.len, 1); | ||
214 | } | ||
215 | } | ||
216 | |||
217 | fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) { | ||
218 | let range = TextRange::offset_len(self.text_pos, len); | ||
219 | let text: SmolStr = self.text[range].into(); | ||
220 | self.text_pos += len; | ||
221 | self.token_pos += n_tokens; | ||
222 | self.sink.leaf(kind, text); | ||
223 | } | ||
224 | } | ||
225 | |||
226 | fn n_attached_trivias<'a>( | ||
227 | kind: SyntaxKind, | ||
228 | trivias: impl Iterator<Item = (SyntaxKind, &'a str)>, | ||
229 | ) -> usize { | ||
230 | match kind { | ||
231 | CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF | ||
232 | | MODULE | NAMED_FIELD_DEF => { | ||
233 | let mut res = 0; | ||
234 | for (i, (kind, text)) in trivias.enumerate() { | ||
235 | match kind { | ||
236 | WHITESPACE => { | ||
237 | if text.contains("\n\n") { | ||
238 | break; | ||
239 | } | ||
240 | } | ||
241 | COMMENT => { | ||
242 | res = i + 1; | ||
243 | } | ||
244 | _ => (), | ||
245 | } | ||
246 | } | ||
247 | res | ||
248 | } | ||
249 | _ => 0, | ||
250 | } | ||
251 | } | ||
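
// Illustrative sketch (not part of the original file) of what
// `n_attached_trivias` decides for a definition-like node: the trivias are
// fed to it in reverse source order (nearest to the node first), comments
// directly above the node stay attached to it, and a blank line (whitespace
// containing "\n\n") breaks the attachment.
//
//     // attached: becomes part of the following FN_DEF node
//     fn bar() {}
//
//     // NOT attached: the blank line below ends the attached run,
//     // so this comment stays outside the FN_DEF for `fn baz`.
//
//     fn baz() {}
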
diff --git a/crates/ra_syntax/src/parser_impl/input.rs b/crates/ra_syntax/src/parser_impl/input.rs
deleted file mode 100644
index 616a26fdc..000000000
--- a/crates/ra_syntax/src/parser_impl/input.rs
+++ /dev/null
@@ -1,101 +0,0 @@
use crate::{lexer::Token, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit};

use std::ops::{Add, AddAssign};

pub(crate) struct ParserInput<'t> {
    text: &'t str,
    /// start position of each token (excluding whitespace and comments)
    /// ```non-rust
    /// struct Foo;
    /// ^------^---
    /// |      |  ^-
    /// 0      7  10
    /// ```
    /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
    start_offsets: Vec<TextUnit>,
    /// non-whitespace/comment tokens
    /// ```non-rust
    /// struct Foo {}
    /// ^^^^^^ ^^^ ^^
    /// ```
    /// tokens: `[struct, Foo, {, }]`
    tokens: Vec<Token>,
}
24 | |||
25 | impl<'t> ParserInput<'t> { | ||
26 | /// Generate input from tokens(expect comment and whitespace). | ||
27 | pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> { | ||
28 | let mut tokens = Vec::new(); | ||
29 | let mut start_offsets = Vec::new(); | ||
30 | let mut len = 0.into(); | ||
31 | for &token in raw_tokens.iter() { | ||
32 | if !token.kind.is_trivia() { | ||
33 | tokens.push(token); | ||
34 | start_offsets.push(len); | ||
35 | } | ||
36 | len += token.len; | ||
37 | } | ||
38 | |||
39 | ParserInput { text, start_offsets, tokens } | ||
40 | } | ||
41 | |||
42 | /// Get the syntax kind of token at given input position. | ||
43 | pub fn kind(&self, pos: InputPosition) -> SyntaxKind { | ||
44 | let idx = pos.0 as usize; | ||
45 | if !(idx < self.tokens.len()) { | ||
46 | return EOF; | ||
47 | } | ||
48 | self.tokens[idx].kind | ||
49 | } | ||
50 | |||
51 | /// Get the length of a token at given input position. | ||
52 | pub fn token_len(&self, pos: InputPosition) -> TextUnit { | ||
53 | let idx = pos.0 as usize; | ||
54 | if !(idx < self.tokens.len()) { | ||
55 | return 0.into(); | ||
56 | } | ||
57 | self.tokens[idx].len | ||
58 | } | ||
59 | |||
60 | /// Get the start position of a taken at given input position. | ||
61 | pub fn token_start_at(&self, pos: InputPosition) -> TextUnit { | ||
62 | let idx = pos.0 as usize; | ||
63 | if !(idx < self.tokens.len()) { | ||
64 | return 0.into(); | ||
65 | } | ||
66 | self.start_offsets[idx] | ||
67 | } | ||
68 | |||
69 | /// Get the raw text of a token at given input position. | ||
70 | pub fn token_text(&self, pos: InputPosition) -> &'t str { | ||
71 | let idx = pos.0 as usize; | ||
72 | if !(idx < self.tokens.len()) { | ||
73 | return ""; | ||
74 | } | ||
75 | let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len); | ||
76 | &self.text[range] | ||
77 | } | ||
78 | } | ||
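
// A minimal usage sketch (not part of the original file), assuming a
// `lexer::tokenize` helper that produces the raw `Vec<Token>` for a text.
// Trivia is filtered out by `new`, so an `InputPosition` indexes only the
// "real" tokens, and out-of-range positions report `EOF`.
#[allow(dead_code)]
fn example_parser_input() {
    let text = "struct Foo;";
    let raw_tokens = crate::lexer::tokenize(text);
    let input = ParserInput::new(text, &raw_tokens);

    let pos = InputPosition::new();
    assert_eq!(input.kind(pos), SyntaxKind::STRUCT_KW);
    assert_eq!(input.token_text(pos + 1), "Foo");      // whitespace is skipped
    assert_eq!(input.kind(pos + 3), SyntaxKind::EOF);  // past the end => EOF
}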
79 | |||
80 | #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] | ||
81 | pub(crate) struct InputPosition(u32); | ||
82 | |||
83 | impl InputPosition { | ||
84 | pub fn new() -> Self { | ||
85 | InputPosition(0) | ||
86 | } | ||
87 | } | ||
88 | |||
89 | impl Add<u32> for InputPosition { | ||
90 | type Output = InputPosition; | ||
91 | |||
92 | fn add(self, rhs: u32) -> InputPosition { | ||
93 | InputPosition(self.0 + rhs) | ||
94 | } | ||
95 | } | ||
96 | |||
97 | impl AddAssign<u32> for InputPosition { | ||
98 | fn add_assign(&mut self, rhs: u32) { | ||
99 | self.0 += rhs | ||
100 | } | ||
101 | } | ||