aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/parsing/parser_impl
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_syntax/src/parsing/parser_impl')
-rw-r--r--crates/ra_syntax/src/parsing/parser_impl/event.rs254
-rw-r--r--crates/ra_syntax/src/parsing/parser_impl/input.rs73
2 files changed, 0 insertions, 327 deletions
diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/parser_impl/event.rs
deleted file mode 100644
index 9663fba35..000000000
--- a/crates/ra_syntax/src/parsing/parser_impl/event.rs
+++ /dev/null
@@ -1,254 +0,0 @@
1//! This module provides a way to construct a `File`.
2//! It is intended to be completely decoupled from the
3//! parser, so as to allow to evolve the tree representation
4//! and the parser algorithm independently.
5//!
6//! The `TreeSink` trait is the bridge between the parser and the
7//! tree builder: the parser produces a stream of events like
8//! `start node`, `finish node`, and `FileBuilder` converts
9//! this stream to a real tree.
10use std::mem;
11
12use crate::{
13 SmolStr,
14 SyntaxKind::{self, *},
15 TextRange, TextUnit,
16 syntax_error::{
17 ParseError,
18 SyntaxError,
19 SyntaxErrorKind,
20 },
21 parsing::{
22 lexer::Token,
23 parser_impl::TreeSink,
24 },
25};
26
27/// `Parser` produces a flat list of `Event`s.
28/// They are converted to a tree-structure in
29/// a separate pass, via `TreeBuilder`.
30#[derive(Debug)]
31pub(crate) enum Event {
32 /// This event signifies the start of the node.
33 /// It should be either abandoned (in which case the
34 /// `kind` is `TOMBSTONE`, and the event is ignored),
35 /// or completed via a `Finish` event.
36 ///
37 /// All tokens between a `Start` and a `Finish` would
38 /// become the children of the respective node.
39 ///
40 /// For left-recursive syntactic constructs, the parser produces
41 /// a child node before it sees a parent. `forward_parent`
42 /// saves the position of current event's parent.
43 ///
44 /// Consider this path
45 ///
46 /// foo::bar
47 ///
48 /// The events for it would look like this:
49 ///
50 ///
51 /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
52 /// | /\
53 /// | |
54 /// +------forward-parent------+
55 ///
56 /// And the tree would look like this
57 ///
58 /// +--PATH---------+
59 /// | | |
60 /// | | |
61 /// | '::' 'bar'
62 /// |
63 /// PATH
64 /// |
65 /// 'foo'
66 ///
67 /// See also `CompletedMarker::precede`.
68 Start {
69 kind: SyntaxKind,
70 forward_parent: Option<u32>,
71 },
72
73 /// Complete the previous `Start` event
74 Finish,
75
76 /// Produce a single leaf-element.
77 /// `n_raw_tokens` is used to glue complex contextual tokens.
78 /// For example, lexer tokenizes `>>` as `>`, `>`, and
79 /// `n_raw_tokens = 2` is used to produced a single `>>`.
80 Token {
81 kind: SyntaxKind,
82 n_raw_tokens: u8,
83 },
84
85 Error {
86 msg: ParseError,
87 },
88}
89
90impl Event {
91 pub(crate) fn tombstone() -> Self {
92 Event::Start { kind: TOMBSTONE, forward_parent: None }
93 }
94}
95
96pub(super) struct EventProcessor<'a, S: TreeSink> {
97 sink: S,
98 text_pos: TextUnit,
99 text: &'a str,
100 token_pos: usize,
101 tokens: &'a [Token],
102 events: &'a mut [Event],
103}
104
105impl<'a, S: TreeSink> EventProcessor<'a, S> {
106 pub(super) fn new(
107 sink: S,
108 text: &'a str,
109 tokens: &'a [Token],
110 events: &'a mut [Event],
111 ) -> EventProcessor<'a, S> {
112 EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events }
113 }
114
115 /// Generate the syntax tree with the control of events.
116 pub(super) fn process(mut self) -> S {
117 let mut forward_parents = Vec::new();
118
119 for i in 0..self.events.len() {
120 match mem::replace(&mut self.events[i], Event::tombstone()) {
121 Event::Start { kind: TOMBSTONE, .. } => (),
122
123 Event::Start { kind, forward_parent } => {
124 // For events[A, B, C], B is A's forward_parent, C is B's forward_parent,
125 // in the normal control flow, the parent-child relation: `A -> B -> C`,
126 // while with the magic forward_parent, it writes: `C <- B <- A`.
127
128 // append `A` into parents.
129 forward_parents.push(kind);
130 let mut idx = i;
131 let mut fp = forward_parent;
132 while let Some(fwd) = fp {
133 idx += fwd as usize;
134 // append `A`'s forward_parent `B`
135 fp = match mem::replace(&mut self.events[idx], Event::tombstone()) {
136 Event::Start { kind, forward_parent } => {
137 forward_parents.push(kind);
138 forward_parent
139 }
140 _ => unreachable!(),
141 };
142 // append `B`'s forward_parent `C` in the next stage.
143 }
144
145 for kind in forward_parents.drain(..).rev() {
146 self.start(kind);
147 }
148 }
149 Event::Finish => {
150 let is_last = i == self.events.len() - 1;
151 self.finish(is_last);
152 }
153 Event::Token { kind, n_raw_tokens } => {
154 self.eat_trivias();
155 let n_raw_tokens = n_raw_tokens as usize;
156 let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens]
157 .iter()
158 .map(|it| it.len)
159 .sum::<TextUnit>();
160 self.leaf(kind, len, n_raw_tokens);
161 }
162 Event::Error { msg } => self
163 .sink
164 .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)),
165 }
166 }
167 self.sink
168 }
169
170 /// Add the node into syntax tree but discard the comments/whitespaces.
171 fn start(&mut self, kind: SyntaxKind) {
172 if kind == SOURCE_FILE {
173 self.sink.start_branch(kind);
174 return;
175 }
176 let n_trivias =
177 self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
178 let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
179 let mut trivia_end =
180 self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>();
181
182 let n_attached_trivias = {
183 let leading_trivias = leading_trivias.iter().rev().map(|it| {
184 let next_end = trivia_end - it.len;
185 let range = TextRange::from_to(next_end, trivia_end);
186 trivia_end = next_end;
187 (it.kind, &self.text[range])
188 });
189 n_attached_trivias(kind, leading_trivias)
190 };
191 self.eat_n_trivias(n_trivias - n_attached_trivias);
192 self.sink.start_branch(kind);
193 self.eat_n_trivias(n_attached_trivias);
194 }
195
196 fn finish(&mut self, is_last: bool) {
197 if is_last {
198 self.eat_trivias()
199 }
200 self.sink.finish_branch();
201 }
202
203 fn eat_trivias(&mut self) {
204 while let Some(&token) = self.tokens.get(self.token_pos) {
205 if !token.kind.is_trivia() {
206 break;
207 }
208 self.leaf(token.kind, token.len, 1);
209 }
210 }
211
212 fn eat_n_trivias(&mut self, n: usize) {
213 for _ in 0..n {
214 let token = self.tokens[self.token_pos];
215 assert!(token.kind.is_trivia());
216 self.leaf(token.kind, token.len, 1);
217 }
218 }
219
220 fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) {
221 let range = TextRange::offset_len(self.text_pos, len);
222 let text: SmolStr = self.text[range].into();
223 self.text_pos += len;
224 self.token_pos += n_tokens;
225 self.sink.leaf(kind, text);
226 }
227}
228
229fn n_attached_trivias<'a>(
230 kind: SyntaxKind,
231 trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
232) -> usize {
233 match kind {
234 CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF
235 | MODULE | NAMED_FIELD_DEF => {
236 let mut res = 0;
237 for (i, (kind, text)) in trivias.enumerate() {
238 match kind {
239 WHITESPACE => {
240 if text.contains("\n\n") {
241 break;
242 }
243 }
244 COMMENT => {
245 res = i + 1;
246 }
247 _ => (),
248 }
249 }
250 res
251 }
252 _ => 0,
253 }
254}
diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs
deleted file mode 100644
index 11b32b9ce..000000000
--- a/crates/ra_syntax/src/parsing/parser_impl/input.rs
+++ /dev/null
@@ -1,73 +0,0 @@
1use crate::{
2 SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
3 parsing::{
4 TokenPos,
5 parser_impl::TokenSource,
6 lexer::Token,
7 },
8};
9
10impl<'t> TokenSource for ParserInput<'t> {
11 fn token_kind(&self, pos: TokenPos) -> SyntaxKind {
12 let idx = pos.0 as usize;
13 if !(idx < self.tokens.len()) {
14 return EOF;
15 }
16 self.tokens[idx].kind
17 }
18 fn is_token_joint_to_next(&self, pos: TokenPos) -> bool {
19 let idx_curr = pos.0 as usize;
20 let idx_next = pos.0 as usize + 1;
21 if !(idx_next < self.tokens.len()) {
22 return true;
23 }
24 self.start_offsets[idx_curr] + self.tokens[idx_curr].len == self.start_offsets[idx_next]
25 }
26 fn is_keyword(&self, pos: TokenPos, kw: &str) -> bool {
27 let idx = pos.0 as usize;
28 if !(idx < self.tokens.len()) {
29 return false;
30 }
31 let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
32
33 self.text[range] == *kw
34 }
35}
36
37pub(crate) struct ParserInput<'t> {
38 text: &'t str,
39 /// start position of each token(expect whitespace and comment)
40 /// ```non-rust
41 /// struct Foo;
42 /// ^------^---
43 /// | | ^-
44 /// 0 7 10
45 /// ```
46 /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
47 start_offsets: Vec<TextUnit>,
48 /// non-whitespace/comment tokens
49 /// ```non-rust
50 /// struct Foo {}
51 /// ^^^^^^ ^^^ ^^
52 /// ```
53 /// tokens: `[struct, Foo, {, }]`
54 tokens: Vec<Token>,
55}
56
57impl<'t> ParserInput<'t> {
58 /// Generate input from tokens(expect comment and whitespace).
59 pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
60 let mut tokens = Vec::new();
61 let mut start_offsets = Vec::new();
62 let mut len = 0.into();
63 for &token in raw_tokens.iter() {
64 if !token.kind.is_trivia() {
65 tokens.push(token);
66 start_offsets.push(len);
67 }
68 len += token.len;
69 }
70
71 ParserInput { text, start_offsets, tokens }
72 }
73}