aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/parsing/parser_impl
diff options
context:
space:
mode:
authorbors[bot] <bors[bot]@users.noreply.github.com>2019-02-20 13:50:29 +0000
committerbors[bot] <bors[bot]@users.noreply.github.com>2019-02-20 13:50:29 +0000
commit96899f8278b787280bd07d9ac9dce29a610ce40d (patch)
treed02a22f02908fd3c89e50845a06a89b997220fc2 /crates/ra_syntax/src/parsing/parser_impl
parent5b617e3bf8252887a3eb1ce76d4b62cbee74e551 (diff)
parent86a67dce25f11ba9803a5727f77c02fd1f49e2c0 (diff)
Merge #861
861: Move parsing to a separate module r=matklad a=matklad Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_syntax/src/parsing/parser_impl')
-rw-r--r--crates/ra_syntax/src/parsing/parser_impl/event.rs254
-rw-r--r--crates/ra_syntax/src/parsing/parser_impl/input.rs104
2 files changed, 358 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/parser_impl/event.rs
new file mode 100644
index 000000000..2ddbdd34d
--- /dev/null
+++ b/crates/ra_syntax/src/parsing/parser_impl/event.rs
@@ -0,0 +1,254 @@
1//! This module provides a way to construct a `File`.
2//! It is intended to be completely decoupled from the
3//! parser, so as to allow to evolve the tree representation
4//! and the parser algorithm independently.
5//!
6//! The `Sink` trait is the bridge between the parser and the
7//! tree builder: the parser produces a stream of events like
8//! `start node`, `finish node`, and `FileBuilder` converts
9//! this stream to a real tree.
10use std::mem;
11
12use crate::{
13 SmolStr,
14 SyntaxKind::{self, *},
15 TextRange, TextUnit,
16 syntax_error::{
17 ParseError,
18 SyntaxError,
19 SyntaxErrorKind,
20 },
21 parsing::{
22 lexer::Token,
23 parser_impl::Sink,
24 },
25};
26
27/// `Parser` produces a flat list of `Event`s.
28/// They are converted to a tree-structure in
29/// a separate pass, via `TreeBuilder`.
30#[derive(Debug)]
31pub(crate) enum Event {
32 /// This event signifies the start of the node.
33 /// It should be either abandoned (in which case the
34 /// `kind` is `TOMBSTONE`, and the event is ignored),
35 /// or completed via a `Finish` event.
36 ///
37 /// All tokens between a `Start` and a `Finish` would
38 /// become the children of the respective node.
39 ///
40 /// For left-recursive syntactic constructs, the parser produces
41 /// a child node before it sees a parent. `forward_parent`
42 /// saves the position of current event's parent.
43 ///
44 /// Consider this path
45 ///
46 /// foo::bar
47 ///
48 /// The events for it would look like this:
49 ///
50 ///
51 /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
52 /// | /\
53 /// | |
54 /// +------forward-parent------+
55 ///
56 /// And the tree would look like this
57 ///
58 /// +--PATH---------+
59 /// | | |
60 /// | | |
61 /// | '::' 'bar'
62 /// |
63 /// PATH
64 /// |
65 /// 'foo'
66 ///
67 /// See also `CompletedMarker::precede`.
68 Start {
69 kind: SyntaxKind,
70 forward_parent: Option<u32>,
71 },
72
73 /// Complete the previous `Start` event
74 Finish,
75
76 /// Produce a single leaf-element.
77 /// `n_raw_tokens` is used to glue complex contextual tokens.
78 /// For example, lexer tokenizes `>>` as `>`, `>`, and
79 /// `n_raw_tokens = 2` is used to produced a single `>>`.
80 Token {
81 kind: SyntaxKind,
82 n_raw_tokens: u8,
83 },
84
85 Error {
86 msg: ParseError,
87 },
88}
89
90impl Event {
91 pub(crate) fn tombstone() -> Self {
92 Event::Start { kind: TOMBSTONE, forward_parent: None }
93 }
94}
95
96pub(super) struct EventProcessor<'a, S: Sink> {
97 sink: S,
98 text_pos: TextUnit,
99 text: &'a str,
100 token_pos: usize,
101 tokens: &'a [Token],
102 events: &'a mut [Event],
103}
104
105impl<'a, S: Sink> EventProcessor<'a, S> {
106 pub(super) fn new(
107 sink: S,
108 text: &'a str,
109 tokens: &'a [Token],
110 events: &'a mut [Event],
111 ) -> EventProcessor<'a, S> {
112 EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events }
113 }
114
115 /// Generate the syntax tree with the control of events.
116 pub(super) fn process(mut self) -> S {
117 let mut forward_parents = Vec::new();
118
119 for i in 0..self.events.len() {
120 match mem::replace(&mut self.events[i], Event::tombstone()) {
121 Event::Start { kind: TOMBSTONE, .. } => (),
122
123 Event::Start { kind, forward_parent } => {
124 // For events[A, B, C], B is A's forward_parent, C is B's forward_parent,
125 // in the normal control flow, the parent-child relation: `A -> B -> C`,
126 // while with the magic forward_parent, it writes: `C <- B <- A`.
127
128 // append `A` into parents.
129 forward_parents.push(kind);
130 let mut idx = i;
131 let mut fp = forward_parent;
132 while let Some(fwd) = fp {
133 idx += fwd as usize;
134 // append `A`'s forward_parent `B`
135 fp = match mem::replace(&mut self.events[idx], Event::tombstone()) {
136 Event::Start { kind, forward_parent } => {
137 forward_parents.push(kind);
138 forward_parent
139 }
140 _ => unreachable!(),
141 };
142 // append `B`'s forward_parent `C` in the next stage.
143 }
144
145 for kind in forward_parents.drain(..).rev() {
146 self.start(kind);
147 }
148 }
149 Event::Finish => {
150 let is_last = i == self.events.len() - 1;
151 self.finish(is_last);
152 }
153 Event::Token { kind, n_raw_tokens } => {
154 self.eat_trivias();
155 let n_raw_tokens = n_raw_tokens as usize;
156 let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens]
157 .iter()
158 .map(|it| it.len)
159 .sum::<TextUnit>();
160 self.leaf(kind, len, n_raw_tokens);
161 }
162 Event::Error { msg } => self
163 .sink
164 .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)),
165 }
166 }
167 self.sink
168 }
169
170 /// Add the node into syntax tree but discard the comments/whitespaces.
171 fn start(&mut self, kind: SyntaxKind) {
172 if kind == SOURCE_FILE {
173 self.sink.start_branch(kind);
174 return;
175 }
176 let n_trivias =
177 self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
178 let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
179 let mut trivia_end =
180 self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>();
181
182 let n_attached_trivias = {
183 let leading_trivias = leading_trivias.iter().rev().map(|it| {
184 let next_end = trivia_end - it.len;
185 let range = TextRange::from_to(next_end, trivia_end);
186 trivia_end = next_end;
187 (it.kind, &self.text[range])
188 });
189 n_attached_trivias(kind, leading_trivias)
190 };
191 self.eat_n_trivias(n_trivias - n_attached_trivias);
192 self.sink.start_branch(kind);
193 self.eat_n_trivias(n_attached_trivias);
194 }
195
196 fn finish(&mut self, is_last: bool) {
197 if is_last {
198 self.eat_trivias()
199 }
200 self.sink.finish_branch();
201 }
202
203 fn eat_trivias(&mut self) {
204 while let Some(&token) = self.tokens.get(self.token_pos) {
205 if !token.kind.is_trivia() {
206 break;
207 }
208 self.leaf(token.kind, token.len, 1);
209 }
210 }
211
212 fn eat_n_trivias(&mut self, n: usize) {
213 for _ in 0..n {
214 let token = self.tokens[self.token_pos];
215 assert!(token.kind.is_trivia());
216 self.leaf(token.kind, token.len, 1);
217 }
218 }
219
220 fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) {
221 let range = TextRange::offset_len(self.text_pos, len);
222 let text: SmolStr = self.text[range].into();
223 self.text_pos += len;
224 self.token_pos += n_tokens;
225 self.sink.leaf(kind, text);
226 }
227}
228
229fn n_attached_trivias<'a>(
230 kind: SyntaxKind,
231 trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
232) -> usize {
233 match kind {
234 CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF
235 | MODULE | NAMED_FIELD_DEF => {
236 let mut res = 0;
237 for (i, (kind, text)) in trivias.enumerate() {
238 match kind {
239 WHITESPACE => {
240 if text.contains("\n\n") {
241 break;
242 }
243 }
244 COMMENT => {
245 res = i + 1;
246 }
247 _ => (),
248 }
249 }
250 res
251 }
252 _ => 0,
253 }
254}
diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs
new file mode 100644
index 000000000..275d94918
--- /dev/null
+++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs
@@ -0,0 +1,104 @@
1use crate::{
2 SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
3 parsing::lexer::Token,
4};
5
6use std::ops::{Add, AddAssign};
7
8pub(crate) struct ParserInput<'t> {
9 text: &'t str,
10 /// start position of each token(expect whitespace and comment)
11 /// ```non-rust
12 /// struct Foo;
13 /// ^------^---
14 /// | | ^-
15 /// 0 7 10
16 /// ```
17 /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
18 start_offsets: Vec<TextUnit>,
19 /// non-whitespace/comment tokens
20 /// ```non-rust
21 /// struct Foo {}
22 /// ^^^^^^ ^^^ ^^
23 /// ```
24 /// tokens: `[struct, Foo, {, }]`
25 tokens: Vec<Token>,
26}
27
28impl<'t> ParserInput<'t> {
29 /// Generate input from tokens(expect comment and whitespace).
30 pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
31 let mut tokens = Vec::new();
32 let mut start_offsets = Vec::new();
33 let mut len = 0.into();
34 for &token in raw_tokens.iter() {
35 if !token.kind.is_trivia() {
36 tokens.push(token);
37 start_offsets.push(len);
38 }
39 len += token.len;
40 }
41
42 ParserInput { text, start_offsets, tokens }
43 }
44
45 /// Get the syntax kind of token at given input position.
46 pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
47 let idx = pos.0 as usize;
48 if !(idx < self.tokens.len()) {
49 return EOF;
50 }
51 self.tokens[idx].kind
52 }
53
54 /// Get the length of a token at given input position.
55 pub fn token_len(&self, pos: InputPosition) -> TextUnit {
56 let idx = pos.0 as usize;
57 if !(idx < self.tokens.len()) {
58 return 0.into();
59 }
60 self.tokens[idx].len
61 }
62
63 /// Get the start position of a taken at given input position.
64 pub fn token_start_at(&self, pos: InputPosition) -> TextUnit {
65 let idx = pos.0 as usize;
66 if !(idx < self.tokens.len()) {
67 return 0.into();
68 }
69 self.start_offsets[idx]
70 }
71
72 /// Get the raw text of a token at given input position.
73 pub fn token_text(&self, pos: InputPosition) -> &'t str {
74 let idx = pos.0 as usize;
75 if !(idx < self.tokens.len()) {
76 return "";
77 }
78 let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
79 &self.text[range]
80 }
81}
82
83#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
84pub(crate) struct InputPosition(u32);
85
86impl InputPosition {
87 pub fn new() -> Self {
88 InputPosition(0)
89 }
90}
91
92impl Add<u32> for InputPosition {
93 type Output = InputPosition;
94
95 fn add(self, rhs: u32) -> InputPosition {
96 InputPosition(self.0 + rhs)
97 }
98}
99
100impl AddAssign<u32> for InputPosition {
101 fn add_assign(&mut self, rhs: u32) {
102 self.0 += rhs
103 }
104}