aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/parser_impl
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2019-02-20 12:47:32 +0000
committerAleksey Kladov <[email protected]>2019-02-20 12:47:32 +0000
commit5222b8aba3b1c2c68706aacf6869423a8e4fe6d5 (patch)
treec8a6e999b8ac5f1f29bde86a2e0b3a53466bb369 /crates/ra_syntax/src/parser_impl
parent9d0cda4bc84350961f3884e75a1c20e62c449ede (diff)
move all parsing related bits to a separate module
Diffstat (limited to 'crates/ra_syntax/src/parser_impl')
-rw-r--r--crates/ra_syntax/src/parser_impl/event.rs251
-rw-r--r--crates/ra_syntax/src/parser_impl/input.rs101
2 files changed, 0 insertions, 352 deletions
diff --git a/crates/ra_syntax/src/parser_impl/event.rs b/crates/ra_syntax/src/parser_impl/event.rs
deleted file mode 100644
index b45830c61..000000000
--- a/crates/ra_syntax/src/parser_impl/event.rs
+++ /dev/null
@@ -1,251 +0,0 @@
1//! This module provides a way to construct a `File`.
2//! It is intended to be completely decoupled from the
3//! parser, so as to allow to evolve the tree representation
4//! and the parser algorithm independently.
5//!
6//! The `Sink` trait is the bridge between the parser and the
7//! tree builder: the parser produces a stream of events like
8//! `start node`, `finish node`, and `FileBuilder` converts
9//! this stream to a real tree.
10use crate::{
11 lexer::Token,
12 parser_impl::Sink,
13 SmolStr,
14 SyntaxKind::{self, *},
15 TextRange, TextUnit,
16 syntax_node::syntax_error::{
17 ParseError,
18 SyntaxError,
19 SyntaxErrorKind,
20 },
21};
22use std::mem;
23
24/// `Parser` produces a flat list of `Event`s.
25/// They are converted to a tree-structure in
26/// a separate pass, via `TreeBuilder`.
27#[derive(Debug)]
28pub(crate) enum Event {
29 /// This event signifies the start of the node.
30 /// It should be either abandoned (in which case the
31 /// `kind` is `TOMBSTONE`, and the event is ignored),
32 /// or completed via a `Finish` event.
33 ///
34 /// All tokens between a `Start` and a `Finish` would
35 /// become the children of the respective node.
36 ///
37 /// For left-recursive syntactic constructs, the parser produces
38 /// a child node before it sees a parent. `forward_parent`
39 /// saves the position of current event's parent.
40 ///
41 /// Consider this path
42 ///
43 /// foo::bar
44 ///
45 /// The events for it would look like this:
46 ///
47 ///
48 /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
49 /// | /\
50 /// | |
51 /// +------forward-parent------+
52 ///
53 /// And the tree would look like this
54 ///
55 /// +--PATH---------+
56 /// | | |
57 /// | | |
58 /// | '::' 'bar'
59 /// |
60 /// PATH
61 /// |
62 /// 'foo'
63 ///
64 /// See also `CompletedMarker::precede`.
65 Start {
66 kind: SyntaxKind,
67 forward_parent: Option<u32>,
68 },
69
70 /// Complete the previous `Start` event
71 Finish,
72
73 /// Produce a single leaf-element.
74 /// `n_raw_tokens` is used to glue complex contextual tokens.
75 /// For example, lexer tokenizes `>>` as `>`, `>`, and
76 /// `n_raw_tokens = 2` is used to produced a single `>>`.
77 Token {
78 kind: SyntaxKind,
79 n_raw_tokens: u8,
80 },
81
82 Error {
83 msg: ParseError,
84 },
85}
86
87impl Event {
88 pub(crate) fn tombstone() -> Self {
89 Event::Start { kind: TOMBSTONE, forward_parent: None }
90 }
91}
92
93pub(super) struct EventProcessor<'a, S: Sink> {
94 sink: S,
95 text_pos: TextUnit,
96 text: &'a str,
97 token_pos: usize,
98 tokens: &'a [Token],
99 events: &'a mut [Event],
100}
101
102impl<'a, S: Sink> EventProcessor<'a, S> {
103 pub(super) fn new(
104 sink: S,
105 text: &'a str,
106 tokens: &'a [Token],
107 events: &'a mut [Event],
108 ) -> EventProcessor<'a, S> {
109 EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events }
110 }
111
112 /// Generate the syntax tree with the control of events.
113 pub(super) fn process(mut self) -> S {
114 let mut forward_parents = Vec::new();
115
116 for i in 0..self.events.len() {
117 match mem::replace(&mut self.events[i], Event::tombstone()) {
118 Event::Start { kind: TOMBSTONE, .. } => (),
119
120 Event::Start { kind, forward_parent } => {
121 // For events[A, B, C], B is A's forward_parent, C is B's forward_parent,
122 // in the normal control flow, the parent-child relation: `A -> B -> C`,
123 // while with the magic forward_parent, it writes: `C <- B <- A`.
124
125 // append `A` into parents.
126 forward_parents.push(kind);
127 let mut idx = i;
128 let mut fp = forward_parent;
129 while let Some(fwd) = fp {
130 idx += fwd as usize;
131 // append `A`'s forward_parent `B`
132 fp = match mem::replace(&mut self.events[idx], Event::tombstone()) {
133 Event::Start { kind, forward_parent } => {
134 forward_parents.push(kind);
135 forward_parent
136 }
137 _ => unreachable!(),
138 };
139 // append `B`'s forward_parent `C` in the next stage.
140 }
141
142 for kind in forward_parents.drain(..).rev() {
143 self.start(kind);
144 }
145 }
146 Event::Finish => {
147 let is_last = i == self.events.len() - 1;
148 self.finish(is_last);
149 }
150 Event::Token { kind, n_raw_tokens } => {
151 self.eat_trivias();
152 let n_raw_tokens = n_raw_tokens as usize;
153 let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens]
154 .iter()
155 .map(|it| it.len)
156 .sum::<TextUnit>();
157 self.leaf(kind, len, n_raw_tokens);
158 }
159 Event::Error { msg } => self
160 .sink
161 .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)),
162 }
163 }
164 self.sink
165 }
166
167 /// Add the node into syntax tree but discard the comments/whitespaces.
168 fn start(&mut self, kind: SyntaxKind) {
169 if kind == SOURCE_FILE {
170 self.sink.start_branch(kind);
171 return;
172 }
173 let n_trivias =
174 self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
175 let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
176 let mut trivia_end =
177 self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>();
178
179 let n_attached_trivias = {
180 let leading_trivias = leading_trivias.iter().rev().map(|it| {
181 let next_end = trivia_end - it.len;
182 let range = TextRange::from_to(next_end, trivia_end);
183 trivia_end = next_end;
184 (it.kind, &self.text[range])
185 });
186 n_attached_trivias(kind, leading_trivias)
187 };
188 self.eat_n_trivias(n_trivias - n_attached_trivias);
189 self.sink.start_branch(kind);
190 self.eat_n_trivias(n_attached_trivias);
191 }
192
193 fn finish(&mut self, is_last: bool) {
194 if is_last {
195 self.eat_trivias()
196 }
197 self.sink.finish_branch();
198 }
199
200 fn eat_trivias(&mut self) {
201 while let Some(&token) = self.tokens.get(self.token_pos) {
202 if !token.kind.is_trivia() {
203 break;
204 }
205 self.leaf(token.kind, token.len, 1);
206 }
207 }
208
209 fn eat_n_trivias(&mut self, n: usize) {
210 for _ in 0..n {
211 let token = self.tokens[self.token_pos];
212 assert!(token.kind.is_trivia());
213 self.leaf(token.kind, token.len, 1);
214 }
215 }
216
217 fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) {
218 let range = TextRange::offset_len(self.text_pos, len);
219 let text: SmolStr = self.text[range].into();
220 self.text_pos += len;
221 self.token_pos += n_tokens;
222 self.sink.leaf(kind, text);
223 }
224}
225
226fn n_attached_trivias<'a>(
227 kind: SyntaxKind,
228 trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
229) -> usize {
230 match kind {
231 CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF
232 | MODULE | NAMED_FIELD_DEF => {
233 let mut res = 0;
234 for (i, (kind, text)) in trivias.enumerate() {
235 match kind {
236 WHITESPACE => {
237 if text.contains("\n\n") {
238 break;
239 }
240 }
241 COMMENT => {
242 res = i + 1;
243 }
244 _ => (),
245 }
246 }
247 res
248 }
249 _ => 0,
250 }
251}
diff --git a/crates/ra_syntax/src/parser_impl/input.rs b/crates/ra_syntax/src/parser_impl/input.rs
deleted file mode 100644
index 616a26fdc..000000000
--- a/crates/ra_syntax/src/parser_impl/input.rs
+++ /dev/null
@@ -1,101 +0,0 @@
1use crate::{lexer::Token, SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit};
2
3use std::ops::{Add, AddAssign};
4
5pub(crate) struct ParserInput<'t> {
6 text: &'t str,
7 /// start position of each token(expect whitespace and comment)
8 /// ```non-rust
9 /// struct Foo;
10 /// ^------^---
11 /// | | ^-
12 /// 0 7 10
13 /// ```
14 /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
15 start_offsets: Vec<TextUnit>,
16 /// non-whitespace/comment tokens
17 /// ```non-rust
18 /// struct Foo {}
19 /// ^^^^^^ ^^^ ^^
20 /// ```
21 /// tokens: `[struct, Foo, {, }]`
22 tokens: Vec<Token>,
23}
24
25impl<'t> ParserInput<'t> {
26 /// Generate input from tokens(expect comment and whitespace).
27 pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
28 let mut tokens = Vec::new();
29 let mut start_offsets = Vec::new();
30 let mut len = 0.into();
31 for &token in raw_tokens.iter() {
32 if !token.kind.is_trivia() {
33 tokens.push(token);
34 start_offsets.push(len);
35 }
36 len += token.len;
37 }
38
39 ParserInput { text, start_offsets, tokens }
40 }
41
42 /// Get the syntax kind of token at given input position.
43 pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
44 let idx = pos.0 as usize;
45 if !(idx < self.tokens.len()) {
46 return EOF;
47 }
48 self.tokens[idx].kind
49 }
50
51 /// Get the length of a token at given input position.
52 pub fn token_len(&self, pos: InputPosition) -> TextUnit {
53 let idx = pos.0 as usize;
54 if !(idx < self.tokens.len()) {
55 return 0.into();
56 }
57 self.tokens[idx].len
58 }
59
60 /// Get the start position of a taken at given input position.
61 pub fn token_start_at(&self, pos: InputPosition) -> TextUnit {
62 let idx = pos.0 as usize;
63 if !(idx < self.tokens.len()) {
64 return 0.into();
65 }
66 self.start_offsets[idx]
67 }
68
69 /// Get the raw text of a token at given input position.
70 pub fn token_text(&self, pos: InputPosition) -> &'t str {
71 let idx = pos.0 as usize;
72 if !(idx < self.tokens.len()) {
73 return "";
74 }
75 let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
76 &self.text[range]
77 }
78}
79
80#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
81pub(crate) struct InputPosition(u32);
82
83impl InputPosition {
84 pub fn new() -> Self {
85 InputPosition(0)
86 }
87}
88
89impl Add<u32> for InputPosition {
90 type Output = InputPosition;
91
92 fn add(self, rhs: u32) -> InputPosition {
93 InputPosition(self.0 + rhs)
94 }
95}
96
97impl AddAssign<u32> for InputPosition {
98 fn add_assign(&mut self, rhs: u32) {
99 self.0 += rhs
100 }
101}