diff options
Diffstat (limited to 'crates/parser/src/parser.rs')
-rw-r--r-- | crates/parser/src/parser.rs | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs new file mode 100644 index 000000000..d2487acc3 --- /dev/null +++ b/crates/parser/src/parser.rs | |||
@@ -0,0 +1,350 @@ | |||
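//! Low-level `Parser` API: token lookahead and consumption, plus the
//! `Marker`/`CompletedMarker` types used to emit the flat stream of parse events.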
2 | |||
3 | use std::cell::Cell; | ||
4 | |||
5 | use drop_bomb::DropBomb; | ||
6 | |||
7 | use crate::{ | ||
8 | event::Event, | ||
9 | ParseError, | ||
10 | SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, | ||
11 | TokenSet, TokenSource, T, | ||
12 | }; | ||
13 | |||
14 | /// `Parser` struct provides the low-level API for | ||
15 | /// navigating through the stream of tokens and | ||
16 | /// constructing the parse tree. The actual parsing | ||
17 | /// happens in the `grammar` module. | ||
18 | /// | ||
19 | /// However, the result of this `Parser` is not a real | ||
20 | /// tree, but rather a flat stream of events of the form | ||
21 | /// "start expression, consume number literal, | ||
22 | /// finish expression". See `Event` docs for more. | ||
23 | pub(crate) struct Parser<'t> { | ||
24 | token_source: &'t mut dyn TokenSource, | ||
25 | events: Vec<Event>, | ||
26 | steps: Cell<u32>, | ||
27 | } | ||
28 | |||
29 | impl<'t> Parser<'t> { | ||
30 | pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> { | ||
31 | Parser { token_source, events: Vec::new(), steps: Cell::new(0) } | ||
32 | } | ||
33 | |||
34 | pub(crate) fn finish(self) -> Vec<Event> { | ||
35 | self.events | ||
36 | } | ||
37 | |||
38 | /// Returns the kind of the current token. | ||
39 | /// If parser has already reached the end of input, | ||
40 | /// the special `EOF` kind is returned. | ||
41 | pub(crate) fn current(&self) -> SyntaxKind { | ||
42 | self.nth(0) | ||
43 | } | ||
44 | |||
45 | /// Lookahead operation: returns the kind of the next nth | ||
46 | /// token. | ||
47 | pub(crate) fn nth(&self, n: usize) -> SyntaxKind { | ||
48 | assert!(n <= 3); | ||
49 | |||
50 | let steps = self.steps.get(); | ||
51 | assert!(steps <= 10_000_000, "the parser seems stuck"); | ||
52 | self.steps.set(steps + 1); | ||
53 | |||
54 | self.token_source.lookahead_nth(n).kind | ||
55 | } | ||
56 | |||
57 | /// Checks if the current token is `kind`. | ||
58 | pub(crate) fn at(&self, kind: SyntaxKind) -> bool { | ||
59 | self.nth_at(0, kind) | ||
60 | } | ||
61 | |||
62 | pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { | ||
63 | match kind { | ||
64 | T![-=] => self.at_composite2(n, T![-], T![=]), | ||
65 | T![->] => self.at_composite2(n, T![-], T![>]), | ||
66 | T![::] => self.at_composite2(n, T![:], T![:]), | ||
67 | T![!=] => self.at_composite2(n, T![!], T![=]), | ||
68 | T![..] => self.at_composite2(n, T![.], T![.]), | ||
69 | T![*=] => self.at_composite2(n, T![*], T![=]), | ||
70 | T![/=] => self.at_composite2(n, T![/], T![=]), | ||
71 | T![&&] => self.at_composite2(n, T![&], T![&]), | ||
72 | T![&=] => self.at_composite2(n, T![&], T![=]), | ||
73 | T![%=] => self.at_composite2(n, T![%], T![=]), | ||
74 | T![^=] => self.at_composite2(n, T![^], T![=]), | ||
75 | T![+=] => self.at_composite2(n, T![+], T![=]), | ||
76 | T![<<] => self.at_composite2(n, T![<], T![<]), | ||
77 | T![<=] => self.at_composite2(n, T![<], T![=]), | ||
78 | T![==] => self.at_composite2(n, T![=], T![=]), | ||
79 | T![=>] => self.at_composite2(n, T![=], T![>]), | ||
80 | T![>=] => self.at_composite2(n, T![>], T![=]), | ||
81 | T![>>] => self.at_composite2(n, T![>], T![>]), | ||
82 | T![|=] => self.at_composite2(n, T![|], T![=]), | ||
83 | T![||] => self.at_composite2(n, T![|], T![|]), | ||
84 | |||
85 | T![...] => self.at_composite3(n, T![.], T![.], T![.]), | ||
86 | T![..=] => self.at_composite3(n, T![.], T![.], T![=]), | ||
87 | T![<<=] => self.at_composite3(n, T![<], T![<], T![=]), | ||
88 | T![>>=] => self.at_composite3(n, T![>], T![>], T![=]), | ||
89 | |||
90 | _ => self.token_source.lookahead_nth(n).kind == kind, | ||
91 | } | ||
92 | } | ||
93 | |||
94 | /// Consume the next token if `kind` matches. | ||
95 | pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { | ||
96 | if !self.at(kind) { | ||
97 | return false; | ||
98 | } | ||
99 | let n_raw_tokens = match kind { | ||
100 | T![-=] | ||
101 | | T![->] | ||
102 | | T![::] | ||
103 | | T![!=] | ||
104 | | T![..] | ||
105 | | T![*=] | ||
106 | | T![/=] | ||
107 | | T![&&] | ||
108 | | T![&=] | ||
109 | | T![%=] | ||
110 | | T![^=] | ||
111 | | T![+=] | ||
112 | | T![<<] | ||
113 | | T![<=] | ||
114 | | T![==] | ||
115 | | T![=>] | ||
116 | | T![>=] | ||
117 | | T![>>] | ||
118 | | T![|=] | ||
119 | | T![||] => 2, | ||
120 | |||
121 | T![...] | T![..=] | T![<<=] | T![>>=] => 3, | ||
122 | _ => 1, | ||
123 | }; | ||
124 | self.do_bump(kind, n_raw_tokens); | ||
125 | true | ||
126 | } | ||
127 | |||
128 | fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool { | ||
129 | let t1 = self.token_source.lookahead_nth(n); | ||
130 | if t1.kind != k1 || !t1.is_jointed_to_next { | ||
131 | return false; | ||
132 | } | ||
133 | let t2 = self.token_source.lookahead_nth(n + 1); | ||
134 | t2.kind == k2 | ||
135 | } | ||
136 | |||
137 | fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool { | ||
138 | let t1 = self.token_source.lookahead_nth(n); | ||
139 | if t1.kind != k1 || !t1.is_jointed_to_next { | ||
140 | return false; | ||
141 | } | ||
142 | let t2 = self.token_source.lookahead_nth(n + 1); | ||
143 | if t2.kind != k2 || !t2.is_jointed_to_next { | ||
144 | return false; | ||
145 | } | ||
146 | let t3 = self.token_source.lookahead_nth(n + 2); | ||
147 | t3.kind == k3 | ||
148 | } | ||
149 | |||
150 | /// Checks if the current token is in `kinds`. | ||
151 | pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { | ||
152 | kinds.contains(self.current()) | ||
153 | } | ||
154 | |||
155 | /// Checks if the current token is contextual keyword with text `t`. | ||
156 | pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { | ||
157 | self.token_source.is_keyword(kw) | ||
158 | } | ||
159 | |||
160 | /// Starts a new node in the syntax tree. All nodes and tokens | ||
161 | /// consumed between the `start` and the corresponding `Marker::complete` | ||
162 | /// belong to the same node. | ||
163 | pub(crate) fn start(&mut self) -> Marker { | ||
164 | let pos = self.events.len() as u32; | ||
165 | self.push_event(Event::tombstone()); | ||
166 | Marker::new(pos) | ||
167 | } | ||
168 | |||
169 | /// Consume the next token if `kind` matches. | ||
170 | pub(crate) fn bump(&mut self, kind: SyntaxKind) { | ||
171 | assert!(self.eat(kind)); | ||
172 | } | ||
173 | |||
174 | /// Advances the parser by one token | ||
175 | pub(crate) fn bump_any(&mut self) { | ||
176 | let kind = self.nth(0); | ||
177 | if kind == EOF { | ||
178 | return; | ||
179 | } | ||
180 | self.do_bump(kind, 1) | ||
181 | } | ||
182 | |||
183 | /// Advances the parser by one token, remapping its kind. | ||
184 | /// This is useful to create contextual keywords from | ||
185 | /// identifiers. For example, the lexer creates an `union` | ||
186 | /// *identifier* token, but the parser remaps it to the | ||
187 | /// `union` keyword, and keyword is what ends up in the | ||
188 | /// final tree. | ||
189 | pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { | ||
190 | if self.nth(0) == EOF { | ||
191 | // FIXME: panic!? | ||
192 | return; | ||
193 | } | ||
194 | self.do_bump(kind, 1); | ||
195 | } | ||
196 | |||
197 | /// Emit error with the `message` | ||
198 | /// FIXME: this should be much more fancy and support | ||
199 | /// structured errors with spans and notes, like rustc | ||
200 | /// does. | ||
201 | pub(crate) fn error<T: Into<String>>(&mut self, message: T) { | ||
202 | let msg = ParseError(Box::new(message.into())); | ||
203 | self.push_event(Event::Error { msg }) | ||
204 | } | ||
205 | |||
206 | /// Consume the next token if it is `kind` or emit an error | ||
207 | /// otherwise. | ||
208 | pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { | ||
209 | if self.eat(kind) { | ||
210 | return true; | ||
211 | } | ||
212 | self.error(format!("expected {:?}", kind)); | ||
213 | false | ||
214 | } | ||
215 | |||
216 | /// Create an error node and consume the next token. | ||
217 | pub(crate) fn err_and_bump(&mut self, message: &str) { | ||
218 | self.err_recover(message, TokenSet::EMPTY); | ||
219 | } | ||
220 | |||
221 | /// Create an error node and consume the next token. | ||
222 | pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { | ||
223 | match self.current() { | ||
224 | T!['{'] | T!['}'] => { | ||
225 | self.error(message); | ||
226 | return; | ||
227 | } | ||
228 | _ => (), | ||
229 | } | ||
230 | |||
231 | if self.at_ts(recovery) { | ||
232 | self.error(message); | ||
233 | return; | ||
234 | } | ||
235 | |||
236 | let m = self.start(); | ||
237 | self.error(message); | ||
238 | self.bump_any(); | ||
239 | m.complete(self, ERROR); | ||
240 | } | ||
241 | |||
242 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { | ||
243 | for _ in 0..n_raw_tokens { | ||
244 | self.token_source.bump(); | ||
245 | } | ||
246 | |||
247 | self.push_event(Event::Token { kind, n_raw_tokens }); | ||
248 | } | ||
249 | |||
250 | fn push_event(&mut self, event: Event) { | ||
251 | self.events.push(event) | ||
252 | } | ||
253 | } | ||
254 | |||
255 | /// See `Parser::start`. | ||
256 | pub(crate) struct Marker { | ||
257 | pos: u32, | ||
258 | bomb: DropBomb, | ||
259 | } | ||
260 | |||
261 | impl Marker { | ||
262 | fn new(pos: u32) -> Marker { | ||
263 | Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } | ||
264 | } | ||
265 | |||
266 | /// Finishes the syntax tree node and assigns `kind` to it, | ||
267 | /// and mark the create a `CompletedMarker` for possible future | ||
268 | /// operation like `.precede()` to deal with forward_parent. | ||
269 | pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { | ||
270 | self.bomb.defuse(); | ||
271 | let idx = self.pos as usize; | ||
272 | match &mut p.events[idx] { | ||
273 | Event::Start { kind: slot, .. } => { | ||
274 | *slot = kind; | ||
275 | } | ||
276 | _ => unreachable!(), | ||
277 | } | ||
278 | let finish_pos = p.events.len() as u32; | ||
279 | p.push_event(Event::Finish); | ||
280 | CompletedMarker::new(self.pos, finish_pos, kind) | ||
281 | } | ||
282 | |||
283 | /// Abandons the syntax tree node. All its children | ||
284 | /// are attached to its parent instead. | ||
285 | pub(crate) fn abandon(mut self, p: &mut Parser) { | ||
286 | self.bomb.defuse(); | ||
287 | let idx = self.pos as usize; | ||
288 | if idx == p.events.len() - 1 { | ||
289 | match p.events.pop() { | ||
290 | Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), | ||
291 | _ => unreachable!(), | ||
292 | } | ||
293 | } | ||
294 | } | ||
295 | } | ||
296 | |||
297 | pub(crate) struct CompletedMarker { | ||
298 | start_pos: u32, | ||
299 | finish_pos: u32, | ||
300 | kind: SyntaxKind, | ||
301 | } | ||
302 | |||
303 | impl CompletedMarker { | ||
304 | fn new(start_pos: u32, finish_pos: u32, kind: SyntaxKind) -> Self { | ||
305 | CompletedMarker { start_pos, finish_pos, kind } | ||
306 | } | ||
307 | |||
308 | /// This method allows to create a new node which starts | ||
309 | /// *before* the current one. That is, parser could start | ||
310 | /// node `A`, then complete it, and then after parsing the | ||
311 | /// whole `A`, decide that it should have started some node | ||
312 | /// `B` before starting `A`. `precede` allows to do exactly | ||
313 | /// that. See also docs about `forward_parent` in `Event::Start`. | ||
314 | /// | ||
315 | /// Given completed events `[START, FINISH]` and its corresponding | ||
316 | /// `CompletedMarker(pos: 0, _)`. | ||
317 | /// Append a new `START` events as `[START, FINISH, NEWSTART]`, | ||
318 | /// then mark `NEWSTART` as `START`'s parent with saving its relative | ||
319 | /// distance to `NEWSTART` into forward_parent(=2 in this case); | ||
320 | pub(crate) fn precede(self, p: &mut Parser) -> Marker { | ||
321 | let new_pos = p.start(); | ||
322 | let idx = self.start_pos as usize; | ||
323 | match &mut p.events[idx] { | ||
324 | Event::Start { forward_parent, .. } => { | ||
325 | *forward_parent = Some(new_pos.pos - self.start_pos); | ||
326 | } | ||
327 | _ => unreachable!(), | ||
328 | } | ||
329 | new_pos | ||
330 | } | ||
331 | |||
332 | /// Undo this completion and turns into a `Marker` | ||
333 | pub(crate) fn undo_completion(self, p: &mut Parser) -> Marker { | ||
334 | let start_idx = self.start_pos as usize; | ||
335 | let finish_idx = self.finish_pos as usize; | ||
336 | match &mut p.events[start_idx] { | ||
337 | Event::Start { kind, forward_parent: None } => *kind = TOMBSTONE, | ||
338 | _ => unreachable!(), | ||
339 | } | ||
340 | match &mut p.events[finish_idx] { | ||
341 | slot @ Event::Finish => *slot = Event::tombstone(), | ||
342 | _ => unreachable!(), | ||
343 | } | ||
344 | Marker::new(self.start_pos) | ||
345 | } | ||
346 | |||
347 | pub(crate) fn kind(&self) -> SyntaxKind { | ||
348 | self.kind | ||
349 | } | ||
350 | } | ||