aboutsummaryrefslogtreecommitdiff
path: root/crates/parser/src/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/parser/src/parser.rs')
-rw-r--r--crates/parser/src/parser.rs350
1 files changed, 350 insertions, 0 deletions
diff --git a/crates/parser/src/parser.rs b/crates/parser/src/parser.rs
new file mode 100644
index 000000000..d2487acc3
--- /dev/null
+++ b/crates/parser/src/parser.rs
@@ -0,0 +1,350 @@
1//! FIXME: write short doc here
2
3use std::cell::Cell;
4
5use drop_bomb::DropBomb;
6
7use crate::{
8 event::Event,
9 ParseError,
10 SyntaxKind::{self, EOF, ERROR, TOMBSTONE},
11 TokenSet, TokenSource, T,
12};
13
14/// `Parser` struct provides the low-level API for
15/// navigating through the stream of tokens and
16/// constructing the parse tree. The actual parsing
17/// happens in the `grammar` module.
18///
19/// However, the result of this `Parser` is not a real
20/// tree, but rather a flat stream of events of the form
21/// "start expression, consume number literal,
22/// finish expression". See `Event` docs for more.
23pub(crate) struct Parser<'t> {
24 token_source: &'t mut dyn TokenSource,
25 events: Vec<Event>,
26 steps: Cell<u32>,
27}
28
29impl<'t> Parser<'t> {
30 pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> {
31 Parser { token_source, events: Vec::new(), steps: Cell::new(0) }
32 }
33
34 pub(crate) fn finish(self) -> Vec<Event> {
35 self.events
36 }
37
38 /// Returns the kind of the current token.
39 /// If parser has already reached the end of input,
40 /// the special `EOF` kind is returned.
41 pub(crate) fn current(&self) -> SyntaxKind {
42 self.nth(0)
43 }
44
45 /// Lookahead operation: returns the kind of the next nth
46 /// token.
47 pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
48 assert!(n <= 3);
49
50 let steps = self.steps.get();
51 assert!(steps <= 10_000_000, "the parser seems stuck");
52 self.steps.set(steps + 1);
53
54 self.token_source.lookahead_nth(n).kind
55 }
56
57 /// Checks if the current token is `kind`.
58 pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
59 self.nth_at(0, kind)
60 }
61
62 pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool {
63 match kind {
64 T![-=] => self.at_composite2(n, T![-], T![=]),
65 T![->] => self.at_composite2(n, T![-], T![>]),
66 T![::] => self.at_composite2(n, T![:], T![:]),
67 T![!=] => self.at_composite2(n, T![!], T![=]),
68 T![..] => self.at_composite2(n, T![.], T![.]),
69 T![*=] => self.at_composite2(n, T![*], T![=]),
70 T![/=] => self.at_composite2(n, T![/], T![=]),
71 T![&&] => self.at_composite2(n, T![&], T![&]),
72 T![&=] => self.at_composite2(n, T![&], T![=]),
73 T![%=] => self.at_composite2(n, T![%], T![=]),
74 T![^=] => self.at_composite2(n, T![^], T![=]),
75 T![+=] => self.at_composite2(n, T![+], T![=]),
76 T![<<] => self.at_composite2(n, T![<], T![<]),
77 T![<=] => self.at_composite2(n, T![<], T![=]),
78 T![==] => self.at_composite2(n, T![=], T![=]),
79 T![=>] => self.at_composite2(n, T![=], T![>]),
80 T![>=] => self.at_composite2(n, T![>], T![=]),
81 T![>>] => self.at_composite2(n, T![>], T![>]),
82 T![|=] => self.at_composite2(n, T![|], T![=]),
83 T![||] => self.at_composite2(n, T![|], T![|]),
84
85 T![...] => self.at_composite3(n, T![.], T![.], T![.]),
86 T![..=] => self.at_composite3(n, T![.], T![.], T![=]),
87 T![<<=] => self.at_composite3(n, T![<], T![<], T![=]),
88 T![>>=] => self.at_composite3(n, T![>], T![>], T![=]),
89
90 _ => self.token_source.lookahead_nth(n).kind == kind,
91 }
92 }
93
94 /// Consume the next token if `kind` matches.
95 pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
96 if !self.at(kind) {
97 return false;
98 }
99 let n_raw_tokens = match kind {
100 T![-=]
101 | T![->]
102 | T![::]
103 | T![!=]
104 | T![..]
105 | T![*=]
106 | T![/=]
107 | T![&&]
108 | T![&=]
109 | T![%=]
110 | T![^=]
111 | T![+=]
112 | T![<<]
113 | T![<=]
114 | T![==]
115 | T![=>]
116 | T![>=]
117 | T![>>]
118 | T![|=]
119 | T![||] => 2,
120
121 T![...] | T![..=] | T![<<=] | T![>>=] => 3,
122 _ => 1,
123 };
124 self.do_bump(kind, n_raw_tokens);
125 true
126 }
127
128 fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool {
129 let t1 = self.token_source.lookahead_nth(n);
130 if t1.kind != k1 || !t1.is_jointed_to_next {
131 return false;
132 }
133 let t2 = self.token_source.lookahead_nth(n + 1);
134 t2.kind == k2
135 }
136
137 fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool {
138 let t1 = self.token_source.lookahead_nth(n);
139 if t1.kind != k1 || !t1.is_jointed_to_next {
140 return false;
141 }
142 let t2 = self.token_source.lookahead_nth(n + 1);
143 if t2.kind != k2 || !t2.is_jointed_to_next {
144 return false;
145 }
146 let t3 = self.token_source.lookahead_nth(n + 2);
147 t3.kind == k3
148 }
149
150 /// Checks if the current token is in `kinds`.
151 pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
152 kinds.contains(self.current())
153 }
154
155 /// Checks if the current token is contextual keyword with text `t`.
156 pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
157 self.token_source.is_keyword(kw)
158 }
159
160 /// Starts a new node in the syntax tree. All nodes and tokens
161 /// consumed between the `start` and the corresponding `Marker::complete`
162 /// belong to the same node.
163 pub(crate) fn start(&mut self) -> Marker {
164 let pos = self.events.len() as u32;
165 self.push_event(Event::tombstone());
166 Marker::new(pos)
167 }
168
169 /// Consume the next token if `kind` matches.
170 pub(crate) fn bump(&mut self, kind: SyntaxKind) {
171 assert!(self.eat(kind));
172 }
173
174 /// Advances the parser by one token
175 pub(crate) fn bump_any(&mut self) {
176 let kind = self.nth(0);
177 if kind == EOF {
178 return;
179 }
180 self.do_bump(kind, 1)
181 }
182
183 /// Advances the parser by one token, remapping its kind.
184 /// This is useful to create contextual keywords from
185 /// identifiers. For example, the lexer creates an `union`
186 /// *identifier* token, but the parser remaps it to the
187 /// `union` keyword, and keyword is what ends up in the
188 /// final tree.
189 pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
190 if self.nth(0) == EOF {
191 // FIXME: panic!?
192 return;
193 }
194 self.do_bump(kind, 1);
195 }
196
197 /// Emit error with the `message`
198 /// FIXME: this should be much more fancy and support
199 /// structured errors with spans and notes, like rustc
200 /// does.
201 pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
202 let msg = ParseError(Box::new(message.into()));
203 self.push_event(Event::Error { msg })
204 }
205
206 /// Consume the next token if it is `kind` or emit an error
207 /// otherwise.
208 pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
209 if self.eat(kind) {
210 return true;
211 }
212 self.error(format!("expected {:?}", kind));
213 false
214 }
215
216 /// Create an error node and consume the next token.
217 pub(crate) fn err_and_bump(&mut self, message: &str) {
218 self.err_recover(message, TokenSet::EMPTY);
219 }
220
221 /// Create an error node and consume the next token.
222 pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
223 match self.current() {
224 T!['{'] | T!['}'] => {
225 self.error(message);
226 return;
227 }
228 _ => (),
229 }
230
231 if self.at_ts(recovery) {
232 self.error(message);
233 return;
234 }
235
236 let m = self.start();
237 self.error(message);
238 self.bump_any();
239 m.complete(self, ERROR);
240 }
241
242 fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
243 for _ in 0..n_raw_tokens {
244 self.token_source.bump();
245 }
246
247 self.push_event(Event::Token { kind, n_raw_tokens });
248 }
249
250 fn push_event(&mut self, event: Event) {
251 self.events.push(event)
252 }
253}
254
255/// See `Parser::start`.
256pub(crate) struct Marker {
257 pos: u32,
258 bomb: DropBomb,
259}
260
261impl Marker {
262 fn new(pos: u32) -> Marker {
263 Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") }
264 }
265
266 /// Finishes the syntax tree node and assigns `kind` to it,
267 /// and mark the create a `CompletedMarker` for possible future
268 /// operation like `.precede()` to deal with forward_parent.
269 pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker {
270 self.bomb.defuse();
271 let idx = self.pos as usize;
272 match &mut p.events[idx] {
273 Event::Start { kind: slot, .. } => {
274 *slot = kind;
275 }
276 _ => unreachable!(),
277 }
278 let finish_pos = p.events.len() as u32;
279 p.push_event(Event::Finish);
280 CompletedMarker::new(self.pos, finish_pos, kind)
281 }
282
283 /// Abandons the syntax tree node. All its children
284 /// are attached to its parent instead.
285 pub(crate) fn abandon(mut self, p: &mut Parser) {
286 self.bomb.defuse();
287 let idx = self.pos as usize;
288 if idx == p.events.len() - 1 {
289 match p.events.pop() {
290 Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
291 _ => unreachable!(),
292 }
293 }
294 }
295}
296
297pub(crate) struct CompletedMarker {
298 start_pos: u32,
299 finish_pos: u32,
300 kind: SyntaxKind,
301}
302
303impl CompletedMarker {
304 fn new(start_pos: u32, finish_pos: u32, kind: SyntaxKind) -> Self {
305 CompletedMarker { start_pos, finish_pos, kind }
306 }
307
308 /// This method allows to create a new node which starts
309 /// *before* the current one. That is, parser could start
310 /// node `A`, then complete it, and then after parsing the
311 /// whole `A`, decide that it should have started some node
312 /// `B` before starting `A`. `precede` allows to do exactly
313 /// that. See also docs about `forward_parent` in `Event::Start`.
314 ///
315 /// Given completed events `[START, FINISH]` and its corresponding
316 /// `CompletedMarker(pos: 0, _)`.
317 /// Append a new `START` events as `[START, FINISH, NEWSTART]`,
318 /// then mark `NEWSTART` as `START`'s parent with saving its relative
319 /// distance to `NEWSTART` into forward_parent(=2 in this case);
320 pub(crate) fn precede(self, p: &mut Parser) -> Marker {
321 let new_pos = p.start();
322 let idx = self.start_pos as usize;
323 match &mut p.events[idx] {
324 Event::Start { forward_parent, .. } => {
325 *forward_parent = Some(new_pos.pos - self.start_pos);
326 }
327 _ => unreachable!(),
328 }
329 new_pos
330 }
331
332 /// Undo this completion and turns into a `Marker`
333 pub(crate) fn undo_completion(self, p: &mut Parser) -> Marker {
334 let start_idx = self.start_pos as usize;
335 let finish_idx = self.finish_pos as usize;
336 match &mut p.events[start_idx] {
337 Event::Start { kind, forward_parent: None } => *kind = TOMBSTONE,
338 _ => unreachable!(),
339 }
340 match &mut p.events[finish_idx] {
341 slot @ Event::Finish => *slot = Event::tombstone(),
342 _ => unreachable!(),
343 }
344 Marker::new(self.start_pos)
345 }
346
347 pub(crate) fn kind(&self) -> SyntaxKind {
348 self.kind
349 }
350}