aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/parsing/parser.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_syntax/src/parsing/parser.rs')
-rw-r--r--crates/ra_syntax/src/parsing/parser.rs270
1 files changed, 270 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/parsing/parser.rs b/crates/ra_syntax/src/parsing/parser.rs
new file mode 100644
index 000000000..923b0f2b2
--- /dev/null
+++ b/crates/ra_syntax/src/parsing/parser.rs
@@ -0,0 +1,270 @@
1use std::cell::Cell;
2
3use drop_bomb::DropBomb;
4
5use crate::{
6 SyntaxKind::{self, ERROR, EOF, TOMBSTONE},
7 parsing::{
8 TokenSource, ParseError,
9 token_set::TokenSet,
10 event::Event,
11 },
12};
13
14/// `Parser` struct provides the low-level API for
15/// navigating through the stream of tokens and
16/// constructing the parse tree. The actual parsing
17/// happens in the `grammar` module.
18///
19/// However, the result of this `Parser` is not a real
20/// tree, but rather a flat stream of events of the form
21/// "start expression, consume number literal,
22/// finish expression". See `Event` docs for more.
23pub(crate) struct Parser<'t> {
24 token_source: &'t dyn TokenSource,
25 token_pos: usize,
26 events: Vec<Event>,
27 steps: Cell<u32>,
28}
29
30impl<'t> Parser<'t> {
31 pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> {
32 Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) }
33 }
34
35 pub(crate) fn finish(self) -> Vec<Event> {
36 self.events
37 }
38
39 /// Returns the kind of the current token.
40 /// If parser has already reached the end of input,
41 /// the special `EOF` kind is returned.
42 pub(crate) fn current(&self) -> SyntaxKind {
43 self.nth(0)
44 }
45
46 /// Returns the kinds of the current two tokens, if they are not separated
47 /// by trivia.
48 ///
49 /// Useful for parsing things like `>>`.
50 pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> {
51 let c1 = self.token_source.token_kind(self.token_pos);
52 let c2 = self.token_source.token_kind(self.token_pos + 1);
53 if self.token_source.is_token_joint_to_next(self.token_pos) {
54 Some((c1, c2))
55 } else {
56 None
57 }
58 }
59
60 /// Returns the kinds of the current three tokens, if they are not separated
61 /// by trivia.
62 ///
63 /// Useful for parsing things like `=>>`.
64 pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> {
65 let c1 = self.token_source.token_kind(self.token_pos);
66 let c2 = self.token_source.token_kind(self.token_pos + 1);
67 let c3 = self.token_source.token_kind(self.token_pos + 2);
68 if self.token_source.is_token_joint_to_next(self.token_pos)
69 && self.token_source.is_token_joint_to_next(self.token_pos + 1)
70 {
71 Some((c1, c2, c3))
72 } else {
73 None
74 }
75 }
76
77 /// Lookahead operation: returns the kind of the next nth
78 /// token.
79 pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
80 let steps = self.steps.get();
81 assert!(steps <= 10_000_000, "the parser seems stuck");
82 self.steps.set(steps + 1);
83 self.token_source.token_kind(self.token_pos + n)
84 }
85
86 /// Checks if the current token is `kind`.
87 pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
88 self.current() == kind
89 }
90
91 /// Checks if the current token is in `kinds`.
92 pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
93 kinds.contains(self.current())
94 }
95
96 /// Checks if the current token is contextual keyword with text `t`.
97 pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
98 self.token_source.is_keyword(self.token_pos, kw)
99 }
100
101 /// Starts a new node in the syntax tree. All nodes and tokens
102 /// consumed between the `start` and the corresponding `Marker::complete`
103 /// belong to the same node.
104 pub(crate) fn start(&mut self) -> Marker {
105 let pos = self.events.len() as u32;
106 self.push_event(Event::tombstone());
107 Marker::new(pos)
108 }
109
110 /// Advances the parser by one token unconditionally.
111 pub(crate) fn bump(&mut self) {
112 let kind = self.nth(0);
113 if kind == EOF {
114 return;
115 }
116 self.do_bump(kind, 1);
117 }
118
119 /// Advances the parser by one token, remapping its kind.
120 /// This is useful to create contextual keywords from
121 /// identifiers. For example, the lexer creates an `union`
122 /// *identifier* token, but the parser remaps it to the
123 /// `union` keyword, and keyword is what ends up in the
124 /// final tree.
125 pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
126 if self.nth(0) == EOF {
127 // TODO: panic!?
128 return;
129 }
130 self.do_bump(kind, 1);
131 }
132
133 /// Advances the parser by `n` tokens, remapping its kind.
134 /// This is useful to create compound tokens from parts. For
135 /// example, an `<<` token is two consecutive remapped `<` tokens
136 pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) {
137 self.do_bump(kind, n);
138 }
139
140 /// Emit error with the `message`
141 /// TODO: this should be much more fancy and support
142 /// structured errors with spans and notes, like rustc
143 /// does.
144 pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
145 let msg = ParseError(message.into());
146 self.push_event(Event::Error { msg })
147 }
148
149 /// Consume the next token if `kind` matches.
150 pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
151 if !self.at(kind) {
152 return false;
153 }
154 self.bump();
155 true
156 }
157
158 /// Consume the next token if it is `kind` or emit an error
159 /// otherwise.
160 pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
161 if self.eat(kind) {
162 return true;
163 }
164 self.error(format!("expected {:?}", kind));
165 false
166 }
167
168 /// Create an error node and consume the next token.
169 pub(crate) fn err_and_bump(&mut self, message: &str) {
170 self.err_recover(message, TokenSet::empty());
171 }
172
173 /// Create an error node and consume the next token.
174 pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
175 if self.at(SyntaxKind::L_CURLY) || self.at(SyntaxKind::R_CURLY) || self.at_ts(recovery) {
176 self.error(message);
177 } else {
178 let m = self.start();
179 self.error(message);
180 self.bump();
181 m.complete(self, ERROR);
182 };
183 }
184
185 fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
186 self.token_pos += usize::from(n_raw_tokens);
187 self.push_event(Event::Token { kind, n_raw_tokens });
188 }
189
190 fn push_event(&mut self, event: Event) {
191 self.events.push(event)
192 }
193}
194
195/// See `Parser::start`.
196pub(crate) struct Marker {
197 pos: u32,
198 bomb: DropBomb,
199}
200
201impl Marker {
202 fn new(pos: u32) -> Marker {
203 Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") }
204 }
205
206 /// Finishes the syntax tree node and assigns `kind` to it,
207 /// and mark the create a `CompletedMarker` for possible future
208 /// operation like `.precede()` to deal with forward_parent.
209 pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker {
210 self.bomb.defuse();
211 let idx = self.pos as usize;
212 match p.events[idx] {
213 Event::Start { kind: ref mut slot, .. } => {
214 *slot = kind;
215 }
216 _ => unreachable!(),
217 }
218 p.push_event(Event::Finish);
219 CompletedMarker::new(self.pos, kind)
220 }
221
222 /// Abandons the syntax tree node. All its children
223 /// are attached to its parent instead.
224 pub(crate) fn abandon(mut self, p: &mut Parser) {
225 self.bomb.defuse();
226 let idx = self.pos as usize;
227 if idx == p.events.len() - 1 {
228 match p.events.pop() {
229 Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
230 _ => unreachable!(),
231 }
232 }
233 }
234}
235
236pub(crate) struct CompletedMarker(u32, SyntaxKind);
237
238impl CompletedMarker {
239 fn new(pos: u32, kind: SyntaxKind) -> Self {
240 CompletedMarker(pos, kind)
241 }
242
243 /// This method allows to create a new node which starts
244 /// *before* the current one. That is, parser could start
245 /// node `A`, then complete it, and then after parsing the
246 /// whole `A`, decide that it should have started some node
247 /// `B` before starting `A`. `precede` allows to do exactly
248 /// that. See also docs about `forward_parent` in `Event::Start`.
249 ///
250 /// Given completed events `[START, FINISH]` and its corresponding
251 /// `CompletedMarker(pos: 0, _)`.
252 /// Append a new `START` events as `[START, FINISH, NEWSTART]`,
253 /// then mark `NEWSTART` as `START`'s parent with saving its relative
254 /// distance to `NEWSTART` into forward_parent(=2 in this case);
255 pub(crate) fn precede(self, p: &mut Parser) -> Marker {
256 let new_pos = p.start();
257 let idx = self.0 as usize;
258 match p.events[idx] {
259 Event::Start { ref mut forward_parent, .. } => {
260 *forward_parent = Some(new_pos.pos - self.0);
261 }
262 _ => unreachable!(),
263 }
264 new_pos
265 }
266
267 pub(crate) fn kind(&self) -> SyntaxKind {
268 self.1
269 }
270}