aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/parsing/parser.rs
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2019-02-20 20:05:59 +0000
committerAleksey Kladov <[email protected]>2019-02-20 20:05:59 +0000
commit61992dc1cd4956038e3c15439c1203f21e05af06 (patch)
tree0523a030376c0f7792cfc8341b3cfd80f48bde9f /crates/ra_syntax/src/parsing/parser.rs
parent4c1f9b8d4e9ab9ba3b16d2b03f3c8bcc7f61706e (diff)
simplify
Diffstat (limited to 'crates/ra_syntax/src/parsing/parser.rs')
-rw-r--r--crates/ra_syntax/src/parsing/parser.rs271
1 files changed, 271 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/parsing/parser.rs b/crates/ra_syntax/src/parsing/parser.rs
new file mode 100644
index 000000000..988fcb518
--- /dev/null
+++ b/crates/ra_syntax/src/parsing/parser.rs
@@ -0,0 +1,271 @@
1use std::cell::Cell;
2
3use drop_bomb::DropBomb;
4
5use crate::{
6 syntax_error::ParseError,
7 SyntaxKind::{self, ERROR, EOF, TOMBSTONE},
8 parsing::{
9 TokenSource,
10 token_set::TokenSet,
11 event::Event,
12 },
13};
14
15/// `Parser` struct provides the low-level API for
16/// navigating through the stream of tokens and
17/// constructing the parse tree. The actual parsing
18/// happens in the `grammar` module.
19///
20/// However, the result of this `Parser` is not a real
21/// tree, but rather a flat stream of events of the form
22/// "start expression, consume number literal,
23/// finish expression". See `Event` docs for more.
24pub(crate) struct Parser<'t> {
25 token_source: &'t dyn TokenSource,
26 token_pos: usize,
27 events: Vec<Event>,
28 steps: Cell<u32>,
29}
30
31impl<'t> Parser<'t> {
32 pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> {
33 Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) }
34 }
35
36 pub(crate) fn finish(self) -> Vec<Event> {
37 self.events
38 }
39
40 /// Returns the kind of the current token.
41 /// If parser has already reached the end of input,
42 /// the special `EOF` kind is returned.
43 pub(crate) fn current(&self) -> SyntaxKind {
44 self.nth(0)
45 }
46
47 /// Returns the kinds of the current two tokens, if they are not separated
48 /// by trivia.
49 ///
50 /// Useful for parsing things like `>>`.
51 pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> {
52 let c1 = self.token_source.token_kind(self.token_pos);
53 let c2 = self.token_source.token_kind(self.token_pos + 1);
54 if self.token_source.is_token_joint_to_next(self.token_pos) {
55 Some((c1, c2))
56 } else {
57 None
58 }
59 }
60
61 /// Returns the kinds of the current three tokens, if they are not separated
62 /// by trivia.
63 ///
64 /// Useful for parsing things like `=>>`.
65 pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> {
66 let c1 = self.token_source.token_kind(self.token_pos);
67 let c2 = self.token_source.token_kind(self.token_pos + 1);
68 let c3 = self.token_source.token_kind(self.token_pos + 2);
69 if self.token_source.is_token_joint_to_next(self.token_pos)
70 && self.token_source.is_token_joint_to_next(self.token_pos + 1)
71 {
72 Some((c1, c2, c3))
73 } else {
74 None
75 }
76 }
77
78 /// Lookahead operation: returns the kind of the next nth
79 /// token.
80 pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
81 let steps = self.steps.get();
82 assert!(steps <= 10_000_000, "the parser seems stuck");
83 self.steps.set(steps + 1);
84 self.token_source.token_kind(self.token_pos + n)
85 }
86
87 /// Checks if the current token is `kind`.
88 pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
89 self.current() == kind
90 }
91
92 /// Checks if the current token is in `kinds`.
93 pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool {
94 kinds.contains(self.current())
95 }
96
97 /// Checks if the current token is contextual keyword with text `t`.
98 pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
99 self.token_source.is_keyword(self.token_pos, kw)
100 }
101
102 /// Starts a new node in the syntax tree. All nodes and tokens
103 /// consumed between the `start` and the corresponding `Marker::complete`
104 /// belong to the same node.
105 pub(crate) fn start(&mut self) -> Marker {
106 let pos = self.events.len() as u32;
107 self.push_event(Event::tombstone());
108 Marker::new(pos)
109 }
110
111 /// Advances the parser by one token unconditionally.
112 pub(crate) fn bump(&mut self) {
113 let kind = self.nth(0);
114 if kind == EOF {
115 return;
116 }
117 self.do_bump(kind, 1);
118 }
119
120 /// Advances the parser by one token, remapping its kind.
121 /// This is useful to create contextual keywords from
122 /// identifiers. For example, the lexer creates an `union`
123 /// *identifier* token, but the parser remaps it to the
124 /// `union` keyword, and keyword is what ends up in the
125 /// final tree.
126 pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
127 if self.nth(0) == EOF {
128 // TODO: panic!?
129 return;
130 }
131 self.do_bump(kind, 1);
132 }
133
134 /// Advances the parser by `n` tokens, remapping its kind.
135 /// This is useful to create compound tokens from parts. For
136 /// example, an `<<` token is two consecutive remapped `<` tokens
137 pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) {
138 self.do_bump(kind, n);
139 }
140
141 /// Emit error with the `message`
142 /// TODO: this should be much more fancy and support
143 /// structured errors with spans and notes, like rustc
144 /// does.
145 pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
146 let msg = ParseError(message.into());
147 self.push_event(Event::Error { msg })
148 }
149
150 /// Consume the next token if `kind` matches.
151 pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
152 if !self.at(kind) {
153 return false;
154 }
155 self.bump();
156 true
157 }
158
159 /// Consume the next token if it is `kind` or emit an error
160 /// otherwise.
161 pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
162 if self.eat(kind) {
163 return true;
164 }
165 self.error(format!("expected {:?}", kind));
166 false
167 }
168
169 /// Create an error node and consume the next token.
170 pub(crate) fn err_and_bump(&mut self, message: &str) {
171 self.err_recover(message, TokenSet::empty());
172 }
173
174 /// Create an error node and consume the next token.
175 pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) {
176 if self.at(SyntaxKind::L_CURLY) || self.at(SyntaxKind::R_CURLY) || self.at_ts(recovery) {
177 self.error(message);
178 } else {
179 let m = self.start();
180 self.error(message);
181 self.bump();
182 m.complete(self, ERROR);
183 };
184 }
185
186 fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
187 self.token_pos += usize::from(n_raw_tokens);
188 self.push_event(Event::Token { kind, n_raw_tokens });
189 }
190
191 fn push_event(&mut self, event: Event) {
192 self.events.push(event)
193 }
194}
195
196/// See `Parser::start`.
197pub(crate) struct Marker {
198 pos: u32,
199 bomb: DropBomb,
200}
201
202impl Marker {
203 fn new(pos: u32) -> Marker {
204 Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") }
205 }
206
207 /// Finishes the syntax tree node and assigns `kind` to it,
208 /// and mark the create a `CompletedMarker` for possible future
209 /// operation like `.precede()` to deal with forward_parent.
210 pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker {
211 self.bomb.defuse();
212 let idx = self.pos as usize;
213 match p.events[idx] {
214 Event::Start { kind: ref mut slot, .. } => {
215 *slot = kind;
216 }
217 _ => unreachable!(),
218 }
219 p.push_event(Event::Finish);
220 CompletedMarker::new(self.pos, kind)
221 }
222
223 /// Abandons the syntax tree node. All its children
224 /// are attached to its parent instead.
225 pub(crate) fn abandon(mut self, p: &mut Parser) {
226 self.bomb.defuse();
227 let idx = self.pos as usize;
228 if idx == p.events.len() - 1 {
229 match p.events.pop() {
230 Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
231 _ => unreachable!(),
232 }
233 }
234 }
235}
236
237pub(crate) struct CompletedMarker(u32, SyntaxKind);
238
239impl CompletedMarker {
240 fn new(pos: u32, kind: SyntaxKind) -> Self {
241 CompletedMarker(pos, kind)
242 }
243
244 /// This method allows to create a new node which starts
245 /// *before* the current one. That is, parser could start
246 /// node `A`, then complete it, and then after parsing the
247 /// whole `A`, decide that it should have started some node
248 /// `B` before starting `A`. `precede` allows to do exactly
249 /// that. See also docs about `forward_parent` in `Event::Start`.
250 ///
251 /// Given completed events `[START, FINISH]` and its corresponding
252 /// `CompletedMarker(pos: 0, _)`.
253 /// Append a new `START` events as `[START, FINISH, NEWSTART]`,
254 /// then mark `NEWSTART` as `START`'s parent with saving its relative
255 /// distance to `NEWSTART` into forward_parent(=2 in this case);
256 pub(crate) fn precede(self, p: &mut Parser) -> Marker {
257 let new_pos = p.start();
258 let idx = self.0 as usize;
259 match p.events[idx] {
260 Event::Start { ref mut forward_parent, .. } => {
261 *forward_parent = Some(new_pos.pos - self.0);
262 }
263 _ => unreachable!(),
264 }
265 new_pos
266 }
267
268 pub(crate) fn kind(&self) -> SyntaxKind {
269 self.1
270 }
271}