diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2019-09-10 14:01:44 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2019-09-10 14:01:44 +0100 |
commit | 9d3c78e2eee6635772c99d7351b621cefb08bac5 (patch) | |
tree | 14e74b81c7195bcd5b308e799c3fd447e76274ca /crates/ra_parser/src/parser.rs | |
parent | e2ebb467bdf3ebb7d29260adb95c56594c6db282 (diff) | |
parent | ed726081d1df2fb6a1c21101996dcae203f79021 (diff) |
Merge #1801
1801: WIP: switch to fully decomposed tokens internally r=matklad a=matklad
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_parser/src/parser.rs')
-rw-r--r-- | crates/ra_parser/src/parser.rs | 255 |
1 files changed, 88 insertions, 167 deletions
diff --git a/crates/ra_parser/src/parser.rs b/crates/ra_parser/src/parser.rs index d8567e84b..e7281123b 100644 --- a/crates/ra_parser/src/parser.rs +++ b/crates/ra_parser/src/parser.rs | |||
@@ -6,7 +6,7 @@ use crate::{ | |||
6 | event::Event, | 6 | event::Event, |
7 | ParseError, | 7 | ParseError, |
8 | SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, | 8 | SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, |
9 | Token, TokenSet, TokenSource, T, | 9 | TokenSet, TokenSource, T, |
10 | }; | 10 | }; |
11 | 11 | ||
12 | /// `Parser` struct provides the low-level API for | 12 | /// `Parser` struct provides the low-level API for |
@@ -40,38 +40,6 @@ impl<'t> Parser<'t> { | |||
40 | self.nth(0) | 40 | self.nth(0) |
41 | } | 41 | } |
42 | 42 | ||
43 | /// Returns the kinds of the current two tokens, if they are not separated | ||
44 | /// by trivia. | ||
45 | /// | ||
46 | /// Useful for parsing things like `>>`. | ||
47 | pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { | ||
48 | let c1 = self.nth(0); | ||
49 | let c2 = self.nth(1); | ||
50 | |||
51 | if self.token_source.current().is_jointed_to_next { | ||
52 | Some((c1, c2)) | ||
53 | } else { | ||
54 | None | ||
55 | } | ||
56 | } | ||
57 | |||
58 | /// Returns the kinds of the current three tokens, if they are not separated | ||
59 | /// by trivia. | ||
60 | /// | ||
61 | /// Useful for parsing things like `=>>`. | ||
62 | pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { | ||
63 | let c1 = self.nth(0); | ||
64 | let c2 = self.nth(1); | ||
65 | let c3 = self.nth(2); | ||
66 | if self.token_source.current().is_jointed_to_next | ||
67 | && self.token_source.lookahead_nth(1).is_jointed_to_next | ||
68 | { | ||
69 | Some((c1, c2, c3)) | ||
70 | } else { | ||
71 | None | ||
72 | } | ||
73 | } | ||
74 | |||
75 | /// Lookahead operation: returns the kind of the next nth | 43 | /// Lookahead operation: returns the kind of the next nth |
76 | /// token. | 44 | /// token. |
77 | pub(crate) fn nth(&self, n: usize) -> SyntaxKind { | 45 | pub(crate) fn nth(&self, n: usize) -> SyntaxKind { |
@@ -81,33 +49,93 @@ impl<'t> Parser<'t> { | |||
81 | assert!(steps <= 10_000_000, "the parser seems stuck"); | 49 | assert!(steps <= 10_000_000, "the parser seems stuck"); |
82 | self.steps.set(steps + 1); | 50 | self.steps.set(steps + 1); |
83 | 51 | ||
84 | // It is because the Dollar will appear between nth | 52 | self.token_source.lookahead_nth(n).kind |
85 | // Following code skips through it | 53 | } |
86 | let mut non_dollars_count = 0; | ||
87 | let mut i = 0; | ||
88 | 54 | ||
89 | loop { | 55 | /// Checks if the current token is `kind`. |
90 | let token = self.token_source.lookahead_nth(i); | 56 | pub(crate) fn at(&self, kind: SyntaxKind) -> bool { |
91 | let mut kind = token.kind; | 57 | self.nth_at(0, kind) |
92 | if let Some((composited, step)) = self.is_composite(token, i) { | 58 | } |
93 | kind = composited; | ||
94 | i += step; | ||
95 | } else { | ||
96 | i += 1; | ||
97 | } | ||
98 | 59 | ||
99 | match kind { | 60 | pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { |
100 | EOF => return EOF, | 61 | match kind { |
101 | SyntaxKind::L_DOLLAR | SyntaxKind::R_DOLLAR => {} | 62 | T![-=] => self.at_composite2(n, T![-], T![=]), |
102 | _ if non_dollars_count == n => return kind, | 63 | T![->] => self.at_composite2(n, T![-], T![>]), |
103 | _ => non_dollars_count += 1, | 64 | T![::] => self.at_composite2(n, T![:], T![:]), |
104 | } | 65 | T![!=] => self.at_composite2(n, T![!], T![=]), |
66 | T![..] => self.at_composite2(n, T![.], T![.]), | ||
67 | T![*=] => self.at_composite2(n, T![*], T![=]), | ||
68 | T![/=] => self.at_composite2(n, T![/], T![=]), | ||
69 | T![&&] => self.at_composite2(n, T![&], T![&]), | ||
70 | T![&=] => self.at_composite2(n, T![&], T![=]), | ||
71 | T![%=] => self.at_composite2(n, T![%], T![=]), | ||
72 | T![^=] => self.at_composite2(n, T![^], T![=]), | ||
73 | T![+=] => self.at_composite2(n, T![+], T![=]), | ||
74 | T![<<] => self.at_composite2(n, T![<], T![<]), | ||
75 | T![<=] => self.at_composite2(n, T![<], T![=]), | ||
76 | T![==] => self.at_composite2(n, T![=], T![=]), | ||
77 | T![=>] => self.at_composite2(n, T![=], T![>]), | ||
78 | T![>=] => self.at_composite2(n, T![>], T![=]), | ||
79 | T![>>] => self.at_composite2(n, T![>], T![>]), | ||
80 | T![|=] => self.at_composite2(n, T![|], T![=]), | ||
81 | T![||] => self.at_composite2(n, T![|], T![|]), | ||
82 | |||
83 | T![...] => self.at_composite3(n, T![.], T![.], T![.]), | ||
84 | T![..=] => self.at_composite3(n, T![.], T![.], T![=]), | ||
85 | T![<<=] => self.at_composite3(n, T![<], T![<], T![=]), | ||
86 | T![>>=] => self.at_composite3(n, T![>], T![>], T![=]), | ||
87 | |||
88 | _ => self.token_source.lookahead_nth(n).kind == kind, | ||
105 | } | 89 | } |
106 | } | 90 | } |
107 | 91 | ||
108 | /// Checks if the current token is `kind`. | 92 | /// Consume the next token if `kind` matches. |
109 | pub(crate) fn at(&self, kind: SyntaxKind) -> bool { | 93 | pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { |
110 | self.current() == kind | 94 | if !self.at(kind) { |
95 | return false; | ||
96 | } | ||
97 | let n_raw_tokens = match kind { | ||
98 | T![-=] | ||
99 | | T![->] | ||
100 | | T![::] | ||
101 | | T![!=] | ||
102 | | T![..] | ||
103 | | T![*=] | ||
104 | | T![/=] | ||
105 | | T![&&] | ||
106 | | T![&=] | ||
107 | | T![%=] | ||
108 | | T![^=] | ||
109 | | T![+=] | ||
110 | | T![<<] | ||
111 | | T![<=] | ||
112 | | T![==] | ||
113 | | T![=>] | ||
114 | | T![>=] | ||
115 | | T![>>] | ||
116 | | T![|=] | ||
117 | | T![||] => 2, | ||
118 | |||
119 | T![...] | T![..=] | T![<<=] | T![>>=] => 3, | ||
120 | _ => 1, | ||
121 | }; | ||
122 | self.do_bump(kind, n_raw_tokens); | ||
123 | true | ||
124 | } | ||
125 | |||
126 | fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool { | ||
127 | let t1 = self.token_source.lookahead_nth(n + 0); | ||
128 | let t2 = self.token_source.lookahead_nth(n + 1); | ||
129 | t1.kind == k1 && t1.is_jointed_to_next && t2.kind == k2 | ||
130 | } | ||
131 | |||
132 | fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool { | ||
133 | let t1 = self.token_source.lookahead_nth(n + 0); | ||
134 | let t2 = self.token_source.lookahead_nth(n + 1); | ||
135 | let t3 = self.token_source.lookahead_nth(n + 2); | ||
136 | (t1.kind == k1 && t1.is_jointed_to_next) | ||
137 | && (t2.kind == k2 && t2.is_jointed_to_next) | ||
138 | && t3.kind == k3 | ||
111 | } | 139 | } |
112 | 140 | ||
113 | /// Checks if the current token is in `kinds`. | 141 | /// Checks if the current token is in `kinds`. |
@@ -129,22 +157,9 @@ impl<'t> Parser<'t> { | |||
129 | Marker::new(pos) | 157 | Marker::new(pos) |
130 | } | 158 | } |
131 | 159 | ||
132 | /// Advances the parser by one token unconditionally | 160 | /// Consume the next token if `kind` matches. |
133 | /// Mainly use in `token_tree` parsing | 161 | pub(crate) fn bump(&mut self, kind: SyntaxKind) { |
134 | pub(crate) fn bump_raw(&mut self) { | 162 | assert!(self.eat(kind)); |
135 | let mut kind = self.token_source.current().kind; | ||
136 | |||
137 | // Skip dollars, do_bump will eat these later | ||
138 | let mut i = 0; | ||
139 | while kind == SyntaxKind::L_DOLLAR || kind == SyntaxKind::R_DOLLAR { | ||
140 | kind = self.token_source.lookahead_nth(i).kind; | ||
141 | i += 1; | ||
142 | } | ||
143 | |||
144 | if kind == EOF { | ||
145 | return; | ||
146 | } | ||
147 | self.do_bump(kind, 1); | ||
148 | } | 163 | } |
149 | 164 | ||
150 | /// Advances the parser by one token with composite puncts handled | 165 | /// Advances the parser by one token with composite puncts handled |
@@ -153,27 +168,7 @@ impl<'t> Parser<'t> { | |||
153 | if kind == EOF { | 168 | if kind == EOF { |
154 | return; | 169 | return; |
155 | } | 170 | } |
156 | 171 | self.do_bump(kind, 1) | |
157 | use SyntaxKind::*; | ||
158 | |||
159 | // Handle parser composites | ||
160 | match kind { | ||
161 | T![...] | T![..=] => { | ||
162 | self.bump_compound(kind, 3); | ||
163 | } | ||
164 | T![..] | T![::] | T![==] | T![=>] | T![!=] | T![->] => { | ||
165 | self.bump_compound(kind, 2); | ||
166 | } | ||
167 | _ => { | ||
168 | self.do_bump(kind, 1); | ||
169 | } | ||
170 | } | ||
171 | } | ||
172 | |||
173 | /// Advances the parser by one token, asserting that it is exactly the expected token | ||
174 | pub(crate) fn bump(&mut self, expected: SyntaxKind) { | ||
175 | debug_assert!(self.nth(0) == expected); | ||
176 | self.bump_any() | ||
177 | } | 172 | } |
178 | 173 | ||
179 | /// Advances the parser by one token, remapping its kind. | 174 | /// Advances the parser by one token, remapping its kind. |
@@ -190,13 +185,6 @@ impl<'t> Parser<'t> { | |||
190 | self.do_bump(kind, 1); | 185 | self.do_bump(kind, 1); |
191 | } | 186 | } |
192 | 187 | ||
193 | /// Advances the parser by `n` tokens, remapping its kind. | ||
194 | /// This is useful to create compound tokens from parts. For | ||
195 | /// example, an `<<` token is two consecutive remapped `<` tokens | ||
196 | pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { | ||
197 | self.do_bump(kind, n); | ||
198 | } | ||
199 | |||
200 | /// Emit error with the `message` | 188 | /// Emit error with the `message` |
201 | /// FIXME: this should be much more fancy and support | 189 | /// FIXME: this should be much more fancy and support |
202 | /// structured errors with spans and notes, like rustc | 190 | /// structured errors with spans and notes, like rustc |
@@ -206,15 +194,6 @@ impl<'t> Parser<'t> { | |||
206 | self.push_event(Event::Error { msg }) | 194 | self.push_event(Event::Error { msg }) |
207 | } | 195 | } |
208 | 196 | ||
209 | /// Consume the next token if `kind` matches. | ||
210 | pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { | ||
211 | if !self.at(kind) { | ||
212 | return false; | ||
213 | } | ||
214 | self.bump_any(); | ||
215 | true | ||
216 | } | ||
217 | |||
218 | /// Consume the next token if it is `kind` or emit an error | 197 | /// Consume the next token if it is `kind` or emit an error |
219 | /// otherwise. | 198 | /// otherwise. |
220 | pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { | 199 | pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { |
@@ -243,7 +222,7 @@ impl<'t> Parser<'t> { | |||
243 | } | 222 | } |
244 | 223 | ||
245 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { | 224 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { |
246 | self.eat_dollars(); | 225 | // self.eat_dollars(); |
247 | 226 | ||
248 | for _ in 0..n_raw_tokens { | 227 | for _ in 0..n_raw_tokens { |
249 | self.token_source.bump(); | 228 | self.token_source.bump(); |
@@ -256,64 +235,6 @@ impl<'t> Parser<'t> { | |||
256 | self.events.push(event) | 235 | self.events.push(event) |
257 | } | 236 | } |
258 | 237 | ||
259 | /// helper function for check if it is composite. | ||
260 | fn is_composite(&self, first: Token, n: usize) -> Option<(SyntaxKind, usize)> { | ||
261 | // We assume the dollars will not occur between | |
262 | // multi-byte tokens | |
263 | |||
264 | let jn1 = first.is_jointed_to_next; | ||
265 | if !jn1 && first.kind != T![-] { | ||
266 | return None; | ||
267 | } | ||
268 | |||
269 | let second = self.token_source.lookahead_nth(n + 1); | ||
270 | if first.kind == T![-] && second.kind == T![>] { | ||
271 | return Some((T![->], 2)); | ||
272 | } | ||
273 | if !jn1 { | ||
274 | return None; | ||
275 | } | ||
276 | |||
277 | match (first.kind, second.kind) { | ||
278 | (T![:], T![:]) => return Some((T![::], 2)), | ||
279 | (T![=], T![=]) => return Some((T![==], 2)), | ||
280 | (T![=], T![>]) => return Some((T![=>], 2)), | ||
281 | (T![!], T![=]) => return Some((T![!=], 2)), | ||
282 | _ => {} | ||
283 | } | ||
284 | |||
285 | if first.kind != T![.] || second.kind != T![.] { | ||
286 | return None; | ||
287 | } | ||
288 | |||
289 | let third = self.token_source.lookahead_nth(n + 2); | ||
290 | |||
291 | let jn2 = second.is_jointed_to_next; | ||
292 | let la3 = third.kind; | ||
293 | |||
294 | if jn2 && la3 == T![.] { | ||
295 | return Some((T![...], 3)); | ||
296 | } | ||
297 | if la3 == T![=] { | ||
298 | return Some((T![..=], 3)); | ||
299 | } | ||
300 | return Some((T![..], 2)); | ||
301 | } | ||
302 | |||
303 | fn eat_dollars(&mut self) { | ||
304 | loop { | ||
305 | match self.token_source.current().kind { | ||
306 | k @ SyntaxKind::L_DOLLAR | k @ SyntaxKind::R_DOLLAR => { | ||
307 | self.token_source.bump(); | ||
308 | self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); | ||
309 | } | ||
310 | _ => { | ||
311 | return; | ||
312 | } | ||
313 | } | ||
314 | } | ||
315 | } | ||
316 | |||
317 | pub(crate) fn eat_l_dollars(&mut self) -> usize { | 238 | pub(crate) fn eat_l_dollars(&mut self) -> usize { |
318 | let mut ate_count = 0; | 239 | let mut ate_count = 0; |
319 | loop { | 240 | loop { |