diff options
author | Aleksey Kladov <[email protected]> | 2019-09-09 12:52:31 +0100 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2019-09-10 13:46:39 +0100 |
commit | 40170885e799ebdefb24ed00865cd1c7800af491 (patch) | |
tree | 1b7a6f1eaaa70e1db70dd5763377fd877636a55d /crates/ra_parser/src/parser.rs | |
parent | e2ebb467bdf3ebb7d29260adb95c56594c6db282 (diff) |
WIP: switch to fully decomposed tokens internally
Diffstat (limited to 'crates/ra_parser/src/parser.rs')
-rw-r--r-- | crates/ra_parser/src/parser.rs | 252 |
1 files changed, 115 insertions, 137 deletions
diff --git a/crates/ra_parser/src/parser.rs b/crates/ra_parser/src/parser.rs index d8567e84b..a27cdc2ea 100644 --- a/crates/ra_parser/src/parser.rs +++ b/crates/ra_parser/src/parser.rs | |||
@@ -6,7 +6,7 @@ use crate::{ | |||
6 | event::Event, | 6 | event::Event, |
7 | ParseError, | 7 | ParseError, |
8 | SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, | 8 | SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, |
9 | Token, TokenSet, TokenSource, T, | 9 | TokenSet, TokenSource, T, |
10 | }; | 10 | }; |
11 | 11 | ||
12 | /// `Parser` struct provides the low-level API for | 12 | /// `Parser` struct provides the low-level API for |
@@ -40,38 +40,6 @@ impl<'t> Parser<'t> { | |||
40 | self.nth(0) | 40 | self.nth(0) |
41 | } | 41 | } |
42 | 42 | ||
43 | /// Returns the kinds of the current two tokens, if they are not separated | ||
44 | /// by trivia. | ||
45 | /// | ||
46 | /// Useful for parsing things like `>>`. | ||
47 | pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { | ||
48 | let c1 = self.nth(0); | ||
49 | let c2 = self.nth(1); | ||
50 | |||
51 | if self.token_source.current().is_jointed_to_next { | ||
52 | Some((c1, c2)) | ||
53 | } else { | ||
54 | None | ||
55 | } | ||
56 | } | ||
57 | |||
58 | /// Returns the kinds of the current three tokens, if they are not separated | ||
59 | /// by trivia. | ||
60 | /// | ||
61 | /// Useful for parsing things like `=>>`. | ||
62 | pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { | ||
63 | let c1 = self.nth(0); | ||
64 | let c2 = self.nth(1); | ||
65 | let c3 = self.nth(2); | ||
66 | if self.token_source.current().is_jointed_to_next | ||
67 | && self.token_source.lookahead_nth(1).is_jointed_to_next | ||
68 | { | ||
69 | Some((c1, c2, c3)) | ||
70 | } else { | ||
71 | None | ||
72 | } | ||
73 | } | ||
74 | |||
75 | /// Lookahead operation: returns the kind of the next nth | 43 | /// Lookahead operation: returns the kind of the next nth |
76 | /// token. | 44 | /// token. |
77 | pub(crate) fn nth(&self, n: usize) -> SyntaxKind { | 45 | pub(crate) fn nth(&self, n: usize) -> SyntaxKind { |
@@ -81,33 +49,116 @@ impl<'t> Parser<'t> { | |||
81 | assert!(steps <= 10_000_000, "the parser seems stuck"); | 49 | assert!(steps <= 10_000_000, "the parser seems stuck"); |
82 | self.steps.set(steps + 1); | 50 | self.steps.set(steps + 1); |
83 | 51 | ||
84 | // It is beecause the Dollar will appear between nth | 52 | self.token_source.lookahead_nth(n).kind |
85 | // Following code skips through it | ||
86 | let mut non_dollars_count = 0; | ||
87 | let mut i = 0; | ||
88 | 53 | ||
89 | loop { | 54 | // // It is because the Dollar will appear between nth |
90 | let token = self.token_source.lookahead_nth(i); | 55 | // // Following code skips through it |
91 | let mut kind = token.kind; | 56 | // let mut non_dollars_count = 0; |
92 | if let Some((composited, step)) = self.is_composite(token, i) { | 57 | // let mut i = 0; |
93 | kind = composited; | ||
94 | i += step; | ||
95 | } else { | ||
96 | i += 1; | ||
97 | } | ||
98 | 58 | ||
99 | match kind { | 59 | // loop { |
100 | EOF => return EOF, | 60 | // let token = self.token_source.lookahead_nth(i); |
101 | SyntaxKind::L_DOLLAR | SyntaxKind::R_DOLLAR => {} | 61 | // let mut kind = token.kind; |
102 | _ if non_dollars_count == n => return kind, | 62 | // if let Some((composited, step)) = self.is_composite(token, i) { |
103 | _ => non_dollars_count += 1, | 63 | // kind = composited; |
104 | } | 64 | // i += step; |
105 | } | 65 | // } else { |
66 | // i += 1; | ||
67 | // } | ||
68 | |||
69 | // match kind { | ||
70 | // EOF => return EOF, | ||
71 | // SyntaxKind::L_DOLLAR | SyntaxKind::R_DOLLAR => {} | ||
72 | // _ if non_dollars_count == n => return kind, | ||
73 | // _ => non_dollars_count += 1, | ||
74 | // } | ||
75 | // } | ||
106 | } | 76 | } |
107 | 77 | ||
108 | /// Checks if the current token is `kind`. | 78 | /// Checks if the current token is `kind`. |
109 | pub(crate) fn at(&self, kind: SyntaxKind) -> bool { | 79 | pub(crate) fn at(&self, kind: SyntaxKind) -> bool { |
110 | self.current() == kind | 80 | self.nth_at(0, kind) |
81 | } | ||
82 | |||
83 | pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { | ||
84 | match kind { | ||
85 | T![-=] => self.at_composite2(n, T![-], T![=]), | ||
86 | T![->] => self.at_composite2(n, T![-], T![>]), | ||
87 | T![::] => self.at_composite2(n, T![:], T![:]), | ||
88 | T![!=] => self.at_composite2(n, T![!], T![=]), | ||
89 | T![..] => self.at_composite2(n, T![.], T![.]), | ||
90 | T![*=] => self.at_composite2(n, T![*], T![=]), | ||
91 | T![/=] => self.at_composite2(n, T![/], T![=]), | ||
92 | T![&&] => self.at_composite2(n, T![&], T![&]), | ||
93 | T![&=] => self.at_composite2(n, T![&], T![=]), | ||
94 | T![%=] => self.at_composite2(n, T![%], T![=]), | ||
95 | T![^=] => self.at_composite2(n, T![^], T![=]), | ||
96 | T![+=] => self.at_composite2(n, T![+], T![=]), | ||
97 | T![<<] => self.at_composite2(n, T![<], T![<]), | ||
98 | T![<=] => self.at_composite2(n, T![<], T![=]), | ||
99 | T![==] => self.at_composite2(n, T![=], T![=]), | ||
100 | T![=>] => self.at_composite2(n, T![=], T![>]), | ||
101 | T![>=] => self.at_composite2(n, T![>], T![=]), | ||
102 | T![>>] => self.at_composite2(n, T![>], T![>]), | ||
103 | T![|=] => self.at_composite2(n, T![|], T![=]), | ||
104 | T![||] => self.at_composite2(n, T![|], T![|]), | ||
105 | |||
106 | T![...] => self.at_composite3(n, T![.], T![.], T![.]), | ||
107 | T![..=] => self.at_composite3(n, T![.], T![.], T![=]), | ||
108 | T![<<=] => self.at_composite3(n, T![<], T![<], T![=]), | ||
109 | T![>>=] => self.at_composite3(n, T![>], T![>], T![=]), | ||
110 | |||
111 | _ => self.token_source.lookahead_nth(n).kind == kind, | ||
112 | } | ||
113 | } | ||
114 | |||
115 | /// Consume the next token if `kind` matches. | ||
116 | pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { | ||
117 | if !self.at(kind) { | ||
118 | return false; | ||
119 | } | ||
120 | let n_raw_tokens = match kind { | ||
121 | T![-=] | ||
122 | | T![->] | ||
123 | | T![::] | ||
124 | | T![!=] | ||
125 | | T![..] | ||
126 | | T![*=] | ||
127 | | T![/=] | ||
128 | | T![&&] | ||
129 | | T![&=] | ||
130 | | T![%=] | ||
131 | | T![^=] | ||
132 | | T![+=] | ||
133 | | T![<<] | ||
134 | | T![<=] | ||
135 | | T![==] | ||
136 | | T![=>] | ||
137 | | T![>=] | ||
138 | | T![>>] | ||
139 | | T![|=] | ||
140 | | T![||] => 2, | ||
141 | |||
142 | T![...] | T![..=] | T![<<=] | T![>>=] => 3, | ||
143 | _ => 1, | ||
144 | }; | ||
145 | self.do_bump(kind, n_raw_tokens); | ||
146 | true | ||
147 | } | ||
148 | |||
149 | fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool { | ||
150 | let t1 = self.token_source.lookahead_nth(n + 0); | ||
151 | let t2 = self.token_source.lookahead_nth(n + 1); | ||
152 | t1.kind == k1 && t1.is_jointed_to_next && t2.kind == k2 | ||
153 | } | ||
154 | |||
155 | fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool { | ||
156 | let t1 = self.token_source.lookahead_nth(n + 0); | ||
157 | let t2 = self.token_source.lookahead_nth(n + 1); | ||
158 | let t3 = self.token_source.lookahead_nth(n + 2); | ||
159 | (t1.kind == k1 && t1.is_jointed_to_next) | ||
160 | && (t2.kind == k2 && t2.is_jointed_to_next) | ||
161 | && t3.kind == k3 | ||
111 | } | 162 | } |
112 | 163 | ||
113 | /// Checks if the current token is in `kinds`. | 164 | /// Checks if the current token is in `kinds`. |
@@ -129,9 +180,15 @@ impl<'t> Parser<'t> { | |||
129 | Marker::new(pos) | 180 | Marker::new(pos) |
130 | } | 181 | } |
131 | 182 | ||
183 | /// Consume the next token if `kind` matches. | ||
184 | pub(crate) fn bump(&mut self, kind: SyntaxKind) { | ||
185 | assert!(self.eat(kind)); | ||
186 | } | ||
187 | |||
132 | /// Advances the parser by one token unconditionally | 188 | /// Advances the parser by one token unconditionally |
133 | /// Mainly use in `token_tree` parsing | 189 | /// Mainly use in `token_tree` parsing |
134 | pub(crate) fn bump_raw(&mut self) { | 190 | #[allow(unused)] |
191 | fn bump_raw(&mut self) { | ||
135 | let mut kind = self.token_source.current().kind; | 192 | let mut kind = self.token_source.current().kind; |
136 | 193 | ||
137 | // Skip dollars, do_bump will eat these later | 194 | // Skip dollars, do_bump will eat these later |
@@ -153,27 +210,7 @@ impl<'t> Parser<'t> { | |||
153 | if kind == EOF { | 210 | if kind == EOF { |
154 | return; | 211 | return; |
155 | } | 212 | } |
156 | 213 | self.do_bump(kind, 1) | |
157 | use SyntaxKind::*; | ||
158 | |||
159 | // Handle parser composites | ||
160 | match kind { | ||
161 | T![...] | T![..=] => { | ||
162 | self.bump_compound(kind, 3); | ||
163 | } | ||
164 | T![..] | T![::] | T![==] | T![=>] | T![!=] | T![->] => { | ||
165 | self.bump_compound(kind, 2); | ||
166 | } | ||
167 | _ => { | ||
168 | self.do_bump(kind, 1); | ||
169 | } | ||
170 | } | ||
171 | } | ||
172 | |||
173 | /// Advances the parser by one token, asserting that it is exactly the expected token | ||
174 | pub(crate) fn bump(&mut self, expected: SyntaxKind) { | ||
175 | debug_assert!(self.nth(0) == expected); | ||
176 | self.bump_any() | ||
177 | } | 214 | } |
178 | 215 | ||
179 | /// Advances the parser by one token, remapping its kind. | 216 | /// Advances the parser by one token, remapping its kind. |
@@ -190,13 +227,6 @@ impl<'t> Parser<'t> { | |||
190 | self.do_bump(kind, 1); | 227 | self.do_bump(kind, 1); |
191 | } | 228 | } |
192 | 229 | ||
193 | /// Advances the parser by `n` tokens, remapping its kind. | ||
194 | /// This is useful to create compound tokens from parts. For | ||
195 | /// example, an `<<` token is two consecutive remapped `<` tokens | ||
196 | pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { | ||
197 | self.do_bump(kind, n); | ||
198 | } | ||
199 | |||
200 | /// Emit error with the `message` | 230 | /// Emit error with the `message` |
201 | /// FIXME: this should be much more fancy and support | 231 | /// FIXME: this should be much more fancy and support |
202 | /// structured errors with spans and notes, like rustc | 232 | /// structured errors with spans and notes, like rustc |
@@ -206,15 +236,6 @@ impl<'t> Parser<'t> { | |||
206 | self.push_event(Event::Error { msg }) | 236 | self.push_event(Event::Error { msg }) |
207 | } | 237 | } |
208 | 238 | ||
209 | /// Consume the next token if `kind` matches. | ||
210 | pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { | ||
211 | if !self.at(kind) { | ||
212 | return false; | ||
213 | } | ||
214 | self.bump_any(); | ||
215 | true | ||
216 | } | ||
217 | |||
218 | /// Consume the next token if it is `kind` or emit an error | 239 | /// Consume the next token if it is `kind` or emit an error |
219 | /// otherwise. | 240 | /// otherwise. |
220 | pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { | 241 | pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { |
@@ -243,7 +264,7 @@ impl<'t> Parser<'t> { | |||
243 | } | 264 | } |
244 | 265 | ||
245 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { | 266 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { |
246 | self.eat_dollars(); | 267 | // self.eat_dollars(); |
247 | 268 | ||
248 | for _ in 0..n_raw_tokens { | 269 | for _ in 0..n_raw_tokens { |
249 | self.token_source.bump(); | 270 | self.token_source.bump(); |
@@ -256,50 +277,7 @@ impl<'t> Parser<'t> { | |||
256 | self.events.push(event) | 277 | self.events.push(event) |
257 | } | 278 | } |
258 | 279 | ||
259 | /// helper function for check if it is composite. | 280 | #[allow(unused)] |
260 | fn is_composite(&self, first: Token, n: usize) -> Option<(SyntaxKind, usize)> { | ||
261 | // We assume the dollars will not occuried between | ||
262 | // mult-byte tokens | ||
263 | |||
264 | let jn1 = first.is_jointed_to_next; | ||
265 | if !jn1 && first.kind != T![-] { | ||
266 | return None; | ||
267 | } | ||
268 | |||
269 | let second = self.token_source.lookahead_nth(n + 1); | ||
270 | if first.kind == T![-] && second.kind == T![>] { | ||
271 | return Some((T![->], 2)); | ||
272 | } | ||
273 | if !jn1 { | ||
274 | return None; | ||
275 | } | ||
276 | |||
277 | match (first.kind, second.kind) { | ||
278 | (T![:], T![:]) => return Some((T![::], 2)), | ||
279 | (T![=], T![=]) => return Some((T![==], 2)), | ||
280 | (T![=], T![>]) => return Some((T![=>], 2)), | ||
281 | (T![!], T![=]) => return Some((T![!=], 2)), | ||
282 | _ => {} | ||
283 | } | ||
284 | |||
285 | if first.kind != T![.] || second.kind != T![.] { | ||
286 | return None; | ||
287 | } | ||
288 | |||
289 | let third = self.token_source.lookahead_nth(n + 2); | ||
290 | |||
291 | let jn2 = second.is_jointed_to_next; | ||
292 | let la3 = third.kind; | ||
293 | |||
294 | if jn2 && la3 == T![.] { | ||
295 | return Some((T![...], 3)); | ||
296 | } | ||
297 | if la3 == T![=] { | ||
298 | return Some((T![..=], 3)); | ||
299 | } | ||
300 | return Some((T![..], 2)); | ||
301 | } | ||
302 | |||
303 | fn eat_dollars(&mut self) { | 281 | fn eat_dollars(&mut self) { |
304 | loop { | 282 | loop { |
305 | match self.token_source.current().kind { | 283 | match self.token_source.current().kind { |