author | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-05-27 08:28:13 +0100 |
---|---|---|
committer | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-05-27 08:28:13 +0100 |
commit | ce694ae11854a806031db98c51c068253f927519 (patch) | |
tree | 2996ecd85ff9aa57b6f208e83d42dd03a7370d1e /crates/ra_parser/src/parser.rs | |
parent | 4f4e50db908ba44f113faeb356ae2b3d0788d308 (diff) | |
parent | 90764fc54b2be1e0fc5d6ac9c9e960d7bb059b14 (diff) |
Merge #1328
1328: Change TokenSource to iteration based r=matklad a=edwin0cheng
This PR changes the `TokenSource` trait from random access to an iteration-based trait:
```rust
/// `TokenSource` abstracts the source of tokens the parser operates on.
///
/// Hopefully this will allow us to treat text and token trees in the same way!
pub trait TokenSource {
    /// Returns the token at the cursor.
    fn current(&self) -> Token;

    /// Look ahead by `n` tokens.
    fn lookahead_nth(&self, n: usize) -> Token;

    /// Bump the cursor to the next token.
    fn bump(&mut self);

    /// Is the current token a specified keyword?
    fn is_keyword(&self, kw: &str) -> bool;
}

/// `Token` abstracts the token that the `TokenSource` cursor currently points at.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub struct Token {
    /// What is the current token?
    pub kind: SyntaxKind,

    /// Is the current token joined to the next one (`> >` vs `>>`)?
    pub is_jointed_to_next: bool,
}
```
Note that the refactoring based on this new trait will be split out into upcoming PRs.
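For illustration, here is a minimal sketch of how an implementation of the new trait could look. This `VecTokenSource` is hypothetical and not part of this PR (the real implementations for text and for token trees live elsewhere); it only shows how `current`/`lookahead_nth`/`bump`/`is_keyword` might behave over a flat token buffer:

```rust
// Hypothetical example (not part of this PR): a `TokenSource` backed by a
// flat token buffer, to show how the iteration-based API can be driven.
struct VecTokenSource {
    tokens: Vec<Token>, // token kinds + jointness, as produced by some lexer
    text: Vec<String>,  // raw text of each token, used only by `is_keyword`
    pos: usize,         // cursor into `tokens`
}

impl TokenSource for VecTokenSource {
    fn current(&self) -> Token {
        self.lookahead_nth(0)
    }

    fn lookahead_nth(&self, n: usize) -> Token {
        // Looking past the end yields EOF; the exact convention here is an assumption.
        self.tokens
            .get(self.pos + n)
            .copied()
            .unwrap_or(Token { kind: SyntaxKind::EOF, is_jointed_to_next: false })
    }

    fn bump(&mut self) {
        // Advance the cursor, but never walk past the end of the buffer.
        if self.pos < self.tokens.len() {
            self.pos += 1;
        }
    }

    fn is_keyword(&self, kw: &str) -> bool {
        // Simplified: compare the current token's text against the keyword.
        self.text.get(self.pos).map_or(false, |t| t.as_str() == kw)
    }
}
```

With a `&mut dyn TokenSource` shaped like this, the parser no longer needs its own `token_pos`: lookahead goes through `lookahead_nth`, and consuming a token becomes a call to `bump`, which is exactly the shape of the changes in the diff below.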
Co-authored-by: Edwin Cheng <[email protected]>
Diffstat (limited to 'crates/ra_parser/src/parser.rs')
-rw-r--r-- | crates/ra_parser/src/parser.rs | 57
1 file changed, 33 insertions, 24 deletions
diff --git a/crates/ra_parser/src/parser.rs b/crates/ra_parser/src/parser.rs
index 4434dfb09..8f654f04c 100644
--- a/crates/ra_parser/src/parser.rs
+++ b/crates/ra_parser/src/parser.rs
@@ -19,15 +19,14 @@ use crate::{
 /// "start expression, consume number literal,
 /// finish expression". See `Event` docs for more.
 pub(crate) struct Parser<'t> {
-    token_source: &'t dyn TokenSource,
-    token_pos: usize,
+    token_source: &'t mut dyn TokenSource,
     events: Vec<Event>,
     steps: Cell<u32>,
 }
 
 impl<'t> Parser<'t> {
-    pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> {
-        Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) }
+    pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> {
+        Parser { token_source, events: Vec::new(), steps: Cell::new(0) }
     }
 
     pub(crate) fn finish(self) -> Vec<Event> {
@@ -49,7 +48,7 @@ impl<'t> Parser<'t> {
         let c1 = self.nth(0);
         let c2 = self.nth(1);
 
-        if self.token_source.is_token_joint_to_next(self.token_pos) {
+        if self.token_source.current().is_jointed_to_next {
             Some((c1, c2))
         } else {
             None
@@ -64,8 +63,8 @@ impl<'t> Parser<'t> {
         let c1 = self.nth(0);
         let c2 = self.nth(1);
         let c3 = self.nth(2);
-        if self.token_source.is_token_joint_to_next(self.token_pos)
-            && self.token_source.is_token_joint_to_next(self.token_pos + 1)
+        if self.token_source.current().is_jointed_to_next
+            && self.token_source.lookahead_nth(1).is_jointed_to_next
         {
             Some((c1, c2, c3))
         } else {
@@ -76,6 +75,8 @@ impl<'t> Parser<'t> {
     /// Lookahead operation: returns the kind of the next nth
     /// token.
     pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
+        assert!(n <= 3);
+
         let steps = self.steps.get();
         assert!(steps <= 10_000_000, "the parser seems stuck");
         self.steps.set(steps + 1);
@@ -86,7 +87,7 @@ impl<'t> Parser<'t> {
         let mut i = 0;
 
         loop {
-            let mut kind = self.token_source.token_kind(self.token_pos + i);
+            let mut kind = self.token_source.lookahead_nth(i).kind;
             if let Some((composited, step)) = self.is_composite(kind, i) {
                 kind = composited;
                 i += step;
@@ -115,7 +116,7 @@ impl<'t> Parser<'t> {
 
     /// Checks if the current token is contextual keyword with text `t`.
     pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
-        self.token_source.is_keyword(self.token_pos, kw)
+        self.token_source.is_keyword(kw)
     }
 
     /// Starts a new node in the syntax tree. All nodes and tokens
@@ -130,12 +131,12 @@ impl<'t> Parser<'t> {
     /// Advances the parser by one token unconditionally
     /// Mainly use in `token_tree` parsing
     pub(crate) fn bump_raw(&mut self) {
-        let mut kind = self.token_source.token_kind(self.token_pos);
+        let mut kind = self.token_source.current().kind;
 
         // Skip dollars, do_bump will eat these later
         let mut i = 0;
         while kind == SyntaxKind::L_DOLLAR || kind == SyntaxKind::R_DOLLAR {
-            kind = self.token_source.token_kind(self.token_pos + i);
+            kind = self.token_source.lookahead_nth(i).kind;
             i += 1;
         }
 
@@ -236,7 +237,11 @@ impl<'t> Parser<'t> {
 
     fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
         self.eat_dollars();
-        self.token_pos += usize::from(n_raw_tokens);
+
+        for _ in 0..n_raw_tokens {
+            self.token_source.bump();
+        }
+
         self.push_event(Event::Token { kind, n_raw_tokens });
     }
 
@@ -249,10 +254,14 @@ impl<'t> Parser<'t> {
         // We assume the dollars will not occuried between
         // mult-byte tokens
 
-        let jn1 = self.token_source.is_token_joint_to_next(self.token_pos + n);
-        let la2 = self.token_source.token_kind(self.token_pos + n + 1);
-        let jn2 = self.token_source.is_token_joint_to_next(self.token_pos + n + 1);
-        let la3 = self.token_source.token_kind(self.token_pos + n + 2);
+        let first = self.token_source.lookahead_nth(n);
+        let second = self.token_source.lookahead_nth(n + 1);
+        let third = self.token_source.lookahead_nth(n + 2);
+
+        let jn1 = first.is_jointed_to_next;
+        let la2 = second.kind;
+        let jn2 = second.is_jointed_to_next;
+        let la3 = third.kind;
 
         match kind {
             T![.] if jn1 && la2 == T![.] && jn2 && la3 == T![.] => Some((T![...], 3)),
@@ -271,9 +280,9 @@ impl<'t> Parser<'t> {
 
     fn eat_dollars(&mut self) {
        loop {
-            match self.token_source.token_kind(self.token_pos) {
+            match self.token_source.current().kind {
                 k @ SyntaxKind::L_DOLLAR | k @ SyntaxKind::R_DOLLAR => {
-                    self.token_pos += 1;
+                    self.token_source.bump();
                     self.push_event(Event::Token { kind: k, n_raw_tokens: 1 });
                 }
                 _ => {
@@ -286,9 +295,9 @@ impl<'t> Parser<'t> {
     pub(crate) fn eat_l_dollars(&mut self) -> usize {
         let mut ate_count = 0;
         loop {
-            match self.token_source.token_kind(self.token_pos) {
+            match self.token_source.current().kind {
                 k @ SyntaxKind::L_DOLLAR => {
-                    self.token_pos += 1;
+                    self.token_source.bump();
                     self.push_event(Event::Token { kind: k, n_raw_tokens: 1 });
                     ate_count += 1;
                 }
@@ -302,9 +311,9 @@ impl<'t> Parser<'t> {
     pub(crate) fn eat_r_dollars(&mut self, max_count: usize) -> usize {
         let mut ate_count = 0;
         loop {
-            match self.token_source.token_kind(self.token_pos) {
+            match self.token_source.current().kind {
                 k @ SyntaxKind::R_DOLLAR => {
-                    self.token_pos += 1;
+                    self.token_source.bump();
                     self.push_event(Event::Token { kind: k, n_raw_tokens: 1 });
                     ate_count += 1;
 
@@ -320,12 +329,12 @@ impl<'t> Parser<'t> {
     }
 
     pub(crate) fn at_l_dollar(&self) -> bool {
-        let kind = self.token_source.token_kind(self.token_pos);
+        let kind = self.token_source.current().kind;
         (kind == SyntaxKind::L_DOLLAR)
     }
 
     pub(crate) fn at_r_dollar(&self) -> bool {
-        let kind = self.token_source.token_kind(self.token_pos);
+        let kind = self.token_source.current().kind;
         (kind == SyntaxKind::R_DOLLAR)
     }
 }