From 40170885e799ebdefb24ed00865cd1c7800af491 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 9 Sep 2019 14:52:31 +0300 Subject: WIP: switch to fully decomposed tokens internally --- crates/ra_parser/src/parser.rs | 252 +++++++++++++++++++---------------------- 1 file changed, 115 insertions(+), 137 deletions(-) (limited to 'crates/ra_parser/src/parser.rs') diff --git a/crates/ra_parser/src/parser.rs b/crates/ra_parser/src/parser.rs index d8567e84b..a27cdc2ea 100644 --- a/crates/ra_parser/src/parser.rs +++ b/crates/ra_parser/src/parser.rs @@ -6,7 +6,7 @@ use crate::{ event::Event, ParseError, SyntaxKind::{self, EOF, ERROR, TOMBSTONE}, - Token, TokenSet, TokenSource, T, + TokenSet, TokenSource, T, }; /// `Parser` struct provides the low-level API for @@ -40,38 +40,6 @@ impl<'t> Parser<'t> { self.nth(0) } - /// Returns the kinds of the current two tokens, if they are not separated - /// by trivia. - /// - /// Useful for parsing things like `>>`. - pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { - let c1 = self.nth(0); - let c2 = self.nth(1); - - if self.token_source.current().is_jointed_to_next { - Some((c1, c2)) - } else { - None - } - } - - /// Returns the kinds of the current three tokens, if they are not separated - /// by trivia. - /// - /// Useful for parsing things like `=>>`. - pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { - let c1 = self.nth(0); - let c2 = self.nth(1); - let c3 = self.nth(2); - if self.token_source.current().is_jointed_to_next - && self.token_source.lookahead_nth(1).is_jointed_to_next - { - Some((c1, c2, c3)) - } else { - None - } - } - /// Lookahead operation: returns the kind of the next nth /// token. pub(crate) fn nth(&self, n: usize) -> SyntaxKind { @@ -81,33 +49,116 @@ impl<'t> Parser<'t> { assert!(steps <= 10_000_000, "the parser seems stuck"); self.steps.set(steps + 1); - // It is beecause the Dollar will appear between nth - // Following code skips through it - let mut non_dollars_count = 0; - let mut i = 0; + self.token_source.lookahead_nth(n).kind - loop { - let token = self.token_source.lookahead_nth(i); - let mut kind = token.kind; - if let Some((composited, step)) = self.is_composite(token, i) { - kind = composited; - i += step; - } else { - i += 1; - } + // // It is because the Dollar will appear between nth + // // Following code skips through it + // let mut non_dollars_count = 0; + // let mut i = 0; - match kind { - EOF => return EOF, - SyntaxKind::L_DOLLAR | SyntaxKind::R_DOLLAR => {} - _ if non_dollars_count == n => return kind, - _ => non_dollars_count += 1, - } - } + // loop { + // let token = self.token_source.lookahead_nth(i); + // let mut kind = token.kind; + // if let Some((composited, step)) = self.is_composite(token, i) { + // kind = composited; + // i += step; + // } else { + // i += 1; + // } + + // match kind { + // EOF => return EOF, + // SyntaxKind::L_DOLLAR | SyntaxKind::R_DOLLAR => {} + // _ if non_dollars_count == n => return kind, + // _ => non_dollars_count += 1, + // } + // } } /// Checks if the current token is `kind`. pub(crate) fn at(&self, kind: SyntaxKind) -> bool { - self.current() == kind + self.nth_at(0, kind) + } + + pub(crate) fn nth_at(&self, n: usize, kind: SyntaxKind) -> bool { + match kind { + T![-=] => self.at_composite2(n, T![-], T![=]), + T![->] => self.at_composite2(n, T![-], T![>]), + T![::] => self.at_composite2(n, T![:], T![:]), + T![!=] => self.at_composite2(n, T![!], T![=]), + T![..] => self.at_composite2(n, T![.], T![.]), + T![*=] => self.at_composite2(n, T![*], T![=]), + T![/=] => self.at_composite2(n, T![/], T![=]), + T![&&] => self.at_composite2(n, T![&], T![&]), + T![&=] => self.at_composite2(n, T![&], T![=]), + T![%=] => self.at_composite2(n, T![%], T![=]), + T![^=] => self.at_composite2(n, T![^], T![=]), + T![+=] => self.at_composite2(n, T![+], T![=]), + T![<<] => self.at_composite2(n, T![<], T![<]), + T![<=] => self.at_composite2(n, T![<], T![=]), + T![==] => self.at_composite2(n, T![=], T![=]), + T![=>] => self.at_composite2(n, T![=], T![>]), + T![>=] => self.at_composite2(n, T![>], T![=]), + T![>>] => self.at_composite2(n, T![>], T![>]), + T![|=] => self.at_composite2(n, T![|], T![=]), + T![||] => self.at_composite2(n, T![|], T![|]), + + T![...] => self.at_composite3(n, T![.], T![.], T![.]), + T![..=] => self.at_composite3(n, T![.], T![.], T![=]), + T![<<=] => self.at_composite3(n, T![<], T![<], T![=]), + T![>>=] => self.at_composite3(n, T![>], T![>], T![=]), + + _ => self.token_source.lookahead_nth(n).kind == kind, + } + } + + /// Consume the next token if `kind` matches. + pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { + if !self.at(kind) { + return false; + } + let n_raw_tokens = match kind { + T![-=] + | T![->] + | T![::] + | T![!=] + | T![..] + | T![*=] + | T![/=] + | T![&&] + | T![&=] + | T![%=] + | T![^=] + | T![+=] + | T![<<] + | T![<=] + | T![==] + | T![=>] + | T![>=] + | T![>>] + | T![|=] + | T![||] => 2, + + T![...] | T![..=] | T![<<=] | T![>>=] => 3, + _ => 1, + }; + self.do_bump(kind, n_raw_tokens); + true + } + + fn at_composite2(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind) -> bool { + let t1 = self.token_source.lookahead_nth(n + 0); + let t2 = self.token_source.lookahead_nth(n + 1); + t1.kind == k1 && t1.is_jointed_to_next && t2.kind == k2 + } + + fn at_composite3(&self, n: usize, k1: SyntaxKind, k2: SyntaxKind, k3: SyntaxKind) -> bool { + let t1 = self.token_source.lookahead_nth(n + 0); + let t2 = self.token_source.lookahead_nth(n + 1); + let t3 = self.token_source.lookahead_nth(n + 2); + (t1.kind == k1 && t1.is_jointed_to_next) + && (t2.kind == k2 && t2.is_jointed_to_next) + && t3.kind == k3 } /// Checks if the current token is in `kinds`. @@ -129,9 +180,15 @@ impl<'t> Parser<'t> { Marker::new(pos) } + /// Consume the next token if `kind` matches. + pub(crate) fn bump(&mut self, kind: SyntaxKind) { + assert!(self.eat(kind)); + } + /// Advances the parser by one token unconditionally /// Mainly use in `token_tree` parsing - pub(crate) fn bump_raw(&mut self) { + #[allow(unused)] + fn bump_raw(&mut self) { let mut kind = self.token_source.current().kind; // Skip dollars, do_bump will eat these later @@ -153,27 +210,7 @@ impl<'t> Parser<'t> { if kind == EOF { return; } - - use SyntaxKind::*; - - // Handle parser composites - match kind { - T![...] | T![..=] => { - self.bump_compound(kind, 3); - } - T![..] | T![::] | T![==] | T![=>] | T![!=] | T![->] => { - self.bump_compound(kind, 2); - } - _ => { - self.do_bump(kind, 1); - } - } - } - - /// Advances the parser by one token, asserting that it is exactly the expected token - pub(crate) fn bump(&mut self, expected: SyntaxKind) { - debug_assert!(self.nth(0) == expected); - self.bump_any() + self.do_bump(kind, 1) } /// Advances the parser by one token, remapping its kind. @@ -190,13 +227,6 @@ impl<'t> Parser<'t> { self.do_bump(kind, 1); } - /// Advances the parser by `n` tokens, remapping its kind. - /// This is useful to create compound tokens from parts. For - /// example, an `<<` token is two consecutive remapped `<` tokens - pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { - self.do_bump(kind, n); - } - /// Emit error with the `message` /// FIXME: this should be much more fancy and support /// structured errors with spans and notes, like rustc @@ -206,15 +236,6 @@ impl<'t> Parser<'t> { self.push_event(Event::Error { msg }) } - /// Consume the next token if `kind` matches. - pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { - if !self.at(kind) { - return false; - } - self.bump_any(); - true - } - /// Consume the next token if it is `kind` or emit an error /// otherwise. pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { @@ -243,7 +264,7 @@ impl<'t> Parser<'t> { } fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { - self.eat_dollars(); + // self.eat_dollars(); for _ in 0..n_raw_tokens { self.token_source.bump(); @@ -256,50 +277,7 @@ impl<'t> Parser<'t> { self.events.push(event) } - /// helper function for check if it is composite. - fn is_composite(&self, first: Token, n: usize) -> Option<(SyntaxKind, usize)> { - // We assume the dollars will not occuried between - // mult-byte tokens - - let jn1 = first.is_jointed_to_next; - if !jn1 && first.kind != T![-] { - return None; - } - - let second = self.token_source.lookahead_nth(n + 1); - if first.kind == T![-] && second.kind == T![>] { - return Some((T![->], 2)); - } - if !jn1 { - return None; - } - - match (first.kind, second.kind) { - (T![:], T![:]) => return Some((T![::], 2)), - (T![=], T![=]) => return Some((T![==], 2)), - (T![=], T![>]) => return Some((T![=>], 2)), - (T![!], T![=]) => return Some((T![!=], 2)), - _ => {} - } - - if first.kind != T![.] || second.kind != T![.] { - return None; - } - - let third = self.token_source.lookahead_nth(n + 2); - - let jn2 = second.is_jointed_to_next; - let la3 = third.kind; - - if jn2 && la3 == T![.] { - return Some((T![...], 3)); - } - if la3 == T![=] { - return Some((T![..=], 3)); - } - return Some((T![..], 2)); - } - + #[allow(unused)] fn eat_dollars(&mut self) { loop { match self.token_source.current().kind { -- cgit v1.2.3 From 1c5800dee89b14406e816accf41c597e7860aff8 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 10 Sep 2019 14:27:08 +0300 Subject: "Fix" mbe to work with decomposed tokens We regressed $i * 2 where $i = 1 + 1, need to fix that! --- crates/ra_parser/src/parser.rs | 57 ------------------------------------------ 1 file changed, 57 deletions(-) (limited to 'crates/ra_parser/src/parser.rs') diff --git a/crates/ra_parser/src/parser.rs b/crates/ra_parser/src/parser.rs index a27cdc2ea..e7281123b 100644 --- a/crates/ra_parser/src/parser.rs +++ b/crates/ra_parser/src/parser.rs @@ -50,29 +50,6 @@ impl<'t> Parser<'t> { self.steps.set(steps + 1); self.token_source.lookahead_nth(n).kind - - // // It is because the Dollar will appear between nth - // // Following code skips through it - // let mut non_dollars_count = 0; - // let mut i = 0; - - // loop { - // let token = self.token_source.lookahead_nth(i); - // let mut kind = token.kind; - // if let Some((composited, step)) = self.is_composite(token, i) { - // kind = composited; - // i += step; - // } else { - // i += 1; - // } - - // match kind { - // EOF => return EOF, - // SyntaxKind::L_DOLLAR | SyntaxKind::R_DOLLAR => {} - // _ if non_dollars_count == n => return kind, - // _ => non_dollars_count += 1, - // } - // } } /// Checks if the current token is `kind`. @@ -185,25 +162,6 @@ impl<'t> Parser<'t> { assert!(self.eat(kind)); } - /// Advances the parser by one token unconditionally - /// Mainly use in `token_tree` parsing - #[allow(unused)] - fn bump_raw(&mut self) { - let mut kind = self.token_source.current().kind; - - // Skip dollars, do_bump will eat these later - let mut i = 0; - while kind == SyntaxKind::L_DOLLAR || kind == SyntaxKind::R_DOLLAR { - kind = self.token_source.lookahead_nth(i).kind; - i += 1; - } - - if kind == EOF { - return; - } - self.do_bump(kind, 1); - } - /// Advances the parser by one token with composite puncts handled pub(crate) fn bump_any(&mut self) { let kind = self.nth(0); @@ -277,21 +235,6 @@ impl<'t> Parser<'t> { self.events.push(event) } - #[allow(unused)] - fn eat_dollars(&mut self) { - loop { - match self.token_source.current().kind { - k @ SyntaxKind::L_DOLLAR | k @ SyntaxKind::R_DOLLAR => { - self.token_source.bump(); - self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); - } - _ => { - return; - } - } - } - } - pub(crate) fn eat_l_dollars(&mut self) -> usize { let mut ate_count = 0; loop { -- cgit v1.2.3