From 1d7735fbc6795c3ea5f02950b47413e0b35d6677 Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Sat, 6 Apr 2019 12:12:32 +0800 Subject: Add path test and empty eat_path handling --- crates/ra_mbe/src/lib.rs | 22 ++++++++++++++++++++++ crates/ra_mbe/src/mbe_expander.rs | 5 +++++ crates/ra_mbe/src/tt_cursor.rs | 4 ++++ 3 files changed, 31 insertions(+) diff --git a/crates/ra_mbe/src/lib.rs b/crates/ra_mbe/src/lib.rs index 4203929d4..a5b7fab52 100644 --- a/crates/ra_mbe/src/lib.rs +++ b/crates/ra_mbe/src/lib.rs @@ -379,4 +379,26 @@ SOURCE_FILE@[0; 40) // [let] [s] [=] ["rust1"] [;] assert_eq!(to_literal(&stm_tokens[15 + 3]).text, "\"rust1\""); } + + /// The following tests are port from intellij-rust directly + /// https://github.com/intellij-rust/intellij-rust/blob/c4e9feee4ad46e7953b1948c112533360b6087bb/src/test/kotlin/org/rust/lang/core/macros/RsMacroExpansionTest.kt + + #[test] + fn test_path() { + let rules = create_rules( + r#" + macro_rules! foo { + ($ i:path) => { + fn foo() { let a = $ i; } + } + } +"#, + ); + assert_expansion(&rules, "foo! { foo }", "fn foo () {let a = foo ;}"); + assert_expansion( + &rules, + "foo! { bar::::baz:: }", + "fn foo () {let a = bar :: < u8 > :: baz :: < u8 > ;}", + ); + } } diff --git a/crates/ra_mbe/src/mbe_expander.rs b/crates/ra_mbe/src/mbe_expander.rs index 2dafd68f6..ce41d7225 100644 --- a/crates/ra_mbe/src/mbe_expander.rs +++ b/crates/ra_mbe/src/mbe_expander.rs @@ -139,6 +139,11 @@ fn match_lhs(pattern: &crate::Subtree, input: &mut TtCursor) -> Result { + let path = + input.eat_path().ok_or(ExpandError::UnexpectedToken)?.clone(); + res.inner.insert(text.clone(), Binding::Simple(path.into())); + } _ => return Err(ExpandError::UnexpectedToken), } } diff --git a/crates/ra_mbe/src/tt_cursor.rs b/crates/ra_mbe/src/tt_cursor.rs index 3128cb9ae..6f619621a 100644 --- a/crates/ra_mbe/src/tt_cursor.rs +++ b/crates/ra_mbe/src/tt_cursor.rs @@ -78,6 +78,10 @@ impl<'a> TtCursor<'a> { }) } + pub(crate) fn eat_path(&mut self) -> Option { + None + } + pub(crate) fn expect_char(&mut self, char: char) -> Result<(), ParseError> { if self.at_char(char) { self.bump(); -- cgit v1.2.3 From aac9dfa46418603940ab2333cfea2190d9464d9e Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Sat, 6 Apr 2019 20:14:28 +0800 Subject: Add TtCursorTokenSource and TtCursorTokenSink --- crates/ra_mbe/src/syntax_bridge.rs | 58 ++++++++----- crates/ra_mbe/src/tt_cursor.rs | 170 ++++++++++++++++++++++++++++++++++++- crates/ra_parser/src/grammar.rs | 4 + crates/ra_parser/src/lib.rs | 8 ++ 4 files changed, 216 insertions(+), 24 deletions(-) diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 139a0fd33..3a0702a30 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -104,15 +104,16 @@ fn convert_tt( } #[derive(Debug)] -struct TtTokenSource { - tokens: Vec, +pub(crate) struct TtTokenSource { + pub tokens: Vec, } #[derive(Debug)] -struct TtToken { - kind: SyntaxKind, - is_joint_to_next: bool, - text: SmolStr, +pub(crate) struct TtToken { + pub kind: SyntaxKind, + pub is_joint_to_next: bool, + pub text: SmolStr, + pub n_tokens: usize, } // Some helper functions @@ -123,7 +124,7 @@ fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> { None } -struct TokenPeek<'a, I> +pub(crate) struct TokenPeek<'a, I> where I: Iterator, { @@ -134,7 +135,11 @@ impl<'a, I> TokenPeek<'a, I> where I: Iterator, { - fn next(&mut self) -> Option<&tt::TokenTree> { + pub fn new(iter: I) -> Self { + TokenPeek { iter: itertools::multipeek(iter) } + } + + pub fn next(&mut self) -> Option<&tt::TokenTree> { self.iter.next() } @@ -161,14 +166,14 @@ where } impl TtTokenSource { - fn new(tt: &tt::Subtree) -> TtTokenSource { + pub fn new(tt: &tt::Subtree) -> TtTokenSource { let mut res = TtTokenSource { tokens: Vec::new() }; res.convert_subtree(tt); res } fn convert_subtree(&mut self, sub: &tt::Subtree) { self.push_delim(sub.delimiter, false); - let mut peek = TokenPeek { iter: itertools::multipeek(sub.token_trees.iter()) }; + let mut peek = TokenPeek::new(sub.token_trees.iter()); while let Some(tt) = peek.iter.next() { self.convert_tt(tt, &mut peek); } @@ -194,10 +199,17 @@ impl TtTokenSource { kind: classify_literal(&l.text).unwrap().kind, is_joint_to_next: false, text: l.text.clone(), + n_tokens: 1, }, tt::Leaf::Punct(p) => { - if let Some(tt) = Self::convert_multi_char_punct(p, iter) { - tt + if let Some((kind, is_joint_to_next, text, size)) = + Self::convert_multi_char_punct(p, iter) + { + for _ in 0..size - 1 { + iter.next(); + } + + TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } } else { let kind = match p.char { // lexer may produce combpund tokens for these ones @@ -213,21 +225,26 @@ impl TtTokenSource { let s: &str = p.char.encode_utf8(&mut buf); SmolStr::new(s) }; - TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text } + TtToken { + kind, + is_joint_to_next: p.spacing == tt::Spacing::Joint, + text, + n_tokens: 1, + } } } tt::Leaf::Ident(ident) => { let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); - TtToken { kind, is_joint_to_next: false, text: ident.text.clone() } + TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } } }; self.tokens.push(tok) } - fn convert_multi_char_punct<'a, I>( + pub(crate) fn convert_multi_char_punct<'a, I>( p: &tt::Punct, iter: &mut TokenPeek<'a, I>, - ) -> Option + ) -> Option<(SyntaxKind, bool, &'static str, usize)> where I: Iterator, { @@ -239,9 +256,7 @@ impl TtTokenSource { ('.', '.', '=') => Some((DOTDOTEQ, "..=")), _ => None, } { - iter.next(); - iter.next(); - return Some(TtToken { kind, is_joint_to_next, text: text.into() }); + return Some((kind, is_joint_to_next, text, 3)); } } @@ -273,8 +288,7 @@ impl TtTokenSource { _ => None, } { - iter.next(); - return Some(TtToken { kind, is_joint_to_next, text: text.into() }); + return Some((kind, is_joint_to_next, text, 2)); } } @@ -291,7 +305,7 @@ impl TtTokenSource { let idx = closing as usize; let kind = kinds[idx]; let text = &texts[idx..texts.len() - (1 - idx)]; - let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) }; + let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }; self.tokens.push(tok) } } diff --git a/crates/ra_mbe/src/tt_cursor.rs b/crates/ra_mbe/src/tt_cursor.rs index 6f619621a..6ac3ac187 100644 --- a/crates/ra_mbe/src/tt_cursor.rs +++ b/crates/ra_mbe/src/tt_cursor.rs @@ -1,4 +1,124 @@ use crate::ParseError; +use crate::syntax_bridge::{TtTokenSource, TtToken, TokenPeek}; +use ra_parser::{TokenSource, TreeSink}; + +use ra_syntax::{ + SyntaxKind +}; + +struct TtCursorTokenSource { + tt_pos: usize, + inner: TtTokenSource, +} + +impl TtCursorTokenSource { + fn new(subtree: &tt::Subtree, curr: usize) -> TtCursorTokenSource { + let mut res = TtCursorTokenSource { inner: TtTokenSource::new(subtree), tt_pos: 1 }; + + // Matching `TtToken` cursor to `tt::TokenTree` cursor + // It is because TtToken is not One to One mapping to tt::Token + // There are 3 case (`TtToken` <=> `tt::TokenTree`) : + // * One to One => ident, single char punch + // * Many to One => `tt::TokenTree::SubTree` + // * One to Many => multibyte punct + // + // Such that we cannot simpliy advance the cursor + // We have to bump it one by one + let mut pos = 0; + while pos < curr { + pos += res.bump(&subtree.token_trees[pos]); + } + + res + } + + fn skip_sibling_leaf(&self, leaf: &tt::Leaf, iter: &mut std::slice::Iter) { + if let tt::Leaf::Punct(p) = leaf { + let mut peek = TokenPeek::new(iter); + if let Some((_, _, _, size)) = TtTokenSource::convert_multi_char_punct(p, &mut peek) { + for _ in 0..size - 1 { + peek.next(); + } + } + } + } + + fn count_tt_tokens( + &self, + tt: &tt::TokenTree, + iter: Option<&mut std::slice::Iter>, + ) -> usize { + assert!(!self.inner.tokens.is_empty()); + + match tt { + tt::TokenTree::Subtree(sub_tree) => { + let mut iter = sub_tree.token_trees.iter(); + let mut count = match sub_tree.delimiter { + tt::Delimiter::None => 0, + _ => 2, + }; + + while let Some(tt) = iter.next() { + count += self.count_tt_tokens(&tt, Some(&mut iter)); + } + count + } + + tt::TokenTree::Leaf(leaf) => { + iter.map(|iter| { + self.skip_sibling_leaf(leaf, iter); + }); + + 1 + } + } + } + + fn count(&self, tt: &tt::TokenTree) -> usize { + self.count_tt_tokens(tt, None) + } + + fn bump(&mut self, tt: &tt::TokenTree) -> usize { + let cur = self.current().unwrap(); + let n_tokens = cur.n_tokens; + self.tt_pos += self.count(tt); + n_tokens + } + + fn current(&self) -> Option<&TtToken> { + self.inner.tokens.get(self.tt_pos) + } +} + +impl TokenSource for TtCursorTokenSource { + fn token_kind(&self, pos: usize) -> SyntaxKind { + if let Some(tok) = self.inner.tokens.get(self.tt_pos + pos) { + tok.kind + } else { + SyntaxKind::EOF + } + } + fn is_token_joint_to_next(&self, pos: usize) -> bool { + self.inner.tokens[self.tt_pos + pos].is_joint_to_next + } + fn is_keyword(&self, pos: usize, kw: &str) -> bool { + self.inner.tokens[self.tt_pos + pos].text == *kw + } +} + +struct TtCursorTokenSink { + token_pos: usize, +} + +impl TreeSink for TtCursorTokenSink { + fn token(&mut self, _kind: SyntaxKind, n_tokens: u8) { + self.token_pos += n_tokens as usize; + } + + fn start_node(&mut self, _kind: SyntaxKind) {} + fn finish_node(&mut self) {} + fn error(&mut self, _error: ra_parser::ParseError) {} +} #[derive(Clone)] pub(crate) struct TtCursor<'a> { @@ -78,8 +198,54 @@ impl<'a> TtCursor<'a> { }) } - pub(crate) fn eat_path(&mut self) -> Option { - None + fn eat_parse_result( + &mut self, + parsed_token: usize, + src: &mut TtCursorTokenSource, + ) -> Option { + let mut res = vec![]; + + // Matching `TtToken` cursor to `tt::TokenTree` cursor + // It is because TtToken is not One to One mapping to tt::Token + // There are 3 case (`TtToken` <=> `tt::TokenTree`) : + // * One to One => ident, single char punch + // * Many to One => `tt::TokenTree::SubTree` + // * One to Many => multibyte punct + // + // Such that we cannot simpliy advance the cursor + // We have to bump it one by one + let next_pos = src.tt_pos + parsed_token; + while src.tt_pos < next_pos { + let n = src.bump(self.current().unwrap()); + res.extend((0..n).map(|_| self.eat().unwrap())); + } + + let res: Vec<_> = res.into_iter().cloned().collect(); + + match res.len() { + 0 => None, + 1 => Some(res[0].clone()), + _ => Some(tt::TokenTree::Subtree(tt::Subtree { + delimiter: tt::Delimiter::None, + token_trees: res, + })), + } + } + + fn eat_parse(&mut self, f: F) -> Option + where + F: FnOnce(&dyn TokenSource, &mut dyn TreeSink), + { + let mut src = TtCursorTokenSource::new(self.subtree, self.pos); + let mut sink = TtCursorTokenSink { token_pos: 0 }; + + f(&src, &mut sink); + + self.eat_parse_result(sink.token_pos, &mut src) + } + + pub(crate) fn eat_path(&mut self) -> Option { + self.eat_parse(ra_parser::parse_path) } pub(crate) fn expect_char(&mut self, char: char) -> Result<(), ParseError> { diff --git a/crates/ra_parser/src/grammar.rs b/crates/ra_parser/src/grammar.rs index b2ffeff8c..c5f510e6b 100644 --- a/crates/ra_parser/src/grammar.rs +++ b/crates/ra_parser/src/grammar.rs @@ -49,6 +49,10 @@ pub(crate) fn root(p: &mut Parser) { m.complete(p, SOURCE_FILE); } +pub(crate) fn path(p: &mut Parser) { + paths::type_path(p); +} + pub(crate) fn reparser( node: SyntaxKind, first_child: Option, diff --git a/crates/ra_parser/src/lib.rs b/crates/ra_parser/src/lib.rs index 30ba06aac..3ceeeebd7 100644 --- a/crates/ra_parser/src/lib.rs +++ b/crates/ra_parser/src/lib.rs @@ -61,6 +61,14 @@ pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { event::process(tree_sink, events); } +/// Parse given tokens into the given sink as a path +pub fn parse_path(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { + let mut p = parser::Parser::new(token_source); + grammar::path(&mut p); + let events = p.finish(); + event::process(tree_sink, events); +} + /// A parsing function for a specific braced-block. pub struct Reparser(fn(&mut parser::Parser)); -- cgit v1.2.3 From 7f1e93a3c934342a9b9c418865ff395b8ad245ce Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Sun, 7 Apr 2019 21:42:53 +0800 Subject: Refactoring subtree_source --- crates/ra_mbe/src/lib.rs | 4 +- crates/ra_mbe/src/subtree_source.rs | 352 ++++++++++++++++++++++++++++++++++++ crates/ra_mbe/src/syntax_bridge.rs | 262 +++------------------------ crates/ra_mbe/src/tt_cursor.rs | 132 +------------- 4 files changed, 386 insertions(+), 364 deletions(-) create mode 100644 crates/ra_mbe/src/subtree_source.rs diff --git a/crates/ra_mbe/src/lib.rs b/crates/ra_mbe/src/lib.rs index a5b7fab52..38d3ec7e1 100644 --- a/crates/ra_mbe/src/lib.rs +++ b/crates/ra_mbe/src/lib.rs @@ -15,10 +15,12 @@ macro_rules! impl_froms { } } -mod tt_cursor; +// mod tt_cursor; mod mbe_parser; mod mbe_expander; mod syntax_bridge; +mod tt_cursor; +mod subtree_source; use ra_syntax::SmolStr; diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs new file mode 100644 index 000000000..8f5ce4ed5 --- /dev/null +++ b/crates/ra_mbe/src/subtree_source.rs @@ -0,0 +1,352 @@ +use ra_parser::{TokenSource}; +use ra_syntax::{classify_literal, SmolStr, SyntaxKind, SyntaxKind::*}; + +#[derive(Debug)] +struct TtToken { + pub kind: SyntaxKind, + pub is_joint_to_next: bool, + pub text: SmolStr, + pub n_tokens: usize, +} + +/// SubtreeSourceQuerier let outside to query internal tokens as string +pub(crate) struct SubtreeSourceQuerier<'a> { + src: &'a SubtreeTokenSource<'a>, +} + +impl<'a> SubtreeSourceQuerier<'a> { + pub(crate) fn token(&self, uidx: usize) -> (SyntaxKind, &SmolStr) { + let tkn = &self.src.tokens[uidx]; + (tkn.kind, &tkn.text) + } +} + +pub(crate) struct SubtreeTokenSource<'a> { + tt_pos: usize, + tokens: Vec, + subtree: &'a tt::Subtree, +} + +impl<'a> SubtreeTokenSource<'a> { + pub fn new(subtree: &tt::Subtree) -> SubtreeTokenSource { + SubtreeTokenSource { tokens: TtTokenBuilder::build(subtree), tt_pos: 0, subtree } + } + + pub fn advance(&mut self, curr: usize, skip_first_delimiter: bool) { + if skip_first_delimiter { + self.tt_pos += 1; + } + + // Matching `TtToken` cursor to `tt::TokenTree` cursor + // It is because TtToken is not One to One mapping to tt::Token + // There are 3 case (`TtToken` <=> `tt::TokenTree`) : + // * One to One => ident, single char punch + // * Many to One => `tt::TokenTree::SubTree` + // * One to Many => multibyte punct + // + // Such that we cannot simpliy advance the cursor + // We have to bump it one by one + let mut pos = 0; + while pos < curr { + pos += self.bump(&self.subtree.token_trees[pos]); + } + } + + pub fn querier(&self) -> SubtreeSourceQuerier { + SubtreeSourceQuerier { src: self } + } + + fn count(&self, tt: &tt::TokenTree) -> usize { + assert!(!self.tokens.is_empty()); + TtTokenBuilder::count_tt_tokens(tt, None) + } + + pub(crate) fn bump(&mut self, tt: &tt::TokenTree) -> usize { + let cur = &self.tokens[self.tt_pos]; + let n_tokens = cur.n_tokens; + self.tt_pos += self.count(tt); + n_tokens + } + + pub(crate) fn bump_n( + &mut self, + n_tokens: usize, + mut token_pos: usize, + ) -> (usize, Vec<&tt::TokenTree>) { + let mut res = vec![]; + // Matching `TtToken` cursor to `tt::TokenTree` cursor + // It is because TtToken is not One to One mapping to tt::Token + // There are 3 case (`TtToken` <=> `tt::TokenTree`) : + // * One to One => ident, single char punch + // * Many to One => `tt::TokenTree::SubTree` + // * One to Many => multibyte punct + // + // Such that we cannot simpliy advance the cursor + // We have to bump it one by one + let next_pos = self.tt_pos + n_tokens; + let old_token_pos = token_pos; + + while self.tt_pos < next_pos { + let current = &self.subtree.token_trees[token_pos]; + let n = self.bump(current); + res.extend((0..n).map(|i| &self.subtree.token_trees[token_pos + i])); + token_pos += n; + } + + (token_pos - old_token_pos, res) + } +} + +impl<'a> TokenSource for SubtreeTokenSource<'a> { + fn token_kind(&self, pos: usize) -> SyntaxKind { + if let Some(tok) = self.tokens.get(self.tt_pos + pos) { + tok.kind + } else { + SyntaxKind::EOF + } + } + fn is_token_joint_to_next(&self, pos: usize) -> bool { + self.tokens[self.tt_pos + pos].is_joint_to_next + } + fn is_keyword(&self, pos: usize, kw: &str) -> bool { + self.tokens[self.tt_pos + pos].text == *kw + } +} + +struct TokenPeek<'a, I> +where + I: Iterator, +{ + iter: itertools::MultiPeek, +} + +// helper function +fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> { + if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt { + return Some(pp); + } + None +} + +impl<'a, I> TokenPeek<'a, I> +where + I: Iterator, +{ + pub fn new(iter: I) -> Self { + TokenPeek { iter: itertools::multipeek(iter) } + } + + pub fn next(&mut self) -> Option<&tt::TokenTree> { + self.iter.next() + } + + fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> { + if p.spacing != tt::Spacing::Joint { + return None; + } + + self.iter.reset_peek(); + let p1 = to_punct(self.iter.peek()?)?; + Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint)) + } + + fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> { + self.current_punct2(p).and_then(|((p0, p1), last_joint)| { + if !last_joint { + None + } else { + let p2 = to_punct(*self.iter.peek()?)?; + Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint)) + } + }) + } +} + +struct TtTokenBuilder { + tokens: Vec, +} + +impl TtTokenBuilder { + fn build(sub: &tt::Subtree) -> Vec { + let mut res = TtTokenBuilder { tokens: vec![] }; + res.convert_subtree(sub); + res.tokens + } + + fn convert_subtree(&mut self, sub: &tt::Subtree) { + self.push_delim(sub.delimiter, false); + let mut peek = TokenPeek::new(sub.token_trees.iter()); + while let Some(tt) = peek.iter.next() { + self.convert_tt(tt, &mut peek); + } + self.push_delim(sub.delimiter, true) + } + + fn convert_tt<'b, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'b, I>) + where + I: Iterator, + { + match tt { + tt::TokenTree::Leaf(token) => self.convert_token(token, iter), + tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), + } + } + + fn convert_token<'b, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'b, I>) + where + I: Iterator, + { + let tok = match token { + tt::Leaf::Literal(l) => TtToken { + kind: classify_literal(&l.text).unwrap().kind, + is_joint_to_next: false, + text: l.text.clone(), + n_tokens: 1, + }, + tt::Leaf::Punct(p) => { + if let Some((kind, is_joint_to_next, text, size)) = + Self::convert_multi_char_punct(p, iter) + { + for _ in 0..size - 1 { + iter.next(); + } + + TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } + } else { + let kind = match p.char { + // lexer may produce combpund tokens for these ones + '.' => DOT, + ':' => COLON, + '=' => EQ, + '!' => EXCL, + '-' => MINUS, + c => SyntaxKind::from_char(c).unwrap(), + }; + let text = { + let mut buf = [0u8; 4]; + let s: &str = p.char.encode_utf8(&mut buf); + SmolStr::new(s) + }; + TtToken { + kind, + is_joint_to_next: p.spacing == tt::Spacing::Joint, + text, + n_tokens: 1, + } + } + } + tt::Leaf::Ident(ident) => { + let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); + TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } + } + }; + self.tokens.push(tok) + } + + fn convert_multi_char_punct<'b, I>( + p: &tt::Punct, + iter: &mut TokenPeek<'b, I>, + ) -> Option<(SyntaxKind, bool, &'static str, usize)> + where + I: Iterator, + { + if let Some((m, is_joint_to_next)) = iter.current_punct3(p) { + if let Some((kind, text)) = match m { + ('<', '<', '=') => Some((SHLEQ, "<<=")), + ('>', '>', '=') => Some((SHREQ, ">>=")), + ('.', '.', '.') => Some((DOTDOTDOT, "...")), + ('.', '.', '=') => Some((DOTDOTEQ, "..=")), + _ => None, + } { + return Some((kind, is_joint_to_next, text, 3)); + } + } + + if let Some((m, is_joint_to_next)) = iter.current_punct2(p) { + if let Some((kind, text)) = match m { + ('<', '<') => Some((SHL, "<<")), + ('>', '>') => Some((SHR, ">>")), + + ('|', '|') => Some((PIPEPIPE, "||")), + ('&', '&') => Some((AMPAMP, "&&")), + ('%', '=') => Some((PERCENTEQ, "%=")), + ('*', '=') => Some((STAREQ, "*=")), + ('/', '=') => Some((SLASHEQ, "/=")), + ('^', '=') => Some((CARETEQ, "^=")), + + ('&', '=') => Some((AMPEQ, "&=")), + ('|', '=') => Some((PIPEEQ, "|=")), + ('-', '=') => Some((MINUSEQ, "-=")), + ('+', '=') => Some((PLUSEQ, "+=")), + ('>', '=') => Some((GTEQ, ">=")), + ('<', '=') => Some((LTEQ, "<=")), + + ('-', '>') => Some((THIN_ARROW, "->")), + ('!', '=') => Some((NEQ, "!=")), + ('=', '>') => Some((FAT_ARROW, "=>")), + ('=', '=') => Some((EQEQ, "==")), + ('.', '.') => Some((DOTDOT, "..")), + (':', ':') => Some((COLONCOLON, "::")), + + _ => None, + } { + return Some((kind, is_joint_to_next, text, 2)); + } + } + + None + } + + fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { + let (kinds, texts) = match d { + tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), + tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), + tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), + tt::Delimiter::None => return, + }; + let idx = closing as usize; + let kind = kinds[idx]; + let text = &texts[idx..texts.len() - (1 - idx)]; + let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }; + self.tokens.push(tok) + } + + fn skip_sibling_leaf(leaf: &tt::Leaf, iter: &mut std::slice::Iter) { + if let tt::Leaf::Punct(p) = leaf { + let mut peek = TokenPeek::new(iter); + if let Some((_, _, _, size)) = TtTokenBuilder::convert_multi_char_punct(p, &mut peek) { + for _ in 0..size - 1 { + peek.next(); + } + } + } + } + + fn count_tt_tokens( + tt: &tt::TokenTree, + iter: Option<&mut std::slice::Iter>, + ) -> usize { + match tt { + tt::TokenTree::Subtree(sub_tree) => { + let mut iter = sub_tree.token_trees.iter(); + let mut count = match sub_tree.delimiter { + tt::Delimiter::None => 0, + _ => 2, + }; + + while let Some(tt) = iter.next() { + count += Self::count_tt_tokens(&tt, Some(&mut iter)); + } + count + } + + tt::TokenTree::Leaf(leaf) => { + iter.map(|iter| { + Self::skip_sibling_leaf(leaf, iter); + }); + + 1 + } + } + } +} diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 3a0702a30..102bba341 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -1,9 +1,11 @@ -use ra_parser::{TokenSource, TreeSink, ParseError}; +use ra_parser::{TreeSink, ParseError}; use ra_syntax::{ AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc, SyntaxElement, - ast, SyntaxKind::*, TextUnit, classify_literal + ast, SyntaxKind::*, TextUnit }; +use crate::subtree_source::{SubtreeTokenSource, SubtreeSourceQuerier}; + /// Maps `tt::TokenId` to the relative range of the original token. #[derive(Default)] pub struct TokenMap { @@ -22,8 +24,8 @@ pub fn ast_to_token_tree(ast: &ast::TokenTree) -> Option<(tt::Subtree, TokenMap) /// Parses the token tree (result of macro expansion) as a sequence of items pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc { - let token_source = TtTokenSource::new(tt); - let mut tree_sink = TtTreeSink::new(&token_source.tokens); + let token_source = SubtreeTokenSource::new(tt); + let mut tree_sink = TtTreeSink::new(token_source.querier()); ra_parser::parse(&token_source, &mut tree_sink); let syntax = tree_sink.inner.finish(); ast::SourceFile::cast(&syntax).unwrap().to_owned() @@ -103,243 +105,19 @@ fn convert_tt( Some(res) } -#[derive(Debug)] -pub(crate) struct TtTokenSource { - pub tokens: Vec, -} - -#[derive(Debug)] -pub(crate) struct TtToken { - pub kind: SyntaxKind, - pub is_joint_to_next: bool, - pub text: SmolStr, - pub n_tokens: usize, -} - -// Some helper functions -fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> { - if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt { - return Some(pp); - } - None -} - -pub(crate) struct TokenPeek<'a, I> -where - I: Iterator, -{ - iter: itertools::MultiPeek, -} - -impl<'a, I> TokenPeek<'a, I> -where - I: Iterator, -{ - pub fn new(iter: I) -> Self { - TokenPeek { iter: itertools::multipeek(iter) } - } - - pub fn next(&mut self) -> Option<&tt::TokenTree> { - self.iter.next() - } - - fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> { - if p.spacing != tt::Spacing::Joint { - return None; - } - - self.iter.reset_peek(); - let p1 = to_punct(self.iter.peek()?)?; - Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint)) - } - - fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> { - self.current_punct2(p).and_then(|((p0, p1), last_joint)| { - if !last_joint { - None - } else { - let p2 = to_punct(*self.iter.peek()?)?; - Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint)) - } - }) - } -} - -impl TtTokenSource { - pub fn new(tt: &tt::Subtree) -> TtTokenSource { - let mut res = TtTokenSource { tokens: Vec::new() }; - res.convert_subtree(tt); - res - } - fn convert_subtree(&mut self, sub: &tt::Subtree) { - self.push_delim(sub.delimiter, false); - let mut peek = TokenPeek::new(sub.token_trees.iter()); - while let Some(tt) = peek.iter.next() { - self.convert_tt(tt, &mut peek); - } - self.push_delim(sub.delimiter, true) - } - - fn convert_tt<'a, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'a, I>) - where - I: Iterator, - { - match tt { - tt::TokenTree::Leaf(token) => self.convert_token(token, iter), - tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), - } - } - - fn convert_token<'a, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'a, I>) - where - I: Iterator, - { - let tok = match token { - tt::Leaf::Literal(l) => TtToken { - kind: classify_literal(&l.text).unwrap().kind, - is_joint_to_next: false, - text: l.text.clone(), - n_tokens: 1, - }, - tt::Leaf::Punct(p) => { - if let Some((kind, is_joint_to_next, text, size)) = - Self::convert_multi_char_punct(p, iter) - { - for _ in 0..size - 1 { - iter.next(); - } - - TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } - } else { - let kind = match p.char { - // lexer may produce combpund tokens for these ones - '.' => DOT, - ':' => COLON, - '=' => EQ, - '!' => EXCL, - '-' => MINUS, - c => SyntaxKind::from_char(c).unwrap(), - }; - let text = { - let mut buf = [0u8; 4]; - let s: &str = p.char.encode_utf8(&mut buf); - SmolStr::new(s) - }; - TtToken { - kind, - is_joint_to_next: p.spacing == tt::Spacing::Joint, - text, - n_tokens: 1, - } - } - } - tt::Leaf::Ident(ident) => { - let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); - TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } - } - }; - self.tokens.push(tok) - } - - pub(crate) fn convert_multi_char_punct<'a, I>( - p: &tt::Punct, - iter: &mut TokenPeek<'a, I>, - ) -> Option<(SyntaxKind, bool, &'static str, usize)> - where - I: Iterator, - { - if let Some((m, is_joint_to_next)) = iter.current_punct3(p) { - if let Some((kind, text)) = match m { - ('<', '<', '=') => Some((SHLEQ, "<<=")), - ('>', '>', '=') => Some((SHREQ, ">>=")), - ('.', '.', '.') => Some((DOTDOTDOT, "...")), - ('.', '.', '=') => Some((DOTDOTEQ, "..=")), - _ => None, - } { - return Some((kind, is_joint_to_next, text, 3)); - } - } - - if let Some((m, is_joint_to_next)) = iter.current_punct2(p) { - if let Some((kind, text)) = match m { - ('<', '<') => Some((SHL, "<<")), - ('>', '>') => Some((SHR, ">>")), - - ('|', '|') => Some((PIPEPIPE, "||")), - ('&', '&') => Some((AMPAMP, "&&")), - ('%', '=') => Some((PERCENTEQ, "%=")), - ('*', '=') => Some((STAREQ, "*=")), - ('/', '=') => Some((SLASHEQ, "/=")), - ('^', '=') => Some((CARETEQ, "^=")), - - ('&', '=') => Some((AMPEQ, "&=")), - ('|', '=') => Some((PIPEEQ, "|=")), - ('-', '=') => Some((MINUSEQ, "-=")), - ('+', '=') => Some((PLUSEQ, "+=")), - ('>', '=') => Some((GTEQ, ">=")), - ('<', '=') => Some((LTEQ, "<=")), - - ('-', '>') => Some((THIN_ARROW, "->")), - ('!', '=') => Some((NEQ, "!=")), - ('=', '>') => Some((FAT_ARROW, "=>")), - ('=', '=') => Some((EQEQ, "==")), - ('.', '.') => Some((DOTDOT, "..")), - (':', ':') => Some((COLONCOLON, "::")), - - _ => None, - } { - return Some((kind, is_joint_to_next, text, 2)); - } - } - - None - } - - fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { - let (kinds, texts) = match d { - tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), - tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), - tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), - tt::Delimiter::None => return, - }; - let idx = closing as usize; - let kind = kinds[idx]; - let text = &texts[idx..texts.len() - (1 - idx)]; - let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }; - self.tokens.push(tok) - } -} - -impl TokenSource for TtTokenSource { - fn token_kind(&self, pos: usize) -> SyntaxKind { - if let Some(tok) = self.tokens.get(pos) { - tok.kind - } else { - SyntaxKind::EOF - } - } - fn is_token_joint_to_next(&self, pos: usize) -> bool { - self.tokens[pos].is_joint_to_next - } - fn is_keyword(&self, pos: usize, kw: &str) -> bool { - self.tokens[pos].text == *kw - } -} - -#[derive(Default)] struct TtTreeSink<'a> { buf: String, - tokens: &'a [TtToken], + src_querier: SubtreeSourceQuerier<'a>, text_pos: TextUnit, token_pos: usize, inner: SyntaxTreeBuilder, } impl<'a> TtTreeSink<'a> { - fn new(tokens: &'a [TtToken]) -> TtTreeSink { + fn new(src_querier: SubtreeSourceQuerier<'a>) -> TtTreeSink { TtTreeSink { buf: String::new(), - tokens, + src_querier, text_pos: 0.into(), token_pos: 0, inner: SyntaxTreeBuilder::default(), @@ -350,7 +128,7 @@ impl<'a> TtTreeSink<'a> { impl<'a> TreeSink for TtTreeSink<'a> { fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { for _ in 0..n_tokens { - self.buf += self.tokens[self.token_pos].text.as_str(); + self.buf += self.src_querier.token(self.token_pos).1; self.token_pos += 1; } self.text_pos += TextUnit::of_str(&self.buf); @@ -394,21 +172,23 @@ mod tests { "#, ); let expansion = expand(&rules, "literals!(foo)"); - let tt_src = TtTokenSource::new(&expansion); + let tt_src = SubtreeTokenSource::new(&expansion); + + let query = tt_src.querier(); // [{] // [let] [a] [=] ['c'] [;] - assert_eq!(tt_src.tokens[1 + 3].text, "'c'"); - assert_eq!(tt_src.tokens[1 + 3].kind, CHAR); + assert_eq!(query.token(1 + 3).1, "'c'"); + assert_eq!(query.token(1 + 3).0, CHAR); // [let] [c] [=] [1000] [;] - assert_eq!(tt_src.tokens[1 + 5 + 3].text, "1000"); - assert_eq!(tt_src.tokens[1 + 5 + 3].kind, INT_NUMBER); + assert_eq!(query.token(1 + 5 + 3).1, "1000"); + assert_eq!(query.token(1 + 5 + 3).0, INT_NUMBER); // [let] [f] [=] [12E+99_f64] [;] - assert_eq!(tt_src.tokens[1 + 10 + 3].text, "12E+99_f64"); - assert_eq!(tt_src.tokens[1 + 10 + 3].kind, FLOAT_NUMBER); + assert_eq!(query.token(1 + 10 + 3).1, "12E+99_f64"); + assert_eq!(query.token(1 + 10 + 3).0, FLOAT_NUMBER); // [let] [s] [=] ["rust1"] [;] - assert_eq!(tt_src.tokens[1 + 15 + 3].text, "\"rust1\""); - assert_eq!(tt_src.tokens[1 + 15 + 3].kind, STRING); + assert_eq!(query.token(1 + 15 + 3).1, "\"rust1\""); + assert_eq!(query.token(1 + 15 + 3).0, STRING); } } diff --git a/crates/ra_mbe/src/tt_cursor.rs b/crates/ra_mbe/src/tt_cursor.rs index 6ac3ac187..52e072599 100644 --- a/crates/ra_mbe/src/tt_cursor.rs +++ b/crates/ra_mbe/src/tt_cursor.rs @@ -1,116 +1,17 @@ use crate::ParseError; -use crate::syntax_bridge::{TtTokenSource, TtToken, TokenPeek}; +use crate::subtree_source::SubtreeTokenSource; + use ra_parser::{TokenSource, TreeSink}; use ra_syntax::{ SyntaxKind }; -struct TtCursorTokenSource { - tt_pos: usize, - inner: TtTokenSource, -} - -impl TtCursorTokenSource { - fn new(subtree: &tt::Subtree, curr: usize) -> TtCursorTokenSource { - let mut res = TtCursorTokenSource { inner: TtTokenSource::new(subtree), tt_pos: 1 }; - - // Matching `TtToken` cursor to `tt::TokenTree` cursor - // It is because TtToken is not One to One mapping to tt::Token - // There are 3 case (`TtToken` <=> `tt::TokenTree`) : - // * One to One => ident, single char punch - // * Many to One => `tt::TokenTree::SubTree` - // * One to Many => multibyte punct - // - // Such that we cannot simpliy advance the cursor - // We have to bump it one by one - let mut pos = 0; - while pos < curr { - pos += res.bump(&subtree.token_trees[pos]); - } - - res - } - - fn skip_sibling_leaf(&self, leaf: &tt::Leaf, iter: &mut std::slice::Iter) { - if let tt::Leaf::Punct(p) = leaf { - let mut peek = TokenPeek::new(iter); - if let Some((_, _, _, size)) = TtTokenSource::convert_multi_char_punct(p, &mut peek) { - for _ in 0..size - 1 { - peek.next(); - } - } - } - } - - fn count_tt_tokens( - &self, - tt: &tt::TokenTree, - iter: Option<&mut std::slice::Iter>, - ) -> usize { - assert!(!self.inner.tokens.is_empty()); - - match tt { - tt::TokenTree::Subtree(sub_tree) => { - let mut iter = sub_tree.token_trees.iter(); - let mut count = match sub_tree.delimiter { - tt::Delimiter::None => 0, - _ => 2, - }; - - while let Some(tt) = iter.next() { - count += self.count_tt_tokens(&tt, Some(&mut iter)); - } - count - } - - tt::TokenTree::Leaf(leaf) => { - iter.map(|iter| { - self.skip_sibling_leaf(leaf, iter); - }); - - 1 - } - } - } - - fn count(&self, tt: &tt::TokenTree) -> usize { - self.count_tt_tokens(tt, None) - } - - fn bump(&mut self, tt: &tt::TokenTree) -> usize { - let cur = self.current().unwrap(); - let n_tokens = cur.n_tokens; - self.tt_pos += self.count(tt); - n_tokens - } - - fn current(&self) -> Option<&TtToken> { - self.inner.tokens.get(self.tt_pos) - } -} - -impl TokenSource for TtCursorTokenSource { - fn token_kind(&self, pos: usize) -> SyntaxKind { - if let Some(tok) = self.inner.tokens.get(self.tt_pos + pos) { - tok.kind - } else { - SyntaxKind::EOF - } - } - fn is_token_joint_to_next(&self, pos: usize) -> bool { - self.inner.tokens[self.tt_pos + pos].is_joint_to_next - } - fn is_keyword(&self, pos: usize, kw: &str) -> bool { - self.inner.tokens[self.tt_pos + pos].text == *kw - } -} - -struct TtCursorTokenSink { +struct SubtreeTokenSink { token_pos: usize, } -impl TreeSink for TtCursorTokenSink { +impl TreeSink for SubtreeTokenSink { fn token(&mut self, _kind: SyntaxKind, n_tokens: u8) { self.token_pos += n_tokens as usize; } @@ -201,24 +102,10 @@ impl<'a> TtCursor<'a> { fn eat_parse_result( &mut self, parsed_token: usize, - src: &mut TtCursorTokenSource, + src: &mut SubtreeTokenSource, ) -> Option { - let mut res = vec![]; - - // Matching `TtToken` cursor to `tt::TokenTree` cursor - // It is because TtToken is not One to One mapping to tt::Token - // There are 3 case (`TtToken` <=> `tt::TokenTree`) : - // * One to One => ident, single char punch - // * Many to One => `tt::TokenTree::SubTree` - // * One to Many => multibyte punct - // - // Such that we cannot simpliy advance the cursor - // We have to bump it one by one - let next_pos = src.tt_pos + parsed_token; - while src.tt_pos < next_pos { - let n = src.bump(self.current().unwrap()); - res.extend((0..n).map(|_| self.eat().unwrap())); - } + let (adv, res) = src.bump_n(parsed_token, self.pos); + self.pos += adv; let res: Vec<_> = res.into_iter().cloned().collect(); @@ -236,8 +123,9 @@ impl<'a> TtCursor<'a> { where F: FnOnce(&dyn TokenSource, &mut dyn TreeSink), { - let mut src = TtCursorTokenSource::new(self.subtree, self.pos); - let mut sink = TtCursorTokenSink { token_pos: 0 }; + let mut src = SubtreeTokenSource::new(self.subtree); + src.advance(self.pos, true); + let mut sink = SubtreeTokenSink { token_pos: 0 }; f(&src, &mut sink); -- cgit v1.2.3 From a7254201df07fb929ca689857d7472564d484c3e Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Mon, 8 Apr 2019 00:12:07 +0800 Subject: Combine all tokensource to one and refactoring --- crates/ra_mbe/src/lib.rs | 1 + crates/ra_mbe/src/subtree_parser.rs | 59 +++++++++++++++++++++++++++++++++++++ crates/ra_mbe/src/subtree_source.rs | 56 +++++++++++++++++------------------ crates/ra_mbe/src/syntax_bridge.rs | 6 ++-- crates/ra_mbe/src/tt_cursor.rs | 58 ++---------------------------------- 5 files changed, 94 insertions(+), 86 deletions(-) create mode 100644 crates/ra_mbe/src/subtree_parser.rs diff --git a/crates/ra_mbe/src/lib.rs b/crates/ra_mbe/src/lib.rs index 38d3ec7e1..84ce2b783 100644 --- a/crates/ra_mbe/src/lib.rs +++ b/crates/ra_mbe/src/lib.rs @@ -21,6 +21,7 @@ mod mbe_expander; mod syntax_bridge; mod tt_cursor; mod subtree_source; +mod subtree_parser; use ra_syntax::SmolStr; diff --git a/crates/ra_mbe/src/subtree_parser.rs b/crates/ra_mbe/src/subtree_parser.rs new file mode 100644 index 000000000..48eee6fa7 --- /dev/null +++ b/crates/ra_mbe/src/subtree_parser.rs @@ -0,0 +1,59 @@ +use crate::subtree_source::SubtreeTokenSource; + +use ra_parser::{TokenSource, TreeSink}; +use ra_syntax::{SyntaxKind}; + +struct OffsetTokenSink { + token_pos: usize, +} + +impl TreeSink for OffsetTokenSink { + fn token(&mut self, _kind: SyntaxKind, n_tokens: u8) { + self.token_pos += n_tokens as usize; + } + fn start_node(&mut self, _kind: SyntaxKind) {} + fn finish_node(&mut self) {} + fn error(&mut self, _error: ra_parser::ParseError) {} +} + +pub(crate) struct Parser<'a> { + subtree: &'a tt::Subtree, + pos: &'a mut usize, +} + +impl<'a> Parser<'a> { + pub fn new(pos: &'a mut usize, subtree: &'a tt::Subtree) -> Parser<'a> { + Parser { pos, subtree } + } + + pub fn parse_path(self) -> Option { + self.parse(ra_parser::parse_path) + } + + fn parse(self, f: F) -> Option + where + F: FnOnce(&dyn TokenSource, &mut dyn TreeSink), + { + let mut src = SubtreeTokenSource::new(self.subtree); + src.advance(*self.pos, true); + let mut sink = OffsetTokenSink { token_pos: 0 }; + + f(&src, &mut sink); + + self.finish(sink.token_pos, &mut src) + } + + fn finish(self, parsed_token: usize, src: &mut SubtreeTokenSource) -> Option { + let res = src.bump_n(parsed_token, self.pos); + let res: Vec<_> = res.into_iter().cloned().collect(); + + match res.len() { + 0 => None, + 1 => Some(res[0].clone()), + _ => Some(tt::TokenTree::Subtree(tt::Subtree { + delimiter: tt::Delimiter::None, + token_trees: res, + })), + } + } +} diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs index 8f5ce4ed5..d9ba5d3d0 100644 --- a/crates/ra_mbe/src/subtree_source.rs +++ b/crates/ra_mbe/src/subtree_source.rs @@ -9,12 +9,12 @@ struct TtToken { pub n_tokens: usize, } -/// SubtreeSourceQuerier let outside to query internal tokens as string -pub(crate) struct SubtreeSourceQuerier<'a> { +/// Querier let outside to query internal tokens as string +pub(crate) struct Querier<'a> { src: &'a SubtreeTokenSource<'a>, } -impl<'a> SubtreeSourceQuerier<'a> { +impl<'a> Querier<'a> { pub(crate) fn token(&self, uidx: usize) -> (SyntaxKind, &SmolStr) { let tkn = &self.src.tokens[uidx]; (tkn.kind, &tkn.text) @@ -32,7 +32,8 @@ impl<'a> SubtreeTokenSource<'a> { SubtreeTokenSource { tokens: TtTokenBuilder::build(subtree), tt_pos: 0, subtree } } - pub fn advance(&mut self, curr: usize, skip_first_delimiter: bool) { + // Advance token source and skip the first delimiter + pub fn advance(&mut self, n_token: usize, skip_first_delimiter: bool) { if skip_first_delimiter { self.tt_pos += 1; } @@ -47,32 +48,20 @@ impl<'a> SubtreeTokenSource<'a> { // Such that we cannot simpliy advance the cursor // We have to bump it one by one let mut pos = 0; - while pos < curr { + while pos < n_token { pos += self.bump(&self.subtree.token_trees[pos]); } } - pub fn querier(&self) -> SubtreeSourceQuerier { - SubtreeSourceQuerier { src: self } - } - - fn count(&self, tt: &tt::TokenTree) -> usize { - assert!(!self.tokens.is_empty()); - TtTokenBuilder::count_tt_tokens(tt, None) - } - - pub(crate) fn bump(&mut self, tt: &tt::TokenTree) -> usize { - let cur = &self.tokens[self.tt_pos]; - let n_tokens = cur.n_tokens; - self.tt_pos += self.count(tt); - n_tokens + pub fn querier(&self) -> Querier { + Querier { src: self } } pub(crate) fn bump_n( &mut self, - n_tokens: usize, - mut token_pos: usize, - ) -> (usize, Vec<&tt::TokenTree>) { + n_tt_tokens: usize, + token_pos: &mut usize, + ) -> Vec<&tt::TokenTree> { let mut res = vec![]; // Matching `TtToken` cursor to `tt::TokenTree` cursor // It is because TtToken is not One to One mapping to tt::Token @@ -83,17 +72,28 @@ impl<'a> SubtreeTokenSource<'a> { // // Such that we cannot simpliy advance the cursor // We have to bump it one by one - let next_pos = self.tt_pos + n_tokens; - let old_token_pos = token_pos; + let next_pos = self.tt_pos + n_tt_tokens; while self.tt_pos < next_pos { - let current = &self.subtree.token_trees[token_pos]; + let current = &self.subtree.token_trees[*token_pos]; let n = self.bump(current); - res.extend((0..n).map(|i| &self.subtree.token_trees[token_pos + i])); - token_pos += n; + res.extend((0..n).map(|i| &self.subtree.token_trees[*token_pos + i])); + *token_pos += n; } - (token_pos - old_token_pos, res) + res + } + + fn count(&self, tt: &tt::TokenTree) -> usize { + assert!(!self.tokens.is_empty()); + TtTokenBuilder::count_tt_tokens(tt, None) + } + + fn bump(&mut self, tt: &tt::TokenTree) -> usize { + let cur = &self.tokens[self.tt_pos]; + let n_tokens = cur.n_tokens; + self.tt_pos += self.count(tt); + n_tokens } } diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 102bba341..b0fb91a63 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -4,7 +4,7 @@ use ra_syntax::{ ast, SyntaxKind::*, TextUnit }; -use crate::subtree_source::{SubtreeTokenSource, SubtreeSourceQuerier}; +use crate::subtree_source::{SubtreeTokenSource, Querier}; /// Maps `tt::TokenId` to the relative range of the original token. #[derive(Default)] @@ -107,14 +107,14 @@ fn convert_tt( struct TtTreeSink<'a> { buf: String, - src_querier: SubtreeSourceQuerier<'a>, + src_querier: Querier<'a>, text_pos: TextUnit, token_pos: usize, inner: SyntaxTreeBuilder, } impl<'a> TtTreeSink<'a> { - fn new(src_querier: SubtreeSourceQuerier<'a>) -> TtTreeSink { + fn new(src_querier: Querier<'a>) -> TtTreeSink { TtTreeSink { buf: String::new(), src_querier, diff --git a/crates/ra_mbe/src/tt_cursor.rs b/crates/ra_mbe/src/tt_cursor.rs index 52e072599..d29faa77c 100644 --- a/crates/ra_mbe/src/tt_cursor.rs +++ b/crates/ra_mbe/src/tt_cursor.rs @@ -1,25 +1,5 @@ use crate::ParseError; -use crate::subtree_source::SubtreeTokenSource; - -use ra_parser::{TokenSource, TreeSink}; - -use ra_syntax::{ - SyntaxKind -}; - -struct SubtreeTokenSink { - token_pos: usize, -} - -impl TreeSink for SubtreeTokenSink { - fn token(&mut self, _kind: SyntaxKind, n_tokens: u8) { - self.token_pos += n_tokens as usize; - } - - fn start_node(&mut self, _kind: SyntaxKind) {} - fn finish_node(&mut self) {} - fn error(&mut self, _error: ra_parser::ParseError) {} -} +use crate::subtree_parser::Parser; #[derive(Clone)] pub(crate) struct TtCursor<'a> { @@ -99,41 +79,9 @@ impl<'a> TtCursor<'a> { }) } - fn eat_parse_result( - &mut self, - parsed_token: usize, - src: &mut SubtreeTokenSource, - ) -> Option { - let (adv, res) = src.bump_n(parsed_token, self.pos); - self.pos += adv; - - let res: Vec<_> = res.into_iter().cloned().collect(); - - match res.len() { - 0 => None, - 1 => Some(res[0].clone()), - _ => Some(tt::TokenTree::Subtree(tt::Subtree { - delimiter: tt::Delimiter::None, - token_trees: res, - })), - } - } - - fn eat_parse(&mut self, f: F) -> Option - where - F: FnOnce(&dyn TokenSource, &mut dyn TreeSink), - { - let mut src = SubtreeTokenSource::new(self.subtree); - src.advance(self.pos, true); - let mut sink = SubtreeTokenSink { token_pos: 0 }; - - f(&src, &mut sink); - - self.eat_parse_result(sink.token_pos, &mut src) - } - pub(crate) fn eat_path(&mut self) -> Option { - self.eat_parse(ra_parser::parse_path) + let parser = Parser::new(&mut self.pos, self.subtree); + parser.parse_path() } pub(crate) fn expect_char(&mut self, char: char) -> Result<(), ParseError> { -- cgit v1.2.3 From 2697ecaa64570841f0ed2a3ca5bc02cf41dccc4a Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Mon, 8 Apr 2019 15:58:02 +0800 Subject: Use SubtreeWalker instread of flatten TtToken --- crates/ra_mbe/src/lib.rs | 34 +- crates/ra_mbe/src/subtree_parser.rs | 10 +- crates/ra_mbe/src/subtree_source.rs | 623 ++++++++++++++++++++++-------------- crates/ra_mbe/src/syntax_bridge.rs | 12 +- 4 files changed, 421 insertions(+), 258 deletions(-) diff --git a/crates/ra_mbe/src/lib.rs b/crates/ra_mbe/src/lib.rs index 84ce2b783..a21ea4dbc 100644 --- a/crates/ra_mbe/src/lib.rs +++ b/crates/ra_mbe/src/lib.rs @@ -383,8 +383,22 @@ SOURCE_FILE@[0; 40) assert_eq!(to_literal(&stm_tokens[15 + 3]).text, "\"rust1\""); } - /// The following tests are port from intellij-rust directly - /// https://github.com/intellij-rust/intellij-rust/blob/c4e9feee4ad46e7953b1948c112533360b6087bb/src/test/kotlin/org/rust/lang/core/macros/RsMacroExpansionTest.kt + #[test] + fn test_two_idents() { + let rules = create_rules( + r#" + macro_rules! foo { + ($ i:ident, $ j:ident) => { + fn foo() { let a = $ i; let b = $j; } + } + } +"#, + ); + assert_expansion(&rules, "foo! { foo, bar }", "fn foo () {let a = foo ; let b = bar ;}"); + } + + // The following tests are port from intellij-rust directly + // https://github.com/intellij-rust/intellij-rust/blob/c4e9feee4ad46e7953b1948c112533360b6087bb/src/test/kotlin/org/rust/lang/core/macros/RsMacroExpansionTest.kt #[test] fn test_path() { @@ -401,7 +415,21 @@ SOURCE_FILE@[0; 40) assert_expansion( &rules, "foo! { bar::::baz:: }", - "fn foo () {let a = bar :: < u8 > :: baz :: < u8 > ;}", + "fn foo () {let a = bar ::< u8 > ::baz ::< u8 > ;}", + ); + } + + #[test] + fn test_two_paths() { + let rules = create_rules( + r#" + macro_rules! foo { + ($ i:path, $ j:path) => { + fn foo() { let a = $ i; let b = $j; } + } + } +"#, ); + assert_expansion(&rules, "foo! { foo, bar }", "fn foo () {let a = foo ; let b = bar ;}"); } } diff --git a/crates/ra_mbe/src/subtree_parser.rs b/crates/ra_mbe/src/subtree_parser.rs index 48eee6fa7..f198c8224 100644 --- a/crates/ra_mbe/src/subtree_parser.rs +++ b/crates/ra_mbe/src/subtree_parser.rs @@ -18,12 +18,12 @@ impl TreeSink for OffsetTokenSink { pub(crate) struct Parser<'a> { subtree: &'a tt::Subtree, - pos: &'a mut usize, + cur_pos: &'a mut usize, } impl<'a> Parser<'a> { - pub fn new(pos: &'a mut usize, subtree: &'a tt::Subtree) -> Parser<'a> { - Parser { pos, subtree } + pub fn new(cur_pos: &'a mut usize, subtree: &'a tt::Subtree) -> Parser<'a> { + Parser { cur_pos, subtree } } pub fn parse_path(self) -> Option { @@ -35,7 +35,7 @@ impl<'a> Parser<'a> { F: FnOnce(&dyn TokenSource, &mut dyn TreeSink), { let mut src = SubtreeTokenSource::new(self.subtree); - src.advance(*self.pos, true); + src.start_from_nth(*self.cur_pos); let mut sink = OffsetTokenSink { token_pos: 0 }; f(&src, &mut sink); @@ -44,7 +44,7 @@ impl<'a> Parser<'a> { } fn finish(self, parsed_token: usize, src: &mut SubtreeTokenSource) -> Option { - let res = src.bump_n(parsed_token, self.pos); + let res = src.bump_n(parsed_token, self.cur_pos); let res: Vec<_> = res.into_iter().cloned().collect(); match res.len() { diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs index d9ba5d3d0..9dd475f2c 100644 --- a/crates/ra_mbe/src/subtree_source.rs +++ b/crates/ra_mbe/src/subtree_source.rs @@ -1,7 +1,8 @@ use ra_parser::{TokenSource}; use ra_syntax::{classify_literal, SmolStr, SyntaxKind, SyntaxKind::*}; +use std::cell::{RefCell}; -#[derive(Debug)] +#[derive(Debug, Clone, Eq, PartialEq)] struct TtToken { pub kind: SyntaxKind, pub is_joint_to_next: bool, @@ -9,107 +10,319 @@ struct TtToken { pub n_tokens: usize, } -/// Querier let outside to query internal tokens as string -pub(crate) struct Querier<'a> { - src: &'a SubtreeTokenSource<'a>, +#[derive(Debug, Clone, Eq, PartialEq)] +enum WalkIndex { + DelimiterBegin(Option), + Token(usize, Option), + DelimiterEnd(Option), + Eof, } -impl<'a> Querier<'a> { - pub(crate) fn token(&self, uidx: usize) -> (SyntaxKind, &SmolStr) { - let tkn = &self.src.tokens[uidx]; - (tkn.kind, &tkn.text) +impl<'a> SubTreeWalker<'a> { + fn new(subtree: &tt::Subtree) -> SubTreeWalker { + let mut res = SubTreeWalker { + pos: 0, + stack: vec![], + idx: WalkIndex::Eof, + last_steps: vec![], + subtree, + }; + + res.reset(); + res + } + + fn reset(&mut self) { + self.pos = 0; + self.stack = vec![(self.subtree, None)]; + self.idx = WalkIndex::DelimiterBegin(convert_delim(self.subtree.delimiter, false)); + self.last_steps = vec![]; + + while self.is_empty_delimiter() { + self.forward_unchecked(); + } + } + + // This funciton will fast forward the pos cursor, + // Such that backward will stop at `start_pos` point + fn start_from_nth(&mut self, start_pos: usize) { + self.reset(); + self.pos = start_pos; + self.idx = self.walk_token(start_pos, false); + + while self.is_empty_delimiter() { + self.forward_unchecked(); + } + } + + fn current(&self) -> Option<&TtToken> { + match &self.idx { + WalkIndex::DelimiterBegin(t) => t.as_ref(), + WalkIndex::Token(_, t) => t.as_ref(), + WalkIndex::DelimiterEnd(t) => t.as_ref(), + WalkIndex::Eof => None, + } + } + + fn is_empty_delimiter(&self) -> bool { + match &self.idx { + WalkIndex::DelimiterBegin(None) => true, + WalkIndex::DelimiterEnd(None) => true, + _ => false, + } + } + + fn backward(&mut self) { + if self.last_steps.is_empty() { + return; + } + self.pos -= 1; + loop { + self.backward_unchecked(); + // Skip Empty delimiter + if self.last_steps.is_empty() || !self.is_empty_delimiter() { + break; + } + } + } + + fn backward_unchecked(&mut self) { + if self.last_steps.is_empty() { + return; + } + + let last_step = self.last_steps.pop().unwrap(); + let do_walk_token = match self.idx { + WalkIndex::DelimiterBegin(_) => None, + WalkIndex::Token(u, _) => Some(u), + WalkIndex::DelimiterEnd(_) => { + let (top, _) = self.stack.last().unwrap(); + Some(top.token_trees.len()) + } + WalkIndex::Eof => None, + }; + + self.idx = match do_walk_token { + Some(u) if last_step > u => WalkIndex::DelimiterBegin(convert_delim( + self.stack.last().unwrap().0.delimiter, + false, + )), + Some(u) => self.walk_token(u - last_step, true), + None => match self.idx { + WalkIndex::Eof => { + self.stack.push((self.subtree, None)); + WalkIndex::DelimiterEnd(convert_delim( + self.stack.last().unwrap().0.delimiter, + true, + )) + } + _ => { + let (_, last_top_idx) = self.stack.pop().unwrap(); + assert!(!self.stack.is_empty()); + + match last_top_idx.unwrap() { + 0 => WalkIndex::DelimiterBegin(convert_delim( + self.stack.last().unwrap().0.delimiter, + false, + )), + c => self.walk_token(c - 1, true), + } + } + }, + }; + } + + fn forward(&mut self) { + self.pos += 1; + loop { + self.forward_unchecked(); + if !self.is_empty_delimiter() { + break; + } + } + } + + fn forward_unchecked(&mut self) { + if self.idx == WalkIndex::Eof { + return; + } + + let step = self.current().map(|x| x.n_tokens).unwrap_or(1); + self.last_steps.push(step); + + let do_walk_token = match self.idx { + WalkIndex::DelimiterBegin(_) => Some(0), + WalkIndex::Token(u, _) => Some(u + step), + WalkIndex::DelimiterEnd(_) => None, + _ => unreachable!(), + }; + + let (top, _) = self.stack.last().unwrap(); + + self.idx = match do_walk_token { + Some(u) if u >= top.token_trees.len() => { + WalkIndex::DelimiterEnd(convert_delim(self.stack.last().unwrap().0.delimiter, true)) + } + Some(u) => self.walk_token(u, false), + None => { + let (_, last_top_idx) = self.stack.pop().unwrap(); + match self.stack.last() { + Some(top) => match last_top_idx.unwrap() { + idx if idx + 1 >= top.0.token_trees.len() => { + WalkIndex::DelimiterEnd(convert_delim(top.0.delimiter, true)) + } + idx => self.walk_token(idx + 1, false), + }, + + None => WalkIndex::Eof, + } + } + }; + } + + fn walk_token(&mut self, pos: usize, backward: bool) -> WalkIndex { + let (top, _) = self.stack.last().unwrap(); + match &top.token_trees[pos] { + tt::TokenTree::Subtree(subtree) => { + self.stack.push((subtree, Some(pos))); + let delim = convert_delim(self.stack.last().unwrap().0.delimiter, backward); + if backward { + WalkIndex::DelimiterEnd(delim) + } else { + WalkIndex::DelimiterBegin(delim) + } + } + tt::TokenTree::Leaf(leaf) => WalkIndex::Token(pos, Some(self.walk_leaf(leaf, pos))), + } + } + + fn walk_leaf(&mut self, leaf: &tt::Leaf, pos: usize) -> TtToken { + match leaf { + tt::Leaf::Literal(l) => convert_literal(l), + tt::Leaf::Ident(ident) => convert_ident(ident), + tt::Leaf::Punct(punct) => { + let (top, _) = self.stack.last().unwrap(); + convert_punct(punct, top, pos) + } + } } } -pub(crate) struct SubtreeTokenSource<'a> { - tt_pos: usize, - tokens: Vec, - subtree: &'a tt::Subtree, +pub(crate) trait Querier { + fn token(&self, uidx: usize) -> (SyntaxKind, SmolStr); } -impl<'a> SubtreeTokenSource<'a> { - pub fn new(subtree: &tt::Subtree) -> SubtreeTokenSource { - SubtreeTokenSource { tokens: TtTokenBuilder::build(subtree), tt_pos: 0, subtree } +// A wrapper class for ref cell +pub(crate) struct WalkerOwner<'a> { + walker: RefCell>, + offset: usize, +} + +impl<'a> WalkerOwner<'a> { + fn token_idx<'b>(&self, pos: usize) -> Option { + self.set_walker_pos(pos); + self.walker.borrow().current().cloned() } - // Advance token source and skip the first delimiter - pub fn advance(&mut self, n_token: usize, skip_first_delimiter: bool) { - if skip_first_delimiter { - self.tt_pos += 1; - } + fn start_from_nth(&mut self, pos: usize) { + self.offset = pos; + self.walker.borrow_mut().start_from_nth(pos); + } - // Matching `TtToken` cursor to `tt::TokenTree` cursor - // It is because TtToken is not One to One mapping to tt::Token - // There are 3 case (`TtToken` <=> `tt::TokenTree`) : - // * One to One => ident, single char punch - // * Many to One => `tt::TokenTree::SubTree` - // * One to Many => multibyte punct - // - // Such that we cannot simpliy advance the cursor - // We have to bump it one by one - let mut pos = 0; - while pos < n_token { - pos += self.bump(&self.subtree.token_trees[pos]); + fn set_walker_pos(&self, mut pos: usize) { + pos += self.offset; + let mut walker = self.walker.borrow_mut(); + while pos > walker.pos { + walker.forward(); + } + while pos < walker.pos { + walker.backward(); } + assert!(pos == walker.pos); } - pub fn querier(&self) -> Querier { - Querier { src: self } + fn new(subtree: &'a tt::Subtree) -> Self { + WalkerOwner { walker: RefCell::new(SubTreeWalker::new(subtree)), offset: 0 } } - pub(crate) fn bump_n( - &mut self, - n_tt_tokens: usize, - token_pos: &mut usize, - ) -> Vec<&tt::TokenTree> { + fn collect_token_tree(&mut self, n: usize) -> Vec<&tt::TokenTree> { + self.start_from_nth(self.offset); + let mut res = vec![]; - // Matching `TtToken` cursor to `tt::TokenTree` cursor - // It is because TtToken is not One to One mapping to tt::Token - // There are 3 case (`TtToken` <=> `tt::TokenTree`) : - // * One to One => ident, single char punch - // * Many to One => `tt::TokenTree::SubTree` - // * One to Many => multibyte punct - // - // Such that we cannot simpliy advance the cursor - // We have to bump it one by one - let next_pos = self.tt_pos + n_tt_tokens; - - while self.tt_pos < next_pos { - let current = &self.subtree.token_trees[*token_pos]; - let n = self.bump(current); - res.extend((0..n).map(|i| &self.subtree.token_trees[*token_pos + i])); - *token_pos += n; + let mut walker = self.walker.borrow_mut(); + + while walker.pos - self.offset < n { + if let WalkIndex::Token(u, tt) = &walker.idx { + if walker.stack.len() == 1 { + // We only collect the topmost child + res.push(&walker.stack[0].0.token_trees[*u]); + if let Some(tt) = tt { + for i in 0..tt.n_tokens - 1 { + res.push(&walker.stack[0].0.token_trees[u + i]); + } + } + } + } + + walker.forward(); } res } +} + +impl<'a> Querier for WalkerOwner<'a> { + fn token(&self, uidx: usize) -> (SyntaxKind, SmolStr) { + let tkn = self.token_idx(uidx).unwrap(); + (tkn.kind, tkn.text) + } +} + +pub(crate) struct SubtreeTokenSource<'a> { + walker: WalkerOwner<'a>, +} + +impl<'a> SubtreeTokenSource<'a> { + pub fn new(subtree: &tt::Subtree) -> SubtreeTokenSource { + SubtreeTokenSource { walker: WalkerOwner::new(subtree) } + } + + pub fn start_from_nth(&mut self, n: usize) { + self.walker.start_from_nth(n); + } - fn count(&self, tt: &tt::TokenTree) -> usize { - assert!(!self.tokens.is_empty()); - TtTokenBuilder::count_tt_tokens(tt, None) + pub fn querier<'b>(&'a self) -> &'b WalkerOwner<'a> + where + 'a: 'b, + { + &self.walker } - fn bump(&mut self, tt: &tt::TokenTree) -> usize { - let cur = &self.tokens[self.tt_pos]; - let n_tokens = cur.n_tokens; - self.tt_pos += self.count(tt); - n_tokens + pub(crate) fn bump_n( + &mut self, + parsed_tokens: usize, + cursor_pos: &mut usize, + ) -> Vec<&tt::TokenTree> { + let res = self.walker.collect_token_tree(parsed_tokens); + *cursor_pos += res.len(); + + res } } impl<'a> TokenSource for SubtreeTokenSource<'a> { fn token_kind(&self, pos: usize) -> SyntaxKind { - if let Some(tok) = self.tokens.get(self.tt_pos + pos) { + if let Some(tok) = self.walker.token_idx(pos) { tok.kind } else { SyntaxKind::EOF } } fn is_token_joint_to_next(&self, pos: usize) -> bool { - self.tokens[self.tt_pos + pos].is_joint_to_next + self.walker.token_idx(pos).unwrap().is_joint_to_next } fn is_keyword(&self, pos: usize, kw: &str) -> bool { - self.tokens[self.tt_pos + pos].text == *kw + self.walker.token_idx(pos).unwrap().text == *kw } } @@ -136,10 +349,6 @@ where TokenPeek { iter: itertools::multipeek(iter) } } - pub fn next(&mut self) -> Option<&tt::TokenTree> { - self.iter.next() - } - fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> { if p.spacing != tt::Spacing::Joint { return None; @@ -162,191 +371,117 @@ where } } -struct TtTokenBuilder { - tokens: Vec, -} - -impl TtTokenBuilder { - fn build(sub: &tt::Subtree) -> Vec { - let mut res = TtTokenBuilder { tokens: vec![] }; - res.convert_subtree(sub); - res.tokens - } - - fn convert_subtree(&mut self, sub: &tt::Subtree) { - self.push_delim(sub.delimiter, false); - let mut peek = TokenPeek::new(sub.token_trees.iter()); - while let Some(tt) = peek.iter.next() { - self.convert_tt(tt, &mut peek); +fn convert_multi_char_punct<'b, I>( + p: &tt::Punct, + iter: &mut TokenPeek<'b, I>, +) -> Option<(SyntaxKind, bool, &'static str, usize)> +where + I: Iterator, +{ + if let Some((m, is_joint_to_next)) = iter.current_punct3(p) { + if let Some((kind, text)) = match m { + ('<', '<', '=') => Some((SHLEQ, "<<=")), + ('>', '>', '=') => Some((SHREQ, ">>=")), + ('.', '.', '.') => Some((DOTDOTDOT, "...")), + ('.', '.', '=') => Some((DOTDOTEQ, "..=")), + _ => None, + } { + return Some((kind, is_joint_to_next, text, 3)); } - self.push_delim(sub.delimiter, true) } - fn convert_tt<'b, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'b, I>) - where - I: Iterator, - { - match tt { - tt::TokenTree::Leaf(token) => self.convert_token(token, iter), - tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), + if let Some((m, is_joint_to_next)) = iter.current_punct2(p) { + if let Some((kind, text)) = match m { + ('<', '<') => Some((SHL, "<<")), + ('>', '>') => Some((SHR, ">>")), + + ('|', '|') => Some((PIPEPIPE, "||")), + ('&', '&') => Some((AMPAMP, "&&")), + ('%', '=') => Some((PERCENTEQ, "%=")), + ('*', '=') => Some((STAREQ, "*=")), + ('/', '=') => Some((SLASHEQ, "/=")), + ('^', '=') => Some((CARETEQ, "^=")), + + ('&', '=') => Some((AMPEQ, "&=")), + ('|', '=') => Some((PIPEEQ, "|=")), + ('-', '=') => Some((MINUSEQ, "-=")), + ('+', '=') => Some((PLUSEQ, "+=")), + ('>', '=') => Some((GTEQ, ">=")), + ('<', '=') => Some((LTEQ, "<=")), + + ('-', '>') => Some((THIN_ARROW, "->")), + ('!', '=') => Some((NEQ, "!=")), + ('=', '>') => Some((FAT_ARROW, "=>")), + ('=', '=') => Some((EQEQ, "==")), + ('.', '.') => Some((DOTDOT, "..")), + (':', ':') => Some((COLONCOLON, "::")), + + _ => None, + } { + return Some((kind, is_joint_to_next, text, 2)); } } - fn convert_token<'b, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'b, I>) - where - I: Iterator, - { - let tok = match token { - tt::Leaf::Literal(l) => TtToken { - kind: classify_literal(&l.text).unwrap().kind, - is_joint_to_next: false, - text: l.text.clone(), - n_tokens: 1, - }, - tt::Leaf::Punct(p) => { - if let Some((kind, is_joint_to_next, text, size)) = - Self::convert_multi_char_punct(p, iter) - { - for _ in 0..size - 1 { - iter.next(); - } - - TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } - } else { - let kind = match p.char { - // lexer may produce combpund tokens for these ones - '.' => DOT, - ':' => COLON, - '=' => EQ, - '!' => EXCL, - '-' => MINUS, - c => SyntaxKind::from_char(c).unwrap(), - }; - let text = { - let mut buf = [0u8; 4]; - let s: &str = p.char.encode_utf8(&mut buf); - SmolStr::new(s) - }; - TtToken { - kind, - is_joint_to_next: p.spacing == tt::Spacing::Joint, - text, - n_tokens: 1, - } - } - } - tt::Leaf::Ident(ident) => { - let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); - TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } - } - }; - self.tokens.push(tok) - } - - fn convert_multi_char_punct<'b, I>( - p: &tt::Punct, - iter: &mut TokenPeek<'b, I>, - ) -> Option<(SyntaxKind, bool, &'static str, usize)> - where - I: Iterator, - { - if let Some((m, is_joint_to_next)) = iter.current_punct3(p) { - if let Some((kind, text)) = match m { - ('<', '<', '=') => Some((SHLEQ, "<<=")), - ('>', '>', '=') => Some((SHREQ, ">>=")), - ('.', '.', '.') => Some((DOTDOTDOT, "...")), - ('.', '.', '=') => Some((DOTDOTEQ, "..=")), - _ => None, - } { - return Some((kind, is_joint_to_next, text, 3)); - } - } + None +} - if let Some((m, is_joint_to_next)) = iter.current_punct2(p) { - if let Some((kind, text)) = match m { - ('<', '<') => Some((SHL, "<<")), - ('>', '>') => Some((SHR, ">>")), - - ('|', '|') => Some((PIPEPIPE, "||")), - ('&', '&') => Some((AMPAMP, "&&")), - ('%', '=') => Some((PERCENTEQ, "%=")), - ('*', '=') => Some((STAREQ, "*=")), - ('/', '=') => Some((SLASHEQ, "/=")), - ('^', '=') => Some((CARETEQ, "^=")), - - ('&', '=') => Some((AMPEQ, "&=")), - ('|', '=') => Some((PIPEEQ, "|=")), - ('-', '=') => Some((MINUSEQ, "-=")), - ('+', '=') => Some((PLUSEQ, "+=")), - ('>', '=') => Some((GTEQ, ">=")), - ('<', '=') => Some((LTEQ, "<=")), - - ('-', '>') => Some((THIN_ARROW, "->")), - ('!', '=') => Some((NEQ, "!=")), - ('=', '>') => Some((FAT_ARROW, "=>")), - ('=', '=') => Some((EQEQ, "==")), - ('.', '.') => Some((DOTDOT, "..")), - (':', ':') => Some((COLONCOLON, "::")), - - _ => None, - } { - return Some((kind, is_joint_to_next, text, 2)); - } - } +struct SubTreeWalker<'a> { + pos: usize, + stack: Vec<(&'a tt::Subtree, Option)>, + idx: WalkIndex, + last_steps: Vec, + subtree: &'a tt::Subtree, +} - None - } +fn convert_delim(d: tt::Delimiter, closing: bool) -> Option { + let (kinds, texts) = match d { + tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), + tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), + tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), + tt::Delimiter::None => return None, + }; + + let idx = closing as usize; + let kind = kinds[idx]; + let text = &texts[idx..texts.len() - (1 - idx)]; + Some(TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }) +} - fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { - let (kinds, texts) = match d { - tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), - tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), - tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), - tt::Delimiter::None => return, - }; - let idx = closing as usize; - let kind = kinds[idx]; - let text = &texts[idx..texts.len() - (1 - idx)]; - let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }; - self.tokens.push(tok) - } - - fn skip_sibling_leaf(leaf: &tt::Leaf, iter: &mut std::slice::Iter) { - if let tt::Leaf::Punct(p) = leaf { - let mut peek = TokenPeek::new(iter); - if let Some((_, _, _, size)) = TtTokenBuilder::convert_multi_char_punct(p, &mut peek) { - for _ in 0..size - 1 { - peek.next(); - } - } - } +fn convert_literal(l: &tt::Literal) -> TtToken { + TtToken { + kind: classify_literal(&l.text).unwrap().kind, + is_joint_to_next: false, + text: l.text.clone(), + n_tokens: 1, } +} - fn count_tt_tokens( - tt: &tt::TokenTree, - iter: Option<&mut std::slice::Iter>, - ) -> usize { - match tt { - tt::TokenTree::Subtree(sub_tree) => { - let mut iter = sub_tree.token_trees.iter(); - let mut count = match sub_tree.delimiter { - tt::Delimiter::None => 0, - _ => 2, - }; - - while let Some(tt) = iter.next() { - count += Self::count_tt_tokens(&tt, Some(&mut iter)); - } - count - } - - tt::TokenTree::Leaf(leaf) => { - iter.map(|iter| { - Self::skip_sibling_leaf(leaf, iter); - }); +fn convert_ident(ident: &tt::Ident) -> TtToken { + let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); + TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } +} - 1 - } - } +fn convert_punct(p: &tt::Punct, parent: &tt::Subtree, next: usize) -> TtToken { + let iter = parent.token_trees[next..].iter(); + let mut peek = TokenPeek::new(iter); + + if let Some((kind, is_joint_to_next, text, size)) = convert_multi_char_punct(p, &mut peek) { + TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } + } else { + let kind = match p.char { + // lexer may produce combpund tokens for these ones + '.' => DOT, + ':' => COLON, + '=' => EQ, + '!' => EXCL, + '-' => MINUS, + c => SyntaxKind::from_char(c).unwrap(), + }; + let text = { + let mut buf = [0u8; 4]; + let s: &str = p.char.encode_utf8(&mut buf); + SmolStr::new(s) + }; + TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text, n_tokens: 1 } } } diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index b0fb91a63..19c17bd55 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -105,16 +105,16 @@ fn convert_tt( Some(res) } -struct TtTreeSink<'a> { +struct TtTreeSink<'a, Q: Querier> { buf: String, - src_querier: Querier<'a>, + src_querier: &'a Q, text_pos: TextUnit, token_pos: usize, inner: SyntaxTreeBuilder, } -impl<'a> TtTreeSink<'a> { - fn new(src_querier: Querier<'a>) -> TtTreeSink { +impl<'a, Q: Querier> TtTreeSink<'a, Q> { + fn new(src_querier: &'a Q) -> Self { TtTreeSink { buf: String::new(), src_querier, @@ -125,10 +125,10 @@ impl<'a> TtTreeSink<'a> { } } -impl<'a> TreeSink for TtTreeSink<'a> { +impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> { fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { for _ in 0..n_tokens { - self.buf += self.src_querier.token(self.token_pos).1; + self.buf += &self.src_querier.token(self.token_pos).1; self.token_pos += 1; } self.text_pos += TextUnit::of_str(&self.buf); -- cgit v1.2.3 From 184e9ea230ecbc468eda9309888e6abefbc70aaa Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Mon, 8 Apr 2019 18:21:48 +0800 Subject: Fixed empty node bug --- crates/ra_mbe/src/subtree_source.rs | 59 ++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs index 9dd475f2c..997a600a5 100644 --- a/crates/ra_mbe/src/subtree_source.rs +++ b/crates/ra_mbe/src/subtree_source.rs @@ -18,6 +18,15 @@ enum WalkIndex { Eof, } +#[derive(Debug)] +struct SubTreeWalker<'a> { + pos: usize, + stack: Vec<(&'a tt::Subtree, Option)>, + idx: WalkIndex, + last_steps: Vec, + subtree: &'a tt::Subtree, +} + impl<'a> SubTreeWalker<'a> { fn new(subtree: &tt::Subtree) -> SubTreeWalker { let mut res = SubTreeWalker { @@ -84,6 +93,13 @@ impl<'a> SubTreeWalker<'a> { break; } } + + // Move forward a little bit + if self.last_steps.is_empty() { + while self.is_empty_delimiter() { + self.forward_unchecked(); + } + } } fn backward_unchecked(&mut self) { @@ -133,6 +149,10 @@ impl<'a> SubTreeWalker<'a> { } fn forward(&mut self) { + if self.idx == WalkIndex::Eof { + return; + } + self.pos += 1; loop { self.forward_unchecked(); @@ -213,15 +233,38 @@ pub(crate) trait Querier { } // A wrapper class for ref cell +#[derive(Debug)] pub(crate) struct WalkerOwner<'a> { walker: RefCell>, offset: usize, + temp: RefCell>>, } impl<'a> WalkerOwner<'a> { fn token_idx<'b>(&self, pos: usize) -> Option { self.set_walker_pos(pos); - self.walker.borrow().current().cloned() + let walker = self.walker.borrow(); + let r = walker.current().cloned(); + + if walker.subtree.token_trees.len() == 1 { + if let tt::TokenTree::Leaf(_) = &walker.subtree.token_trees[0] { + let mut temp = self.temp.borrow_mut(); + + if r.is_none() { + if let Some(Some(p)) = temp.get(&pos) { + unreachable!( + "nWWWWWWWWWWWW~~~~~~~~~~~~~~,\n{:#?}\n{:#?}\n{:#?}", + pos, p, self + ); + } + } + + // eprintln!("===>{:#?}\n{:#?}\n{:#?}", pos, r, self); + temp.insert(pos, r.clone()); + } + } + + r } fn start_from_nth(&mut self, pos: usize) { @@ -242,7 +285,11 @@ impl<'a> WalkerOwner<'a> { } fn new(subtree: &'a tt::Subtree) -> Self { - WalkerOwner { walker: RefCell::new(SubTreeWalker::new(subtree)), offset: 0 } + WalkerOwner { + walker: RefCell::new(SubTreeWalker::new(subtree)), + offset: 0, + temp: RefCell::new(Default::default()), + } } fn collect_token_tree(&mut self, n: usize) -> Vec<&tt::TokenTree> { @@ -425,14 +472,6 @@ where None } -struct SubTreeWalker<'a> { - pos: usize, - stack: Vec<(&'a tt::Subtree, Option)>, - idx: WalkIndex, - last_steps: Vec, - subtree: &'a tt::Subtree, -} - fn convert_delim(d: tt::Delimiter, closing: bool) -> Option { let (kinds, texts) = match d { tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), -- cgit v1.2.3 From c785c7312da834c62b44c4f2736dbde3404b64dd Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Mon, 8 Apr 2019 19:21:07 +0800 Subject: Fixed infintite loop bug --- crates/ra_mbe/src/subtree_source.rs | 34 ++++------------------------------ 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs index 997a600a5..5f20112ce 100644 --- a/crates/ra_mbe/src/subtree_source.rs +++ b/crates/ra_mbe/src/subtree_source.rs @@ -237,34 +237,13 @@ pub(crate) trait Querier { pub(crate) struct WalkerOwner<'a> { walker: RefCell>, offset: usize, - temp: RefCell>>, } impl<'a> WalkerOwner<'a> { fn token_idx<'b>(&self, pos: usize) -> Option { self.set_walker_pos(pos); let walker = self.walker.borrow(); - let r = walker.current().cloned(); - - if walker.subtree.token_trees.len() == 1 { - if let tt::TokenTree::Leaf(_) = &walker.subtree.token_trees[0] { - let mut temp = self.temp.borrow_mut(); - - if r.is_none() { - if let Some(Some(p)) = temp.get(&pos) { - unreachable!( - "nWWWWWWWWWWWW~~~~~~~~~~~~~~,\n{:#?}\n{:#?}\n{:#?}", - pos, p, self - ); - } - } - - // eprintln!("===>{:#?}\n{:#?}\n{:#?}", pos, r, self); - temp.insert(pos, r.clone()); - } - } - - r + walker.current().cloned() } fn start_from_nth(&mut self, pos: usize) { @@ -275,21 +254,16 @@ impl<'a> WalkerOwner<'a> { fn set_walker_pos(&self, mut pos: usize) { pos += self.offset; let mut walker = self.walker.borrow_mut(); - while pos > walker.pos { + while pos > walker.pos && walker.idx != WalkIndex::Eof { walker.forward(); } while pos < walker.pos { walker.backward(); } - assert!(pos == walker.pos); } fn new(subtree: &'a tt::Subtree) -> Self { - WalkerOwner { - walker: RefCell::new(SubTreeWalker::new(subtree)), - offset: 0, - temp: RefCell::new(Default::default()), - } + WalkerOwner { walker: RefCell::new(SubTreeWalker::new(subtree)), offset: 0 } } fn collect_token_tree(&mut self, n: usize) -> Vec<&tt::TokenTree> { @@ -501,7 +475,7 @@ fn convert_ident(ident: &tt::Ident) -> TtToken { } fn convert_punct(p: &tt::Punct, parent: &tt::Subtree, next: usize) -> TtToken { - let iter = parent.token_trees[next..].iter(); + let iter = parent.token_trees[next + 1..].iter(); let mut peek = TokenPeek::new(iter); if let Some((kind, is_joint_to_next, text, size)) = convert_multi_char_punct(p, &mut peek) { -- cgit v1.2.3 From 8ed710457875e6f580a0ddf6ab29c6b10d389a41 Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Mon, 8 Apr 2019 20:32:21 +0800 Subject: Refactoring and add some docs --- crates/ra_mbe/src/subtree_parser.rs | 4 +- crates/ra_mbe/src/subtree_source.rs | 185 ++++++++++++++++++++---------------- 2 files changed, 106 insertions(+), 83 deletions(-) diff --git a/crates/ra_mbe/src/subtree_parser.rs b/crates/ra_mbe/src/subtree_parser.rs index f198c8224..ce39a40bb 100644 --- a/crates/ra_mbe/src/subtree_parser.rs +++ b/crates/ra_mbe/src/subtree_parser.rs @@ -44,7 +44,9 @@ impl<'a> Parser<'a> { } fn finish(self, parsed_token: usize, src: &mut SubtreeTokenSource) -> Option { - let res = src.bump_n(parsed_token, self.cur_pos); + let res = src.bump_n(parsed_token); + *self.cur_pos += res.len(); + let res: Vec<_> = res.into_iter().cloned().collect(); match res.len() { diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs index 5f20112ce..4b37c2bda 100644 --- a/crates/ra_mbe/src/subtree_source.rs +++ b/crates/ra_mbe/src/subtree_source.rs @@ -11,7 +11,7 @@ struct TtToken { } #[derive(Debug, Clone, Eq, PartialEq)] -enum WalkIndex { +enum WalkCursor { DelimiterBegin(Option), Token(usize, Option), DelimiterEnd(Option), @@ -22,7 +22,7 @@ enum WalkIndex { struct SubTreeWalker<'a> { pos: usize, stack: Vec<(&'a tt::Subtree, Option)>, - idx: WalkIndex, + cursor: WalkCursor, last_steps: Vec, subtree: &'a tt::Subtree, } @@ -32,7 +32,7 @@ impl<'a> SubTreeWalker<'a> { let mut res = SubTreeWalker { pos: 0, stack: vec![], - idx: WalkIndex::Eof, + cursor: WalkCursor::Eof, last_steps: vec![], subtree, }; @@ -41,10 +41,14 @@ impl<'a> SubTreeWalker<'a> { res } + fn is_eof(&self) -> bool { + self.cursor == WalkCursor::Eof + } + fn reset(&mut self) { self.pos = 0; self.stack = vec![(self.subtree, None)]; - self.idx = WalkIndex::DelimiterBegin(convert_delim(self.subtree.delimiter, false)); + self.cursor = WalkCursor::DelimiterBegin(convert_delim(self.subtree.delimiter, false)); self.last_steps = vec![]; while self.is_empty_delimiter() { @@ -52,12 +56,12 @@ impl<'a> SubTreeWalker<'a> { } } - // This funciton will fast forward the pos cursor, + // This funciton will fast forward the cursor, // Such that backward will stop at `start_pos` point fn start_from_nth(&mut self, start_pos: usize) { self.reset(); self.pos = start_pos; - self.idx = self.walk_token(start_pos, false); + self.cursor = self.walk_token(start_pos, 0, false); while self.is_empty_delimiter() { self.forward_unchecked(); @@ -65,22 +69,23 @@ impl<'a> SubTreeWalker<'a> { } fn current(&self) -> Option<&TtToken> { - match &self.idx { - WalkIndex::DelimiterBegin(t) => t.as_ref(), - WalkIndex::Token(_, t) => t.as_ref(), - WalkIndex::DelimiterEnd(t) => t.as_ref(), - WalkIndex::Eof => None, + match &self.cursor { + WalkCursor::DelimiterBegin(t) => t.as_ref(), + WalkCursor::Token(_, t) => t.as_ref(), + WalkCursor::DelimiterEnd(t) => t.as_ref(), + WalkCursor::Eof => None, } } fn is_empty_delimiter(&self) -> bool { - match &self.idx { - WalkIndex::DelimiterBegin(None) => true, - WalkIndex::DelimiterEnd(None) => true, + match &self.cursor { + WalkCursor::DelimiterBegin(None) => true, + WalkCursor::DelimiterEnd(None) => true, _ => false, } } + /// Move cursor backward by 1 step with empty checking fn backward(&mut self) { if self.last_steps.is_empty() { return; @@ -94,7 +99,7 @@ impl<'a> SubTreeWalker<'a> { } } - // Move forward a little bit + // Move forward if it is empty delimiter if self.last_steps.is_empty() { while self.is_empty_delimiter() { self.forward_unchecked(); @@ -102,54 +107,53 @@ impl<'a> SubTreeWalker<'a> { } } + /// Move cursor backward by 1 step without empty check + /// + /// Depends on the current state of cursor: + /// + /// * Delimiter Begin => Pop the stack, goto last walking token (`walk_token`) + /// * Token => Goto prev token (`walk_token`) + /// * Delimiter End => Goto the last child token (`walk_token`) + /// * Eof => push the root subtree, and set it as Delimiter End fn backward_unchecked(&mut self) { if self.last_steps.is_empty() { return; } let last_step = self.last_steps.pop().unwrap(); - let do_walk_token = match self.idx { - WalkIndex::DelimiterBegin(_) => None, - WalkIndex::Token(u, _) => Some(u), - WalkIndex::DelimiterEnd(_) => { + let do_walk_token = match self.cursor { + WalkCursor::DelimiterBegin(_) => None, + WalkCursor::Token(u, _) => Some(u), + WalkCursor::DelimiterEnd(_) => { let (top, _) = self.stack.last().unwrap(); Some(top.token_trees.len()) } - WalkIndex::Eof => None, + WalkCursor::Eof => None, }; - self.idx = match do_walk_token { - Some(u) if last_step > u => WalkIndex::DelimiterBegin(convert_delim( - self.stack.last().unwrap().0.delimiter, - false, - )), - Some(u) => self.walk_token(u - last_step, true), - None => match self.idx { - WalkIndex::Eof => { + self.cursor = match do_walk_token { + Some(u) => self.walk_token(u, last_step, true), + None => match self.cursor { + WalkCursor::Eof => { self.stack.push((self.subtree, None)); - WalkIndex::DelimiterEnd(convert_delim( + WalkCursor::DelimiterEnd(convert_delim( self.stack.last().unwrap().0.delimiter, true, )) } _ => { - let (_, last_top_idx) = self.stack.pop().unwrap(); + let (_, last_top_cursor) = self.stack.pop().unwrap(); assert!(!self.stack.is_empty()); - match last_top_idx.unwrap() { - 0 => WalkIndex::DelimiterBegin(convert_delim( - self.stack.last().unwrap().0.delimiter, - false, - )), - c => self.walk_token(c - 1, true), - } + self.walk_token(last_top_cursor.unwrap(), last_step, true) } }, }; } + /// Move cursor forward by 1 step with empty checking fn forward(&mut self) { - if self.idx == WalkIndex::Eof { + if self.is_eof() { return; } @@ -162,57 +166,80 @@ impl<'a> SubTreeWalker<'a> { } } + /// Move cursor forward by 1 step without empty checking + /// + /// Depends on the current state of cursor: + /// + /// * Delimiter Begin => Goto the first child token (`walk_token`) + /// * Token => Goto next token (`walk_token`) + /// * Delimiter End => Pop the stack, goto last walking token (`walk_token`) + /// fn forward_unchecked(&mut self) { - if self.idx == WalkIndex::Eof { + if self.is_eof() { return; } let step = self.current().map(|x| x.n_tokens).unwrap_or(1); self.last_steps.push(step); - let do_walk_token = match self.idx { - WalkIndex::DelimiterBegin(_) => Some(0), - WalkIndex::Token(u, _) => Some(u + step), - WalkIndex::DelimiterEnd(_) => None, + let do_walk_token = match self.cursor { + WalkCursor::DelimiterBegin(_) => Some((0, 0)), + WalkCursor::Token(u, _) => Some((u, step)), + WalkCursor::DelimiterEnd(_) => None, _ => unreachable!(), }; - let (top, _) = self.stack.last().unwrap(); - - self.idx = match do_walk_token { - Some(u) if u >= top.token_trees.len() => { - WalkIndex::DelimiterEnd(convert_delim(self.stack.last().unwrap().0.delimiter, true)) - } - Some(u) => self.walk_token(u, false), + self.cursor = match do_walk_token { + Some((u, step)) => self.walk_token(u, step, false), None => { let (_, last_top_idx) = self.stack.pop().unwrap(); match self.stack.last() { - Some(top) => match last_top_idx.unwrap() { - idx if idx + 1 >= top.0.token_trees.len() => { - WalkIndex::DelimiterEnd(convert_delim(top.0.delimiter, true)) - } - idx => self.walk_token(idx + 1, false), - }, - - None => WalkIndex::Eof, + Some(_) => self.walk_token(last_top_idx.unwrap(), 1, false), + None => WalkCursor::Eof, } } }; } - fn walk_token(&mut self, pos: usize, backward: bool) -> WalkIndex { + /// Traversal child token + /// Depends on the new position, it returns: + /// + /// * new position < 0 => DelimiterBegin + /// * new position > token_tree.len() => DelimiterEnd + /// * if new position is a subtree, depends on traversal direction: + /// ** backward => DelimiterEnd + /// ** forward => DelimiterBegin + /// * if new psoition is a leaf, return walk_leaf() + fn walk_token(&mut self, pos: usize, offset: usize, backward: bool) -> WalkCursor { let (top, _) = self.stack.last().unwrap(); + + if backward && pos < offset { + return WalkCursor::DelimiterBegin(convert_delim( + self.stack.last().unwrap().0.delimiter, + false, + )); + } + + if !backward && pos + offset >= top.token_trees.len() { + return WalkCursor::DelimiterEnd(convert_delim( + self.stack.last().unwrap().0.delimiter, + true, + )); + } + + let pos = if backward { pos - offset } else { pos + offset }; + match &top.token_trees[pos] { tt::TokenTree::Subtree(subtree) => { self.stack.push((subtree, Some(pos))); let delim = convert_delim(self.stack.last().unwrap().0.delimiter, backward); if backward { - WalkIndex::DelimiterEnd(delim) + WalkCursor::DelimiterEnd(delim) } else { - WalkIndex::DelimiterBegin(delim) + WalkCursor::DelimiterBegin(delim) } } - tt::TokenTree::Leaf(leaf) => WalkIndex::Token(pos, Some(self.walk_leaf(leaf, pos))), + tt::TokenTree::Leaf(leaf) => WalkCursor::Token(pos, Some(self.walk_leaf(leaf, pos))), } } @@ -240,7 +267,11 @@ pub(crate) struct WalkerOwner<'a> { } impl<'a> WalkerOwner<'a> { - fn token_idx<'b>(&self, pos: usize) -> Option { + fn new(subtree: &'a tt::Subtree) -> Self { + WalkerOwner { walker: RefCell::new(SubTreeWalker::new(subtree)), offset: 0 } + } + + fn get<'b>(&self, pos: usize) -> Option { self.set_walker_pos(pos); let walker = self.walker.borrow(); walker.current().cloned() @@ -254,7 +285,7 @@ impl<'a> WalkerOwner<'a> { fn set_walker_pos(&self, mut pos: usize) { pos += self.offset; let mut walker = self.walker.borrow_mut(); - while pos > walker.pos && walker.idx != WalkIndex::Eof { + while pos > walker.pos && !walker.is_eof() { walker.forward(); } while pos < walker.pos { @@ -262,18 +293,14 @@ impl<'a> WalkerOwner<'a> { } } - fn new(subtree: &'a tt::Subtree) -> Self { - WalkerOwner { walker: RefCell::new(SubTreeWalker::new(subtree)), offset: 0 } - } - - fn collect_token_tree(&mut self, n: usize) -> Vec<&tt::TokenTree> { + fn collect_token_trees(&mut self, n: usize) -> Vec<&tt::TokenTree> { self.start_from_nth(self.offset); let mut res = vec![]; let mut walker = self.walker.borrow_mut(); while walker.pos - self.offset < n { - if let WalkIndex::Token(u, tt) = &walker.idx { + if let WalkCursor::Token(u, tt) = &walker.cursor { if walker.stack.len() == 1 { // We only collect the topmost child res.push(&walker.stack[0].0.token_trees[*u]); @@ -294,7 +321,7 @@ impl<'a> WalkerOwner<'a> { impl<'a> Querier for WalkerOwner<'a> { fn token(&self, uidx: usize) -> (SyntaxKind, SmolStr) { - let tkn = self.token_idx(uidx).unwrap(); + let tkn = self.get(uidx).unwrap(); (tkn.kind, tkn.text) } } @@ -319,31 +346,25 @@ impl<'a> SubtreeTokenSource<'a> { &self.walker } - pub(crate) fn bump_n( - &mut self, - parsed_tokens: usize, - cursor_pos: &mut usize, - ) -> Vec<&tt::TokenTree> { - let res = self.walker.collect_token_tree(parsed_tokens); - *cursor_pos += res.len(); - + pub(crate) fn bump_n(&mut self, parsed_tokens: usize) -> Vec<&tt::TokenTree> { + let res = self.walker.collect_token_trees(parsed_tokens); res } } impl<'a> TokenSource for SubtreeTokenSource<'a> { fn token_kind(&self, pos: usize) -> SyntaxKind { - if let Some(tok) = self.walker.token_idx(pos) { + if let Some(tok) = self.walker.get(pos) { tok.kind } else { SyntaxKind::EOF } } fn is_token_joint_to_next(&self, pos: usize) -> bool { - self.walker.token_idx(pos).unwrap().is_joint_to_next + self.walker.get(pos).unwrap().is_joint_to_next } fn is_keyword(&self, pos: usize, kw: &str) -> bool { - self.walker.token_idx(pos).unwrap().text == *kw + self.walker.get(pos).unwrap().text == *kw } } -- cgit v1.2.3