From aac9dfa46418603940ab2333cfea2190d9464d9e Mon Sep 17 00:00:00 2001 From: Edwin Cheng Date: Sat, 6 Apr 2019 20:14:28 +0800 Subject: Add TtCursorTokenSource and TtCursorTokenSink --- crates/ra_mbe/src/syntax_bridge.rs | 58 ++++++++----- crates/ra_mbe/src/tt_cursor.rs | 170 ++++++++++++++++++++++++++++++++++++- crates/ra_parser/src/grammar.rs | 4 + crates/ra_parser/src/lib.rs | 8 ++ 4 files changed, 216 insertions(+), 24 deletions(-) diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 139a0fd33..3a0702a30 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs @@ -104,15 +104,16 @@ fn convert_tt( } #[derive(Debug)] -struct TtTokenSource { - tokens: Vec, +pub(crate) struct TtTokenSource { + pub tokens: Vec, } #[derive(Debug)] -struct TtToken { - kind: SyntaxKind, - is_joint_to_next: bool, - text: SmolStr, +pub(crate) struct TtToken { + pub kind: SyntaxKind, + pub is_joint_to_next: bool, + pub text: SmolStr, + pub n_tokens: usize, } // Some helper functions @@ -123,7 +124,7 @@ fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> { None } -struct TokenPeek<'a, I> +pub(crate) struct TokenPeek<'a, I> where I: Iterator, { @@ -134,7 +135,11 @@ impl<'a, I> TokenPeek<'a, I> where I: Iterator, { - fn next(&mut self) -> Option<&tt::TokenTree> { + pub fn new(iter: I) -> Self { + TokenPeek { iter: itertools::multipeek(iter) } + } + + pub fn next(&mut self) -> Option<&tt::TokenTree> { self.iter.next() } @@ -161,14 +166,14 @@ where } impl TtTokenSource { - fn new(tt: &tt::Subtree) -> TtTokenSource { + pub fn new(tt: &tt::Subtree) -> TtTokenSource { let mut res = TtTokenSource { tokens: Vec::new() }; res.convert_subtree(tt); res } fn convert_subtree(&mut self, sub: &tt::Subtree) { self.push_delim(sub.delimiter, false); - let mut peek = TokenPeek { iter: itertools::multipeek(sub.token_trees.iter()) }; + let mut peek = TokenPeek::new(sub.token_trees.iter()); while let Some(tt) = peek.iter.next() { self.convert_tt(tt, &mut peek); } @@ -194,10 +199,17 @@ impl TtTokenSource { kind: classify_literal(&l.text).unwrap().kind, is_joint_to_next: false, text: l.text.clone(), + n_tokens: 1, }, tt::Leaf::Punct(p) => { - if let Some(tt) = Self::convert_multi_char_punct(p, iter) { - tt + if let Some((kind, is_joint_to_next, text, size)) = + Self::convert_multi_char_punct(p, iter) + { + for _ in 0..size - 1 { + iter.next(); + } + + TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } } else { let kind = match p.char { // lexer may produce combpund tokens for these ones @@ -213,21 +225,26 @@ impl TtTokenSource { let s: &str = p.char.encode_utf8(&mut buf); SmolStr::new(s) }; - TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text } + TtToken { + kind, + is_joint_to_next: p.spacing == tt::Spacing::Joint, + text, + n_tokens: 1, + } } } tt::Leaf::Ident(ident) => { let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); - TtToken { kind, is_joint_to_next: false, text: ident.text.clone() } + TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } } }; self.tokens.push(tok) } - fn convert_multi_char_punct<'a, I>( + pub(crate) fn convert_multi_char_punct<'a, I>( p: &tt::Punct, iter: &mut TokenPeek<'a, I>, - ) -> Option + ) -> Option<(SyntaxKind, bool, &'static str, usize)> where I: Iterator, { @@ -239,9 +256,7 @@ impl TtTokenSource { ('.', '.', '=') => Some((DOTDOTEQ, "..=")), _ => None, } { - iter.next(); - iter.next(); - return Some(TtToken { kind, is_joint_to_next, text: text.into() }); + return Some((kind, is_joint_to_next, text, 3)); } } @@ -273,8 +288,7 @@ impl TtTokenSource { _ => None, } { - iter.next(); - return Some(TtToken { kind, is_joint_to_next, text: text.into() }); + return Some((kind, is_joint_to_next, text, 2)); } } @@ -291,7 +305,7 @@ impl TtTokenSource { let idx = closing as usize; let kind = kinds[idx]; let text = &texts[idx..texts.len() - (1 - idx)]; - let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) }; + let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }; self.tokens.push(tok) } } diff --git a/crates/ra_mbe/src/tt_cursor.rs b/crates/ra_mbe/src/tt_cursor.rs index 6f619621a..6ac3ac187 100644 --- a/crates/ra_mbe/src/tt_cursor.rs +++ b/crates/ra_mbe/src/tt_cursor.rs @@ -1,4 +1,124 @@ use crate::ParseError; +use crate::syntax_bridge::{TtTokenSource, TtToken, TokenPeek}; +use ra_parser::{TokenSource, TreeSink}; + +use ra_syntax::{ + SyntaxKind +}; + +struct TtCursorTokenSource { + tt_pos: usize, + inner: TtTokenSource, +} + +impl TtCursorTokenSource { + fn new(subtree: &tt::Subtree, curr: usize) -> TtCursorTokenSource { + let mut res = TtCursorTokenSource { inner: TtTokenSource::new(subtree), tt_pos: 1 }; + + // Matching `TtToken` cursor to `tt::TokenTree` cursor + // It is because TtToken is not One to One mapping to tt::Token + // There are 3 case (`TtToken` <=> `tt::TokenTree`) : + // * One to One => ident, single char punch + // * Many to One => `tt::TokenTree::SubTree` + // * One to Many => multibyte punct + // + // Such that we cannot simpliy advance the cursor + // We have to bump it one by one + let mut pos = 0; + while pos < curr { + pos += res.bump(&subtree.token_trees[pos]); + } + + res + } + + fn skip_sibling_leaf(&self, leaf: &tt::Leaf, iter: &mut std::slice::Iter) { + if let tt::Leaf::Punct(p) = leaf { + let mut peek = TokenPeek::new(iter); + if let Some((_, _, _, size)) = TtTokenSource::convert_multi_char_punct(p, &mut peek) { + for _ in 0..size - 1 { + peek.next(); + } + } + } + } + + fn count_tt_tokens( + &self, + tt: &tt::TokenTree, + iter: Option<&mut std::slice::Iter>, + ) -> usize { + assert!(!self.inner.tokens.is_empty()); + + match tt { + tt::TokenTree::Subtree(sub_tree) => { + let mut iter = sub_tree.token_trees.iter(); + let mut count = match sub_tree.delimiter { + tt::Delimiter::None => 0, + _ => 2, + }; + + while let Some(tt) = iter.next() { + count += self.count_tt_tokens(&tt, Some(&mut iter)); + } + count + } + + tt::TokenTree::Leaf(leaf) => { + iter.map(|iter| { + self.skip_sibling_leaf(leaf, iter); + }); + + 1 + } + } + } + + fn count(&self, tt: &tt::TokenTree) -> usize { + self.count_tt_tokens(tt, None) + } + + fn bump(&mut self, tt: &tt::TokenTree) -> usize { + let cur = self.current().unwrap(); + let n_tokens = cur.n_tokens; + self.tt_pos += self.count(tt); + n_tokens + } + + fn current(&self) -> Option<&TtToken> { + self.inner.tokens.get(self.tt_pos) + } +} + +impl TokenSource for TtCursorTokenSource { + fn token_kind(&self, pos: usize) -> SyntaxKind { + if let Some(tok) = self.inner.tokens.get(self.tt_pos + pos) { + tok.kind + } else { + SyntaxKind::EOF + } + } + fn is_token_joint_to_next(&self, pos: usize) -> bool { + self.inner.tokens[self.tt_pos + pos].is_joint_to_next + } + fn is_keyword(&self, pos: usize, kw: &str) -> bool { + self.inner.tokens[self.tt_pos + pos].text == *kw + } +} + +struct TtCursorTokenSink { + token_pos: usize, +} + +impl TreeSink for TtCursorTokenSink { + fn token(&mut self, _kind: SyntaxKind, n_tokens: u8) { + self.token_pos += n_tokens as usize; + } + + fn start_node(&mut self, _kind: SyntaxKind) {} + fn finish_node(&mut self) {} + fn error(&mut self, _error: ra_parser::ParseError) {} +} #[derive(Clone)] pub(crate) struct TtCursor<'a> { @@ -78,8 +198,54 @@ impl<'a> TtCursor<'a> { }) } - pub(crate) fn eat_path(&mut self) -> Option { - None + fn eat_parse_result( + &mut self, + parsed_token: usize, + src: &mut TtCursorTokenSource, + ) -> Option { + let mut res = vec![]; + + // Matching `TtToken` cursor to `tt::TokenTree` cursor + // It is because TtToken is not One to One mapping to tt::Token + // There are 3 case (`TtToken` <=> `tt::TokenTree`) : + // * One to One => ident, single char punch + // * Many to One => `tt::TokenTree::SubTree` + // * One to Many => multibyte punct + // + // Such that we cannot simpliy advance the cursor + // We have to bump it one by one + let next_pos = src.tt_pos + parsed_token; + while src.tt_pos < next_pos { + let n = src.bump(self.current().unwrap()); + res.extend((0..n).map(|_| self.eat().unwrap())); + } + + let res: Vec<_> = res.into_iter().cloned().collect(); + + match res.len() { + 0 => None, + 1 => Some(res[0].clone()), + _ => Some(tt::TokenTree::Subtree(tt::Subtree { + delimiter: tt::Delimiter::None, + token_trees: res, + })), + } + } + + fn eat_parse(&mut self, f: F) -> Option + where + F: FnOnce(&dyn TokenSource, &mut dyn TreeSink), + { + let mut src = TtCursorTokenSource::new(self.subtree, self.pos); + let mut sink = TtCursorTokenSink { token_pos: 0 }; + + f(&src, &mut sink); + + self.eat_parse_result(sink.token_pos, &mut src) + } + + pub(crate) fn eat_path(&mut self) -> Option { + self.eat_parse(ra_parser::parse_path) } pub(crate) fn expect_char(&mut self, char: char) -> Result<(), ParseError> { diff --git a/crates/ra_parser/src/grammar.rs b/crates/ra_parser/src/grammar.rs index b2ffeff8c..c5f510e6b 100644 --- a/crates/ra_parser/src/grammar.rs +++ b/crates/ra_parser/src/grammar.rs @@ -49,6 +49,10 @@ pub(crate) fn root(p: &mut Parser) { m.complete(p, SOURCE_FILE); } +pub(crate) fn path(p: &mut Parser) { + paths::type_path(p); +} + pub(crate) fn reparser( node: SyntaxKind, first_child: Option, diff --git a/crates/ra_parser/src/lib.rs b/crates/ra_parser/src/lib.rs index 30ba06aac..3ceeeebd7 100644 --- a/crates/ra_parser/src/lib.rs +++ b/crates/ra_parser/src/lib.rs @@ -61,6 +61,14 @@ pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { event::process(tree_sink, events); } +/// Parse given tokens into the given sink as a path +pub fn parse_path(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { + let mut p = parser::Parser::new(token_source); + grammar::path(&mut p); + let events = p.finish(); + event::process(tree_sink, events); +} + /// A parsing function for a specific braced-block. pub struct Reparser(fn(&mut parser::Parser)); -- cgit v1.2.3