From aac9dfa46418603940ab2333cfea2190d9464d9e Mon Sep 17 00:00:00 2001
From: Edwin Cheng <edwin0cheng@gmail.com>
Date: Sat, 6 Apr 2019 20:14:28 +0800
Subject: Add TtCursorTokenSource and TtCursorTokenSink

---
 crates/ra_mbe/src/syntax_bridge.rs |  58 ++++++++-----
 crates/ra_mbe/src/tt_cursor.rs     | 170 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 204 insertions(+), 24 deletions(-)

(limited to 'crates/ra_mbe')

diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs
index 139a0fd33..3a0702a30 100644
--- a/crates/ra_mbe/src/syntax_bridge.rs
+++ b/crates/ra_mbe/src/syntax_bridge.rs
@@ -104,15 +104,16 @@ fn convert_tt(
 }
 
 #[derive(Debug)]
-struct TtTokenSource {
-    tokens: Vec<TtToken>,
+pub(crate) struct TtTokenSource {
+    pub tokens: Vec<TtToken>,
 }
 
 #[derive(Debug)]
-struct TtToken {
-    kind: SyntaxKind,
-    is_joint_to_next: bool,
-    text: SmolStr,
+pub(crate) struct TtToken {
+    pub kind: SyntaxKind,
+    pub is_joint_to_next: bool,
+    pub text: SmolStr,
+    pub n_tokens: usize,
 }
 
 // Some helper functions
@@ -123,7 +124,7 @@ fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
     None
 }
 
-struct TokenPeek<'a, I>
+pub(crate) struct TokenPeek<'a, I>
 where
     I: Iterator<Item = &'a tt::TokenTree>,
 {
@@ -134,7 +135,11 @@ impl<'a, I> TokenPeek<'a, I>
 where
     I: Iterator<Item = &'a tt::TokenTree>,
 {
-    fn next(&mut self) -> Option<&tt::TokenTree> {
+    pub fn new(iter: I) -> Self {
+        TokenPeek { iter: itertools::multipeek(iter) }
+    }
+
+    pub fn next(&mut self) -> Option<&tt::TokenTree> {
         self.iter.next()
     }
 
@@ -161,14 +166,14 @@ where
 }
 
 impl TtTokenSource {
-    fn new(tt: &tt::Subtree) -> TtTokenSource {
+    pub fn new(tt: &tt::Subtree) -> TtTokenSource {
         let mut res = TtTokenSource { tokens: Vec::new() };
         res.convert_subtree(tt);
         res
     }
     fn convert_subtree(&mut self, sub: &tt::Subtree) {
         self.push_delim(sub.delimiter, false);
-        let mut peek = TokenPeek { iter: itertools::multipeek(sub.token_trees.iter()) };
+        let mut peek = TokenPeek::new(sub.token_trees.iter());
         while let Some(tt) = peek.iter.next() {
             self.convert_tt(tt, &mut peek);
         }
@@ -194,10 +199,17 @@ impl TtTokenSource {
                 kind: classify_literal(&l.text).unwrap().kind,
                 is_joint_to_next: false,
                 text: l.text.clone(),
+                n_tokens: 1,
             },
             tt::Leaf::Punct(p) => {
-                if let Some(tt) = Self::convert_multi_char_punct(p, iter) {
-                    tt
+                if let Some((kind, is_joint_to_next, text, size)) =
+                    Self::convert_multi_char_punct(p, iter)
+                {
+                    for _ in 0..size - 1 {
+                        iter.next();
+                    }
+
+                    TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size }
                 } else {
                     let kind = match p.char {
                         // lexer may produce compound tokens for these ones
@@ -213,21 +225,26 @@ impl TtTokenSource {
                         let s: &str = p.char.encode_utf8(&mut buf);
                         SmolStr::new(s)
                     };
-                    TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text }
+                    TtToken {
+                        kind,
+                        is_joint_to_next: p.spacing == tt::Spacing::Joint,
+                        text,
+                        n_tokens: 1,
+                    }
                 }
             }
             tt::Leaf::Ident(ident) => {
                 let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT);
-                TtToken { kind, is_joint_to_next: false, text: ident.text.clone() }
+                TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 }
             }
         };
         self.tokens.push(tok)
     }
 
-    fn convert_multi_char_punct<'a, I>(
+    pub(crate) fn convert_multi_char_punct<'a, I>(
         p: &tt::Punct,
         iter: &mut TokenPeek<'a, I>,
-    ) -> Option<TtToken>
+    ) -> Option<(SyntaxKind, bool, &'static str, usize)>
     where
         I: Iterator<Item = &'a tt::TokenTree>,
     {
@@ -239,9 +256,7 @@ impl TtTokenSource {
                 ('.', '.', '=') => Some((DOTDOTEQ, "..=")),
                 _ => None,
             } {
-                iter.next();
-                iter.next();
-                return Some(TtToken { kind, is_joint_to_next, text: text.into() });
+                return Some((kind, is_joint_to_next, text, 3));
             }
         }
 
@@ -273,8 +288,7 @@ impl TtTokenSource {
 
                 _ => None,
             } {
-                iter.next();
-                return Some(TtToken { kind, is_joint_to_next, text: text.into() });
+                return Some((kind, is_joint_to_next, text, 2));
             }
         }
 
@@ -291,7 +305,7 @@ impl TtTokenSource {
         let idx = closing as usize;
         let kind = kinds[idx];
         let text = &texts[idx..texts.len() - (1 - idx)];
-        let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) };
+        let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 };
         self.tokens.push(tok)
     }
 }
diff --git a/crates/ra_mbe/src/tt_cursor.rs b/crates/ra_mbe/src/tt_cursor.rs
index 6f619621a..6ac3ac187 100644
--- a/crates/ra_mbe/src/tt_cursor.rs
+++ b/crates/ra_mbe/src/tt_cursor.rs
@@ -1,4 +1,124 @@
 use crate::ParseError;
+use crate::syntax_bridge::{TtTokenSource, TtToken, TokenPeek};
+use ra_parser::{TokenSource, TreeSink};
+
+use ra_syntax::{
+    SyntaxKind
+};
+
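+// A `TokenSource` over the `TtToken`s of a subtree, with its cursor advanced
+// to match a given `tt::TokenTree` position.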
+struct TtCursorTokenSource {
+    tt_pos: usize,
+    inner: TtTokenSource,
+}
+
+impl TtCursorTokenSource {
+    fn new(subtree: &tt::Subtree, curr: usize) -> TtCursorTokenSource {
+        let mut res = TtCursorTokenSource { inner: TtTokenSource::new(subtree), tt_pos: 1 };
+
+        // Match the `TtToken` cursor to the `tt::TokenTree` cursor.
+        // This is needed because `TtToken` is not a one-to-one mapping of `tt::TokenTree`.
+        // There are 3 cases (`TtToken` <=> `tt::TokenTree`):
+        // * One to One  => ident, single char punct
+        // * Many to One => `tt::TokenTree::Subtree`
+        // * One to Many => multi-char punct
+        //
+        // Because of this we cannot simply advance the cursor;
+        // we have to bump it one token tree at a time.
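+        //
+        // For example, `{ a }` is one `tt::Subtree` but three `TtToken`s
+        // (`{`, `a`, `}`), while `..=` is three `tt::Punct` trees but a single
+        // `DOTDOTEQ` `TtToken`.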
+        let mut pos = 0;
+        while pos < curr {
+            pos += res.bump(&subtree.token_trees[pos]);
+        }
+
+        res
+    }
+
+    fn skip_sibling_leaf(&self, leaf: &tt::Leaf, iter: &mut std::slice::Iter<tt::TokenTree>) {
+        if let tt::Leaf::Punct(p) = leaf {
+            let mut peek = TokenPeek::new(iter);
+            if let Some((_, _, _, size)) = TtTokenSource::convert_multi_char_punct(p, &mut peek) {
+                for _ in 0..size - 1 {
+                    peek.next();
+                }
+            }
+        }
+    }
+
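+    // Number of `TtToken`s that `tt` maps to: a delimited subtree contributes
+    // two extra tokens for its delimiters, while a multi-char punct collapses
+    // its sibling leaves into a single token.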
+    fn count_tt_tokens(
+        &self,
+        tt: &tt::TokenTree,
+        iter: Option<&mut std::slice::Iter<tt::TokenTree>>,
+    ) -> usize {
+        assert!(!self.inner.tokens.is_empty());
+
+        match tt {
+            tt::TokenTree::Subtree(sub_tree) => {
+                let mut iter = sub_tree.token_trees.iter();
+                let mut count = match sub_tree.delimiter {
+                    tt::Delimiter::None => 0,
+                    _ => 2,
+                };
+
+                while let Some(tt) = iter.next() {
+                    count += self.count_tt_tokens(&tt, Some(&mut iter));
+                }
+                count
+            }
+
+            tt::TokenTree::Leaf(leaf) => {
+                iter.map(|iter| {
+                    self.skip_sibling_leaf(leaf, iter);
+                });
+
+                1
+            }
+        }
+    }
+
+    fn count(&self, tt: &tt::TokenTree) -> usize {
+        self.count_tt_tokens(tt, None)
+    }
+
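+    // Advance the `TtToken` cursor past `tt` and return how many `tt::TokenTree`s
+    // the current `TtToken` spans, so the caller can advance its own
+    // `tt::TokenTree` cursor by that amount.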
+    fn bump(&mut self, tt: &tt::TokenTree) -> usize {
+        let cur = self.current().unwrap();
+        let n_tokens = cur.n_tokens;
+        self.tt_pos += self.count(tt);
+        n_tokens
+    }
+
+    fn current(&self) -> Option<&TtToken> {
+        self.inner.tokens.get(self.tt_pos)
+    }
+}
+
+impl TokenSource for TtCursorTokenSource {
+    fn token_kind(&self, pos: usize) -> SyntaxKind {
+        if let Some(tok) = self.inner.tokens.get(self.tt_pos + pos) {
+            tok.kind
+        } else {
+            SyntaxKind::EOF
+        }
+    }
+    fn is_token_joint_to_next(&self, pos: usize) -> bool {
+        self.inner.tokens[self.tt_pos + pos].is_joint_to_next
+    }
+    fn is_keyword(&self, pos: usize, kw: &str) -> bool {
+        self.inner.tokens[self.tt_pos + pos].text == *kw
+    }
+}
+
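+// A `TreeSink` that only records how many tokens the parser consumed.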
+struct TtCursorTokenSink {
+    token_pos: usize,
+}
+
+impl TreeSink for TtCursorTokenSink {
+    fn token(&mut self, _kind: SyntaxKind, n_tokens: u8) {
+        self.token_pos += n_tokens as usize;
+    }
+
+    fn start_node(&mut self, _kind: SyntaxKind) {}
+    fn finish_node(&mut self) {}
+    fn error(&mut self, _error: ra_parser::ParseError) {}
+}
 
 #[derive(Clone)]
 pub(crate) struct TtCursor<'a> {
@@ -78,8 +198,54 @@ impl<'a> TtCursor<'a> {
         })
     }
 
-    pub(crate) fn eat_path(&mut self) -> Option<tt::Subtree> {        
-        None
+    fn eat_parse_result(
+        &mut self,
+        parsed_token: usize,
+        src: &mut TtCursorTokenSource,
+    ) -> Option<tt::TokenTree> {
+        let mut res = vec![];
+
+        // Match the `TtToken` cursor to the `tt::TokenTree` cursor.
+        // This is needed because `TtToken` is not a one-to-one mapping of `tt::TokenTree`.
+        // There are 3 cases (`TtToken` <=> `tt::TokenTree`):
+        // * One to One  => ident, single char punct
+        // * Many to One => `tt::TokenTree::Subtree`
+        // * One to Many => multi-char punct
+        //
+        // Because of this we cannot simply advance the cursor;
+        // we have to bump it one token tree at a time.
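+        //
+        // (See `TtCursorTokenSource::new` above for a concrete example.)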
+        let next_pos = src.tt_pos + parsed_token;
+        while src.tt_pos < next_pos {
+            let n = src.bump(self.current().unwrap());
+            res.extend((0..n).map(|_| self.eat().unwrap()));
+        }
+
+        let res: Vec<_> = res.into_iter().cloned().collect();
+
+        match res.len() {
+            0 => None,
+            1 => Some(res[0].clone()),
+            _ => Some(tt::TokenTree::Subtree(tt::Subtree {
+                delimiter: tt::Delimiter::None,
+                token_trees: res,
+            })),
+        }
+    }
+
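+    // Run the given parser entry point (e.g. `ra_parser::parse_path`) over the
+    // remaining tokens, then eat the `tt::TokenTree`s covered by the parse.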
+    fn eat_parse<F>(&mut self, f: F) -> Option<tt::TokenTree>
+    where
+        F: FnOnce(&dyn TokenSource, &mut dyn TreeSink),
+    {
+        let mut src = TtCursorTokenSource::new(self.subtree, self.pos);
+        let mut sink = TtCursorTokenSink { token_pos: 0 };
+
+        f(&src, &mut sink);
+
+        self.eat_parse_result(sink.token_pos, &mut src)
+    }
+
+    pub(crate) fn eat_path(&mut self) -> Option<tt::TokenTree> {
+        self.eat_parse(ra_parser::parse_path)
     }
 
     pub(crate) fn expect_char(&mut self, char: char) -> Result<(), ParseError> {
-- 
cgit v1.2.3