aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbors[bot] <bors[bot]@users.noreply.github.com>2019-04-05 09:55:50 +0100
committerbors[bot] <bors[bot]@users.noreply.github.com>2019-04-05 09:55:50 +0100
commitbe9a44e9bad262ac5e615730e540fd434f846a0e (patch)
tree7082f2b1398f8481a5a583a8c499d9b931c5e590
parent7713416477fd59348ad60d44f0ec3a3aebcf4b9f (diff)
parent6ff16c7ad9c1b72c4f04bb0d6c7fc0fdc2cc00cb (diff)
Merge #1111
1111: Add multi-byte token support in token tree to ast item list r=matklad a=edwin0cheng As discussed in https://github.com/rust-analyzer/rust-analyzer/pull/1105 , this PR implements all multi-byte tokens in the `ra_mbe` crate. Co-authored-by: Edwin Cheng <[email protected]>
-rw-r--r--Cargo.lock1
-rw-r--r--crates/ra_mbe/Cargo.toml2
-rw-r--r--crates/ra_mbe/src/syntax_bridge.rs156
3 files changed, 139 insertions, 20 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 603abe058..a14312821 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1040,6 +1040,7 @@ dependencies = [
1040name = "ra_mbe" 1040name = "ra_mbe"
1041version = "0.1.0" 1041version = "0.1.0"
1042dependencies = [ 1042dependencies = [
1043 "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
1043 "ra_parser 0.1.0", 1044 "ra_parser 0.1.0",
1044 "ra_syntax 0.1.0", 1045 "ra_syntax 0.1.0",
1045 "ra_tt 0.1.0", 1046 "ra_tt 0.1.0",
diff --git a/crates/ra_mbe/Cargo.toml b/crates/ra_mbe/Cargo.toml
index 6e785f570..1d0c2a340 100644
--- a/crates/ra_mbe/Cargo.toml
+++ b/crates/ra_mbe/Cargo.toml
@@ -8,5 +8,5 @@ authors = ["rust-analyzer developers"]
8ra_syntax = { path = "../ra_syntax" } 8ra_syntax = { path = "../ra_syntax" }
9ra_parser = { path = "../ra_parser" } 9ra_parser = { path = "../ra_parser" }
10tt = { path = "../ra_tt", package = "ra_tt" } 10tt = { path = "../ra_tt", package = "ra_tt" }
11 11itertools = "0.8.0"
12rustc-hash = "1.0.0" 12rustc-hash = "1.0.0"
diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs
index 05f9817da..257503de8 100644
--- a/crates/ra_mbe/src/syntax_bridge.rs
+++ b/crates/ra_mbe/src/syntax_bridge.rs
@@ -113,6 +113,51 @@ struct TtToken {
113 text: SmolStr, 113 text: SmolStr,
114} 114}
115 115
116// Some helper functions
117fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
118 if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt {
119 return Some(pp);
120 }
121 None
122}
123
124struct TokenPeek<'a, I>
125where
126 I: Iterator<Item = &'a tt::TokenTree>,
127{
128 iter: itertools::MultiPeek<I>,
129}
130
131impl<'a, I> TokenPeek<'a, I>
132where
133 I: Iterator<Item = &'a tt::TokenTree>,
134{
135 fn next(&mut self) -> Option<&tt::TokenTree> {
136 self.iter.next()
137 }
138
139 fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> {
140 if p.spacing != tt::Spacing::Joint {
141 return None;
142 }
143
144 self.iter.reset_peek();
145 let p1 = to_punct(self.iter.peek()?)?;
146 Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint))
147 }
148
149 fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> {
150 self.current_punct2(p).and_then(|((p0, p1), last_joint)| {
151 if !last_joint {
152 None
153 } else {
154 let p2 = to_punct(*self.iter.peek()?)?;
155 Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint))
156 }
157 })
158 }
159}
160
116impl TtTokenSource { 161impl TtTokenSource {
117 fn new(tt: &tt::Subtree) -> TtTokenSource { 162 fn new(tt: &tt::Subtree) -> TtTokenSource {
118 let mut res = TtTokenSource { tokens: Vec::new() }; 163 let mut res = TtTokenSource { tokens: Vec::new() };
@@ -121,16 +166,27 @@ impl TtTokenSource {
121 } 166 }
122 fn convert_subtree(&mut self, sub: &tt::Subtree) { 167 fn convert_subtree(&mut self, sub: &tt::Subtree) {
123 self.push_delim(sub.delimiter, false); 168 self.push_delim(sub.delimiter, false);
124 sub.token_trees.iter().for_each(|tt| self.convert_tt(tt)); 169 let mut peek = TokenPeek { iter: itertools::multipeek(sub.token_trees.iter()) };
170 while let Some(tt) = peek.iter.next() {
171 self.convert_tt(tt, &mut peek);
172 }
125 self.push_delim(sub.delimiter, true) 173 self.push_delim(sub.delimiter, true)
126 } 174 }
127 fn convert_tt(&mut self, tt: &tt::TokenTree) { 175
176 fn convert_tt<'a, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'a, I>)
177 where
178 I: Iterator<Item = &'a tt::TokenTree>,
179 {
128 match tt { 180 match tt {
129 tt::TokenTree::Leaf(token) => self.convert_token(token), 181 tt::TokenTree::Leaf(token) => self.convert_token(token, iter),
130 tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), 182 tt::TokenTree::Subtree(sub) => self.convert_subtree(sub),
131 } 183 }
132 } 184 }
133 fn convert_token(&mut self, token: &tt::Leaf) { 185
186 fn convert_token<'a, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'a, I>)
187 where
188 I: Iterator<Item = &'a tt::TokenTree>,
189 {
134 let tok = match token { 190 let tok = match token {
135 tt::Leaf::Literal(l) => TtToken { 191 tt::Leaf::Literal(l) => TtToken {
136 kind: SyntaxKind::INT_NUMBER, // FIXME 192 kind: SyntaxKind::INT_NUMBER, // FIXME
@@ -138,21 +194,25 @@ impl TtTokenSource {
138 text: l.text.clone(), 194 text: l.text.clone(),
139 }, 195 },
140 tt::Leaf::Punct(p) => { 196 tt::Leaf::Punct(p) => {
141 let kind = match p.char { 197 if let Some(tt) = Self::convert_multi_char_punct(p, iter) {
142 // lexer may produce combpund tokens for these ones 198 tt
143 '.' => DOT, 199 } else {
144 ':' => COLON, 200 let kind = match p.char {
145 '=' => EQ, 201 // lexer may produce combpund tokens for these ones
146 '!' => EXCL, 202 '.' => DOT,
147 '-' => MINUS, 203 ':' => COLON,
148 c => SyntaxKind::from_char(c).unwrap(), 204 '=' => EQ,
149 }; 205 '!' => EXCL,
150 let text = { 206 '-' => MINUS,
151 let mut buf = [0u8; 4]; 207 c => SyntaxKind::from_char(c).unwrap(),
152 let s: &str = p.char.encode_utf8(&mut buf); 208 };
153 SmolStr::new(s) 209 let text = {
154 }; 210 let mut buf = [0u8; 4];
155 TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text } 211 let s: &str = p.char.encode_utf8(&mut buf);
212 SmolStr::new(s)
213 };
214 TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text }
215 }
156 } 216 }
157 tt::Leaf::Ident(ident) => { 217 tt::Leaf::Ident(ident) => {
158 let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); 218 let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT);
@@ -161,6 +221,64 @@ impl TtTokenSource {
161 }; 221 };
162 self.tokens.push(tok) 222 self.tokens.push(tok)
163 } 223 }
224
225 fn convert_multi_char_punct<'a, I>(
226 p: &tt::Punct,
227 iter: &mut TokenPeek<'a, I>,
228 ) -> Option<TtToken>
229 where
230 I: Iterator<Item = &'a tt::TokenTree>,
231 {
232 if let Some((m, is_joint_to_next)) = iter.current_punct3(p) {
233 if let Some((kind, text)) = match m {
234 ('<', '<', '=') => Some((SHLEQ, "<<=")),
235 ('>', '>', '=') => Some((SHREQ, ">>=")),
236 ('.', '.', '.') => Some((DOTDOTDOT, "...")),
237 ('.', '.', '=') => Some((DOTDOTEQ, "..=")),
238 _ => None,
239 } {
240 iter.next();
241 iter.next();
242 return Some(TtToken { kind, is_joint_to_next, text: text.into() });
243 }
244 }
245
246 if let Some((m, is_joint_to_next)) = iter.current_punct2(p) {
247 if let Some((kind, text)) = match m {
248 ('<', '<') => Some((SHL, "<<")),
249 ('>', '>') => Some((SHR, ">>")),
250
251 ('|', '|') => Some((PIPEPIPE, "||")),
252 ('&', '&') => Some((AMPAMP, "&&")),
253 ('%', '=') => Some((PERCENTEQ, "%=")),
254 ('*', '=') => Some((STAREQ, "*=")),
255 ('/', '=') => Some((SLASHEQ, "/=")),
256 ('^', '=') => Some((CARETEQ, "^=")),
257
258 ('&', '=') => Some((AMPEQ, "&=")),
259 ('|', '=') => Some((PIPEEQ, "|=")),
260 ('-', '=') => Some((MINUSEQ, "-=")),
261 ('+', '=') => Some((PLUSEQ, "+=")),
262 ('>', '=') => Some((GTEQ, ">=")),
263 ('<', '=') => Some((LTEQ, "<=")),
264
265 ('-', '>') => Some((THIN_ARROW, "->")),
266 ('!', '=') => Some((NEQ, "!=")),
267 ('=', '>') => Some((FAT_ARROW, "=>")),
268 ('=', '=') => Some((EQEQ, "==")),
269 ('.', '.') => Some((DOTDOT, "..")),
270 (':', ':') => Some((COLONCOLON, "::")),
271
272 _ => None,
273 } {
274 iter.next();
275 return Some(TtToken { kind, is_joint_to_next, text: text.into() });
276 }
277 }
278
279 None
280 }
281
164 fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { 282 fn push_delim(&mut self, d: tt::Delimiter, closing: bool) {
165 let (kinds, texts) = match d { 283 let (kinds, texts) = match d {
166 tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), 284 tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"),