author      Edwin Cheng <[email protected]>    2019-04-04 20:39:54 +0100
committer   Edwin Cheng <[email protected]>    2019-04-04 20:39:54 +0100
commit      c23408751c43ea7e349d8cf27472e546214acef6 (patch)
tree        f31d692c8785a6b8f492f2f0220d499d8d2db39f
parent      7713416477fd59348ad60d44f0ec3a3aebcf4b9f (diff)

Add multi-byte token support in tkn tree to ast

-rw-r--r--  Cargo.lock                          |   1
-rw-r--r--  crates/ra_mbe/Cargo.toml            |   2
-rw-r--r--  crates/ra_mbe/src/syntax_bridge.rs  | 156
3 files changed, 139 insertions, 20 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 603abe058..a14312821 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1040,6 +1040,7 @@ dependencies = [
 name = "ra_mbe"
 version = "0.1.0"
 dependencies = [
+ "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "ra_parser 0.1.0",
  "ra_syntax 0.1.0",
  "ra_tt 0.1.0",
diff --git a/crates/ra_mbe/Cargo.toml b/crates/ra_mbe/Cargo.toml
index 6e785f570..1d0c2a340 100644
--- a/crates/ra_mbe/Cargo.toml
+++ b/crates/ra_mbe/Cargo.toml
@@ -8,5 +8,5 @@ authors = ["rust-analyzer developers"]
 ra_syntax = { path = "../ra_syntax" }
 ra_parser = { path = "../ra_parser" }
 tt = { path = "../ra_tt", package = "ra_tt" }
-
+itertools = "0.8.0"
 rustc-hash = "1.0.0"
diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs
index 05f9817da..b7e8d34da 100644
--- a/crates/ra_mbe/src/syntax_bridge.rs
+++ b/crates/ra_mbe/src/syntax_bridge.rs
@@ -113,6 +113,51 @@ struct TtToken {
     text: SmolStr,
 }
 
+// Some helper functions
+fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
+    if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt {
+        return Some(pp);
+    }
+    None
+}
+
+struct TokenPeek<'a, I>
+where
+    I: Iterator<Item = &'a tt::TokenTree>,
+{
+    iter: itertools::MultiPeek<I>,
+}
+
+impl<'a, I> TokenPeek<'a, I>
+where
+    I: Iterator<Item = &'a tt::TokenTree>,
+{
+    fn next(&mut self) -> Option<&tt::TokenTree> {
+        self.iter.next()
+    }
+
+    fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> {
+        if p.spacing != tt::Spacing::Joint {
+            return None;
+        }
+
+        self.iter.reset_peek();
+        let p1 = to_punct(self.iter.peek()?)?;
+        Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint))
+    }
+
+    fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> {
+        self.current_punct2(p).and_then(|((p0, p1), last_joint)| {
+            if !last_joint {
+                None
+            } else {
+                let p2 = to_punct(*self.iter.peek()?)?;
+                Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint))
+            }
+        })
+    }
+}
+
 impl TtTokenSource {
     fn new(tt: &tt::Subtree) -> TtTokenSource {
         let mut res = TtTokenSource { tokens: Vec::new() };
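Aside (not part of the commit): the lookahead in TokenPeek::current_punct2 / current_punct3 is built on itertools' MultiPeek. A minimal standalone sketch of the behaviour it relies on, assuming the itertools 0.8 dependency added above:

// Each call to peek() looks one item further ahead without consuming anything;
// reset_peek() rewinds the lookahead cursor back to the next unconsumed item.
fn main() {
    let tokens = ['<', '<', '='];
    let mut iter = itertools::multipeek(tokens.iter());

    assert_eq!(iter.peek(), Some(&&'<')); // first lookahead
    assert_eq!(iter.peek(), Some(&&'<')); // one item further
    assert_eq!(iter.peek(), Some(&&'=')); // and one more
    iter.reset_peek();                    // rewind; nothing was consumed
    assert_eq!(iter.next(), Some(&'<'));  // consumption still starts at the front
}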
@@ -121,16 +166,27 @@ impl TtTokenSource {
     }
     fn convert_subtree(&mut self, sub: &tt::Subtree) {
         self.push_delim(sub.delimiter, false);
-        sub.token_trees.iter().for_each(|tt| self.convert_tt(tt));
+        let mut peek = TokenPeek { iter: itertools::multipeek(sub.token_trees.iter()) };
+        while let Some(tt) = peek.iter.next() {
+            self.convert_tt(tt, &mut peek);
+        }
         self.push_delim(sub.delimiter, true)
     }
-    fn convert_tt(&mut self, tt: &tt::TokenTree) {
+
+    fn convert_tt<'a, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'a, I>)
+    where
+        I: Iterator<Item = &'a tt::TokenTree>,
+    {
         match tt {
-            tt::TokenTree::Leaf(token) => self.convert_token(token),
+            tt::TokenTree::Leaf(token) => self.convert_token(token, iter),
             tt::TokenTree::Subtree(sub) => self.convert_subtree(sub),
         }
     }
-    fn convert_token(&mut self, token: &tt::Leaf) {
+
+    fn convert_token<'a, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'a, I>)
+    where
+        I: Iterator<Item = &'a tt::TokenTree>,
+    {
         let tok = match token {
             tt::Leaf::Literal(l) => TtToken {
                 kind: SyntaxKind::INT_NUMBER, // FIXME
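Aside (not part of the commit): the hunk above threads the peeking cursor through the whole conversion, so every leaf can look ahead at its siblings. A hypothetical, heavily simplified sketch of that shape; the Tree type and function names below are illustrative, not rust-analyzer's:

use itertools::MultiPeek;

enum Tree {
    Leaf(char),
    Sub(Vec<Tree>),
}

// The subtree walk owns the MultiPeek cursor over its children and hands it
// to every leaf conversion.
fn convert_subtree(out: &mut Vec<String>, sub: &[Tree]) {
    let mut peek = itertools::multipeek(sub.iter());
    while let Some(tree) = peek.next() {
        convert_tree(out, tree, &mut peek);
    }
}

fn convert_tree<'a, I>(out: &mut Vec<String>, tree: &Tree, peek: &mut MultiPeek<I>)
where
    I: Iterator<Item = &'a Tree>,
{
    match tree {
        Tree::Sub(sub) => convert_subtree(out, sub),
        Tree::Leaf(c) => {
            // A real implementation would use the lookahead here to decide how
            // many joint neighbours to merge into one token; this sketch only
            // records whether a following sibling exists.
            let has_next = peek.peek().is_some();
            peek.reset_peek();
            out.push(format!("{} (has following sibling: {})", c, has_next));
        }
    }
}

fn main() {
    let input = vec![Tree::Leaf('<'), Tree::Sub(vec![Tree::Leaf('=')])];
    let mut out = Vec::new();
    convert_subtree(&mut out, &input);
    println!("{:?}", out);
}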
@@ -138,21 +194,25 @@ impl TtTokenSource {
                 text: l.text.clone(),
             },
             tt::Leaf::Punct(p) => {
-                let kind = match p.char {
-                    // lexer may produce combpund tokens for these ones
-                    '.' => DOT,
-                    ':' => COLON,
-                    '=' => EQ,
-                    '!' => EXCL,
-                    '-' => MINUS,
-                    c => SyntaxKind::from_char(c).unwrap(),
-                };
-                let text = {
-                    let mut buf = [0u8; 4];
-                    let s: &str = p.char.encode_utf8(&mut buf);
-                    SmolStr::new(s)
-                };
-                TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text }
+                if let Some(tt) = Self::convert_multi_char_punct(p, iter) {
+                    tt
+                } else {
+                    let kind = match p.char {
+                        // lexer may produce combpund tokens for these ones
+                        '.' => DOT,
+                        ':' => COLON,
+                        '=' => EQ,
+                        '!' => EXCL,
+                        '-' => MINUS,
+                        c => SyntaxKind::from_char(c).unwrap(),
+                    };
+                    let text = {
+                        let mut buf = [0u8; 4];
+                        let s: &str = p.char.encode_utf8(&mut buf);
+                        SmolStr::new(s)
+                    };
+                    TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text }
+                }
             }
             tt::Leaf::Ident(ident) => {
                 let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT);
@@ -161,6 +221,64 @@ impl TtTokenSource {
         };
         self.tokens.push(tok)
     }
+
+    fn convert_multi_char_punct<'a, I>(
+        p: &tt::Punct,
+        iter: &mut TokenPeek<'a, I>,
+    ) -> Option<TtToken>
+    where
+        I: Iterator<Item = &'a tt::TokenTree>,
+    {
+        if let Some((m, is_joint_to_next)) = iter.current_punct3(p) {
+            if let Some((kind, text)) = match m {
+                ('<', '<', '=') => Some((SHLEQ, "<<=".into())),
+                ('>', '>', '=') => Some((SHREQ, ">>=".into())),
+                ('.', '.', '.') => Some((DOTDOTDOT, "...".into())),
+                ('.', '.', '=') => Some((DOTDOTEQ, "..=".into())),
+                _ => None,
+            } {
+                iter.next();
+                iter.next();
+                return Some(TtToken { kind, is_joint_to_next, text });
+            }
+        }
+
+        if let Some((m, is_joint_to_next)) = iter.current_punct2(p) {
+            if let Some((kind, text)) = match m {
+                ('<', '<') => Some((SHL, "<<".into())),
+                ('>', '>') => Some((SHR, ">>".into())),
+
+                ('|', '|') => Some((PIPEPIPE, "||".into())),
+                ('&', '&') => Some((AMPAMP, "&&".into())),
+                ('%', '=') => Some((PERCENTEQ, "%=".into())),
+                ('*', '=') => Some((STAREQ, "*=".into())),
+                ('/', '=') => Some((SLASHEQ, "/=".into())),
+                ('^', '=') => Some((CARETEQ, "^=".into())),
+
+                ('&', '=') => Some((AMPEQ, "&=".into())),
+                ('|', '=') => Some((PIPEEQ, "|=".into())),
+                ('-', '=') => Some((MINUSEQ, "-=".into())),
+                ('+', '=') => Some((PLUSEQ, "+=".into())),
+                ('>', '=') => Some((GTEQ, ">=".into())),
+                ('<', '=') => Some((LTEQ, "<=".into())),
+
+                ('-', '>') => Some((THIN_ARROW, "->".into())),
+                ('!', '=') => Some((NEQ, "!=".into())),
+                ('=', '>') => Some((FAT_ARROW, "=>".into())),
+                ('=', '=') => Some((EQEQ, "==".into())),
+                ('.', '.') => Some((DOTDOT, "..".into())),
+                (':', ':') => Some((COLONCOLON, "::".into())),
+
+                _ => None,
+            } {
+                iter.next();
+                return Some(TtToken { kind, is_joint_to_next, text });
+            }
+        }
+
+        None
+    }
+
     fn push_delim(&mut self, d: tt::Delimiter, closing: bool) {
         let (kinds, texts) = match d {
             tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"),
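Aside (not part of the commit): convert_multi_char_punct above tries three-character operators before two-character ones and consumes the extra puncts it merges. A minimal standalone sketch of that gluing order; the glue function, the (char, bool) joint-flag representation, and the operator subsets are illustrative, not rust-analyzer's API:

// Each input element is a punctuation character plus a flag telling whether it
// is "joint" to the next one (no whitespace in between).
fn glue(puncts: &[(char, bool)]) -> Vec<String> {
    let mut out = Vec::new();
    let mut i = 0;
    while i < puncts.len() {
        let (c, joint) = puncts[i];
        // Three-character operators are tried first ...
        if joint && i + 2 < puncts.len() && puncts[i + 1].1 {
            let trio: String = [c, puncts[i + 1].0, puncts[i + 2].0].iter().collect();
            if ["<<=", ">>=", "...", "..="].contains(&trio.as_str()) {
                out.push(trio);
                i += 3; // the two merged neighbours are consumed
                continue;
            }
        }
        // ... then two-character ones (a subset of the table above) ...
        if joint && i + 1 < puncts.len() {
            let pair: String = [c, puncts[i + 1].0].iter().collect();
            if ["<<", ">>", "||", "&&", "->", "=>", "==", "!=", "..", "::"].contains(&pair.as_str()) {
                out.push(pair);
                i += 2;
                continue;
            }
        }
        // ... and finally the character stands on its own.
        out.push(c.to_string());
        i += 1;
    }
    out
}

fn main() {
    // '<' '<' '=' with Joint spacing glues into a single "<<=" token,
    // while a non-joint second '<' only glues the shift operator.
    assert_eq!(glue(&[('<', true), ('<', true), ('=', false)]), vec!["<<="]);
    assert_eq!(glue(&[('<', true), ('<', false), ('=', false)]), vec!["<<", "="]);
}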