diff options
Diffstat (limited to 'crates/ra_mbe/src/syntax_bridge.rs')
-rw-r--r-- | crates/ra_mbe/src/syntax_bridge.rs | 262 |
1 files changed, 21 insertions, 241 deletions
diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 3a0702a30..102bba341 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs | |||
@@ -1,9 +1,11 @@ | |||
1 | use ra_parser::{TokenSource, TreeSink, ParseError}; | 1 | use ra_parser::{TreeSink, ParseError}; |
2 | use ra_syntax::{ | 2 | use ra_syntax::{ |
3 | AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc, SyntaxElement, | 3 | AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc, SyntaxElement, |
4 | ast, SyntaxKind::*, TextUnit, classify_literal | 4 | ast, SyntaxKind::*, TextUnit |
5 | }; | 5 | }; |
6 | 6 | ||
7 | use crate::subtree_source::{SubtreeTokenSource, SubtreeSourceQuerier}; | ||
8 | |||
7 | /// Maps `tt::TokenId` to the relative range of the original token. | 9 | /// Maps `tt::TokenId` to the relative range of the original token. |
8 | #[derive(Default)] | 10 | #[derive(Default)] |
9 | pub struct TokenMap { | 11 | pub struct TokenMap { |
@@ -22,8 +24,8 @@ pub fn ast_to_token_tree(ast: &ast::TokenTree) -> Option<(tt::Subtree, TokenMap) | |||
22 | 24 | ||
23 | /// Parses the token tree (result of macro expansion) as a sequence of items | 25 | /// Parses the token tree (result of macro expansion) as a sequence of items |
24 | pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc<ast::SourceFile> { | 26 | pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc<ast::SourceFile> { |
25 | let token_source = TtTokenSource::new(tt); | 27 | let token_source = SubtreeTokenSource::new(tt); |
26 | let mut tree_sink = TtTreeSink::new(&token_source.tokens); | 28 | let mut tree_sink = TtTreeSink::new(token_source.querier()); |
27 | ra_parser::parse(&token_source, &mut tree_sink); | 29 | ra_parser::parse(&token_source, &mut tree_sink); |
28 | let syntax = tree_sink.inner.finish(); | 30 | let syntax = tree_sink.inner.finish(); |
29 | ast::SourceFile::cast(&syntax).unwrap().to_owned() | 31 | ast::SourceFile::cast(&syntax).unwrap().to_owned() |
@@ -103,243 +105,19 @@ fn convert_tt( | |||
103 | Some(res) | 105 | Some(res) |
104 | } | 106 | } |
105 | 107 | ||
106 | #[derive(Debug)] | ||
107 | pub(crate) struct TtTokenSource { | ||
108 | pub tokens: Vec<TtToken>, | ||
109 | } | ||
110 | |||
111 | #[derive(Debug)] | ||
112 | pub(crate) struct TtToken { | ||
113 | pub kind: SyntaxKind, | ||
114 | pub is_joint_to_next: bool, | ||
115 | pub text: SmolStr, | ||
116 | pub n_tokens: usize, | ||
117 | } | ||
118 | |||
119 | // Some helper functions | ||
120 | fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> { | ||
121 | if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt { | ||
122 | return Some(pp); | ||
123 | } | ||
124 | None | ||
125 | } | ||
126 | |||
127 | pub(crate) struct TokenPeek<'a, I> | ||
128 | where | ||
129 | I: Iterator<Item = &'a tt::TokenTree>, | ||
130 | { | ||
131 | iter: itertools::MultiPeek<I>, | ||
132 | } | ||
133 | |||
134 | impl<'a, I> TokenPeek<'a, I> | ||
135 | where | ||
136 | I: Iterator<Item = &'a tt::TokenTree>, | ||
137 | { | ||
138 | pub fn new(iter: I) -> Self { | ||
139 | TokenPeek { iter: itertools::multipeek(iter) } | ||
140 | } | ||
141 | |||
142 | pub fn next(&mut self) -> Option<&tt::TokenTree> { | ||
143 | self.iter.next() | ||
144 | } | ||
145 | |||
146 | fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> { | ||
147 | if p.spacing != tt::Spacing::Joint { | ||
148 | return None; | ||
149 | } | ||
150 | |||
151 | self.iter.reset_peek(); | ||
152 | let p1 = to_punct(self.iter.peek()?)?; | ||
153 | Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint)) | ||
154 | } | ||
155 | |||
156 | fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> { | ||
157 | self.current_punct2(p).and_then(|((p0, p1), last_joint)| { | ||
158 | if !last_joint { | ||
159 | None | ||
160 | } else { | ||
161 | let p2 = to_punct(*self.iter.peek()?)?; | ||
162 | Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint)) | ||
163 | } | ||
164 | }) | ||
165 | } | ||
166 | } | ||
167 | |||
168 | impl TtTokenSource { | ||
169 | pub fn new(tt: &tt::Subtree) -> TtTokenSource { | ||
170 | let mut res = TtTokenSource { tokens: Vec::new() }; | ||
171 | res.convert_subtree(tt); | ||
172 | res | ||
173 | } | ||
174 | fn convert_subtree(&mut self, sub: &tt::Subtree) { | ||
175 | self.push_delim(sub.delimiter, false); | ||
176 | let mut peek = TokenPeek::new(sub.token_trees.iter()); | ||
177 | while let Some(tt) = peek.iter.next() { | ||
178 | self.convert_tt(tt, &mut peek); | ||
179 | } | ||
180 | self.push_delim(sub.delimiter, true) | ||
181 | } | ||
182 | |||
183 | fn convert_tt<'a, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'a, I>) | ||
184 | where | ||
185 | I: Iterator<Item = &'a tt::TokenTree>, | ||
186 | { | ||
187 | match tt { | ||
188 | tt::TokenTree::Leaf(token) => self.convert_token(token, iter), | ||
189 | tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), | ||
190 | } | ||
191 | } | ||
192 | |||
193 | fn convert_token<'a, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'a, I>) | ||
194 | where | ||
195 | I: Iterator<Item = &'a tt::TokenTree>, | ||
196 | { | ||
197 | let tok = match token { | ||
198 | tt::Leaf::Literal(l) => TtToken { | ||
199 | kind: classify_literal(&l.text).unwrap().kind, | ||
200 | is_joint_to_next: false, | ||
201 | text: l.text.clone(), | ||
202 | n_tokens: 1, | ||
203 | }, | ||
204 | tt::Leaf::Punct(p) => { | ||
205 | if let Some((kind, is_joint_to_next, text, size)) = | ||
206 | Self::convert_multi_char_punct(p, iter) | ||
207 | { | ||
208 | for _ in 0..size - 1 { | ||
209 | iter.next(); | ||
210 | } | ||
211 | |||
212 | TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } | ||
213 | } else { | ||
214 | let kind = match p.char { | ||
215 | // lexer may produce combpund tokens for these ones | ||
216 | '.' => DOT, | ||
217 | ':' => COLON, | ||
218 | '=' => EQ, | ||
219 | '!' => EXCL, | ||
220 | '-' => MINUS, | ||
221 | c => SyntaxKind::from_char(c).unwrap(), | ||
222 | }; | ||
223 | let text = { | ||
224 | let mut buf = [0u8; 4]; | ||
225 | let s: &str = p.char.encode_utf8(&mut buf); | ||
226 | SmolStr::new(s) | ||
227 | }; | ||
228 | TtToken { | ||
229 | kind, | ||
230 | is_joint_to_next: p.spacing == tt::Spacing::Joint, | ||
231 | text, | ||
232 | n_tokens: 1, | ||
233 | } | ||
234 | } | ||
235 | } | ||
236 | tt::Leaf::Ident(ident) => { | ||
237 | let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); | ||
238 | TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } | ||
239 | } | ||
240 | }; | ||
241 | self.tokens.push(tok) | ||
242 | } | ||
243 | |||
244 | pub(crate) fn convert_multi_char_punct<'a, I>( | ||
245 | p: &tt::Punct, | ||
246 | iter: &mut TokenPeek<'a, I>, | ||
247 | ) -> Option<(SyntaxKind, bool, &'static str, usize)> | ||
248 | where | ||
249 | I: Iterator<Item = &'a tt::TokenTree>, | ||
250 | { | ||
251 | if let Some((m, is_joint_to_next)) = iter.current_punct3(p) { | ||
252 | if let Some((kind, text)) = match m { | ||
253 | ('<', '<', '=') => Some((SHLEQ, "<<=")), | ||
254 | ('>', '>', '=') => Some((SHREQ, ">>=")), | ||
255 | ('.', '.', '.') => Some((DOTDOTDOT, "...")), | ||
256 | ('.', '.', '=') => Some((DOTDOTEQ, "..=")), | ||
257 | _ => None, | ||
258 | } { | ||
259 | return Some((kind, is_joint_to_next, text, 3)); | ||
260 | } | ||
261 | } | ||
262 | |||
263 | if let Some((m, is_joint_to_next)) = iter.current_punct2(p) { | ||
264 | if let Some((kind, text)) = match m { | ||
265 | ('<', '<') => Some((SHL, "<<")), | ||
266 | ('>', '>') => Some((SHR, ">>")), | ||
267 | |||
268 | ('|', '|') => Some((PIPEPIPE, "||")), | ||
269 | ('&', '&') => Some((AMPAMP, "&&")), | ||
270 | ('%', '=') => Some((PERCENTEQ, "%=")), | ||
271 | ('*', '=') => Some((STAREQ, "*=")), | ||
272 | ('/', '=') => Some((SLASHEQ, "/=")), | ||
273 | ('^', '=') => Some((CARETEQ, "^=")), | ||
274 | |||
275 | ('&', '=') => Some((AMPEQ, "&=")), | ||
276 | ('|', '=') => Some((PIPEEQ, "|=")), | ||
277 | ('-', '=') => Some((MINUSEQ, "-=")), | ||
278 | ('+', '=') => Some((PLUSEQ, "+=")), | ||
279 | ('>', '=') => Some((GTEQ, ">=")), | ||
280 | ('<', '=') => Some((LTEQ, "<=")), | ||
281 | |||
282 | ('-', '>') => Some((THIN_ARROW, "->")), | ||
283 | ('!', '=') => Some((NEQ, "!=")), | ||
284 | ('=', '>') => Some((FAT_ARROW, "=>")), | ||
285 | ('=', '=') => Some((EQEQ, "==")), | ||
286 | ('.', '.') => Some((DOTDOT, "..")), | ||
287 | (':', ':') => Some((COLONCOLON, "::")), | ||
288 | |||
289 | _ => None, | ||
290 | } { | ||
291 | return Some((kind, is_joint_to_next, text, 2)); | ||
292 | } | ||
293 | } | ||
294 | |||
295 | None | ||
296 | } | ||
297 | |||
298 | fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { | ||
299 | let (kinds, texts) = match d { | ||
300 | tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), | ||
301 | tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), | ||
302 | tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), | ||
303 | tt::Delimiter::None => return, | ||
304 | }; | ||
305 | let idx = closing as usize; | ||
306 | let kind = kinds[idx]; | ||
307 | let text = &texts[idx..texts.len() - (1 - idx)]; | ||
308 | let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }; | ||
309 | self.tokens.push(tok) | ||
310 | } | ||
311 | } | ||
312 | |||
313 | impl TokenSource for TtTokenSource { | ||
314 | fn token_kind(&self, pos: usize) -> SyntaxKind { | ||
315 | if let Some(tok) = self.tokens.get(pos) { | ||
316 | tok.kind | ||
317 | } else { | ||
318 | SyntaxKind::EOF | ||
319 | } | ||
320 | } | ||
321 | fn is_token_joint_to_next(&self, pos: usize) -> bool { | ||
322 | self.tokens[pos].is_joint_to_next | ||
323 | } | ||
324 | fn is_keyword(&self, pos: usize, kw: &str) -> bool { | ||
325 | self.tokens[pos].text == *kw | ||
326 | } | ||
327 | } | ||
328 | |||
329 | #[derive(Default)] | ||
330 | struct TtTreeSink<'a> { | 108 | struct TtTreeSink<'a> { |
331 | buf: String, | 109 | buf: String, |
332 | tokens: &'a [TtToken], | 110 | src_querier: SubtreeSourceQuerier<'a>, |
333 | text_pos: TextUnit, | 111 | text_pos: TextUnit, |
334 | token_pos: usize, | 112 | token_pos: usize, |
335 | inner: SyntaxTreeBuilder, | 113 | inner: SyntaxTreeBuilder, |
336 | } | 114 | } |
337 | 115 | ||
338 | impl<'a> TtTreeSink<'a> { | 116 | impl<'a> TtTreeSink<'a> { |
339 | fn new(tokens: &'a [TtToken]) -> TtTreeSink { | 117 | fn new(src_querier: SubtreeSourceQuerier<'a>) -> TtTreeSink { |
340 | TtTreeSink { | 118 | TtTreeSink { |
341 | buf: String::new(), | 119 | buf: String::new(), |
342 | tokens, | 120 | src_querier, |
343 | text_pos: 0.into(), | 121 | text_pos: 0.into(), |
344 | token_pos: 0, | 122 | token_pos: 0, |
345 | inner: SyntaxTreeBuilder::default(), | 123 | inner: SyntaxTreeBuilder::default(), |
@@ -350,7 +128,7 @@ impl<'a> TtTreeSink<'a> { | |||
350 | impl<'a> TreeSink for TtTreeSink<'a> { | 128 | impl<'a> TreeSink for TtTreeSink<'a> { |
351 | fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { | 129 | fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { |
352 | for _ in 0..n_tokens { | 130 | for _ in 0..n_tokens { |
353 | self.buf += self.tokens[self.token_pos].text.as_str(); | 131 | self.buf += self.src_querier.token(self.token_pos).1; |
354 | self.token_pos += 1; | 132 | self.token_pos += 1; |
355 | } | 133 | } |
356 | self.text_pos += TextUnit::of_str(&self.buf); | 134 | self.text_pos += TextUnit::of_str(&self.buf); |
@@ -394,21 +172,23 @@ mod tests { | |||
394 | "#, | 172 | "#, |
395 | ); | 173 | ); |
396 | let expansion = expand(&rules, "literals!(foo)"); | 174 | let expansion = expand(&rules, "literals!(foo)"); |
397 | let tt_src = TtTokenSource::new(&expansion); | 175 | let tt_src = SubtreeTokenSource::new(&expansion); |
176 | |||
177 | let query = tt_src.querier(); | ||
398 | 178 | ||
399 | // [{] | 179 | // [{] |
400 | // [let] [a] [=] ['c'] [;] | 180 | // [let] [a] [=] ['c'] [;] |
401 | assert_eq!(tt_src.tokens[1 + 3].text, "'c'"); | 181 | assert_eq!(query.token(1 + 3).1, "'c'"); |
402 | assert_eq!(tt_src.tokens[1 + 3].kind, CHAR); | 182 | assert_eq!(query.token(1 + 3).0, CHAR); |
403 | // [let] [c] [=] [1000] [;] | 183 | // [let] [c] [=] [1000] [;] |
404 | assert_eq!(tt_src.tokens[1 + 5 + 3].text, "1000"); | 184 | assert_eq!(query.token(1 + 5 + 3).1, "1000"); |
405 | assert_eq!(tt_src.tokens[1 + 5 + 3].kind, INT_NUMBER); | 185 | assert_eq!(query.token(1 + 5 + 3).0, INT_NUMBER); |
406 | // [let] [f] [=] [12E+99_f64] [;] | 186 | // [let] [f] [=] [12E+99_f64] [;] |
407 | assert_eq!(tt_src.tokens[1 + 10 + 3].text, "12E+99_f64"); | 187 | assert_eq!(query.token(1 + 10 + 3).1, "12E+99_f64"); |
408 | assert_eq!(tt_src.tokens[1 + 10 + 3].kind, FLOAT_NUMBER); | 188 | assert_eq!(query.token(1 + 10 + 3).0, FLOAT_NUMBER); |
409 | 189 | ||
410 | // [let] [s] [=] ["rust1"] [;] | 190 | // [let] [s] [=] ["rust1"] [;] |
411 | assert_eq!(tt_src.tokens[1 + 15 + 3].text, "\"rust1\""); | 191 | assert_eq!(query.token(1 + 15 + 3).1, "\"rust1\""); |
412 | assert_eq!(tt_src.tokens[1 + 15 + 3].kind, STRING); | 192 | assert_eq!(query.token(1 + 15 + 3).0, STRING); |
413 | } | 193 | } |
414 | } | 194 | } |