Diffstat (limited to 'crates/mbe/src/syntax_bridge.rs')
 crates/mbe/src/syntax_bridge.rs | 832 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 832 insertions(+), 0 deletions(-)

diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs
new file mode 100644
index 000000000..a8ad917fb
--- /dev/null
+++ b/crates/mbe/src/syntax_bridge.rs
@@ -0,0 +1,832 @@
//! Conversions between syntax trees (`SyntaxNode`) and token trees
//! (`tt::Subtree`): the bridge between what the user wrote and what a macro
//! consumes and produces.

use parser::{FragmentKind, ParseError, TreeSink};
use rustc_hash::FxHashMap;
use syntax::{
    ast::{self, make::tokens::doc_comment},
    tokenize, AstToken, Parse, SmolStr, SyntaxKind,
    SyntaxKind::*,
    SyntaxNode, SyntaxToken, SyntaxTreeBuilder, TextRange, TextSize, Token as RawToken, T,
};
use tt::buffer::{Cursor, TokenBuffer};

use crate::subtree_source::SubtreeTokenSource;
use crate::ExpandError;

#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum TokenTextRange {
    Token(TextRange),
    Delimiter(TextRange, TextRange),
}

impl TokenTextRange {
    pub fn by_kind(self, kind: SyntaxKind) -> Option<TextRange> {
        match self {
            TokenTextRange::Token(it) => Some(it),
            TokenTextRange::Delimiter(open, close) => match kind {
                T!['{'] | T!['('] | T!['['] => Some(open),
                T!['}'] | T![')'] | T![']'] => Some(close),
                _ => None,
            },
        }
    }
}

/// Maps `tt::TokenId` to the relative range of the original token.
#[derive(Debug, PartialEq, Eq, Clone, Default)]
pub struct TokenMap {
    /// Maps `tt::TokenId` to the *relative* source range.
    entries: Vec<(tt::TokenId, TokenTextRange)>,
}
/// Converts a syntax tree (what the user has written) to a `TokenTree`
/// (what the macro will consume).
pub fn ast_to_token_tree(ast: &impl ast::AstNode) -> Option<(tt::Subtree, TokenMap)> {
    syntax_node_to_token_tree(ast.syntax())
}

/// Converts a syntax node to a `TokenTree` (what the macro will consume).
pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> Option<(tt::Subtree, TokenMap)> {
    let global_offset = node.text_range().start();
    let mut c = Convertor::new(node, global_offset);
    let subtree = c.go()?;
    Some((subtree, c.id_alloc.map))
}
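
// A minimal usage sketch (hypothetical helper, for illustration only): parse
// some source text, find the first macro call, and convert its token tree into
// a `tt::Subtree` plus the `TokenMap` that links each `tt::TokenId` back to a
// range in the original text.
#[allow(dead_code)]
fn example_ast_to_token_tree(source: &str) -> Option<(tt::Subtree, TokenMap)> {
    use syntax::ast::AstNode;
    let parse = ast::SourceFile::parse(source); // e.g. source = "m!(a + b);"
    let macro_call = parse.tree().syntax().descendants().find_map(ast::MacroCall::cast)?;
    let token_tree = macro_call.token_tree()?;
    ast_to_token_tree(&token_tree)
}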

// The following items are what a macro expansion can be parsed into in `rustc`:
// link: https://github.com/rust-lang/rust/blob/9ebf47851a357faa4cd97f4b1dc7835f6376e639/src/libsyntax/ext/expand.rs#L141
// * Expr(P<ast::Expr>) -> token_tree_to_expr
// * Pat(P<ast::Pat>) -> token_tree_to_pat
// * Ty(P<ast::Ty>) -> token_tree_to_ty
// * Stmts(SmallVec<[ast::Stmt; 1]>) -> token_tree_to_stmts
// * Items(SmallVec<[P<ast::Item>; 1]>) -> token_tree_to_items
//
// * TraitItems(SmallVec<[ast::TraitItem; 1]>)
// * AssocItems(SmallVec<[ast::AssocItem; 1]>)
// * ForeignItems(SmallVec<[ast::ForeignItem; 1]>)

pub fn token_tree_to_syntax_node(
    tt: &tt::Subtree,
    fragment_kind: FragmentKind,
) -> Result<(Parse<SyntaxNode>, TokenMap), ExpandError> {
    let tmp;
    let tokens = match tt {
        tt::Subtree { delimiter: None, token_trees } => token_trees.as_slice(),
        _ => {
            tmp = [tt.clone().into()];
            &tmp[..]
        }
    };
    let buffer = TokenBuffer::new(&tokens);
    let mut token_source = SubtreeTokenSource::new(&buffer);
    let mut tree_sink = TtTreeSink::new(buffer.begin());
    parser::parse_fragment(&mut token_source, &mut tree_sink, fragment_kind);
    if tree_sink.roots.len() != 1 {
        return Err(ExpandError::ConversionError);
    }
    // FIXME: would be cool to report errors
    let (parse, range_map) = tree_sink.finish();
    Ok((parse, range_map))
}
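
// An illustrative sketch (hypothetical helper): turn a token tree back into a
// syntax tree, parsed as an expression fragment, and discard the map.
#[allow(dead_code)]
fn example_token_tree_to_expr(tt: &tt::Subtree) -> Result<SyntaxNode, ExpandError> {
    let (parse, _token_map) = token_tree_to_syntax_node(tt, FragmentKind::Expr)?;
    // `Parse` also carries any parse errors; here we only take the node itself.
    Ok(parse.syntax_node())
}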

/// Converts a string to a `TokenTree`.
pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
    let (tokens, errors) = tokenize(text);
    if !errors.is_empty() {
        return None;
    }

    let mut conv = RawConvertor {
        text,
        offset: TextSize::default(),
        inner: tokens.iter(),
        id_alloc: TokenIdAlloc {
            map: Default::default(),
            global_offset: TextSize::default(),
            next_id: 0,
        },
    };

    let subtree = conv.go()?;
    Some((subtree, conv.id_alloc.map))
}
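
// A sketch of the expected behavior (illustrative only): lexing well-formed
// text yields a delimiter-less subtree whose tokens are tracked in the map.
#[allow(dead_code)]
fn example_parse_to_token_tree() {
    let (subtree, map) = parse_to_token_tree("struct S;").expect("no lex errors");
    assert!(subtree.delimiter.is_none());
    // `struct` occupies the relative range 0..6 and got a token id.
    assert!(map.token_by_range(TextRange::new(0.into(), 6.into())).is_some());
}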

impl TokenMap {
    pub fn token_by_range(&self, relative_range: TextRange) -> Option<tt::TokenId> {
        let &(token_id, _) = self.entries.iter().find(|(_, range)| match range {
            TokenTextRange::Token(it) => *it == relative_range,
            TokenTextRange::Delimiter(open, close) => {
                *open == relative_range || *close == relative_range
            }
        })?;
        Some(token_id)
    }

    pub fn range_by_token(&self, token_id: tt::TokenId) -> Option<TokenTextRange> {
        let &(_, range) = self.entries.iter().find(|(tid, _)| *tid == token_id)?;
        Some(range)
    }

    fn insert(&mut self, token_id: tt::TokenId, relative_range: TextRange) {
        self.entries.push((token_id, TokenTextRange::Token(relative_range)));
    }

    fn insert_delim(
        &mut self,
        token_id: tt::TokenId,
        open_relative_range: TextRange,
        close_relative_range: TextRange,
    ) -> usize {
        let res = self.entries.len();
        self.entries
            .push((token_id, TokenTextRange::Delimiter(open_relative_range, close_relative_range)));
        res
    }

    fn update_close_delim(&mut self, idx: usize, close_relative_range: TextRange) {
        let (_, token_text_range) = &mut self.entries[idx];
        if let TokenTextRange::Delimiter(dim, _) = token_text_range {
            *token_text_range = TokenTextRange::Delimiter(*dim, close_relative_range);
        }
    }

    fn remove_delim(&mut self, idx: usize) {
        // FIXME: This could be accidentally quadratic
        self.entries.remove(idx);
    }
}
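
// For example, after converting `foo!(bar)`, `token_by_range` on the relative
// range of `bar` yields its `tt::TokenId`, and `range_by_token` is the inverse
// lookup, mapping that id back to a `TokenTextRange`.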

/// Returns the textual content of a doc comment block as a quoted string.
/// That is, it strips the leading `///` (or `/**`, etc.) and the trailing
/// `*/` of block comments, and then quotes the string, which is needed to
/// convert it to a `tt::Literal`.
fn doc_comment_text(comment: &ast::Comment) -> SmolStr {
    let prefix_len = comment.prefix().len();
    let mut text = &comment.text()[prefix_len..];

    // Remove the trailing "*/"
    if comment.kind().shape == ast::CommentShape::Block {
        text = &text[0..text.len() - 2];
    }

    // Quote the string
    // Note that `tt::Literal` expects an escaped string
    let text = format!("{:?}", text.escape_default().to_string());
    text.into()
}
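
// For example, the comment `/// hello` has prefix `///`; the remaining text
// ` hello` is escaped and quoted, producing the literal `"\" hello\""`.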

fn convert_doc_comment(token: &syntax::SyntaxToken) -> Option<Vec<tt::TokenTree>> {
    let comment = ast::Comment::cast(token.clone())?;
    let doc = comment.kind().doc?;

    // Make the `doc = "..."` meta token trees
    let mut meta_tkns = Vec::new();
    meta_tkns.push(mk_ident("doc"));
    meta_tkns.push(mk_punct('='));
    meta_tkns.push(mk_doc_literal(&comment));

    // Make `#![]`
    let mut token_trees = Vec::new();
    token_trees.push(mk_punct('#'));
    if let ast::CommentPlacement::Inner = doc {
        token_trees.push(mk_punct('!'));
    }
    token_trees.push(tt::TokenTree::from(tt::Subtree {
        delimiter: Some(tt::Delimiter {
            kind: tt::DelimiterKind::Bracket,
            id: tt::TokenId::unspecified(),
        }),
        token_trees: meta_tkns,
    }));

    return Some(token_trees);

    // Helper functions
    fn mk_ident(s: &str) -> tt::TokenTree {
        tt::TokenTree::from(tt::Leaf::from(tt::Ident {
            text: s.into(),
            id: tt::TokenId::unspecified(),
        }))
    }

    fn mk_punct(c: char) -> tt::TokenTree {
        tt::TokenTree::from(tt::Leaf::from(tt::Punct {
            char: c,
            spacing: tt::Spacing::Alone,
            id: tt::TokenId::unspecified(),
        }))
    }

    fn mk_doc_literal(comment: &ast::Comment) -> tt::TokenTree {
        let lit = tt::Literal { text: doc_comment_text(comment), id: tt::TokenId::unspecified() };

        tt::TokenTree::from(tt::Leaf::from(lit))
    }
}
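
// For instance, the outer doc comment `/// Foo` becomes the token trees for
// `# [doc = "\" Foo\""]`, and an inner comment `//! Foo` gains the extra `!`.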

struct TokenIdAlloc {
    map: TokenMap,
    global_offset: TextSize,
    next_id: u32,
}

impl TokenIdAlloc {
    fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId {
        let relative_range = absolute_range - self.global_offset;
        let token_id = tt::TokenId(self.next_id);
        self.next_id += 1;
        self.map.insert(token_id, relative_range);
        token_id
    }

    fn open_delim(&mut self, open_abs_range: TextRange) -> (tt::TokenId, usize) {
        let token_id = tt::TokenId(self.next_id);
        self.next_id += 1;
        let idx = self.map.insert_delim(
            token_id,
            open_abs_range - self.global_offset,
            open_abs_range - self.global_offset,
        );
        (token_id, idx)
    }

    fn close_delim(&mut self, idx: usize, close_abs_range: Option<TextRange>) {
        match close_abs_range {
            None => {
                self.map.remove_delim(idx);
            }
            Some(close) => {
                self.map.update_close_delim(idx, close - self.global_offset);
            }
        }
    }
}

/// A converter for raw tokens (straight from the lexer).
struct RawConvertor<'a> {
    text: &'a str,
    offset: TextSize,
    id_alloc: TokenIdAlloc,
    inner: std::slice::Iter<'a, RawToken>,
}

trait SrcToken: std::fmt::Debug {
    fn kind(&self) -> SyntaxKind;

    fn to_char(&self) -> Option<char>;

    fn to_text(&self) -> SmolStr;
}

trait TokenConvertor {
    type Token: SrcToken;

    fn go(&mut self) -> Option<tt::Subtree> {
        let mut subtree = tt::Subtree::default();
        subtree.delimiter = None;
        while self.peek().is_some() {
            self.collect_leaf(&mut subtree.token_trees);
        }
        if subtree.token_trees.is_empty() {
            return None;
        }
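        // If the entire input is a single delimited group, return that group
        // directly rather than wrapping it in an extra delimiter-less subtree.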
        if subtree.token_trees.len() == 1 {
            if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] {
                return Some(first.clone());
            }
        }
        Some(subtree)
    }

    fn collect_leaf(&mut self, result: &mut Vec<tt::TokenTree>) {
        let (token, range) = match self.bump() {
            None => return,
            Some(it) => it,
        };

        let k: SyntaxKind = token.kind();
        if k == COMMENT {
            if let Some(tokens) = self.convert_doc_comment(&token) {
                result.extend(tokens);
            }
            return;
        }

        result.push(if k.is_punct() {
            assert_eq!(range.len(), TextSize::of('.'));
            let delim = match k {
                T!['('] => Some((tt::DelimiterKind::Parenthesis, T![')'])),
                T!['{'] => Some((tt::DelimiterKind::Brace, T!['}'])),
                T!['['] => Some((tt::DelimiterKind::Bracket, T![']'])),
                _ => None,
            };

            if let Some((kind, closed)) = delim {
                let mut subtree = tt::Subtree::default();
                let (id, idx) = self.id_alloc().open_delim(range);
                subtree.delimiter = Some(tt::Delimiter { kind, id });

                while self.peek().map(|it| it.kind() != closed).unwrap_or(false) {
                    self.collect_leaf(&mut subtree.token_trees);
                }
                let last_range = match self.bump() {
                    None => {
                        // For error resilience, we insert a char punct for the opening delim here
                        self.id_alloc().close_delim(idx, None);
                        let leaf: tt::Leaf = tt::Punct {
                            id: self.id_alloc().alloc(range),
                            char: token.to_char().unwrap(),
                            spacing: tt::Spacing::Alone,
                        }
                        .into();
                        result.push(leaf.into());
                        result.extend(subtree.token_trees);
                        return;
                    }
                    Some(it) => it.1,
                };
                self.id_alloc().close_delim(idx, Some(last_range));
                subtree.into()
            } else {
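                // Determine spacing: a punct is `Joint` only when the next
                // token is itself a punct (and not whitespace or an opening
                // bracket). E.g. `+` directly followed by `=` in `+=` is
                // Joint, while `+` in `a + b` is Alone.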
                let spacing = match self.peek() {
                    Some(next)
                        if next.kind().is_trivia()
                            || next.kind() == T!['[']
                            || next.kind() == T!['{']
                            || next.kind() == T!['('] =>
                    {
                        tt::Spacing::Alone
                    }
                    Some(next) if next.kind().is_punct() => tt::Spacing::Joint,
                    _ => tt::Spacing::Alone,
                };
                let char = match token.to_char() {
                    Some(c) => c,
                    None => {
                        panic!("Token from lexer must be single char: token = {:#?}", token);
                    }
                };
                tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc().alloc(range) }).into()
            }
        } else {
            macro_rules! make_leaf {
                ($i:ident) => {
                    tt::$i { id: self.id_alloc().alloc(range), text: token.to_text() }.into()
                };
            }
            let leaf: tt::Leaf = match k {
                T![true] | T![false] => make_leaf!(Ident),
                IDENT => make_leaf!(Ident),
                k if k.is_keyword() => make_leaf!(Ident),
                k if k.is_literal() => make_leaf!(Literal),
                LIFETIME => {
                    let char_unit = TextSize::of('\'');
                    let r = TextRange::at(range.start(), char_unit);
                    let apostrophe = tt::Leaf::from(tt::Punct {
                        char: '\'',
                        spacing: tt::Spacing::Joint,
                        id: self.id_alloc().alloc(r),
                    });
                    result.push(apostrophe.into());

                    let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
                    let ident = tt::Leaf::from(tt::Ident {
                        text: SmolStr::new(&token.to_text()[1..]),
                        id: self.id_alloc().alloc(r),
                    });
                    result.push(ident.into());
                    return;
                }
                _ => return,
            };

            leaf.into()
        });
    }

    fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>;

    fn bump(&mut self) -> Option<(Self::Token, TextRange)>;

    fn peek(&self) -> Option<Self::Token>;

    fn id_alloc(&mut self) -> &mut TokenIdAlloc;
}

impl<'a> SrcToken for (RawToken, &'a str) {
    fn kind(&self) -> SyntaxKind {
        self.0.kind
    }

    fn to_char(&self) -> Option<char> {
        self.1.chars().next()
    }

    fn to_text(&self) -> SmolStr {
        self.1.into()
    }
}
impl<'a> TokenConvertor for RawConvertor<'a> {
    type Token = (RawToken, &'a str);

    fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>> {
        convert_doc_comment(&doc_comment(token.1))
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        let token = self.inner.next()?;
        let range = TextRange::at(self.offset, token.len);
        self.offset += token.len;

        Some(((*token, &self.text[range]), range))
    }

    fn peek(&self) -> Option<Self::Token> {
        let token = self.inner.as_slice().get(0).cloned();

        token.map(|it| {
            let range = TextRange::at(self.offset, it.len);
            (it, &self.text[range])
        })
    }

    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
        &mut self.id_alloc
    }
}

struct Convertor {
    id_alloc: TokenIdAlloc,
    current: Option<SyntaxToken>,
    range: TextRange,
    punct_offset: Option<(SyntaxToken, TextSize)>,
}

impl Convertor {
    fn new(node: &SyntaxNode, global_offset: TextSize) -> Convertor {
        Convertor {
            id_alloc: TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 },
            current: node.first_token(),
            range: node.text_range(),
            punct_offset: None,
        }
    }
}

#[derive(Debug)]
enum SynToken {
    Ordinary(SyntaxToken),
    Punch(SyntaxToken, TextSize),
}
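// `Punch` represents one character of a (possibly multi-character) punct
// token, identified by its offset within the token's text: e.g. the single
// syntax token `->` is surfaced as `Punch(token, 0)` for `-` and
// `Punch(token, 1)` for `>`.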

impl SynToken {
    fn token(&self) -> &SyntaxToken {
        match self {
            SynToken::Ordinary(it) => it,
            SynToken::Punch(it, _) => it,
        }
    }
}

impl SrcToken for SynToken {
    fn kind(&self) -> SyntaxKind {
        self.token().kind()
    }
    fn to_char(&self) -> Option<char> {
        match self {
            SynToken::Ordinary(_) => None,
            SynToken::Punch(it, i) => it.text().chars().nth((*i).into()),
        }
    }
    fn to_text(&self) -> SmolStr {
        self.token().text().clone()
    }
}

impl TokenConvertor for Convertor {
    type Token = SynToken;
    fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>> {
        convert_doc_comment(token.token())
    }

    fn bump(&mut self) -> Option<(Self::Token, TextRange)> {
        if let Some((punct, offset)) = self.punct_offset.clone() {
            if usize::from(offset) + 1 < punct.text().len() {
                let offset = offset + TextSize::of('.');
                let range = punct.text_range();
                self.punct_offset = Some((punct.clone(), offset));
                let range = TextRange::at(range.start() + offset, TextSize::of('.'));
                return Some((SynToken::Punch(punct, offset), range));
            }
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }
        self.current = curr.next_token();

        let token = if curr.kind().is_punct() {
            let range = curr.text_range();
            let range = TextRange::at(range.start(), TextSize::of('.'));
            self.punct_offset = Some((curr.clone(), 0.into()));
            (SynToken::Punch(curr, 0.into()), range)
        } else {
            self.punct_offset = None;
            let range = curr.text_range();
            (SynToken::Ordinary(curr), range)
        };

        Some(token)
    }

    fn peek(&self) -> Option<Self::Token> {
        if let Some((punct, mut offset)) = self.punct_offset.clone() {
            offset = offset + TextSize::of('.');
            if usize::from(offset) < punct.text().len() {
                return Some(SynToken::Punch(punct, offset));
            }
        }

        let curr = self.current.clone()?;
        if !self.range.contains_range(curr.text_range()) {
            return None;
        }

        let token = if curr.kind().is_punct() {
            SynToken::Punch(curr, 0.into())
        } else {
            SynToken::Ordinary(curr)
        };
        Some(token)
    }

    fn id_alloc(&mut self) -> &mut TokenIdAlloc {
        &mut self.id_alloc
    }
}

struct TtTreeSink<'a> {
    buf: String,
    cursor: Cursor<'a>,
    open_delims: FxHashMap<tt::TokenId, TextSize>,
    text_pos: TextSize,
    inner: SyntaxTreeBuilder,
    token_map: TokenMap,

    // Number of open roots.
    // Used to detect an ill-formed tree that does not have a single root.
    roots: smallvec::SmallVec<[usize; 1]>,
}

impl<'a> TtTreeSink<'a> {
    fn new(cursor: Cursor<'a>) -> Self {
        TtTreeSink {
            buf: String::new(),
            cursor,
            open_delims: FxHashMap::default(),
            text_pos: 0.into(),
            inner: SyntaxTreeBuilder::default(),
            roots: smallvec::SmallVec::new(),
            token_map: TokenMap::default(),
        }
    }

    fn finish(self) -> (Parse<SyntaxNode>, TokenMap) {
        (self.inner.finish(), self.token_map)
    }
}

fn delim_to_str(d: Option<tt::DelimiterKind>, closing: bool) -> SmolStr {
    let texts = match d {
        Some(tt::DelimiterKind::Parenthesis) => "()",
        Some(tt::DelimiterKind::Brace) => "{}",
        Some(tt::DelimiterKind::Bracket) => "[]",
        None => return "".into(),
    };

    let idx = closing as usize;
    let text = &texts[idx..texts.len() - (1 - idx)];
    text.into()
}
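
// For example, `delim_to_str(Some(tt::DelimiterKind::Brace), false)` yields
// "{" (the slice `&"{}"[0..1]`), and with `closing == true` it yields "}".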

impl<'a> TreeSink for TtTreeSink<'a> {
    fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) {
        if kind == L_DOLLAR || kind == R_DOLLAR {
            self.cursor = self.cursor.bump_subtree();
            return;
        }
        if kind == LIFETIME {
            n_tokens = 2;
        }

        let mut last = self.cursor;
        for _ in 0..n_tokens {
            if self.cursor.eof() {
                break;
            }
            last = self.cursor;
            let text: SmolStr = match self.cursor.token_tree() {
                Some(tt::TokenTree::Leaf(leaf)) => {
                    // Mark the range if needed
                    let (text, id) = match leaf {
                        tt::Leaf::Ident(ident) => (ident.text.clone(), ident.id),
                        tt::Leaf::Punct(punct) => {
                            (SmolStr::new_inline_from_ascii(1, &[punct.char as u8]), punct.id)
                        }
                        tt::Leaf::Literal(lit) => (lit.text.clone(), lit.id),
                    };
                    let range = TextRange::at(self.text_pos, TextSize::of(text.as_str()));
                    self.token_map.insert(id, range);
                    self.cursor = self.cursor.bump();
                    text
                }
                Some(tt::TokenTree::Subtree(subtree)) => {
                    self.cursor = self.cursor.subtree().unwrap();
                    if let Some(id) = subtree.delimiter.map(|it| it.id) {
                        self.open_delims.insert(id, self.text_pos);
                    }
                    delim_to_str(subtree.delimiter_kind(), false)
                }
                None => {
                    if let Some(parent) = self.cursor.end() {
                        self.cursor = self.cursor.bump();
                        if let Some(id) = parent.delimiter.map(|it| it.id) {
                            if let Some(open_delim) = self.open_delims.get(&id) {
                                let open_range = TextRange::at(*open_delim, TextSize::of('('));
                                let close_range = TextRange::at(self.text_pos, TextSize::of('('));
                                self.token_map.insert_delim(id, open_range, close_range);
                            }
                        }
                        delim_to_str(parent.delimiter_kind(), true)
                    } else {
                        continue;
                    }
                }
            };
            self.buf += &text;
            self.text_pos += TextSize::of(text.as_str());
        }

        let text = SmolStr::new(self.buf.as_str());
        self.buf.clear();
        self.inner.token(kind, text);

        // Add whitespace between adjacent puncts
        let next = last.bump();
        if let (
            Some(tt::TokenTree::Leaf(tt::Leaf::Punct(curr))),
            Some(tt::TokenTree::Leaf(tt::Leaf::Punct(_))),
        ) = (last.token_tree(), next.token_tree())
        {
            // Note: other parts of rust-analyzer assume that a semicolon is
            // always the last token of a statement, so we don't add
            // whitespace after `;` here.
            if curr.spacing == tt::Spacing::Alone && curr.char != ';' {
                self.inner.token(WHITESPACE, " ".into());
                self.text_pos += TextSize::of(' ');
            }
        }
    }

    fn start_node(&mut self, kind: SyntaxKind) {
        self.inner.start_node(kind);

        match self.roots.last_mut() {
            None | Some(0) => self.roots.push(1),
            Some(ref mut n) => **n += 1,
        };
    }

    fn finish_node(&mut self) {
        self.inner.finish_node();
        *self.roots.last_mut().unwrap() -= 1;
    }

    fn error(&mut self, error: ParseError) {
        self.inner.error(error, self.text_pos)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::tests::parse_macro;
    use parser::TokenSource;
    use syntax::{
        algo::{insert_children, InsertPosition},
        ast::AstNode,
    };

    #[test]
    fn convert_tt_token_source() {
        let expansion = parse_macro(
            r#"
            macro_rules! literals {
                ($i:ident) => {
                    {
                        let a = 'c';
                        let c = 1000;
                        let f = 12E+99_f64;
                        let s = "rust1";
                    }
                }
            }
            "#,
        )
        .expand_tt("literals!(foo);");
        let tts = &[expansion.into()];
        let buffer = tt::buffer::TokenBuffer::new(tts);
        let mut tt_src = SubtreeTokenSource::new(&buffer);
        let mut tokens = vec![];
        while tt_src.current().kind != EOF {
            tokens.push((tt_src.current().kind, tt_src.text()));
            tt_src.bump();
        }

        // [${]
        // [let] [a] [=] ['c'] [;]
        assert_eq!(tokens[2 + 3].1, "'c'");
        assert_eq!(tokens[2 + 3].0, CHAR);
        // [let] [c] [=] [1000] [;]
        assert_eq!(tokens[2 + 5 + 3].1, "1000");
        assert_eq!(tokens[2 + 5 + 3].0, INT_NUMBER);
        // [let] [f] [=] [12E+99_f64] [;]
        assert_eq!(tokens[2 + 10 + 3].1, "12E+99_f64");
        assert_eq!(tokens[2 + 10 + 3].0, FLOAT_NUMBER);

        // [let] [s] [=] ["rust1"] [;]
        assert_eq!(tokens[2 + 15 + 3].1, "\"rust1\"");
        assert_eq!(tokens[2 + 15 + 3].0, STRING);
    }

    #[test]
    fn stmts_token_trees_to_expr_is_err() {
        let expansion = parse_macro(
            r#"
            macro_rules! stmts {
                () => {
                    let a = 0;
                    let b = 0;
                    let c = 0;
                    let d = 0;
                }
            }
            "#,
        )
        .expand_tt("stmts!();");
        assert!(token_tree_to_syntax_node(&expansion, FragmentKind::Expr).is_err());
    }

    #[test]
    fn test_token_tree_last_child_is_white_space() {
        let source_file = ast::SourceFile::parse("f!({} );").ok().unwrap();
        let macro_call = source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap();
        let token_tree = macro_call.token_tree().unwrap();

        // Token tree now is:
        // TokenTree
        // - T!['(']
        // - TokenTree
        //   - T!['{']
        //   - T!['}']
        // - WHITE_SPACE
        // - T![')']

        let rbrace =
            token_tree.syntax().descendants_with_tokens().find(|it| it.kind() == T!['}']).unwrap();
        let space = token_tree
            .syntax()
            .descendants_with_tokens()
            .find(|it| it.kind() == SyntaxKind::WHITESPACE)
            .unwrap();

        // Reorder the whitespace, such that it ends up inside the inner token tree.
        let token_tree = insert_children(
            &rbrace.parent().unwrap(),
            InsertPosition::Last,
            std::iter::once(space),
        );

        // Token tree now is:
        // TokenTree
        // - T!['{']
        // - T!['}']
        // - WHITE_SPACE
        let token_tree = ast::TokenTree::cast(token_tree).unwrap();
        let tt = ast_to_token_tree(&token_tree).unwrap().0;

        assert_eq!(tt.delimiter_kind(), Some(tt::DelimiterKind::Brace));
    }

    #[test]
    fn test_token_tree_multi_char_punct() {
        let source_file = ast::SourceFile::parse("struct Foo { a: x::Y }").ok().unwrap();
        let struct_def = source_file.syntax().descendants().find_map(ast::Struct::cast).unwrap();
        let tt = ast_to_token_tree(&struct_def).unwrap().0;
        token_tree_to_syntax_node(&tt, FragmentKind::Item).unwrap();
    }
}