diff options
author | Edwin Cheng <[email protected]> | 2020-03-08 16:13:04 +0000 |
---|---|---|
committer | Edwin Cheng <[email protected]> | 2020-03-08 16:13:04 +0000 |
commit | e7206467d57c555f1ca1fee6acc0461d7579f4f7 (patch) | |
tree | 601b0b9b999595bb28d072664375681315503c43 /crates/ra_mbe | |
parent | 013e9080564aa497e6de92ae4bd1f162328b3cd8 (diff) |
Add parse_to_token_tree
Diffstat (limited to 'crates/ra_mbe')
-rw-r--r-- | crates/ra_mbe/src/lib.rs | 3 | ||||
-rw-r--r-- | crates/ra_mbe/src/syntax_bridge.rs | 221 | ||||
-rw-r--r-- | crates/ra_mbe/src/tests.rs | 12 |
3 files changed, 203 insertions, 33 deletions
diff --git a/crates/ra_mbe/src/lib.rs b/crates/ra_mbe/src/lib.rs index 2c6ae5658..43afe24cc 100644 --- a/crates/ra_mbe/src/lib.rs +++ b/crates/ra_mbe/src/lib.rs | |||
@@ -31,7 +31,8 @@ pub enum ExpandError { | |||
31 | } | 31 | } |
32 | 32 | ||
33 | pub use crate::syntax_bridge::{ | 33 | pub use crate::syntax_bridge::{ |
34 | ast_to_token_tree, syntax_node_to_token_tree, token_tree_to_syntax_node, TokenMap, | 34 | ast_to_token_tree, parse_to_token_tree, syntax_node_to_token_tree, token_tree_to_syntax_node, |
35 | TokenMap, | ||
35 | }; | 36 | }; |
36 | 37 | ||
37 | /// This struct contains AST for a single `macro_rules` definition. What might | 38 | /// This struct contains AST for a single `macro_rules` definition. What might |
diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index fb9fa5314..fcb73fbc7 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs | |||
@@ -2,8 +2,10 @@ | |||
2 | 2 | ||
3 | use ra_parser::{FragmentKind, ParseError, TreeSink}; | 3 | use ra_parser::{FragmentKind, ParseError, TreeSink}; |
4 | use ra_syntax::{ | 4 | use ra_syntax::{ |
5 | ast, AstToken, NodeOrToken, Parse, SmolStr, SyntaxKind, SyntaxKind::*, SyntaxNode, | 5 | ast::{self, make::tokens::doc_comment}, |
6 | SyntaxTreeBuilder, TextRange, TextUnit, T, | 6 | tokenize, AstToken, NodeOrToken, Parse, SmolStr, SyntaxKind, |
7 | SyntaxKind::*, | ||
8 | SyntaxNode, SyntaxTreeBuilder, TextRange, TextUnit, Token, T, | ||
7 | }; | 9 | }; |
8 | use rustc_hash::FxHashMap; | 10 | use rustc_hash::FxHashMap; |
9 | use std::iter::successors; | 11 | use std::iter::successors; |
@@ -48,9 +50,11 @@ pub fn ast_to_token_tree(ast: &impl ast::AstNode) -> Option<(tt::Subtree, TokenM | |||
48 | /// will consume). | 50 | /// will consume). |
49 | pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> Option<(tt::Subtree, TokenMap)> { | 51 | pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> Option<(tt::Subtree, TokenMap)> { |
50 | let global_offset = node.text_range().start(); | 52 | let global_offset = node.text_range().start(); |
51 | let mut c = Convertor { map: TokenMap::default(), global_offset, next_id: 0 }; | 53 | let mut c = Convertor { |
54 | id_alloc: { TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 } }, | ||
55 | }; | ||
52 | let subtree = c.go(node)?; | 56 | let subtree = c.go(node)?; |
53 | Some((subtree, c.map)) | 57 | Some((subtree, c.id_alloc.map)) |
54 | } | 58 | } |
55 | 59 | ||
56 | // The following items are what `rustc` macro can be parsed into : | 60 | // The following items are what `rustc` macro can be parsed into : |
@@ -89,6 +93,28 @@ pub fn token_tree_to_syntax_node( | |||
89 | Ok((parse, range_map)) | 93 | Ok((parse, range_map)) |
90 | } | 94 | } |
91 | 95 | ||
96 | /// Convert a string to a `TokenTree` | ||
97 | pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> { | ||
98 | let (tokens, errors) = tokenize(text); | ||
99 | if !errors.is_empty() { | ||
100 | return None; | ||
101 | } | ||
102 | |||
103 | let mut conv = RawConvertor { | ||
104 | text, | ||
105 | offset: TextUnit::default(), | ||
106 | inner: tokens.iter(), | ||
107 | id_alloc: TokenIdAlloc { | ||
108 | map: Default::default(), | ||
109 | global_offset: TextUnit::default(), | ||
110 | next_id: 0, | ||
111 | }, | ||
112 | }; | ||
113 | |||
114 | let subtree = conv.go()?; | ||
115 | Some((subtree, conv.id_alloc.map)) | ||
116 | } | ||
117 | |||
92 | impl TokenMap { | 118 | impl TokenMap { |
93 | pub fn token_by_range(&self, relative_range: TextRange) -> Option<tt::TokenId> { | 119 | pub fn token_by_range(&self, relative_range: TextRange) -> Option<tt::TokenId> { |
94 | let &(token_id, _) = self.entries.iter().find(|(_, range)| match range { | 120 | let &(token_id, _) = self.entries.iter().find(|(_, range)| match range { |
@@ -118,6 +144,14 @@ impl TokenMap { | |||
118 | self.entries | 144 | self.entries |
119 | .push((token_id, TokenTextRange::Delimiter(open_relative_range, close_relative_range))); | 145 | .push((token_id, TokenTextRange::Delimiter(open_relative_range, close_relative_range))); |
120 | } | 146 | } |
147 | |||
148 | fn update_close_delim(&mut self, token_id: tt::TokenId, close_relative_range: TextRange) { | ||
149 | if let Some(entry) = self.entries.iter_mut().find(|(tid, _)| *tid == token_id) { | ||
150 | if let TokenTextRange::Delimiter(dim, _) = entry.1 { | ||
151 | entry.1 = TokenTextRange::Delimiter(dim, close_relative_range); | ||
152 | } | ||
153 | } | ||
154 | } | ||
121 | } | 155 | } |
122 | 156 | ||
123 | /// Returns the textual content of a doc comment block as a quoted string | 157 | /// Returns the textual content of a doc comment block as a quoted string |
@@ -188,12 +222,161 @@ fn convert_doc_comment(token: &ra_syntax::SyntaxToken) -> Option<Vec<tt::TokenTr | |||
188 | } | 222 | } |
189 | } | 223 | } |
190 | 224 | ||
191 | struct Convertor { | 225 | struct TokenIdAlloc { |
192 | map: TokenMap, | 226 | map: TokenMap, |
193 | global_offset: TextUnit, | 227 | global_offset: TextUnit, |
194 | next_id: u32, | 228 | next_id: u32, |
195 | } | 229 | } |
196 | 230 | ||
231 | impl TokenIdAlloc { | ||
232 | fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId { | ||
233 | let relative_range = absolute_range - self.global_offset; | ||
234 | let token_id = tt::TokenId(self.next_id); | ||
235 | self.next_id += 1; | ||
236 | self.map.insert(token_id, relative_range); | ||
237 | token_id | ||
238 | } | ||
239 | |||
240 | fn delim(&mut self, open_abs_range: TextRange, close_abs_range: TextRange) -> tt::TokenId { | ||
241 | let open_relative_range = open_abs_range - self.global_offset; | ||
242 | let close_relative_range = close_abs_range - self.global_offset; | ||
243 | let token_id = tt::TokenId(self.next_id); | ||
244 | self.next_id += 1; | ||
245 | |||
246 | self.map.insert_delim(token_id, open_relative_range, close_relative_range); | ||
247 | token_id | ||
248 | } | ||
249 | |||
250 | fn open_delim(&mut self, open_abs_range: TextRange) -> tt::TokenId { | ||
251 | let token_id = tt::TokenId(self.next_id); | ||
252 | self.next_id += 1; | ||
253 | self.map.insert_delim(token_id, open_abs_range, open_abs_range); | ||
254 | token_id | ||
255 | } | ||
256 | |||
257 | fn close_delim(&mut self, id: tt::TokenId, close_abs_range: TextRange) { | ||
258 | self.map.update_close_delim(id, close_abs_range); | ||
259 | } | ||
260 | } | ||
261 | |||
262 | /// A Raw Token (straightly from lexer) convertor | ||
263 | struct RawConvertor<'a> { | ||
264 | text: &'a str, | ||
265 | offset: TextUnit, | ||
266 | id_alloc: TokenIdAlloc, | ||
267 | inner: std::slice::Iter<'a, Token>, | ||
268 | } | ||
269 | |||
270 | impl RawConvertor<'_> { | ||
271 | fn go(&mut self) -> Option<tt::Subtree> { | ||
272 | let mut subtree = tt::Subtree::default(); | ||
273 | subtree.delimiter = None; | ||
274 | while self.peek().is_some() { | ||
275 | self.collect_leaf(&mut subtree.token_trees); | ||
276 | } | ||
277 | if subtree.token_trees.is_empty() { | ||
278 | return None; | ||
279 | } | ||
280 | if subtree.token_trees.len() == 1 { | ||
281 | if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] { | ||
282 | return Some(first.clone()); | ||
283 | } | ||
284 | } | ||
285 | Some(subtree) | ||
286 | } | ||
287 | |||
288 | fn bump(&mut self) -> Option<(Token, TextRange)> { | ||
289 | let token = self.inner.next()?; | ||
290 | let range = TextRange::offset_len(self.offset, token.len); | ||
291 | self.offset += token.len; | ||
292 | Some((*token, range)) | ||
293 | } | ||
294 | |||
295 | fn peek(&self) -> Option<Token> { | ||
296 | self.inner.as_slice().get(0).cloned() | ||
297 | } | ||
298 | |||
299 | fn collect_leaf(&mut self, result: &mut Vec<tt::TokenTree>) { | ||
300 | let (token, range) = match self.bump() { | ||
301 | None => return, | ||
302 | Some(it) => it, | ||
303 | }; | ||
304 | |||
305 | let k: SyntaxKind = token.kind; | ||
306 | if k == COMMENT { | ||
307 | let node = doc_comment(&self.text[range]); | ||
308 | if let Some(tokens) = convert_doc_comment(&node) { | ||
309 | result.extend(tokens); | ||
310 | } | ||
311 | return; | ||
312 | } | ||
313 | |||
314 | result.push(if k.is_punct() { | ||
315 | let delim = match k { | ||
316 | T!['('] => Some((tt::DelimiterKind::Parenthesis, T![')'])), | ||
317 | T!['{'] => Some((tt::DelimiterKind::Brace, T!['}'])), | ||
318 | T!['['] => Some((tt::DelimiterKind::Bracket, T![']'])), | ||
319 | _ => None, | ||
320 | }; | ||
321 | |||
322 | if let Some((kind, closed)) = delim { | ||
323 | let mut subtree = tt::Subtree::default(); | ||
324 | let id = self.id_alloc.open_delim(range); | ||
325 | subtree.delimiter = Some(tt::Delimiter { kind, id }); | ||
326 | |||
327 | while self.peek().map(|it| it.kind != closed).unwrap_or(false) { | ||
328 | self.collect_leaf(&mut subtree.token_trees); | ||
329 | } | ||
330 | let last_range = match self.bump() { | ||
331 | None => return, | ||
332 | Some(it) => it.1, | ||
333 | }; | ||
334 | self.id_alloc.close_delim(id, last_range); | ||
335 | subtree.into() | ||
336 | } else { | ||
337 | let spacing = match self.peek() { | ||
338 | Some(next) | ||
339 | if next.kind.is_trivia() | ||
340 | || next.kind == T!['['] | ||
341 | || next.kind == T!['{'] | ||
342 | || next.kind == T!['('] => | ||
343 | { | ||
344 | tt::Spacing::Alone | ||
345 | } | ||
346 | Some(next) if next.kind.is_punct() => tt::Spacing::Joint, | ||
347 | _ => tt::Spacing::Alone, | ||
348 | }; | ||
349 | let char = | ||
350 | self.text[range].chars().next().expect("Token from lexer must be single char"); | ||
351 | |||
352 | tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc.alloc(range) }).into() | ||
353 | } | ||
354 | } else { | ||
355 | macro_rules! make_leaf { | ||
356 | ($i:ident) => { | ||
357 | tt::$i { id: self.id_alloc.alloc(range), text: self.text[range].into() }.into() | ||
358 | }; | ||
359 | } | ||
360 | let leaf: tt::Leaf = match k { | ||
361 | T![true] | T![false] => make_leaf!(Literal), | ||
362 | IDENT | LIFETIME => make_leaf!(Ident), | ||
363 | k if k.is_keyword() => make_leaf!(Ident), | ||
364 | k if k.is_literal() => make_leaf!(Literal), | ||
365 | _ => return, | ||
366 | }; | ||
367 | |||
368 | leaf.into() | ||
369 | }); | ||
370 | } | ||
371 | } | ||
372 | |||
373 | // FIXME: There are some duplicate logic between RawConvertor and Convertor | ||
374 | // It would be nice to refactor to converting SyntaxNode to ra_parser::Token and thus | ||
375 | // use RawConvertor directly. But performance-wise it may not be a good idea ? | ||
376 | struct Convertor { | ||
377 | id_alloc: TokenIdAlloc, | ||
378 | } | ||
379 | |||
197 | impl Convertor { | 380 | impl Convertor { |
198 | fn go(&mut self, tt: &SyntaxNode) -> Option<tt::Subtree> { | 381 | fn go(&mut self, tt: &SyntaxNode) -> Option<tt::Subtree> { |
199 | // This tree is empty | 382 | // This tree is empty |
@@ -236,7 +419,7 @@ impl Convertor { | |||
236 | }; | 419 | }; |
237 | let delimiter = delimiter_kind.map(|kind| tt::Delimiter { | 420 | let delimiter = delimiter_kind.map(|kind| tt::Delimiter { |
238 | kind, | 421 | kind, |
239 | id: self.alloc_delim(first_child.text_range(), last_child.text_range()), | 422 | id: self.id_alloc.delim(first_child.text_range(), last_child.text_range()), |
240 | }); | 423 | }); |
241 | 424 | ||
242 | let mut token_trees = Vec::new(); | 425 | let mut token_trees = Vec::new(); |
@@ -273,7 +456,7 @@ impl Convertor { | |||
273 | tt::Leaf::from(tt::Punct { | 456 | tt::Leaf::from(tt::Punct { |
274 | char, | 457 | char, |
275 | spacing, | 458 | spacing, |
276 | id: self.alloc(token.text_range()), | 459 | id: self.id_alloc.alloc(token.text_range()), |
277 | }) | 460 | }) |
278 | .into(), | 461 | .into(), |
279 | ); | 462 | ); |
@@ -282,7 +465,7 @@ impl Convertor { | |||
282 | macro_rules! make_leaf { | 465 | macro_rules! make_leaf { |
283 | ($i:ident) => { | 466 | ($i:ident) => { |
284 | tt::$i { | 467 | tt::$i { |
285 | id: self.alloc(token.text_range()), | 468 | id: self.id_alloc.alloc(token.text_range()), |
286 | text: token.text().clone(), | 469 | text: token.text().clone(), |
287 | } | 470 | } |
288 | .into() | 471 | .into() |
@@ -313,28 +496,6 @@ impl Convertor { | |||
313 | let res = tt::Subtree { delimiter, token_trees }; | 496 | let res = tt::Subtree { delimiter, token_trees }; |
314 | Some(res) | 497 | Some(res) |
315 | } | 498 | } |
316 | |||
317 | fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId { | ||
318 | let relative_range = absolute_range - self.global_offset; | ||
319 | let token_id = tt::TokenId(self.next_id); | ||
320 | self.next_id += 1; | ||
321 | self.map.insert(token_id, relative_range); | ||
322 | token_id | ||
323 | } | ||
324 | |||
325 | fn alloc_delim( | ||
326 | &mut self, | ||
327 | open_abs_range: TextRange, | ||
328 | close_abs_range: TextRange, | ||
329 | ) -> tt::TokenId { | ||
330 | let open_relative_range = open_abs_range - self.global_offset; | ||
331 | let close_relative_range = close_abs_range - self.global_offset; | ||
332 | let token_id = tt::TokenId(self.next_id); | ||
333 | self.next_id += 1; | ||
334 | |||
335 | self.map.insert_delim(token_id, open_relative_range, close_relative_range); | ||
336 | token_id | ||
337 | } | ||
338 | } | 499 | } |
339 | 500 | ||
340 | struct TtTreeSink<'a> { | 501 | struct TtTreeSink<'a> { |
diff --git a/crates/ra_mbe/src/tests.rs b/crates/ra_mbe/src/tests.rs index 066ce150b..6d5d1e9e6 100644 --- a/crates/ra_mbe/src/tests.rs +++ b/crates/ra_mbe/src/tests.rs | |||
@@ -1499,12 +1499,20 @@ impl MacroFixture { | |||
1499 | } | 1499 | } |
1500 | } | 1500 | } |
1501 | 1501 | ||
1502 | pub(crate) fn parse_macro(macro_definition: &str) -> MacroFixture { | 1502 | pub(crate) fn parse_macro(ra_fixture: &str) -> MacroFixture { |
1503 | let source_file = ast::SourceFile::parse(macro_definition).ok().unwrap(); | 1503 | let source_file = ast::SourceFile::parse(ra_fixture).ok().unwrap(); |
1504 | let macro_definition = | 1504 | let macro_definition = |
1505 | source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap(); | 1505 | source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap(); |
1506 | 1506 | ||
1507 | let (definition_tt, _) = ast_to_token_tree(&macro_definition.token_tree().unwrap()).unwrap(); | 1507 | let (definition_tt, _) = ast_to_token_tree(&macro_definition.token_tree().unwrap()).unwrap(); |
1508 | |||
1509 | let parsed = parse_to_token_tree( | ||
1510 | &ra_fixture[macro_definition.token_tree().unwrap().syntax().text_range()], | ||
1511 | ) | ||
1512 | .unwrap() | ||
1513 | .0; | ||
1514 | assert_eq!(definition_tt, parsed); | ||
1515 | |||
1508 | let rules = MacroRules::parse(&definition_tt).unwrap(); | 1516 | let rules = MacroRules::parse(&definition_tt).unwrap(); |
1509 | MacroFixture { rules } | 1517 | MacroFixture { rules } |
1510 | } | 1518 | } |