aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_mbe/src/syntax_bridge.rs
diff options
context:
space:
mode:
authorbors[bot] <26634292+bors[bot]@users.noreply.github.com>2020-03-09 08:43:07 +0000
committerGitHub <[email protected]>2020-03-09 08:43:07 +0000
commit0363c9495a6a07db276dce4c67fa35fbfc20153c (patch)
tree0a98e2b4659d0d0f4df98f41613c6da90e19551f /crates/ra_mbe/src/syntax_bridge.rs
parent7ac99aad28a36e7cdb27edcb319d7f540dbd8471 (diff)
parente7206467d57c555f1ca1fee6acc0461d7579f4f7 (diff)
Merge #3518
3518: Add parse_to_token_tree r=matklad a=edwin0cheng This PR introduces a function for parsing a `&str` into a `tt::TokenTree`: ```rust // Convert a string to a `TokenTree` pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> { ``` Co-authored-by: Edwin Cheng <[email protected]>
Diffstat (limited to 'crates/ra_mbe/src/syntax_bridge.rs')
-rw-r--r--crates/ra_mbe/src/syntax_bridge.rs221
1 files changed, 191 insertions, 30 deletions
diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs
index fb9fa5314..fcb73fbc7 100644
--- a/crates/ra_mbe/src/syntax_bridge.rs
+++ b/crates/ra_mbe/src/syntax_bridge.rs
@@ -2,8 +2,10 @@
2 2
3use ra_parser::{FragmentKind, ParseError, TreeSink}; 3use ra_parser::{FragmentKind, ParseError, TreeSink};
4use ra_syntax::{ 4use ra_syntax::{
5 ast, AstToken, NodeOrToken, Parse, SmolStr, SyntaxKind, SyntaxKind::*, SyntaxNode, 5 ast::{self, make::tokens::doc_comment},
6 SyntaxTreeBuilder, TextRange, TextUnit, T, 6 tokenize, AstToken, NodeOrToken, Parse, SmolStr, SyntaxKind,
7 SyntaxKind::*,
8 SyntaxNode, SyntaxTreeBuilder, TextRange, TextUnit, Token, T,
7}; 9};
8use rustc_hash::FxHashMap; 10use rustc_hash::FxHashMap;
9use std::iter::successors; 11use std::iter::successors;
@@ -48,9 +50,11 @@ pub fn ast_to_token_tree(ast: &impl ast::AstNode) -> Option<(tt::Subtree, TokenM
48/// will consume). 50/// will consume).
49pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> Option<(tt::Subtree, TokenMap)> { 51pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> Option<(tt::Subtree, TokenMap)> {
50 let global_offset = node.text_range().start(); 52 let global_offset = node.text_range().start();
51 let mut c = Convertor { map: TokenMap::default(), global_offset, next_id: 0 }; 53 let mut c = Convertor {
54 id_alloc: { TokenIdAlloc { map: TokenMap::default(), global_offset, next_id: 0 } },
55 };
52 let subtree = c.go(node)?; 56 let subtree = c.go(node)?;
53 Some((subtree, c.map)) 57 Some((subtree, c.id_alloc.map))
54} 58}
55 59
56// The following items are what `rustc` macro can be parsed into : 60// The following items are what `rustc` macro can be parsed into :
@@ -89,6 +93,28 @@ pub fn token_tree_to_syntax_node(
89 Ok((parse, range_map)) 93 Ok((parse, range_map))
90} 94}
91 95
96/// Convert a string to a `TokenTree`
97pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
98 let (tokens, errors) = tokenize(text);
99 if !errors.is_empty() {
100 return None;
101 }
102
103 let mut conv = RawConvertor {
104 text,
105 offset: TextUnit::default(),
106 inner: tokens.iter(),
107 id_alloc: TokenIdAlloc {
108 map: Default::default(),
109 global_offset: TextUnit::default(),
110 next_id: 0,
111 },
112 };
113
114 let subtree = conv.go()?;
115 Some((subtree, conv.id_alloc.map))
116}
117
92impl TokenMap { 118impl TokenMap {
93 pub fn token_by_range(&self, relative_range: TextRange) -> Option<tt::TokenId> { 119 pub fn token_by_range(&self, relative_range: TextRange) -> Option<tt::TokenId> {
94 let &(token_id, _) = self.entries.iter().find(|(_, range)| match range { 120 let &(token_id, _) = self.entries.iter().find(|(_, range)| match range {
@@ -118,6 +144,14 @@ impl TokenMap {
118 self.entries 144 self.entries
119 .push((token_id, TokenTextRange::Delimiter(open_relative_range, close_relative_range))); 145 .push((token_id, TokenTextRange::Delimiter(open_relative_range, close_relative_range)));
120 } 146 }
147
148 fn update_close_delim(&mut self, token_id: tt::TokenId, close_relative_range: TextRange) {
149 if let Some(entry) = self.entries.iter_mut().find(|(tid, _)| *tid == token_id) {
150 if let TokenTextRange::Delimiter(dim, _) = entry.1 {
151 entry.1 = TokenTextRange::Delimiter(dim, close_relative_range);
152 }
153 }
154 }
121} 155}
122 156
123/// Returns the textual content of a doc comment block as a quoted string 157/// Returns the textual content of a doc comment block as a quoted string
@@ -188,12 +222,161 @@ fn convert_doc_comment(token: &ra_syntax::SyntaxToken) -> Option<Vec<tt::TokenTr
188 } 222 }
189} 223}
190 224
191struct Convertor { 225struct TokenIdAlloc {
192 map: TokenMap, 226 map: TokenMap,
193 global_offset: TextUnit, 227 global_offset: TextUnit,
194 next_id: u32, 228 next_id: u32,
195} 229}
196 230
231impl TokenIdAlloc {
232 fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId {
233 let relative_range = absolute_range - self.global_offset;
234 let token_id = tt::TokenId(self.next_id);
235 self.next_id += 1;
236 self.map.insert(token_id, relative_range);
237 token_id
238 }
239
240 fn delim(&mut self, open_abs_range: TextRange, close_abs_range: TextRange) -> tt::TokenId {
241 let open_relative_range = open_abs_range - self.global_offset;
242 let close_relative_range = close_abs_range - self.global_offset;
243 let token_id = tt::TokenId(self.next_id);
244 self.next_id += 1;
245
246 self.map.insert_delim(token_id, open_relative_range, close_relative_range);
247 token_id
248 }
249
250 fn open_delim(&mut self, open_abs_range: TextRange) -> tt::TokenId {
251 let token_id = tt::TokenId(self.next_id);
252 self.next_id += 1;
253 self.map.insert_delim(token_id, open_abs_range, open_abs_range);
254 token_id
255 }
256
257 fn close_delim(&mut self, id: tt::TokenId, close_abs_range: TextRange) {
258 self.map.update_close_delim(id, close_abs_range);
259 }
260}
261
262/// A Raw Token (straightly from lexer) convertor
263struct RawConvertor<'a> {
264 text: &'a str,
265 offset: TextUnit,
266 id_alloc: TokenIdAlloc,
267 inner: std::slice::Iter<'a, Token>,
268}
269
270impl RawConvertor<'_> {
271 fn go(&mut self) -> Option<tt::Subtree> {
272 let mut subtree = tt::Subtree::default();
273 subtree.delimiter = None;
274 while self.peek().is_some() {
275 self.collect_leaf(&mut subtree.token_trees);
276 }
277 if subtree.token_trees.is_empty() {
278 return None;
279 }
280 if subtree.token_trees.len() == 1 {
281 if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] {
282 return Some(first.clone());
283 }
284 }
285 Some(subtree)
286 }
287
288 fn bump(&mut self) -> Option<(Token, TextRange)> {
289 let token = self.inner.next()?;
290 let range = TextRange::offset_len(self.offset, token.len);
291 self.offset += token.len;
292 Some((*token, range))
293 }
294
295 fn peek(&self) -> Option<Token> {
296 self.inner.as_slice().get(0).cloned()
297 }
298
299 fn collect_leaf(&mut self, result: &mut Vec<tt::TokenTree>) {
300 let (token, range) = match self.bump() {
301 None => return,
302 Some(it) => it,
303 };
304
305 let k: SyntaxKind = token.kind;
306 if k == COMMENT {
307 let node = doc_comment(&self.text[range]);
308 if let Some(tokens) = convert_doc_comment(&node) {
309 result.extend(tokens);
310 }
311 return;
312 }
313
314 result.push(if k.is_punct() {
315 let delim = match k {
316 T!['('] => Some((tt::DelimiterKind::Parenthesis, T![')'])),
317 T!['{'] => Some((tt::DelimiterKind::Brace, T!['}'])),
318 T!['['] => Some((tt::DelimiterKind::Bracket, T![']'])),
319 _ => None,
320 };
321
322 if let Some((kind, closed)) = delim {
323 let mut subtree = tt::Subtree::default();
324 let id = self.id_alloc.open_delim(range);
325 subtree.delimiter = Some(tt::Delimiter { kind, id });
326
327 while self.peek().map(|it| it.kind != closed).unwrap_or(false) {
328 self.collect_leaf(&mut subtree.token_trees);
329 }
330 let last_range = match self.bump() {
331 None => return,
332 Some(it) => it.1,
333 };
334 self.id_alloc.close_delim(id, last_range);
335 subtree.into()
336 } else {
337 let spacing = match self.peek() {
338 Some(next)
339 if next.kind.is_trivia()
340 || next.kind == T!['[']
341 || next.kind == T!['{']
342 || next.kind == T!['('] =>
343 {
344 tt::Spacing::Alone
345 }
346 Some(next) if next.kind.is_punct() => tt::Spacing::Joint,
347 _ => tt::Spacing::Alone,
348 };
349 let char =
350 self.text[range].chars().next().expect("Token from lexer must be single char");
351
352 tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc.alloc(range) }).into()
353 }
354 } else {
355 macro_rules! make_leaf {
356 ($i:ident) => {
357 tt::$i { id: self.id_alloc.alloc(range), text: self.text[range].into() }.into()
358 };
359 }
360 let leaf: tt::Leaf = match k {
361 T![true] | T![false] => make_leaf!(Literal),
362 IDENT | LIFETIME => make_leaf!(Ident),
363 k if k.is_keyword() => make_leaf!(Ident),
364 k if k.is_literal() => make_leaf!(Literal),
365 _ => return,
366 };
367
368 leaf.into()
369 });
370 }
371}
372
373// FIXME: There are some duplicate logic between RawConvertor and Convertor
374// It would be nice to refactor to converting SyntaxNode to ra_parser::Token and thus
375// use RawConvertor directly. But performance-wise it may not be a good idea ?
376struct Convertor {
377 id_alloc: TokenIdAlloc,
378}
379
197impl Convertor { 380impl Convertor {
198 fn go(&mut self, tt: &SyntaxNode) -> Option<tt::Subtree> { 381 fn go(&mut self, tt: &SyntaxNode) -> Option<tt::Subtree> {
199 // This tree is empty 382 // This tree is empty
@@ -236,7 +419,7 @@ impl Convertor {
236 }; 419 };
237 let delimiter = delimiter_kind.map(|kind| tt::Delimiter { 420 let delimiter = delimiter_kind.map(|kind| tt::Delimiter {
238 kind, 421 kind,
239 id: self.alloc_delim(first_child.text_range(), last_child.text_range()), 422 id: self.id_alloc.delim(first_child.text_range(), last_child.text_range()),
240 }); 423 });
241 424
242 let mut token_trees = Vec::new(); 425 let mut token_trees = Vec::new();
@@ -273,7 +456,7 @@ impl Convertor {
273 tt::Leaf::from(tt::Punct { 456 tt::Leaf::from(tt::Punct {
274 char, 457 char,
275 spacing, 458 spacing,
276 id: self.alloc(token.text_range()), 459 id: self.id_alloc.alloc(token.text_range()),
277 }) 460 })
278 .into(), 461 .into(),
279 ); 462 );
@@ -282,7 +465,7 @@ impl Convertor {
282 macro_rules! make_leaf { 465 macro_rules! make_leaf {
283 ($i:ident) => { 466 ($i:ident) => {
284 tt::$i { 467 tt::$i {
285 id: self.alloc(token.text_range()), 468 id: self.id_alloc.alloc(token.text_range()),
286 text: token.text().clone(), 469 text: token.text().clone(),
287 } 470 }
288 .into() 471 .into()
@@ -313,28 +496,6 @@ impl Convertor {
313 let res = tt::Subtree { delimiter, token_trees }; 496 let res = tt::Subtree { delimiter, token_trees };
314 Some(res) 497 Some(res)
315 } 498 }
316
317 fn alloc(&mut self, absolute_range: TextRange) -> tt::TokenId {
318 let relative_range = absolute_range - self.global_offset;
319 let token_id = tt::TokenId(self.next_id);
320 self.next_id += 1;
321 self.map.insert(token_id, relative_range);
322 token_id
323 }
324
325 fn alloc_delim(
326 &mut self,
327 open_abs_range: TextRange,
328 close_abs_range: TextRange,
329 ) -> tt::TokenId {
330 let open_relative_range = open_abs_range - self.global_offset;
331 let close_relative_range = close_abs_range - self.global_offset;
332 let token_id = tt::TokenId(self.next_id);
333 self.next_id += 1;
334
335 self.map.insert_delim(token_id, open_relative_range, close_relative_range);
336 token_id
337 }
338} 499}
339 500
340struct TtTreeSink<'a> { 501struct TtTreeSink<'a> {