diff options
Diffstat (limited to 'crates/ra_mbe/src/subtree_source.rs')
-rw-r--r-- | crates/ra_mbe/src/subtree_source.rs | 521 |
1 files changed, 521 insertions, 0 deletions
diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs new file mode 100644 index 000000000..4b37c2bda --- /dev/null +++ b/crates/ra_mbe/src/subtree_source.rs | |||
@@ -0,0 +1,521 @@ | |||
1 | use ra_parser::{TokenSource}; | ||
2 | use ra_syntax::{classify_literal, SmolStr, SyntaxKind, SyntaxKind::*}; | ||
3 | use std::cell::{RefCell}; | ||
4 | |||
5 | #[derive(Debug, Clone, Eq, PartialEq)] | ||
6 | struct TtToken { | ||
7 | pub kind: SyntaxKind, | ||
8 | pub is_joint_to_next: bool, | ||
9 | pub text: SmolStr, | ||
10 | pub n_tokens: usize, | ||
11 | } | ||
12 | |||
13 | #[derive(Debug, Clone, Eq, PartialEq)] | ||
14 | enum WalkCursor { | ||
15 | DelimiterBegin(Option<TtToken>), | ||
16 | Token(usize, Option<TtToken>), | ||
17 | DelimiterEnd(Option<TtToken>), | ||
18 | Eof, | ||
19 | } | ||
20 | |||
21 | #[derive(Debug)] | ||
22 | struct SubTreeWalker<'a> { | ||
23 | pos: usize, | ||
24 | stack: Vec<(&'a tt::Subtree, Option<usize>)>, | ||
25 | cursor: WalkCursor, | ||
26 | last_steps: Vec<usize>, | ||
27 | subtree: &'a tt::Subtree, | ||
28 | } | ||
29 | |||
30 | impl<'a> SubTreeWalker<'a> { | ||
31 | fn new(subtree: &tt::Subtree) -> SubTreeWalker { | ||
32 | let mut res = SubTreeWalker { | ||
33 | pos: 0, | ||
34 | stack: vec![], | ||
35 | cursor: WalkCursor::Eof, | ||
36 | last_steps: vec![], | ||
37 | subtree, | ||
38 | }; | ||
39 | |||
40 | res.reset(); | ||
41 | res | ||
42 | } | ||
43 | |||
44 | fn is_eof(&self) -> bool { | ||
45 | self.cursor == WalkCursor::Eof | ||
46 | } | ||
47 | |||
48 | fn reset(&mut self) { | ||
49 | self.pos = 0; | ||
50 | self.stack = vec![(self.subtree, None)]; | ||
51 | self.cursor = WalkCursor::DelimiterBegin(convert_delim(self.subtree.delimiter, false)); | ||
52 | self.last_steps = vec![]; | ||
53 | |||
54 | while self.is_empty_delimiter() { | ||
55 | self.forward_unchecked(); | ||
56 | } | ||
57 | } | ||
58 | |||
59 | // This funciton will fast forward the cursor, | ||
60 | // Such that backward will stop at `start_pos` point | ||
61 | fn start_from_nth(&mut self, start_pos: usize) { | ||
62 | self.reset(); | ||
63 | self.pos = start_pos; | ||
64 | self.cursor = self.walk_token(start_pos, 0, false); | ||
65 | |||
66 | while self.is_empty_delimiter() { | ||
67 | self.forward_unchecked(); | ||
68 | } | ||
69 | } | ||
70 | |||
71 | fn current(&self) -> Option<&TtToken> { | ||
72 | match &self.cursor { | ||
73 | WalkCursor::DelimiterBegin(t) => t.as_ref(), | ||
74 | WalkCursor::Token(_, t) => t.as_ref(), | ||
75 | WalkCursor::DelimiterEnd(t) => t.as_ref(), | ||
76 | WalkCursor::Eof => None, | ||
77 | } | ||
78 | } | ||
79 | |||
80 | fn is_empty_delimiter(&self) -> bool { | ||
81 | match &self.cursor { | ||
82 | WalkCursor::DelimiterBegin(None) => true, | ||
83 | WalkCursor::DelimiterEnd(None) => true, | ||
84 | _ => false, | ||
85 | } | ||
86 | } | ||
87 | |||
88 | /// Move cursor backward by 1 step with empty checking | ||
89 | fn backward(&mut self) { | ||
90 | if self.last_steps.is_empty() { | ||
91 | return; | ||
92 | } | ||
93 | self.pos -= 1; | ||
94 | loop { | ||
95 | self.backward_unchecked(); | ||
96 | // Skip Empty delimiter | ||
97 | if self.last_steps.is_empty() || !self.is_empty_delimiter() { | ||
98 | break; | ||
99 | } | ||
100 | } | ||
101 | |||
102 | // Move forward if it is empty delimiter | ||
103 | if self.last_steps.is_empty() { | ||
104 | while self.is_empty_delimiter() { | ||
105 | self.forward_unchecked(); | ||
106 | } | ||
107 | } | ||
108 | } | ||
109 | |||
110 | /// Move cursor backward by 1 step without empty check | ||
111 | /// | ||
112 | /// Depends on the current state of cursor: | ||
113 | /// | ||
114 | /// * Delimiter Begin => Pop the stack, goto last walking token (`walk_token`) | ||
115 | /// * Token => Goto prev token (`walk_token`) | ||
116 | /// * Delimiter End => Goto the last child token (`walk_token`) | ||
117 | /// * Eof => push the root subtree, and set it as Delimiter End | ||
118 | fn backward_unchecked(&mut self) { | ||
119 | if self.last_steps.is_empty() { | ||
120 | return; | ||
121 | } | ||
122 | |||
123 | let last_step = self.last_steps.pop().unwrap(); | ||
124 | let do_walk_token = match self.cursor { | ||
125 | WalkCursor::DelimiterBegin(_) => None, | ||
126 | WalkCursor::Token(u, _) => Some(u), | ||
127 | WalkCursor::DelimiterEnd(_) => { | ||
128 | let (top, _) = self.stack.last().unwrap(); | ||
129 | Some(top.token_trees.len()) | ||
130 | } | ||
131 | WalkCursor::Eof => None, | ||
132 | }; | ||
133 | |||
134 | self.cursor = match do_walk_token { | ||
135 | Some(u) => self.walk_token(u, last_step, true), | ||
136 | None => match self.cursor { | ||
137 | WalkCursor::Eof => { | ||
138 | self.stack.push((self.subtree, None)); | ||
139 | WalkCursor::DelimiterEnd(convert_delim( | ||
140 | self.stack.last().unwrap().0.delimiter, | ||
141 | true, | ||
142 | )) | ||
143 | } | ||
144 | _ => { | ||
145 | let (_, last_top_cursor) = self.stack.pop().unwrap(); | ||
146 | assert!(!self.stack.is_empty()); | ||
147 | |||
148 | self.walk_token(last_top_cursor.unwrap(), last_step, true) | ||
149 | } | ||
150 | }, | ||
151 | }; | ||
152 | } | ||
153 | |||
154 | /// Move cursor forward by 1 step with empty checking | ||
155 | fn forward(&mut self) { | ||
156 | if self.is_eof() { | ||
157 | return; | ||
158 | } | ||
159 | |||
160 | self.pos += 1; | ||
161 | loop { | ||
162 | self.forward_unchecked(); | ||
163 | if !self.is_empty_delimiter() { | ||
164 | break; | ||
165 | } | ||
166 | } | ||
167 | } | ||
168 | |||
169 | /// Move cursor forward by 1 step without empty checking | ||
170 | /// | ||
171 | /// Depends on the current state of cursor: | ||
172 | /// | ||
173 | /// * Delimiter Begin => Goto the first child token (`walk_token`) | ||
174 | /// * Token => Goto next token (`walk_token`) | ||
175 | /// * Delimiter End => Pop the stack, goto last walking token (`walk_token`) | ||
176 | /// | ||
177 | fn forward_unchecked(&mut self) { | ||
178 | if self.is_eof() { | ||
179 | return; | ||
180 | } | ||
181 | |||
182 | let step = self.current().map(|x| x.n_tokens).unwrap_or(1); | ||
183 | self.last_steps.push(step); | ||
184 | |||
185 | let do_walk_token = match self.cursor { | ||
186 | WalkCursor::DelimiterBegin(_) => Some((0, 0)), | ||
187 | WalkCursor::Token(u, _) => Some((u, step)), | ||
188 | WalkCursor::DelimiterEnd(_) => None, | ||
189 | _ => unreachable!(), | ||
190 | }; | ||
191 | |||
192 | self.cursor = match do_walk_token { | ||
193 | Some((u, step)) => self.walk_token(u, step, false), | ||
194 | None => { | ||
195 | let (_, last_top_idx) = self.stack.pop().unwrap(); | ||
196 | match self.stack.last() { | ||
197 | Some(_) => self.walk_token(last_top_idx.unwrap(), 1, false), | ||
198 | None => WalkCursor::Eof, | ||
199 | } | ||
200 | } | ||
201 | }; | ||
202 | } | ||
203 | |||
204 | /// Traversal child token | ||
205 | /// Depends on the new position, it returns: | ||
206 | /// | ||
207 | /// * new position < 0 => DelimiterBegin | ||
208 | /// * new position > token_tree.len() => DelimiterEnd | ||
209 | /// * if new position is a subtree, depends on traversal direction: | ||
210 | /// ** backward => DelimiterEnd | ||
211 | /// ** forward => DelimiterBegin | ||
212 | /// * if new psoition is a leaf, return walk_leaf() | ||
213 | fn walk_token(&mut self, pos: usize, offset: usize, backward: bool) -> WalkCursor { | ||
214 | let (top, _) = self.stack.last().unwrap(); | ||
215 | |||
216 | if backward && pos < offset { | ||
217 | return WalkCursor::DelimiterBegin(convert_delim( | ||
218 | self.stack.last().unwrap().0.delimiter, | ||
219 | false, | ||
220 | )); | ||
221 | } | ||
222 | |||
223 | if !backward && pos + offset >= top.token_trees.len() { | ||
224 | return WalkCursor::DelimiterEnd(convert_delim( | ||
225 | self.stack.last().unwrap().0.delimiter, | ||
226 | true, | ||
227 | )); | ||
228 | } | ||
229 | |||
230 | let pos = if backward { pos - offset } else { pos + offset }; | ||
231 | |||
232 | match &top.token_trees[pos] { | ||
233 | tt::TokenTree::Subtree(subtree) => { | ||
234 | self.stack.push((subtree, Some(pos))); | ||
235 | let delim = convert_delim(self.stack.last().unwrap().0.delimiter, backward); | ||
236 | if backward { | ||
237 | WalkCursor::DelimiterEnd(delim) | ||
238 | } else { | ||
239 | WalkCursor::DelimiterBegin(delim) | ||
240 | } | ||
241 | } | ||
242 | tt::TokenTree::Leaf(leaf) => WalkCursor::Token(pos, Some(self.walk_leaf(leaf, pos))), | ||
243 | } | ||
244 | } | ||
245 | |||
246 | fn walk_leaf(&mut self, leaf: &tt::Leaf, pos: usize) -> TtToken { | ||
247 | match leaf { | ||
248 | tt::Leaf::Literal(l) => convert_literal(l), | ||
249 | tt::Leaf::Ident(ident) => convert_ident(ident), | ||
250 | tt::Leaf::Punct(punct) => { | ||
251 | let (top, _) = self.stack.last().unwrap(); | ||
252 | convert_punct(punct, top, pos) | ||
253 | } | ||
254 | } | ||
255 | } | ||
256 | } | ||
257 | |||
258 | pub(crate) trait Querier { | ||
259 | fn token(&self, uidx: usize) -> (SyntaxKind, SmolStr); | ||
260 | } | ||
261 | |||
262 | // A wrapper class for ref cell | ||
263 | #[derive(Debug)] | ||
264 | pub(crate) struct WalkerOwner<'a> { | ||
265 | walker: RefCell<SubTreeWalker<'a>>, | ||
266 | offset: usize, | ||
267 | } | ||
268 | |||
269 | impl<'a> WalkerOwner<'a> { | ||
270 | fn new(subtree: &'a tt::Subtree) -> Self { | ||
271 | WalkerOwner { walker: RefCell::new(SubTreeWalker::new(subtree)), offset: 0 } | ||
272 | } | ||
273 | |||
274 | fn get<'b>(&self, pos: usize) -> Option<TtToken> { | ||
275 | self.set_walker_pos(pos); | ||
276 | let walker = self.walker.borrow(); | ||
277 | walker.current().cloned() | ||
278 | } | ||
279 | |||
280 | fn start_from_nth(&mut self, pos: usize) { | ||
281 | self.offset = pos; | ||
282 | self.walker.borrow_mut().start_from_nth(pos); | ||
283 | } | ||
284 | |||
285 | fn set_walker_pos(&self, mut pos: usize) { | ||
286 | pos += self.offset; | ||
287 | let mut walker = self.walker.borrow_mut(); | ||
288 | while pos > walker.pos && !walker.is_eof() { | ||
289 | walker.forward(); | ||
290 | } | ||
291 | while pos < walker.pos { | ||
292 | walker.backward(); | ||
293 | } | ||
294 | } | ||
295 | |||
296 | fn collect_token_trees(&mut self, n: usize) -> Vec<&tt::TokenTree> { | ||
297 | self.start_from_nth(self.offset); | ||
298 | |||
299 | let mut res = vec![]; | ||
300 | let mut walker = self.walker.borrow_mut(); | ||
301 | |||
302 | while walker.pos - self.offset < n { | ||
303 | if let WalkCursor::Token(u, tt) = &walker.cursor { | ||
304 | if walker.stack.len() == 1 { | ||
305 | // We only collect the topmost child | ||
306 | res.push(&walker.stack[0].0.token_trees[*u]); | ||
307 | if let Some(tt) = tt { | ||
308 | for i in 0..tt.n_tokens - 1 { | ||
309 | res.push(&walker.stack[0].0.token_trees[u + i]); | ||
310 | } | ||
311 | } | ||
312 | } | ||
313 | } | ||
314 | |||
315 | walker.forward(); | ||
316 | } | ||
317 | |||
318 | res | ||
319 | } | ||
320 | } | ||
321 | |||
322 | impl<'a> Querier for WalkerOwner<'a> { | ||
323 | fn token(&self, uidx: usize) -> (SyntaxKind, SmolStr) { | ||
324 | let tkn = self.get(uidx).unwrap(); | ||
325 | (tkn.kind, tkn.text) | ||
326 | } | ||
327 | } | ||
328 | |||
329 | pub(crate) struct SubtreeTokenSource<'a> { | ||
330 | walker: WalkerOwner<'a>, | ||
331 | } | ||
332 | |||
333 | impl<'a> SubtreeTokenSource<'a> { | ||
334 | pub fn new(subtree: &tt::Subtree) -> SubtreeTokenSource { | ||
335 | SubtreeTokenSource { walker: WalkerOwner::new(subtree) } | ||
336 | } | ||
337 | |||
338 | pub fn start_from_nth(&mut self, n: usize) { | ||
339 | self.walker.start_from_nth(n); | ||
340 | } | ||
341 | |||
342 | pub fn querier<'b>(&'a self) -> &'b WalkerOwner<'a> | ||
343 | where | ||
344 | 'a: 'b, | ||
345 | { | ||
346 | &self.walker | ||
347 | } | ||
348 | |||
349 | pub(crate) fn bump_n(&mut self, parsed_tokens: usize) -> Vec<&tt::TokenTree> { | ||
350 | let res = self.walker.collect_token_trees(parsed_tokens); | ||
351 | res | ||
352 | } | ||
353 | } | ||
354 | |||
355 | impl<'a> TokenSource for SubtreeTokenSource<'a> { | ||
356 | fn token_kind(&self, pos: usize) -> SyntaxKind { | ||
357 | if let Some(tok) = self.walker.get(pos) { | ||
358 | tok.kind | ||
359 | } else { | ||
360 | SyntaxKind::EOF | ||
361 | } | ||
362 | } | ||
363 | fn is_token_joint_to_next(&self, pos: usize) -> bool { | ||
364 | self.walker.get(pos).unwrap().is_joint_to_next | ||
365 | } | ||
366 | fn is_keyword(&self, pos: usize, kw: &str) -> bool { | ||
367 | self.walker.get(pos).unwrap().text == *kw | ||
368 | } | ||
369 | } | ||
370 | |||
371 | struct TokenPeek<'a, I> | ||
372 | where | ||
373 | I: Iterator<Item = &'a tt::TokenTree>, | ||
374 | { | ||
375 | iter: itertools::MultiPeek<I>, | ||
376 | } | ||
377 | |||
378 | // helper function | ||
379 | fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> { | ||
380 | if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt { | ||
381 | return Some(pp); | ||
382 | } | ||
383 | None | ||
384 | } | ||
385 | |||
386 | impl<'a, I> TokenPeek<'a, I> | ||
387 | where | ||
388 | I: Iterator<Item = &'a tt::TokenTree>, | ||
389 | { | ||
390 | pub fn new(iter: I) -> Self { | ||
391 | TokenPeek { iter: itertools::multipeek(iter) } | ||
392 | } | ||
393 | |||
394 | fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> { | ||
395 | if p.spacing != tt::Spacing::Joint { | ||
396 | return None; | ||
397 | } | ||
398 | |||
399 | self.iter.reset_peek(); | ||
400 | let p1 = to_punct(self.iter.peek()?)?; | ||
401 | Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint)) | ||
402 | } | ||
403 | |||
404 | fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> { | ||
405 | self.current_punct2(p).and_then(|((p0, p1), last_joint)| { | ||
406 | if !last_joint { | ||
407 | None | ||
408 | } else { | ||
409 | let p2 = to_punct(*self.iter.peek()?)?; | ||
410 | Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint)) | ||
411 | } | ||
412 | }) | ||
413 | } | ||
414 | } | ||
415 | |||
416 | fn convert_multi_char_punct<'b, I>( | ||
417 | p: &tt::Punct, | ||
418 | iter: &mut TokenPeek<'b, I>, | ||
419 | ) -> Option<(SyntaxKind, bool, &'static str, usize)> | ||
420 | where | ||
421 | I: Iterator<Item = &'b tt::TokenTree>, | ||
422 | { | ||
423 | if let Some((m, is_joint_to_next)) = iter.current_punct3(p) { | ||
424 | if let Some((kind, text)) = match m { | ||
425 | ('<', '<', '=') => Some((SHLEQ, "<<=")), | ||
426 | ('>', '>', '=') => Some((SHREQ, ">>=")), | ||
427 | ('.', '.', '.') => Some((DOTDOTDOT, "...")), | ||
428 | ('.', '.', '=') => Some((DOTDOTEQ, "..=")), | ||
429 | _ => None, | ||
430 | } { | ||
431 | return Some((kind, is_joint_to_next, text, 3)); | ||
432 | } | ||
433 | } | ||
434 | |||
435 | if let Some((m, is_joint_to_next)) = iter.current_punct2(p) { | ||
436 | if let Some((kind, text)) = match m { | ||
437 | ('<', '<') => Some((SHL, "<<")), | ||
438 | ('>', '>') => Some((SHR, ">>")), | ||
439 | |||
440 | ('|', '|') => Some((PIPEPIPE, "||")), | ||
441 | ('&', '&') => Some((AMPAMP, "&&")), | ||
442 | ('%', '=') => Some((PERCENTEQ, "%=")), | ||
443 | ('*', '=') => Some((STAREQ, "*=")), | ||
444 | ('/', '=') => Some((SLASHEQ, "/=")), | ||
445 | ('^', '=') => Some((CARETEQ, "^=")), | ||
446 | |||
447 | ('&', '=') => Some((AMPEQ, "&=")), | ||
448 | ('|', '=') => Some((PIPEEQ, "|=")), | ||
449 | ('-', '=') => Some((MINUSEQ, "-=")), | ||
450 | ('+', '=') => Some((PLUSEQ, "+=")), | ||
451 | ('>', '=') => Some((GTEQ, ">=")), | ||
452 | ('<', '=') => Some((LTEQ, "<=")), | ||
453 | |||
454 | ('-', '>') => Some((THIN_ARROW, "->")), | ||
455 | ('!', '=') => Some((NEQ, "!=")), | ||
456 | ('=', '>') => Some((FAT_ARROW, "=>")), | ||
457 | ('=', '=') => Some((EQEQ, "==")), | ||
458 | ('.', '.') => Some((DOTDOT, "..")), | ||
459 | (':', ':') => Some((COLONCOLON, "::")), | ||
460 | |||
461 | _ => None, | ||
462 | } { | ||
463 | return Some((kind, is_joint_to_next, text, 2)); | ||
464 | } | ||
465 | } | ||
466 | |||
467 | None | ||
468 | } | ||
469 | |||
470 | fn convert_delim(d: tt::Delimiter, closing: bool) -> Option<TtToken> { | ||
471 | let (kinds, texts) = match d { | ||
472 | tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), | ||
473 | tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), | ||
474 | tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), | ||
475 | tt::Delimiter::None => return None, | ||
476 | }; | ||
477 | |||
478 | let idx = closing as usize; | ||
479 | let kind = kinds[idx]; | ||
480 | let text = &texts[idx..texts.len() - (1 - idx)]; | ||
481 | Some(TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }) | ||
482 | } | ||
483 | |||
484 | fn convert_literal(l: &tt::Literal) -> TtToken { | ||
485 | TtToken { | ||
486 | kind: classify_literal(&l.text).unwrap().kind, | ||
487 | is_joint_to_next: false, | ||
488 | text: l.text.clone(), | ||
489 | n_tokens: 1, | ||
490 | } | ||
491 | } | ||
492 | |||
493 | fn convert_ident(ident: &tt::Ident) -> TtToken { | ||
494 | let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); | ||
495 | TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } | ||
496 | } | ||
497 | |||
498 | fn convert_punct(p: &tt::Punct, parent: &tt::Subtree, next: usize) -> TtToken { | ||
499 | let iter = parent.token_trees[next + 1..].iter(); | ||
500 | let mut peek = TokenPeek::new(iter); | ||
501 | |||
502 | if let Some((kind, is_joint_to_next, text, size)) = convert_multi_char_punct(p, &mut peek) { | ||
503 | TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } | ||
504 | } else { | ||
505 | let kind = match p.char { | ||
506 | // lexer may produce combpund tokens for these ones | ||
507 | '.' => DOT, | ||
508 | ':' => COLON, | ||
509 | '=' => EQ, | ||
510 | '!' => EXCL, | ||
511 | '-' => MINUS, | ||
512 | c => SyntaxKind::from_char(c).unwrap(), | ||
513 | }; | ||
514 | let text = { | ||
515 | let mut buf = [0u8; 4]; | ||
516 | let s: &str = p.char.encode_utf8(&mut buf); | ||
517 | SmolStr::new(s) | ||
518 | }; | ||
519 | TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text, n_tokens: 1 } | ||
520 | } | ||
521 | } | ||