diff options
Diffstat (limited to 'crates/ra_mbe/src/subtree_source.rs')
-rw-r--r-- | crates/ra_mbe/src/subtree_source.rs | 352 |
1 files changed, 352 insertions, 0 deletions
diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs new file mode 100644 index 000000000..8f5ce4ed5 --- /dev/null +++ b/crates/ra_mbe/src/subtree_source.rs | |||
@@ -0,0 +1,352 @@ | |||
1 | use ra_parser::{TokenSource}; | ||
2 | use ra_syntax::{classify_literal, SmolStr, SyntaxKind, SyntaxKind::*}; | ||
3 | |||
4 | #[derive(Debug)] | ||
5 | struct TtToken { | ||
6 | pub kind: SyntaxKind, | ||
7 | pub is_joint_to_next: bool, | ||
8 | pub text: SmolStr, | ||
9 | pub n_tokens: usize, | ||
10 | } | ||
11 | |||
12 | /// SubtreeSourceQuerier let outside to query internal tokens as string | ||
13 | pub(crate) struct SubtreeSourceQuerier<'a> { | ||
14 | src: &'a SubtreeTokenSource<'a>, | ||
15 | } | ||
16 | |||
17 | impl<'a> SubtreeSourceQuerier<'a> { | ||
18 | pub(crate) fn token(&self, uidx: usize) -> (SyntaxKind, &SmolStr) { | ||
19 | let tkn = &self.src.tokens[uidx]; | ||
20 | (tkn.kind, &tkn.text) | ||
21 | } | ||
22 | } | ||
23 | |||
24 | pub(crate) struct SubtreeTokenSource<'a> { | ||
25 | tt_pos: usize, | ||
26 | tokens: Vec<TtToken>, | ||
27 | subtree: &'a tt::Subtree, | ||
28 | } | ||
29 | |||
30 | impl<'a> SubtreeTokenSource<'a> { | ||
31 | pub fn new(subtree: &tt::Subtree) -> SubtreeTokenSource { | ||
32 | SubtreeTokenSource { tokens: TtTokenBuilder::build(subtree), tt_pos: 0, subtree } | ||
33 | } | ||
34 | |||
35 | pub fn advance(&mut self, curr: usize, skip_first_delimiter: bool) { | ||
36 | if skip_first_delimiter { | ||
37 | self.tt_pos += 1; | ||
38 | } | ||
39 | |||
40 | // Matching `TtToken` cursor to `tt::TokenTree` cursor | ||
41 | // It is because TtToken is not One to One mapping to tt::Token | ||
42 | // There are 3 case (`TtToken` <=> `tt::TokenTree`) : | ||
43 | // * One to One => ident, single char punch | ||
44 | // * Many to One => `tt::TokenTree::SubTree` | ||
45 | // * One to Many => multibyte punct | ||
46 | // | ||
47 | // Such that we cannot simpliy advance the cursor | ||
48 | // We have to bump it one by one | ||
49 | let mut pos = 0; | ||
50 | while pos < curr { | ||
51 | pos += self.bump(&self.subtree.token_trees[pos]); | ||
52 | } | ||
53 | } | ||
54 | |||
55 | pub fn querier(&self) -> SubtreeSourceQuerier { | ||
56 | SubtreeSourceQuerier { src: self } | ||
57 | } | ||
58 | |||
59 | fn count(&self, tt: &tt::TokenTree) -> usize { | ||
60 | assert!(!self.tokens.is_empty()); | ||
61 | TtTokenBuilder::count_tt_tokens(tt, None) | ||
62 | } | ||
63 | |||
64 | pub(crate) fn bump(&mut self, tt: &tt::TokenTree) -> usize { | ||
65 | let cur = &self.tokens[self.tt_pos]; | ||
66 | let n_tokens = cur.n_tokens; | ||
67 | self.tt_pos += self.count(tt); | ||
68 | n_tokens | ||
69 | } | ||
70 | |||
71 | pub(crate) fn bump_n( | ||
72 | &mut self, | ||
73 | n_tokens: usize, | ||
74 | mut token_pos: usize, | ||
75 | ) -> (usize, Vec<&tt::TokenTree>) { | ||
76 | let mut res = vec![]; | ||
77 | // Matching `TtToken` cursor to `tt::TokenTree` cursor | ||
78 | // It is because TtToken is not One to One mapping to tt::Token | ||
79 | // There are 3 case (`TtToken` <=> `tt::TokenTree`) : | ||
80 | // * One to One => ident, single char punch | ||
81 | // * Many to One => `tt::TokenTree::SubTree` | ||
82 | // * One to Many => multibyte punct | ||
83 | // | ||
84 | // Such that we cannot simpliy advance the cursor | ||
85 | // We have to bump it one by one | ||
86 | let next_pos = self.tt_pos + n_tokens; | ||
87 | let old_token_pos = token_pos; | ||
88 | |||
89 | while self.tt_pos < next_pos { | ||
90 | let current = &self.subtree.token_trees[token_pos]; | ||
91 | let n = self.bump(current); | ||
92 | res.extend((0..n).map(|i| &self.subtree.token_trees[token_pos + i])); | ||
93 | token_pos += n; | ||
94 | } | ||
95 | |||
96 | (token_pos - old_token_pos, res) | ||
97 | } | ||
98 | } | ||
99 | |||
100 | impl<'a> TokenSource for SubtreeTokenSource<'a> { | ||
101 | fn token_kind(&self, pos: usize) -> SyntaxKind { | ||
102 | if let Some(tok) = self.tokens.get(self.tt_pos + pos) { | ||
103 | tok.kind | ||
104 | } else { | ||
105 | SyntaxKind::EOF | ||
106 | } | ||
107 | } | ||
108 | fn is_token_joint_to_next(&self, pos: usize) -> bool { | ||
109 | self.tokens[self.tt_pos + pos].is_joint_to_next | ||
110 | } | ||
111 | fn is_keyword(&self, pos: usize, kw: &str) -> bool { | ||
112 | self.tokens[self.tt_pos + pos].text == *kw | ||
113 | } | ||
114 | } | ||
115 | |||
116 | struct TokenPeek<'a, I> | ||
117 | where | ||
118 | I: Iterator<Item = &'a tt::TokenTree>, | ||
119 | { | ||
120 | iter: itertools::MultiPeek<I>, | ||
121 | } | ||
122 | |||
123 | // helper function | ||
124 | fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> { | ||
125 | if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt { | ||
126 | return Some(pp); | ||
127 | } | ||
128 | None | ||
129 | } | ||
130 | |||
131 | impl<'a, I> TokenPeek<'a, I> | ||
132 | where | ||
133 | I: Iterator<Item = &'a tt::TokenTree>, | ||
134 | { | ||
135 | pub fn new(iter: I) -> Self { | ||
136 | TokenPeek { iter: itertools::multipeek(iter) } | ||
137 | } | ||
138 | |||
139 | pub fn next(&mut self) -> Option<&tt::TokenTree> { | ||
140 | self.iter.next() | ||
141 | } | ||
142 | |||
143 | fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> { | ||
144 | if p.spacing != tt::Spacing::Joint { | ||
145 | return None; | ||
146 | } | ||
147 | |||
148 | self.iter.reset_peek(); | ||
149 | let p1 = to_punct(self.iter.peek()?)?; | ||
150 | Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint)) | ||
151 | } | ||
152 | |||
153 | fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> { | ||
154 | self.current_punct2(p).and_then(|((p0, p1), last_joint)| { | ||
155 | if !last_joint { | ||
156 | None | ||
157 | } else { | ||
158 | let p2 = to_punct(*self.iter.peek()?)?; | ||
159 | Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint)) | ||
160 | } | ||
161 | }) | ||
162 | } | ||
163 | } | ||
164 | |||
165 | struct TtTokenBuilder { | ||
166 | tokens: Vec<TtToken>, | ||
167 | } | ||
168 | |||
169 | impl TtTokenBuilder { | ||
170 | fn build(sub: &tt::Subtree) -> Vec<TtToken> { | ||
171 | let mut res = TtTokenBuilder { tokens: vec![] }; | ||
172 | res.convert_subtree(sub); | ||
173 | res.tokens | ||
174 | } | ||
175 | |||
176 | fn convert_subtree(&mut self, sub: &tt::Subtree) { | ||
177 | self.push_delim(sub.delimiter, false); | ||
178 | let mut peek = TokenPeek::new(sub.token_trees.iter()); | ||
179 | while let Some(tt) = peek.iter.next() { | ||
180 | self.convert_tt(tt, &mut peek); | ||
181 | } | ||
182 | self.push_delim(sub.delimiter, true) | ||
183 | } | ||
184 | |||
185 | fn convert_tt<'b, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'b, I>) | ||
186 | where | ||
187 | I: Iterator<Item = &'b tt::TokenTree>, | ||
188 | { | ||
189 | match tt { | ||
190 | tt::TokenTree::Leaf(token) => self.convert_token(token, iter), | ||
191 | tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), | ||
192 | } | ||
193 | } | ||
194 | |||
195 | fn convert_token<'b, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'b, I>) | ||
196 | where | ||
197 | I: Iterator<Item = &'b tt::TokenTree>, | ||
198 | { | ||
199 | let tok = match token { | ||
200 | tt::Leaf::Literal(l) => TtToken { | ||
201 | kind: classify_literal(&l.text).unwrap().kind, | ||
202 | is_joint_to_next: false, | ||
203 | text: l.text.clone(), | ||
204 | n_tokens: 1, | ||
205 | }, | ||
206 | tt::Leaf::Punct(p) => { | ||
207 | if let Some((kind, is_joint_to_next, text, size)) = | ||
208 | Self::convert_multi_char_punct(p, iter) | ||
209 | { | ||
210 | for _ in 0..size - 1 { | ||
211 | iter.next(); | ||
212 | } | ||
213 | |||
214 | TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } | ||
215 | } else { | ||
216 | let kind = match p.char { | ||
217 | // lexer may produce combpund tokens for these ones | ||
218 | '.' => DOT, | ||
219 | ':' => COLON, | ||
220 | '=' => EQ, | ||
221 | '!' => EXCL, | ||
222 | '-' => MINUS, | ||
223 | c => SyntaxKind::from_char(c).unwrap(), | ||
224 | }; | ||
225 | let text = { | ||
226 | let mut buf = [0u8; 4]; | ||
227 | let s: &str = p.char.encode_utf8(&mut buf); | ||
228 | SmolStr::new(s) | ||
229 | }; | ||
230 | TtToken { | ||
231 | kind, | ||
232 | is_joint_to_next: p.spacing == tt::Spacing::Joint, | ||
233 | text, | ||
234 | n_tokens: 1, | ||
235 | } | ||
236 | } | ||
237 | } | ||
238 | tt::Leaf::Ident(ident) => { | ||
239 | let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); | ||
240 | TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } | ||
241 | } | ||
242 | }; | ||
243 | self.tokens.push(tok) | ||
244 | } | ||
245 | |||
246 | fn convert_multi_char_punct<'b, I>( | ||
247 | p: &tt::Punct, | ||
248 | iter: &mut TokenPeek<'b, I>, | ||
249 | ) -> Option<(SyntaxKind, bool, &'static str, usize)> | ||
250 | where | ||
251 | I: Iterator<Item = &'b tt::TokenTree>, | ||
252 | { | ||
253 | if let Some((m, is_joint_to_next)) = iter.current_punct3(p) { | ||
254 | if let Some((kind, text)) = match m { | ||
255 | ('<', '<', '=') => Some((SHLEQ, "<<=")), | ||
256 | ('>', '>', '=') => Some((SHREQ, ">>=")), | ||
257 | ('.', '.', '.') => Some((DOTDOTDOT, "...")), | ||
258 | ('.', '.', '=') => Some((DOTDOTEQ, "..=")), | ||
259 | _ => None, | ||
260 | } { | ||
261 | return Some((kind, is_joint_to_next, text, 3)); | ||
262 | } | ||
263 | } | ||
264 | |||
265 | if let Some((m, is_joint_to_next)) = iter.current_punct2(p) { | ||
266 | if let Some((kind, text)) = match m { | ||
267 | ('<', '<') => Some((SHL, "<<")), | ||
268 | ('>', '>') => Some((SHR, ">>")), | ||
269 | |||
270 | ('|', '|') => Some((PIPEPIPE, "||")), | ||
271 | ('&', '&') => Some((AMPAMP, "&&")), | ||
272 | ('%', '=') => Some((PERCENTEQ, "%=")), | ||
273 | ('*', '=') => Some((STAREQ, "*=")), | ||
274 | ('/', '=') => Some((SLASHEQ, "/=")), | ||
275 | ('^', '=') => Some((CARETEQ, "^=")), | ||
276 | |||
277 | ('&', '=') => Some((AMPEQ, "&=")), | ||
278 | ('|', '=') => Some((PIPEEQ, "|=")), | ||
279 | ('-', '=') => Some((MINUSEQ, "-=")), | ||
280 | ('+', '=') => Some((PLUSEQ, "+=")), | ||
281 | ('>', '=') => Some((GTEQ, ">=")), | ||
282 | ('<', '=') => Some((LTEQ, "<=")), | ||
283 | |||
284 | ('-', '>') => Some((THIN_ARROW, "->")), | ||
285 | ('!', '=') => Some((NEQ, "!=")), | ||
286 | ('=', '>') => Some((FAT_ARROW, "=>")), | ||
287 | ('=', '=') => Some((EQEQ, "==")), | ||
288 | ('.', '.') => Some((DOTDOT, "..")), | ||
289 | (':', ':') => Some((COLONCOLON, "::")), | ||
290 | |||
291 | _ => None, | ||
292 | } { | ||
293 | return Some((kind, is_joint_to_next, text, 2)); | ||
294 | } | ||
295 | } | ||
296 | |||
297 | None | ||
298 | } | ||
299 | |||
300 | fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { | ||
301 | let (kinds, texts) = match d { | ||
302 | tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), | ||
303 | tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), | ||
304 | tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), | ||
305 | tt::Delimiter::None => return, | ||
306 | }; | ||
307 | let idx = closing as usize; | ||
308 | let kind = kinds[idx]; | ||
309 | let text = &texts[idx..texts.len() - (1 - idx)]; | ||
310 | let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }; | ||
311 | self.tokens.push(tok) | ||
312 | } | ||
313 | |||
314 | fn skip_sibling_leaf(leaf: &tt::Leaf, iter: &mut std::slice::Iter<tt::TokenTree>) { | ||
315 | if let tt::Leaf::Punct(p) = leaf { | ||
316 | let mut peek = TokenPeek::new(iter); | ||
317 | if let Some((_, _, _, size)) = TtTokenBuilder::convert_multi_char_punct(p, &mut peek) { | ||
318 | for _ in 0..size - 1 { | ||
319 | peek.next(); | ||
320 | } | ||
321 | } | ||
322 | } | ||
323 | } | ||
324 | |||
325 | fn count_tt_tokens( | ||
326 | tt: &tt::TokenTree, | ||
327 | iter: Option<&mut std::slice::Iter<tt::TokenTree>>, | ||
328 | ) -> usize { | ||
329 | match tt { | ||
330 | tt::TokenTree::Subtree(sub_tree) => { | ||
331 | let mut iter = sub_tree.token_trees.iter(); | ||
332 | let mut count = match sub_tree.delimiter { | ||
333 | tt::Delimiter::None => 0, | ||
334 | _ => 2, | ||
335 | }; | ||
336 | |||
337 | while let Some(tt) = iter.next() { | ||
338 | count += Self::count_tt_tokens(&tt, Some(&mut iter)); | ||
339 | } | ||
340 | count | ||
341 | } | ||
342 | |||
343 | tt::TokenTree::Leaf(leaf) => { | ||
344 | iter.map(|iter| { | ||
345 | Self::skip_sibling_leaf(leaf, iter); | ||
346 | }); | ||
347 | |||
348 | 1 | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | } | ||