diff options
Diffstat (limited to 'crates/ra_mbe')
-rw-r--r-- | crates/ra_mbe/src/lib.rs | 4 | ||||
-rw-r--r-- | crates/ra_mbe/src/subtree_source.rs | 352 | ||||
-rw-r--r-- | crates/ra_mbe/src/syntax_bridge.rs | 262 | ||||
-rw-r--r-- | crates/ra_mbe/src/tt_cursor.rs | 132 |
4 files changed, 386 insertions, 364 deletions
diff --git a/crates/ra_mbe/src/lib.rs b/crates/ra_mbe/src/lib.rs index a5b7fab52..38d3ec7e1 100644 --- a/crates/ra_mbe/src/lib.rs +++ b/crates/ra_mbe/src/lib.rs | |||
@@ -15,10 +15,12 @@ macro_rules! impl_froms { | |||
15 | } | 15 | } |
16 | } | 16 | } |
17 | 17 | ||
18 | mod tt_cursor; | 18 | // mod tt_cursor; |
19 | mod mbe_parser; | 19 | mod mbe_parser; |
20 | mod mbe_expander; | 20 | mod mbe_expander; |
21 | mod syntax_bridge; | 21 | mod syntax_bridge; |
22 | mod tt_cursor; | ||
23 | mod subtree_source; | ||
22 | 24 | ||
23 | use ra_syntax::SmolStr; | 25 | use ra_syntax::SmolStr; |
24 | 26 | ||
diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs new file mode 100644 index 000000000..8f5ce4ed5 --- /dev/null +++ b/crates/ra_mbe/src/subtree_source.rs | |||
@@ -0,0 +1,352 @@ | |||
1 | use ra_parser::{TokenSource}; | ||
2 | use ra_syntax::{classify_literal, SmolStr, SyntaxKind, SyntaxKind::*}; | ||
3 | |||
4 | #[derive(Debug)] | ||
5 | struct TtToken { | ||
6 | pub kind: SyntaxKind, | ||
7 | pub is_joint_to_next: bool, | ||
8 | pub text: SmolStr, | ||
9 | pub n_tokens: usize, | ||
10 | } | ||
11 | |||
12 | /// SubtreeSourceQuerier let outside to query internal tokens as string | ||
13 | pub(crate) struct SubtreeSourceQuerier<'a> { | ||
14 | src: &'a SubtreeTokenSource<'a>, | ||
15 | } | ||
16 | |||
17 | impl<'a> SubtreeSourceQuerier<'a> { | ||
18 | pub(crate) fn token(&self, uidx: usize) -> (SyntaxKind, &SmolStr) { | ||
19 | let tkn = &self.src.tokens[uidx]; | ||
20 | (tkn.kind, &tkn.text) | ||
21 | } | ||
22 | } | ||
23 | |||
24 | pub(crate) struct SubtreeTokenSource<'a> { | ||
25 | tt_pos: usize, | ||
26 | tokens: Vec<TtToken>, | ||
27 | subtree: &'a tt::Subtree, | ||
28 | } | ||
29 | |||
30 | impl<'a> SubtreeTokenSource<'a> { | ||
31 | pub fn new(subtree: &tt::Subtree) -> SubtreeTokenSource { | ||
32 | SubtreeTokenSource { tokens: TtTokenBuilder::build(subtree), tt_pos: 0, subtree } | ||
33 | } | ||
34 | |||
35 | pub fn advance(&mut self, curr: usize, skip_first_delimiter: bool) { | ||
36 | if skip_first_delimiter { | ||
37 | self.tt_pos += 1; | ||
38 | } | ||
39 | |||
40 | // Matching `TtToken` cursor to `tt::TokenTree` cursor | ||
41 | // It is because TtToken is not One to One mapping to tt::Token | ||
42 | // There are 3 case (`TtToken` <=> `tt::TokenTree`) : | ||
43 | // * One to One => ident, single char punch | ||
44 | // * Many to One => `tt::TokenTree::SubTree` | ||
45 | // * One to Many => multibyte punct | ||
46 | // | ||
47 | // Such that we cannot simpliy advance the cursor | ||
48 | // We have to bump it one by one | ||
49 | let mut pos = 0; | ||
50 | while pos < curr { | ||
51 | pos += self.bump(&self.subtree.token_trees[pos]); | ||
52 | } | ||
53 | } | ||
54 | |||
55 | pub fn querier(&self) -> SubtreeSourceQuerier { | ||
56 | SubtreeSourceQuerier { src: self } | ||
57 | } | ||
58 | |||
59 | fn count(&self, tt: &tt::TokenTree) -> usize { | ||
60 | assert!(!self.tokens.is_empty()); | ||
61 | TtTokenBuilder::count_tt_tokens(tt, None) | ||
62 | } | ||
63 | |||
64 | pub(crate) fn bump(&mut self, tt: &tt::TokenTree) -> usize { | ||
65 | let cur = &self.tokens[self.tt_pos]; | ||
66 | let n_tokens = cur.n_tokens; | ||
67 | self.tt_pos += self.count(tt); | ||
68 | n_tokens | ||
69 | } | ||
70 | |||
71 | pub(crate) fn bump_n( | ||
72 | &mut self, | ||
73 | n_tokens: usize, | ||
74 | mut token_pos: usize, | ||
75 | ) -> (usize, Vec<&tt::TokenTree>) { | ||
76 | let mut res = vec![]; | ||
77 | // Matching `TtToken` cursor to `tt::TokenTree` cursor | ||
78 | // It is because TtToken is not One to One mapping to tt::Token | ||
79 | // There are 3 case (`TtToken` <=> `tt::TokenTree`) : | ||
80 | // * One to One => ident, single char punch | ||
81 | // * Many to One => `tt::TokenTree::SubTree` | ||
82 | // * One to Many => multibyte punct | ||
83 | // | ||
84 | // Such that we cannot simpliy advance the cursor | ||
85 | // We have to bump it one by one | ||
86 | let next_pos = self.tt_pos + n_tokens; | ||
87 | let old_token_pos = token_pos; | ||
88 | |||
89 | while self.tt_pos < next_pos { | ||
90 | let current = &self.subtree.token_trees[token_pos]; | ||
91 | let n = self.bump(current); | ||
92 | res.extend((0..n).map(|i| &self.subtree.token_trees[token_pos + i])); | ||
93 | token_pos += n; | ||
94 | } | ||
95 | |||
96 | (token_pos - old_token_pos, res) | ||
97 | } | ||
98 | } | ||
99 | |||
100 | impl<'a> TokenSource for SubtreeTokenSource<'a> { | ||
101 | fn token_kind(&self, pos: usize) -> SyntaxKind { | ||
102 | if let Some(tok) = self.tokens.get(self.tt_pos + pos) { | ||
103 | tok.kind | ||
104 | } else { | ||
105 | SyntaxKind::EOF | ||
106 | } | ||
107 | } | ||
108 | fn is_token_joint_to_next(&self, pos: usize) -> bool { | ||
109 | self.tokens[self.tt_pos + pos].is_joint_to_next | ||
110 | } | ||
111 | fn is_keyword(&self, pos: usize, kw: &str) -> bool { | ||
112 | self.tokens[self.tt_pos + pos].text == *kw | ||
113 | } | ||
114 | } | ||
115 | |||
116 | struct TokenPeek<'a, I> | ||
117 | where | ||
118 | I: Iterator<Item = &'a tt::TokenTree>, | ||
119 | { | ||
120 | iter: itertools::MultiPeek<I>, | ||
121 | } | ||
122 | |||
123 | // helper function | ||
124 | fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> { | ||
125 | if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt { | ||
126 | return Some(pp); | ||
127 | } | ||
128 | None | ||
129 | } | ||
130 | |||
131 | impl<'a, I> TokenPeek<'a, I> | ||
132 | where | ||
133 | I: Iterator<Item = &'a tt::TokenTree>, | ||
134 | { | ||
135 | pub fn new(iter: I) -> Self { | ||
136 | TokenPeek { iter: itertools::multipeek(iter) } | ||
137 | } | ||
138 | |||
139 | pub fn next(&mut self) -> Option<&tt::TokenTree> { | ||
140 | self.iter.next() | ||
141 | } | ||
142 | |||
143 | fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> { | ||
144 | if p.spacing != tt::Spacing::Joint { | ||
145 | return None; | ||
146 | } | ||
147 | |||
148 | self.iter.reset_peek(); | ||
149 | let p1 = to_punct(self.iter.peek()?)?; | ||
150 | Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint)) | ||
151 | } | ||
152 | |||
153 | fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> { | ||
154 | self.current_punct2(p).and_then(|((p0, p1), last_joint)| { | ||
155 | if !last_joint { | ||
156 | None | ||
157 | } else { | ||
158 | let p2 = to_punct(*self.iter.peek()?)?; | ||
159 | Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint)) | ||
160 | } | ||
161 | }) | ||
162 | } | ||
163 | } | ||
164 | |||
165 | struct TtTokenBuilder { | ||
166 | tokens: Vec<TtToken>, | ||
167 | } | ||
168 | |||
169 | impl TtTokenBuilder { | ||
170 | fn build(sub: &tt::Subtree) -> Vec<TtToken> { | ||
171 | let mut res = TtTokenBuilder { tokens: vec![] }; | ||
172 | res.convert_subtree(sub); | ||
173 | res.tokens | ||
174 | } | ||
175 | |||
176 | fn convert_subtree(&mut self, sub: &tt::Subtree) { | ||
177 | self.push_delim(sub.delimiter, false); | ||
178 | let mut peek = TokenPeek::new(sub.token_trees.iter()); | ||
179 | while let Some(tt) = peek.iter.next() { | ||
180 | self.convert_tt(tt, &mut peek); | ||
181 | } | ||
182 | self.push_delim(sub.delimiter, true) | ||
183 | } | ||
184 | |||
185 | fn convert_tt<'b, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'b, I>) | ||
186 | where | ||
187 | I: Iterator<Item = &'b tt::TokenTree>, | ||
188 | { | ||
189 | match tt { | ||
190 | tt::TokenTree::Leaf(token) => self.convert_token(token, iter), | ||
191 | tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), | ||
192 | } | ||
193 | } | ||
194 | |||
195 | fn convert_token<'b, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'b, I>) | ||
196 | where | ||
197 | I: Iterator<Item = &'b tt::TokenTree>, | ||
198 | { | ||
199 | let tok = match token { | ||
200 | tt::Leaf::Literal(l) => TtToken { | ||
201 | kind: classify_literal(&l.text).unwrap().kind, | ||
202 | is_joint_to_next: false, | ||
203 | text: l.text.clone(), | ||
204 | n_tokens: 1, | ||
205 | }, | ||
206 | tt::Leaf::Punct(p) => { | ||
207 | if let Some((kind, is_joint_to_next, text, size)) = | ||
208 | Self::convert_multi_char_punct(p, iter) | ||
209 | { | ||
210 | for _ in 0..size - 1 { | ||
211 | iter.next(); | ||
212 | } | ||
213 | |||
214 | TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } | ||
215 | } else { | ||
216 | let kind = match p.char { | ||
217 | // lexer may produce combpund tokens for these ones | ||
218 | '.' => DOT, | ||
219 | ':' => COLON, | ||
220 | '=' => EQ, | ||
221 | '!' => EXCL, | ||
222 | '-' => MINUS, | ||
223 | c => SyntaxKind::from_char(c).unwrap(), | ||
224 | }; | ||
225 | let text = { | ||
226 | let mut buf = [0u8; 4]; | ||
227 | let s: &str = p.char.encode_utf8(&mut buf); | ||
228 | SmolStr::new(s) | ||
229 | }; | ||
230 | TtToken { | ||
231 | kind, | ||
232 | is_joint_to_next: p.spacing == tt::Spacing::Joint, | ||
233 | text, | ||
234 | n_tokens: 1, | ||
235 | } | ||
236 | } | ||
237 | } | ||
238 | tt::Leaf::Ident(ident) => { | ||
239 | let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); | ||
240 | TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } | ||
241 | } | ||
242 | }; | ||
243 | self.tokens.push(tok) | ||
244 | } | ||
245 | |||
246 | fn convert_multi_char_punct<'b, I>( | ||
247 | p: &tt::Punct, | ||
248 | iter: &mut TokenPeek<'b, I>, | ||
249 | ) -> Option<(SyntaxKind, bool, &'static str, usize)> | ||
250 | where | ||
251 | I: Iterator<Item = &'b tt::TokenTree>, | ||
252 | { | ||
253 | if let Some((m, is_joint_to_next)) = iter.current_punct3(p) { | ||
254 | if let Some((kind, text)) = match m { | ||
255 | ('<', '<', '=') => Some((SHLEQ, "<<=")), | ||
256 | ('>', '>', '=') => Some((SHREQ, ">>=")), | ||
257 | ('.', '.', '.') => Some((DOTDOTDOT, "...")), | ||
258 | ('.', '.', '=') => Some((DOTDOTEQ, "..=")), | ||
259 | _ => None, | ||
260 | } { | ||
261 | return Some((kind, is_joint_to_next, text, 3)); | ||
262 | } | ||
263 | } | ||
264 | |||
265 | if let Some((m, is_joint_to_next)) = iter.current_punct2(p) { | ||
266 | if let Some((kind, text)) = match m { | ||
267 | ('<', '<') => Some((SHL, "<<")), | ||
268 | ('>', '>') => Some((SHR, ">>")), | ||
269 | |||
270 | ('|', '|') => Some((PIPEPIPE, "||")), | ||
271 | ('&', '&') => Some((AMPAMP, "&&")), | ||
272 | ('%', '=') => Some((PERCENTEQ, "%=")), | ||
273 | ('*', '=') => Some((STAREQ, "*=")), | ||
274 | ('/', '=') => Some((SLASHEQ, "/=")), | ||
275 | ('^', '=') => Some((CARETEQ, "^=")), | ||
276 | |||
277 | ('&', '=') => Some((AMPEQ, "&=")), | ||
278 | ('|', '=') => Some((PIPEEQ, "|=")), | ||
279 | ('-', '=') => Some((MINUSEQ, "-=")), | ||
280 | ('+', '=') => Some((PLUSEQ, "+=")), | ||
281 | ('>', '=') => Some((GTEQ, ">=")), | ||
282 | ('<', '=') => Some((LTEQ, "<=")), | ||
283 | |||
284 | ('-', '>') => Some((THIN_ARROW, "->")), | ||
285 | ('!', '=') => Some((NEQ, "!=")), | ||
286 | ('=', '>') => Some((FAT_ARROW, "=>")), | ||
287 | ('=', '=') => Some((EQEQ, "==")), | ||
288 | ('.', '.') => Some((DOTDOT, "..")), | ||
289 | (':', ':') => Some((COLONCOLON, "::")), | ||
290 | |||
291 | _ => None, | ||
292 | } { | ||
293 | return Some((kind, is_joint_to_next, text, 2)); | ||
294 | } | ||
295 | } | ||
296 | |||
297 | None | ||
298 | } | ||
299 | |||
300 | fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { | ||
301 | let (kinds, texts) = match d { | ||
302 | tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), | ||
303 | tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), | ||
304 | tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), | ||
305 | tt::Delimiter::None => return, | ||
306 | }; | ||
307 | let idx = closing as usize; | ||
308 | let kind = kinds[idx]; | ||
309 | let text = &texts[idx..texts.len() - (1 - idx)]; | ||
310 | let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }; | ||
311 | self.tokens.push(tok) | ||
312 | } | ||
313 | |||
314 | fn skip_sibling_leaf(leaf: &tt::Leaf, iter: &mut std::slice::Iter<tt::TokenTree>) { | ||
315 | if let tt::Leaf::Punct(p) = leaf { | ||
316 | let mut peek = TokenPeek::new(iter); | ||
317 | if let Some((_, _, _, size)) = TtTokenBuilder::convert_multi_char_punct(p, &mut peek) { | ||
318 | for _ in 0..size - 1 { | ||
319 | peek.next(); | ||
320 | } | ||
321 | } | ||
322 | } | ||
323 | } | ||
324 | |||
325 | fn count_tt_tokens( | ||
326 | tt: &tt::TokenTree, | ||
327 | iter: Option<&mut std::slice::Iter<tt::TokenTree>>, | ||
328 | ) -> usize { | ||
329 | match tt { | ||
330 | tt::TokenTree::Subtree(sub_tree) => { | ||
331 | let mut iter = sub_tree.token_trees.iter(); | ||
332 | let mut count = match sub_tree.delimiter { | ||
333 | tt::Delimiter::None => 0, | ||
334 | _ => 2, | ||
335 | }; | ||
336 | |||
337 | while let Some(tt) = iter.next() { | ||
338 | count += Self::count_tt_tokens(&tt, Some(&mut iter)); | ||
339 | } | ||
340 | count | ||
341 | } | ||
342 | |||
343 | tt::TokenTree::Leaf(leaf) => { | ||
344 | iter.map(|iter| { | ||
345 | Self::skip_sibling_leaf(leaf, iter); | ||
346 | }); | ||
347 | |||
348 | 1 | ||
349 | } | ||
350 | } | ||
351 | } | ||
352 | } | ||
diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 3a0702a30..102bba341 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs | |||
@@ -1,9 +1,11 @@ | |||
1 | use ra_parser::{TokenSource, TreeSink, ParseError}; | 1 | use ra_parser::{TreeSink, ParseError}; |
2 | use ra_syntax::{ | 2 | use ra_syntax::{ |
3 | AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc, SyntaxElement, | 3 | AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc, SyntaxElement, |
4 | ast, SyntaxKind::*, TextUnit, classify_literal | 4 | ast, SyntaxKind::*, TextUnit |
5 | }; | 5 | }; |
6 | 6 | ||
7 | use crate::subtree_source::{SubtreeTokenSource, SubtreeSourceQuerier}; | ||
8 | |||
7 | /// Maps `tt::TokenId` to the relative range of the original token. | 9 | /// Maps `tt::TokenId` to the relative range of the original token. |
8 | #[derive(Default)] | 10 | #[derive(Default)] |
9 | pub struct TokenMap { | 11 | pub struct TokenMap { |
@@ -22,8 +24,8 @@ pub fn ast_to_token_tree(ast: &ast::TokenTree) -> Option<(tt::Subtree, TokenMap) | |||
22 | 24 | ||
23 | /// Parses the token tree (result of macro expansion) as a sequence of items | 25 | /// Parses the token tree (result of macro expansion) as a sequence of items |
24 | pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc<ast::SourceFile> { | 26 | pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc<ast::SourceFile> { |
25 | let token_source = TtTokenSource::new(tt); | 27 | let token_source = SubtreeTokenSource::new(tt); |
26 | let mut tree_sink = TtTreeSink::new(&token_source.tokens); | 28 | let mut tree_sink = TtTreeSink::new(token_source.querier()); |
27 | ra_parser::parse(&token_source, &mut tree_sink); | 29 | ra_parser::parse(&token_source, &mut tree_sink); |
28 | let syntax = tree_sink.inner.finish(); | 30 | let syntax = tree_sink.inner.finish(); |
29 | ast::SourceFile::cast(&syntax).unwrap().to_owned() | 31 | ast::SourceFile::cast(&syntax).unwrap().to_owned() |
@@ -103,243 +105,19 @@ fn convert_tt( | |||
103 | Some(res) | 105 | Some(res) |
104 | } | 106 | } |
105 | 107 | ||
106 | #[derive(Debug)] | ||
107 | pub(crate) struct TtTokenSource { | ||
108 | pub tokens: Vec<TtToken>, | ||
109 | } | ||
110 | |||
111 | #[derive(Debug)] | ||
112 | pub(crate) struct TtToken { | ||
113 | pub kind: SyntaxKind, | ||
114 | pub is_joint_to_next: bool, | ||
115 | pub text: SmolStr, | ||
116 | pub n_tokens: usize, | ||
117 | } | ||
118 | |||
119 | // Some helper functions | ||
120 | fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> { | ||
121 | if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt { | ||
122 | return Some(pp); | ||
123 | } | ||
124 | None | ||
125 | } | ||
126 | |||
127 | pub(crate) struct TokenPeek<'a, I> | ||
128 | where | ||
129 | I: Iterator<Item = &'a tt::TokenTree>, | ||
130 | { | ||
131 | iter: itertools::MultiPeek<I>, | ||
132 | } | ||
133 | |||
134 | impl<'a, I> TokenPeek<'a, I> | ||
135 | where | ||
136 | I: Iterator<Item = &'a tt::TokenTree>, | ||
137 | { | ||
138 | pub fn new(iter: I) -> Self { | ||
139 | TokenPeek { iter: itertools::multipeek(iter) } | ||
140 | } | ||
141 | |||
142 | pub fn next(&mut self) -> Option<&tt::TokenTree> { | ||
143 | self.iter.next() | ||
144 | } | ||
145 | |||
146 | fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> { | ||
147 | if p.spacing != tt::Spacing::Joint { | ||
148 | return None; | ||
149 | } | ||
150 | |||
151 | self.iter.reset_peek(); | ||
152 | let p1 = to_punct(self.iter.peek()?)?; | ||
153 | Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint)) | ||
154 | } | ||
155 | |||
156 | fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> { | ||
157 | self.current_punct2(p).and_then(|((p0, p1), last_joint)| { | ||
158 | if !last_joint { | ||
159 | None | ||
160 | } else { | ||
161 | let p2 = to_punct(*self.iter.peek()?)?; | ||
162 | Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint)) | ||
163 | } | ||
164 | }) | ||
165 | } | ||
166 | } | ||
167 | |||
168 | impl TtTokenSource { | ||
169 | pub fn new(tt: &tt::Subtree) -> TtTokenSource { | ||
170 | let mut res = TtTokenSource { tokens: Vec::new() }; | ||
171 | res.convert_subtree(tt); | ||
172 | res | ||
173 | } | ||
174 | fn convert_subtree(&mut self, sub: &tt::Subtree) { | ||
175 | self.push_delim(sub.delimiter, false); | ||
176 | let mut peek = TokenPeek::new(sub.token_trees.iter()); | ||
177 | while let Some(tt) = peek.iter.next() { | ||
178 | self.convert_tt(tt, &mut peek); | ||
179 | } | ||
180 | self.push_delim(sub.delimiter, true) | ||
181 | } | ||
182 | |||
183 | fn convert_tt<'a, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'a, I>) | ||
184 | where | ||
185 | I: Iterator<Item = &'a tt::TokenTree>, | ||
186 | { | ||
187 | match tt { | ||
188 | tt::TokenTree::Leaf(token) => self.convert_token(token, iter), | ||
189 | tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), | ||
190 | } | ||
191 | } | ||
192 | |||
193 | fn convert_token<'a, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'a, I>) | ||
194 | where | ||
195 | I: Iterator<Item = &'a tt::TokenTree>, | ||
196 | { | ||
197 | let tok = match token { | ||
198 | tt::Leaf::Literal(l) => TtToken { | ||
199 | kind: classify_literal(&l.text).unwrap().kind, | ||
200 | is_joint_to_next: false, | ||
201 | text: l.text.clone(), | ||
202 | n_tokens: 1, | ||
203 | }, | ||
204 | tt::Leaf::Punct(p) => { | ||
205 | if let Some((kind, is_joint_to_next, text, size)) = | ||
206 | Self::convert_multi_char_punct(p, iter) | ||
207 | { | ||
208 | for _ in 0..size - 1 { | ||
209 | iter.next(); | ||
210 | } | ||
211 | |||
212 | TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size } | ||
213 | } else { | ||
214 | let kind = match p.char { | ||
215 | // lexer may produce combpund tokens for these ones | ||
216 | '.' => DOT, | ||
217 | ':' => COLON, | ||
218 | '=' => EQ, | ||
219 | '!' => EXCL, | ||
220 | '-' => MINUS, | ||
221 | c => SyntaxKind::from_char(c).unwrap(), | ||
222 | }; | ||
223 | let text = { | ||
224 | let mut buf = [0u8; 4]; | ||
225 | let s: &str = p.char.encode_utf8(&mut buf); | ||
226 | SmolStr::new(s) | ||
227 | }; | ||
228 | TtToken { | ||
229 | kind, | ||
230 | is_joint_to_next: p.spacing == tt::Spacing::Joint, | ||
231 | text, | ||
232 | n_tokens: 1, | ||
233 | } | ||
234 | } | ||
235 | } | ||
236 | tt::Leaf::Ident(ident) => { | ||
237 | let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); | ||
238 | TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 } | ||
239 | } | ||
240 | }; | ||
241 | self.tokens.push(tok) | ||
242 | } | ||
243 | |||
244 | pub(crate) fn convert_multi_char_punct<'a, I>( | ||
245 | p: &tt::Punct, | ||
246 | iter: &mut TokenPeek<'a, I>, | ||
247 | ) -> Option<(SyntaxKind, bool, &'static str, usize)> | ||
248 | where | ||
249 | I: Iterator<Item = &'a tt::TokenTree>, | ||
250 | { | ||
251 | if let Some((m, is_joint_to_next)) = iter.current_punct3(p) { | ||
252 | if let Some((kind, text)) = match m { | ||
253 | ('<', '<', '=') => Some((SHLEQ, "<<=")), | ||
254 | ('>', '>', '=') => Some((SHREQ, ">>=")), | ||
255 | ('.', '.', '.') => Some((DOTDOTDOT, "...")), | ||
256 | ('.', '.', '=') => Some((DOTDOTEQ, "..=")), | ||
257 | _ => None, | ||
258 | } { | ||
259 | return Some((kind, is_joint_to_next, text, 3)); | ||
260 | } | ||
261 | } | ||
262 | |||
263 | if let Some((m, is_joint_to_next)) = iter.current_punct2(p) { | ||
264 | if let Some((kind, text)) = match m { | ||
265 | ('<', '<') => Some((SHL, "<<")), | ||
266 | ('>', '>') => Some((SHR, ">>")), | ||
267 | |||
268 | ('|', '|') => Some((PIPEPIPE, "||")), | ||
269 | ('&', '&') => Some((AMPAMP, "&&")), | ||
270 | ('%', '=') => Some((PERCENTEQ, "%=")), | ||
271 | ('*', '=') => Some((STAREQ, "*=")), | ||
272 | ('/', '=') => Some((SLASHEQ, "/=")), | ||
273 | ('^', '=') => Some((CARETEQ, "^=")), | ||
274 | |||
275 | ('&', '=') => Some((AMPEQ, "&=")), | ||
276 | ('|', '=') => Some((PIPEEQ, "|=")), | ||
277 | ('-', '=') => Some((MINUSEQ, "-=")), | ||
278 | ('+', '=') => Some((PLUSEQ, "+=")), | ||
279 | ('>', '=') => Some((GTEQ, ">=")), | ||
280 | ('<', '=') => Some((LTEQ, "<=")), | ||
281 | |||
282 | ('-', '>') => Some((THIN_ARROW, "->")), | ||
283 | ('!', '=') => Some((NEQ, "!=")), | ||
284 | ('=', '>') => Some((FAT_ARROW, "=>")), | ||
285 | ('=', '=') => Some((EQEQ, "==")), | ||
286 | ('.', '.') => Some((DOTDOT, "..")), | ||
287 | (':', ':') => Some((COLONCOLON, "::")), | ||
288 | |||
289 | _ => None, | ||
290 | } { | ||
291 | return Some((kind, is_joint_to_next, text, 2)); | ||
292 | } | ||
293 | } | ||
294 | |||
295 | None | ||
296 | } | ||
297 | |||
298 | fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { | ||
299 | let (kinds, texts) = match d { | ||
300 | tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), | ||
301 | tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), | ||
302 | tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), | ||
303 | tt::Delimiter::None => return, | ||
304 | }; | ||
305 | let idx = closing as usize; | ||
306 | let kind = kinds[idx]; | ||
307 | let text = &texts[idx..texts.len() - (1 - idx)]; | ||
308 | let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 }; | ||
309 | self.tokens.push(tok) | ||
310 | } | ||
311 | } | ||
312 | |||
313 | impl TokenSource for TtTokenSource { | ||
314 | fn token_kind(&self, pos: usize) -> SyntaxKind { | ||
315 | if let Some(tok) = self.tokens.get(pos) { | ||
316 | tok.kind | ||
317 | } else { | ||
318 | SyntaxKind::EOF | ||
319 | } | ||
320 | } | ||
321 | fn is_token_joint_to_next(&self, pos: usize) -> bool { | ||
322 | self.tokens[pos].is_joint_to_next | ||
323 | } | ||
324 | fn is_keyword(&self, pos: usize, kw: &str) -> bool { | ||
325 | self.tokens[pos].text == *kw | ||
326 | } | ||
327 | } | ||
328 | |||
329 | #[derive(Default)] | ||
330 | struct TtTreeSink<'a> { | 108 | struct TtTreeSink<'a> { |
331 | buf: String, | 109 | buf: String, |
332 | tokens: &'a [TtToken], | 110 | src_querier: SubtreeSourceQuerier<'a>, |
333 | text_pos: TextUnit, | 111 | text_pos: TextUnit, |
334 | token_pos: usize, | 112 | token_pos: usize, |
335 | inner: SyntaxTreeBuilder, | 113 | inner: SyntaxTreeBuilder, |
336 | } | 114 | } |
337 | 115 | ||
338 | impl<'a> TtTreeSink<'a> { | 116 | impl<'a> TtTreeSink<'a> { |
339 | fn new(tokens: &'a [TtToken]) -> TtTreeSink { | 117 | fn new(src_querier: SubtreeSourceQuerier<'a>) -> TtTreeSink { |
340 | TtTreeSink { | 118 | TtTreeSink { |
341 | buf: String::new(), | 119 | buf: String::new(), |
342 | tokens, | 120 | src_querier, |
343 | text_pos: 0.into(), | 121 | text_pos: 0.into(), |
344 | token_pos: 0, | 122 | token_pos: 0, |
345 | inner: SyntaxTreeBuilder::default(), | 123 | inner: SyntaxTreeBuilder::default(), |
@@ -350,7 +128,7 @@ impl<'a> TtTreeSink<'a> { | |||
350 | impl<'a> TreeSink for TtTreeSink<'a> { | 128 | impl<'a> TreeSink for TtTreeSink<'a> { |
351 | fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { | 129 | fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { |
352 | for _ in 0..n_tokens { | 130 | for _ in 0..n_tokens { |
353 | self.buf += self.tokens[self.token_pos].text.as_str(); | 131 | self.buf += self.src_querier.token(self.token_pos).1; |
354 | self.token_pos += 1; | 132 | self.token_pos += 1; |
355 | } | 133 | } |
356 | self.text_pos += TextUnit::of_str(&self.buf); | 134 | self.text_pos += TextUnit::of_str(&self.buf); |
@@ -394,21 +172,23 @@ mod tests { | |||
394 | "#, | 172 | "#, |
395 | ); | 173 | ); |
396 | let expansion = expand(&rules, "literals!(foo)"); | 174 | let expansion = expand(&rules, "literals!(foo)"); |
397 | let tt_src = TtTokenSource::new(&expansion); | 175 | let tt_src = SubtreeTokenSource::new(&expansion); |
176 | |||
177 | let query = tt_src.querier(); | ||
398 | 178 | ||
399 | // [{] | 179 | // [{] |
400 | // [let] [a] [=] ['c'] [;] | 180 | // [let] [a] [=] ['c'] [;] |
401 | assert_eq!(tt_src.tokens[1 + 3].text, "'c'"); | 181 | assert_eq!(query.token(1 + 3).1, "'c'"); |
402 | assert_eq!(tt_src.tokens[1 + 3].kind, CHAR); | 182 | assert_eq!(query.token(1 + 3).0, CHAR); |
403 | // [let] [c] [=] [1000] [;] | 183 | // [let] [c] [=] [1000] [;] |
404 | assert_eq!(tt_src.tokens[1 + 5 + 3].text, "1000"); | 184 | assert_eq!(query.token(1 + 5 + 3).1, "1000"); |
405 | assert_eq!(tt_src.tokens[1 + 5 + 3].kind, INT_NUMBER); | 185 | assert_eq!(query.token(1 + 5 + 3).0, INT_NUMBER); |
406 | // [let] [f] [=] [12E+99_f64] [;] | 186 | // [let] [f] [=] [12E+99_f64] [;] |
407 | assert_eq!(tt_src.tokens[1 + 10 + 3].text, "12E+99_f64"); | 187 | assert_eq!(query.token(1 + 10 + 3).1, "12E+99_f64"); |
408 | assert_eq!(tt_src.tokens[1 + 10 + 3].kind, FLOAT_NUMBER); | 188 | assert_eq!(query.token(1 + 10 + 3).0, FLOAT_NUMBER); |
409 | 189 | ||
410 | // [let] [s] [=] ["rust1"] [;] | 190 | // [let] [s] [=] ["rust1"] [;] |
411 | assert_eq!(tt_src.tokens[1 + 15 + 3].text, "\"rust1\""); | 191 | assert_eq!(query.token(1 + 15 + 3).1, "\"rust1\""); |
412 | assert_eq!(tt_src.tokens[1 + 15 + 3].kind, STRING); | 192 | assert_eq!(query.token(1 + 15 + 3).0, STRING); |
413 | } | 193 | } |
414 | } | 194 | } |
diff --git a/crates/ra_mbe/src/tt_cursor.rs b/crates/ra_mbe/src/tt_cursor.rs index 6ac3ac187..52e072599 100644 --- a/crates/ra_mbe/src/tt_cursor.rs +++ b/crates/ra_mbe/src/tt_cursor.rs | |||
@@ -1,116 +1,17 @@ | |||
1 | use crate::ParseError; | 1 | use crate::ParseError; |
2 | use crate::syntax_bridge::{TtTokenSource, TtToken, TokenPeek}; | 2 | use crate::subtree_source::SubtreeTokenSource; |
3 | |||
3 | use ra_parser::{TokenSource, TreeSink}; | 4 | use ra_parser::{TokenSource, TreeSink}; |
4 | 5 | ||
5 | use ra_syntax::{ | 6 | use ra_syntax::{ |
6 | SyntaxKind | 7 | SyntaxKind |
7 | }; | 8 | }; |
8 | 9 | ||
9 | struct TtCursorTokenSource { | 10 | struct SubtreeTokenSink { |
10 | tt_pos: usize, | ||
11 | inner: TtTokenSource, | ||
12 | } | ||
13 | |||
14 | impl TtCursorTokenSource { | ||
15 | fn new(subtree: &tt::Subtree, curr: usize) -> TtCursorTokenSource { | ||
16 | let mut res = TtCursorTokenSource { inner: TtTokenSource::new(subtree), tt_pos: 1 }; | ||
17 | |||
18 | // Matching `TtToken` cursor to `tt::TokenTree` cursor | ||
19 | // It is because TtToken is not One to One mapping to tt::Token | ||
20 | // There are 3 case (`TtToken` <=> `tt::TokenTree`) : | ||
21 | // * One to One => ident, single char punch | ||
22 | // * Many to One => `tt::TokenTree::SubTree` | ||
23 | // * One to Many => multibyte punct | ||
24 | // | ||
25 | // Such that we cannot simpliy advance the cursor | ||
26 | // We have to bump it one by one | ||
27 | let mut pos = 0; | ||
28 | while pos < curr { | ||
29 | pos += res.bump(&subtree.token_trees[pos]); | ||
30 | } | ||
31 | |||
32 | res | ||
33 | } | ||
34 | |||
35 | fn skip_sibling_leaf(&self, leaf: &tt::Leaf, iter: &mut std::slice::Iter<tt::TokenTree>) { | ||
36 | if let tt::Leaf::Punct(p) = leaf { | ||
37 | let mut peek = TokenPeek::new(iter); | ||
38 | if let Some((_, _, _, size)) = TtTokenSource::convert_multi_char_punct(p, &mut peek) { | ||
39 | for _ in 0..size - 1 { | ||
40 | peek.next(); | ||
41 | } | ||
42 | } | ||
43 | } | ||
44 | } | ||
45 | |||
46 | fn count_tt_tokens( | ||
47 | &self, | ||
48 | tt: &tt::TokenTree, | ||
49 | iter: Option<&mut std::slice::Iter<tt::TokenTree>>, | ||
50 | ) -> usize { | ||
51 | assert!(!self.inner.tokens.is_empty()); | ||
52 | |||
53 | match tt { | ||
54 | tt::TokenTree::Subtree(sub_tree) => { | ||
55 | let mut iter = sub_tree.token_trees.iter(); | ||
56 | let mut count = match sub_tree.delimiter { | ||
57 | tt::Delimiter::None => 0, | ||
58 | _ => 2, | ||
59 | }; | ||
60 | |||
61 | while let Some(tt) = iter.next() { | ||
62 | count += self.count_tt_tokens(&tt, Some(&mut iter)); | ||
63 | } | ||
64 | count | ||
65 | } | ||
66 | |||
67 | tt::TokenTree::Leaf(leaf) => { | ||
68 | iter.map(|iter| { | ||
69 | self.skip_sibling_leaf(leaf, iter); | ||
70 | }); | ||
71 | |||
72 | 1 | ||
73 | } | ||
74 | } | ||
75 | } | ||
76 | |||
77 | fn count(&self, tt: &tt::TokenTree) -> usize { | ||
78 | self.count_tt_tokens(tt, None) | ||
79 | } | ||
80 | |||
81 | fn bump(&mut self, tt: &tt::TokenTree) -> usize { | ||
82 | let cur = self.current().unwrap(); | ||
83 | let n_tokens = cur.n_tokens; | ||
84 | self.tt_pos += self.count(tt); | ||
85 | n_tokens | ||
86 | } | ||
87 | |||
88 | fn current(&self) -> Option<&TtToken> { | ||
89 | self.inner.tokens.get(self.tt_pos) | ||
90 | } | ||
91 | } | ||
92 | |||
93 | impl TokenSource for TtCursorTokenSource { | ||
94 | fn token_kind(&self, pos: usize) -> SyntaxKind { | ||
95 | if let Some(tok) = self.inner.tokens.get(self.tt_pos + pos) { | ||
96 | tok.kind | ||
97 | } else { | ||
98 | SyntaxKind::EOF | ||
99 | } | ||
100 | } | ||
101 | fn is_token_joint_to_next(&self, pos: usize) -> bool { | ||
102 | self.inner.tokens[self.tt_pos + pos].is_joint_to_next | ||
103 | } | ||
104 | fn is_keyword(&self, pos: usize, kw: &str) -> bool { | ||
105 | self.inner.tokens[self.tt_pos + pos].text == *kw | ||
106 | } | ||
107 | } | ||
108 | |||
109 | struct TtCursorTokenSink { | ||
110 | token_pos: usize, | 11 | token_pos: usize, |
111 | } | 12 | } |
112 | 13 | ||
113 | impl TreeSink for TtCursorTokenSink { | 14 | impl TreeSink for SubtreeTokenSink { |
114 | fn token(&mut self, _kind: SyntaxKind, n_tokens: u8) { | 15 | fn token(&mut self, _kind: SyntaxKind, n_tokens: u8) { |
115 | self.token_pos += n_tokens as usize; | 16 | self.token_pos += n_tokens as usize; |
116 | } | 17 | } |
@@ -201,24 +102,10 @@ impl<'a> TtCursor<'a> { | |||
201 | fn eat_parse_result( | 102 | fn eat_parse_result( |
202 | &mut self, | 103 | &mut self, |
203 | parsed_token: usize, | 104 | parsed_token: usize, |
204 | src: &mut TtCursorTokenSource, | 105 | src: &mut SubtreeTokenSource, |
205 | ) -> Option<tt::TokenTree> { | 106 | ) -> Option<tt::TokenTree> { |
206 | let mut res = vec![]; | 107 | let (adv, res) = src.bump_n(parsed_token, self.pos); |
207 | 108 | self.pos += adv; | |
208 | // Matching `TtToken` cursor to `tt::TokenTree` cursor | ||
209 | // It is because TtToken is not One to One mapping to tt::Token | ||
210 | // There are 3 case (`TtToken` <=> `tt::TokenTree`) : | ||
211 | // * One to One => ident, single char punch | ||
212 | // * Many to One => `tt::TokenTree::SubTree` | ||
213 | // * One to Many => multibyte punct | ||
214 | // | ||
215 | // Such that we cannot simpliy advance the cursor | ||
216 | // We have to bump it one by one | ||
217 | let next_pos = src.tt_pos + parsed_token; | ||
218 | while src.tt_pos < next_pos { | ||
219 | let n = src.bump(self.current().unwrap()); | ||
220 | res.extend((0..n).map(|_| self.eat().unwrap())); | ||
221 | } | ||
222 | 109 | ||
223 | let res: Vec<_> = res.into_iter().cloned().collect(); | 110 | let res: Vec<_> = res.into_iter().cloned().collect(); |
224 | 111 | ||
@@ -236,8 +123,9 @@ impl<'a> TtCursor<'a> { | |||
236 | where | 123 | where |
237 | F: FnOnce(&dyn TokenSource, &mut dyn TreeSink), | 124 | F: FnOnce(&dyn TokenSource, &mut dyn TreeSink), |
238 | { | 125 | { |
239 | let mut src = TtCursorTokenSource::new(self.subtree, self.pos); | 126 | let mut src = SubtreeTokenSource::new(self.subtree); |
240 | let mut sink = TtCursorTokenSink { token_pos: 0 }; | 127 | src.advance(self.pos, true); |
128 | let mut sink = SubtreeTokenSink { token_pos: 0 }; | ||
241 | 129 | ||
242 | f(&src, &mut sink); | 130 | f(&src, &mut sink); |
243 | 131 | ||