diff options
Diffstat (limited to 'crates/ra_mbe/src/syntax_bridge.rs')
-rw-r--r-- | crates/ra_mbe/src/syntax_bridge.rs | 254 |
1 file changed, 24 insertions, 230 deletions
diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 139a0fd33..19c17bd55 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs | |||
@@ -1,9 +1,11 @@ | |||
1 | use ra_parser::{TokenSource, TreeSink, ParseError}; | 1 | use ra_parser::{TreeSink, ParseError}; |
2 | use ra_syntax::{ | 2 | use ra_syntax::{ |
3 | AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc, SyntaxElement, | 3 | AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc, SyntaxElement, |
4 | ast, SyntaxKind::*, TextUnit, classify_literal | 4 | ast, SyntaxKind::*, TextUnit |
5 | }; | 5 | }; |
6 | 6 | ||
7 | use crate::subtree_source::{SubtreeTokenSource, Querier}; | ||
8 | |||
7 | /// Maps `tt::TokenId` to the relative range of the original token. | 9 | /// Maps `tt::TokenId` to the relative range of the original token. |
8 | #[derive(Default)] | 10 | #[derive(Default)] |
9 | pub struct TokenMap { | 11 | pub struct TokenMap { |
@@ -22,8 +24,8 @@ pub fn ast_to_token_tree(ast: &ast::TokenTree) -> Option<(tt::Subtree, TokenMap) | |||
22 | 24 | ||
23 | /// Parses the token tree (result of macro expansion) as a sequence of items | 25 | /// Parses the token tree (result of macro expansion) as a sequence of items |
24 | pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc<ast::SourceFile> { | 26 | pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc<ast::SourceFile> { |
25 | let token_source = TtTokenSource::new(tt); | 27 | let token_source = SubtreeTokenSource::new(tt); |
26 | let mut tree_sink = TtTreeSink::new(&token_source.tokens); | 28 | let mut tree_sink = TtTreeSink::new(token_source.querier()); |
27 | ra_parser::parse(&token_source, &mut tree_sink); | 29 | ra_parser::parse(&token_source, &mut tree_sink); |
28 | let syntax = tree_sink.inner.finish(); | 30 | let syntax = tree_sink.inner.finish(); |
29 | ast::SourceFile::cast(&syntax).unwrap().to_owned() | 31 | ast::SourceFile::cast(&syntax).unwrap().to_owned() |
@@ -103,229 +105,19 @@ fn convert_tt( | |||
103 | Some(res) | 105 | Some(res) |
104 | } | 106 | } |
105 | 107 | ||
106 | #[derive(Debug)] | 108 | struct TtTreeSink<'a, Q: Querier> { |
107 | struct TtTokenSource { | ||
108 | tokens: Vec<TtToken>, | ||
109 | } | ||
110 | |||
111 | #[derive(Debug)] | ||
112 | struct TtToken { | ||
113 | kind: SyntaxKind, | ||
114 | is_joint_to_next: bool, | ||
115 | text: SmolStr, | ||
116 | } | ||
117 | |||
118 | // Some helper functions | ||
119 | fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> { | ||
120 | if let tt::TokenTree::Leaf(tt::Leaf::Punct(pp)) = tt { | ||
121 | return Some(pp); | ||
122 | } | ||
123 | None | ||
124 | } | ||
125 | |||
126 | struct TokenPeek<'a, I> | ||
127 | where | ||
128 | I: Iterator<Item = &'a tt::TokenTree>, | ||
129 | { | ||
130 | iter: itertools::MultiPeek<I>, | ||
131 | } | ||
132 | |||
133 | impl<'a, I> TokenPeek<'a, I> | ||
134 | where | ||
135 | I: Iterator<Item = &'a tt::TokenTree>, | ||
136 | { | ||
137 | fn next(&mut self) -> Option<&tt::TokenTree> { | ||
138 | self.iter.next() | ||
139 | } | ||
140 | |||
141 | fn current_punct2(&mut self, p: &tt::Punct) -> Option<((char, char), bool)> { | ||
142 | if p.spacing != tt::Spacing::Joint { | ||
143 | return None; | ||
144 | } | ||
145 | |||
146 | self.iter.reset_peek(); | ||
147 | let p1 = to_punct(self.iter.peek()?)?; | ||
148 | Some(((p.char, p1.char), p1.spacing == tt::Spacing::Joint)) | ||
149 | } | ||
150 | |||
151 | fn current_punct3(&mut self, p: &tt::Punct) -> Option<((char, char, char), bool)> { | ||
152 | self.current_punct2(p).and_then(|((p0, p1), last_joint)| { | ||
153 | if !last_joint { | ||
154 | None | ||
155 | } else { | ||
156 | let p2 = to_punct(*self.iter.peek()?)?; | ||
157 | Some(((p0, p1, p2.char), p2.spacing == tt::Spacing::Joint)) | ||
158 | } | ||
159 | }) | ||
160 | } | ||
161 | } | ||
162 | |||
163 | impl TtTokenSource { | ||
164 | fn new(tt: &tt::Subtree) -> TtTokenSource { | ||
165 | let mut res = TtTokenSource { tokens: Vec::new() }; | ||
166 | res.convert_subtree(tt); | ||
167 | res | ||
168 | } | ||
169 | fn convert_subtree(&mut self, sub: &tt::Subtree) { | ||
170 | self.push_delim(sub.delimiter, false); | ||
171 | let mut peek = TokenPeek { iter: itertools::multipeek(sub.token_trees.iter()) }; | ||
172 | while let Some(tt) = peek.iter.next() { | ||
173 | self.convert_tt(tt, &mut peek); | ||
174 | } | ||
175 | self.push_delim(sub.delimiter, true) | ||
176 | } | ||
177 | |||
178 | fn convert_tt<'a, I>(&mut self, tt: &tt::TokenTree, iter: &mut TokenPeek<'a, I>) | ||
179 | where | ||
180 | I: Iterator<Item = &'a tt::TokenTree>, | ||
181 | { | ||
182 | match tt { | ||
183 | tt::TokenTree::Leaf(token) => self.convert_token(token, iter), | ||
184 | tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), | ||
185 | } | ||
186 | } | ||
187 | |||
188 | fn convert_token<'a, I>(&mut self, token: &tt::Leaf, iter: &mut TokenPeek<'a, I>) | ||
189 | where | ||
190 | I: Iterator<Item = &'a tt::TokenTree>, | ||
191 | { | ||
192 | let tok = match token { | ||
193 | tt::Leaf::Literal(l) => TtToken { | ||
194 | kind: classify_literal(&l.text).unwrap().kind, | ||
195 | is_joint_to_next: false, | ||
196 | text: l.text.clone(), | ||
197 | }, | ||
198 | tt::Leaf::Punct(p) => { | ||
199 | if let Some(tt) = Self::convert_multi_char_punct(p, iter) { | ||
200 | tt | ||
201 | } else { | ||
202 | let kind = match p.char { | ||
203 | // lexer may produce combpund tokens for these ones | ||
204 | '.' => DOT, | ||
205 | ':' => COLON, | ||
206 | '=' => EQ, | ||
207 | '!' => EXCL, | ||
208 | '-' => MINUS, | ||
209 | c => SyntaxKind::from_char(c).unwrap(), | ||
210 | }; | ||
211 | let text = { | ||
212 | let mut buf = [0u8; 4]; | ||
213 | let s: &str = p.char.encode_utf8(&mut buf); | ||
214 | SmolStr::new(s) | ||
215 | }; | ||
216 | TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text } | ||
217 | } | ||
218 | } | ||
219 | tt::Leaf::Ident(ident) => { | ||
220 | let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); | ||
221 | TtToken { kind, is_joint_to_next: false, text: ident.text.clone() } | ||
222 | } | ||
223 | }; | ||
224 | self.tokens.push(tok) | ||
225 | } | ||
226 | |||
227 | fn convert_multi_char_punct<'a, I>( | ||
228 | p: &tt::Punct, | ||
229 | iter: &mut TokenPeek<'a, I>, | ||
230 | ) -> Option<TtToken> | ||
231 | where | ||
232 | I: Iterator<Item = &'a tt::TokenTree>, | ||
233 | { | ||
234 | if let Some((m, is_joint_to_next)) = iter.current_punct3(p) { | ||
235 | if let Some((kind, text)) = match m { | ||
236 | ('<', '<', '=') => Some((SHLEQ, "<<=")), | ||
237 | ('>', '>', '=') => Some((SHREQ, ">>=")), | ||
238 | ('.', '.', '.') => Some((DOTDOTDOT, "...")), | ||
239 | ('.', '.', '=') => Some((DOTDOTEQ, "..=")), | ||
240 | _ => None, | ||
241 | } { | ||
242 | iter.next(); | ||
243 | iter.next(); | ||
244 | return Some(TtToken { kind, is_joint_to_next, text: text.into() }); | ||
245 | } | ||
246 | } | ||
247 | |||
248 | if let Some((m, is_joint_to_next)) = iter.current_punct2(p) { | ||
249 | if let Some((kind, text)) = match m { | ||
250 | ('<', '<') => Some((SHL, "<<")), | ||
251 | ('>', '>') => Some((SHR, ">>")), | ||
252 | |||
253 | ('|', '|') => Some((PIPEPIPE, "||")), | ||
254 | ('&', '&') => Some((AMPAMP, "&&")), | ||
255 | ('%', '=') => Some((PERCENTEQ, "%=")), | ||
256 | ('*', '=') => Some((STAREQ, "*=")), | ||
257 | ('/', '=') => Some((SLASHEQ, "/=")), | ||
258 | ('^', '=') => Some((CARETEQ, "^=")), | ||
259 | |||
260 | ('&', '=') => Some((AMPEQ, "&=")), | ||
261 | ('|', '=') => Some((PIPEEQ, "|=")), | ||
262 | ('-', '=') => Some((MINUSEQ, "-=")), | ||
263 | ('+', '=') => Some((PLUSEQ, "+=")), | ||
264 | ('>', '=') => Some((GTEQ, ">=")), | ||
265 | ('<', '=') => Some((LTEQ, "<=")), | ||
266 | |||
267 | ('-', '>') => Some((THIN_ARROW, "->")), | ||
268 | ('!', '=') => Some((NEQ, "!=")), | ||
269 | ('=', '>') => Some((FAT_ARROW, "=>")), | ||
270 | ('=', '=') => Some((EQEQ, "==")), | ||
271 | ('.', '.') => Some((DOTDOT, "..")), | ||
272 | (':', ':') => Some((COLONCOLON, "::")), | ||
273 | |||
274 | _ => None, | ||
275 | } { | ||
276 | iter.next(); | ||
277 | return Some(TtToken { kind, is_joint_to_next, text: text.into() }); | ||
278 | } | ||
279 | } | ||
280 | |||
281 | None | ||
282 | } | ||
283 | |||
284 | fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { | ||
285 | let (kinds, texts) = match d { | ||
286 | tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), | ||
287 | tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), | ||
288 | tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), | ||
289 | tt::Delimiter::None => return, | ||
290 | }; | ||
291 | let idx = closing as usize; | ||
292 | let kind = kinds[idx]; | ||
293 | let text = &texts[idx..texts.len() - (1 - idx)]; | ||
294 | let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) }; | ||
295 | self.tokens.push(tok) | ||
296 | } | ||
297 | } | ||
298 | |||
299 | impl TokenSource for TtTokenSource { | ||
300 | fn token_kind(&self, pos: usize) -> SyntaxKind { | ||
301 | if let Some(tok) = self.tokens.get(pos) { | ||
302 | tok.kind | ||
303 | } else { | ||
304 | SyntaxKind::EOF | ||
305 | } | ||
306 | } | ||
307 | fn is_token_joint_to_next(&self, pos: usize) -> bool { | ||
308 | self.tokens[pos].is_joint_to_next | ||
309 | } | ||
310 | fn is_keyword(&self, pos: usize, kw: &str) -> bool { | ||
311 | self.tokens[pos].text == *kw | ||
312 | } | ||
313 | } | ||
314 | |||
315 | #[derive(Default)] | ||
316 | struct TtTreeSink<'a> { | ||
317 | buf: String, | 109 | buf: String, |
318 | tokens: &'a [TtToken], | 110 | src_querier: &'a Q, |
319 | text_pos: TextUnit, | 111 | text_pos: TextUnit, |
320 | token_pos: usize, | 112 | token_pos: usize, |
321 | inner: SyntaxTreeBuilder, | 113 | inner: SyntaxTreeBuilder, |
322 | } | 114 | } |
323 | 115 | ||
324 | impl<'a> TtTreeSink<'a> { | 116 | impl<'a, Q: Querier> TtTreeSink<'a, Q> { |
325 | fn new(tokens: &'a [TtToken]) -> TtTreeSink { | 117 | fn new(src_querier: &'a Q) -> Self { |
326 | TtTreeSink { | 118 | TtTreeSink { |
327 | buf: String::new(), | 119 | buf: String::new(), |
328 | tokens, | 120 | src_querier, |
329 | text_pos: 0.into(), | 121 | text_pos: 0.into(), |
330 | token_pos: 0, | 122 | token_pos: 0, |
331 | inner: SyntaxTreeBuilder::default(), | 123 | inner: SyntaxTreeBuilder::default(), |
@@ -333,10 +125,10 @@ impl<'a> TtTreeSink<'a> { | |||
333 | } | 125 | } |
334 | } | 126 | } |
335 | 127 | ||
336 | impl<'a> TreeSink for TtTreeSink<'a> { | 128 | impl<'a, Q: Querier> TreeSink for TtTreeSink<'a, Q> { |
337 | fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { | 129 | fn token(&mut self, kind: SyntaxKind, n_tokens: u8) { |
338 | for _ in 0..n_tokens { | 130 | for _ in 0..n_tokens { |
339 | self.buf += self.tokens[self.token_pos].text.as_str(); | 131 | self.buf += &self.src_querier.token(self.token_pos).1; |
340 | self.token_pos += 1; | 132 | self.token_pos += 1; |
341 | } | 133 | } |
342 | self.text_pos += TextUnit::of_str(&self.buf); | 134 | self.text_pos += TextUnit::of_str(&self.buf); |
@@ -380,21 +172,23 @@ mod tests { | |||
380 | "#, | 172 | "#, |
381 | ); | 173 | ); |
382 | let expansion = expand(&rules, "literals!(foo)"); | 174 | let expansion = expand(&rules, "literals!(foo)"); |
383 | let tt_src = TtTokenSource::new(&expansion); | 175 | let tt_src = SubtreeTokenSource::new(&expansion); |
176 | |||
177 | let query = tt_src.querier(); | ||
384 | 178 | ||
385 | // [{] | 179 | // [{] |
386 | // [let] [a] [=] ['c'] [;] | 180 | // [let] [a] [=] ['c'] [;] |
387 | assert_eq!(tt_src.tokens[1 + 3].text, "'c'"); | 181 | assert_eq!(query.token(1 + 3).1, "'c'"); |
388 | assert_eq!(tt_src.tokens[1 + 3].kind, CHAR); | 182 | assert_eq!(query.token(1 + 3).0, CHAR); |
389 | // [let] [c] [=] [1000] [;] | 183 | // [let] [c] [=] [1000] [;] |
390 | assert_eq!(tt_src.tokens[1 + 5 + 3].text, "1000"); | 184 | assert_eq!(query.token(1 + 5 + 3).1, "1000"); |
391 | assert_eq!(tt_src.tokens[1 + 5 + 3].kind, INT_NUMBER); | 185 | assert_eq!(query.token(1 + 5 + 3).0, INT_NUMBER); |
392 | // [let] [f] [=] [12E+99_f64] [;] | 186 | // [let] [f] [=] [12E+99_f64] [;] |
393 | assert_eq!(tt_src.tokens[1 + 10 + 3].text, "12E+99_f64"); | 187 | assert_eq!(query.token(1 + 10 + 3).1, "12E+99_f64"); |
394 | assert_eq!(tt_src.tokens[1 + 10 + 3].kind, FLOAT_NUMBER); | 188 | assert_eq!(query.token(1 + 10 + 3).0, FLOAT_NUMBER); |
395 | 189 | ||
396 | // [let] [s] [=] ["rust1"] [;] | 190 | // [let] [s] [=] ["rust1"] [;] |
397 | assert_eq!(tt_src.tokens[1 + 15 + 3].text, "\"rust1\""); | 191 | assert_eq!(query.token(1 + 15 + 3).1, "\"rust1\""); |
398 | assert_eq!(tt_src.tokens[1 + 15 + 3].kind, STRING); | 192 | assert_eq!(query.token(1 + 15 + 3).0, STRING); |
399 | } | 193 | } |
400 | } | 194 | } |