diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2021-06-22 23:28:43 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2021-06-22 23:28:43 +0100 |
commit | 3762cb4535dce9eaf7c3dbd4aa9c33bf6dd30c87 (patch) | |
tree | d842fcbf296548edee40553157fef124ce9aab74 | |
parent | 38da41ea6e58255223686104b3fbca27392d8162 (diff) | |
parent | c6669776e1174170c157b685a66026d1a31ede85 (diff) |
Merge #9383
9383: internal: Rewrite token tree lowering to use an explicit stack r=jonas-schievink a=jonas-schievink
Part of https://github.com/rust-analyzer/rust-analyzer/issues/9358, this fixes the first cause of the stack overflow there. Unfortunately we now run into a stack overflow in the parser.
bors r+
Co-authored-by: Jonas Schievink <[email protected]>
-rw-r--r-- | crates/mbe/src/syntax_bridge.rs | 288 |
1 file changed, 164 insertions, 124 deletions
diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs index 7526bd8e6..ae6058cbc 100644 --- a/crates/mbe/src/syntax_bridge.rs +++ b/crates/mbe/src/syntax_bridge.rs | |||
@@ -24,7 +24,7 @@ pub fn ast_to_token_tree(ast: &impl ast::AstNode) -> (tt::Subtree, TokenMap) { | |||
24 | pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) { | 24 | pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) { |
25 | let global_offset = node.text_range().start(); | 25 | let global_offset = node.text_range().start(); |
26 | let mut c = Convertor::new(node, global_offset); | 26 | let mut c = Convertor::new(node, global_offset); |
27 | let subtree = c.go(); | 27 | let subtree = convert_tokens(&mut c); |
28 | c.id_alloc.map.shrink_to_fit(); | 28 | c.id_alloc.map.shrink_to_fit(); |
29 | (subtree, c.id_alloc.map) | 29 | (subtree, c.id_alloc.map) |
30 | } | 30 | } |
@@ -80,7 +80,7 @@ pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> { | |||
80 | }, | 80 | }, |
81 | }; | 81 | }; |
82 | 82 | ||
83 | let subtree = conv.go(); | 83 | let subtree = convert_tokens(&mut conv); |
84 | Some((subtree, conv.id_alloc.map)) | 84 | Some((subtree, conv.id_alloc.map)) |
85 | } | 85 | } |
86 | 86 | ||
@@ -121,6 +121,159 @@ pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> { | |||
121 | res | 121 | res |
122 | } | 122 | } |
123 | 123 | ||
124 | fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree { | ||
125 | struct StackEntry { | ||
126 | subtree: tt::Subtree, | ||
127 | idx: usize, | ||
128 | open_range: TextRange, | ||
129 | } | ||
130 | |||
131 | let entry = StackEntry { | ||
132 | subtree: tt::Subtree { delimiter: None, ..Default::default() }, | ||
133 | // never used (delimiter is `None`) | ||
134 | idx: !0, | ||
135 | open_range: TextRange::empty(TextSize::of('.')), | ||
136 | }; | ||
137 | let mut stack = vec![entry]; | ||
138 | |||
139 | loop { | ||
140 | let entry = stack.last_mut().unwrap(); | ||
141 | let result = &mut entry.subtree.token_trees; | ||
142 | let (token, range) = match conv.bump() { | ||
143 | None => break, | ||
144 | Some(it) => it, | ||
145 | }; | ||
146 | |||
147 | let k: SyntaxKind = token.kind(); | ||
148 | if k == COMMENT { | ||
149 | if let Some(tokens) = conv.convert_doc_comment(&token) { | ||
150 | result.extend(tokens); | ||
151 | } | ||
152 | continue; | ||
153 | } | ||
154 | |||
155 | result.push(if k.is_punct() && k != UNDERSCORE { | ||
156 | assert_eq!(range.len(), TextSize::of('.')); | ||
157 | |||
158 | if let Some(delim) = entry.subtree.delimiter { | ||
159 | let expected = match delim.kind { | ||
160 | tt::DelimiterKind::Parenthesis => T![')'], | ||
161 | tt::DelimiterKind::Brace => T!['}'], | ||
162 | tt::DelimiterKind::Bracket => T![']'], | ||
163 | }; | ||
164 | |||
165 | if k == expected { | ||
166 | let entry = stack.pop().unwrap(); | ||
167 | conv.id_alloc().close_delim(entry.idx, Some(range)); | ||
168 | stack.last_mut().unwrap().subtree.token_trees.push(entry.subtree.into()); | ||
169 | continue; | ||
170 | } | ||
171 | } | ||
172 | |||
173 | let delim = match k { | ||
174 | T!['('] => Some(tt::DelimiterKind::Parenthesis), | ||
175 | T!['{'] => Some(tt::DelimiterKind::Brace), | ||
176 | T!['['] => Some(tt::DelimiterKind::Bracket), | ||
177 | _ => None, | ||
178 | }; | ||
179 | |||
180 | if let Some(kind) = delim { | ||
181 | let mut subtree = tt::Subtree::default(); | ||
182 | let (id, idx) = conv.id_alloc().open_delim(range); | ||
183 | subtree.delimiter = Some(tt::Delimiter { id, kind }); | ||
184 | stack.push(StackEntry { subtree, idx, open_range: range }); | ||
185 | continue; | ||
186 | } else { | ||
187 | let spacing = match conv.peek() { | ||
188 | Some(next) | ||
189 | if next.kind().is_trivia() | ||
190 | || next.kind() == T!['['] | ||
191 | || next.kind() == T!['{'] | ||
192 | || next.kind() == T!['('] => | ||
193 | { | ||
194 | tt::Spacing::Alone | ||
195 | } | ||
196 | Some(next) if next.kind().is_punct() && next.kind() != UNDERSCORE => { | ||
197 | tt::Spacing::Joint | ||
198 | } | ||
199 | _ => tt::Spacing::Alone, | ||
200 | }; | ||
201 | let char = match token.to_char() { | ||
202 | Some(c) => c, | ||
203 | None => { | ||
204 | panic!("Token from lexer must be single char: token = {:#?}", token); | ||
205 | } | ||
206 | }; | ||
207 | tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range) }).into() | ||
208 | } | ||
209 | } else { | ||
210 | macro_rules! make_leaf { | ||
211 | ($i:ident) => { | ||
212 | tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text() }.into() | ||
213 | }; | ||
214 | } | ||
215 | let leaf: tt::Leaf = match k { | ||
216 | T![true] | T![false] => make_leaf!(Ident), | ||
217 | IDENT => make_leaf!(Ident), | ||
218 | UNDERSCORE => make_leaf!(Ident), | ||
219 | k if k.is_keyword() => make_leaf!(Ident), | ||
220 | k if k.is_literal() => make_leaf!(Literal), | ||
221 | LIFETIME_IDENT => { | ||
222 | let char_unit = TextSize::of('\''); | ||
223 | let r = TextRange::at(range.start(), char_unit); | ||
224 | let apostrophe = tt::Leaf::from(tt::Punct { | ||
225 | char: '\'', | ||
226 | spacing: tt::Spacing::Joint, | ||
227 | id: conv.id_alloc().alloc(r), | ||
228 | }); | ||
229 | result.push(apostrophe.into()); | ||
230 | |||
231 | let r = TextRange::at(range.start() + char_unit, range.len() - char_unit); | ||
232 | let ident = tt::Leaf::from(tt::Ident { | ||
233 | text: SmolStr::new(&token.to_text()[1..]), | ||
234 | id: conv.id_alloc().alloc(r), | ||
235 | }); | ||
236 | result.push(ident.into()); | ||
237 | continue; | ||
238 | } | ||
239 | _ => continue, | ||
240 | }; | ||
241 | |||
242 | leaf.into() | ||
243 | }); | ||
244 | } | ||
245 | |||
246 | // If we get here, we've consumed all input tokens. | ||
247 | // We might have more than one subtree in the stack, if the delimiters are improperly balanced. | ||
248 | // Merge them so we're left with one. | ||
249 | while stack.len() > 1 { | ||
250 | let entry = stack.pop().unwrap(); | ||
251 | let parent = stack.last_mut().unwrap(); | ||
252 | |||
253 | conv.id_alloc().close_delim(entry.idx, None); | ||
254 | let leaf: tt::Leaf = tt::Punct { | ||
255 | id: conv.id_alloc().alloc(entry.open_range), | ||
256 | char: match entry.subtree.delimiter.unwrap().kind { | ||
257 | tt::DelimiterKind::Parenthesis => '(', | ||
258 | tt::DelimiterKind::Brace => '{', | ||
259 | tt::DelimiterKind::Bracket => '[', | ||
260 | }, | ||
261 | spacing: tt::Spacing::Alone, | ||
262 | } | ||
263 | .into(); | ||
264 | parent.subtree.token_trees.push(leaf.into()); | ||
265 | parent.subtree.token_trees.extend(entry.subtree.token_trees); | ||
266 | } | ||
267 | |||
268 | let subtree = stack.pop().unwrap().subtree; | ||
269 | if subtree.token_trees.len() == 1 { | ||
270 | if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] { | ||
271 | return first.clone(); | ||
272 | } | ||
273 | } | ||
274 | subtree | ||
275 | } | ||
276 | |||
124 | /// Returns the textual content of a doc comment block as a quoted string | 277 | /// Returns the textual content of a doc comment block as a quoted string |
125 | /// That is, strips leading `///` (or `/**`, etc) | 278 | /// That is, strips leading `///` (or `/**`, etc) |
126 | /// and strips the ending `*/` | 279 | /// and strips the ending `*/` |
@@ -242,128 +395,6 @@ trait SrcToken: std::fmt::Debug { | |||
242 | trait TokenConvertor { | 395 | trait TokenConvertor { |
243 | type Token: SrcToken; | 396 | type Token: SrcToken; |
244 | 397 | ||
245 | fn go(&mut self) -> tt::Subtree { | ||
246 | let mut subtree = tt::Subtree { delimiter: None, ..Default::default() }; | ||
247 | while self.peek().is_some() { | ||
248 | self.collect_leaf(&mut subtree.token_trees); | ||
249 | } | ||
250 | if subtree.token_trees.len() == 1 { | ||
251 | if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] { | ||
252 | return first.clone(); | ||
253 | } | ||
254 | } | ||
255 | subtree | ||
256 | } | ||
257 | |||
258 | fn collect_leaf(&mut self, result: &mut Vec<tt::TokenTree>) { | ||
259 | let (token, range) = match self.bump() { | ||
260 | None => return, | ||
261 | Some(it) => it, | ||
262 | }; | ||
263 | |||
264 | let k: SyntaxKind = token.kind(); | ||
265 | if k == COMMENT { | ||
266 | if let Some(tokens) = self.convert_doc_comment(&token) { | ||
267 | result.extend(tokens); | ||
268 | } | ||
269 | return; | ||
270 | } | ||
271 | |||
272 | result.push(if k.is_punct() && k != UNDERSCORE { | ||
273 | assert_eq!(range.len(), TextSize::of('.')); | ||
274 | let delim = match k { | ||
275 | T!['('] => Some((tt::DelimiterKind::Parenthesis, T![')'])), | ||
276 | T!['{'] => Some((tt::DelimiterKind::Brace, T!['}'])), | ||
277 | T!['['] => Some((tt::DelimiterKind::Bracket, T![']'])), | ||
278 | _ => None, | ||
279 | }; | ||
280 | |||
281 | if let Some((kind, closed)) = delim { | ||
282 | let mut subtree = tt::Subtree::default(); | ||
283 | let (id, idx) = self.id_alloc().open_delim(range); | ||
284 | subtree.delimiter = Some(tt::Delimiter { id, kind }); | ||
285 | |||
286 | while self.peek().map_or(false, |it| it.kind() != closed) { | ||
287 | self.collect_leaf(&mut subtree.token_trees); | ||
288 | } | ||
289 | let last_range = match self.bump() { | ||
290 | None => { | ||
291 | // For error resilience, we insert an char punct for the opening delim here | ||
292 | self.id_alloc().close_delim(idx, None); | ||
293 | let leaf: tt::Leaf = tt::Punct { | ||
294 | id: self.id_alloc().alloc(range), | ||
295 | char: token.to_char().unwrap(), | ||
296 | spacing: tt::Spacing::Alone, | ||
297 | } | ||
298 | .into(); | ||
299 | result.push(leaf.into()); | ||
300 | result.extend(subtree.token_trees); | ||
301 | return; | ||
302 | } | ||
303 | Some(it) => it.1, | ||
304 | }; | ||
305 | self.id_alloc().close_delim(idx, Some(last_range)); | ||
306 | subtree.into() | ||
307 | } else { | ||
308 | let spacing = match self.peek() { | ||
309 | Some(next) | ||
310 | if next.kind().is_trivia() | ||
311 | || next.kind() == T!['['] | ||
312 | || next.kind() == T!['{'] | ||
313 | || next.kind() == T!['('] => | ||
314 | { | ||
315 | tt::Spacing::Alone | ||
316 | } | ||
317 | Some(next) if next.kind().is_punct() && next.kind() != UNDERSCORE => { | ||
318 | tt::Spacing::Joint | ||
319 | } | ||
320 | _ => tt::Spacing::Alone, | ||
321 | }; | ||
322 | let char = match token.to_char() { | ||
323 | Some(c) => c, | ||
324 | None => { | ||
325 | panic!("Token from lexer must be single char: token = {:#?}", token); | ||
326 | } | ||
327 | }; | ||
328 | tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc().alloc(range) }).into() | ||
329 | } | ||
330 | } else { | ||
331 | macro_rules! make_leaf { | ||
332 | ($i:ident) => { | ||
333 | tt::$i { id: self.id_alloc().alloc(range), text: token.to_text() }.into() | ||
334 | }; | ||
335 | } | ||
336 | let leaf: tt::Leaf = match k { | ||
337 | T![true] | T![false] => make_leaf!(Ident), | ||
338 | IDENT => make_leaf!(Ident), | ||
339 | UNDERSCORE => make_leaf!(Ident), | ||
340 | k if k.is_keyword() => make_leaf!(Ident), | ||
341 | k if k.is_literal() => make_leaf!(Literal), | ||
342 | LIFETIME_IDENT => { | ||
343 | let char_unit = TextSize::of('\''); | ||
344 | let r = TextRange::at(range.start(), char_unit); | ||
345 | let apostrophe = tt::Leaf::from(tt::Punct { | ||
346 | char: '\'', | ||
347 | spacing: tt::Spacing::Joint, | ||
348 | id: self.id_alloc().alloc(r), | ||
349 | }); | ||
350 | result.push(apostrophe.into()); | ||
351 | |||
352 | let r = TextRange::at(range.start() + char_unit, range.len() - char_unit); | ||
353 | let ident = tt::Leaf::from(tt::Ident { | ||
354 | text: SmolStr::new(&token.to_text()[1..]), | ||
355 | id: self.id_alloc().alloc(r), | ||
356 | }); | ||
357 | result.push(ident.into()); | ||
358 | return; | ||
359 | } | ||
360 | _ => return, | ||
361 | }; | ||
362 | |||
363 | leaf.into() | ||
364 | }); | ||
365 | } | ||
366 | |||
367 | fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>; | 398 | fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>; |
368 | 399 | ||
369 | fn bump(&mut self) -> Option<(Self::Token, TextRange)>; | 400 | fn bump(&mut self) -> Option<(Self::Token, TextRange)>; |
@@ -683,6 +714,7 @@ mod tests { | |||
683 | algo::{insert_children, InsertPosition}, | 714 | algo::{insert_children, InsertPosition}, |
684 | ast::AstNode, | 715 | ast::AstNode, |
685 | }; | 716 | }; |
717 | use test_utils::assert_eq_text; | ||
686 | 718 | ||
687 | #[test] | 719 | #[test] |
688 | fn convert_tt_token_source() { | 720 | fn convert_tt_token_source() { |
@@ -792,4 +824,12 @@ mod tests { | |||
792 | let tt = ast_to_token_tree(&struct_def).0; | 824 | let tt = ast_to_token_tree(&struct_def).0; |
793 | token_tree_to_syntax_node(&tt, FragmentKind::Item).unwrap(); | 825 | token_tree_to_syntax_node(&tt, FragmentKind::Item).unwrap(); |
794 | } | 826 | } |
827 | |||
828 | #[test] | ||
829 | fn test_missing_closing_delim() { | ||
830 | let source_file = ast::SourceFile::parse("m!(x").tree(); | ||
831 | let node = source_file.syntax().descendants().find_map(ast::TokenTree::cast).unwrap(); | ||
832 | let tt = ast_to_token_tree(&node).0.to_string(); | ||
833 | assert_eq_text!(&*tt, "( x"); | ||
834 | } | ||
795 | } | 835 | } |