aboutsummaryrefslogtreecommitdiff
path: root/crates/mbe
diff options
context:
space:
mode:
authorbors[bot] <26634292+bors[bot]@users.noreply.github.com>2021-06-22 23:28:43 +0100
committerGitHub <[email protected]>2021-06-22 23:28:43 +0100
commit3762cb4535dce9eaf7c3dbd4aa9c33bf6dd30c87 (patch)
treed842fcbf296548edee40553157fef124ce9aab74 /crates/mbe
parent38da41ea6e58255223686104b3fbca27392d8162 (diff)
parentc6669776e1174170c157b685a66026d1a31ede85 (diff)
Merge #9383
9383: internal: Rewrite token tree lowering to use an explicit stack r=jonas-schievink a=jonas-schievink Part of https://github.com/rust-analyzer/rust-analyzer/issues/9358, this fixes the first cause of the stack overflow there. Unfortunately we now run into a stack overflow in the parser. bors r+ Co-authored-by: Jonas Schievink <[email protected]>
Diffstat (limited to 'crates/mbe')
-rw-r--r--crates/mbe/src/syntax_bridge.rs288
1 files changed, 164 insertions, 124 deletions
diff --git a/crates/mbe/src/syntax_bridge.rs b/crates/mbe/src/syntax_bridge.rs
index 7526bd8e6..ae6058cbc 100644
--- a/crates/mbe/src/syntax_bridge.rs
+++ b/crates/mbe/src/syntax_bridge.rs
@@ -24,7 +24,7 @@ pub fn ast_to_token_tree(ast: &impl ast::AstNode) -> (tt::Subtree, TokenMap) {
24pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) { 24pub fn syntax_node_to_token_tree(node: &SyntaxNode) -> (tt::Subtree, TokenMap) {
25 let global_offset = node.text_range().start(); 25 let global_offset = node.text_range().start();
26 let mut c = Convertor::new(node, global_offset); 26 let mut c = Convertor::new(node, global_offset);
27 let subtree = c.go(); 27 let subtree = convert_tokens(&mut c);
28 c.id_alloc.map.shrink_to_fit(); 28 c.id_alloc.map.shrink_to_fit();
29 (subtree, c.id_alloc.map) 29 (subtree, c.id_alloc.map)
30} 30}
@@ -80,7 +80,7 @@ pub fn parse_to_token_tree(text: &str) -> Option<(tt::Subtree, TokenMap)> {
80 }, 80 },
81 }; 81 };
82 82
83 let subtree = conv.go(); 83 let subtree = convert_tokens(&mut conv);
84 Some((subtree, conv.id_alloc.map)) 84 Some((subtree, conv.id_alloc.map))
85} 85}
86 86
@@ -121,6 +121,159 @@ pub fn parse_exprs_with_sep(tt: &tt::Subtree, sep: char) -> Vec<tt::Subtree> {
121 res 121 res
122} 122}
123 123
124fn convert_tokens<C: TokenConvertor>(conv: &mut C) -> tt::Subtree {
125 struct StackEntry {
126 subtree: tt::Subtree,
127 idx: usize,
128 open_range: TextRange,
129 }
130
131 let entry = StackEntry {
132 subtree: tt::Subtree { delimiter: None, ..Default::default() },
133 // never used (delimiter is `None`)
134 idx: !0,
135 open_range: TextRange::empty(TextSize::of('.')),
136 };
137 let mut stack = vec![entry];
138
139 loop {
140 let entry = stack.last_mut().unwrap();
141 let result = &mut entry.subtree.token_trees;
142 let (token, range) = match conv.bump() {
143 None => break,
144 Some(it) => it,
145 };
146
147 let k: SyntaxKind = token.kind();
148 if k == COMMENT {
149 if let Some(tokens) = conv.convert_doc_comment(&token) {
150 result.extend(tokens);
151 }
152 continue;
153 }
154
155 result.push(if k.is_punct() && k != UNDERSCORE {
156 assert_eq!(range.len(), TextSize::of('.'));
157
158 if let Some(delim) = entry.subtree.delimiter {
159 let expected = match delim.kind {
160 tt::DelimiterKind::Parenthesis => T![')'],
161 tt::DelimiterKind::Brace => T!['}'],
162 tt::DelimiterKind::Bracket => T![']'],
163 };
164
165 if k == expected {
166 let entry = stack.pop().unwrap();
167 conv.id_alloc().close_delim(entry.idx, Some(range));
168 stack.last_mut().unwrap().subtree.token_trees.push(entry.subtree.into());
169 continue;
170 }
171 }
172
173 let delim = match k {
174 T!['('] => Some(tt::DelimiterKind::Parenthesis),
175 T!['{'] => Some(tt::DelimiterKind::Brace),
176 T!['['] => Some(tt::DelimiterKind::Bracket),
177 _ => None,
178 };
179
180 if let Some(kind) = delim {
181 let mut subtree = tt::Subtree::default();
182 let (id, idx) = conv.id_alloc().open_delim(range);
183 subtree.delimiter = Some(tt::Delimiter { id, kind });
184 stack.push(StackEntry { subtree, idx, open_range: range });
185 continue;
186 } else {
187 let spacing = match conv.peek() {
188 Some(next)
189 if next.kind().is_trivia()
190 || next.kind() == T!['[']
191 || next.kind() == T!['{']
192 || next.kind() == T!['('] =>
193 {
194 tt::Spacing::Alone
195 }
196 Some(next) if next.kind().is_punct() && next.kind() != UNDERSCORE => {
197 tt::Spacing::Joint
198 }
199 _ => tt::Spacing::Alone,
200 };
201 let char = match token.to_char() {
202 Some(c) => c,
203 None => {
204 panic!("Token from lexer must be single char: token = {:#?}", token);
205 }
206 };
207 tt::Leaf::from(tt::Punct { char, spacing, id: conv.id_alloc().alloc(range) }).into()
208 }
209 } else {
210 macro_rules! make_leaf {
211 ($i:ident) => {
212 tt::$i { id: conv.id_alloc().alloc(range), text: token.to_text() }.into()
213 };
214 }
215 let leaf: tt::Leaf = match k {
216 T![true] | T![false] => make_leaf!(Ident),
217 IDENT => make_leaf!(Ident),
218 UNDERSCORE => make_leaf!(Ident),
219 k if k.is_keyword() => make_leaf!(Ident),
220 k if k.is_literal() => make_leaf!(Literal),
221 LIFETIME_IDENT => {
222 let char_unit = TextSize::of('\'');
223 let r = TextRange::at(range.start(), char_unit);
224 let apostrophe = tt::Leaf::from(tt::Punct {
225 char: '\'',
226 spacing: tt::Spacing::Joint,
227 id: conv.id_alloc().alloc(r),
228 });
229 result.push(apostrophe.into());
230
231 let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
232 let ident = tt::Leaf::from(tt::Ident {
233 text: SmolStr::new(&token.to_text()[1..]),
234 id: conv.id_alloc().alloc(r),
235 });
236 result.push(ident.into());
237 continue;
238 }
239 _ => continue,
240 };
241
242 leaf.into()
243 });
244 }
245
246 // If we get here, we've consumed all input tokens.
247 // We might have more than one subtree in the stack, if the delimiters are improperly balanced.
248 // Merge them so we're left with one.
249 while stack.len() > 1 {
250 let entry = stack.pop().unwrap();
251 let parent = stack.last_mut().unwrap();
252
253 conv.id_alloc().close_delim(entry.idx, None);
254 let leaf: tt::Leaf = tt::Punct {
255 id: conv.id_alloc().alloc(entry.open_range),
256 char: match entry.subtree.delimiter.unwrap().kind {
257 tt::DelimiterKind::Parenthesis => '(',
258 tt::DelimiterKind::Brace => '{',
259 tt::DelimiterKind::Bracket => '[',
260 },
261 spacing: tt::Spacing::Alone,
262 }
263 .into();
264 parent.subtree.token_trees.push(leaf.into());
265 parent.subtree.token_trees.extend(entry.subtree.token_trees);
266 }
267
268 let subtree = stack.pop().unwrap().subtree;
269 if subtree.token_trees.len() == 1 {
270 if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] {
271 return first.clone();
272 }
273 }
274 subtree
275}
276
124/// Returns the textual content of a doc comment block as a quoted string 277/// Returns the textual content of a doc comment block as a quoted string
125/// That is, strips leading `///` (or `/**`, etc) 278/// That is, strips leading `///` (or `/**`, etc)
126/// and strips the ending `*/` 279/// and strips the ending `*/`
@@ -242,128 +395,6 @@ trait SrcToken: std::fmt::Debug {
242trait TokenConvertor { 395trait TokenConvertor {
243 type Token: SrcToken; 396 type Token: SrcToken;
244 397
245 fn go(&mut self) -> tt::Subtree {
246 let mut subtree = tt::Subtree { delimiter: None, ..Default::default() };
247 while self.peek().is_some() {
248 self.collect_leaf(&mut subtree.token_trees);
249 }
250 if subtree.token_trees.len() == 1 {
251 if let tt::TokenTree::Subtree(first) = &subtree.token_trees[0] {
252 return first.clone();
253 }
254 }
255 subtree
256 }
257
258 fn collect_leaf(&mut self, result: &mut Vec<tt::TokenTree>) {
259 let (token, range) = match self.bump() {
260 None => return,
261 Some(it) => it,
262 };
263
264 let k: SyntaxKind = token.kind();
265 if k == COMMENT {
266 if let Some(tokens) = self.convert_doc_comment(&token) {
267 result.extend(tokens);
268 }
269 return;
270 }
271
272 result.push(if k.is_punct() && k != UNDERSCORE {
273 assert_eq!(range.len(), TextSize::of('.'));
274 let delim = match k {
275 T!['('] => Some((tt::DelimiterKind::Parenthesis, T![')'])),
276 T!['{'] => Some((tt::DelimiterKind::Brace, T!['}'])),
277 T!['['] => Some((tt::DelimiterKind::Bracket, T![']'])),
278 _ => None,
279 };
280
281 if let Some((kind, closed)) = delim {
282 let mut subtree = tt::Subtree::default();
283 let (id, idx) = self.id_alloc().open_delim(range);
284 subtree.delimiter = Some(tt::Delimiter { id, kind });
285
286 while self.peek().map_or(false, |it| it.kind() != closed) {
287 self.collect_leaf(&mut subtree.token_trees);
288 }
289 let last_range = match self.bump() {
290 None => {
291 // For error resilience, we insert an char punct for the opening delim here
292 self.id_alloc().close_delim(idx, None);
293 let leaf: tt::Leaf = tt::Punct {
294 id: self.id_alloc().alloc(range),
295 char: token.to_char().unwrap(),
296 spacing: tt::Spacing::Alone,
297 }
298 .into();
299 result.push(leaf.into());
300 result.extend(subtree.token_trees);
301 return;
302 }
303 Some(it) => it.1,
304 };
305 self.id_alloc().close_delim(idx, Some(last_range));
306 subtree.into()
307 } else {
308 let spacing = match self.peek() {
309 Some(next)
310 if next.kind().is_trivia()
311 || next.kind() == T!['[']
312 || next.kind() == T!['{']
313 || next.kind() == T!['('] =>
314 {
315 tt::Spacing::Alone
316 }
317 Some(next) if next.kind().is_punct() && next.kind() != UNDERSCORE => {
318 tt::Spacing::Joint
319 }
320 _ => tt::Spacing::Alone,
321 };
322 let char = match token.to_char() {
323 Some(c) => c,
324 None => {
325 panic!("Token from lexer must be single char: token = {:#?}", token);
326 }
327 };
328 tt::Leaf::from(tt::Punct { char, spacing, id: self.id_alloc().alloc(range) }).into()
329 }
330 } else {
331 macro_rules! make_leaf {
332 ($i:ident) => {
333 tt::$i { id: self.id_alloc().alloc(range), text: token.to_text() }.into()
334 };
335 }
336 let leaf: tt::Leaf = match k {
337 T![true] | T![false] => make_leaf!(Ident),
338 IDENT => make_leaf!(Ident),
339 UNDERSCORE => make_leaf!(Ident),
340 k if k.is_keyword() => make_leaf!(Ident),
341 k if k.is_literal() => make_leaf!(Literal),
342 LIFETIME_IDENT => {
343 let char_unit = TextSize::of('\'');
344 let r = TextRange::at(range.start(), char_unit);
345 let apostrophe = tt::Leaf::from(tt::Punct {
346 char: '\'',
347 spacing: tt::Spacing::Joint,
348 id: self.id_alloc().alloc(r),
349 });
350 result.push(apostrophe.into());
351
352 let r = TextRange::at(range.start() + char_unit, range.len() - char_unit);
353 let ident = tt::Leaf::from(tt::Ident {
354 text: SmolStr::new(&token.to_text()[1..]),
355 id: self.id_alloc().alloc(r),
356 });
357 result.push(ident.into());
358 return;
359 }
360 _ => return,
361 };
362
363 leaf.into()
364 });
365 }
366
367 fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>; 398 fn convert_doc_comment(&self, token: &Self::Token) -> Option<Vec<tt::TokenTree>>;
368 399
369 fn bump(&mut self) -> Option<(Self::Token, TextRange)>; 400 fn bump(&mut self) -> Option<(Self::Token, TextRange)>;
@@ -683,6 +714,7 @@ mod tests {
683 algo::{insert_children, InsertPosition}, 714 algo::{insert_children, InsertPosition},
684 ast::AstNode, 715 ast::AstNode,
685 }; 716 };
717 use test_utils::assert_eq_text;
686 718
687 #[test] 719 #[test]
688 fn convert_tt_token_source() { 720 fn convert_tt_token_source() {
@@ -792,4 +824,12 @@ mod tests {
792 let tt = ast_to_token_tree(&struct_def).0; 824 let tt = ast_to_token_tree(&struct_def).0;
793 token_tree_to_syntax_node(&tt, FragmentKind::Item).unwrap(); 825 token_tree_to_syntax_node(&tt, FragmentKind::Item).unwrap();
794 } 826 }
827
828 #[test]
829 fn test_missing_closing_delim() {
830 let source_file = ast::SourceFile::parse("m!(x").tree();
831 let node = source_file.syntax().descendants().find_map(ast::TokenTree::cast).unwrap();
832 let tt = ast_to_token_tree(&node).0.to_string();
833 assert_eq_text!(&*tt, "( x");
834 }
795} 835}