diff options
Diffstat (limited to 'crates/mbe/src/expander')
-rw-r--r-- | crates/mbe/src/expander/matcher.rs | 737 | ||||
-rw-r--r-- | crates/mbe/src/expander/transcriber.rs | 247 |
2 files changed, 984 insertions, 0 deletions
diff --git a/crates/mbe/src/expander/matcher.rs b/crates/mbe/src/expander/matcher.rs new file mode 100644 index 000000000..9d3d28055 --- /dev/null +++ b/crates/mbe/src/expander/matcher.rs | |||
@@ -0,0 +1,737 @@ | |||
1 | //! An NFA-based parser, which is porting from rustc mbe parsing code | ||
2 | //! | ||
3 | //! See https://github.com/rust-lang/rust/blob/70b18bc2cbac4712020019f5bf57c00905373205/compiler/rustc_expand/src/mbe/macro_parser.rs | ||
4 | //! Here is a quick intro to how the parser works, copied from rustc: | ||
5 | //! | ||
6 | //! A 'position' is a dot in the middle of a matcher, usually represented as a | ||
7 | //! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. | ||
8 | //! | ||
9 | //! The parser walks through the input a character at a time, maintaining a list | ||
10 | //! of threads consistent with the current position in the input string: `cur_items`. | ||
11 | //! | ||
12 | //! As it processes them, it fills up `eof_items` with threads that would be valid if | ||
13 | //! the macro invocation is now over, `bb_items` with threads that are waiting on | ||
14 | //! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting | ||
15 | //! on a particular token. Most of the logic concerns moving the · through the | ||
16 | //! repetitions indicated by Kleene stars. The rules for moving the · without | ||
17 | //! consuming any input are called epsilon transitions. It only advances or calls | ||
18 | //! out to the real Rust parser when no `cur_items` threads remain. | ||
19 | //! | ||
20 | //! Example: | ||
21 | //! | ||
22 | //! ```text, ignore | ||
23 | //! Start parsing a a a a b against [· a $( a )* a b]. | ||
24 | //! | ||
25 | //! Remaining input: a a a a b | ||
26 | //! next: [· a $( a )* a b] | ||
27 | //! | ||
28 | //! - - - Advance over an a. - - - | ||
29 | //! | ||
30 | //! Remaining input: a a a b | ||
31 | //! cur: [a · $( a )* a b] | ||
32 | //! Descend/Skip (first item). | ||
33 | //! next: [a $( · a )* a b] [a $( a )* · a b]. | ||
34 | //! | ||
35 | //! - - - Advance over an a. - - - | ||
36 | //! | ||
37 | //! Remaining input: a a b | ||
38 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | ||
39 | //! Follow epsilon transition: Finish/Repeat (first item) | ||
40 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | ||
41 | //! | ||
42 | //! - - - Advance over an a. - - - (this looks exactly like the last step) | ||
43 | //! | ||
44 | //! Remaining input: a b | ||
45 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | ||
46 | //! Follow epsilon transition: Finish/Repeat (first item) | ||
47 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | ||
48 | //! | ||
49 | //! - - - Advance over an a. - - - (this looks exactly like the last step) | ||
50 | //! | ||
51 | //! Remaining input: b | ||
52 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | ||
53 | //! Follow epsilon transition: Finish/Repeat (first item) | ||
54 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | ||
55 | //! | ||
56 | //! - - - Advance over a b. - - - | ||
57 | //! | ||
58 | //! Remaining input: '' | ||
59 | //! eof: [a $( a )* a b ·] | ||
60 | //! ``` | ||
61 | |||
62 | use crate::{ | ||
63 | expander::{Binding, Bindings, Fragment}, | ||
64 | parser::{Op, OpDelimited, OpDelimitedIter, RepeatKind, Separator}, | ||
65 | tt_iter::TtIter, | ||
66 | ExpandError, MetaTemplate, | ||
67 | }; | ||
68 | |||
69 | use super::ExpandResult; | ||
70 | use parser::FragmentKind::*; | ||
71 | use smallvec::{smallvec, SmallVec}; | ||
72 | use syntax::SmolStr; | ||
73 | |||
74 | impl Bindings { | ||
75 | fn push_optional(&mut self, name: &SmolStr) { | ||
76 | // FIXME: Do we have a better way to represent an empty token ? | ||
77 | // Insert an empty subtree for empty token | ||
78 | let tt = tt::Subtree::default().into(); | ||
79 | self.inner.push((name.clone(), Binding::Fragment(Fragment::Tokens(tt)))); | ||
80 | } | ||
81 | |||
82 | fn push_empty(&mut self, name: &SmolStr) { | ||
83 | self.inner.push((name.clone(), Binding::Empty)); | ||
84 | } | ||
85 | |||
86 | fn push_nested(&mut self, idx: usize, nested: Bindings) -> Result<(), ExpandError> { | ||
87 | for (key, value) in nested.inner { | ||
88 | if self.get_mut(&key).is_none() { | ||
89 | self.inner.push((key.clone(), Binding::Nested(Vec::new()))); | ||
90 | } | ||
91 | match self.get_mut(&key) { | ||
92 | Some(Binding::Nested(it)) => { | ||
93 | // insert empty nested bindings before this one | ||
94 | while it.len() < idx { | ||
95 | it.push(Binding::Nested(vec![])); | ||
96 | } | ||
97 | it.push(value); | ||
98 | } | ||
99 | _ => { | ||
100 | return Err(ExpandError::BindingError(format!( | ||
101 | "could not find binding `{}`", | ||
102 | key | ||
103 | ))); | ||
104 | } | ||
105 | } | ||
106 | } | ||
107 | Ok(()) | ||
108 | } | ||
109 | |||
110 | fn get_mut(&mut self, name: &str) -> Option<&mut Binding> { | ||
111 | self.inner.iter_mut().find_map(|(n, b)| if n == name { Some(b) } else { None }) | ||
112 | } | ||
113 | |||
114 | fn bindings(&self) -> impl Iterator<Item = &Binding> { | ||
115 | self.inner.iter().map(|(_, b)| b) | ||
116 | } | ||
117 | } | ||
118 | |||
119 | macro_rules! err { | ||
120 | () => { | ||
121 | ExpandError::BindingError(format!("")) | ||
122 | }; | ||
123 | ($($tt:tt)*) => { | ||
124 | ExpandError::BindingError(format!($($tt)*)) | ||
125 | }; | ||
126 | } | ||
127 | |||
128 | #[derive(Clone, Debug, Default, PartialEq, Eq)] | ||
129 | pub(super) struct Match { | ||
130 | pub(super) bindings: Bindings, | ||
131 | /// We currently just keep the first error and count the rest to compare matches. | ||
132 | pub(super) err: Option<ExpandError>, | ||
133 | pub(super) err_count: usize, | ||
134 | /// How many top-level token trees were left to match. | ||
135 | pub(super) unmatched_tts: usize, | ||
136 | /// The number of bound variables | ||
137 | pub(super) bound_count: usize, | ||
138 | } | ||
139 | |||
140 | impl Match { | ||
141 | fn add_err(&mut self, err: ExpandError) { | ||
142 | let prev_err = self.err.take(); | ||
143 | self.err = prev_err.or(Some(err)); | ||
144 | self.err_count += 1; | ||
145 | } | ||
146 | } | ||
147 | |||
148 | /// Matching errors are added to the `Match`. | ||
149 | pub(super) fn match_(pattern: &MetaTemplate, input: &tt::Subtree) -> Match { | ||
150 | let mut res = match_loop(pattern, &input); | ||
151 | res.bound_count = count(res.bindings.bindings()); | ||
152 | return res; | ||
153 | |||
154 | fn count<'a>(bindings: impl Iterator<Item = &'a Binding>) -> usize { | ||
155 | bindings | ||
156 | .map(|it| match it { | ||
157 | Binding::Fragment(_) => 1, | ||
158 | Binding::Empty => 1, | ||
159 | Binding::Nested(it) => count(it.iter()), | ||
160 | }) | ||
161 | .sum() | ||
162 | } | ||
163 | } | ||
164 | |||
165 | #[derive(Debug, Clone)] | ||
166 | struct MatchState<'t> { | ||
167 | /// The position of the "dot" in this matcher | ||
168 | dot: OpDelimitedIter<'t>, | ||
169 | |||
170 | /// Token subtree stack | ||
171 | /// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. ) | ||
172 | /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does | ||
173 | /// that where the bottom of the stack is the outermost matcher. | ||
174 | stack: SmallVec<[OpDelimitedIter<'t>; 4]>, | ||
175 | |||
176 | /// The "parent" matcher position if we are in a repetition. That is, the matcher position just | ||
177 | /// before we enter the repetition. | ||
178 | up: Option<Box<MatchState<'t>>>, | ||
179 | |||
180 | /// The separator if we are in a repetition. | ||
181 | sep: Option<Separator>, | ||
182 | |||
183 | /// The KleeneOp of this sequence if we are in a repetition. | ||
184 | sep_kind: Option<RepeatKind>, | ||
185 | |||
186 | /// Number of tokens of seperator parsed | ||
187 | sep_parsed: Option<usize>, | ||
188 | |||
189 | /// Matched meta variables bindings | ||
190 | bindings: SmallVec<[Bindings; 4]>, | ||
191 | |||
192 | /// Cached result of meta variable parsing | ||
193 | meta_result: Option<(TtIter<'t>, ExpandResult<Option<Fragment>>)>, | ||
194 | |||
195 | /// Is error occuried in this state, will `poised` to "parent" | ||
196 | is_error: bool, | ||
197 | } | ||
198 | |||
199 | /// Process the matcher positions of `cur_items` until it is empty. In the process, this will | ||
200 | /// produce more items in `next_items`, `eof_items`, and `bb_items`. | ||
201 | /// | ||
202 | /// For more info about the how this happens, see the module-level doc comments and the inline | ||
203 | /// comments of this function. | ||
204 | /// | ||
205 | /// # Parameters | ||
206 | /// | ||
207 | /// - `src`: the current token of the parser. | ||
208 | /// - `stack`: the "parent" frames of the token tree | ||
209 | /// - `res`: the match result to store errors | ||
210 | /// - `cur_items`: the set of current items to be processed. This should be empty by the end of a | ||
211 | /// successful execution of this function. | ||
212 | /// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in | ||
213 | /// the function `parse`. | ||
214 | /// - `eof_items`: the set of items that would be valid if this was the EOF. | ||
215 | /// - `bb_items`: the set of items that are waiting for the black-box parser. | ||
216 | /// - `error_items`: the set of items in errors, used for error-resilient parsing | ||
217 | fn match_loop_inner<'t>( | ||
218 | src: TtIter<'t>, | ||
219 | stack: &[TtIter<'t>], | ||
220 | res: &mut Match, | ||
221 | cur_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
222 | bb_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
223 | next_items: &mut Vec<MatchState<'t>>, | ||
224 | eof_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
225 | error_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
226 | ) { | ||
227 | macro_rules! try_push { | ||
228 | ($items: expr, $it:expr) => { | ||
229 | if $it.is_error { | ||
230 | error_items.push($it); | ||
231 | } else { | ||
232 | $items.push($it); | ||
233 | } | ||
234 | }; | ||
235 | } | ||
236 | |||
237 | while let Some(mut item) = cur_items.pop() { | ||
238 | while item.dot.is_eof() { | ||
239 | match item.stack.pop() { | ||
240 | Some(frame) => { | ||
241 | item.dot = frame; | ||
242 | item.dot.next(); | ||
243 | } | ||
244 | None => break, | ||
245 | } | ||
246 | } | ||
247 | let op = match item.dot.peek() { | ||
248 | None => { | ||
249 | // We are at or past the end of the matcher of `item`. | ||
250 | if item.up.is_some() { | ||
251 | if item.sep_parsed.is_none() { | ||
252 | // Get the `up` matcher | ||
253 | let mut new_pos = *item.up.clone().unwrap(); | ||
254 | // Add matches from this repetition to the `matches` of `up` | ||
255 | if let Some(bindings) = new_pos.bindings.last_mut() { | ||
256 | for (i, b) in item.bindings.iter_mut().enumerate() { | ||
257 | bindings.push_nested(i, b.clone()).unwrap(); | ||
258 | } | ||
259 | } | ||
260 | // Move the "dot" past the repetition in `up` | ||
261 | new_pos.dot.next(); | ||
262 | new_pos.is_error = new_pos.is_error || item.is_error; | ||
263 | cur_items.push(new_pos); | ||
264 | } | ||
265 | |||
266 | // Check if we need a separator. | ||
267 | // We check the separator one by one | ||
268 | let sep_idx = *item.sep_parsed.as_ref().unwrap_or(&0); | ||
269 | let sep_len = item.sep.as_ref().map_or(0, Separator::tt_count); | ||
270 | if item.sep.is_some() && sep_idx != sep_len { | ||
271 | let sep = item.sep.as_ref().unwrap(); | ||
272 | if src.clone().expect_separator(&sep, sep_idx) { | ||
273 | item.dot.next(); | ||
274 | item.sep_parsed = Some(sep_idx + 1); | ||
275 | try_push!(next_items, item); | ||
276 | } | ||
277 | } | ||
278 | // We don't need a separator. Move the "dot" back to the beginning of the matcher | ||
279 | // and try to match again UNLESS we are only allowed to have _one_ repetition. | ||
280 | else if item.sep_kind != Some(RepeatKind::ZeroOrOne) { | ||
281 | item.dot = item.dot.reset(); | ||
282 | item.sep_parsed = None; | ||
283 | item.bindings.push(Bindings::default()); | ||
284 | cur_items.push(item); | ||
285 | } | ||
286 | } else { | ||
287 | // If we are not in a repetition, then being at the end of a matcher means that we have | ||
288 | // reached the potential end of the input. | ||
289 | try_push!(eof_items, item); | ||
290 | } | ||
291 | continue; | ||
292 | } | ||
293 | Some(it) => it, | ||
294 | }; | ||
295 | |||
296 | // We are in the middle of a matcher. | ||
297 | match op { | ||
298 | OpDelimited::Op(Op::Repeat { tokens, kind, separator }) => { | ||
299 | if matches!(kind, RepeatKind::ZeroOrMore | RepeatKind::ZeroOrOne) { | ||
300 | let mut new_item = item.clone(); | ||
301 | new_item.dot.next(); | ||
302 | let mut vars = Vec::new(); | ||
303 | let bindings = new_item.bindings.last_mut().unwrap(); | ||
304 | collect_vars(&mut vars, tokens); | ||
305 | for var in vars { | ||
306 | bindings.push_empty(&var); | ||
307 | } | ||
308 | cur_items.push(new_item); | ||
309 | } | ||
310 | cur_items.push(MatchState { | ||
311 | dot: tokens.iter_delimited(None), | ||
312 | stack: Default::default(), | ||
313 | up: Some(Box::new(item)), | ||
314 | sep: separator.clone(), | ||
315 | sep_kind: Some(*kind), | ||
316 | sep_parsed: None, | ||
317 | bindings: smallvec![Bindings::default()], | ||
318 | meta_result: None, | ||
319 | is_error: false, | ||
320 | }) | ||
321 | } | ||
322 | OpDelimited::Op(Op::Subtree { tokens, delimiter }) => { | ||
323 | if let Ok(subtree) = src.clone().expect_subtree() { | ||
324 | if subtree.delimiter_kind() == delimiter.map(|it| it.kind) { | ||
325 | item.stack.push(item.dot); | ||
326 | item.dot = tokens.iter_delimited(delimiter.as_ref()); | ||
327 | cur_items.push(item); | ||
328 | } | ||
329 | } | ||
330 | } | ||
331 | OpDelimited::Op(Op::Var { kind, name, .. }) => { | ||
332 | if let Some(kind) = kind { | ||
333 | let mut fork = src.clone(); | ||
334 | let match_res = match_meta_var(kind.as_str(), &mut fork); | ||
335 | match match_res.err { | ||
336 | None => { | ||
337 | // Some meta variables are optional (e.g. vis) | ||
338 | if match_res.value.is_some() { | ||
339 | item.meta_result = Some((fork, match_res)); | ||
340 | try_push!(bb_items, item); | ||
341 | } else { | ||
342 | item.bindings.last_mut().unwrap().push_optional(name); | ||
343 | item.dot.next(); | ||
344 | cur_items.push(item); | ||
345 | } | ||
346 | } | ||
347 | Some(err) => { | ||
348 | res.add_err(err); | ||
349 | match match_res.value { | ||
350 | Some(fragment) => { | ||
351 | item.bindings | ||
352 | .last_mut() | ||
353 | .unwrap() | ||
354 | .inner | ||
355 | .push((name.clone(), Binding::Fragment(fragment))); | ||
356 | } | ||
357 | _ => {} | ||
358 | } | ||
359 | item.is_error = true; | ||
360 | error_items.push(item); | ||
361 | } | ||
362 | } | ||
363 | } | ||
364 | } | ||
365 | OpDelimited::Op(Op::Leaf(leaf)) => { | ||
366 | if let Err(err) = match_leaf(&leaf, &mut src.clone()) { | ||
367 | res.add_err(err); | ||
368 | item.is_error = true; | ||
369 | } else { | ||
370 | item.dot.next(); | ||
371 | } | ||
372 | try_push!(next_items, item); | ||
373 | } | ||
374 | OpDelimited::Open => { | ||
375 | if matches!(src.clone().next(), Some(tt::TokenTree::Subtree(..))) { | ||
376 | item.dot.next(); | ||
377 | try_push!(next_items, item); | ||
378 | } | ||
379 | } | ||
380 | OpDelimited::Close => { | ||
381 | let is_delim_closed = src.peek_n(0).is_none() && !stack.is_empty(); | ||
382 | if is_delim_closed { | ||
383 | item.dot.next(); | ||
384 | try_push!(next_items, item); | ||
385 | } | ||
386 | } | ||
387 | } | ||
388 | } | ||
389 | } | ||
390 | |||
391 | fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { | ||
392 | let mut src = TtIter::new(src); | ||
393 | let mut stack: SmallVec<[TtIter; 1]> = SmallVec::new(); | ||
394 | let mut res = Match::default(); | ||
395 | let mut error_reover_item = None; | ||
396 | |||
397 | let mut cur_items = smallvec![MatchState { | ||
398 | dot: pattern.iter_delimited(None), | ||
399 | stack: Default::default(), | ||
400 | up: None, | ||
401 | sep: None, | ||
402 | sep_kind: None, | ||
403 | sep_parsed: None, | ||
404 | bindings: smallvec![Bindings::default()], | ||
405 | is_error: false, | ||
406 | meta_result: None, | ||
407 | }]; | ||
408 | |||
409 | let mut next_items = vec![]; | ||
410 | |||
411 | loop { | ||
412 | let mut bb_items = SmallVec::new(); | ||
413 | let mut eof_items = SmallVec::new(); | ||
414 | let mut error_items = SmallVec::new(); | ||
415 | |||
416 | stdx::always!(next_items.is_empty()); | ||
417 | |||
418 | match_loop_inner( | ||
419 | src.clone(), | ||
420 | &stack, | ||
421 | &mut res, | ||
422 | &mut cur_items, | ||
423 | &mut bb_items, | ||
424 | &mut next_items, | ||
425 | &mut eof_items, | ||
426 | &mut error_items, | ||
427 | ); | ||
428 | stdx::always!(cur_items.is_empty()); | ||
429 | |||
430 | if error_items.len() > 0 { | ||
431 | error_reover_item = error_items.pop(); | ||
432 | } else if eof_items.len() > 0 { | ||
433 | error_reover_item = Some(eof_items[0].clone()); | ||
434 | } | ||
435 | |||
436 | // We need to do some post processing after the `match_loop_inner`. | ||
437 | // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise, | ||
438 | // either the parse is ambiguous (which should never happen) or there is a syntax error. | ||
439 | if src.peek_n(0).is_none() && stack.is_empty() { | ||
440 | if eof_items.len() == 1 { | ||
441 | // remove all errors, because it is the correct answer ! | ||
442 | res = Match::default(); | ||
443 | res.bindings = eof_items[0].bindings[0].clone(); | ||
444 | } else { | ||
445 | // Error recovery | ||
446 | if error_reover_item.is_some() { | ||
447 | res.bindings = error_reover_item.unwrap().bindings[0].clone(); | ||
448 | } | ||
449 | res.add_err(ExpandError::UnexpectedToken); | ||
450 | } | ||
451 | return res; | ||
452 | } | ||
453 | |||
454 | // If there are no possible next positions AND we aren't waiting for the black-box parser, | ||
455 | // then there is a syntax error. | ||
456 | // | ||
457 | // Another possibility is that we need to call out to parse some rust nonterminal | ||
458 | // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong. | ||
459 | if (bb_items.is_empty() && next_items.is_empty()) | ||
460 | || (!bb_items.is_empty() && !next_items.is_empty()) | ||
461 | || bb_items.len() > 1 | ||
462 | { | ||
463 | res.unmatched_tts += src.len(); | ||
464 | while let Some(it) = stack.pop() { | ||
465 | src = it; | ||
466 | res.unmatched_tts += src.len(); | ||
467 | } | ||
468 | res.add_err(err!("leftover tokens")); | ||
469 | |||
470 | if let Some(mut error_reover_item) = error_reover_item { | ||
471 | res.bindings = error_reover_item.bindings.remove(0); | ||
472 | } | ||
473 | return res; | ||
474 | } | ||
475 | // Dump all possible `next_items` into `cur_items` for the next iteration. | ||
476 | else if !next_items.is_empty() { | ||
477 | // Now process the next token | ||
478 | cur_items.extend(next_items.drain(..)); | ||
479 | |||
480 | match src.next() { | ||
481 | Some(tt::TokenTree::Subtree(subtree)) => { | ||
482 | stack.push(src.clone()); | ||
483 | src = TtIter::new(subtree); | ||
484 | } | ||
485 | None if !stack.is_empty() => src = stack.pop().unwrap(), | ||
486 | _ => (), | ||
487 | } | ||
488 | } | ||
489 | // Finally, we have the case where we need to call the black-box parser to get some | ||
490 | // nonterminal. | ||
491 | else { | ||
492 | stdx::always!(bb_items.len() == 1); | ||
493 | let mut item = bb_items.pop().unwrap(); | ||
494 | |||
495 | if let Some(OpDelimited::Op(Op::Var { name, .. })) = item.dot.peek() { | ||
496 | let (iter, match_res) = item.meta_result.take().unwrap(); | ||
497 | let bindings = item.bindings.last_mut().unwrap(); | ||
498 | match match_res.value { | ||
499 | Some(fragment) => { | ||
500 | bindings.inner.push((name.clone(), Binding::Fragment(fragment))); | ||
501 | } | ||
502 | None if match_res.err.is_none() => bindings.push_optional(name), | ||
503 | _ => {} | ||
504 | } | ||
505 | if let Some(err) = match_res.err { | ||
506 | res.add_err(err); | ||
507 | } | ||
508 | src = iter.clone(); | ||
509 | item.dot.next(); | ||
510 | } else { | ||
511 | unreachable!() | ||
512 | } | ||
513 | cur_items.push(item); | ||
514 | } | ||
515 | stdx::always!(!cur_items.is_empty()); | ||
516 | } | ||
517 | } | ||
518 | |||
519 | fn match_leaf(lhs: &tt::Leaf, src: &mut TtIter) -> Result<(), ExpandError> { | ||
520 | let rhs = match src.expect_leaf() { | ||
521 | Ok(l) => l, | ||
522 | Err(()) => { | ||
523 | return Err(err!("expected leaf: `{}`", lhs)); | ||
524 | } | ||
525 | }; | ||
526 | match (lhs, rhs) { | ||
527 | ( | ||
528 | tt::Leaf::Punct(tt::Punct { char: lhs, .. }), | ||
529 | tt::Leaf::Punct(tt::Punct { char: rhs, .. }), | ||
530 | ) if lhs == rhs => (), | ||
531 | ( | ||
532 | tt::Leaf::Ident(tt::Ident { text: lhs, .. }), | ||
533 | tt::Leaf::Ident(tt::Ident { text: rhs, .. }), | ||
534 | ) if lhs == rhs => (), | ||
535 | ( | ||
536 | tt::Leaf::Literal(tt::Literal { text: lhs, .. }), | ||
537 | tt::Leaf::Literal(tt::Literal { text: rhs, .. }), | ||
538 | ) if lhs == rhs => (), | ||
539 | _ => { | ||
540 | return Err(ExpandError::UnexpectedToken); | ||
541 | } | ||
542 | } | ||
543 | |||
544 | Ok(()) | ||
545 | } | ||
546 | |||
547 | fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragment>> { | ||
548 | let fragment = match kind { | ||
549 | "path" => Path, | ||
550 | "expr" => Expr, | ||
551 | "ty" => Type, | ||
552 | "pat" => Pattern, | ||
553 | "stmt" => Statement, | ||
554 | "block" => Block, | ||
555 | "meta" => MetaItem, | ||
556 | "item" => Item, | ||
557 | _ => { | ||
558 | let tt_result = match kind { | ||
559 | "ident" => input | ||
560 | .expect_ident() | ||
561 | .map(|ident| Some(tt::Leaf::from(ident.clone()).into())) | ||
562 | .map_err(|()| err!("expected ident")), | ||
563 | "tt" => input.expect_tt().map(Some).map_err(|()| err!()), | ||
564 | "lifetime" => input | ||
565 | .expect_lifetime() | ||
566 | .map(|tt| Some(tt)) | ||
567 | .map_err(|()| err!("expected lifetime")), | ||
568 | "literal" => { | ||
569 | let neg = input.eat_char('-'); | ||
570 | input | ||
571 | .expect_literal() | ||
572 | .map(|literal| { | ||
573 | let lit = tt::Leaf::from(literal.clone()); | ||
574 | match neg { | ||
575 | None => Some(lit.into()), | ||
576 | Some(neg) => Some(tt::TokenTree::Subtree(tt::Subtree { | ||
577 | delimiter: None, | ||
578 | token_trees: vec![neg, lit.into()], | ||
579 | })), | ||
580 | } | ||
581 | }) | ||
582 | .map_err(|()| err!()) | ||
583 | } | ||
584 | // `vis` is optional | ||
585 | "vis" => match input.eat_vis() { | ||
586 | Some(vis) => Ok(Some(vis)), | ||
587 | None => Ok(None), | ||
588 | }, | ||
589 | _ => Err(ExpandError::UnexpectedToken), | ||
590 | }; | ||
591 | return tt_result.map(|it| it.map(Fragment::Tokens)).into(); | ||
592 | } | ||
593 | }; | ||
594 | let result = input.expect_fragment(fragment); | ||
595 | result.map(|tt| if kind == "expr" { tt.map(Fragment::Ast) } else { tt.map(Fragment::Tokens) }) | ||
596 | } | ||
597 | |||
598 | fn collect_vars(buf: &mut Vec<SmolStr>, pattern: &MetaTemplate) { | ||
599 | for op in pattern.iter() { | ||
600 | match op { | ||
601 | Op::Var { name, .. } => buf.push(name.clone()), | ||
602 | Op::Leaf(_) => (), | ||
603 | Op::Subtree { tokens, .. } => collect_vars(buf, tokens), | ||
604 | Op::Repeat { tokens, .. } => collect_vars(buf, tokens), | ||
605 | } | ||
606 | } | ||
607 | } | ||
608 | |||
609 | impl<'a> TtIter<'a> { | ||
610 | fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool { | ||
611 | let mut fork = self.clone(); | ||
612 | let ok = match separator { | ||
613 | Separator::Ident(lhs) if idx == 0 => match fork.expect_ident() { | ||
614 | Ok(rhs) => rhs.text == lhs.text, | ||
615 | _ => false, | ||
616 | }, | ||
617 | Separator::Literal(lhs) if idx == 0 => match fork.expect_literal() { | ||
618 | Ok(rhs) => match rhs { | ||
619 | tt::Leaf::Literal(rhs) => rhs.text == lhs.text, | ||
620 | tt::Leaf::Ident(rhs) => rhs.text == lhs.text, | ||
621 | tt::Leaf::Punct(_) => false, | ||
622 | }, | ||
623 | _ => false, | ||
624 | }, | ||
625 | Separator::Puncts(lhss) if idx < lhss.len() => match fork.expect_punct() { | ||
626 | Ok(rhs) => rhs.char == lhss[idx].char, | ||
627 | _ => false, | ||
628 | }, | ||
629 | _ => false, | ||
630 | }; | ||
631 | if ok { | ||
632 | *self = fork; | ||
633 | } | ||
634 | ok | ||
635 | } | ||
636 | |||
637 | fn expect_tt(&mut self) -> Result<tt::TokenTree, ()> { | ||
638 | match self.peek_n(0) { | ||
639 | Some(tt::TokenTree::Leaf(tt::Leaf::Punct(punct))) if punct.char == '\'' => { | ||
640 | return self.expect_lifetime(); | ||
641 | } | ||
642 | _ => (), | ||
643 | } | ||
644 | |||
645 | let tt = self.next().ok_or_else(|| ())?.clone(); | ||
646 | let punct = match tt { | ||
647 | tt::TokenTree::Leaf(tt::Leaf::Punct(punct)) if punct.spacing == tt::Spacing::Joint => { | ||
648 | punct | ||
649 | } | ||
650 | _ => return Ok(tt), | ||
651 | }; | ||
652 | |||
653 | let (second, third) = match (self.peek_n(0), self.peek_n(1)) { | ||
654 | ( | ||
655 | Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))), | ||
656 | Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p3))), | ||
657 | ) if p2.spacing == tt::Spacing::Joint => (p2.char, Some(p3.char)), | ||
658 | (Some(tt::TokenTree::Leaf(tt::Leaf::Punct(p2))), _) => (p2.char, None), | ||
659 | _ => return Ok(tt), | ||
660 | }; | ||
661 | |||
662 | match (punct.char, second, third) { | ||
663 | ('.', '.', Some('.')) | ||
664 | | ('.', '.', Some('=')) | ||
665 | | ('<', '<', Some('=')) | ||
666 | | ('>', '>', Some('=')) => { | ||
667 | let tt2 = self.next().unwrap().clone(); | ||
668 | let tt3 = self.next().unwrap().clone(); | ||
669 | Ok(tt::Subtree { delimiter: None, token_trees: vec![tt, tt2, tt3] }.into()) | ||
670 | } | ||
671 | ('-', '=', _) | ||
672 | | ('-', '>', _) | ||
673 | | (':', ':', _) | ||
674 | | ('!', '=', _) | ||
675 | | ('.', '.', _) | ||
676 | | ('*', '=', _) | ||
677 | | ('/', '=', _) | ||
678 | | ('&', '&', _) | ||
679 | | ('&', '=', _) | ||
680 | | ('%', '=', _) | ||
681 | | ('^', '=', _) | ||
682 | | ('+', '=', _) | ||
683 | | ('<', '<', _) | ||
684 | | ('<', '=', _) | ||
685 | | ('=', '=', _) | ||
686 | | ('=', '>', _) | ||
687 | | ('>', '=', _) | ||
688 | | ('>', '>', _) | ||
689 | | ('|', '=', _) | ||
690 | | ('|', '|', _) => { | ||
691 | let tt2 = self.next().unwrap().clone(); | ||
692 | Ok(tt::Subtree { delimiter: None, token_trees: vec![tt, tt2] }.into()) | ||
693 | } | ||
694 | _ => Ok(tt), | ||
695 | } | ||
696 | } | ||
697 | |||
698 | fn expect_lifetime(&mut self) -> Result<tt::TokenTree, ()> { | ||
699 | let punct = self.expect_punct()?; | ||
700 | if punct.char != '\'' { | ||
701 | return Err(()); | ||
702 | } | ||
703 | let ident = self.expect_ident()?; | ||
704 | |||
705 | Ok(tt::Subtree { | ||
706 | delimiter: None, | ||
707 | token_trees: vec![ | ||
708 | tt::Leaf::Punct(*punct).into(), | ||
709 | tt::Leaf::Ident(ident.clone()).into(), | ||
710 | ], | ||
711 | } | ||
712 | .into()) | ||
713 | } | ||
714 | |||
715 | fn eat_vis(&mut self) -> Option<tt::TokenTree> { | ||
716 | let mut fork = self.clone(); | ||
717 | match fork.expect_fragment(Visibility) { | ||
718 | ExpandResult { value: tt, err: None } => { | ||
719 | *self = fork; | ||
720 | tt | ||
721 | } | ||
722 | ExpandResult { value: _, err: Some(_) } => None, | ||
723 | } | ||
724 | } | ||
725 | |||
726 | fn eat_char(&mut self, c: char) -> Option<tt::TokenTree> { | ||
727 | let mut fork = self.clone(); | ||
728 | match fork.expect_char(c) { | ||
729 | Ok(_) => { | ||
730 | let tt = self.next().cloned(); | ||
731 | *self = fork; | ||
732 | tt | ||
733 | } | ||
734 | Err(_) => None, | ||
735 | } | ||
736 | } | ||
737 | } | ||
diff --git a/crates/mbe/src/expander/transcriber.rs b/crates/mbe/src/expander/transcriber.rs new file mode 100644 index 000000000..ad9953a7d --- /dev/null +++ b/crates/mbe/src/expander/transcriber.rs | |||
@@ -0,0 +1,247 @@ | |||
1 | //! Transcriber takes a template, like `fn $ident() {}`, a set of bindings like | ||
2 | //! `$ident => foo`, interpolates variables in the template, to get `fn foo() {}` | ||
3 | |||
4 | use syntax::SmolStr; | ||
5 | use tt::Delimiter; | ||
6 | |||
7 | use super::ExpandResult; | ||
8 | use crate::{ | ||
9 | expander::{Binding, Bindings, Fragment}, | ||
10 | parser::{Op, RepeatKind, Separator}, | ||
11 | ExpandError, MetaTemplate, | ||
12 | }; | ||
13 | |||
14 | impl Bindings { | ||
15 | fn contains(&self, name: &str) -> bool { | ||
16 | self.inner.iter().any(|(n, _)| n == name) | ||
17 | } | ||
18 | |||
19 | fn get(&self, name: &str, nesting: &mut [NestingState]) -> Result<&Fragment, ExpandError> { | ||
20 | let mut b: &Binding = self | ||
21 | .inner | ||
22 | .iter() | ||
23 | .find_map(|(n, b)| if n == name { Some(b) } else { None }) | ||
24 | .ok_or_else(|| { | ||
25 | ExpandError::BindingError(format!("could not find binding `{}`", name)) | ||
26 | })?; | ||
27 | for nesting_state in nesting.iter_mut() { | ||
28 | nesting_state.hit = true; | ||
29 | b = match b { | ||
30 | Binding::Fragment(_) => break, | ||
31 | Binding::Nested(bs) => bs.get(nesting_state.idx).ok_or_else(|| { | ||
32 | nesting_state.at_end = true; | ||
33 | ExpandError::BindingError(format!("could not find nested binding `{}`", name)) | ||
34 | })?, | ||
35 | Binding::Empty => { | ||
36 | nesting_state.at_end = true; | ||
37 | return Err(ExpandError::BindingError(format!( | ||
38 | "could not find empty binding `{}`", | ||
39 | name | ||
40 | ))); | ||
41 | } | ||
42 | }; | ||
43 | } | ||
44 | match b { | ||
45 | Binding::Fragment(it) => Ok(it), | ||
46 | Binding::Nested(_) => Err(ExpandError::BindingError(format!( | ||
47 | "expected simple binding, found nested binding `{}`", | ||
48 | name | ||
49 | ))), | ||
50 | Binding::Empty => Err(ExpandError::BindingError(format!( | ||
51 | "expected simple binding, found empty binding `{}`", | ||
52 | name | ||
53 | ))), | ||
54 | } | ||
55 | } | ||
56 | } | ||
57 | |||
58 | pub(super) fn transcribe( | ||
59 | template: &MetaTemplate, | ||
60 | bindings: &Bindings, | ||
61 | ) -> ExpandResult<tt::Subtree> { | ||
62 | let mut ctx = ExpandCtx { bindings: &bindings, nesting: Vec::new() }; | ||
63 | let mut arena: Vec<tt::TokenTree> = Vec::new(); | ||
64 | expand_subtree(&mut ctx, template, None, &mut arena) | ||
65 | } | ||
66 | |||
67 | #[derive(Debug)] | ||
68 | struct NestingState { | ||
69 | idx: usize, | ||
70 | /// `hit` is currently necessary to tell `expand_repeat` if it should stop | ||
71 | /// because there is no variable in use by the current repetition | ||
72 | hit: bool, | ||
73 | /// `at_end` is currently necessary to tell `expand_repeat` if it should stop | ||
74 | /// because there is no more value available for the current repetition | ||
75 | at_end: bool, | ||
76 | } | ||
77 | |||
78 | #[derive(Debug)] | ||
79 | struct ExpandCtx<'a> { | ||
80 | bindings: &'a Bindings, | ||
81 | nesting: Vec<NestingState>, | ||
82 | } | ||
83 | |||
84 | fn expand_subtree( | ||
85 | ctx: &mut ExpandCtx, | ||
86 | template: &MetaTemplate, | ||
87 | delimiter: Option<Delimiter>, | ||
88 | arena: &mut Vec<tt::TokenTree>, | ||
89 | ) -> ExpandResult<tt::Subtree> { | ||
90 | // remember how many elements are in the arena now - when returning, we want to drain exactly how many elements we added. This way, the recursive uses of the arena get their own "view" of the arena, but will reuse the allocation | ||
91 | let start_elements = arena.len(); | ||
92 | let mut err = None; | ||
93 | for op in template.iter() { | ||
94 | match op { | ||
95 | Op::Leaf(tt) => arena.push(tt.clone().into()), | ||
96 | Op::Subtree { tokens, delimiter } => { | ||
97 | let ExpandResult { value: tt, err: e } = | ||
98 | expand_subtree(ctx, &tokens, *delimiter, arena); | ||
99 | err = err.or(e); | ||
100 | arena.push(tt.into()); | ||
101 | } | ||
102 | Op::Var { name, id, .. } => { | ||
103 | let ExpandResult { value: fragment, err: e } = expand_var(ctx, &name, *id); | ||
104 | err = err.or(e); | ||
105 | push_fragment(arena, fragment); | ||
106 | } | ||
107 | Op::Repeat { tokens: subtree, kind, separator } => { | ||
108 | let ExpandResult { value: fragment, err: e } = | ||
109 | expand_repeat(ctx, subtree, *kind, separator, arena); | ||
110 | err = err.or(e); | ||
111 | push_fragment(arena, fragment) | ||
112 | } | ||
113 | } | ||
114 | } | ||
115 | // drain the elements added in this instance of expand_subtree | ||
116 | let tts = arena.drain(start_elements..arena.len()).collect(); | ||
117 | ExpandResult { value: tt::Subtree { delimiter, token_trees: tts }, err } | ||
118 | } | ||
119 | |||
120 | fn expand_var(ctx: &mut ExpandCtx, v: &SmolStr, id: tt::TokenId) -> ExpandResult<Fragment> { | ||
121 | // We already handle $crate case in mbe parser | ||
122 | debug_assert!(v != "crate"); | ||
123 | |||
124 | if !ctx.bindings.contains(v) { | ||
125 | // Note that it is possible to have a `$var` inside a macro which is not bound. | ||
126 | // For example: | ||
127 | // ``` | ||
128 | // macro_rules! foo { | ||
129 | // ($a:ident, $b:ident, $c:tt) => { | ||
130 | // macro_rules! bar { | ||
131 | // ($bi:ident) => { | ||
132 | // fn $bi() -> u8 {$c} | ||
133 | // } | ||
134 | // } | ||
135 | // } | ||
136 | // ``` | ||
137 | // We just treat it a normal tokens | ||
138 | let tt = tt::Subtree { | ||
139 | delimiter: None, | ||
140 | token_trees: vec![ | ||
141 | tt::Leaf::from(tt::Punct { char: '$', spacing: tt::Spacing::Alone, id }).into(), | ||
142 | tt::Leaf::from(tt::Ident { text: v.clone(), id }).into(), | ||
143 | ], | ||
144 | } | ||
145 | .into(); | ||
146 | ExpandResult::ok(Fragment::Tokens(tt)) | ||
147 | } else { | ||
148 | ctx.bindings.get(&v, &mut ctx.nesting).map_or_else( | ||
149 | |e| ExpandResult { value: Fragment::Tokens(tt::TokenTree::empty()), err: Some(e) }, | ||
150 | |b| ExpandResult::ok(b.clone()), | ||
151 | ) | ||
152 | } | ||
153 | } | ||
154 | |||
155 | fn expand_repeat( | ||
156 | ctx: &mut ExpandCtx, | ||
157 | template: &MetaTemplate, | ||
158 | kind: RepeatKind, | ||
159 | separator: &Option<Separator>, | ||
160 | arena: &mut Vec<tt::TokenTree>, | ||
161 | ) -> ExpandResult<Fragment> { | ||
162 | let mut buf: Vec<tt::TokenTree> = Vec::new(); | ||
163 | ctx.nesting.push(NestingState { idx: 0, at_end: false, hit: false }); | ||
164 | // Dirty hack to make macro-expansion terminate. | ||
165 | // This should be replaced by a proper macro-by-example implementation | ||
166 | let limit = 65536; | ||
167 | let mut has_seps = 0; | ||
168 | let mut counter = 0; | ||
169 | |||
170 | loop { | ||
171 | let ExpandResult { value: mut t, err: e } = expand_subtree(ctx, template, None, arena); | ||
172 | let nesting_state = ctx.nesting.last_mut().unwrap(); | ||
173 | if nesting_state.at_end || !nesting_state.hit { | ||
174 | break; | ||
175 | } | ||
176 | nesting_state.idx += 1; | ||
177 | nesting_state.hit = false; | ||
178 | |||
179 | counter += 1; | ||
180 | if counter == limit { | ||
181 | log::warn!("expand_tt in repeat pattern exceed limit => {:#?}\n{:#?}", template, ctx); | ||
182 | break; | ||
183 | } | ||
184 | |||
185 | if e.is_some() { | ||
186 | continue; | ||
187 | } | ||
188 | |||
189 | t.delimiter = None; | ||
190 | push_subtree(&mut buf, t); | ||
191 | |||
192 | if let Some(ref sep) = separator { | ||
193 | match sep { | ||
194 | Separator::Ident(ident) => { | ||
195 | has_seps = 1; | ||
196 | buf.push(tt::Leaf::from(ident.clone()).into()); | ||
197 | } | ||
198 | Separator::Literal(lit) => { | ||
199 | has_seps = 1; | ||
200 | buf.push(tt::Leaf::from(lit.clone()).into()); | ||
201 | } | ||
202 | |||
203 | Separator::Puncts(puncts) => { | ||
204 | has_seps = puncts.len(); | ||
205 | for punct in puncts { | ||
206 | buf.push(tt::Leaf::from(*punct).into()); | ||
207 | } | ||
208 | } | ||
209 | } | ||
210 | } | ||
211 | |||
212 | if RepeatKind::ZeroOrOne == kind { | ||
213 | break; | ||
214 | } | ||
215 | } | ||
216 | |||
217 | ctx.nesting.pop().unwrap(); | ||
218 | for _ in 0..has_seps { | ||
219 | buf.pop(); | ||
220 | } | ||
221 | |||
222 | // Check if it is a single token subtree without any delimiter | ||
223 | // e.g {Delimiter:None> ['>'] /Delimiter:None>} | ||
224 | let tt = tt::Subtree { delimiter: None, token_trees: buf }.into(); | ||
225 | |||
226 | if RepeatKind::OneOrMore == kind && counter == 0 { | ||
227 | return ExpandResult { | ||
228 | value: Fragment::Tokens(tt), | ||
229 | err: Some(ExpandError::UnexpectedToken), | ||
230 | }; | ||
231 | } | ||
232 | ExpandResult::ok(Fragment::Tokens(tt)) | ||
233 | } | ||
234 | |||
235 | fn push_fragment(buf: &mut Vec<tt::TokenTree>, fragment: Fragment) { | ||
236 | match fragment { | ||
237 | Fragment::Tokens(tt::TokenTree::Subtree(tt)) => push_subtree(buf, tt), | ||
238 | Fragment::Tokens(tt) | Fragment::Ast(tt) => buf.push(tt), | ||
239 | } | ||
240 | } | ||
241 | |||
242 | fn push_subtree(buf: &mut Vec<tt::TokenTree>, tt: tt::Subtree) { | ||
243 | match tt.delimiter { | ||
244 | None => buf.extend(tt.token_trees), | ||
245 | _ => buf.push(tt.into()), | ||
246 | } | ||
247 | } | ||