aboutsummaryrefslogtreecommitdiff
path: root/crates/mbe/src
diff options
context:
space:
mode:
authorbors[bot] <26634292+bors[bot]@users.noreply.github.com>2021-03-02 13:20:47 +0000
committerGitHub <[email protected]>2021-03-02 13:20:47 +0000
commit91bf5fa827b2c4ef74cb68c172c79127115e394f (patch)
treeebbd7ebb043fe9a8bee8ac2419a461c3387b1888 /crates/mbe/src
parent8eee9149e87ea58d4191d04ebe6faf57ac8485a3 (diff)
parentcff2201c30bda7b346e3b47875d95a2cf9cafaa3 (diff)
Merge #7513
7513: NFA parser for mbe matcher r=matklad a=edwin0cheng Almost straight porting from rustc one, but a little bit slow :( ``` rust-analyzer analysis-stats -q . ``` From: ```log Database loaded: 636.11ms, 277minstr crates: 36, mods: 594, decls: 11527, fns: 9017 Item Collection: 10.99s, 60ginstr exprs: 249618, ??ty: 2699 (1%), ?ty: 2101 (0%), !ty: 932 Inference: 28.94s, 123ginstr Total: 39.93s, 184ginstr ``` To: ```log Database loaded: 630.90ms, 277minstr crates: 36, mods: 594, decls: 11528, fns: 9018 Item Collection: 13.70s, 77ginstr exprs: 249482, ??ty: 2699 (1%), ?ty: 2101 (0%), !ty: 932 Inference: 30.27s, 133ginstr Total: 43.97s, 211ginstr ``` Fixes #4777 Co-authored-by: Edwin Cheng <[email protected]>
Diffstat (limited to 'crates/mbe/src')
-rw-r--r--crates/mbe/src/benchmark.rs40
-rw-r--r--crates/mbe/src/expander.rs16
-rw-r--r--crates/mbe/src/expander/matcher.rs559
-rw-r--r--crates/mbe/src/expander/transcriber.rs12
-rw-r--r--crates/mbe/src/lib.rs11
-rw-r--r--crates/mbe/src/parser.rs80
-rw-r--r--crates/mbe/src/tests.rs23
7 files changed, 578 insertions, 163 deletions
diff --git a/crates/mbe/src/benchmark.rs b/crates/mbe/src/benchmark.rs
index 6d81be880..503ad1355 100644
--- a/crates/mbe/src/benchmark.rs
+++ b/crates/mbe/src/benchmark.rs
@@ -40,18 +40,12 @@ fn benchmark_expand_macro_rules() {
40 .into_iter() 40 .into_iter()
41 .map(|(id, tt)| { 41 .map(|(id, tt)| {
42 let res = rules[&id].expand(&tt); 42 let res = rules[&id].expand(&tt);
43 if res.err.is_some() { 43 assert!(res.err.is_none());
44 // FIXME:
45 // Currently `invocation_fixtures` will generate some correct invocations but
46 // cannot be expanded by mbe. We ignore errors here.
47 // See: https://github.com/rust-analyzer/rust-analyzer/issues/4777
48 eprintln!("err from {} {:?}", id, res.err);
49 }
50 res.value.token_trees.len() 44 res.value.token_trees.len()
51 }) 45 })
52 .sum() 46 .sum()
53 }; 47 };
54 assert_eq!(hash, 66995); 48 assert_eq!(hash, 69413);
55} 49}
56 50
57fn macro_rules_fixtures() -> FxHashMap<String, MacroRules> { 51fn macro_rules_fixtures() -> FxHashMap<String, MacroRules> {
@@ -77,7 +71,7 @@ fn macro_rules_fixtures_tt() -> FxHashMap<String, tt::Subtree> {
77 .collect() 71 .collect()
78} 72}
79 73
80// Generate random invocation fixtures from rules 74/// Generate random invocation fixtures from rules
81fn invocation_fixtures(rules: &FxHashMap<String, MacroRules>) -> Vec<(String, tt::Subtree)> { 75fn invocation_fixtures(rules: &FxHashMap<String, MacroRules>) -> Vec<(String, tt::Subtree)> {
82 let mut seed = 123456789; 76 let mut seed = 123456789;
83 let mut res = Vec::new(); 77 let mut res = Vec::new();
@@ -86,11 +80,31 @@ fn invocation_fixtures(rules: &FxHashMap<String, MacroRules>) -> Vec<(String, tt
86 for rule in &it.rules { 80 for rule in &it.rules {
87 // Generate twice 81 // Generate twice
88 for _ in 0..2 { 82 for _ in 0..2 {
89 let mut subtree = tt::Subtree::default(); 83 // The input are generated by filling the `Op` randomly.
90 for op in rule.lhs.iter() { 84 // However, there are some cases generated are ambiguous for expanding, for example:
91 collect_from_op(op, &mut subtree, &mut seed); 85 // ```rust
86 // macro_rules! m {
87 // ($($t:ident),* as $ty:ident) => {}
88 // }
89 // m!(as u32); // error: local ambiguity: multiple parsing options: built-in NTs ident ('t') or 1 other option.
90 // ```
91 //
92 // So we just skip any error cases and try again
93 let mut try_cnt = 0;
94 loop {
95 let mut subtree = tt::Subtree::default();
96 for op in rule.lhs.iter() {
97 collect_from_op(op, &mut subtree, &mut seed);
98 }
99 if it.expand(&subtree).err.is_none() {
100 res.push((name.clone(), subtree));
101 break;
102 }
103 try_cnt += 1;
104 if try_cnt > 100 {
105 panic!("invocaton fixture {} cannot be generated.\n", name);
106 }
92 } 107 }
93 res.push((name.clone(), subtree));
94 } 108 }
95 } 109 }
96 } 110 }
diff --git a/crates/mbe/src/expander.rs b/crates/mbe/src/expander.rs
index e7e14b3cc..2efff8f52 100644
--- a/crates/mbe/src/expander.rs
+++ b/crates/mbe/src/expander.rs
@@ -5,7 +5,7 @@
5mod matcher; 5mod matcher;
6mod transcriber; 6mod transcriber;
7 7
8use rustc_hash::FxHashMap; 8use smallvec::SmallVec;
9use syntax::SmolStr; 9use syntax::SmolStr;
10 10
11use crate::{ExpandError, ExpandResult}; 11use crate::{ExpandError, ExpandResult};
@@ -28,10 +28,10 @@ pub(crate) fn expand_rules(
28 return ExpandResult::ok(value); 28 return ExpandResult::ok(value);
29 } 29 }
30 } 30 }
31 // Use the rule if we matched more tokens, or had fewer errors 31 // Use the rule if we matched more tokens, or bound variables count
32 if let Some((prev_match, _)) = &match_ { 32 if let Some((prev_match, _)) = &match_ {
33 if (new_match.unmatched_tts, new_match.err_count) 33 if (new_match.unmatched_tts, -(new_match.bound_count as i32))
34 < (prev_match.unmatched_tts, prev_match.err_count) 34 < (prev_match.unmatched_tts, -(prev_match.bound_count as i32))
35 { 35 {
36 match_ = Some((new_match, rule)); 36 match_ = Some((new_match, rule));
37 } 37 }
@@ -94,19 +94,19 @@ pub(crate) fn expand_rules(
94/// In other words, `Bindings` is a *multi* mapping from `SmolStr` to 94/// In other words, `Bindings` is a *multi* mapping from `SmolStr` to
95/// `tt::TokenTree`, where the index to select a particular `TokenTree` among 95/// `tt::TokenTree`, where the index to select a particular `TokenTree` among
96/// many is not a plain `usize`, but an `&[usize]`. 96/// many is not a plain `usize`, but an `&[usize]`.
97#[derive(Debug, Default)] 97#[derive(Debug, Default, Clone, PartialEq, Eq)]
98struct Bindings { 98struct Bindings {
99 inner: FxHashMap<SmolStr, Binding>, 99 inner: SmallVec<[(SmolStr, Binding); 4]>,
100} 100}
101 101
102#[derive(Debug)] 102#[derive(Debug, Clone, PartialEq, Eq)]
103enum Binding { 103enum Binding {
104 Fragment(Fragment), 104 Fragment(Fragment),
105 Nested(Vec<Binding>), 105 Nested(Vec<Binding>),
106 Empty, 106 Empty,
107} 107}
108 108
109#[derive(Debug, Clone)] 109#[derive(Debug, Clone, PartialEq, Eq)]
110enum Fragment { 110enum Fragment {
111 /// token fragments are just copy-pasted into the output 111 /// token fragments are just copy-pasted into the output
112 Tokens(tt::TokenTree), 112 Tokens(tt::TokenTree),
diff --git a/crates/mbe/src/expander/matcher.rs b/crates/mbe/src/expander/matcher.rs
index e3bd4c09a..9d3d28055 100644
--- a/crates/mbe/src/expander/matcher.rs
+++ b/crates/mbe/src/expander/matcher.rs
@@ -1,14 +1,74 @@
1//! FIXME: write short doc here 1//! An NFA-based parser, which is porting from rustc mbe parsing code
2//!
3//! See https://github.com/rust-lang/rust/blob/70b18bc2cbac4712020019f5bf57c00905373205/compiler/rustc_expand/src/mbe/macro_parser.rs
4//! Here is a quick intro to how the parser works, copied from rustc:
5//!
6//! A 'position' is a dot in the middle of a matcher, usually represented as a
7//! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`.
8//!
9//! The parser walks through the input a character at a time, maintaining a list
10//! of threads consistent with the current position in the input string: `cur_items`.
11//!
12//! As it processes them, it fills up `eof_items` with threads that would be valid if
13//! the macro invocation is now over, `bb_items` with threads that are waiting on
14//! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting
15//! on a particular token. Most of the logic concerns moving the · through the
16//! repetitions indicated by Kleene stars. The rules for moving the · without
17//! consuming any input are called epsilon transitions. It only advances or calls
18//! out to the real Rust parser when no `cur_items` threads remain.
19//!
20//! Example:
21//!
22//! ```text, ignore
23//! Start parsing a a a a b against [· a $( a )* a b].
24//!
25//! Remaining input: a a a a b
26//! next: [· a $( a )* a b]
27//!
28//! - - - Advance over an a. - - -
29//!
30//! Remaining input: a a a b
31//! cur: [a · $( a )* a b]
32//! Descend/Skip (first item).
33//! next: [a $( · a )* a b] [a $( a )* · a b].
34//!
35//! - - - Advance over an a. - - -
36//!
37//! Remaining input: a a b
38//! cur: [a $( a · )* a b] [a $( a )* a · b]
39//! Follow epsilon transition: Finish/Repeat (first item)
40//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
41//!
42//! - - - Advance over an a. - - - (this looks exactly like the last step)
43//!
44//! Remaining input: a b
45//! cur: [a $( a · )* a b] [a $( a )* a · b]
46//! Follow epsilon transition: Finish/Repeat (first item)
47//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
48//!
49//! - - - Advance over an a. - - - (this looks exactly like the last step)
50//!
51//! Remaining input: b
52//! cur: [a $( a · )* a b] [a $( a )* a · b]
53//! Follow epsilon transition: Finish/Repeat (first item)
54//! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b]
55//!
56//! - - - Advance over a b. - - -
57//!
58//! Remaining input: ''
59//! eof: [a $( a )* a b ·]
60//! ```
2 61
3use crate::{ 62use crate::{
4 expander::{Binding, Bindings, Fragment}, 63 expander::{Binding, Bindings, Fragment},
5 parser::{Op, RepeatKind, Separator}, 64 parser::{Op, OpDelimited, OpDelimitedIter, RepeatKind, Separator},
6 tt_iter::TtIter, 65 tt_iter::TtIter,
7 ExpandError, MetaTemplate, 66 ExpandError, MetaTemplate,
8}; 67};
9 68
10use super::ExpandResult; 69use super::ExpandResult;
11use parser::FragmentKind::*; 70use parser::FragmentKind::*;
71use smallvec::{smallvec, SmallVec};
12use syntax::SmolStr; 72use syntax::SmolStr;
13 73
14impl Bindings { 74impl Bindings {
@@ -16,19 +76,19 @@ impl Bindings {
16 // FIXME: Do we have a better way to represent an empty token ? 76 // FIXME: Do we have a better way to represent an empty token ?
17 // Insert an empty subtree for empty token 77 // Insert an empty subtree for empty token
18 let tt = tt::Subtree::default().into(); 78 let tt = tt::Subtree::default().into();
19 self.inner.insert(name.clone(), Binding::Fragment(Fragment::Tokens(tt))); 79 self.inner.push((name.clone(), Binding::Fragment(Fragment::Tokens(tt))));
20 } 80 }
21 81
22 fn push_empty(&mut self, name: &SmolStr) { 82 fn push_empty(&mut self, name: &SmolStr) {
23 self.inner.insert(name.clone(), Binding::Empty); 83 self.inner.push((name.clone(), Binding::Empty));
24 } 84 }
25 85
26 fn push_nested(&mut self, idx: usize, nested: Bindings) -> Result<(), ExpandError> { 86 fn push_nested(&mut self, idx: usize, nested: Bindings) -> Result<(), ExpandError> {
27 for (key, value) in nested.inner { 87 for (key, value) in nested.inner {
28 if !self.inner.contains_key(&key) { 88 if self.get_mut(&key).is_none() {
29 self.inner.insert(key.clone(), Binding::Nested(Vec::new())); 89 self.inner.push((key.clone(), Binding::Nested(Vec::new())));
30 } 90 }
31 match self.inner.get_mut(&key) { 91 match self.get_mut(&key) {
32 Some(Binding::Nested(it)) => { 92 Some(Binding::Nested(it)) => {
33 // insert empty nested bindings before this one 93 // insert empty nested bindings before this one
34 while it.len() < idx { 94 while it.len() < idx {
@@ -46,6 +106,14 @@ impl Bindings {
46 } 106 }
47 Ok(()) 107 Ok(())
48 } 108 }
109
110 fn get_mut(&mut self, name: &str) -> Option<&mut Binding> {
111 self.inner.iter_mut().find_map(|(n, b)| if n == name { Some(b) } else { None })
112 }
113
114 fn bindings(&self) -> impl Iterator<Item = &Binding> {
115 self.inner.iter().map(|(_, b)| b)
116 }
49} 117}
50 118
51macro_rules! err { 119macro_rules! err {
@@ -57,7 +125,7 @@ macro_rules! err {
57 }; 125 };
58} 126}
59 127
60#[derive(Debug, Default)] 128#[derive(Clone, Debug, Default, PartialEq, Eq)]
61pub(super) struct Match { 129pub(super) struct Match {
62 pub(super) bindings: Bindings, 130 pub(super) bindings: Bindings,
63 /// We currently just keep the first error and count the rest to compare matches. 131 /// We currently just keep the first error and count the rest to compare matches.
@@ -65,6 +133,8 @@ pub(super) struct Match {
65 pub(super) err_count: usize, 133 pub(super) err_count: usize,
66 /// How many top-level token trees were left to match. 134 /// How many top-level token trees were left to match.
67 pub(super) unmatched_tts: usize, 135 pub(super) unmatched_tts: usize,
136 /// The number of bound variables
137 pub(super) bound_count: usize,
68} 138}
69 139
70impl Match { 140impl Match {
@@ -76,72 +146,373 @@ impl Match {
76} 146}
77 147
78/// Matching errors are added to the `Match`. 148/// Matching errors are added to the `Match`.
79pub(super) fn match_(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { 149pub(super) fn match_(pattern: &MetaTemplate, input: &tt::Subtree) -> Match {
80 let mut res = Match::default(); 150 let mut res = match_loop(pattern, &input);
81 let mut src = TtIter::new(src); 151 res.bound_count = count(res.bindings.bindings());
152 return res;
153
154 fn count<'a>(bindings: impl Iterator<Item = &'a Binding>) -> usize {
155 bindings
156 .map(|it| match it {
157 Binding::Fragment(_) => 1,
158 Binding::Empty => 1,
159 Binding::Nested(it) => count(it.iter()),
160 })
161 .sum()
162 }
163}
82 164
83 match_tokens(&mut res, pattern, &mut src); 165#[derive(Debug, Clone)]
166struct MatchState<'t> {
167 /// The position of the "dot" in this matcher
168 dot: OpDelimitedIter<'t>,
84 169
85 if src.len() > 0 { 170 /// Token subtree stack
86 res.unmatched_tts += src.len(); 171 /// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. )
87 res.add_err(err!("leftover tokens")); 172 /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does
88 } 173 /// that where the bottom of the stack is the outermost matcher.
174 stack: SmallVec<[OpDelimitedIter<'t>; 4]>,
175
176 /// The "parent" matcher position if we are in a repetition. That is, the matcher position just
177 /// before we enter the repetition.
178 up: Option<Box<MatchState<'t>>>,
179
180 /// The separator if we are in a repetition.
181 sep: Option<Separator>,
182
183 /// The KleeneOp of this sequence if we are in a repetition.
184 sep_kind: Option<RepeatKind>,
89 185
90 res 186 /// Number of tokens of seperator parsed
187 sep_parsed: Option<usize>,
188
189 /// Matched meta variables bindings
190 bindings: SmallVec<[Bindings; 4]>,
191
192 /// Cached result of meta variable parsing
193 meta_result: Option<(TtIter<'t>, ExpandResult<Option<Fragment>>)>,
194
195 /// Is error occuried in this state, will `poised` to "parent"
196 is_error: bool,
91} 197}
92 198
93fn match_tokens(res: &mut Match, pattern: &MetaTemplate, src: &mut TtIter) { 199/// Process the matcher positions of `cur_items` until it is empty. In the process, this will
94 for op in pattern.iter() { 200/// produce more items in `next_items`, `eof_items`, and `bb_items`.
95 match op { 201///
96 Op::Leaf(lhs) => { 202/// For more info about the how this happens, see the module-level doc comments and the inline
97 if let Err(err) = match_leaf(lhs, src) { 203/// comments of this function.
98 res.add_err(err); 204///
99 continue; 205/// # Parameters
206///
207/// - `src`: the current token of the parser.
208/// - `stack`: the "parent" frames of the token tree
209/// - `res`: the match result to store errors
210/// - `cur_items`: the set of current items to be processed. This should be empty by the end of a
211/// successful execution of this function.
212/// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in
213/// the function `parse`.
214/// - `eof_items`: the set of items that would be valid if this was the EOF.
215/// - `bb_items`: the set of items that are waiting for the black-box parser.
216/// - `error_items`: the set of items in errors, used for error-resilient parsing
217fn match_loop_inner<'t>(
218 src: TtIter<'t>,
219 stack: &[TtIter<'t>],
220 res: &mut Match,
221 cur_items: &mut SmallVec<[MatchState<'t>; 1]>,
222 bb_items: &mut SmallVec<[MatchState<'t>; 1]>,
223 next_items: &mut Vec<MatchState<'t>>,
224 eof_items: &mut SmallVec<[MatchState<'t>; 1]>,
225 error_items: &mut SmallVec<[MatchState<'t>; 1]>,
226) {
227 macro_rules! try_push {
228 ($items: expr, $it:expr) => {
229 if $it.is_error {
230 error_items.push($it);
231 } else {
232 $items.push($it);
233 }
234 };
235 }
236
237 while let Some(mut item) = cur_items.pop() {
238 while item.dot.is_eof() {
239 match item.stack.pop() {
240 Some(frame) => {
241 item.dot = frame;
242 item.dot.next();
100 } 243 }
244 None => break,
101 } 245 }
102 Op::Subtree { tokens, delimiter: delim } => { 246 }
103 let rhs = match src.expect_subtree() { 247 let op = match item.dot.peek() {
104 Ok(s) => s, 248 None => {
105 Err(()) => { 249 // We are at or past the end of the matcher of `item`.
106 res.add_err(err!("expected subtree")); 250 if item.up.is_some() {
107 continue; 251 if item.sep_parsed.is_none() {
252 // Get the `up` matcher
253 let mut new_pos = *item.up.clone().unwrap();
254 // Add matches from this repetition to the `matches` of `up`
255 if let Some(bindings) = new_pos.bindings.last_mut() {
256 for (i, b) in item.bindings.iter_mut().enumerate() {
257 bindings.push_nested(i, b.clone()).unwrap();
258 }
259 }
260 // Move the "dot" past the repetition in `up`
261 new_pos.dot.next();
262 new_pos.is_error = new_pos.is_error || item.is_error;
263 cur_items.push(new_pos);
264 }
265
266 // Check if we need a separator.
267 // We check the separator one by one
268 let sep_idx = *item.sep_parsed.as_ref().unwrap_or(&0);
269 let sep_len = item.sep.as_ref().map_or(0, Separator::tt_count);
270 if item.sep.is_some() && sep_idx != sep_len {
271 let sep = item.sep.as_ref().unwrap();
272 if src.clone().expect_separator(&sep, sep_idx) {
273 item.dot.next();
274 item.sep_parsed = Some(sep_idx + 1);
275 try_push!(next_items, item);
276 }
277 }
278 // We don't need a separator. Move the "dot" back to the beginning of the matcher
279 // and try to match again UNLESS we are only allowed to have _one_ repetition.
280 else if item.sep_kind != Some(RepeatKind::ZeroOrOne) {
281 item.dot = item.dot.reset();
282 item.sep_parsed = None;
283 item.bindings.push(Bindings::default());
284 cur_items.push(item);
285 }
286 } else {
287 // If we are not in a repetition, then being at the end of a matcher means that we have
288 // reached the potential end of the input.
289 try_push!(eof_items, item);
290 }
291 continue;
292 }
293 Some(it) => it,
294 };
295
296 // We are in the middle of a matcher.
297 match op {
298 OpDelimited::Op(Op::Repeat { tokens, kind, separator }) => {
299 if matches!(kind, RepeatKind::ZeroOrMore | RepeatKind::ZeroOrOne) {
300 let mut new_item = item.clone();
301 new_item.dot.next();
302 let mut vars = Vec::new();
303 let bindings = new_item.bindings.last_mut().unwrap();
304 collect_vars(&mut vars, tokens);
305 for var in vars {
306 bindings.push_empty(&var);
108 } 307 }
109 }; 308 cur_items.push(new_item);
110 if delim.map(|it| it.kind) != rhs.delimiter_kind() {
111 res.add_err(err!("mismatched delimiter"));
112 continue;
113 } 309 }
114 let mut src = TtIter::new(rhs); 310 cur_items.push(MatchState {
115 match_tokens(res, tokens, &mut src); 311 dot: tokens.iter_delimited(None),
116 if src.len() > 0 { 312 stack: Default::default(),
117 res.add_err(err!("leftover tokens")); 313 up: Some(Box::new(item)),
314 sep: separator.clone(),
315 sep_kind: Some(*kind),
316 sep_parsed: None,
317 bindings: smallvec![Bindings::default()],
318 meta_result: None,
319 is_error: false,
320 })
321 }
322 OpDelimited::Op(Op::Subtree { tokens, delimiter }) => {
323 if let Ok(subtree) = src.clone().expect_subtree() {
324 if subtree.delimiter_kind() == delimiter.map(|it| it.kind) {
325 item.stack.push(item.dot);
326 item.dot = tokens.iter_delimited(delimiter.as_ref());
327 cur_items.push(item);
328 }
118 } 329 }
119 } 330 }
120 Op::Var { name, kind, .. } => { 331 OpDelimited::Op(Op::Var { kind, name, .. }) => {
121 let kind = match kind { 332 if let Some(kind) = kind {
122 Some(k) => k, 333 let mut fork = src.clone();
123 None => { 334 let match_res = match_meta_var(kind.as_str(), &mut fork);
124 res.add_err(ExpandError::UnexpectedToken); 335 match match_res.err {
125 continue; 336 None => {
337 // Some meta variables are optional (e.g. vis)
338 if match_res.value.is_some() {
339 item.meta_result = Some((fork, match_res));
340 try_push!(bb_items, item);
341 } else {
342 item.bindings.last_mut().unwrap().push_optional(name);
343 item.dot.next();
344 cur_items.push(item);
345 }
346 }
347 Some(err) => {
348 res.add_err(err);
349 match match_res.value {
350 Some(fragment) => {
351 item.bindings
352 .last_mut()
353 .unwrap()
354 .inner
355 .push((name.clone(), Binding::Fragment(fragment)));
356 }
357 _ => {}
358 }
359 item.is_error = true;
360 error_items.push(item);
361 }
126 } 362 }
127 }; 363 }
128 let ExpandResult { value: matched, err: match_err } = 364 }
129 match_meta_var(kind.as_str(), src); 365 OpDelimited::Op(Op::Leaf(leaf)) => {
130 match matched { 366 if let Err(err) = match_leaf(&leaf, &mut src.clone()) {
367 res.add_err(err);
368 item.is_error = true;
369 } else {
370 item.dot.next();
371 }
372 try_push!(next_items, item);
373 }
374 OpDelimited::Open => {
375 if matches!(src.clone().next(), Some(tt::TokenTree::Subtree(..))) {
376 item.dot.next();
377 try_push!(next_items, item);
378 }
379 }
380 OpDelimited::Close => {
381 let is_delim_closed = src.peek_n(0).is_none() && !stack.is_empty();
382 if is_delim_closed {
383 item.dot.next();
384 try_push!(next_items, item);
385 }
386 }
387 }
388 }
389}
390
391fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match {
392 let mut src = TtIter::new(src);
393 let mut stack: SmallVec<[TtIter; 1]> = SmallVec::new();
394 let mut res = Match::default();
395 let mut error_reover_item = None;
396
397 let mut cur_items = smallvec![MatchState {
398 dot: pattern.iter_delimited(None),
399 stack: Default::default(),
400 up: None,
401 sep: None,
402 sep_kind: None,
403 sep_parsed: None,
404 bindings: smallvec![Bindings::default()],
405 is_error: false,
406 meta_result: None,
407 }];
408
409 let mut next_items = vec![];
410
411 loop {
412 let mut bb_items = SmallVec::new();
413 let mut eof_items = SmallVec::new();
414 let mut error_items = SmallVec::new();
415
416 stdx::always!(next_items.is_empty());
417
418 match_loop_inner(
419 src.clone(),
420 &stack,
421 &mut res,
422 &mut cur_items,
423 &mut bb_items,
424 &mut next_items,
425 &mut eof_items,
426 &mut error_items,
427 );
428 stdx::always!(cur_items.is_empty());
429
430 if error_items.len() > 0 {
431 error_reover_item = error_items.pop();
432 } else if eof_items.len() > 0 {
433 error_reover_item = Some(eof_items[0].clone());
434 }
435
436 // We need to do some post processing after the `match_loop_inner`.
437 // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise,
438 // either the parse is ambiguous (which should never happen) or there is a syntax error.
439 if src.peek_n(0).is_none() && stack.is_empty() {
440 if eof_items.len() == 1 {
441 // remove all errors, because it is the correct answer !
442 res = Match::default();
443 res.bindings = eof_items[0].bindings[0].clone();
444 } else {
445 // Error recovery
446 if error_reover_item.is_some() {
447 res.bindings = error_reover_item.unwrap().bindings[0].clone();
448 }
449 res.add_err(ExpandError::UnexpectedToken);
450 }
451 return res;
452 }
453
454 // If there are no possible next positions AND we aren't waiting for the black-box parser,
455 // then there is a syntax error.
456 //
457 // Another possibility is that we need to call out to parse some rust nonterminal
458 // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong.
459 if (bb_items.is_empty() && next_items.is_empty())
460 || (!bb_items.is_empty() && !next_items.is_empty())
461 || bb_items.len() > 1
462 {
463 res.unmatched_tts += src.len();
464 while let Some(it) = stack.pop() {
465 src = it;
466 res.unmatched_tts += src.len();
467 }
468 res.add_err(err!("leftover tokens"));
469
470 if let Some(mut error_reover_item) = error_reover_item {
471 res.bindings = error_reover_item.bindings.remove(0);
472 }
473 return res;
474 }
475 // Dump all possible `next_items` into `cur_items` for the next iteration.
476 else if !next_items.is_empty() {
477 // Now process the next token
478 cur_items.extend(next_items.drain(..));
479
480 match src.next() {
481 Some(tt::TokenTree::Subtree(subtree)) => {
482 stack.push(src.clone());
483 src = TtIter::new(subtree);
484 }
485 None if !stack.is_empty() => src = stack.pop().unwrap(),
486 _ => (),
487 }
488 }
489 // Finally, we have the case where we need to call the black-box parser to get some
490 // nonterminal.
491 else {
492 stdx::always!(bb_items.len() == 1);
493 let mut item = bb_items.pop().unwrap();
494
495 if let Some(OpDelimited::Op(Op::Var { name, .. })) = item.dot.peek() {
496 let (iter, match_res) = item.meta_result.take().unwrap();
497 let bindings = item.bindings.last_mut().unwrap();
498 match match_res.value {
131 Some(fragment) => { 499 Some(fragment) => {
132 res.bindings.inner.insert(name.clone(), Binding::Fragment(fragment)); 500 bindings.inner.push((name.clone(), Binding::Fragment(fragment)));
133 } 501 }
134 None if match_err.is_none() => res.bindings.push_optional(name), 502 None if match_res.err.is_none() => bindings.push_optional(name),
135 _ => {} 503 _ => {}
136 } 504 }
137 if let Some(err) = match_err { 505 if let Some(err) = match_res.err {
138 res.add_err(err); 506 res.add_err(err);
139 } 507 }
508 src = iter.clone();
509 item.dot.next();
510 } else {
511 unreachable!()
140 } 512 }
141 Op::Repeat { tokens: subtree, kind, separator } => { 513 cur_items.push(item);
142 match_repeat(res, subtree, *kind, separator, src);
143 }
144 } 514 }
515 stdx::always!(!cur_items.is_empty());
145 } 516 }
146} 517}
147 518
@@ -173,73 +544,6 @@ fn match_leaf(lhs: &tt::Leaf, src: &mut TtIter) -> Result<(), ExpandError> {
173 Ok(()) 544 Ok(())
174} 545}
175 546
176fn match_repeat(
177 res: &mut Match,
178 pattern: &MetaTemplate,
179 kind: RepeatKind,
180 separator: &Option<Separator>,
181 src: &mut TtIter,
182) {
183 // Dirty hack to make macro-expansion terminate.
184 // This should be replaced by a proper macro-by-example implementation
185 let mut limit = 65536;
186 let mut counter = 0;
187
188 for i in 0.. {
189 let mut fork = src.clone();
190
191 if let Some(separator) = &separator {
192 if i != 0 && !fork.eat_separator(separator) {
193 break;
194 }
195 }
196
197 let mut nested = Match::default();
198 match_tokens(&mut nested, pattern, &mut fork);
199 if nested.err.is_none() {
200 limit -= 1;
201 if limit == 0 {
202 log::warn!(
203 "match_lhs exceeded repeat pattern limit => {:#?}\n{:#?}\n{:#?}\n{:#?}",
204 pattern,
205 src,
206 kind,
207 separator
208 );
209 break;
210 }
211 *src = fork;
212
213 if let Err(err) = res.bindings.push_nested(counter, nested.bindings) {
214 res.add_err(err);
215 }
216 counter += 1;
217 if counter == 1 {
218 if let RepeatKind::ZeroOrOne = kind {
219 break;
220 }
221 }
222 } else {
223 break;
224 }
225 }
226
227 match (kind, counter) {
228 (RepeatKind::OneOrMore, 0) => {
229 res.add_err(ExpandError::UnexpectedToken);
230 }
231 (_, 0) => {
232 // Collect all empty variables in subtrees
233 let mut vars = Vec::new();
234 collect_vars(&mut vars, pattern);
235 for var in vars {
236 res.bindings.push_empty(&var)
237 }
238 }
239 _ => (),
240 }
241}
242
243fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragment>> { 547fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragment>> {
244 let fragment = match kind { 548 let fragment = match kind {
245 "path" => Path, 549 "path" => Path,
@@ -303,14 +607,14 @@ fn collect_vars(buf: &mut Vec<SmolStr>, pattern: &MetaTemplate) {
303} 607}
304 608
305impl<'a> TtIter<'a> { 609impl<'a> TtIter<'a> {
306 fn eat_separator(&mut self, separator: &Separator) -> bool { 610 fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool {
307 let mut fork = self.clone(); 611 let mut fork = self.clone();
308 let ok = match separator { 612 let ok = match separator {
309 Separator::Ident(lhs) => match fork.expect_ident() { 613 Separator::Ident(lhs) if idx == 0 => match fork.expect_ident() {
310 Ok(rhs) => rhs.text == lhs.text, 614 Ok(rhs) => rhs.text == lhs.text,
311 _ => false, 615 _ => false,
312 }, 616 },
313 Separator::Literal(lhs) => match fork.expect_literal() { 617 Separator::Literal(lhs) if idx == 0 => match fork.expect_literal() {
314 Ok(rhs) => match rhs { 618 Ok(rhs) => match rhs {
315 tt::Leaf::Literal(rhs) => rhs.text == lhs.text, 619 tt::Leaf::Literal(rhs) => rhs.text == lhs.text,
316 tt::Leaf::Ident(rhs) => rhs.text == lhs.text, 620 tt::Leaf::Ident(rhs) => rhs.text == lhs.text,
@@ -318,10 +622,11 @@ impl<'a> TtIter<'a> {
318 }, 622 },
319 _ => false, 623 _ => false,
320 }, 624 },
321 Separator::Puncts(lhss) => lhss.iter().all(|lhs| match fork.expect_punct() { 625 Separator::Puncts(lhss) if idx < lhss.len() => match fork.expect_punct() {
322 Ok(rhs) => rhs.char == lhs.char, 626 Ok(rhs) => rhs.char == lhss[idx].char,
323 _ => false, 627 _ => false,
324 }), 628 },
629 _ => false,
325 }; 630 };
326 if ok { 631 if ok {
327 *self = fork; 632 *self = fork;
diff --git a/crates/mbe/src/expander/transcriber.rs b/crates/mbe/src/expander/transcriber.rs
index 78368a33e..ad9953a7d 100644
--- a/crates/mbe/src/expander/transcriber.rs
+++ b/crates/mbe/src/expander/transcriber.rs
@@ -13,13 +13,17 @@ use crate::{
13 13
14impl Bindings { 14impl Bindings {
15 fn contains(&self, name: &str) -> bool { 15 fn contains(&self, name: &str) -> bool {
16 self.inner.contains_key(name) 16 self.inner.iter().any(|(n, _)| n == name)
17 } 17 }
18 18
19 fn get(&self, name: &str, nesting: &mut [NestingState]) -> Result<&Fragment, ExpandError> { 19 fn get(&self, name: &str, nesting: &mut [NestingState]) -> Result<&Fragment, ExpandError> {
20 let mut b = self.inner.get(name).ok_or_else(|| { 20 let mut b: &Binding = self
21 ExpandError::BindingError(format!("could not find binding `{}`", name)) 21 .inner
22 })?; 22 .iter()
23 .find_map(|(n, b)| if n == name { Some(b) } else { None })
24 .ok_or_else(|| {
25 ExpandError::BindingError(format!("could not find binding `{}`", name))
26 })?;
23 for nesting_state in nesting.iter_mut() { 27 for nesting_state in nesting.iter_mut() {
24 nesting_state.hit = true; 28 nesting_state.hit = true;
25 b = match b { 29 b = match b {
diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs
index 4c298f85f..f3d2da55a 100644
--- a/crates/mbe/src/lib.rs
+++ b/crates/mbe/src/lib.rs
@@ -21,7 +21,7 @@ use test_utils::mark;
21pub use tt::{Delimiter, DelimiterKind, Punct}; 21pub use tt::{Delimiter, DelimiterKind, Punct};
22 22
23use crate::{ 23use crate::{
24 parser::{parse_pattern, parse_template, Op}, 24 parser::{parse_pattern, parse_template, MetaTemplate, Op},
25 tt_iter::TtIter, 25 tt_iter::TtIter,
26}; 26};
27 27
@@ -94,15 +94,6 @@ struct Rule {
94 rhs: MetaTemplate, 94 rhs: MetaTemplate,
95} 95}
96 96
97#[derive(Clone, Debug, PartialEq, Eq)]
98struct MetaTemplate(Vec<Op>);
99
100impl<'a> MetaTemplate {
101 fn iter(&self) -> impl Iterator<Item = &Op> {
102 self.0.iter()
103 }
104}
105
106#[derive(Clone, Copy, Debug, PartialEq, Eq)] 97#[derive(Clone, Copy, Debug, PartialEq, Eq)]
107struct Shift(u32); 98struct Shift(u32);
108 99
diff --git a/crates/mbe/src/parser.rs b/crates/mbe/src/parser.rs
index f891ec29c..8671322e1 100644
--- a/crates/mbe/src/parser.rs
+++ b/crates/mbe/src/parser.rs
@@ -5,7 +5,75 @@ use smallvec::SmallVec;
5use syntax::SmolStr; 5use syntax::SmolStr;
6use tt::Delimiter; 6use tt::Delimiter;
7 7
8use crate::{tt_iter::TtIter, MetaTemplate, ParseError}; 8use crate::{tt_iter::TtIter, ParseError};
9
10#[derive(Clone, Debug, PartialEq, Eq)]
11pub(crate) struct MetaTemplate(pub(crate) Vec<Op>);
12
13#[derive(Debug, Clone, Copy)]
14pub(crate) enum OpDelimited<'a> {
15 Op(&'a Op),
16 Open,
17 Close,
18}
19
20#[derive(Debug, Clone, Copy)]
21pub(crate) struct OpDelimitedIter<'a> {
22 inner: &'a Vec<Op>,
23 delimited: Option<&'a Delimiter>,
24 idx: usize,
25}
26
27impl<'a> OpDelimitedIter<'a> {
28 pub(crate) fn is_eof(&self) -> bool {
29 let len = self.inner.len() + if self.delimited.is_some() { 2 } else { 0 };
30 self.idx >= len
31 }
32
33 pub(crate) fn peek(&self) -> Option<OpDelimited<'a>> {
34 match self.delimited {
35 None => self.inner.get(self.idx).map(OpDelimited::Op),
36 Some(_) => match self.idx {
37 0 => Some(OpDelimited::Open),
38 i if i == self.inner.len() + 1 => Some(OpDelimited::Close),
39 i => self.inner.get(i - 1).map(OpDelimited::Op),
40 },
41 }
42 }
43
44 pub(crate) fn reset(&self) -> Self {
45 Self { inner: &self.inner, idx: 0, delimited: self.delimited }
46 }
47}
48
49impl<'a> Iterator for OpDelimitedIter<'a> {
50 type Item = OpDelimited<'a>;
51
52 fn next(&mut self) -> Option<Self::Item> {
53 let res = self.peek();
54 self.idx += 1;
55 res
56 }
57
58 fn size_hint(&self) -> (usize, Option<usize>) {
59 let len = self.inner.len() + if self.delimited.is_some() { 2 } else { 0 };
60 let remain = len.checked_sub(self.idx).unwrap_or(0);
61 (remain, Some(remain))
62 }
63}
64
65impl<'a> MetaTemplate {
66 pub(crate) fn iter(&self) -> impl Iterator<Item = &Op> {
67 self.0.iter()
68 }
69
70 pub(crate) fn iter_delimited(
71 &'a self,
72 delimited: Option<&'a Delimiter>,
73 ) -> OpDelimitedIter<'a> {
74 OpDelimitedIter { inner: &self.0, idx: 0, delimited }
75 }
76}
9 77
10#[derive(Clone, Debug, PartialEq, Eq)] 78#[derive(Clone, Debug, PartialEq, Eq)]
11pub(crate) enum Op { 79pub(crate) enum Op {
@@ -47,6 +115,16 @@ impl PartialEq for Separator {
47 } 115 }
48} 116}
49 117
118impl Separator {
119 pub(crate) fn tt_count(&self) -> usize {
120 match self {
121 Separator::Literal(_) => 1,
122 Separator::Ident(_) => 1,
123 Separator::Puncts(it) => it.len(),
124 }
125 }
126}
127
50pub(crate) fn parse_template(template: &tt::Subtree) -> Result<Vec<Op>, ParseError> { 128pub(crate) fn parse_template(template: &tt::Subtree) -> Result<Vec<Op>, ParseError> {
51 parse_inner(&template, Mode::Template).into_iter().collect() 129 parse_inner(&template, Mode::Template).into_iter().collect()
52} 130}
diff --git a/crates/mbe/src/tests.rs b/crates/mbe/src/tests.rs
index f1eadcd1e..5c641ebf2 100644
--- a/crates/mbe/src/tests.rs
+++ b/crates/mbe/src/tests.rs
@@ -457,6 +457,17 @@ fn test_match_group_with_multichar_sep() {
457} 457}
458 458
459#[test] 459#[test]
460fn test_match_group_with_multichar_sep2() {
461 parse_macro(
462 r#"
463 macro_rules! foo {
464 (fn $name:ident {$($i:literal)&&*} ) => ( fn $name() -> bool { $($i)&&*} );
465 }"#,
466 )
467 .assert_expand_items("foo! (fn baz {true && true} );", "fn baz () -> bool {true &&true}");
468}
469
470#[test]
460fn test_match_group_zero_match() { 471fn test_match_group_zero_match() {
461 parse_macro( 472 parse_macro(
462 r#" 473 r#"
@@ -1267,6 +1278,18 @@ macro_rules! m {
1267 .is_some()); 1278 .is_some());
1268} 1279}
1269 1280
1281#[test]
1282fn test_match_is_not_greedy() {
1283 parse_macro(
1284 r#"
1285macro_rules! foo {
1286 ($($i:ident $(,)*),*) => {};
1287}
1288"#,
1289 )
1290 .assert_expand_items(r#"foo!(a,b);"#, r#""#);
1291}
1292
1270// The following tests are based on real world situations 1293// The following tests are based on real world situations
1271#[test] 1294#[test]
1272fn test_vec() { 1295fn test_vec() {