diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2021-03-02 13:20:47 +0000 |
---|---|---|
committer | GitHub <[email protected]> | 2021-03-02 13:20:47 +0000 |
commit | 91bf5fa827b2c4ef74cb68c172c79127115e394f (patch) | |
tree | ebbd7ebb043fe9a8bee8ac2419a461c3387b1888 /crates/mbe/src/expander | |
parent | 8eee9149e87ea58d4191d04ebe6faf57ac8485a3 (diff) | |
parent | cff2201c30bda7b346e3b47875d95a2cf9cafaa3 (diff) |
Merge #7513
7513: NFA parser for mbe matcher r=matklad a=edwin0cheng
Almost straight porting from rustc one, but a little bit slow :(
```
rust-analyzer analysis-stats -q .
```
From:
```log
Database loaded: 636.11ms, 277minstr
crates: 36, mods: 594, decls: 11527, fns: 9017
Item Collection: 10.99s, 60ginstr
exprs: 249618, ??ty: 2699 (1%), ?ty: 2101 (0%), !ty: 932
Inference: 28.94s, 123ginstr
Total: 39.93s, 184ginstr
```
To:
```log
Database loaded: 630.90ms, 277minstr
crates: 36, mods: 594, decls: 11528, fns: 9018
Item Collection: 13.70s, 77ginstr
exprs: 249482, ??ty: 2699 (1%), ?ty: 2101 (0%), !ty: 932
Inference: 30.27s, 133ginstr
Total: 43.97s, 211ginstr
```
Fixes #4777
Co-authored-by: Edwin Cheng <[email protected]>
Diffstat (limited to 'crates/mbe/src/expander')
-rw-r--r-- | crates/mbe/src/expander/matcher.rs | 559 | ||||
-rw-r--r-- | crates/mbe/src/expander/transcriber.rs | 12 |
2 files changed, 440 insertions, 131 deletions
diff --git a/crates/mbe/src/expander/matcher.rs b/crates/mbe/src/expander/matcher.rs index e3bd4c09a..9d3d28055 100644 --- a/crates/mbe/src/expander/matcher.rs +++ b/crates/mbe/src/expander/matcher.rs | |||
@@ -1,14 +1,74 @@ | |||
1 | //! FIXME: write short doc here | 1 | //! An NFA-based parser, which is porting from rustc mbe parsing code |
2 | //! | ||
3 | //! See https://github.com/rust-lang/rust/blob/70b18bc2cbac4712020019f5bf57c00905373205/compiler/rustc_expand/src/mbe/macro_parser.rs | ||
4 | //! Here is a quick intro to how the parser works, copied from rustc: | ||
5 | //! | ||
6 | //! A 'position' is a dot in the middle of a matcher, usually represented as a | ||
7 | //! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. | ||
8 | //! | ||
9 | //! The parser walks through the input a character at a time, maintaining a list | ||
10 | //! of threads consistent with the current position in the input string: `cur_items`. | ||
11 | //! | ||
12 | //! As it processes them, it fills up `eof_items` with threads that would be valid if | ||
13 | //! the macro invocation is now over, `bb_items` with threads that are waiting on | ||
14 | //! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting | ||
15 | //! on a particular token. Most of the logic concerns moving the · through the | ||
16 | //! repetitions indicated by Kleene stars. The rules for moving the · without | ||
17 | //! consuming any input are called epsilon transitions. It only advances or calls | ||
18 | //! out to the real Rust parser when no `cur_items` threads remain. | ||
19 | //! | ||
20 | //! Example: | ||
21 | //! | ||
22 | //! ```text, ignore | ||
23 | //! Start parsing a a a a b against [· a $( a )* a b]. | ||
24 | //! | ||
25 | //! Remaining input: a a a a b | ||
26 | //! next: [· a $( a )* a b] | ||
27 | //! | ||
28 | //! - - - Advance over an a. - - - | ||
29 | //! | ||
30 | //! Remaining input: a a a b | ||
31 | //! cur: [a · $( a )* a b] | ||
32 | //! Descend/Skip (first item). | ||
33 | //! next: [a $( · a )* a b] [a $( a )* · a b]. | ||
34 | //! | ||
35 | //! - - - Advance over an a. - - - | ||
36 | //! | ||
37 | //! Remaining input: a a b | ||
38 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | ||
39 | //! Follow epsilon transition: Finish/Repeat (first item) | ||
40 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | ||
41 | //! | ||
42 | //! - - - Advance over an a. - - - (this looks exactly like the last step) | ||
43 | //! | ||
44 | //! Remaining input: a b | ||
45 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | ||
46 | //! Follow epsilon transition: Finish/Repeat (first item) | ||
47 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | ||
48 | //! | ||
49 | //! - - - Advance over an a. - - - (this looks exactly like the last step) | ||
50 | //! | ||
51 | //! Remaining input: b | ||
52 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | ||
53 | //! Follow epsilon transition: Finish/Repeat (first item) | ||
54 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | ||
55 | //! | ||
56 | //! - - - Advance over a b. - - - | ||
57 | //! | ||
58 | //! Remaining input: '' | ||
59 | //! eof: [a $( a )* a b ·] | ||
60 | //! ``` | ||
2 | 61 | ||
3 | use crate::{ | 62 | use crate::{ |
4 | expander::{Binding, Bindings, Fragment}, | 63 | expander::{Binding, Bindings, Fragment}, |
5 | parser::{Op, RepeatKind, Separator}, | 64 | parser::{Op, OpDelimited, OpDelimitedIter, RepeatKind, Separator}, |
6 | tt_iter::TtIter, | 65 | tt_iter::TtIter, |
7 | ExpandError, MetaTemplate, | 66 | ExpandError, MetaTemplate, |
8 | }; | 67 | }; |
9 | 68 | ||
10 | use super::ExpandResult; | 69 | use super::ExpandResult; |
11 | use parser::FragmentKind::*; | 70 | use parser::FragmentKind::*; |
71 | use smallvec::{smallvec, SmallVec}; | ||
12 | use syntax::SmolStr; | 72 | use syntax::SmolStr; |
13 | 73 | ||
14 | impl Bindings { | 74 | impl Bindings { |
@@ -16,19 +76,19 @@ impl Bindings { | |||
16 | // FIXME: Do we have a better way to represent an empty token ? | 76 | // FIXME: Do we have a better way to represent an empty token ? |
17 | // Insert an empty subtree for empty token | 77 | // Insert an empty subtree for empty token |
18 | let tt = tt::Subtree::default().into(); | 78 | let tt = tt::Subtree::default().into(); |
19 | self.inner.insert(name.clone(), Binding::Fragment(Fragment::Tokens(tt))); | 79 | self.inner.push((name.clone(), Binding::Fragment(Fragment::Tokens(tt)))); |
20 | } | 80 | } |
21 | 81 | ||
22 | fn push_empty(&mut self, name: &SmolStr) { | 82 | fn push_empty(&mut self, name: &SmolStr) { |
23 | self.inner.insert(name.clone(), Binding::Empty); | 83 | self.inner.push((name.clone(), Binding::Empty)); |
24 | } | 84 | } |
25 | 85 | ||
26 | fn push_nested(&mut self, idx: usize, nested: Bindings) -> Result<(), ExpandError> { | 86 | fn push_nested(&mut self, idx: usize, nested: Bindings) -> Result<(), ExpandError> { |
27 | for (key, value) in nested.inner { | 87 | for (key, value) in nested.inner { |
28 | if !self.inner.contains_key(&key) { | 88 | if self.get_mut(&key).is_none() { |
29 | self.inner.insert(key.clone(), Binding::Nested(Vec::new())); | 89 | self.inner.push((key.clone(), Binding::Nested(Vec::new()))); |
30 | } | 90 | } |
31 | match self.inner.get_mut(&key) { | 91 | match self.get_mut(&key) { |
32 | Some(Binding::Nested(it)) => { | 92 | Some(Binding::Nested(it)) => { |
33 | // insert empty nested bindings before this one | 93 | // insert empty nested bindings before this one |
34 | while it.len() < idx { | 94 | while it.len() < idx { |
@@ -46,6 +106,14 @@ impl Bindings { | |||
46 | } | 106 | } |
47 | Ok(()) | 107 | Ok(()) |
48 | } | 108 | } |
109 | |||
110 | fn get_mut(&mut self, name: &str) -> Option<&mut Binding> { | ||
111 | self.inner.iter_mut().find_map(|(n, b)| if n == name { Some(b) } else { None }) | ||
112 | } | ||
113 | |||
114 | fn bindings(&self) -> impl Iterator<Item = &Binding> { | ||
115 | self.inner.iter().map(|(_, b)| b) | ||
116 | } | ||
49 | } | 117 | } |
50 | 118 | ||
51 | macro_rules! err { | 119 | macro_rules! err { |
@@ -57,7 +125,7 @@ macro_rules! err { | |||
57 | }; | 125 | }; |
58 | } | 126 | } |
59 | 127 | ||
60 | #[derive(Debug, Default)] | 128 | #[derive(Clone, Debug, Default, PartialEq, Eq)] |
61 | pub(super) struct Match { | 129 | pub(super) struct Match { |
62 | pub(super) bindings: Bindings, | 130 | pub(super) bindings: Bindings, |
63 | /// We currently just keep the first error and count the rest to compare matches. | 131 | /// We currently just keep the first error and count the rest to compare matches. |
@@ -65,6 +133,8 @@ pub(super) struct Match { | |||
65 | pub(super) err_count: usize, | 133 | pub(super) err_count: usize, |
66 | /// How many top-level token trees were left to match. | 134 | /// How many top-level token trees were left to match. |
67 | pub(super) unmatched_tts: usize, | 135 | pub(super) unmatched_tts: usize, |
136 | /// The number of bound variables | ||
137 | pub(super) bound_count: usize, | ||
68 | } | 138 | } |
69 | 139 | ||
70 | impl Match { | 140 | impl Match { |
@@ -76,72 +146,373 @@ impl Match { | |||
76 | } | 146 | } |
77 | 147 | ||
78 | /// Matching errors are added to the `Match`. | 148 | /// Matching errors are added to the `Match`. |
79 | pub(super) fn match_(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { | 149 | pub(super) fn match_(pattern: &MetaTemplate, input: &tt::Subtree) -> Match { |
80 | let mut res = Match::default(); | 150 | let mut res = match_loop(pattern, &input); |
81 | let mut src = TtIter::new(src); | 151 | res.bound_count = count(res.bindings.bindings()); |
152 | return res; | ||
153 | |||
154 | fn count<'a>(bindings: impl Iterator<Item = &'a Binding>) -> usize { | ||
155 | bindings | ||
156 | .map(|it| match it { | ||
157 | Binding::Fragment(_) => 1, | ||
158 | Binding::Empty => 1, | ||
159 | Binding::Nested(it) => count(it.iter()), | ||
160 | }) | ||
161 | .sum() | ||
162 | } | ||
163 | } | ||
82 | 164 | ||
83 | match_tokens(&mut res, pattern, &mut src); | 165 | #[derive(Debug, Clone)] |
166 | struct MatchState<'t> { | ||
167 | /// The position of the "dot" in this matcher | ||
168 | dot: OpDelimitedIter<'t>, | ||
84 | 169 | ||
85 | if src.len() > 0 { | 170 | /// Token subtree stack |
86 | res.unmatched_tts += src.len(); | 171 | /// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. ) |
87 | res.add_err(err!("leftover tokens")); | 172 | /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does |
88 | } | 173 | /// that where the bottom of the stack is the outermost matcher. |
174 | stack: SmallVec<[OpDelimitedIter<'t>; 4]>, | ||
175 | |||
176 | /// The "parent" matcher position if we are in a repetition. That is, the matcher position just | ||
177 | /// before we enter the repetition. | ||
178 | up: Option<Box<MatchState<'t>>>, | ||
179 | |||
180 | /// The separator if we are in a repetition. | ||
181 | sep: Option<Separator>, | ||
182 | |||
183 | /// The KleeneOp of this sequence if we are in a repetition. | ||
184 | sep_kind: Option<RepeatKind>, | ||
89 | 185 | ||
90 | res | 186 | /// Number of tokens of seperator parsed |
187 | sep_parsed: Option<usize>, | ||
188 | |||
189 | /// Matched meta variables bindings | ||
190 | bindings: SmallVec<[Bindings; 4]>, | ||
191 | |||
192 | /// Cached result of meta variable parsing | ||
193 | meta_result: Option<(TtIter<'t>, ExpandResult<Option<Fragment>>)>, | ||
194 | |||
195 | /// Is error occuried in this state, will `poised` to "parent" | ||
196 | is_error: bool, | ||
91 | } | 197 | } |
92 | 198 | ||
93 | fn match_tokens(res: &mut Match, pattern: &MetaTemplate, src: &mut TtIter) { | 199 | /// Process the matcher positions of `cur_items` until it is empty. In the process, this will |
94 | for op in pattern.iter() { | 200 | /// produce more items in `next_items`, `eof_items`, and `bb_items`. |
95 | match op { | 201 | /// |
96 | Op::Leaf(lhs) => { | 202 | /// For more info about the how this happens, see the module-level doc comments and the inline |
97 | if let Err(err) = match_leaf(lhs, src) { | 203 | /// comments of this function. |
98 | res.add_err(err); | 204 | /// |
99 | continue; | 205 | /// # Parameters |
206 | /// | ||
207 | /// - `src`: the current token of the parser. | ||
208 | /// - `stack`: the "parent" frames of the token tree | ||
209 | /// - `res`: the match result to store errors | ||
210 | /// - `cur_items`: the set of current items to be processed. This should be empty by the end of a | ||
211 | /// successful execution of this function. | ||
212 | /// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in | ||
213 | /// the function `parse`. | ||
214 | /// - `eof_items`: the set of items that would be valid if this was the EOF. | ||
215 | /// - `bb_items`: the set of items that are waiting for the black-box parser. | ||
216 | /// - `error_items`: the set of items in errors, used for error-resilient parsing | ||
217 | fn match_loop_inner<'t>( | ||
218 | src: TtIter<'t>, | ||
219 | stack: &[TtIter<'t>], | ||
220 | res: &mut Match, | ||
221 | cur_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
222 | bb_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
223 | next_items: &mut Vec<MatchState<'t>>, | ||
224 | eof_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
225 | error_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
226 | ) { | ||
227 | macro_rules! try_push { | ||
228 | ($items: expr, $it:expr) => { | ||
229 | if $it.is_error { | ||
230 | error_items.push($it); | ||
231 | } else { | ||
232 | $items.push($it); | ||
233 | } | ||
234 | }; | ||
235 | } | ||
236 | |||
237 | while let Some(mut item) = cur_items.pop() { | ||
238 | while item.dot.is_eof() { | ||
239 | match item.stack.pop() { | ||
240 | Some(frame) => { | ||
241 | item.dot = frame; | ||
242 | item.dot.next(); | ||
100 | } | 243 | } |
244 | None => break, | ||
101 | } | 245 | } |
102 | Op::Subtree { tokens, delimiter: delim } => { | 246 | } |
103 | let rhs = match src.expect_subtree() { | 247 | let op = match item.dot.peek() { |
104 | Ok(s) => s, | 248 | None => { |
105 | Err(()) => { | 249 | // We are at or past the end of the matcher of `item`. |
106 | res.add_err(err!("expected subtree")); | 250 | if item.up.is_some() { |
107 | continue; | 251 | if item.sep_parsed.is_none() { |
252 | // Get the `up` matcher | ||
253 | let mut new_pos = *item.up.clone().unwrap(); | ||
254 | // Add matches from this repetition to the `matches` of `up` | ||
255 | if let Some(bindings) = new_pos.bindings.last_mut() { | ||
256 | for (i, b) in item.bindings.iter_mut().enumerate() { | ||
257 | bindings.push_nested(i, b.clone()).unwrap(); | ||
258 | } | ||
259 | } | ||
260 | // Move the "dot" past the repetition in `up` | ||
261 | new_pos.dot.next(); | ||
262 | new_pos.is_error = new_pos.is_error || item.is_error; | ||
263 | cur_items.push(new_pos); | ||
264 | } | ||
265 | |||
266 | // Check if we need a separator. | ||
267 | // We check the separator one by one | ||
268 | let sep_idx = *item.sep_parsed.as_ref().unwrap_or(&0); | ||
269 | let sep_len = item.sep.as_ref().map_or(0, Separator::tt_count); | ||
270 | if item.sep.is_some() && sep_idx != sep_len { | ||
271 | let sep = item.sep.as_ref().unwrap(); | ||
272 | if src.clone().expect_separator(&sep, sep_idx) { | ||
273 | item.dot.next(); | ||
274 | item.sep_parsed = Some(sep_idx + 1); | ||
275 | try_push!(next_items, item); | ||
276 | } | ||
277 | } | ||
278 | // We don't need a separator. Move the "dot" back to the beginning of the matcher | ||
279 | // and try to match again UNLESS we are only allowed to have _one_ repetition. | ||
280 | else if item.sep_kind != Some(RepeatKind::ZeroOrOne) { | ||
281 | item.dot = item.dot.reset(); | ||
282 | item.sep_parsed = None; | ||
283 | item.bindings.push(Bindings::default()); | ||
284 | cur_items.push(item); | ||
285 | } | ||
286 | } else { | ||
287 | // If we are not in a repetition, then being at the end of a matcher means that we have | ||
288 | // reached the potential end of the input. | ||
289 | try_push!(eof_items, item); | ||
290 | } | ||
291 | continue; | ||
292 | } | ||
293 | Some(it) => it, | ||
294 | }; | ||
295 | |||
296 | // We are in the middle of a matcher. | ||
297 | match op { | ||
298 | OpDelimited::Op(Op::Repeat { tokens, kind, separator }) => { | ||
299 | if matches!(kind, RepeatKind::ZeroOrMore | RepeatKind::ZeroOrOne) { | ||
300 | let mut new_item = item.clone(); | ||
301 | new_item.dot.next(); | ||
302 | let mut vars = Vec::new(); | ||
303 | let bindings = new_item.bindings.last_mut().unwrap(); | ||
304 | collect_vars(&mut vars, tokens); | ||
305 | for var in vars { | ||
306 | bindings.push_empty(&var); | ||
108 | } | 307 | } |
109 | }; | 308 | cur_items.push(new_item); |
110 | if delim.map(|it| it.kind) != rhs.delimiter_kind() { | ||
111 | res.add_err(err!("mismatched delimiter")); | ||
112 | continue; | ||
113 | } | 309 | } |
114 | let mut src = TtIter::new(rhs); | 310 | cur_items.push(MatchState { |
115 | match_tokens(res, tokens, &mut src); | 311 | dot: tokens.iter_delimited(None), |
116 | if src.len() > 0 { | 312 | stack: Default::default(), |
117 | res.add_err(err!("leftover tokens")); | 313 | up: Some(Box::new(item)), |
314 | sep: separator.clone(), | ||
315 | sep_kind: Some(*kind), | ||
316 | sep_parsed: None, | ||
317 | bindings: smallvec![Bindings::default()], | ||
318 | meta_result: None, | ||
319 | is_error: false, | ||
320 | }) | ||
321 | } | ||
322 | OpDelimited::Op(Op::Subtree { tokens, delimiter }) => { | ||
323 | if let Ok(subtree) = src.clone().expect_subtree() { | ||
324 | if subtree.delimiter_kind() == delimiter.map(|it| it.kind) { | ||
325 | item.stack.push(item.dot); | ||
326 | item.dot = tokens.iter_delimited(delimiter.as_ref()); | ||
327 | cur_items.push(item); | ||
328 | } | ||
118 | } | 329 | } |
119 | } | 330 | } |
120 | Op::Var { name, kind, .. } => { | 331 | OpDelimited::Op(Op::Var { kind, name, .. }) => { |
121 | let kind = match kind { | 332 | if let Some(kind) = kind { |
122 | Some(k) => k, | 333 | let mut fork = src.clone(); |
123 | None => { | 334 | let match_res = match_meta_var(kind.as_str(), &mut fork); |
124 | res.add_err(ExpandError::UnexpectedToken); | 335 | match match_res.err { |
125 | continue; | 336 | None => { |
337 | // Some meta variables are optional (e.g. vis) | ||
338 | if match_res.value.is_some() { | ||
339 | item.meta_result = Some((fork, match_res)); | ||
340 | try_push!(bb_items, item); | ||
341 | } else { | ||
342 | item.bindings.last_mut().unwrap().push_optional(name); | ||
343 | item.dot.next(); | ||
344 | cur_items.push(item); | ||
345 | } | ||
346 | } | ||
347 | Some(err) => { | ||
348 | res.add_err(err); | ||
349 | match match_res.value { | ||
350 | Some(fragment) => { | ||
351 | item.bindings | ||
352 | .last_mut() | ||
353 | .unwrap() | ||
354 | .inner | ||
355 | .push((name.clone(), Binding::Fragment(fragment))); | ||
356 | } | ||
357 | _ => {} | ||
358 | } | ||
359 | item.is_error = true; | ||
360 | error_items.push(item); | ||
361 | } | ||
126 | } | 362 | } |
127 | }; | 363 | } |
128 | let ExpandResult { value: matched, err: match_err } = | 364 | } |
129 | match_meta_var(kind.as_str(), src); | 365 | OpDelimited::Op(Op::Leaf(leaf)) => { |
130 | match matched { | 366 | if let Err(err) = match_leaf(&leaf, &mut src.clone()) { |
367 | res.add_err(err); | ||
368 | item.is_error = true; | ||
369 | } else { | ||
370 | item.dot.next(); | ||
371 | } | ||
372 | try_push!(next_items, item); | ||
373 | } | ||
374 | OpDelimited::Open => { | ||
375 | if matches!(src.clone().next(), Some(tt::TokenTree::Subtree(..))) { | ||
376 | item.dot.next(); | ||
377 | try_push!(next_items, item); | ||
378 | } | ||
379 | } | ||
380 | OpDelimited::Close => { | ||
381 | let is_delim_closed = src.peek_n(0).is_none() && !stack.is_empty(); | ||
382 | if is_delim_closed { | ||
383 | item.dot.next(); | ||
384 | try_push!(next_items, item); | ||
385 | } | ||
386 | } | ||
387 | } | ||
388 | } | ||
389 | } | ||
390 | |||
391 | fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { | ||
392 | let mut src = TtIter::new(src); | ||
393 | let mut stack: SmallVec<[TtIter; 1]> = SmallVec::new(); | ||
394 | let mut res = Match::default(); | ||
395 | let mut error_reover_item = None; | ||
396 | |||
397 | let mut cur_items = smallvec![MatchState { | ||
398 | dot: pattern.iter_delimited(None), | ||
399 | stack: Default::default(), | ||
400 | up: None, | ||
401 | sep: None, | ||
402 | sep_kind: None, | ||
403 | sep_parsed: None, | ||
404 | bindings: smallvec![Bindings::default()], | ||
405 | is_error: false, | ||
406 | meta_result: None, | ||
407 | }]; | ||
408 | |||
409 | let mut next_items = vec![]; | ||
410 | |||
411 | loop { | ||
412 | let mut bb_items = SmallVec::new(); | ||
413 | let mut eof_items = SmallVec::new(); | ||
414 | let mut error_items = SmallVec::new(); | ||
415 | |||
416 | stdx::always!(next_items.is_empty()); | ||
417 | |||
418 | match_loop_inner( | ||
419 | src.clone(), | ||
420 | &stack, | ||
421 | &mut res, | ||
422 | &mut cur_items, | ||
423 | &mut bb_items, | ||
424 | &mut next_items, | ||
425 | &mut eof_items, | ||
426 | &mut error_items, | ||
427 | ); | ||
428 | stdx::always!(cur_items.is_empty()); | ||
429 | |||
430 | if error_items.len() > 0 { | ||
431 | error_reover_item = error_items.pop(); | ||
432 | } else if eof_items.len() > 0 { | ||
433 | error_reover_item = Some(eof_items[0].clone()); | ||
434 | } | ||
435 | |||
436 | // We need to do some post processing after the `match_loop_inner`. | ||
437 | // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise, | ||
438 | // either the parse is ambiguous (which should never happen) or there is a syntax error. | ||
439 | if src.peek_n(0).is_none() && stack.is_empty() { | ||
440 | if eof_items.len() == 1 { | ||
441 | // remove all errors, because it is the correct answer ! | ||
442 | res = Match::default(); | ||
443 | res.bindings = eof_items[0].bindings[0].clone(); | ||
444 | } else { | ||
445 | // Error recovery | ||
446 | if error_reover_item.is_some() { | ||
447 | res.bindings = error_reover_item.unwrap().bindings[0].clone(); | ||
448 | } | ||
449 | res.add_err(ExpandError::UnexpectedToken); | ||
450 | } | ||
451 | return res; | ||
452 | } | ||
453 | |||
454 | // If there are no possible next positions AND we aren't waiting for the black-box parser, | ||
455 | // then there is a syntax error. | ||
456 | // | ||
457 | // Another possibility is that we need to call out to parse some rust nonterminal | ||
458 | // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong. | ||
459 | if (bb_items.is_empty() && next_items.is_empty()) | ||
460 | || (!bb_items.is_empty() && !next_items.is_empty()) | ||
461 | || bb_items.len() > 1 | ||
462 | { | ||
463 | res.unmatched_tts += src.len(); | ||
464 | while let Some(it) = stack.pop() { | ||
465 | src = it; | ||
466 | res.unmatched_tts += src.len(); | ||
467 | } | ||
468 | res.add_err(err!("leftover tokens")); | ||
469 | |||
470 | if let Some(mut error_reover_item) = error_reover_item { | ||
471 | res.bindings = error_reover_item.bindings.remove(0); | ||
472 | } | ||
473 | return res; | ||
474 | } | ||
475 | // Dump all possible `next_items` into `cur_items` for the next iteration. | ||
476 | else if !next_items.is_empty() { | ||
477 | // Now process the next token | ||
478 | cur_items.extend(next_items.drain(..)); | ||
479 | |||
480 | match src.next() { | ||
481 | Some(tt::TokenTree::Subtree(subtree)) => { | ||
482 | stack.push(src.clone()); | ||
483 | src = TtIter::new(subtree); | ||
484 | } | ||
485 | None if !stack.is_empty() => src = stack.pop().unwrap(), | ||
486 | _ => (), | ||
487 | } | ||
488 | } | ||
489 | // Finally, we have the case where we need to call the black-box parser to get some | ||
490 | // nonterminal. | ||
491 | else { | ||
492 | stdx::always!(bb_items.len() == 1); | ||
493 | let mut item = bb_items.pop().unwrap(); | ||
494 | |||
495 | if let Some(OpDelimited::Op(Op::Var { name, .. })) = item.dot.peek() { | ||
496 | let (iter, match_res) = item.meta_result.take().unwrap(); | ||
497 | let bindings = item.bindings.last_mut().unwrap(); | ||
498 | match match_res.value { | ||
131 | Some(fragment) => { | 499 | Some(fragment) => { |
132 | res.bindings.inner.insert(name.clone(), Binding::Fragment(fragment)); | 500 | bindings.inner.push((name.clone(), Binding::Fragment(fragment))); |
133 | } | 501 | } |
134 | None if match_err.is_none() => res.bindings.push_optional(name), | 502 | None if match_res.err.is_none() => bindings.push_optional(name), |
135 | _ => {} | 503 | _ => {} |
136 | } | 504 | } |
137 | if let Some(err) = match_err { | 505 | if let Some(err) = match_res.err { |
138 | res.add_err(err); | 506 | res.add_err(err); |
139 | } | 507 | } |
508 | src = iter.clone(); | ||
509 | item.dot.next(); | ||
510 | } else { | ||
511 | unreachable!() | ||
140 | } | 512 | } |
141 | Op::Repeat { tokens: subtree, kind, separator } => { | 513 | cur_items.push(item); |
142 | match_repeat(res, subtree, *kind, separator, src); | ||
143 | } | ||
144 | } | 514 | } |
515 | stdx::always!(!cur_items.is_empty()); | ||
145 | } | 516 | } |
146 | } | 517 | } |
147 | 518 | ||
@@ -173,73 +544,6 @@ fn match_leaf(lhs: &tt::Leaf, src: &mut TtIter) -> Result<(), ExpandError> { | |||
173 | Ok(()) | 544 | Ok(()) |
174 | } | 545 | } |
175 | 546 | ||
176 | fn match_repeat( | ||
177 | res: &mut Match, | ||
178 | pattern: &MetaTemplate, | ||
179 | kind: RepeatKind, | ||
180 | separator: &Option<Separator>, | ||
181 | src: &mut TtIter, | ||
182 | ) { | ||
183 | // Dirty hack to make macro-expansion terminate. | ||
184 | // This should be replaced by a proper macro-by-example implementation | ||
185 | let mut limit = 65536; | ||
186 | let mut counter = 0; | ||
187 | |||
188 | for i in 0.. { | ||
189 | let mut fork = src.clone(); | ||
190 | |||
191 | if let Some(separator) = &separator { | ||
192 | if i != 0 && !fork.eat_separator(separator) { | ||
193 | break; | ||
194 | } | ||
195 | } | ||
196 | |||
197 | let mut nested = Match::default(); | ||
198 | match_tokens(&mut nested, pattern, &mut fork); | ||
199 | if nested.err.is_none() { | ||
200 | limit -= 1; | ||
201 | if limit == 0 { | ||
202 | log::warn!( | ||
203 | "match_lhs exceeded repeat pattern limit => {:#?}\n{:#?}\n{:#?}\n{:#?}", | ||
204 | pattern, | ||
205 | src, | ||
206 | kind, | ||
207 | separator | ||
208 | ); | ||
209 | break; | ||
210 | } | ||
211 | *src = fork; | ||
212 | |||
213 | if let Err(err) = res.bindings.push_nested(counter, nested.bindings) { | ||
214 | res.add_err(err); | ||
215 | } | ||
216 | counter += 1; | ||
217 | if counter == 1 { | ||
218 | if let RepeatKind::ZeroOrOne = kind { | ||
219 | break; | ||
220 | } | ||
221 | } | ||
222 | } else { | ||
223 | break; | ||
224 | } | ||
225 | } | ||
226 | |||
227 | match (kind, counter) { | ||
228 | (RepeatKind::OneOrMore, 0) => { | ||
229 | res.add_err(ExpandError::UnexpectedToken); | ||
230 | } | ||
231 | (_, 0) => { | ||
232 | // Collect all empty variables in subtrees | ||
233 | let mut vars = Vec::new(); | ||
234 | collect_vars(&mut vars, pattern); | ||
235 | for var in vars { | ||
236 | res.bindings.push_empty(&var) | ||
237 | } | ||
238 | } | ||
239 | _ => (), | ||
240 | } | ||
241 | } | ||
242 | |||
243 | fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragment>> { | 547 | fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragment>> { |
244 | let fragment = match kind { | 548 | let fragment = match kind { |
245 | "path" => Path, | 549 | "path" => Path, |
@@ -303,14 +607,14 @@ fn collect_vars(buf: &mut Vec<SmolStr>, pattern: &MetaTemplate) { | |||
303 | } | 607 | } |
304 | 608 | ||
305 | impl<'a> TtIter<'a> { | 609 | impl<'a> TtIter<'a> { |
306 | fn eat_separator(&mut self, separator: &Separator) -> bool { | 610 | fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool { |
307 | let mut fork = self.clone(); | 611 | let mut fork = self.clone(); |
308 | let ok = match separator { | 612 | let ok = match separator { |
309 | Separator::Ident(lhs) => match fork.expect_ident() { | 613 | Separator::Ident(lhs) if idx == 0 => match fork.expect_ident() { |
310 | Ok(rhs) => rhs.text == lhs.text, | 614 | Ok(rhs) => rhs.text == lhs.text, |
311 | _ => false, | 615 | _ => false, |
312 | }, | 616 | }, |
313 | Separator::Literal(lhs) => match fork.expect_literal() { | 617 | Separator::Literal(lhs) if idx == 0 => match fork.expect_literal() { |
314 | Ok(rhs) => match rhs { | 618 | Ok(rhs) => match rhs { |
315 | tt::Leaf::Literal(rhs) => rhs.text == lhs.text, | 619 | tt::Leaf::Literal(rhs) => rhs.text == lhs.text, |
316 | tt::Leaf::Ident(rhs) => rhs.text == lhs.text, | 620 | tt::Leaf::Ident(rhs) => rhs.text == lhs.text, |
@@ -318,10 +622,11 @@ impl<'a> TtIter<'a> { | |||
318 | }, | 622 | }, |
319 | _ => false, | 623 | _ => false, |
320 | }, | 624 | }, |
321 | Separator::Puncts(lhss) => lhss.iter().all(|lhs| match fork.expect_punct() { | 625 | Separator::Puncts(lhss) if idx < lhss.len() => match fork.expect_punct() { |
322 | Ok(rhs) => rhs.char == lhs.char, | 626 | Ok(rhs) => rhs.char == lhss[idx].char, |
323 | _ => false, | 627 | _ => false, |
324 | }), | 628 | }, |
629 | _ => false, | ||
325 | }; | 630 | }; |
326 | if ok { | 631 | if ok { |
327 | *self = fork; | 632 | *self = fork; |
diff --git a/crates/mbe/src/expander/transcriber.rs b/crates/mbe/src/expander/transcriber.rs index 78368a33e..ad9953a7d 100644 --- a/crates/mbe/src/expander/transcriber.rs +++ b/crates/mbe/src/expander/transcriber.rs | |||
@@ -13,13 +13,17 @@ use crate::{ | |||
13 | 13 | ||
14 | impl Bindings { | 14 | impl Bindings { |
15 | fn contains(&self, name: &str) -> bool { | 15 | fn contains(&self, name: &str) -> bool { |
16 | self.inner.contains_key(name) | 16 | self.inner.iter().any(|(n, _)| n == name) |
17 | } | 17 | } |
18 | 18 | ||
19 | fn get(&self, name: &str, nesting: &mut [NestingState]) -> Result<&Fragment, ExpandError> { | 19 | fn get(&self, name: &str, nesting: &mut [NestingState]) -> Result<&Fragment, ExpandError> { |
20 | let mut b = self.inner.get(name).ok_or_else(|| { | 20 | let mut b: &Binding = self |
21 | ExpandError::BindingError(format!("could not find binding `{}`", name)) | 21 | .inner |
22 | })?; | 22 | .iter() |
23 | .find_map(|(n, b)| if n == name { Some(b) } else { None }) | ||
24 | .ok_or_else(|| { | ||
25 | ExpandError::BindingError(format!("could not find binding `{}`", name)) | ||
26 | })?; | ||
23 | for nesting_state in nesting.iter_mut() { | 27 | for nesting_state in nesting.iter_mut() { |
24 | nesting_state.hit = true; | 28 | nesting_state.hit = true; |
25 | b = match b { | 29 | b = match b { |