diff options
Diffstat (limited to 'crates/mbe/src/expander/matcher.rs')
-rw-r--r-- | crates/mbe/src/expander/matcher.rs | 817 |
1 files changed, 608 insertions, 209 deletions
diff --git a/crates/mbe/src/expander/matcher.rs b/crates/mbe/src/expander/matcher.rs index 800931cd1..54db76d5d 100644 --- a/crates/mbe/src/expander/matcher.rs +++ b/crates/mbe/src/expander/matcher.rs | |||
@@ -1,17 +1,77 @@ | |||
1 | //! FIXME: write short doc here | 1 | //! An NFA-based parser, which is porting from rustc mbe parsing code |
2 | //! | ||
3 | //! See https://github.com/rust-lang/rust/blob/70b18bc2cbac4712020019f5bf57c00905373205/compiler/rustc_expand/src/mbe/macro_parser.rs | ||
4 | //! Here is a quick intro to how the parser works, copied from rustc: | ||
5 | //! | ||
6 | //! A 'position' is a dot in the middle of a matcher, usually represented as a | ||
7 | //! dot. For example `· a $( a )* a b` is a position, as is `a $( · a )* a b`. | ||
8 | //! | ||
9 | //! The parser walks through the input a character at a time, maintaining a list | ||
10 | //! of threads consistent with the current position in the input string: `cur_items`. | ||
11 | //! | ||
12 | //! As it processes them, it fills up `eof_items` with threads that would be valid if | ||
13 | //! the macro invocation is now over, `bb_items` with threads that are waiting on | ||
14 | //! a Rust non-terminal like `$e:expr`, and `next_items` with threads that are waiting | ||
15 | //! on a particular token. Most of the logic concerns moving the · through the | ||
16 | //! repetitions indicated by Kleene stars. The rules for moving the · without | ||
17 | //! consuming any input are called epsilon transitions. It only advances or calls | ||
18 | //! out to the real Rust parser when no `cur_items` threads remain. | ||
19 | //! | ||
20 | //! Example: | ||
21 | //! | ||
22 | //! ```text, ignore | ||
23 | //! Start parsing a a a a b against [· a $( a )* a b]. | ||
24 | //! | ||
25 | //! Remaining input: a a a a b | ||
26 | //! next: [· a $( a )* a b] | ||
27 | //! | ||
28 | //! - - - Advance over an a. - - - | ||
29 | //! | ||
30 | //! Remaining input: a a a b | ||
31 | //! cur: [a · $( a )* a b] | ||
32 | //! Descend/Skip (first item). | ||
33 | //! next: [a $( · a )* a b] [a $( a )* · a b]. | ||
34 | //! | ||
35 | //! - - - Advance over an a. - - - | ||
36 | //! | ||
37 | //! Remaining input: a a b | ||
38 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | ||
39 | //! Follow epsilon transition: Finish/Repeat (first item) | ||
40 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | ||
41 | //! | ||
42 | //! - - - Advance over an a. - - - (this looks exactly like the last step) | ||
43 | //! | ||
44 | //! Remaining input: a b | ||
45 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | ||
46 | //! Follow epsilon transition: Finish/Repeat (first item) | ||
47 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | ||
48 | //! | ||
49 | //! - - - Advance over an a. - - - (this looks exactly like the last step) | ||
50 | //! | ||
51 | //! Remaining input: b | ||
52 | //! cur: [a $( a · )* a b] [a $( a )* a · b] | ||
53 | //! Follow epsilon transition: Finish/Repeat (first item) | ||
54 | //! next: [a $( a )* · a b] [a $( · a )* a b] [a $( a )* a · b] | ||
55 | //! | ||
56 | //! - - - Advance over a b. - - - | ||
57 | //! | ||
58 | //! Remaining input: '' | ||
59 | //! eof: [a $( a )* a b ·] | ||
60 | //! ``` | ||
61 | |||
62 | use std::rc::Rc; | ||
2 | 63 | ||
3 | use crate::{ | 64 | use crate::{ |
4 | expander::{Binding, Bindings, Fragment}, | 65 | expander::{Binding, Bindings, Fragment}, |
5 | parser::{Op, RepeatKind, Separator}, | 66 | parser::{Op, OpDelimited, OpDelimitedIter, RepeatKind, Separator}, |
6 | subtree_source::SubtreeTokenSource, | ||
7 | tt_iter::TtIter, | 67 | tt_iter::TtIter, |
8 | ExpandError, MetaTemplate, | 68 | ExpandError, MetaTemplate, |
9 | }; | 69 | }; |
10 | 70 | ||
11 | use super::ExpandResult; | 71 | use super::ExpandResult; |
12 | use parser::{FragmentKind::*, TreeSink}; | 72 | use parser::FragmentKind::*; |
13 | use syntax::{SmolStr, SyntaxKind}; | 73 | use smallvec::{smallvec, SmallVec}; |
14 | use tt::buffer::{Cursor, TokenBuffer}; | 74 | use syntax::SmolStr; |
15 | 75 | ||
16 | impl Bindings { | 76 | impl Bindings { |
17 | fn push_optional(&mut self, name: &SmolStr) { | 77 | fn push_optional(&mut self, name: &SmolStr) { |
@@ -25,28 +85,8 @@ impl Bindings { | |||
25 | self.inner.insert(name.clone(), Binding::Empty); | 85 | self.inner.insert(name.clone(), Binding::Empty); |
26 | } | 86 | } |
27 | 87 | ||
28 | fn push_nested(&mut self, idx: usize, nested: Bindings) -> Result<(), ExpandError> { | 88 | fn bindings(&self) -> impl Iterator<Item = &Binding> { |
29 | for (key, value) in nested.inner { | 89 | self.inner.values() |
30 | if !self.inner.contains_key(&key) { | ||
31 | self.inner.insert(key.clone(), Binding::Nested(Vec::new())); | ||
32 | } | ||
33 | match self.inner.get_mut(&key) { | ||
34 | Some(Binding::Nested(it)) => { | ||
35 | // insert empty nested bindings before this one | ||
36 | while it.len() < idx { | ||
37 | it.push(Binding::Nested(vec![])); | ||
38 | } | ||
39 | it.push(value); | ||
40 | } | ||
41 | _ => { | ||
42 | return Err(ExpandError::BindingError(format!( | ||
43 | "could not find binding `{}`", | ||
44 | key | ||
45 | ))); | ||
46 | } | ||
47 | } | ||
48 | } | ||
49 | Ok(()) | ||
50 | } | 90 | } |
51 | } | 91 | } |
52 | 92 | ||
@@ -59,7 +99,7 @@ macro_rules! err { | |||
59 | }; | 99 | }; |
60 | } | 100 | } |
61 | 101 | ||
62 | #[derive(Debug, Default)] | 102 | #[derive(Clone, Debug, Default, PartialEq, Eq)] |
63 | pub(super) struct Match { | 103 | pub(super) struct Match { |
64 | pub(super) bindings: Bindings, | 104 | pub(super) bindings: Bindings, |
65 | /// We currently just keep the first error and count the rest to compare matches. | 105 | /// We currently just keep the first error and count the rest to compare matches. |
@@ -67,6 +107,8 @@ pub(super) struct Match { | |||
67 | pub(super) err_count: usize, | 107 | pub(super) err_count: usize, |
68 | /// How many top-level token trees were left to match. | 108 | /// How many top-level token trees were left to match. |
69 | pub(super) unmatched_tts: usize, | 109 | pub(super) unmatched_tts: usize, |
110 | /// The number of bound variables | ||
111 | pub(super) bound_count: usize, | ||
70 | } | 112 | } |
71 | 113 | ||
72 | impl Match { | 114 | impl Match { |
@@ -78,72 +120,557 @@ impl Match { | |||
78 | } | 120 | } |
79 | 121 | ||
80 | /// Matching errors are added to the `Match`. | 122 | /// Matching errors are added to the `Match`. |
81 | pub(super) fn match_(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { | 123 | pub(super) fn match_(pattern: &MetaTemplate, input: &tt::Subtree) -> Match { |
82 | let mut res = Match::default(); | 124 | let mut res = match_loop(pattern, &input); |
83 | let mut src = TtIter::new(src); | 125 | res.bound_count = count(res.bindings.bindings()); |
126 | return res; | ||
127 | |||
128 | fn count<'a>(bindings: impl Iterator<Item = &'a Binding>) -> usize { | ||
129 | bindings | ||
130 | .map(|it| match it { | ||
131 | Binding::Fragment(_) => 1, | ||
132 | Binding::Empty => 1, | ||
133 | Binding::Nested(it) => count(it.iter()), | ||
134 | }) | ||
135 | .sum() | ||
136 | } | ||
137 | } | ||
138 | |||
139 | #[derive(Debug, Clone)] | ||
140 | enum BindingKind { | ||
141 | Empty(SmolStr), | ||
142 | Optional(SmolStr), | ||
143 | Fragment(SmolStr, Fragment), | ||
144 | Nested(usize, usize), | ||
145 | } | ||
146 | |||
147 | #[derive(Debug, Clone)] | ||
148 | struct BindingsIdx(usize, usize); | ||
149 | |||
150 | #[derive(Debug, Clone)] | ||
151 | enum LinkNode<T> { | ||
152 | Node(T), | ||
153 | Parent { idx: usize, len: usize }, | ||
154 | } | ||
155 | |||
156 | #[derive(Default)] | ||
157 | struct BindingsBuilder { | ||
158 | nodes: Vec<Vec<LinkNode<Rc<BindingKind>>>>, | ||
159 | nested: Vec<Vec<LinkNode<usize>>>, | ||
160 | } | ||
161 | |||
162 | impl BindingsBuilder { | ||
163 | fn alloc(&mut self) -> BindingsIdx { | ||
164 | let idx = self.nodes.len(); | ||
165 | self.nodes.push(Vec::with_capacity(8)); | ||
166 | let nidx = self.nested.len(); | ||
167 | self.nested.push(Vec::with_capacity(8)); | ||
168 | BindingsIdx(idx, nidx) | ||
169 | } | ||
170 | |||
171 | fn copy(&mut self, bindings: &BindingsIdx) -> BindingsIdx { | ||
172 | let idx = copy_parent(bindings.0, &mut self.nodes); | ||
173 | let nidx = copy_parent(bindings.1, &mut self.nested); | ||
174 | return BindingsIdx(idx, nidx); | ||
175 | |||
176 | fn copy_parent<T>(idx: usize, target: &mut Vec<Vec<LinkNode<T>>>) -> usize | ||
177 | where | ||
178 | T: Clone, | ||
179 | { | ||
180 | let new_idx = target.len(); | ||
181 | let len = target[idx].len(); | ||
182 | if len < 4 { | ||
183 | target.push(target[idx].clone()) | ||
184 | } else { | ||
185 | let mut item = Vec::with_capacity(8); | ||
186 | item.push(LinkNode::Parent { idx, len }); | ||
187 | target.push(item); | ||
188 | } | ||
189 | |||
190 | new_idx | ||
191 | } | ||
192 | } | ||
193 | |||
194 | fn push_empty(&mut self, idx: &mut BindingsIdx, var: &SmolStr) { | ||
195 | self.nodes[idx.0].push(LinkNode::Node(Rc::new(BindingKind::Empty(var.clone())))); | ||
196 | } | ||
197 | |||
198 | fn push_optional(&mut self, idx: &mut BindingsIdx, var: &SmolStr) { | ||
199 | self.nodes[idx.0].push(LinkNode::Node(Rc::new(BindingKind::Optional(var.clone())))); | ||
200 | } | ||
201 | |||
202 | fn push_fragment(&mut self, idx: &mut BindingsIdx, var: &SmolStr, fragment: Fragment) { | ||
203 | self.nodes[idx.0] | ||
204 | .push(LinkNode::Node(Rc::new(BindingKind::Fragment(var.clone(), fragment)))); | ||
205 | } | ||
206 | |||
207 | fn push_nested(&mut self, parent: &mut BindingsIdx, child: &BindingsIdx) { | ||
208 | let BindingsIdx(idx, nidx) = self.copy(&child); | ||
209 | self.nodes[parent.0].push(LinkNode::Node(Rc::new(BindingKind::Nested(idx, nidx)))); | ||
210 | } | ||
211 | |||
212 | fn push_default(&mut self, idx: &mut BindingsIdx) { | ||
213 | self.nested[idx.1].push(LinkNode::Node(idx.0)); | ||
214 | let new_idx = self.nodes.len(); | ||
215 | self.nodes.push(Vec::with_capacity(8)); | ||
216 | idx.0 = new_idx; | ||
217 | } | ||
218 | |||
219 | fn build(self, idx: &BindingsIdx) -> Bindings { | ||
220 | let mut bindings = Bindings::default(); | ||
221 | self.build_recur(&mut bindings, self.nodes[idx.0].clone()); | ||
222 | bindings | ||
223 | } | ||
224 | |||
225 | fn build_recur(&self, bindings: &mut Bindings, nodes: Vec<LinkNode<Rc<BindingKind>>>) { | ||
226 | for cmd in self.flatten_nodes(nodes) { | ||
227 | match &*cmd { | ||
228 | BindingKind::Empty(name) => { | ||
229 | bindings.push_empty(name); | ||
230 | } | ||
231 | BindingKind::Optional(name) => { | ||
232 | bindings.push_optional(name); | ||
233 | } | ||
234 | BindingKind::Fragment(name, fragment) => { | ||
235 | bindings.inner.insert(name.clone(), Binding::Fragment(fragment.clone())); | ||
236 | } | ||
237 | BindingKind::Nested(idx, list) => { | ||
238 | let list = self.flatten_nested(*idx, *list); | ||
239 | |||
240 | for (idx, iter) in list.enumerate() { | ||
241 | for (key, value) in &iter.inner { | ||
242 | let bindings = bindings | ||
243 | .inner | ||
244 | .entry(key.clone()) | ||
245 | .or_insert_with(|| Binding::Nested(Vec::new())); | ||
246 | |||
247 | if let Binding::Nested(it) = bindings { | ||
248 | // insert empty nested bindings before this one | ||
249 | while it.len() < idx { | ||
250 | it.push(Binding::Nested(Vec::new())); | ||
251 | } | ||
252 | it.push(value.clone()); | ||
253 | } | ||
254 | } | ||
255 | } | ||
256 | } | ||
257 | } | ||
258 | } | ||
259 | } | ||
84 | 260 | ||
85 | match_tokens(&mut res, pattern, &mut src); | 261 | fn flatten_nested_ref(&self, id: usize, len: usize) -> Vec<Vec<LinkNode<Rc<BindingKind>>>> { |
262 | self.nested[id] | ||
263 | .iter() | ||
264 | .take(len) | ||
265 | .map(|it| match it { | ||
266 | LinkNode::Node(id) => vec![self.nodes[*id].clone()], | ||
267 | LinkNode::Parent { idx, len } => self.flatten_nested_ref(*idx, *len), | ||
268 | }) | ||
269 | .flatten() | ||
270 | .collect() | ||
271 | } | ||
272 | |||
273 | fn flatten_nested<'a>( | ||
274 | &'a self, | ||
275 | idx: usize, | ||
276 | list: usize, | ||
277 | ) -> impl Iterator<Item = Bindings> + 'a { | ||
278 | let last = self.nodes[idx].clone(); | ||
279 | self.nested[list] | ||
280 | .iter() | ||
281 | .map(move |it| match *it { | ||
282 | LinkNode::Node(idx) => vec![self.nodes[idx].clone()], | ||
283 | LinkNode::Parent { idx, len } => self.flatten_nested_ref(idx, len), | ||
284 | }) | ||
285 | .flatten() | ||
286 | .chain(std::iter::once(last)) | ||
287 | .map(move |iter| { | ||
288 | let mut child_bindings = Bindings::default(); | ||
289 | self.build_recur(&mut child_bindings, iter); | ||
290 | child_bindings | ||
291 | }) | ||
292 | } | ||
86 | 293 | ||
87 | if src.len() > 0 { | 294 | fn flatten_nodes_ref(&self, id: usize, len: usize) -> Vec<Rc<BindingKind>> { |
88 | res.unmatched_tts += src.len(); | 295 | self.nodes[id] |
89 | res.add_err(err!("leftover tokens")); | 296 | .iter() |
297 | .take(len) | ||
298 | .map(|it| match it { | ||
299 | LinkNode::Node(it) => vec![it.clone()], | ||
300 | LinkNode::Parent { idx, len } => self.flatten_nodes_ref(*idx, *len), | ||
301 | }) | ||
302 | .flatten() | ||
303 | .collect() | ||
90 | } | 304 | } |
91 | 305 | ||
92 | res | 306 | fn flatten_nodes<'a>( |
307 | &'a self, | ||
308 | nodes: Vec<LinkNode<Rc<BindingKind>>>, | ||
309 | ) -> impl Iterator<Item = Rc<BindingKind>> + 'a { | ||
310 | nodes | ||
311 | .into_iter() | ||
312 | .map(move |it| match it { | ||
313 | LinkNode::Node(it) => vec![it], | ||
314 | LinkNode::Parent { idx, len } => self.flatten_nodes_ref(idx, len), | ||
315 | }) | ||
316 | .flatten() | ||
317 | } | ||
93 | } | 318 | } |
94 | 319 | ||
95 | fn match_tokens(res: &mut Match, pattern: &MetaTemplate, src: &mut TtIter) { | 320 | #[derive(Debug, Clone)] |
96 | for op in pattern.iter() { | 321 | struct MatchState<'t> { |
97 | match op { | 322 | /// The position of the "dot" in this matcher |
98 | Op::Leaf(lhs) => { | 323 | dot: OpDelimitedIter<'t>, |
99 | if let Err(err) = match_leaf(lhs, src) { | 324 | |
100 | res.add_err(err); | 325 | /// Token subtree stack |
101 | continue; | 326 | /// When matching against matchers with nested delimited submatchers (e.g., `pat ( pat ( .. ) |
327 | /// pat ) pat`), we need to keep track of the matchers we are descending into. This stack does | ||
328 | /// that where the bottom of the stack is the outermost matcher. | ||
329 | stack: SmallVec<[OpDelimitedIter<'t>; 4]>, | ||
330 | |||
331 | /// The "parent" matcher position if we are in a repetition. That is, the matcher position just | ||
332 | /// before we enter the repetition. | ||
333 | up: Option<Box<MatchState<'t>>>, | ||
334 | |||
335 | /// The separator if we are in a repetition. | ||
336 | sep: Option<Separator>, | ||
337 | |||
338 | /// The KleeneOp of this sequence if we are in a repetition. | ||
339 | sep_kind: Option<RepeatKind>, | ||
340 | |||
341 | /// Number of tokens of seperator parsed | ||
342 | sep_parsed: Option<usize>, | ||
343 | |||
344 | /// Matched meta variables bindings | ||
345 | bindings: BindingsIdx, | ||
346 | |||
347 | /// Cached result of meta variable parsing | ||
348 | meta_result: Option<(TtIter<'t>, ExpandResult<Option<Fragment>>)>, | ||
349 | |||
350 | /// Is error occuried in this state, will `poised` to "parent" | ||
351 | is_error: bool, | ||
352 | } | ||
353 | |||
354 | /// Process the matcher positions of `cur_items` until it is empty. In the process, this will | ||
355 | /// produce more items in `next_items`, `eof_items`, and `bb_items`. | ||
356 | /// | ||
357 | /// For more info about the how this happens, see the module-level doc comments and the inline | ||
358 | /// comments of this function. | ||
359 | /// | ||
360 | /// # Parameters | ||
361 | /// | ||
362 | /// - `src`: the current token of the parser. | ||
363 | /// - `stack`: the "parent" frames of the token tree | ||
364 | /// - `res`: the match result to store errors | ||
365 | /// - `cur_items`: the set of current items to be processed. This should be empty by the end of a | ||
366 | /// successful execution of this function. | ||
367 | /// - `next_items`: the set of newly generated items. These are used to replenish `cur_items` in | ||
368 | /// the function `parse`. | ||
369 | /// - `eof_items`: the set of items that would be valid if this was the EOF. | ||
370 | /// - `bb_items`: the set of items that are waiting for the black-box parser. | ||
371 | /// - `error_items`: the set of items in errors, used for error-resilient parsing | ||
372 | fn match_loop_inner<'t>( | ||
373 | src: TtIter<'t>, | ||
374 | stack: &[TtIter<'t>], | ||
375 | res: &mut Match, | ||
376 | bindings_builder: &mut BindingsBuilder, | ||
377 | cur_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
378 | bb_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
379 | next_items: &mut Vec<MatchState<'t>>, | ||
380 | eof_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
381 | error_items: &mut SmallVec<[MatchState<'t>; 1]>, | ||
382 | ) { | ||
383 | macro_rules! try_push { | ||
384 | ($items: expr, $it:expr) => { | ||
385 | if $it.is_error { | ||
386 | error_items.push($it); | ||
387 | } else { | ||
388 | $items.push($it); | ||
389 | } | ||
390 | }; | ||
391 | } | ||
392 | |||
393 | while let Some(mut item) = cur_items.pop() { | ||
394 | while item.dot.is_eof() { | ||
395 | match item.stack.pop() { | ||
396 | Some(frame) => { | ||
397 | item.dot = frame; | ||
398 | item.dot.next(); | ||
102 | } | 399 | } |
400 | None => break, | ||
103 | } | 401 | } |
104 | Op::Subtree { tokens, delimiter: delim } => { | 402 | } |
105 | let rhs = match src.expect_subtree() { | 403 | let op = match item.dot.peek() { |
106 | Ok(s) => s, | 404 | None => { |
107 | Err(()) => { | 405 | // We are at or past the end of the matcher of `item`. |
108 | res.add_err(err!("expected subtree")); | 406 | if item.up.is_some() { |
109 | continue; | 407 | if item.sep_parsed.is_none() { |
408 | // Get the `up` matcher | ||
409 | let mut new_pos = *item.up.clone().unwrap(); | ||
410 | new_pos.bindings = bindings_builder.copy(&new_pos.bindings); | ||
411 | // Add matches from this repetition to the `matches` of `up` | ||
412 | bindings_builder.push_nested(&mut new_pos.bindings, &item.bindings); | ||
413 | |||
414 | // Move the "dot" past the repetition in `up` | ||
415 | new_pos.dot.next(); | ||
416 | new_pos.is_error = new_pos.is_error || item.is_error; | ||
417 | cur_items.push(new_pos); | ||
418 | } | ||
419 | |||
420 | // Check if we need a separator. | ||
421 | // We check the separator one by one | ||
422 | let sep_idx = *item.sep_parsed.as_ref().unwrap_or(&0); | ||
423 | let sep_len = item.sep.as_ref().map_or(0, Separator::tt_count); | ||
424 | if item.sep.is_some() && sep_idx != sep_len { | ||
425 | let sep = item.sep.as_ref().unwrap(); | ||
426 | if src.clone().expect_separator(&sep, sep_idx) { | ||
427 | item.dot.next(); | ||
428 | item.sep_parsed = Some(sep_idx + 1); | ||
429 | try_push!(next_items, item); | ||
430 | } | ||
110 | } | 431 | } |
111 | }; | 432 | // We don't need a separator. Move the "dot" back to the beginning of the matcher |
112 | if delim.map(|it| it.kind) != rhs.delimiter_kind() { | 433 | // and try to match again UNLESS we are only allowed to have _one_ repetition. |
113 | res.add_err(err!("mismatched delimiter")); | 434 | else if item.sep_kind != Some(RepeatKind::ZeroOrOne) { |
114 | continue; | 435 | item.dot = item.dot.reset(); |
436 | item.sep_parsed = None; | ||
437 | bindings_builder.push_default(&mut item.bindings); | ||
438 | cur_items.push(item); | ||
439 | } | ||
440 | } else { | ||
441 | // If we are not in a repetition, then being at the end of a matcher means that we have | ||
442 | // reached the potential end of the input. | ||
443 | try_push!(eof_items, item); | ||
115 | } | 444 | } |
116 | let mut src = TtIter::new(rhs); | 445 | continue; |
117 | match_tokens(res, tokens, &mut src); | 446 | } |
118 | if src.len() > 0 { | 447 | Some(it) => it, |
119 | res.add_err(err!("leftover tokens")); | 448 | }; |
449 | |||
450 | // We are in the middle of a matcher. | ||
451 | match op { | ||
452 | OpDelimited::Op(Op::Repeat { tokens, kind, separator }) => { | ||
453 | if matches!(kind, RepeatKind::ZeroOrMore | RepeatKind::ZeroOrOne) { | ||
454 | let mut new_item = item.clone(); | ||
455 | new_item.bindings = bindings_builder.copy(&new_item.bindings); | ||
456 | new_item.dot.next(); | ||
457 | let mut vars = Vec::new(); | ||
458 | collect_vars(&mut vars, tokens); | ||
459 | for var in vars { | ||
460 | bindings_builder.push_empty(&mut new_item.bindings, &var); | ||
461 | } | ||
462 | cur_items.push(new_item); | ||
120 | } | 463 | } |
464 | cur_items.push(MatchState { | ||
465 | dot: tokens.iter_delimited(None), | ||
466 | stack: Default::default(), | ||
467 | up: Some(Box::new(item)), | ||
468 | sep: separator.clone(), | ||
469 | sep_kind: Some(*kind), | ||
470 | sep_parsed: None, | ||
471 | bindings: bindings_builder.alloc(), | ||
472 | meta_result: None, | ||
473 | is_error: false, | ||
474 | }) | ||
121 | } | 475 | } |
122 | Op::Var { name, kind, .. } => { | 476 | OpDelimited::Op(Op::Subtree { tokens, delimiter }) => { |
123 | let kind = match kind { | 477 | if let Ok(subtree) = src.clone().expect_subtree() { |
124 | Some(k) => k, | 478 | if subtree.delimiter_kind() == delimiter.map(|it| it.kind) { |
125 | None => { | 479 | item.stack.push(item.dot); |
126 | res.add_err(ExpandError::UnexpectedToken); | 480 | item.dot = tokens.iter_delimited(delimiter.as_ref()); |
127 | continue; | 481 | cur_items.push(item); |
128 | } | 482 | } |
129 | }; | 483 | } |
130 | let ExpandResult { value: matched, err: match_err } = | 484 | } |
131 | match_meta_var(kind.as_str(), src); | 485 | OpDelimited::Op(Op::Var { kind, name, .. }) => { |
132 | match matched { | 486 | if let Some(kind) = kind { |
487 | let mut fork = src.clone(); | ||
488 | let match_res = match_meta_var(kind.as_str(), &mut fork); | ||
489 | match match_res.err { | ||
490 | None => { | ||
491 | // Some meta variables are optional (e.g. vis) | ||
492 | if match_res.value.is_some() { | ||
493 | item.meta_result = Some((fork, match_res)); | ||
494 | try_push!(bb_items, item); | ||
495 | } else { | ||
496 | bindings_builder.push_optional(&mut item.bindings, &name); | ||
497 | item.dot.next(); | ||
498 | cur_items.push(item); | ||
499 | } | ||
500 | } | ||
501 | Some(err) => { | ||
502 | res.add_err(err); | ||
503 | match match_res.value { | ||
504 | Some(fragment) => { | ||
505 | bindings_builder.push_fragment( | ||
506 | &mut item.bindings, | ||
507 | &name, | ||
508 | fragment, | ||
509 | ); | ||
510 | } | ||
511 | _ => {} | ||
512 | } | ||
513 | item.is_error = true; | ||
514 | error_items.push(item); | ||
515 | } | ||
516 | } | ||
517 | } | ||
518 | } | ||
519 | OpDelimited::Op(Op::Leaf(leaf)) => { | ||
520 | if let Err(err) = match_leaf(&leaf, &mut src.clone()) { | ||
521 | res.add_err(err); | ||
522 | item.is_error = true; | ||
523 | } else { | ||
524 | item.dot.next(); | ||
525 | } | ||
526 | try_push!(next_items, item); | ||
527 | } | ||
528 | OpDelimited::Open => { | ||
529 | if matches!(src.clone().next(), Some(tt::TokenTree::Subtree(..))) { | ||
530 | item.dot.next(); | ||
531 | try_push!(next_items, item); | ||
532 | } | ||
533 | } | ||
534 | OpDelimited::Close => { | ||
535 | let is_delim_closed = src.peek_n(0).is_none() && !stack.is_empty(); | ||
536 | if is_delim_closed { | ||
537 | item.dot.next(); | ||
538 | try_push!(next_items, item); | ||
539 | } | ||
540 | } | ||
541 | } | ||
542 | } | ||
543 | } | ||
544 | |||
545 | fn match_loop(pattern: &MetaTemplate, src: &tt::Subtree) -> Match { | ||
546 | let mut src = TtIter::new(src); | ||
547 | let mut stack: SmallVec<[TtIter; 1]> = SmallVec::new(); | ||
548 | let mut res = Match::default(); | ||
549 | let mut error_reover_item = None; | ||
550 | |||
551 | let mut bindings_builder = BindingsBuilder::default(); | ||
552 | |||
553 | let mut cur_items = smallvec![MatchState { | ||
554 | dot: pattern.iter_delimited(None), | ||
555 | stack: Default::default(), | ||
556 | up: None, | ||
557 | sep: None, | ||
558 | sep_kind: None, | ||
559 | sep_parsed: None, | ||
560 | bindings: bindings_builder.alloc(), | ||
561 | is_error: false, | ||
562 | meta_result: None, | ||
563 | }]; | ||
564 | |||
565 | let mut next_items = vec![]; | ||
566 | |||
567 | loop { | ||
568 | let mut bb_items = SmallVec::new(); | ||
569 | let mut eof_items = SmallVec::new(); | ||
570 | let mut error_items = SmallVec::new(); | ||
571 | |||
572 | stdx::always!(next_items.is_empty()); | ||
573 | |||
574 | match_loop_inner( | ||
575 | src.clone(), | ||
576 | &stack, | ||
577 | &mut res, | ||
578 | &mut bindings_builder, | ||
579 | &mut cur_items, | ||
580 | &mut bb_items, | ||
581 | &mut next_items, | ||
582 | &mut eof_items, | ||
583 | &mut error_items, | ||
584 | ); | ||
585 | stdx::always!(cur_items.is_empty()); | ||
586 | |||
587 | if error_items.len() > 0 { | ||
588 | error_reover_item = error_items.pop().map(|it| it.bindings); | ||
589 | } else if eof_items.len() > 0 { | ||
590 | error_reover_item = Some(eof_items[0].bindings.clone()); | ||
591 | } | ||
592 | |||
593 | // We need to do some post processing after the `match_loop_inner`. | ||
594 | // If we reached the EOF, check that there is EXACTLY ONE possible matcher. Otherwise, | ||
595 | // either the parse is ambiguous (which should never happen) or there is a syntax error. | ||
596 | if src.peek_n(0).is_none() && stack.is_empty() { | ||
597 | if eof_items.len() == 1 { | ||
598 | // remove all errors, because it is the correct answer ! | ||
599 | res = Match::default(); | ||
600 | res.bindings = bindings_builder.build(&eof_items[0].bindings); | ||
601 | } else { | ||
602 | // Error recovery | ||
603 | if error_reover_item.is_some() { | ||
604 | res.bindings = bindings_builder.build(&error_reover_item.unwrap()); | ||
605 | } | ||
606 | res.add_err(ExpandError::UnexpectedToken); | ||
607 | } | ||
608 | return res; | ||
609 | } | ||
610 | |||
611 | // If there are no possible next positions AND we aren't waiting for the black-box parser, | ||
612 | // then there is a syntax error. | ||
613 | // | ||
614 | // Another possibility is that we need to call out to parse some rust nonterminal | ||
615 | // (black-box) parser. However, if there is not EXACTLY ONE of these, something is wrong. | ||
616 | if (bb_items.is_empty() && next_items.is_empty()) | ||
617 | || (!bb_items.is_empty() && !next_items.is_empty()) | ||
618 | || bb_items.len() > 1 | ||
619 | { | ||
620 | res.unmatched_tts += src.len(); | ||
621 | while let Some(it) = stack.pop() { | ||
622 | src = it; | ||
623 | res.unmatched_tts += src.len(); | ||
624 | } | ||
625 | res.add_err(err!("leftover tokens")); | ||
626 | |||
627 | if let Some(error_reover_item) = error_reover_item { | ||
628 | res.bindings = bindings_builder.build(&error_reover_item); | ||
629 | } | ||
630 | return res; | ||
631 | } | ||
632 | // Dump all possible `next_items` into `cur_items` for the next iteration. | ||
633 | else if !next_items.is_empty() { | ||
634 | // Now process the next token | ||
635 | cur_items.extend(next_items.drain(..)); | ||
636 | |||
637 | match src.next() { | ||
638 | Some(tt::TokenTree::Subtree(subtree)) => { | ||
639 | stack.push(src.clone()); | ||
640 | src = TtIter::new(subtree); | ||
641 | } | ||
642 | None if !stack.is_empty() => src = stack.pop().unwrap(), | ||
643 | _ => (), | ||
644 | } | ||
645 | } | ||
646 | // Finally, we have the case where we need to call the black-box parser to get some | ||
647 | // nonterminal. | ||
648 | else { | ||
649 | stdx::always!(bb_items.len() == 1); | ||
650 | let mut item = bb_items.pop().unwrap(); | ||
651 | |||
652 | if let Some(OpDelimited::Op(Op::Var { name, .. })) = item.dot.peek() { | ||
653 | let (iter, match_res) = item.meta_result.take().unwrap(); | ||
654 | match match_res.value { | ||
133 | Some(fragment) => { | 655 | Some(fragment) => { |
134 | res.bindings.inner.insert(name.clone(), Binding::Fragment(fragment)); | 656 | bindings_builder.push_fragment(&mut item.bindings, &name, fragment); |
657 | } | ||
658 | None if match_res.err.is_none() => { | ||
659 | bindings_builder.push_optional(&mut item.bindings, &name); | ||
135 | } | 660 | } |
136 | None if match_err.is_none() => res.bindings.push_optional(name), | ||
137 | _ => {} | 661 | _ => {} |
138 | } | 662 | } |
139 | if let Some(err) = match_err { | 663 | if let Some(err) = match_res.err { |
140 | res.add_err(err); | 664 | res.add_err(err); |
141 | } | 665 | } |
666 | src = iter.clone(); | ||
667 | item.dot.next(); | ||
668 | } else { | ||
669 | unreachable!() | ||
142 | } | 670 | } |
143 | Op::Repeat { tokens: subtree, kind, separator } => { | 671 | cur_items.push(item); |
144 | match_repeat(res, subtree, *kind, separator, src); | ||
145 | } | ||
146 | } | 672 | } |
673 | stdx::always!(!cur_items.is_empty()); | ||
147 | } | 674 | } |
148 | } | 675 | } |
149 | 676 | ||
@@ -175,73 +702,6 @@ fn match_leaf(lhs: &tt::Leaf, src: &mut TtIter) -> Result<(), ExpandError> { | |||
175 | Ok(()) | 702 | Ok(()) |
176 | } | 703 | } |
177 | 704 | ||
178 | fn match_repeat( | ||
179 | res: &mut Match, | ||
180 | pattern: &MetaTemplate, | ||
181 | kind: RepeatKind, | ||
182 | separator: &Option<Separator>, | ||
183 | src: &mut TtIter, | ||
184 | ) { | ||
185 | // Dirty hack to make macro-expansion terminate. | ||
186 | // This should be replaced by a proper macro-by-example implementation | ||
187 | let mut limit = 65536; | ||
188 | let mut counter = 0; | ||
189 | |||
190 | for i in 0.. { | ||
191 | let mut fork = src.clone(); | ||
192 | |||
193 | if let Some(separator) = &separator { | ||
194 | if i != 0 && !fork.eat_separator(separator) { | ||
195 | break; | ||
196 | } | ||
197 | } | ||
198 | |||
199 | let mut nested = Match::default(); | ||
200 | match_tokens(&mut nested, pattern, &mut fork); | ||
201 | if nested.err.is_none() { | ||
202 | limit -= 1; | ||
203 | if limit == 0 { | ||
204 | log::warn!( | ||
205 | "match_lhs exceeded repeat pattern limit => {:#?}\n{:#?}\n{:#?}\n{:#?}", | ||
206 | pattern, | ||
207 | src, | ||
208 | kind, | ||
209 | separator | ||
210 | ); | ||
211 | break; | ||
212 | } | ||
213 | *src = fork; | ||
214 | |||
215 | if let Err(err) = res.bindings.push_nested(counter, nested.bindings) { | ||
216 | res.add_err(err); | ||
217 | } | ||
218 | counter += 1; | ||
219 | if counter == 1 { | ||
220 | if let RepeatKind::ZeroOrOne = kind { | ||
221 | break; | ||
222 | } | ||
223 | } | ||
224 | } else { | ||
225 | break; | ||
226 | } | ||
227 | } | ||
228 | |||
229 | match (kind, counter) { | ||
230 | (RepeatKind::OneOrMore, 0) => { | ||
231 | res.add_err(ExpandError::UnexpectedToken); | ||
232 | } | ||
233 | (_, 0) => { | ||
234 | // Collect all empty variables in subtrees | ||
235 | let mut vars = Vec::new(); | ||
236 | collect_vars(&mut vars, pattern); | ||
237 | for var in vars { | ||
238 | res.bindings.push_empty(&var) | ||
239 | } | ||
240 | } | ||
241 | _ => (), | ||
242 | } | ||
243 | } | ||
244 | |||
245 | fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragment>> { | 705 | fn match_meta_var(kind: &str, input: &mut TtIter) -> ExpandResult<Option<Fragment>> { |
246 | let fragment = match kind { | 706 | let fragment = match kind { |
247 | "path" => Path, | 707 | "path" => Path, |
@@ -305,14 +765,14 @@ fn collect_vars(buf: &mut Vec<SmolStr>, pattern: &MetaTemplate) { | |||
305 | } | 765 | } |
306 | 766 | ||
307 | impl<'a> TtIter<'a> { | 767 | impl<'a> TtIter<'a> { |
308 | fn eat_separator(&mut self, separator: &Separator) -> bool { | 768 | fn expect_separator(&mut self, separator: &Separator, idx: usize) -> bool { |
309 | let mut fork = self.clone(); | 769 | let mut fork = self.clone(); |
310 | let ok = match separator { | 770 | let ok = match separator { |
311 | Separator::Ident(lhs) => match fork.expect_ident() { | 771 | Separator::Ident(lhs) if idx == 0 => match fork.expect_ident() { |
312 | Ok(rhs) => rhs.text == lhs.text, | 772 | Ok(rhs) => rhs.text == lhs.text, |
313 | _ => false, | 773 | _ => false, |
314 | }, | 774 | }, |
315 | Separator::Literal(lhs) => match fork.expect_literal() { | 775 | Separator::Literal(lhs) if idx == 0 => match fork.expect_literal() { |
316 | Ok(rhs) => match rhs { | 776 | Ok(rhs) => match rhs { |
317 | tt::Leaf::Literal(rhs) => rhs.text == lhs.text, | 777 | tt::Leaf::Literal(rhs) => rhs.text == lhs.text, |
318 | tt::Leaf::Ident(rhs) => rhs.text == lhs.text, | 778 | tt::Leaf::Ident(rhs) => rhs.text == lhs.text, |
@@ -320,10 +780,11 @@ impl<'a> TtIter<'a> { | |||
320 | }, | 780 | }, |
321 | _ => false, | 781 | _ => false, |
322 | }, | 782 | }, |
323 | Separator::Puncts(lhss) => lhss.iter().all(|lhs| match fork.expect_punct() { | 783 | Separator::Puncts(lhss) if idx < lhss.len() => match fork.expect_punct() { |
324 | Ok(rhs) => rhs.char == lhs.char, | 784 | Ok(rhs) => rhs.char == lhss[idx].char, |
325 | _ => false, | 785 | _ => false, |
326 | }), | 786 | }, |
787 | _ => false, | ||
327 | }; | 788 | }; |
328 | if ok { | 789 | if ok { |
329 | *self = fork; | 790 | *self = fork; |
@@ -409,68 +870,6 @@ impl<'a> TtIter<'a> { | |||
409 | .into()) | 870 | .into()) |
410 | } | 871 | } |
411 | 872 | ||
412 | fn expect_fragment( | ||
413 | &mut self, | ||
414 | fragment_kind: parser::FragmentKind, | ||
415 | ) -> ExpandResult<Option<tt::TokenTree>> { | ||
416 | struct OffsetTokenSink<'a> { | ||
417 | cursor: Cursor<'a>, | ||
418 | error: bool, | ||
419 | } | ||
420 | |||
421 | impl<'a> TreeSink for OffsetTokenSink<'a> { | ||
422 | fn token(&mut self, kind: SyntaxKind, mut n_tokens: u8) { | ||
423 | if kind == SyntaxKind::LIFETIME_IDENT { | ||
424 | n_tokens = 2; | ||
425 | } | ||
426 | for _ in 0..n_tokens { | ||
427 | self.cursor = self.cursor.bump_subtree(); | ||
428 | } | ||
429 | } | ||
430 | fn start_node(&mut self, _kind: SyntaxKind) {} | ||
431 | fn finish_node(&mut self) {} | ||
432 | fn error(&mut self, _error: parser::ParseError) { | ||
433 | self.error = true; | ||
434 | } | ||
435 | } | ||
436 | |||
437 | let buffer = TokenBuffer::from_tokens(&self.inner.as_slice()); | ||
438 | let mut src = SubtreeTokenSource::new(&buffer); | ||
439 | let mut sink = OffsetTokenSink { cursor: buffer.begin(), error: false }; | ||
440 | |||
441 | parser::parse_fragment(&mut src, &mut sink, fragment_kind); | ||
442 | |||
443 | let mut err = None; | ||
444 | if !sink.cursor.is_root() || sink.error { | ||
445 | err = Some(err!("expected {:?}", fragment_kind)); | ||
446 | } | ||
447 | |||
448 | let mut curr = buffer.begin(); | ||
449 | let mut res = vec![]; | ||
450 | |||
451 | if sink.cursor.is_root() { | ||
452 | while curr != sink.cursor { | ||
453 | if let Some(token) = curr.token_tree() { | ||
454 | res.push(token); | ||
455 | } | ||
456 | curr = curr.bump(); | ||
457 | } | ||
458 | } | ||
459 | self.inner = self.inner.as_slice()[res.len()..].iter(); | ||
460 | if res.len() == 0 && err.is_none() { | ||
461 | err = Some(err!("no tokens consumed")); | ||
462 | } | ||
463 | let res = match res.len() { | ||
464 | 1 => Some(res[0].cloned()), | ||
465 | 0 => None, | ||
466 | _ => Some(tt::TokenTree::Subtree(tt::Subtree { | ||
467 | delimiter: None, | ||
468 | token_trees: res.into_iter().map(|it| it.cloned()).collect(), | ||
469 | })), | ||
470 | }; | ||
471 | ExpandResult { value: res, err } | ||
472 | } | ||
473 | |||
474 | fn eat_vis(&mut self) -> Option<tt::TokenTree> { | 873 | fn eat_vis(&mut self) -> Option<tt::TokenTree> { |
475 | let mut fork = self.clone(); | 874 | let mut fork = self.clone(); |
476 | match fork.expect_fragment(Visibility) { | 875 | match fork.expect_fragment(Visibility) { |