aboutsummaryrefslogtreecommitdiff
path: root/crates/libsyntax2/src/grammar
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2018-09-08 17:10:20 +0100
committerAleksey Kladov <[email protected]>2018-09-08 17:10:40 +0100
commita5c333c3ed98d539fcadcc723e992f5295d22d5c (patch)
tree30ced64ce9e769e1dfb1242685bb9c46bfd92f19 /crates/libsyntax2/src/grammar
parent3ab9f4ad7fa44cb20c0a13ae69f76ee13e4f53d2 (diff)
Fix yet another parser infinite loop
This commit is an example of fixing a common parser error: infinite loop due to error recovery. This error typically happens when we parse a list of items and fail to parse a specific item at the current position. One choices is to skip a token and try to parse a list item at the next position. This is a good, but not universal, default. When parsing a list of arguments in a function call, you, for example, don't want to skip over `fn`, because it's most likely that it is a function declaration, and not a mistyped arg: ``` fn foo() { quux(1, 2 fn bar() { } ``` Another choice is to bail out of the loop immediately, but it isn't perfect either: sometimes skipping over garbage helps: ``` quux(1, foo:, 92) // should skip over `:`, b/c that's part of `foo::bar` ``` In general, parser tries to balance these two cases, though we don't have a definitive strategy yet. However, if the parser accidentally neither skips over a token, nor breaks out of the loop, then it becomes stuck in the loop infinitely (there's an internal counter to self-check this situation and panic though), and that's exactly what is demonstrated by the test. To fix such situation, first of all, add the test case to tests/data/parser/{err,fuzz-failures}. Then, run ``` RUST_BACKTRACE=short cargo test --package libsyntax2 ```` to verify that parser indeed panics, and to get an idea what grammar production is the culprit (look for `_list` functions!). In this case, I see ``` 10: libsyntax2::grammar::expressions::atom::match_arm_list at crates/libsyntax2/src/grammar/expressions/atom.rs:309 ``` and that's look like it might be a culprit. I verify it by adding `eprintln!("loopy {:?}", p.current());` and indeed I see that this is printed repeatedly. Diagnosing this a bit shows that the problem is that `pattern::pattern` function does not consume anything if the next token is `let`. That is a good default to make cases like ``` let let foo = 92; ``` where the user hasn't typed the pattern yet, to parse in a reasonable they correctly. For match arms, pretty much the single thing we expect is a pattern, so, for a fix, I introduce a special variant of pattern that does not do recovery.
Diffstat (limited to 'crates/libsyntax2/src/grammar')
-rw-r--r--crates/libsyntax2/src/grammar/expressions/atom.rs6
-rw-r--r--crates/libsyntax2/src/grammar/patterns.rs12
2 files changed, 10 insertions, 8 deletions
diff --git a/crates/libsyntax2/src/grammar/expressions/atom.rs b/crates/libsyntax2/src/grammar/expressions/atom.rs
index fdb4718ba..8335c700f 100644
--- a/crates/libsyntax2/src/grammar/expressions/atom.rs
+++ b/crates/libsyntax2/src/grammar/expressions/atom.rs
@@ -323,11 +323,9 @@ fn match_arm_list(p: &mut Parser) {
323// } 323// }
324fn match_arm(p: &mut Parser) -> BlockLike { 324fn match_arm(p: &mut Parser) -> BlockLike {
325 let m = p.start(); 325 let m = p.start();
326 loop { 326 patterns::pattern_r(p, TokenSet::EMPTY);
327 while p.eat(PIPE) {
327 patterns::pattern(p); 328 patterns::pattern(p);
328 if !p.eat(PIPE) {
329 break;
330 }
331 } 329 }
332 if p.eat(IF_KW) { 330 if p.eat(IF_KW) {
333 expr_no_struct(p); 331 expr_no_struct(p);
diff --git a/crates/libsyntax2/src/grammar/patterns.rs b/crates/libsyntax2/src/grammar/patterns.rs
index 6dd3ab2fa..29a55cb46 100644
--- a/crates/libsyntax2/src/grammar/patterns.rs
+++ b/crates/libsyntax2/src/grammar/patterns.rs
@@ -8,7 +8,11 @@ pub(super) const PATTERN_FIRST: TokenSet =
8 ]; 8 ];
9 9
10pub(super) fn pattern(p: &mut Parser) { 10pub(super) fn pattern(p: &mut Parser) {
11 if let Some(lhs) = atom_pat(p) { 11 pattern_r(p, PAT_RECOVERY_SET)
12}
13
14pub(super) fn pattern_r(p: &mut Parser, recovery_set: TokenSet) {
15 if let Some(lhs) = atom_pat(p, recovery_set) {
12 // test range_pat 16 // test range_pat
13 // fn main() { 17 // fn main() {
14 // match 92 { 0 ... 100 => () } 18 // match 92 { 0 ... 100 => () }
@@ -16,7 +20,7 @@ pub(super) fn pattern(p: &mut Parser) {
16 if p.at(DOTDOTDOT) { 20 if p.at(DOTDOTDOT) {
17 let m = lhs.precede(p); 21 let m = lhs.precede(p);
18 p.bump(); 22 p.bump();
19 atom_pat(p); 23 atom_pat(p, recovery_set);
20 m.complete(p, RANGE_PAT); 24 m.complete(p, RANGE_PAT);
21 } 25 }
22 } 26 }
@@ -26,7 +30,7 @@ const PAT_RECOVERY_SET: TokenSet =
26 token_set![LET_KW, IF_KW, WHILE_KW, LOOP_KW, MATCH_KW, R_PAREN, COMMA]; 30 token_set![LET_KW, IF_KW, WHILE_KW, LOOP_KW, MATCH_KW, R_PAREN, COMMA];
27 31
28 32
29fn atom_pat(p: &mut Parser) -> Option<CompletedMarker> { 33fn atom_pat(p: &mut Parser, recovery_set: TokenSet) -> Option<CompletedMarker> {
30 let la0 = p.nth(0); 34 let la0 = p.nth(0);
31 let la1 = p.nth(1); 35 let la1 = p.nth(1);
32 if la0 == REF_KW || la0 == MUT_KW 36 if la0 == REF_KW || la0 == MUT_KW
@@ -56,7 +60,7 @@ fn atom_pat(p: &mut Parser) -> Option<CompletedMarker> {
56 L_PAREN => tuple_pat(p), 60 L_PAREN => tuple_pat(p),
57 L_BRACK => slice_pat(p), 61 L_BRACK => slice_pat(p),
58 _ => { 62 _ => {
59 p.err_recover("expected pattern", PAT_RECOVERY_SET); 63 p.err_recover("expected pattern", recovery_set);
60 return None; 64 return None;
61 } 65 }
62 }; 66 };