diff options
author | Aleksey Kladov <[email protected]> | 2018-09-08 17:10:20 +0100 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2018-09-08 17:10:40 +0100 |
commit | a5c333c3ed98d539fcadcc723e992f5295d22d5c (patch) | |
tree | 30ced64ce9e769e1dfb1242685bb9c46bfd92f19 /crates/libsyntax2/src/grammar | |
parent | 3ab9f4ad7fa44cb20c0a13ae69f76ee13e4f53d2 (diff) |
Fix yet another parser infinite loop
This commit is an example of fixing a common parser error: infinite
loop due to error recovery.
This error typically happens when we parse a list of items and fail to
parse a specific item at the current position.
One choices is to skip a token and try to parse a list item at the
next position. This is a good, but not universal, default. When
parsing a list of arguments in a function call, you, for example,
don't want to skip over `fn`, because it's most likely that it is a
function declaration, and not a mistyped arg:
```
fn foo() {
quux(1, 2
fn bar() {
}
```
Another choice is to bail out of the loop immediately, but it isn't
perfect either: sometimes skipping over garbage helps:
```
quux(1, foo:, 92) // should skip over `:`, b/c that's part of `foo::bar`
```
In general, parser tries to balance these two cases, though we don't
have a definitive strategy yet.
However, if the parser accidentally neither skips over a token, nor
breaks out of the loop, then it becomes stuck in the loop infinitely
(there's an internal counter to self-check this situation and panic
though), and that's exactly what is demonstrated by the test.
To fix such situation, first of all, add the test case to tests/data/parser/{err,fuzz-failures}.
Then, run
```
RUST_BACKTRACE=short cargo test --package libsyntax2
````
to verify that parser indeed panics, and to get an idea what grammar
production is the culprit (look for `_list` functions!).
In this case, I see
```
10: libsyntax2::grammar::expressions::atom::match_arm_list
at crates/libsyntax2/src/grammar/expressions/atom.rs:309
```
and that's look like it might be a culprit. I verify it by adding
`eprintln!("loopy {:?}", p.current());` and indeed I see that this is
printed repeatedly.
Diagnosing this a bit shows that the problem is that
`pattern::pattern` function does not consume anything if the next
token is `let`. That is a good default to make cases like
```
let
let foo = 92;
```
where the user hasn't typed the pattern yet, to parse in a reasonable
they correctly.
For match arms, pretty much the single thing we expect is a pattern,
so, for a fix, I introduce a special variant of pattern that does not
do recovery.
Diffstat (limited to 'crates/libsyntax2/src/grammar')
-rw-r--r-- | crates/libsyntax2/src/grammar/expressions/atom.rs | 6 | ||||
-rw-r--r-- | crates/libsyntax2/src/grammar/patterns.rs | 12 |
2 files changed, 10 insertions, 8 deletions
diff --git a/crates/libsyntax2/src/grammar/expressions/atom.rs b/crates/libsyntax2/src/grammar/expressions/atom.rs index fdb4718ba..8335c700f 100644 --- a/crates/libsyntax2/src/grammar/expressions/atom.rs +++ b/crates/libsyntax2/src/grammar/expressions/atom.rs | |||
@@ -323,11 +323,9 @@ fn match_arm_list(p: &mut Parser) { | |||
323 | // } | 323 | // } |
324 | fn match_arm(p: &mut Parser) -> BlockLike { | 324 | fn match_arm(p: &mut Parser) -> BlockLike { |
325 | let m = p.start(); | 325 | let m = p.start(); |
326 | loop { | 326 | patterns::pattern_r(p, TokenSet::EMPTY); |
327 | while p.eat(PIPE) { | ||
327 | patterns::pattern(p); | 328 | patterns::pattern(p); |
328 | if !p.eat(PIPE) { | ||
329 | break; | ||
330 | } | ||
331 | } | 329 | } |
332 | if p.eat(IF_KW) { | 330 | if p.eat(IF_KW) { |
333 | expr_no_struct(p); | 331 | expr_no_struct(p); |
diff --git a/crates/libsyntax2/src/grammar/patterns.rs b/crates/libsyntax2/src/grammar/patterns.rs index 6dd3ab2fa..29a55cb46 100644 --- a/crates/libsyntax2/src/grammar/patterns.rs +++ b/crates/libsyntax2/src/grammar/patterns.rs | |||
@@ -8,7 +8,11 @@ pub(super) const PATTERN_FIRST: TokenSet = | |||
8 | ]; | 8 | ]; |
9 | 9 | ||
10 | pub(super) fn pattern(p: &mut Parser) { | 10 | pub(super) fn pattern(p: &mut Parser) { |
11 | if let Some(lhs) = atom_pat(p) { | 11 | pattern_r(p, PAT_RECOVERY_SET) |
12 | } | ||
13 | |||
14 | pub(super) fn pattern_r(p: &mut Parser, recovery_set: TokenSet) { | ||
15 | if let Some(lhs) = atom_pat(p, recovery_set) { | ||
12 | // test range_pat | 16 | // test range_pat |
13 | // fn main() { | 17 | // fn main() { |
14 | // match 92 { 0 ... 100 => () } | 18 | // match 92 { 0 ... 100 => () } |
@@ -16,7 +20,7 @@ pub(super) fn pattern(p: &mut Parser) { | |||
16 | if p.at(DOTDOTDOT) { | 20 | if p.at(DOTDOTDOT) { |
17 | let m = lhs.precede(p); | 21 | let m = lhs.precede(p); |
18 | p.bump(); | 22 | p.bump(); |
19 | atom_pat(p); | 23 | atom_pat(p, recovery_set); |
20 | m.complete(p, RANGE_PAT); | 24 | m.complete(p, RANGE_PAT); |
21 | } | 25 | } |
22 | } | 26 | } |
@@ -26,7 +30,7 @@ const PAT_RECOVERY_SET: TokenSet = | |||
26 | token_set![LET_KW, IF_KW, WHILE_KW, LOOP_KW, MATCH_KW, R_PAREN, COMMA]; | 30 | token_set![LET_KW, IF_KW, WHILE_KW, LOOP_KW, MATCH_KW, R_PAREN, COMMA]; |
27 | 31 | ||
28 | 32 | ||
29 | fn atom_pat(p: &mut Parser) -> Option<CompletedMarker> { | 33 | fn atom_pat(p: &mut Parser, recovery_set: TokenSet) -> Option<CompletedMarker> { |
30 | let la0 = p.nth(0); | 34 | let la0 = p.nth(0); |
31 | let la1 = p.nth(1); | 35 | let la1 = p.nth(1); |
32 | if la0 == REF_KW || la0 == MUT_KW | 36 | if la0 == REF_KW || la0 == MUT_KW |
@@ -56,7 +60,7 @@ fn atom_pat(p: &mut Parser) -> Option<CompletedMarker> { | |||
56 | L_PAREN => tuple_pat(p), | 60 | L_PAREN => tuple_pat(p), |
57 | L_BRACK => slice_pat(p), | 61 | L_BRACK => slice_pat(p), |
58 | _ => { | 62 | _ => { |
59 | p.err_recover("expected pattern", PAT_RECOVERY_SET); | 63 | p.err_recover("expected pattern", recovery_set); |
60 | return None; | 64 | return None; |
61 | } | 65 | } |
62 | }; | 66 | }; |