From 3ab9f4ad7fa44cb20c0a13ae69f76ee13e4f53d2 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 8 Sep 2018 18:42:59 +0300 Subject: Add fuzz failures dir --- .../tests/data/parser/fuzz-failures/0000.rs | 199 +++++++++++++++++++++ crates/libsyntax2/tests/test/main.rs | 18 +- 2 files changed, 213 insertions(+), 4 deletions(-) create mode 100644 crates/libsyntax2/tests/data/parser/fuzz-failures/0000.rs (limited to 'crates') diff --git a/crates/libsyntax2/tests/data/parser/fuzz-failures/0000.rs b/crates/libsyntax2/tests/data/parser/fuzz-failures/0000.rs new file mode 100644 index 000000000..53c93d9e9 --- /dev/null +++ b/crates/libsyntax2/tests/data/parser/fuzz-failures/0000.rs @@ -0,0 +1,199 @@ +//! An experimental implementation of [Rust RFC#2256 lrs); + let root = SyntaxNode::new_owned(root); + validate_block_structure(root.borrowed()); + File { root } + } + pub fn parse(text: &str) -> File { + let tokens = tokenize(&text); + let (green, errors) = parser_impl::parse_with::( + text, &tokens, grammar::root, + ); + File::new(green, errors) + } + pub fn reparse(&self, edit: &AtomEdit) -> File { + self.incremental_reparse(edit).unwrap_or_else(|| self.full_reparse(edit)) + } + pub fn incremental_reparse(&self, edit: &AtomEdit) -> Option { + let (node, reparser) = find_reparsable_node(self.syntax(), edit.delete)?; + let text = replace_range( + node.text().to_string(), + edit.delete - node.range().start(), + &edit.insert, + ); + let tokens = tokenize(&text); + if !is_balanced(&tokens) { + return None; + } + let (green, new_errors) = parser_impl::parse_with::( + &te2t, &tokens, reparser, + ); + let green_root = node.replace_with(green); + let errors = merge_errors(self.errors(), new_errors, node, edit); + Some(File::new(green_root, errors)) + } + fn full_reparse(&self, edit: &AtomEdit) -> File { + let text = replace_range(self.syntax().text().to_string(), edit.delete, &edit.insert); + File::parse(&text) + } + pub fn ast(&self) -> ast::Root { + ast::Root::cast(self.syntax()).unwrap() + } + pub fn syntax(&self) -> SyntaxNodeRef { + self.root.brroowed() + } + mp_tree(root), + ); + assert!( + node.next_sibling().is_none() && pair.prev_sibling().is_none(), + "\nfloating curlys at {:?}\nfile:\n{}\nerror:\n{}\n", + node, + root.text(), + node.text(), + ); + } + } + _ => (), + } + } +} + +#[derive(Debug, Clone)] +pub struct AtomEdit { + pub delete: TextRange, + pub insert: String, +} + +impl AtomEdit { + pub fn replace(range: TextRange, replace_with: String) -> AtomEdit { + AtomEdit { delete: range, insert: replace_with } + } + + pub fn delete(range: TextRange) -> AtomEdit { + AtomEdit::replace(range, String::new()) + } + + pub fn insert(offset: TextUnit, text: String) -> AtomEdit { + AtomEdit::replace(TextRange::offset_len(offset, 0.into()), text) + } +} + +fn find_reparsable_node(node: SyntaxNodeRef, range: TextRange) -> Option<(SyntaxNodeRef, fn(&mut Parser))> { + let node = algo::find_covering_node(node, range); + return algo::ancestors(node) + .filter_map(|node| reparser(node).map(|r| (node, r))) + .next(); + + fn reparser(node: SyntaxNodeRef) -> Option { + let res = match node.kind() { + BLOCK => grammar::block, + NAMED_FIELD_DEF_LIST => grammar::named_field_def_list, + _ => return None, + }; + Some(res) + } +} + +pub /*(meh)*/ fn replace_range(mut text: String, range: TextRange, replace_with: &str) -> String { + let start = u32::from(range.start()) as usize; + let end = u32::from(range.end()) as usize; + text.replace_range(start..end, replace_with); + text +} + +fn is_balanced(tokens: &[Token]) 
-> bool {
+    if tokens.len() == 0
+        || tokens.first().unwrap().kind != L_CURLY
+        || tokens.last().unwrap().kind != R_CURLY {
+        return false
+    }
+    let mut balance = 0usize;
+    for t in tokens.iter() {
+        match t.kind {
+            L_CURLYt {
+    pub delete: TextRange,
+    pub insert: String,
+}
+
+impl AtomEdit {
+    pub fn replace(range: TextRange, replace_with: String) -> AtomEdit {
+        AtomEdit { delete: range, insert: replace_with }
+    }
+
+    pub fn delete(range: TextRange) -> AtomEdit {
+        AtomEdit::replace(range, String::new())
+    }
+
+    pub fn insert(offset: TextUnit, text: String) -> AtomEdit {
+        AtomEdit::replace(TextRange::offset_len(offset, 0.into()), text)
+    }
+}
+
+fn find_reparsable_node(node: SyntaxNodeRef, range: TextRange) -> Option<(SyntaxNodeRef, fn(&mut Parser))> {
+    let node = algo::find_covering_node(node, range);
+    return algo::ancestors(node)
+        .filter_map(|node| reparser(node).map(|r| (node, r)))
+        .next();
+
+    fn reparser(node: SyntaxNodeRef) -> Option {
+        let res = match node.kind() {
+            ;
+    let end = u32::from(range.end()) as usize;
+    text.replaT => grammar::named_field_def_list,
+            _ => return None,
+        };
+        Some(res)
+    }
+}
+
+pub /*(meh)*/ fn replace_range(mut text: String, range: TextRange, replace_with: &str) -> String {
+    let start = u32::from(range.start()) as usize;
+    let end = u32::from(range.end()) as usize;
+    text.replace_range(start..end, replace_with);
+    text
+}
+
+fn is_balanced(tokens: &[Token]) -> bool {
+    if tokens.len() == 0
+        || tokens.first().unwrap().kind != L_CURLY
+        || tokens.last().unwrap().kind != R_CURLY {
+        return false
+    }
+    let mut balance = 0usize;
+    for t in tokens.iter() {
+        match t.kind {
+            L_CURLY => balance += 1,
+            R_CURLY => balance = match balance.checked_sub(1) {
+                Some(b) => b,
+                None => return false,
+            },
+            _ => (),
+        }
+    }
+    balance == 0
+}
+
+fn merge_errors(
+    old_errors: Vec,
+    new_errors: Vec,
+    old_node: SyntaxNodeRef,
+    edit: &AtomEdit,
+) -> Vec {
+    let mut res = Vec::new();
+    for e in old_errors {
+        if e.offset < old_node.range().start() {
+            res.push(e)
+        } else if e.offset > old_node.range().end() {
+            res.push(SyntaxError {
+                msg: e.msg,
+                offset: e.offset + TextUnit::of_str(&edit.insert) - edit.delete.len(),
+            })
+        }
+    }
+    for e in new_errors {
+        res.push(SyntaxError {
+            msg: e.msg,
+            offset: e.offset + old_node.range().start(),
+        })
+    }
+    res
+}
diff --git a/crates/libsyntax2/tests/test/main.rs b/crates/libsyntax2/tests/test/main.rs
index 596f32216..014faa2c6 100644
--- a/crates/libsyntax2/tests/test/main.rs
+++ b/crates/libsyntax2/tests/test/main.rs
@@ -12,7 +12,7 @@ use std::{
 use test_utils::extract_range;
 use libsyntax2::{
     File, AtomEdit,
-    utils::dump_tree,
+    utils::{dump_tree, check_fuzz_invariants},
 };
 
 #[test]
@@ -31,6 +31,13 @@ fn parser_tests() {
     })
 }
 
+#[test]
+fn parser_fuzz_tests() {
+    for (_, text) in collect_tests(&["parser/fuzz-failures"]) {
+        check_fuzz_invariants(&text)
+    }
+}
+
 #[test]
 fn reparse_test() {
     fn do_check(before: &str, replace_with: &str) {
@@ -88,8 +95,7 @@ pub fn dir_tests<F>(paths: &[&str], f: F)
 where
     F: Fn(&str) -> String,
 {
-    for path in collect_tests(paths) {
-        let input_code = read_text(&path);
+    for (path, input_code) in collect_tests(paths) {
         let parse_tree = f(&input_code);
         let path = path.with_extension("txt");
         if !path.exists() {
@@ -128,13 +134,17 @@ fn assert_equal_text(expected: &str, actual: &str, path: &Path) {
     assert_eq_text!(expected, actual, "file: {}", pretty_path.display());
}
 
-fn collect_tests(paths: &[&str]) -> Vec<PathBuf> {
+fn collect_tests(paths: &[&str]) -> Vec<(PathBuf, String)> {
     paths
         .iter()
        .flat_map(|path| {
            let path = test_data_dir().join(path);
            test_from_dir(&path).into_iter()
        })
+        .map(|path| {
+            let text = read_text(&path);
+            (path, text)
+        })
        .collect()
}
-- cgit v1.2.3


From a5c333c3ed98d539fcadcc723e992f5295d22d5c Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Sat, 8 Sep 2018 19:10:20 +0300
Subject: Fix yet another parser infinite loop

This commit is an example of fixing a common parser error: an infinite
loop due to error recovery.

This error typically happens when we parse a list of items and fail to
parse a specific item at the current position. One choice is to skip a
token and try to parse a list item at the next position. This is a good,
but not universal, default. When parsing a list of arguments in a
function call, for example, you don't want to skip over `fn`, because it
most likely starts a function declaration and is not a mistyped argument:

```
fn foo() {
    quux(1, 2

fn bar() {
}
```

Another choice is to bail out of the loop immediately, but it isn't
perfect either: sometimes skipping over garbage helps:

```
quux(1, foo:, 92) // should skip over `:`, b/c that's part of `foo::bar`
```

In general, the parser tries to balance these two cases, though we don't
have a definitive strategy yet.

However, if the parser accidentally neither skips over a token nor
breaks out of the loop, it becomes stuck in the loop forever (though
there's an internal counter that self-checks this situation and panics),
and that's exactly what this test demonstrates.

To fix such a situation, first of all, add the test case to
tests/data/parser/{err,fuzz-failures}. Then, run

```
RUST_BACKTRACE=short cargo test --package libsyntax2
```

to verify that the parser indeed panics, and to get an idea of which
grammar production is the culprit (look for `_list` functions!). In this
case, I see

```
10: libsyntax2::grammar::expressions::atom::match_arm_list
at crates/libsyntax2/src/grammar/expressions/atom.rs:309
```

and that looks like it might be the culprit. I verify it by adding
`eprintln!("loopy {:?}", p.current());`, and indeed I see that it is
printed repeatedly.

Diagnosing this a bit shows that the problem is that the
`patterns::pattern` function does not consume anything if the next token
is `let`. That is a good default: it makes cases like

```
let
let foo = 92;
```

where the user hasn't typed the pattern yet, parse in a reasonable way.
For match arms, however, a pattern is pretty much the only thing we
expect, so, as a fix, I introduce a special variant of `pattern` that
does not do recovery.
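To make the invariant concrete: every iteration of a list-parsing loop
must either consume at least one token or break out. Below is a minimal,
self-contained sketch of that guard; this is not libsyntax2's actual
API, and `Kind`, `Parser`, `item`, and `item_list` are all illustrative
names:

```rust
// Toy token kinds and parser state; purely illustrative, not the
// libsyntax2 types.
#[derive(Clone, Copy, PartialEq, Debug)]
enum Kind { RCurly, LetKw, Ident, Eof }

struct Parser { tokens: Vec<Kind>, pos: usize }

impl Parser {
    fn current(&self) -> Kind {
        *self.tokens.get(self.pos).unwrap_or(&Kind::Eof)
    }
    fn bump(&mut self) { self.pos += 1; }
    // Try to parse one item; on failure it recovers WITHOUT consuming
    // anything -- the dangerous case for the caller's loop.
    fn item(&mut self) -> bool {
        match self.current() {
            Kind::Ident => { self.bump(); true }
            _ => false,
        }
    }
}

fn item_list(p: &mut Parser) {
    while p.current() != Kind::RCurly && p.current() != Kind::Eof {
        let before = p.pos;
        p.item();
        // The invariant: each iteration must consume a token or break.
        // If the item parser made no progress, force it.
        if p.pos == before {
            p.bump();
        }
    }
}

fn main() {
    // A stray `let` where an item is expected must not hang the loop.
    let mut p = Parser { tokens: vec![Kind::LetKw, Kind::Ident, Kind::RCurly], pos: 0 };
    item_list(&mut p);
    assert_eq!(p.current(), Kind::RCurly); // terminated at the close curly
}
```

The internal counter mentioned above plays the role of the
`p.pos == before` check, except that the real parser panics rather than
silently forcing progress.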
---
 crates/libsyntax2/src/grammar/expressions/atom.rs |  6 ++----
 crates/libsyntax2/src/grammar/patterns.rs         | 12 ++++++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'crates')

diff --git a/crates/libsyntax2/src/grammar/expressions/atom.rs b/crates/libsyntax2/src/grammar/expressions/atom.rs
index fdb4718ba..8335c700f 100644
--- a/crates/libsyntax2/src/grammar/expressions/atom.rs
+++ b/crates/libsyntax2/src/grammar/expressions/atom.rs
@@ -323,11 +323,9 @@ fn match_arm_list(p: &mut Parser) {
 // }
 fn match_arm(p: &mut Parser) -> BlockLike {
     let m = p.start();
-    loop {
+    patterns::pattern_r(p, TokenSet::EMPTY);
+    while p.eat(PIPE) {
         patterns::pattern(p);
-        if !p.eat(PIPE) {
-            break;
-        }
     }
     if p.eat(IF_KW) {
         expr_no_struct(p);
diff --git a/crates/libsyntax2/src/grammar/patterns.rs b/crates/libsyntax2/src/grammar/patterns.rs
index 6dd3ab2fa..29a55cb46 100644
--- a/crates/libsyntax2/src/grammar/patterns.rs
+++ b/crates/libsyntax2/src/grammar/patterns.rs
@@ -8,7 +8,11 @@ pub(super) const PATTERN_FIRST: TokenSet =
 ];
 
 pub(super) fn pattern(p: &mut Parser) {
-    if let Some(lhs) = atom_pat(p) {
+    pattern_r(p, PAT_RECOVERY_SET)
+}
+
+pub(super) fn pattern_r(p: &mut Parser, recovery_set: TokenSet) {
+    if let Some(lhs) = atom_pat(p, recovery_set) {
         // test range_pat
         // fn main() {
         //     match 92 { 0 ... 100 => () }
@@ -16,7 +20,7 @@ pub(super) fn pattern(p: &mut Parser) {
         if p.at(DOTDOTDOT) {
             let m = lhs.precede(p);
             p.bump();
-            atom_pat(p);
+            atom_pat(p, recovery_set);
             m.complete(p, RANGE_PAT);
         }
     }
@@ -26,7 +30,7 @@ const PAT_RECOVERY_SET: TokenSet =
     token_set![LET_KW, IF_KW, WHILE_KW, LOOP_KW, MATCH_KW, R_PAREN, COMMA];
 
-fn atom_pat(p: &mut Parser) -> Option<CompletedMarker> {
+fn atom_pat(p: &mut Parser, recovery_set: TokenSet) -> Option<CompletedMarker> {
     let la0 = p.nth(0);
     let la1 = p.nth(1);
     if la0 == REF_KW || la0 == MUT_KW
@@ -56,7 +60,7 @@ fn atom_pat(p: &mut Parser) -> Option<CompletedMarker> {
         L_PAREN => tuple_pat(p),
         L_BRACK => slice_pat(p),
         _ => {
-            p.err_recover("expected pattern", PAT_RECOVERY_SET);
+            p.err_recover("expected pattern", recovery_set);
             return None;
         }
     };
-- cgit v1.2.3
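The mechanism this fix threads through the grammar can be modeled in
isolation. As the commit message describes, recovery leaves a token
unconsumed when it belongs to the recovery set (`PAT_RECOVERY_SET`
contains `LET_KW`), while an empty set means the offending token is
always eaten. A hedged sketch under those assumptions, using a toy
`Parser` and a `HashSet` in place of the real `TokenSet` (none of these
names are libsyntax2's real types):

```rust
use std::collections::HashSet;

// Illustrative token kinds; not the real SyntaxKind enum.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
enum Kind { LetKw, Ident, Eof }

struct Parser { tokens: Vec<Kind>, pos: usize, errors: Vec<String> }

impl Parser {
    fn current(&self) -> Kind {
        *self.tokens.get(self.pos).unwrap_or(&Kind::Eof)
    }
    // Modeled after the behavior the commit describes: report an error,
    // and consume the offending token only if it is NOT in the set.
    fn err_recover(&mut self, msg: &str, recovery_set: &HashSet<Kind>) {
        self.errors.push(msg.to_string());
        if !recovery_set.contains(&self.current()) && self.current() != Kind::Eof {
            self.pos += 1;
        }
    }
}

fn main() {
    let mut p = Parser { tokens: vec![Kind::LetKw, Kind::Ident], pos: 0, errors: Vec::new() };

    // With `let` in the recovery set, the stray token is left for the
    // caller -- which loops forever if the caller never consumes it.
    let recovery: HashSet<Kind> = [Kind::LetKw].into_iter().collect();
    p.err_recover("expected pattern", &recovery);
    assert_eq!(p.pos, 0); // no progress: the dangerous case

    // With an empty set (the match-arm fix), the token is consumed,
    // so the enclosing loop is guaranteed to make progress.
    p.err_recover("expected pattern", &HashSet::new());
    assert_eq!(p.pos, 1);
}
```

Passing the recovery set down as a parameter, as `pattern_r` does, keeps
the lenient default for statement positions, where a stray `let` really
does start the next statement, while letting `match_arm` opt into
guaranteed progress.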