From e734190c24d2a5aca5b62c2b1ab7e6136017a25c Mon Sep 17 00:00:00 2001 From: pcpthm Date: Fri, 22 Mar 2019 02:05:12 +0900 Subject: Refactor parser fuzz testing --- crates/ra_syntax/fuzz/Cargo.toml | 8 ++++---- crates/ra_syntax/fuzz/fuzz_targets/parser.rs | 6 +++--- crates/ra_syntax/src/fuzz.rs | 12 ++++++++++++ crates/ra_syntax/src/lib.rs | 9 ++------- crates/ra_syntax/tests/test.rs | 4 ++-- 5 files changed, 23 insertions(+), 16 deletions(-) create mode 100644 crates/ra_syntax/src/fuzz.rs diff --git a/crates/ra_syntax/fuzz/Cargo.toml b/crates/ra_syntax/fuzz/Cargo.toml index 4a255882e..c54d12813 100644 --- a/crates/ra_syntax/fuzz/Cargo.toml +++ b/crates/ra_syntax/fuzz/Cargo.toml @@ -4,14 +4,14 @@ name = "ra_syntax-fuzz" version = "0.0.1" authors = ["rust-analyzer developers"] publish = false +edition = "2018" [package.metadata] cargo-fuzz = true -[dependencies.ra_syntax] -path = ".." -[dependencies.libfuzzer-sys] -git = "https://github.com/rust-fuzz/libfuzzer-sys.git" +[dependencies] +ra_syntax = { path = ".." 
} +libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" } # Prevent this from interfering with workspaces [workspace] diff --git a/crates/ra_syntax/fuzz/fuzz_targets/parser.rs b/crates/ra_syntax/fuzz/fuzz_targets/parser.rs index 4667d5579..76a8b08d0 100644 --- a/crates/ra_syntax/fuzz/fuzz_targets/parser.rs +++ b/crates/ra_syntax/fuzz/fuzz_targets/parser.rs @@ -1,9 +1,9 @@ #![no_main] -#[macro_use] extern crate libfuzzer_sys; -extern crate ra_syntax; +use libfuzzer_sys::fuzz_target; +use ra_syntax::fuzz::check_parser; fuzz_target!(|data: &[u8]| { if let Ok(text) = std::str::from_utf8(data) { - ra_syntax::check_fuzz_invariants(text) + check_parser(text) } }); diff --git a/crates/ra_syntax/src/fuzz.rs b/crates/ra_syntax/src/fuzz.rs new file mode 100644 index 000000000..03f453a6e --- /dev/null +++ b/crates/ra_syntax/src/fuzz.rs @@ -0,0 +1,12 @@ +use crate::{SourceFile, validation, AstNode}; + +fn check_file_invariants(file: &SourceFile) { + let root = file.syntax(); + validation::validate_block_structure(root); + let _ = file.errors(); +} + +pub fn check_parser(text: &str) { + let file = SourceFile::parse(text); + check_file_invariants(&file); +} diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs index 7334d53ef..4f3020440 100644 --- a/crates/ra_syntax/src/lib.rs +++ b/crates/ra_syntax/src/lib.rs @@ -29,6 +29,8 @@ mod ptr; pub mod algo; pub mod ast; +#[doc(hidden)] +pub mod fuzz; pub use rowan::{SmolStr, TextRange, TextUnit}; pub use ra_parser::SyntaxKind; @@ -83,13 +85,6 @@ impl SourceFile { } } -pub fn check_fuzz_invariants(text: &str) { - let file = SourceFile::parse(text); - let root = file.syntax(); - validation::validate_block_structure(root); - let _ = file.errors(); -} - /// This test does not assert anything and instead just shows off the crate's /// API. 
#[test] diff --git a/crates/ra_syntax/tests/test.rs b/crates/ra_syntax/tests/test.rs index 458740c13..3de4a65af 100644 --- a/crates/ra_syntax/tests/test.rs +++ b/crates/ra_syntax/tests/test.rs @@ -8,7 +8,7 @@ use std::{ }; use test_utils::{project_dir, dir_tests, read_text, collect_tests}; -use ra_syntax::{SourceFile, AstNode, check_fuzz_invariants}; +use ra_syntax::{SourceFile, AstNode, fuzz}; #[test] fn lexer_tests() { @@ -47,7 +47,7 @@ fn parser_tests() { #[test] fn parser_fuzz_tests() { for (_, text) in collect_tests(&test_data_dir(), &["parser/fuzz-failures"]) { - check_fuzz_invariants(&text) + fuzz::check_parser(&text) } } -- cgit v1.2.3 From 4c7142d0c9be90c8947deb788993d903b2e0a5d1 Mon Sep 17 00:00:00 2001 From: pcpthm Date: Fri, 22 Mar 2019 02:06:48 +0900 Subject: Add fuzz test for reparsing --- crates/ra_syntax/fuzz/Cargo.toml | 5 ++++ crates/ra_syntax/fuzz/fuzz_targets/reparse.rs | 9 ++++++ crates/ra_syntax/src/fuzz.rs | 42 ++++++++++++++++++++++++++- crates/ra_syntax/tests/test.rs | 9 ++++++ 4 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 crates/ra_syntax/fuzz/fuzz_targets/reparse.rs diff --git a/crates/ra_syntax/fuzz/Cargo.toml b/crates/ra_syntax/fuzz/Cargo.toml index c54d12813..613ad2857 100644 --- a/crates/ra_syntax/fuzz/Cargo.toml +++ b/crates/ra_syntax/fuzz/Cargo.toml @@ -11,6 +11,7 @@ cargo-fuzz = true [dependencies] ra_syntax = { path = ".." 
} +ra_text_edit = { path = "../../ra_text_edit" } libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" } # Prevent this from interfering with workspaces @@ -20,3 +21,7 @@ members = ["."] [[bin]] name = "parser" path = "fuzz_targets/parser.rs" + +[[bin]] +name = "reparse" +path = "fuzz_targets/reparse.rs" diff --git a/crates/ra_syntax/fuzz/fuzz_targets/reparse.rs b/crates/ra_syntax/fuzz/fuzz_targets/reparse.rs new file mode 100644 index 000000000..45524d4c1 --- /dev/null +++ b/crates/ra_syntax/fuzz/fuzz_targets/reparse.rs @@ -0,0 +1,9 @@ +#![no_main] +use libfuzzer_sys::fuzz_target; +use ra_syntax::fuzz::CheckReparse; + +fuzz_target!(|data: &[u8]| { + if let Some(check) = CheckReparse::from_data(data) { + check.run(); + } +}); diff --git a/crates/ra_syntax/src/fuzz.rs b/crates/ra_syntax/src/fuzz.rs index 03f453a6e..efb080ac2 100644 --- a/crates/ra_syntax/src/fuzz.rs +++ b/crates/ra_syntax/src/fuzz.rs @@ -1,4 +1,6 @@ -use crate::{SourceFile, validation, AstNode}; +use crate::{SourceFile, validation, TextUnit, TextRange, AstNode}; +use ra_text_edit::AtomTextEdit; +use std::str::{self, FromStr}; fn check_file_invariants(file: &SourceFile) { let root = file.syntax(); @@ -10,3 +12,41 @@ pub fn check_parser(text: &str) { let file = SourceFile::parse(text); check_file_invariants(&file); } + +#[derive(Debug, Clone)] +pub struct CheckReparse { + text: String, + edit: AtomTextEdit, + edited_text: String, +} + +impl CheckReparse { + pub fn from_data(data: &[u8]) -> Option<Self> { + let data = str::from_utf8(data).ok()?; + let mut lines = data.lines(); + let delete_start = usize::from_str(lines.next()?).ok()?; + let delete_len = usize::from_str(lines.next()?).ok()?; + let insert = lines.next()?.to_string(); + let text = lines.collect::<Vec<_>>().join("\n"); + text.get(delete_start..delete_start.checked_add(delete_len)?)?; // make sure delete is a valid range + let delete = TextRange::offset_len( + TextUnit::from_usize(delete_start), + TextUnit::from_usize(delete_len), 
); + let edited_text = + format!("{}{}{}", &text[..delete_start], &insert, &text[delete_start + delete_len..]); + let edit = AtomTextEdit { delete, insert }; + Some(CheckReparse { text, edit, edited_text }) + } + + pub fn run(&self) { + let file = SourceFile::parse(&self.text); + let new_file = file.reparse(&self.edit); + check_file_invariants(&new_file); + assert_eq!(&new_file.syntax().text().to_string(), &self.edited_text); + let full_reparse = SourceFile::parse(&self.edited_text); + for (a, b) in new_file.syntax().descendants().zip(full_reparse.syntax().descendants()) { + assert_eq!(a.kind(), b.kind(), "different syntax tree produced by a full reparse"); + } + } +} diff --git a/crates/ra_syntax/tests/test.rs b/crates/ra_syntax/tests/test.rs index 3de4a65af..537b01368 100644 --- a/crates/ra_syntax/tests/test.rs +++ b/crates/ra_syntax/tests/test.rs @@ -51,6 +51,15 @@ fn parser_fuzz_tests() { } } +#[test] +fn reparse_fuzz_tests() { + for (_, text) in collect_tests(&test_data_dir(), &["reparse/fuzz-failures"]) { + let check = fuzz::CheckReparse::from_data(text.as_bytes()).unwrap(); + println!("{:?}", check); + check.run(); + } +} + /// Test that Rust-analyzer can parse and validate the rust-analyser /// TODO: Use this as a benchmark #[test] -- cgit v1.2.3 From 0acb61a911659537c51daf6793c67ef0c2b55bc9 Mon Sep 17 00:00:00 2001 From: pcpthm Date: Fri, 22 Mar 2019 02:22:06 +0900 Subject: Fix an arithmetic overflow in reparser --- crates/ra_syntax/src/parsing/reparsing.rs | 4 ++-- crates/ra_syntax/src/syntax_error.rs | 6 +++--- crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs | 6 ++++++ 3 files changed, 11 insertions(+), 5 deletions(-) create mode 100644 crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index ba77a3b6c..b44cca49d 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -130,11 +130,11 @@ fn 
merge_errors( if e.offset() <= old_node.range().start() { res.push(e) } else if e.offset() >= old_node.range().end() { - res.push(e.add_offset(TextUnit::of_str(&edit.insert) - edit.delete.len())); + res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len())); } } for e in new_errors { - res.push(e.add_offset(old_node.range().start())); + res.push(e.add_offset(old_node.range().start(), 0.into())); } res } diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs index bdd431742..4b8c22a57 100644 --- a/crates/ra_syntax/src/syntax_error.rs +++ b/crates/ra_syntax/src/syntax_error.rs @@ -48,10 +48,10 @@ impl SyntaxError { } } - pub fn add_offset(mut self, plus_offset: TextUnit) -> SyntaxError { + pub fn add_offset(mut self, plus_offset: TextUnit, minus_offset: TextUnit) -> SyntaxError { self.location = match self.location { - Location::Range(range) => Location::Range(range + plus_offset), - Location::Offset(offset) => Location::Offset(offset + plus_offset), + Location::Range(range) => Location::Range(range + plus_offset - minus_offset), + Location::Offset(offset) => Location::Offset(offset + plus_offset - minus_offset), }; self diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs new file mode 100644 index 000000000..388eb74ed --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs @@ -0,0 +1,6 @@ +0 +1 + + + +0 \ No newline at end of file -- cgit v1.2.3 From c622000413351915d08e270e8962f5fbaedf0437 Mon Sep 17 00:00:00 2001 From: pcpthm Date: Fri, 22 Mar 2019 03:15:16 +0900 Subject: Improve reparse fuzz test --- crates/ra_syntax/src/fuzz.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/crates/ra_syntax/src/fuzz.rs b/crates/ra_syntax/src/fuzz.rs index efb080ac2..c7084bc6d 100644 --- a/crates/ra_syntax/src/fuzz.rs +++ b/crates/ra_syntax/src/fuzz.rs @@ -46,7 +46,19 @@ impl CheckReparse { 
assert_eq!(&new_file.syntax().text().to_string(), &self.edited_text); let full_reparse = SourceFile::parse(&self.edited_text); for (a, b) in new_file.syntax().descendants().zip(full_reparse.syntax().descendants()) { - assert_eq!(a.kind(), b.kind(), "different syntax tree produced by a full reparse"); + if (a.kind(), a.range()) != (b.kind(), b.range()) { + eprint!("original:\n{}", file.syntax().debug_dump()); + eprint!("reparsed:\n{}", new_file.syntax().debug_dump()); + eprint!("full reparse:\n{}", full_reparse.syntax().debug_dump()); + assert_eq!( + format!("{:?}", a), + format!("{:?}", b), + "different syntax tree produced by the full reparse" + ); + } } + // FIXME + // assert_eq!(new_file.errors(), full_reparse.errors()); + assert_eq!(new_file.errors().is_empty(), full_reparse.errors().is_empty()); } } -- cgit v1.2.3 From 9623e77d9f24b6ff8e718f7321a92081997ea1e9 Mon Sep 17 00:00:00 2001 From: pcpthm Date: Fri, 22 Mar 2019 03:27:00 +0900 Subject: Fix reparsing bug when lex result is different --- crates/ra_syntax/src/parsing/reparsing.rs | 11 +++++++++-- crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs | 4 ++++ crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs | 4 ++++ 3 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs create mode 100644 crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index b44cca49d..3b5471bf8 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -33,10 +33,10 @@ pub(crate) fn incremental_reparse( } fn reparse_leaf<'node>( - node: &'node SyntaxNode, + root: &'node SyntaxNode, edit: &AtomTextEdit, ) -> Option<(&'node SyntaxNode, GreenNode, Vec<SyntaxError>)> { - let node = algo::find_covering_node(node, edit.delete); + let node = algo::find_covering_node(root, edit.delete); match node.kind() { WHITESPACE | 
COMMENT | IDENT | STRING | RAW_STRING => { let text = get_text_after_edit(node, &edit); @@ -50,6 +50,13 @@ fn reparse_leaf<'node>( return None; } + if let Some(next_char) = root.text().char_at(node.range().end()) { + let tokens_with_next_char = tokenize(&format!("{}{}", text, next_char)); + if tokens_with_next_char.len() == 1 { + return None; + } + } + let green = GreenNode::new_leaf(node.kind(), text.into()); let new_errors = vec![]; Some((node, green, new_errors)) diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs new file mode 100644 index 000000000..d2d42c6f9 --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs @@ -0,0 +1,4 @@ +0 +1 + +bb" \ No newline at end of file diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs new file mode 100644 index 000000000..3fbee1548 --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs @@ -0,0 +1,4 @@ +1 +1 + +""! 
\ No newline at end of file -- cgit v1.2.3 From ec59d9bda569057727a9b04314c63e3b3adaaa95 Mon Sep 17 00:00:00 2001 From: pcpthm Date: Fri, 22 Mar 2019 03:39:05 +0900 Subject: Fix lexer not producing right token on "_" --- crates/ra_syntax/src/parsing/lexer.rs | 1 + crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs | Bin 0 -> 8 bytes 2 files changed, 1 insertion(+) create mode 100644 crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index f9362120e..36e841609 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -195,6 +195,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { ptr.bump(); true } + ('_', None) => return UNDERSCORE, ('_', Some(c)) if !is_ident_continue(c) => return UNDERSCORE, _ => false, }; diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs new file mode 100644 index 000000000..d2757cd08 Binary files /dev/null and b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs differ -- cgit v1.2.3 From c60ec02d75a7cfa57a5a2b1e2256b70224deb531 Mon Sep 17 00:00:00 2001 From: pcpthm Date: Fri, 22 Mar 2019 04:11:21 +0900 Subject: Use template text to improve reparse fuzzing --- crates/ra_syntax/src/fuzz.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/ra_syntax/src/fuzz.rs b/crates/ra_syntax/src/fuzz.rs index c7084bc6d..1153f0fb9 100644 --- a/crates/ra_syntax/src/fuzz.rs +++ b/crates/ra_syntax/src/fuzz.rs @@ -22,12 +22,16 @@ pub struct CheckReparse { impl CheckReparse { pub fn from_data(data: &[u8]) -> Option<Self> { + const PREFIX: &'static str = "fn main(){\n\t"; + const SUFFIX: &'static str = "\n}"; + let data = str::from_utf8(data).ok()?; let mut lines = data.lines(); - let delete_start = usize::from_str(lines.next()?).ok()?; + let delete_start = usize::from_str(lines.next()?).ok()? 
+ PREFIX.len(); let delete_len = usize::from_str(lines.next()?).ok()?; let insert = lines.next()?.to_string(); let text = lines.collect::<Vec<_>>().join("\n"); + let text = format!("{}{}{}", PREFIX, text, SUFFIX); text.get(delete_start..delete_start.checked_add(delete_len)?)?; // make sure delete is a valid range let delete = TextRange::offset_len( TextUnit::from_usize(delete_start), -- cgit v1.2.3 From 7669113e7c9088a2b656c720cca5c2e46e3cb896 Mon Sep 17 00:00:00 2001 From: pcpthm Date: Fri, 22 Mar 2019 04:14:28 +0900 Subject: Completely ignore errors for reparse fuzz --- crates/ra_syntax/src/fuzz.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/ra_syntax/src/fuzz.rs b/crates/ra_syntax/src/fuzz.rs index 1153f0fb9..af11b2e1a 100644 --- a/crates/ra_syntax/src/fuzz.rs +++ b/crates/ra_syntax/src/fuzz.rs @@ -63,6 +63,5 @@ impl CheckReparse { } // FIXME // assert_eq!(new_file.errors(), full_reparse.errors()); - assert_eq!(new_file.errors().is_empty(), full_reparse.errors().is_empty()); } } -- cgit v1.2.3 From edbc18516d211c8dbf07539358c7d8aeca24bb30 Mon Sep 17 00:00:00 2001 From: pcpthm Date: Fri, 22 Mar 2019 04:33:05 +0900 Subject: Fix reparsing bug on "{}{}" --- crates/ra_syntax/src/parsing/reparsing.rs | 2 +- crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) create mode 100644 crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index 3b5471bf8..d54669a95 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -111,7 +111,7 @@ fn is_balanced(tokens: &[Token]) -> bool { return false; } let mut balance = 0usize; - for t in tokens.iter() { + for t in &tokens[1..tokens.len() - 1] { match t.kind { L_CURLY => balance += 1, R_CURLY => { diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs 
b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs new file mode 100644 index 000000000..481617a70 --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs @@ -0,0 +1,4 @@ +0 +0 +} +{; \ No newline at end of file -- cgit v1.2.3 From bf8e7930daa3fb168106534b1cc418f5bc44e8c0 Mon Sep 17 00:00:00 2001 From: pcpthm Date: Fri, 22 Mar 2019 05:29:27 +0900 Subject: Fix reparsing failure when removing newline --- crates/ra_syntax/src/parsing/reparsing.rs | 7 +++++++ crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs | 7 +++++++ 2 files changed, 14 insertions(+) create mode 100644 crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index d54669a95..7e7f914f5 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -39,6 +39,13 @@ fn reparse_leaf<'node>( let node = algo::find_covering_node(root, edit.delete); match node.kind() { WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => { + if node.kind() == WHITESPACE || node.kind() == COMMENT { + // removing a new line may extends previous token + if node.text().to_string()[edit.delete - node.range().start()].contains('\n') { + return None; + } + } + let text = get_text_after_edit(node, &edit); let tokens = tokenize(&text); let token = match tokens[..] { diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs new file mode 100644 index 000000000..074d761c7 --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs @@ -0,0 +1,7 @@ +05 +1 + + + +b' + \ No newline at end of file -- cgit v1.2.3