diff options
author | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-03-22 05:48:55 +0000 |
---|---|---|
committer | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-03-22 05:48:55 +0000 |
commit | 2a6544f906818263e2791bc4cdf4fcbdf7260ab9 (patch) | |
tree | 12cc178506343e5dbbea0285e1dcd0bd0035398c /crates | |
parent | ed823cb38d6c6852b2645f6bcd4c3b699b4b7539 (diff) | |
parent | bf8e7930daa3fb168106534b1cc418f5bc44e8c0 (diff) |
Merge #1013
1013: Fuzz reparsing and fix found bugs r=matklad a=pcpthm
Add fuzz test for reparsing which:
- Checks reparsing doesn't panic and validate result syntax tree.
- Checks that incremental reparsing produces the same syntax tree as full reparse.
- Check for that errors are the same as full reparsing is disabled because errors are less important than syntax tree and produce failures which I couldn't figure out how to fix immediately (FIXME comment).
I guess the current input generation is inefficient but still found several bugs:
- Arithmetic overflow (negative result on an unsigned type). I changed the signature of `SyntaxError::add_offset` to solve this problem.
- When reparsing a leaf, the token of the leaf can be joined to the next characters. Such case was not considered.
- UNDERSCORE token was not produced when text length is exactly 1 (not a reparsing bug).
- When reparsing a block, *inner* curly braces should be balanced. i.e. `{}{}` is invalid.
- Effects of deleting newlines were not considered.
Co-authored-by: pcpthm <[email protected]>
Diffstat (limited to 'crates')
-rw-r--r-- | crates/ra_syntax/fuzz/Cargo.toml | 13 | ||||
-rw-r--r-- | crates/ra_syntax/fuzz/fuzz_targets/parser.rs | 6 | ||||
-rw-r--r-- | crates/ra_syntax/fuzz/fuzz_targets/reparse.rs | 9 | ||||
-rw-r--r-- | crates/ra_syntax/src/fuzz.rs | 67 | ||||
-rw-r--r-- | crates/ra_syntax/src/lib.rs | 9 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/lexer.rs | 1 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/reparsing.rs | 24 | ||||
-rw-r--r-- | crates/ra_syntax/src/syntax_error.rs | 6 | ||||
-rw-r--r-- | crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs | 6 | ||||
-rw-r--r-- | crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs | 4 | ||||
-rw-r--r-- | crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs | 4 | ||||
-rw-r--r-- | crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs | bin | 0 -> 8 bytes | |||
-rw-r--r-- | crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs | 4 | ||||
-rw-r--r-- | crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs | 7 | ||||
-rw-r--r-- | crates/ra_syntax/tests/test.rs | 13 |
15 files changed, 149 insertions, 24 deletions
diff --git a/crates/ra_syntax/fuzz/Cargo.toml b/crates/ra_syntax/fuzz/Cargo.toml index 4a255882e..613ad2857 100644 --- a/crates/ra_syntax/fuzz/Cargo.toml +++ b/crates/ra_syntax/fuzz/Cargo.toml | |||
@@ -4,14 +4,15 @@ name = "ra_syntax-fuzz" | |||
4 | version = "0.0.1" | 4 | version = "0.0.1" |
5 | authors = ["rust-analyzer developers"] | 5 | authors = ["rust-analyzer developers"] |
6 | publish = false | 6 | publish = false |
7 | edition = "2018" | ||
7 | 8 | ||
8 | [package.metadata] | 9 | [package.metadata] |
9 | cargo-fuzz = true | 10 | cargo-fuzz = true |
10 | 11 | ||
11 | [dependencies.ra_syntax] | 12 | [dependencies] |
12 | path = ".." | 13 | ra_syntax = { path = ".." } |
13 | [dependencies.libfuzzer-sys] | 14 | ra_text_edit = { path = "../../ra_text_edit" } |
14 | git = "https://github.com/rust-fuzz/libfuzzer-sys.git" | 15 | libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" } |
15 | 16 | ||
16 | # Prevent this from interfering with workspaces | 17 | # Prevent this from interfering with workspaces |
17 | [workspace] | 18 | [workspace] |
@@ -20,3 +21,7 @@ members = ["."] | |||
20 | [[bin]] | 21 | [[bin]] |
21 | name = "parser" | 22 | name = "parser" |
22 | path = "fuzz_targets/parser.rs" | 23 | path = "fuzz_targets/parser.rs" |
24 | |||
25 | [[bin]] | ||
26 | name = "reparse" | ||
27 | path = "fuzz_targets/reparse.rs" | ||
diff --git a/crates/ra_syntax/fuzz/fuzz_targets/parser.rs b/crates/ra_syntax/fuzz/fuzz_targets/parser.rs index 4667d5579..76a8b08d0 100644 --- a/crates/ra_syntax/fuzz/fuzz_targets/parser.rs +++ b/crates/ra_syntax/fuzz/fuzz_targets/parser.rs | |||
@@ -1,9 +1,9 @@ | |||
1 | #![no_main] | 1 | #![no_main] |
2 | #[macro_use] extern crate libfuzzer_sys; | 2 | use libfuzzer_sys::fuzz_target; |
3 | extern crate ra_syntax; | 3 | use ra_syntax::fuzz::check_parser; |
4 | 4 | ||
5 | fuzz_target!(|data: &[u8]| { | 5 | fuzz_target!(|data: &[u8]| { |
6 | if let Ok(text) = std::str::from_utf8(data) { | 6 | if let Ok(text) = std::str::from_utf8(data) { |
7 | ra_syntax::check_fuzz_invariants(text) | 7 | check_parser(text) |
8 | } | 8 | } |
9 | }); | 9 | }); |
diff --git a/crates/ra_syntax/fuzz/fuzz_targets/reparse.rs b/crates/ra_syntax/fuzz/fuzz_targets/reparse.rs new file mode 100644 index 000000000..45524d4c1 --- /dev/null +++ b/crates/ra_syntax/fuzz/fuzz_targets/reparse.rs | |||
@@ -0,0 +1,9 @@ | |||
1 | #![no_main] | ||
2 | use libfuzzer_sys::fuzz_target; | ||
3 | use ra_syntax::fuzz::CheckReparse; | ||
4 | |||
5 | fuzz_target!(|data: &[u8]| { | ||
6 | if let Some(check) = CheckReparse::from_data(data) { | ||
7 | check.run(); | ||
8 | } | ||
9 | }); | ||
diff --git a/crates/ra_syntax/src/fuzz.rs b/crates/ra_syntax/src/fuzz.rs new file mode 100644 index 000000000..af11b2e1a --- /dev/null +++ b/crates/ra_syntax/src/fuzz.rs | |||
@@ -0,0 +1,67 @@ | |||
1 | use crate::{SourceFile, validation, TextUnit, TextRange, AstNode}; | ||
2 | use ra_text_edit::AtomTextEdit; | ||
3 | use std::str::{self, FromStr}; | ||
4 | |||
5 | fn check_file_invariants(file: &SourceFile) { | ||
6 | let root = file.syntax(); | ||
7 | validation::validate_block_structure(root); | ||
8 | let _ = file.errors(); | ||
9 | } | ||
10 | |||
11 | pub fn check_parser(text: &str) { | ||
12 | let file = SourceFile::parse(text); | ||
13 | check_file_invariants(&file); | ||
14 | } | ||
15 | |||
16 | #[derive(Debug, Clone)] | ||
17 | pub struct CheckReparse { | ||
18 | text: String, | ||
19 | edit: AtomTextEdit, | ||
20 | edited_text: String, | ||
21 | } | ||
22 | |||
23 | impl CheckReparse { | ||
24 | pub fn from_data(data: &[u8]) -> Option<Self> { | ||
25 | const PREFIX: &'static str = "fn main(){\n\t"; | ||
26 | const SUFFIX: &'static str = "\n}"; | ||
27 | |||
28 | let data = str::from_utf8(data).ok()?; | ||
29 | let mut lines = data.lines(); | ||
30 | let delete_start = usize::from_str(lines.next()?).ok()? + PREFIX.len(); | ||
31 | let delete_len = usize::from_str(lines.next()?).ok()?; | ||
32 | let insert = lines.next()?.to_string(); | ||
33 | let text = lines.collect::<Vec<_>>().join("\n"); | ||
34 | let text = format!("{}{}{}", PREFIX, text, SUFFIX); | ||
35 | text.get(delete_start..delete_start.checked_add(delete_len)?)?; // make sure delete is a valid range | ||
36 | let delete = TextRange::offset_len( | ||
37 | TextUnit::from_usize(delete_start), | ||
38 | TextUnit::from_usize(delete_len), | ||
39 | ); | ||
40 | let edited_text = | ||
41 | format!("{}{}{}", &text[..delete_start], &insert, &text[delete_start + delete_len..]); | ||
42 | let edit = AtomTextEdit { delete, insert }; | ||
43 | Some(CheckReparse { text, edit, edited_text }) | ||
44 | } | ||
45 | |||
46 | pub fn run(&self) { | ||
47 | let file = SourceFile::parse(&self.text); | ||
48 | let new_file = file.reparse(&self.edit); | ||
49 | check_file_invariants(&new_file); | ||
50 | assert_eq!(&new_file.syntax().text().to_string(), &self.edited_text); | ||
51 | let full_reparse = SourceFile::parse(&self.edited_text); | ||
52 | for (a, b) in new_file.syntax().descendants().zip(full_reparse.syntax().descendants()) { | ||
53 | if (a.kind(), a.range()) != (b.kind(), b.range()) { | ||
54 | eprint!("original:\n{}", file.syntax().debug_dump()); | ||
55 | eprint!("reparsed:\n{}", new_file.syntax().debug_dump()); | ||
56 | eprint!("full reparse:\n{}", full_reparse.syntax().debug_dump()); | ||
57 | assert_eq!( | ||
58 | format!("{:?}", a), | ||
59 | format!("{:?}", b), | ||
60 | "different syntax tree produced by the full reparse" | ||
61 | ); | ||
62 | } | ||
63 | } | ||
64 | // FIXME | ||
65 | // assert_eq!(new_file.errors(), full_reparse.errors()); | ||
66 | } | ||
67 | } | ||
diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs index 7334d53ef..4f3020440 100644 --- a/crates/ra_syntax/src/lib.rs +++ b/crates/ra_syntax/src/lib.rs | |||
@@ -29,6 +29,8 @@ mod ptr; | |||
29 | 29 | ||
30 | pub mod algo; | 30 | pub mod algo; |
31 | pub mod ast; | 31 | pub mod ast; |
32 | #[doc(hidden)] | ||
33 | pub mod fuzz; | ||
32 | 34 | ||
33 | pub use rowan::{SmolStr, TextRange, TextUnit}; | 35 | pub use rowan::{SmolStr, TextRange, TextUnit}; |
34 | pub use ra_parser::SyntaxKind; | 36 | pub use ra_parser::SyntaxKind; |
@@ -83,13 +85,6 @@ impl SourceFile { | |||
83 | } | 85 | } |
84 | } | 86 | } |
85 | 87 | ||
86 | pub fn check_fuzz_invariants(text: &str) { | ||
87 | let file = SourceFile::parse(text); | ||
88 | let root = file.syntax(); | ||
89 | validation::validate_block_structure(root); | ||
90 | let _ = file.errors(); | ||
91 | } | ||
92 | |||
93 | /// This test does not assert anything and instead just shows off the crate's | 88 | /// This test does not assert anything and instead just shows off the crate's |
94 | /// API. | 89 | /// API. |
95 | #[test] | 90 | #[test] |
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index f9362120e..36e841609 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs | |||
@@ -195,6 +195,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
195 | ptr.bump(); | 195 | ptr.bump(); |
196 | true | 196 | true |
197 | } | 197 | } |
198 | ('_', None) => return UNDERSCORE, | ||
198 | ('_', Some(c)) if !is_ident_continue(c) => return UNDERSCORE, | 199 | ('_', Some(c)) if !is_ident_continue(c) => return UNDERSCORE, |
199 | _ => false, | 200 | _ => false, |
200 | }; | 201 | }; |
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index ba77a3b6c..7e7f914f5 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs | |||
@@ -33,12 +33,19 @@ pub(crate) fn incremental_reparse( | |||
33 | } | 33 | } |
34 | 34 | ||
35 | fn reparse_leaf<'node>( | 35 | fn reparse_leaf<'node>( |
36 | node: &'node SyntaxNode, | 36 | root: &'node SyntaxNode, |
37 | edit: &AtomTextEdit, | 37 | edit: &AtomTextEdit, |
38 | ) -> Option<(&'node SyntaxNode, GreenNode, Vec<SyntaxError>)> { | 38 | ) -> Option<(&'node SyntaxNode, GreenNode, Vec<SyntaxError>)> { |
39 | let node = algo::find_covering_node(node, edit.delete); | 39 | let node = algo::find_covering_node(root, edit.delete); |
40 | match node.kind() { | 40 | match node.kind() { |
41 | WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => { | 41 | WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => { |
42 | if node.kind() == WHITESPACE || node.kind() == COMMENT { | ||
43 | // removing a new line may extends previous token | ||
44 | if node.text().to_string()[edit.delete - node.range().start()].contains('\n') { | ||
45 | return None; | ||
46 | } | ||
47 | } | ||
48 | |||
42 | let text = get_text_after_edit(node, &edit); | 49 | let text = get_text_after_edit(node, &edit); |
43 | let tokens = tokenize(&text); | 50 | let tokens = tokenize(&text); |
44 | let token = match tokens[..] { | 51 | let token = match tokens[..] { |
@@ -50,6 +57,13 @@ fn reparse_leaf<'node>( | |||
50 | return None; | 57 | return None; |
51 | } | 58 | } |
52 | 59 | ||
60 | if let Some(next_char) = root.text().char_at(node.range().end()) { | ||
61 | let tokens_with_next_char = tokenize(&format!("{}{}", text, next_char)); | ||
62 | if tokens_with_next_char.len() == 1 { | ||
63 | return None; | ||
64 | } | ||
65 | } | ||
66 | |||
53 | let green = GreenNode::new_leaf(node.kind(), text.into()); | 67 | let green = GreenNode::new_leaf(node.kind(), text.into()); |
54 | let new_errors = vec![]; | 68 | let new_errors = vec![]; |
55 | Some((node, green, new_errors)) | 69 | Some((node, green, new_errors)) |
@@ -104,7 +118,7 @@ fn is_balanced(tokens: &[Token]) -> bool { | |||
104 | return false; | 118 | return false; |
105 | } | 119 | } |
106 | let mut balance = 0usize; | 120 | let mut balance = 0usize; |
107 | for t in tokens.iter() { | 121 | for t in &tokens[1..tokens.len() - 1] { |
108 | match t.kind { | 122 | match t.kind { |
109 | L_CURLY => balance += 1, | 123 | L_CURLY => balance += 1, |
110 | R_CURLY => { | 124 | R_CURLY => { |
@@ -130,11 +144,11 @@ fn merge_errors( | |||
130 | if e.offset() <= old_node.range().start() { | 144 | if e.offset() <= old_node.range().start() { |
131 | res.push(e) | 145 | res.push(e) |
132 | } else if e.offset() >= old_node.range().end() { | 146 | } else if e.offset() >= old_node.range().end() { |
133 | res.push(e.add_offset(TextUnit::of_str(&edit.insert) - edit.delete.len())); | 147 | res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len())); |
134 | } | 148 | } |
135 | } | 149 | } |
136 | for e in new_errors { | 150 | for e in new_errors { |
137 | res.push(e.add_offset(old_node.range().start())); | 151 | res.push(e.add_offset(old_node.range().start(), 0.into())); |
138 | } | 152 | } |
139 | res | 153 | res |
140 | } | 154 | } |
diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs index bdd431742..4b8c22a57 100644 --- a/crates/ra_syntax/src/syntax_error.rs +++ b/crates/ra_syntax/src/syntax_error.rs | |||
@@ -48,10 +48,10 @@ impl SyntaxError { | |||
48 | } | 48 | } |
49 | } | 49 | } |
50 | 50 | ||
51 | pub fn add_offset(mut self, plus_offset: TextUnit) -> SyntaxError { | 51 | pub fn add_offset(mut self, plus_offset: TextUnit, minus_offset: TextUnit) -> SyntaxError { |
52 | self.location = match self.location { | 52 | self.location = match self.location { |
53 | Location::Range(range) => Location::Range(range + plus_offset), | 53 | Location::Range(range) => Location::Range(range + plus_offset - minus_offset), |
54 | Location::Offset(offset) => Location::Offset(offset + plus_offset), | 54 | Location::Offset(offset) => Location::Offset(offset + plus_offset - minus_offset), |
55 | }; | 55 | }; |
56 | 56 | ||
57 | self | 57 | self |
diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs new file mode 100644 index 000000000..388eb74ed --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0000.rs | |||
@@ -0,0 +1,6 @@ | |||
1 | 0 | ||
2 | 1 | ||
3 | |||
4 | |||
5 | |||
6 | 0 \ No newline at end of file | ||
diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs new file mode 100644 index 000000000..d2d42c6f9 --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0001.rs | |||
@@ -0,0 +1,4 @@ | |||
1 | 0 | ||
2 | 1 | ||
3 | |||
4 | bb" \ No newline at end of file | ||
diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs new file mode 100644 index 000000000..3fbee1548 --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0002.rs | |||
@@ -0,0 +1,4 @@ | |||
1 | 1 | ||
2 | 1 | ||
3 | |||
4 | ""! \ No newline at end of file | ||
diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs new file mode 100644 index 000000000..d2757cd08 --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0003.rs | |||
Binary files differ | |||
diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs new file mode 100644 index 000000000..481617a70 --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0004.rs | |||
@@ -0,0 +1,4 @@ | |||
1 | 0 | ||
2 | 0 | ||
3 | } | ||
4 | {; \ No newline at end of file | ||
diff --git a/crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs new file mode 100644 index 000000000..074d761c7 --- /dev/null +++ b/crates/ra_syntax/tests/data/reparse/fuzz-failures/0005.rs | |||
@@ -0,0 +1,7 @@ | |||
1 | 05 | ||
2 | 1 | ||
3 | |||
4 | |||
5 | |||
6 | b' | ||
7 | \ No newline at end of file | ||
diff --git a/crates/ra_syntax/tests/test.rs b/crates/ra_syntax/tests/test.rs index 458740c13..537b01368 100644 --- a/crates/ra_syntax/tests/test.rs +++ b/crates/ra_syntax/tests/test.rs | |||
@@ -8,7 +8,7 @@ use std::{ | |||
8 | }; | 8 | }; |
9 | 9 | ||
10 | use test_utils::{project_dir, dir_tests, read_text, collect_tests}; | 10 | use test_utils::{project_dir, dir_tests, read_text, collect_tests}; |
11 | use ra_syntax::{SourceFile, AstNode, check_fuzz_invariants}; | 11 | use ra_syntax::{SourceFile, AstNode, fuzz}; |
12 | 12 | ||
13 | #[test] | 13 | #[test] |
14 | fn lexer_tests() { | 14 | fn lexer_tests() { |
@@ -47,7 +47,16 @@ fn parser_tests() { | |||
47 | #[test] | 47 | #[test] |
48 | fn parser_fuzz_tests() { | 48 | fn parser_fuzz_tests() { |
49 | for (_, text) in collect_tests(&test_data_dir(), &["parser/fuzz-failures"]) { | 49 | for (_, text) in collect_tests(&test_data_dir(), &["parser/fuzz-failures"]) { |
50 | check_fuzz_invariants(&text) | 50 | fuzz::check_parser(&text) |
51 | } | ||
52 | } | ||
53 | |||
54 | #[test] | ||
55 | fn reparse_fuzz_tests() { | ||
56 | for (_, text) in collect_tests(&test_data_dir(), &["reparse/fuzz-failures"]) { | ||
57 | let check = fuzz::CheckReparse::from_data(text.as_bytes()).unwrap(); | ||
58 | println!("{:?}", check); | ||
59 | check.run(); | ||
51 | } | 60 | } |
52 | } | 61 | } |
53 | 62 | ||