From 327323ad25d126f6394f26e1442667647022c383 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 13 Apr 2021 12:49:10 +0300 Subject: internal: fix flakiness of accidentally quadratic test --- crates/ide/src/syntax_highlighting/tests.rs | 98 +++++------------------- crates/test_utils/src/assert_linear.rs | 112 ++++++++++++++++++++++++++++ crates/test_utils/src/lib.rs | 3 +- 3 files changed, 131 insertions(+), 82 deletions(-) create mode 100644 crates/test_utils/src/assert_linear.rs (limited to 'crates') diff --git a/crates/ide/src/syntax_highlighting/tests.rs b/crates/ide/src/syntax_highlighting/tests.rs index b4818060f..933cfa6f3 100644 --- a/crates/ide/src/syntax_highlighting/tests.rs +++ b/crates/ide/src/syntax_highlighting/tests.rs @@ -2,8 +2,7 @@ use std::time::Instant; use expect_test::{expect_file, ExpectFile}; use ide_db::SymbolKind; -use stdx::format_to; -use test_utils::{bench, bench_fixture, skip_slow_tests}; +use test_utils::{bench, bench_fixture, skip_slow_tests, AssertLinear}; use crate::{fixture, FileRange, HlTag, TextRange}; @@ -266,90 +265,27 @@ fn syntax_highlighting_not_quadratic() { return; } - let mut measures = Vec::new(); - for i in 6..=10 { - let n = 1 << i; - let fixture = bench_fixture::big_struct_n(n); - let (analysis, file_id) = fixture::file(&fixture); + let mut al = AssertLinear::default(); + while al.next_round() { + for i in 6..=10 { + let n = 1 << i; - let time = Instant::now(); + let fixture = bench_fixture::big_struct_n(n); + let (analysis, file_id) = fixture::file(&fixture); - let hash = analysis - .highlight(file_id) - .unwrap() - .iter() - .filter(|it| it.highlight.tag == HlTag::Symbol(SymbolKind::Struct)) - .count(); - assert!(hash > n as usize); + let time = Instant::now(); - let elapsed = time.elapsed(); - measures.push((n as f64, elapsed.as_millis() as f64)) - } + let hash = analysis + .highlight(file_id) + .unwrap() + .iter() + .filter(|it| it.highlight.tag == HlTag::Symbol(SymbolKind::Struct)) + .count(); + assert!(hash > n as usize); - assert_linear(&measures) -} - -/// Checks that a set of measurements looks like a linear function rather than -/// like a quadratic function. Algorithm: -/// -/// 1. Linearly scale input to be in [0; 1) -/// 2. Using linear regression, compute the best linear function approximating -/// the input. -/// 3. Compute RMSE and maximal absolute error. -/// 4. Check that errors are within tolerances and that the constant term is not -/// too negative. -/// -/// Ideally, we should use a proper "model selection" to directly compare -/// quadratic and linear models, but that sounds rather complicated: -/// -/// https://stats.stackexchange.com/questions/21844/selecting-best-model-based-on-linear-quadratic-and-cubic-fit-of-data -fn assert_linear(xy: &[(f64, f64)]) { - let (mut xs, mut ys): (Vec<_>, Vec<_>) = xy.iter().copied().unzip(); - normalize(&mut xs); - normalize(&mut ys); - let xy = xs.iter().copied().zip(ys.iter().copied()); - - // Linear regression: finding a and b to fit y = a + b*x. - - let mean_x = mean(&xs); - let mean_y = mean(&ys); - - let b = { - let mut num = 0.0; - let mut denom = 0.0; - for (x, y) in xy.clone() { - num += (x - mean_x) * (y - mean_y); - denom += (x - mean_x).powi(2); + let elapsed = time.elapsed(); + al.sample(n as f64, elapsed.as_millis() as f64); } - num / denom - }; - - let a = mean_y - b * mean_x; - - let mut plot = format!("y_pred = {:.3} + {:.3} * x\n\nx y y_pred\n", a, b); - - let mut se = 0.0; - let mut max_error = 0.0f64; - for (x, y) in xy { - let y_pred = a + b * x; - se += (y - y_pred).powi(2); - max_error = max_error.max((y_pred - y).abs()); - - format_to!(plot, "{:.3} {:.3} {:.3}\n", x, y, y_pred); - } - - let rmse = (se / xs.len() as f64).sqrt(); - format_to!(plot, "\nrmse = {:.3} max error = {:.3}", rmse, max_error); - - assert!(rmse < 0.05 && max_error < 0.1 && a > -0.1, "\nLooks quadratic\n{}", plot); - - fn normalize(xs: &mut Vec) { - let max = xs.iter().copied().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); - xs.iter_mut().for_each(|it| *it /= max); - } - - fn mean(xs: &[f64]) -> f64 { - xs.iter().copied().sum::() / (xs.len() as f64) } } diff --git a/crates/test_utils/src/assert_linear.rs b/crates/test_utils/src/assert_linear.rs new file mode 100644 index 000000000..6ecc232e1 --- /dev/null +++ b/crates/test_utils/src/assert_linear.rs @@ -0,0 +1,112 @@ +//! Checks that a set of measurements looks like a linear function rather than +//! like a quadratic function. Algorithm: +//! +//! 1. Linearly scale input to be in [0; 1) +//! 2. Using linear regression, compute the best linear function approximating +//! the input. +//! 3. Compute RMSE and maximal absolute error. +//! 4. Check that errors are within tolerances and that the constant term is not +//! too negative. +//! +//! Ideally, we should use a proper "model selection" to directly compare +//! quadratic and linear models, but that sounds rather complicated: +//! +//! https://stats.stackexchange.com/questions/21844/selecting-best-model-based-on-linear-quadratic-and-cubic-fit-of-data +//! +//! We might get false positives on a VM, but never false negatives. So, if the +//! first round fails, we repeat the ordeal three more times and fail only if +//! every time there's a fault. +use stdx::format_to; + +#[derive(Default)] +pub struct AssertLinear { + rounds: Vec, +} + +#[derive(Default)] +struct Round { + samples: Vec<(f64, f64)>, + plot: String, + linear: bool, +} + +impl AssertLinear { + pub fn next_round(&mut self) -> bool { + if let Some(round) = self.rounds.last_mut() { + round.finish(); + } + if self.rounds.iter().any(|it| it.linear) || self.rounds.len() == 4 { + return false; + } + self.rounds.push(Round::default()); + true + } + + pub fn sample(&mut self, x: f64, y: f64) { + self.rounds.last_mut().unwrap().samples.push((x, y)) + } +} + +impl Drop for AssertLinear { + fn drop(&mut self) { + assert!(!self.rounds.is_empty()); + if self.rounds.iter().all(|it| !it.linear) { + for round in &self.rounds { + eprintln!("\n{}", round.plot); + } + panic!("Doesn't look linear!") + } + } +} + +impl Round { + fn finish(&mut self) { + let (mut xs, mut ys): (Vec<_>, Vec<_>) = self.samples.iter().copied().unzip(); + normalize(&mut xs); + normalize(&mut ys); + let xy = xs.iter().copied().zip(ys.iter().copied()); + + // Linear regression: finding a and b to fit y = a + b*x. + + let mean_x = mean(&xs); + let mean_y = mean(&ys); + + let b = { + let mut num = 0.0; + let mut denom = 0.0; + for (x, y) in xy.clone() { + num += (x - mean_x) * (y - mean_y); + denom += (x - mean_x).powi(2); + } + num / denom + }; + + let a = mean_y - b * mean_x; + + self.plot = format!("y_pred = {:.3} + {:.3} * x\n\nx y y_pred\n", a, b); + + let mut se = 0.0; + let mut max_error = 0.0f64; + for (x, y) in xy { + let y_pred = a + b * x; + se += (y - y_pred).powi(2); + max_error = max_error.max((y_pred - y).abs()); + + format_to!(self.plot, "{:.3} {:.3} {:.3}\n", x, y, y_pred); + } + + let rmse = (se / xs.len() as f64).sqrt(); + format_to!(self.plot, "\nrmse = {:.3} max error = {:.3}", rmse, max_error); + + self.linear = rmse < 0.05 && max_error < 0.1 && a > -0.1; + + fn normalize(xs: &mut Vec) { + let max = xs.iter().copied().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + xs.iter_mut().for_each(|it| *it /= max); + } + + fn mean(xs: &[f64]) -> f64 { + xs.iter().copied().sum::() / (xs.len() as f64) + } + } +} diff --git a/crates/test_utils/src/lib.rs b/crates/test_utils/src/lib.rs index c5f859790..72466c957 100644 --- a/crates/test_utils/src/lib.rs +++ b/crates/test_utils/src/lib.rs @@ -8,6 +8,7 @@ pub mod bench_fixture; mod fixture; +mod assert_linear; use std::{ convert::{TryFrom, TryInto}, @@ -22,7 +23,7 @@ use text_size::{TextRange, TextSize}; pub use dissimilar::diff as __diff; pub use rustc_hash::FxHashMap; -pub use crate::fixture::Fixture; +pub use crate::{assert_linear::AssertLinear, fixture::Fixture}; pub const CURSOR_MARKER: &str = "$0"; pub const ESCAPED_CURSOR_MARKER: &str = "\\$0"; -- cgit v1.2.3