From 327323ad25d126f6394f26e1442667647022c383 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov <aleksey.kladov@gmail.com>
Date: Tue, 13 Apr 2021 12:49:10 +0300
Subject: internal: fix flakiness of accidentally quadratic test

---
 crates/ide/src/syntax_highlighting/tests.rs |  98 +++++-------------------
 crates/test_utils/src/assert_linear.rs      | 112 ++++++++++++++++++++++++++++
 crates/test_utils/src/lib.rs                |   3 +-
 3 files changed, 131 insertions(+), 82 deletions(-)
 create mode 100644 crates/test_utils/src/assert_linear.rs

(limited to 'crates')

diff --git a/crates/ide/src/syntax_highlighting/tests.rs b/crates/ide/src/syntax_highlighting/tests.rs
index b4818060f..933cfa6f3 100644
--- a/crates/ide/src/syntax_highlighting/tests.rs
+++ b/crates/ide/src/syntax_highlighting/tests.rs
@@ -2,8 +2,7 @@ use std::time::Instant;
 
 use expect_test::{expect_file, ExpectFile};
 use ide_db::SymbolKind;
-use stdx::format_to;
-use test_utils::{bench, bench_fixture, skip_slow_tests};
+use test_utils::{bench, bench_fixture, skip_slow_tests, AssertLinear};
 
 use crate::{fixture, FileRange, HlTag, TextRange};
 
@@ -266,90 +265,27 @@ fn syntax_highlighting_not_quadratic() {
         return;
     }
 
-    let mut measures = Vec::new();
-    for i in 6..=10 {
-        let n = 1 << i;
-        let fixture = bench_fixture::big_struct_n(n);
-        let (analysis, file_id) = fixture::file(&fixture);
+    let mut al = AssertLinear::default();
+    while al.next_round() {
+        for i in 6..=10 {
+            let n = 1 << i;
 
-        let time = Instant::now();
+            let fixture = bench_fixture::big_struct_n(n);
+            let (analysis, file_id) = fixture::file(&fixture);
 
-        let hash = analysis
-            .highlight(file_id)
-            .unwrap()
-            .iter()
-            .filter(|it| it.highlight.tag == HlTag::Symbol(SymbolKind::Struct))
-            .count();
-        assert!(hash > n as usize);
+            let time = Instant::now();
 
-        let elapsed = time.elapsed();
-        measures.push((n as f64, elapsed.as_millis() as f64))
-    }
+            let hash = analysis
+                .highlight(file_id)
+                .unwrap()
+                .iter()
+                .filter(|it| it.highlight.tag == HlTag::Symbol(SymbolKind::Struct))
+                .count();
+            assert!(hash > n as usize);
 
-    assert_linear(&measures)
-}
-
-/// Checks that a set of measurements looks like a linear function rather than
-/// like a quadratic function. Algorithm:
-///
-/// 1. Linearly scale input to be in [0; 1)
-/// 2. Using linear regression, compute the best linear function approximating
-///    the input.
-/// 3. Compute RMSE and  maximal absolute error.
-/// 4. Check that errors are within tolerances and that the constant term is not
-///    too negative.
-///
-/// Ideally, we should use a proper "model selection" to directly compare
-/// quadratic and linear models, but that sounds rather complicated:
-///
-///     https://stats.stackexchange.com/questions/21844/selecting-best-model-based-on-linear-quadratic-and-cubic-fit-of-data
-fn assert_linear(xy: &[(f64, f64)]) {
-    let (mut xs, mut ys): (Vec<_>, Vec<_>) = xy.iter().copied().unzip();
-    normalize(&mut xs);
-    normalize(&mut ys);
-    let xy = xs.iter().copied().zip(ys.iter().copied());
-
-    // Linear regression: finding a and b to fit y = a + b*x.
-
-    let mean_x = mean(&xs);
-    let mean_y = mean(&ys);
-
-    let b = {
-        let mut num = 0.0;
-        let mut denom = 0.0;
-        for (x, y) in xy.clone() {
-            num += (x - mean_x) * (y - mean_y);
-            denom += (x - mean_x).powi(2);
+            let elapsed = time.elapsed();
+            al.sample(n as f64, elapsed.as_millis() as f64);
         }
-        num / denom
-    };
-
-    let a = mean_y - b * mean_x;
-
-    let mut plot = format!("y_pred = {:.3} + {:.3} * x\n\nx     y     y_pred\n", a, b);
-
-    let mut se = 0.0;
-    let mut max_error = 0.0f64;
-    for (x, y) in xy {
-        let y_pred = a + b * x;
-        se += (y - y_pred).powi(2);
-        max_error = max_error.max((y_pred - y).abs());
-
-        format_to!(plot, "{:.3} {:.3} {:.3}\n", x, y, y_pred);
-    }
-
-    let rmse = (se / xs.len() as f64).sqrt();
-    format_to!(plot, "\nrmse = {:.3} max error = {:.3}", rmse, max_error);
-
-    assert!(rmse < 0.05 && max_error < 0.1 && a > -0.1, "\nLooks quadratic\n{}", plot);
-
-    fn normalize(xs: &mut Vec<f64>) {
-        let max = xs.iter().copied().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
-        xs.iter_mut().for_each(|it| *it /= max);
-    }
-
-    fn mean(xs: &[f64]) -> f64 {
-        xs.iter().copied().sum::<f64>() / (xs.len() as f64)
     }
 }
 
diff --git a/crates/test_utils/src/assert_linear.rs b/crates/test_utils/src/assert_linear.rs
new file mode 100644
index 000000000..6ecc232e1
--- /dev/null
+++ b/crates/test_utils/src/assert_linear.rs
@@ -0,0 +1,112 @@
+//! Checks that a set of measurements looks like a linear function rather than
+//! like a quadratic function. Algorithm:
+//!
+//! 1. Linearly scale input to be in [0; 1)
+//! 2. Using linear regression, compute the best linear function approximating
+//!    the input.
+//! 3. Compute RMSE and  maximal absolute error.
+//! 4. Check that errors are within tolerances and that the constant term is not
+//!    too negative.
+//!
+//! Ideally, we should use a proper "model selection" to directly compare
+//! quadratic and linear models, but that sounds rather complicated:
+//!
+//!     https://stats.stackexchange.com/questions/21844/selecting-best-model-based-on-linear-quadratic-and-cubic-fit-of-data
+//!
+//! We might get false positives on a VM, but never false negatives. So, if the
+//! first round fails, we repeat the ordeal three more times and fail only if
+//! every time there's a fault.
+use stdx::format_to;
+
+#[derive(Default)]
+pub struct AssertLinear {
+    rounds: Vec<Round>,
+}
+
+#[derive(Default)]
+struct Round {
+    samples: Vec<(f64, f64)>,
+    plot: String,
+    linear: bool,
+}
+
+impl AssertLinear {
+    pub fn next_round(&mut self) -> bool {
+        if let Some(round) = self.rounds.last_mut() {
+            round.finish();
+        }
+        if self.rounds.iter().any(|it| it.linear) || self.rounds.len() == 4 {
+            return false;
+        }
+        self.rounds.push(Round::default());
+        true
+    }
+
+    pub fn sample(&mut self, x: f64, y: f64) {
+        self.rounds.last_mut().unwrap().samples.push((x, y))
+    }
+}
+
+impl Drop for AssertLinear {
+    fn drop(&mut self) {
+        assert!(!self.rounds.is_empty());
+        if self.rounds.iter().all(|it| !it.linear) {
+            for round in &self.rounds {
+                eprintln!("\n{}", round.plot);
+            }
+            panic!("Doesn't look linear!")
+        }
+    }
+}
+
+impl Round {
+    fn finish(&mut self) {
+        let (mut xs, mut ys): (Vec<_>, Vec<_>) = self.samples.iter().copied().unzip();
+        normalize(&mut xs);
+        normalize(&mut ys);
+        let xy = xs.iter().copied().zip(ys.iter().copied());
+
+        // Linear regression: finding a and b to fit y = a + b*x.
+
+        let mean_x = mean(&xs);
+        let mean_y = mean(&ys);
+
+        let b = {
+            let mut num = 0.0;
+            let mut denom = 0.0;
+            for (x, y) in xy.clone() {
+                num += (x - mean_x) * (y - mean_y);
+                denom += (x - mean_x).powi(2);
+            }
+            num / denom
+        };
+
+        let a = mean_y - b * mean_x;
+
+        self.plot = format!("y_pred = {:.3} + {:.3} * x\n\nx     y     y_pred\n", a, b);
+
+        let mut se = 0.0;
+        let mut max_error = 0.0f64;
+        for (x, y) in xy {
+            let y_pred = a + b * x;
+            se += (y - y_pred).powi(2);
+            max_error = max_error.max((y_pred - y).abs());
+
+            format_to!(self.plot, "{:.3} {:.3} {:.3}\n", x, y, y_pred);
+        }
+
+        let rmse = (se / xs.len() as f64).sqrt();
+        format_to!(self.plot, "\nrmse = {:.3} max error = {:.3}", rmse, max_error);
+
+        self.linear = rmse < 0.05 && max_error < 0.1 && a > -0.1;
+
+        fn normalize(xs: &mut Vec<f64>) {
+            let max = xs.iter().copied().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
+            xs.iter_mut().for_each(|it| *it /= max);
+        }
+
+        fn mean(xs: &[f64]) -> f64 {
+            xs.iter().copied().sum::<f64>() / (xs.len() as f64)
+        }
+    }
+}
diff --git a/crates/test_utils/src/lib.rs b/crates/test_utils/src/lib.rs
index c5f859790..72466c957 100644
--- a/crates/test_utils/src/lib.rs
+++ b/crates/test_utils/src/lib.rs
@@ -8,6 +8,7 @@
 
 pub mod bench_fixture;
 mod fixture;
+mod assert_linear;
 
 use std::{
     convert::{TryFrom, TryInto},
@@ -22,7 +23,7 @@ use text_size::{TextRange, TextSize};
 pub use dissimilar::diff as __diff;
 pub use rustc_hash::FxHashMap;
 
-pub use crate::fixture::Fixture;
+pub use crate::{assert_linear::AssertLinear, fixture::Fixture};
 
 pub const CURSOR_MARKER: &str = "$0";
 pub const ESCAPED_CURSOR_MARKER: &str = "\\$0";
-- 
cgit v1.2.3