From e012efca275976b9702e3c47c3919023708680cc Mon Sep 17 00:00:00 2001
From: Aleksey Kladov <aleksey.kladov@gmail.com>
Date: Tue, 6 Apr 2021 21:59:47 +0300
Subject: Let's try testing for "is not quadratic" condition

---
 crates/ide/src/syntax_highlighting/tests.rs | 96 +++++++++++++++++++++++++++++
 1 file changed, 96 insertions(+)

(limited to 'crates/ide/src/syntax_highlighting')
diff --git a/crates/ide/src/syntax_highlighting/tests.rs b/crates/ide/src/syntax_highlighting/tests.rs
index 1b02857ec..de2d22ac7 100644
--- a/crates/ide/src/syntax_highlighting/tests.rs
+++ b/crates/ide/src/syntax_highlighting/tests.rs
@@ -1,5 +1,8 @@
+use std::time::Instant;
+
 use expect_test::{expect_file, ExpectFile};
 use ide_db::SymbolKind;
+use stdx::format_to;
 use test_utils::{bench, bench_fixture, skip_slow_tests};
 
 use crate::{fixture, FileRange, HlTag, TextRange};
@@ -257,6 +260,99 @@ fn benchmark_syntax_highlighting_long_struct() {
     assert_eq!(hash, 2001);
 }
 
+#[test]
+fn syntax_highlighting_not_quadratic() {
+    if skip_slow_tests() {
+        return;
+    }
+
+    let mut measures = Vec::new();
+    for i in 6..=10 {
+        let n = 1 << i;
+        let fixture = bench_fixture::big_struct_n(n);
+        let (analysis, file_id) = fixture::file(&fixture);
+
+        let time = Instant::now();
+
+        let hash = analysis
+            .highlight(file_id)
+            .unwrap()
+            .iter()
+            .filter(|it| it.highlight.tag == HlTag::Symbol(SymbolKind::Struct))
+            .count();
+        assert!(hash > n as usize);
+
+        let elapsed = time.elapsed();
+        measures.push((n as f64, elapsed.as_millis() as f64))
+    }
+
+    assert_linear(&measures)
+}
+
+/// Checks that a set of measurements looks like a liner function rather than
+/// like a quadratic function. Algorithm:
+///
+/// 1. Linearly scale input to be in [0; 1)
+/// 2. Using linear regression, compute the best linear function approximating
+///    the input.
+/// 3. Compute RMSE and  maximal absolute error.
+/// 4. Check that errors are within tolerances and that the constant term is not
+///    too negative.
+///
+/// Ideally, we should use a proper "model selection" to directly compare
+/// quadratic and linear models, but that sounds rather complicated:
+///
+///     https://stats.stackexchange.com/questions/21844/selecting-best-model-based-on-linear-quadratic-and-cubic-fit-of-data
+fn assert_linear(xy: &[(f64, f64)]) {
+    let (mut xs, mut ys): (Vec<_>, Vec<_>) = xy.iter().copied().unzip();
+    normalize(&mut xs);
+    normalize(&mut ys);
+    let xy = xs.iter().copied().zip(ys.iter().copied());
+
+    // Linear regression: finding a and b to fit y = a + b*x.
+
+    let mean_x = mean(&xs);
+    let mean_y = mean(&ys);
+
+    let b = {
+        let mut num = 0.0;
+        let mut denom = 0.0;
+        for (x, y) in xy.clone() {
+            num += (x - mean_x) * (y - mean_y);
+            denom += (x - mean_x).powi(2);
+        }
+        num / denom
+    };
+
+    let a = mean_y - b * mean_x;
+
+    let mut plot = format!("y_pred = {:.3} + {:.3} * x\n\nx     y     y_pred\n", a, b);
+
+    let mut se = 0.0;
+    let mut max_error = 0.0f64;
+    for (x, y) in xy {
+        let y_pred = a + b * x;
+        se += (y - y_pred).powi(2);
+        max_error = max_error.max((y_pred - y).abs());
+
+        format_to!(plot, "{:.3} {:.3} {:.3}\n", x, y, y_pred);
+    }
+
+    let rmse = (se / xs.len() as f64).sqrt();
+    format_to!(plot, "\nrmse = {:.3} max error = {:.3}", rmse, max_error);
+
+    assert!(rmse < 0.05 && max_error < 0.1 && a > -0.1, "\nLooks quadratic\n{}", plot);
+
+    fn normalize(xs: &mut Vec<f64>) {
+        let max = xs.iter().copied().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
+        xs.iter_mut().for_each(|it| *it /= max);
+    }
+
+    fn mean(xs: &[f64]) -> f64 {
+        xs.iter().copied().sum::<f64>() / (xs.len() as f64)
+    }
+}
+
 #[test]
 fn benchmark_syntax_highlighting_parser() {
     if skip_slow_tests() {
-- 
cgit v1.2.3