From e012efca275976b9702e3c47c3919023708680cc Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 6 Apr 2021 21:59:47 +0300 Subject: Let's try testing for "is not quadratic" condition --- crates/ide/src/syntax_highlighting/tests.rs | 96 +++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) (limited to 'crates/ide/src') diff --git a/crates/ide/src/syntax_highlighting/tests.rs b/crates/ide/src/syntax_highlighting/tests.rs index 1b02857ec..de2d22ac7 100644 --- a/crates/ide/src/syntax_highlighting/tests.rs +++ b/crates/ide/src/syntax_highlighting/tests.rs @@ -1,5 +1,8 @@ +use std::time::Instant; + use expect_test::{expect_file, ExpectFile}; use ide_db::SymbolKind; +use stdx::format_to; use test_utils::{bench, bench_fixture, skip_slow_tests}; use crate::{fixture, FileRange, HlTag, TextRange}; @@ -257,6 +260,99 @@ fn benchmark_syntax_highlighting_long_struct() { assert_eq!(hash, 2001); } +#[test] +fn syntax_highlighting_not_quadratic() { + if skip_slow_tests() { + return; + } + + let mut measures = Vec::new(); + for i in 6..=10 { + let n = 1 << i; + let fixture = bench_fixture::big_struct_n(n); + let (analysis, file_id) = fixture::file(&fixture); + + let time = Instant::now(); + + let hash = analysis + .highlight(file_id) + .unwrap() + .iter() + .filter(|it| it.highlight.tag == HlTag::Symbol(SymbolKind::Struct)) + .count(); + assert!(hash > n as usize); + + let elapsed = time.elapsed(); + measures.push((n as f64, elapsed.as_millis() as f64)) + } + + assert_linear(&measures) +} + +/// Checks that a set of measurements looks like a liner function rather than +/// like a quadratic function. Algorithm: +/// +/// 1. Linearly scale input to be in [0; 1) +/// 2. Using linear regression, compute the best linear function approximating +/// the input. +/// 3. Compute RMSE and maximal absolute error. +/// 4. Check that errors are within tolerances and that the constant term is not +/// too negative. +/// +/// Ideally, we should use a proper "model selection" to directly compare +/// quadratic and linear models, but that sounds rather complicated: +/// +/// https://stats.stackexchange.com/questions/21844/selecting-best-model-based-on-linear-quadratic-and-cubic-fit-of-data +fn assert_linear(xy: &[(f64, f64)]) { + let (mut xs, mut ys): (Vec<_>, Vec<_>) = xy.iter().copied().unzip(); + normalize(&mut xs); + normalize(&mut ys); + let xy = xs.iter().copied().zip(ys.iter().copied()); + + // Linear regression: finding a and b to fit y = a + b*x. + + let mean_x = mean(&xs); + let mean_y = mean(&ys); + + let b = { + let mut num = 0.0; + let mut denom = 0.0; + for (x, y) in xy.clone() { + num += (x - mean_x) * (y - mean_y); + denom += (x - mean_x).powi(2); + } + num / denom + }; + + let a = mean_y - b * mean_x; + + let mut plot = format!("y_pred = {:.3} + {:.3} * x\n\nx y y_pred\n", a, b); + + let mut se = 0.0; + let mut max_error = 0.0f64; + for (x, y) in xy { + let y_pred = a + b * x; + se += (y - y_pred).powi(2); + max_error = max_error.max((y_pred - y).abs()); + + format_to!(plot, "{:.3} {:.3} {:.3}\n", x, y, y_pred); + } + + let rmse = (se / xs.len() as f64).sqrt(); + format_to!(plot, "\nrmse = {:.3} max error = {:.3}", rmse, max_error); + + assert!(rmse < 0.05 && max_error < 0.1 && a > -0.1, "\nLooks quadratic\n{}", plot); + + fn normalize(xs: &mut Vec) { + let max = xs.iter().copied().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap(); + xs.iter_mut().for_each(|it| *it /= max); + } + + fn mean(xs: &[f64]) -> f64 { + xs.iter().copied().sum::() / (xs.len() as f64) + } +} + #[test] fn benchmark_syntax_highlighting_parser() { if skip_slow_tests() { -- cgit v1.2.3