Let's try testing for "is not quadratic" condition

author: Aleksey Kladov <[email protected]> 2021-04-06 19:59:47 +0100
committer: Aleksey Kladov <[email protected]> 2021-04-10 10:43:07 +0100
commit: e012efca275976b9702e3c47c3919023708680cc (patch)
tree: 498688bb1fcbf6f41665f6f86a0ec8930434c6f5 /crates/ide/src
parent: 00cdbceb9d7afdabc82788a88df88c6eb1035839 (diff)
1 files changed, 96 insertions, 0 deletions
diff --git a/crates/ide/src/syntax_highlighting/tests.rs b/crates/ide/src/syntax_highlighting/tests.rs
index 1b02857ec..de2d22ac7 100644
--- a/crates/ide/src/syntax_highlighting/tests.rs
+++ b/crates/ide/src/syntax_highlighting/tests.rs
@@ -1,5 +1,8 @@
+use std::time::Instant;
 use expect_test::{expect_file, ExpectFile};
 use ide_db::SymbolKind;
+use stdx::format_to;
 use test_utils::{bench, bench_fixture, skip_slow_tests};
 use crate::{fixture, FileRange, HlTag, TextRange};
@@ -258,6 +261,99 @@ fn benchmark_syntax_highlighting_long_struct() {
 }
 #[test]
+fn syntax_highlighting_not_quadratic() {
+    if skip_slow_tests() {
+        return;
+    }
+    let mut measures = Vec::new();
+    for i in 6..=10 {
+        let n = 1 << i;
+        let fixture = bench_fixture::big_struct_n(n);
+        let (analysis, file_id) = fixture::file(&fixture);
+        let time = Instant::now();
+        let hash = analysis
+            .highlight(file_id)
+            .unwrap()
+            .iter()
+            .filter(|it| it.highlight.tag == HlTag::Symbol(SymbolKind::Struct))
+            .count();
+        assert!(hash > n as usize);
+        let elapsed = time.elapsed();
+        measures.push((n as f64, elapsed.as_millis() as f64))
+    }
+    assert_linear(&measures)
+}
+/// Checks that a set of measurements looks like a liner function rather than
+/// like a quadratic function. Algorithm:
+///
+/// 1. Linearly scale input to be in [0; 1)
+/// 2. Using linear regression, compute the best linear function approximating
+///    the input.
+/// 3. Compute RMSE and  maximal absolute error.
+/// 4. Check that errors are within tolerances and that the constant term is not
+///    too negative.
+///
+/// Ideally, we should use a proper "model selection" to directly compare
+/// quadratic and linear models, but that sounds rather complicated:
+///
+///     https://stats.stackexchange.com/questions/21844/selecting-best-model-based-on-linear-quadratic-and-cubic-fit-of-data
+fn assert_linear(xy: &[(f64, f64)]) {
+    let (mut xs, mut ys): (Vec<_>, Vec<_>) = xy.iter().copied().unzip();
+    normalize(&mut xs);
+    normalize(&mut ys);
+    let xy = xs.iter().copied().zip(ys.iter().copied());
+    // Linear regression: finding a and b to fit y = a + b*x.
+    let mean_x = mean(&xs);
+    let mean_y = mean(&ys);
+    let b = {
+        let mut num = 0.0;
+        let mut denom = 0.0;
+        for (x, y) in xy.clone() {
+            num += (x - mean_x) * (y - mean_y);
+            denom += (x - mean_x).powi(2);
+        }
+        num / denom
+    };
+    let a = mean_y - b * mean_x;
+    let mut plot = format!("y_pred = {:.3} + {:.3} * x\n\nx     y     y_pred\n", a, b);
+    let mut se = 0.0;
+    let mut max_error = 0.0f64;
+    for (x, y) in xy {
+        let y_pred = a + b * x;
+        se += (y - y_pred).powi(2);
+        max_error = max_error.max((y_pred - y).abs());
+        format_to!(plot, "{:.3} {:.3} {:.3}\n", x, y, y_pred);
+    }
+    let rmse = (se / xs.len() as f64).sqrt();
+    format_to!(plot, "\nrmse = {:.3} max error = {:.3}", rmse, max_error);
+    assert!(rmse < 0.05 && max_error < 0.1 && a > -0.1, "\nLooks quadratic\n{}", plot);
+    fn normalize(xs: &mut Vec<f64>) {
+        let max = xs.iter().copied().max_by(|a, b| a.partial_cmp(b).unwrap()).unwrap();
+        xs.iter_mut().for_each(|it| *it /= max);
+    }
+    fn mean(xs: &[f64]) -> f64 {
+        xs.iter().copied().sum::<f64>() / (xs.len() as f64)
+    }
+}
+#[test]
 fn benchmark_syntax_highlighting_parser() {
    if skip_slow_tests() {
        return;
author	Aleksey Kladov <[email protected]>	2021-04-06 19:59:47 +0100
committer	Aleksey Kladov <[email protected]>	2021-04-10 10:43:07 +0100
commit	e012efca275976b9702e3c47c3919023708680cc (patch)
tree	498688bb1fcbf6f41665f6f86a0ec8930434c6f5 /crates/ide/src
parent	00cdbceb9d7afdabc82788a88df88c6eb1035839 (diff)

diff --git a/crates/ide/src/syntax_highlighting/tests.rs b/crates/ide/src/syntax_highlighting/tests.rs index 1b02857ec..de2d22ac7 100644 --- a/crates/ide/src/syntax_highlighting/tests.rs +++ b/crates/ide/src/syntax_highlighting/tests.rs
@@ -1,5 +1,8 @@
		1	use std::time::Instant;
		2
1	use expect_test::{expect_file, ExpectFile};	3	use expect_test::{expect_file, ExpectFile};
2	use ide_db::SymbolKind;	4	use ide_db::SymbolKind;
		5	use stdx::format_to;
3	use test_utils::{bench, bench_fixture, skip_slow_tests};	6	use test_utils::{bench, bench_fixture, skip_slow_tests};
4		7
5	use crate::{fixture, FileRange, HlTag, TextRange};	8	use crate::{fixture, FileRange, HlTag, TextRange};
@@ -258,6 +261,99 @@ fn benchmark_syntax_highlighting_long_struct() {
258	}	261	}
259		262
260	#[test]	263	#[test]
		264	fn syntax_highlighting_not_quadratic() {
		265	if skip_slow_tests() {
		266	return;
		267	}
		268
		269	let mut measures = Vec::new();
		270	for i in 6..=10 {
		271	let n = 1 << i;
		272	let fixture = bench_fixture::big_struct_n(n);
		273	let (analysis, file_id) = fixture::file(&fixture);
		274
		275	let time = Instant::now();
		276
		277	let hash = analysis
		278	.highlight(file_id)
		279	.unwrap()
		280	.iter()
		281	.filter(\|it\| it.highlight.tag == HlTag::Symbol(SymbolKind::Struct))
		282	.count();
		283	assert!(hash > n as usize);
		284
		285	let elapsed = time.elapsed();
		286	measures.push((n as f64, elapsed.as_millis() as f64))
		287	}
		288
		289	assert_linear(&measures)
		290	}
		291
		292	/// Checks that a set of measurements looks like a liner function rather than
		293	/// like a quadratic function. Algorithm:
		294	///
		295	/// 1. Linearly scale input to be in [0; 1)
		296	/// 2. Using linear regression, compute the best linear function approximating
		297	/// the input.
		298	/// 3. Compute RMSE and maximal absolute error.
		299	/// 4. Check that errors are within tolerances and that the constant term is not
		300	/// too negative.
		301	///
		302	/// Ideally, we should use a proper "model selection" to directly compare
		303	/// quadratic and linear models, but that sounds rather complicated:
		304	///
		305	/// https://stats.stackexchange.com/questions/21844/selecting-best-model-based-on-linear-quadratic-and-cubic-fit-of-data
		306	fn assert_linear(xy: &[(f64, f64)]) {
		307	let (mut xs, mut ys): (Vec<_>, Vec<_>) = xy.iter().copied().unzip();
		308	normalize(&mut xs);
		309	normalize(&mut ys);
		310	let xy = xs.iter().copied().zip(ys.iter().copied());
		311
		312	// Linear regression: finding a and b to fit y = a + b*x.
		313
		314	let mean_x = mean(&xs);
		315	let mean_y = mean(&ys);
		316
		317	let b = {
		318	let mut num = 0.0;
		319	let mut denom = 0.0;
		320	for (x, y) in xy.clone() {
		321	num += (x - mean_x) * (y - mean_y);
		322	denom += (x - mean_x).powi(2);
		323	}
		324	num / denom
		325	};
		326
		327	let a = mean_y - b * mean_x;
		328
		329	let mut plot = format!("y_pred = {:.3} + {:.3} * x\n\nx y y_pred\n", a, b);
		330
		331	let mut se = 0.0;
		332	let mut max_error = 0.0f64;
		333	for (x, y) in xy {
		334	let y_pred = a + b * x;
		335	se += (y - y_pred).powi(2);
		336	max_error = max_error.max((y_pred - y).abs());
		337
		338	format_to!(plot, "{:.3} {:.3} {:.3}\n", x, y, y_pred);
		339	}
		340
		341	let rmse = (se / xs.len() as f64).sqrt();
		342	format_to!(plot, "\nrmse = {:.3} max error = {:.3}", rmse, max_error);
		343
		344	assert!(rmse < 0.05 && max_error < 0.1 && a > -0.1, "\nLooks quadratic\n{}", plot);
		345
		346	fn normalize(xs: &mut Vec<f64>) {
		347	let max = xs.iter().copied().max_by(\|a, b\| a.partial_cmp(b).unwrap()).unwrap();
		348	xs.iter_mut().for_each(\|it\| *it /= max);
		349	}
		350
		351	fn mean(xs: &[f64]) -> f64 {
		352	xs.iter().copied().sum::<f64>() / (xs.len() as f64)
		353	}
		354	}
		355
		356	#[test]
261	fn benchmark_syntax_highlighting_parser() {	357	fn benchmark_syntax_highlighting_parser() {
262	if skip_slow_tests() {	358	if skip_slow_tests() {
263	return;	359	return;