Merge #2895

2895: Rewrite ra_prof's profile printing r=michalt a=michalt This changes the way we print things to first construct a mapping from events to the children and uses that mapping to actually print things. It should not change the actual output that we produce. The new approach two benefits: * It avoids a potential quadratic behavior of the previous approach. For instance, for a vector of N elements: ``` [Message{level: (N - 1)}, ..., Message{level: 1}, Message{level: 0}] ``` we would first do a linear scan to find entry with level 0, then another scan to find one with level 1, etc. * It makes it much easier to improve the output in the future, because we now pre-compute the children for each entry and can easily take that into account when printing. Signed-off-by: Michal Terepeta <[email protected]> Co-authored-by: Michal Terepeta <[email protected]>
author: bors[bot] <26634292+bors[bot]@users.noreply.github.com> 2020-01-29 20:09:49 +0000
committer: GitHub <[email protected]> 2020-01-29 20:09:49 +0000
commit: 5dcd9fdf5e95573de9bc46f32b86d6fbc5e7c18d (patch)
tree: 0dd24c080fb5052428023373cb3109bd566f8f83 /crates/ra_prof/src
parent: d92a15c1637860a669970142abe37d1daf5f2c73 (diff)
parent: 644c383f65324b5fe1ada42680923c2529accb26 (diff)
1 files changed, 71 insertions, 46 deletions
diff --git a/crates/ra_prof/src/lib.rs b/crates/ra_prof/src/lib.rs
index c7973ddf4..4a49e9f95 100644
--- a/crates/ra_prof/src/lib.rs
+++ b/crates/ra_prof/src/lib.rs
@@ -6,9 +6,9 @@ mod google_cpu_profiler;
 use std::{
    cell::RefCell,
+    collections::BTreeMap,
    collections::HashSet,
    io::{stderr, Write},
-    iter::repeat,
    mem,
    sync::{
        atomic::{AtomicBool, Ordering},
@@ -17,7 +17,6 @@ use std::{
    time::{Duration, Instant},
 };
-use itertools::Itertools;
 use once_cell::sync::Lazy;
 pub use crate::memory_usage::{Bytes, MemoryUsage};
@@ -216,7 +215,7 @@ impl Drop for Profiler {
                        // (otherwise we could print `0ms` despite user's `>0` filter when
                        // `duration` is just a few nanos).
                        if duration.as_millis() > longer_than.as_millis() {
-                            print(0, &stack.messages, &mut stdout.lock(), longer_than, None);
+                            print(&stack.messages, longer_than, &mut stdout.lock());
                        }
                        stack.messages.clear();
                    }
@@ -227,59 +226,85 @@ impl Drop for Profiler {
    }
 }
-fn print(
+fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) {
-    lvl: usize,
-    msgs: &[Message],
-    out: &mut impl Write,
-    longer_than: Duration,
-    total: Option<Duration>,
-) {
    if msgs.is_empty() {
        return;
    }
-    // The index of the first element that will be included in the slice when we recurse.
+    let children_map = idx_to_children(msgs);
-    let mut next_start = 0;
+    let root_idx = msgs.len() - 1;
-    let indent = repeat("    ").take(lvl).collect::<String>();
+    print_for_idx(root_idx, &children_map, msgs, longer_than, out);
-    // We output hierarchy for long calls, but sum up all short calls
+}
-    let mut short = Vec::new();
+fn print_for_idx(
+    current_idx: usize,
+    children_map: &[Vec<usize>],
+    msgs: &[Message],
+    longer_than: Duration,
+    out: &mut impl Write,
+) {
+    let current = &msgs[current_idx];
+    let current_indent = "    ".repeat(current.level);
+    writeln!(out, "{}{:5}ms - {}", current_indent, current.duration.as_millis(), current.message)
+        .expect("printing profiling info");
+    let longer_than_millis = longer_than.as_millis();
+    let children_indices = &children_map[current_idx];
    let mut accounted_for = Duration::default();
-    for (i, &Message { level, duration, message: ref msg }) in msgs.iter().enumerate() {
+    let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
-        if level != lvl {
-            continue;
-        }
-        accounted_for += duration;
-        if duration.as_millis() > longer_than.as_millis() {
-            writeln!(out, "{}{:5}ms - {}", indent, duration.as_millis(), msg)
-                .expect("printing profiling info to stdout");
-            print(lvl + 1, &msgs[next_start..i], out, longer_than, Some(duration));
+    for child_idx in children_indices.iter() {
+        let child = &msgs[*child_idx];
+        if child.duration.as_millis() > longer_than_millis {
+            print_for_idx(*child_idx, children_map, msgs, longer_than, out);
        } else {
-            short.push((msg, duration))
+            let pair = short_children.entry(&child.message).or_insert((Duration::default(), 0));
+            pair.0 += child.duration;
+            pair.1 += 1;
        }
+        accounted_for += child.duration;
+    }
-        next_start = i + 1;
+    for (child_msg, (duration, count)) in short_children.iter() {
+        let millis = duration.as_millis();
+        writeln!(out, "    {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count)
+            .expect("printing profiling info");
    }
-    short.sort_by_key(|(msg, _time)| *msg);
-    for (msg, entires) in short.iter().group_by(|(msg, _time)| msg).into_iter() {
+    let unaccounted_millis = (current.duration - accounted_for).as_millis();
-        let mut count = 0;
+    if !children_indices.is_empty()
-        let mut total_duration = Duration::default();
+        && unaccounted_millis > 0
-        entires.for_each(|(_msg, time)| {
+        && unaccounted_millis > longer_than_millis
-            count += 1;
+    {
-            total_duration += *time;
+        writeln!(out, "    {}{:5}ms - ???", current_indent, unaccounted_millis)
-        });
+            .expect("printing profiling info");
-        writeln!(out, "{}{:5}ms - {} ({} calls)", indent, total_duration.as_millis(), msg, count)
-            .expect("printing profiling info to stdout");
    }
+}
-    if let Some(total) = total {
+/// Returns a mapping from an index in the `msgs` to the vector with the indices of its children.
-        if let Some(unaccounted) = total.checked_sub(accounted_for) {
+///
-            let unaccounted_millis = unaccounted.as_millis();
+/// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish.
-            if unaccounted_millis > longer_than.as_millis() && unaccounted_millis > 0 {
+/// In other words, a postorder of the call graph. In particular, the root is the last element of
-                writeln!(out, "{}{:5}ms - ???", indent, unaccounted_millis)
+/// `msgs`.
-                    .expect("printing profiling info to stdout");
+fn idx_to_children(msgs: &[Message]) -> Vec<Vec<usize>> {
-            }
+    // Initialize with the index of the root; `msgs` and `ancestors` should be never empty.
+    assert!(!msgs.is_empty());
+    let mut ancestors = vec![msgs.len() - 1];
+    let mut result: Vec<Vec<usize>> = vec![vec![]; msgs.len()];
+    for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() {
+        // We need to find the parent of the current message, i.e., the last ancestor that has a
+        // level lower than the current message.
+        while msgs[*ancestors.last().unwrap()].level >= msg.level {
+            ancestors.pop();
        }
+        result[*ancestors.last().unwrap()].push(idx);
+        ancestors.push(idx);
+    }
+    // Note that above we visited all children from the last to the first one. Let's reverse vectors
+    // to get the more natural order where the first element is the first child.
+    for vec in result.iter_mut() {
+        vec.reverse();
    }
+    result
 }
 /// Prints backtrace to stderr, useful for debugging.
@@ -388,7 +413,7 @@ mod tests {
            Message { level: 1, duration: Duration::from_nanos(2), message: "bar".to_owned() },
            Message { level: 0, duration: Duration::from_millis(1), message: "foo".to_owned() },
        ];
-        print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));
+        print(&msgs, Duration::from_millis(0), &mut result);
        // The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed
        // when printing.
        assert_eq!(
@@ -404,7 +429,7 @@ mod tests {
            Message { level: 1, duration: Duration::from_millis(2), message: "bar".to_owned() },
            Message { level: 0, duration: Duration::from_millis(5), message: "foo".to_owned() },
        ];
-        print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));
+        print(&msgs, Duration::from_millis(0), &mut result);
        assert_eq!(
            std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
            vec![
@@ -426,7 +451,7 @@ mod tests {
            Message { level: 1, duration: Duration::from_millis(4), message: "bar".to_owned() },
            Message { level: 0, duration: Duration::from_millis(9), message: "foo".to_owned() },
        ];
-        print(0, &msgs, &mut result, Duration::from_millis(0), None);
+        print(&msgs, Duration::from_millis(0), &mut result);
        assert_eq!(
            std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
            vec![
author	bors[bot] <26634292+bors[bot]@users.noreply.github.com>	2020-01-29 20:09:49 +0000
committer	GitHub <[email protected]>	2020-01-29 20:09:49 +0000
commit	5dcd9fdf5e95573de9bc46f32b86d6fbc5e7c18d (patch)
tree	0dd24c080fb5052428023373cb3109bd566f8f83 /crates/ra_prof/src
parent	d92a15c1637860a669970142abe37d1daf5f2c73 (diff)
parent	644c383f65324b5fe1ada42680923c2529accb26 (diff)

diff --git a/crates/ra_prof/src/lib.rs b/crates/ra_prof/src/lib.rs index c7973ddf4..4a49e9f95 100644 --- a/crates/ra_prof/src/lib.rs +++ b/crates/ra_prof/src/lib.rs
@@ -6,9 +6,9 @@ mod google_cpu_profiler;
6		6
7	use std::{	7	use std::{
8	cell::RefCell,	8	cell::RefCell,
		9	collections::BTreeMap,
9	collections::HashSet,	10	collections::HashSet,
10	io::{stderr, Write},	11	io::{stderr, Write},
11	iter::repeat,
12	mem,	12	mem,
13	sync::{	13	sync::{
14	atomic::{AtomicBool, Ordering},	14	atomic::{AtomicBool, Ordering},
@@ -17,7 +17,6 @@ use std::{
17	time::{Duration, Instant},	17	time::{Duration, Instant},
18	};	18	};
19		19
20	use itertools::Itertools;
21	use once_cell::sync::Lazy;	20	use once_cell::sync::Lazy;
22		21
23	pub use crate::memory_usage::{Bytes, MemoryUsage};	22	pub use crate::memory_usage::{Bytes, MemoryUsage};
@@ -216,7 +215,7 @@ impl Drop for Profiler {
216	// (otherwise we could print `0ms` despite user's `>0` filter when	215	// (otherwise we could print `0ms` despite user's `>0` filter when
217	// `duration` is just a few nanos).	216	// `duration` is just a few nanos).
218	if duration.as_millis() > longer_than.as_millis() {	217	if duration.as_millis() > longer_than.as_millis() {
219	print(0, &stack.messages, &mut stdout.lock(), longer_than, None);	218	print(&stack.messages, longer_than, &mut stdout.lock());
220	}	219	}
221	stack.messages.clear();	220	stack.messages.clear();
222	}	221	}
@@ -227,59 +226,85 @@ impl Drop for Profiler {
227	}	226	}
228	}	227	}
229		228
230	fn print(	229	fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) {
231	lvl: usize,
232	msgs: &[Message],
233	out: &mut impl Write,
234	longer_than: Duration,
235	total: Option<Duration>,
236	) {
237	if msgs.is_empty() {	230	if msgs.is_empty() {
238	return;	231	return;
239	}	232	}
240	// The index of the first element that will be included in the slice when we recurse.	233	let children_map = idx_to_children(msgs);
241	let mut next_start = 0;	234	let root_idx = msgs.len() - 1;
242	let indent = repeat(" ").take(lvl).collect::<String>();	235	print_for_idx(root_idx, &children_map, msgs, longer_than, out);
243	// We output hierarchy for long calls, but sum up all short calls	236	}
244	let mut short = Vec::new();	237
		238	fn print_for_idx(
		239	current_idx: usize,
		240	children_map: &[Vec<usize>],
		241	msgs: &[Message],
		242	longer_than: Duration,
		243	out: &mut impl Write,
		244	) {
		245	let current = &msgs[current_idx];
		246	let current_indent = " ".repeat(current.level);
		247	writeln!(out, "{}{:5}ms - {}", current_indent, current.duration.as_millis(), current.message)
		248	.expect("printing profiling info");
		249
		250	let longer_than_millis = longer_than.as_millis();
		251	let children_indices = &children_map[current_idx];
245	let mut accounted_for = Duration::default();	252	let mut accounted_for = Duration::default();
246	for (i, &Message { level, duration, message: ref msg }) in msgs.iter().enumerate() {	253	let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
247	if level != lvl {
248	continue;
249	}
250	accounted_for += duration;
251	if duration.as_millis() > longer_than.as_millis() {
252	writeln!(out, "{}{:5}ms - {}", indent, duration.as_millis(), msg)
253	.expect("printing profiling info to stdout");
254		254
255	print(lvl + 1, &msgs[next_start..i], out, longer_than, Some(duration));	255	for child_idx in children_indices.iter() {
		256	let child = &msgs[*child_idx];
		257	if child.duration.as_millis() > longer_than_millis {
		258	print_for_idx(*child_idx, children_map, msgs, longer_than, out);
256	} else {	259	} else {
257	short.push((msg, duration))	260	let pair = short_children.entry(&child.message).or_insert((Duration::default(), 0));
		261	pair.0 += child.duration;
		262	pair.1 += 1;
258	}	263	}
		264	accounted_for += child.duration;
		265	}
259		266
260	next_start = i + 1;	267	for (child_msg, (duration, count)) in short_children.iter() {
		268	let millis = duration.as_millis();
		269	writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count)
		270	.expect("printing profiling info");
261	}	271	}
262	short.sort_by_key(\|(msg, _time)\| *msg);	272
263	for (msg, entires) in short.iter().group_by(\|(msg, _time)\| msg).into_iter() {	273	let unaccounted_millis = (current.duration - accounted_for).as_millis();
264	let mut count = 0;	274	if !children_indices.is_empty()
265	let mut total_duration = Duration::default();	275	&& unaccounted_millis > 0
266	entires.for_each(\|(_msg, time)\| {	276	&& unaccounted_millis > longer_than_millis
267	count += 1;	277	{
268	total_duration += *time;	278	writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted_millis)
269	});	279	.expect("printing profiling info");
270	writeln!(out, "{}{:5}ms - {} ({} calls)", indent, total_duration.as_millis(), msg, count)
271	.expect("printing profiling info to stdout");
272	}	280	}
		281	}
273		282
274	if let Some(total) = total {	283	/// Returns a mapping from an index in the `msgs` to the vector with the indices of its children.
275	if let Some(unaccounted) = total.checked_sub(accounted_for) {	284	///
276	let unaccounted_millis = unaccounted.as_millis();	285	/// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish.
277	if unaccounted_millis > longer_than.as_millis() && unaccounted_millis > 0 {	286	/// In other words, a postorder of the call graph. In particular, the root is the last element of
278	writeln!(out, "{}{:5}ms - ???", indent, unaccounted_millis)	287	/// `msgs`.
279	.expect("printing profiling info to stdout");	288	fn idx_to_children(msgs: &[Message]) -> Vec<Vec<usize>> {
280	}	289	// Initialize with the index of the root; `msgs` and `ancestors` should be never empty.
		290	assert!(!msgs.is_empty());
		291	let mut ancestors = vec![msgs.len() - 1];
		292	let mut result: Vec<Vec<usize>> = vec![vec![]; msgs.len()];
		293	for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() {
		294	// We need to find the parent of the current message, i.e., the last ancestor that has a
		295	// level lower than the current message.
		296	while msgs[*ancestors.last().unwrap()].level >= msg.level {
		297	ancestors.pop();
281	}	298	}
		299	result[*ancestors.last().unwrap()].push(idx);
		300	ancestors.push(idx);
		301	}
		302	// Note that above we visited all children from the last to the first one. Let's reverse vectors
		303	// to get the more natural order where the first element is the first child.
		304	for vec in result.iter_mut() {
		305	vec.reverse();
282	}	306	}
		307	result
283	}	308	}
284		309
285	/// Prints backtrace to stderr, useful for debugging.	310	/// Prints backtrace to stderr, useful for debugging.
@@ -388,7 +413,7 @@ mod tests {
388	Message { level: 1, duration: Duration::from_nanos(2), message: "bar".to_owned() },	413	Message { level: 1, duration: Duration::from_nanos(2), message: "bar".to_owned() },
389	Message { level: 0, duration: Duration::from_millis(1), message: "foo".to_owned() },	414	Message { level: 0, duration: Duration::from_millis(1), message: "foo".to_owned() },
390	];	415	];
391	print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));	416	print(&msgs, Duration::from_millis(0), &mut result);
392	// The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed	417	// The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed
393	// when printing.	418	// when printing.
394	assert_eq!(	419	assert_eq!(
@@ -404,7 +429,7 @@ mod tests {
404	Message { level: 1, duration: Duration::from_millis(2), message: "bar".to_owned() },	429	Message { level: 1, duration: Duration::from_millis(2), message: "bar".to_owned() },
405	Message { level: 0, duration: Duration::from_millis(5), message: "foo".to_owned() },	430	Message { level: 0, duration: Duration::from_millis(5), message: "foo".to_owned() },
406	];	431	];
407	print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));	432	print(&msgs, Duration::from_millis(0), &mut result);
408	assert_eq!(	433	assert_eq!(
409	std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),	434	std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
410	vec![	435	vec![
@@ -426,7 +451,7 @@ mod tests {
426	Message { level: 1, duration: Duration::from_millis(4), message: "bar".to_owned() },	451	Message { level: 1, duration: Duration::from_millis(4), message: "bar".to_owned() },
427	Message { level: 0, duration: Duration::from_millis(9), message: "foo".to_owned() },	452	Message { level: 0, duration: Duration::from_millis(9), message: "foo".to_owned() },
428	];	453	];
429	print(0, &msgs, &mut result, Duration::from_millis(0), None);	454	print(&msgs, Duration::from_millis(0), &mut result);
430	assert_eq!(	455	assert_eq!(
431	std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),	456	std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
432	vec![	457	vec![