Rewrite ra_prof's profile printing

This changes the way we print things to first construct a mapping from events to the children and uses that mapping to actually print things. It should not change the actual output that we produce. The new approach two benefits: * It avoids a potential quadratic behavior of the previous approach. For instance, for a vector of N elements: ``` [Message{level: (N - 1)}, ..., Message{level: 1}, Message{level: 0}] ``` we would first do a linear scan to find entry with level 0, then another scan to find one with level 1, etc. * It makes it much easier to improve the output in the future, because we now pre-compute the children for each entry and can easily take that into account when printing. Signed-off-by: Michal Terepeta <[email protected]>
author: Michal Terepeta <[email protected]> 2020-01-19 15:07:15 +0000
committer: Michal Terepeta <[email protected]> 2020-01-22 19:13:56 +0000
commit: 39cbb6b620244fcf921f45669ed40cc6ebdf6520 (patch)
tree: e8516f6712b932bf51e558302cec4e1621255fd7 /crates
parent: 9b1465af322e311152a23c1f61f967bda2bffcee (diff)
1 files changed, 71 insertions, 46 deletions
diff --git a/crates/ra_prof/src/lib.rs b/crates/ra_prof/src/lib.rs
index da541005a..d95e7eb1b 100644
--- a/crates/ra_prof/src/lib.rs
+++ b/crates/ra_prof/src/lib.rs
@@ -6,9 +6,9 @@ mod google_cpu_profiler;
 use std::{
    cell::RefCell,
+    collections::BTreeMap,
    collections::HashSet,
    io::{stderr, Write},
-    iter::repeat,
    mem,
    sync::{
        atomic::{AtomicBool, Ordering},
@@ -17,7 +17,6 @@ use std::{
    time::{Duration, Instant},
 };
-use itertools::Itertools;
 use once_cell::sync::Lazy;
 pub use crate::memory_usage::{Bytes, MemoryUsage};
@@ -201,7 +200,7 @@ impl Drop for Profiler {
                        // (otherwise we could print `0ms` despite user's `>0` filter when
                        // `duration` is just a few nanos).
                        if duration.as_millis() > longer_than.as_millis() {
-                            print(0, &stack.messages, &mut stdout.lock(), longer_than, None);
+                            print(&stack.messages, longer_than, &mut stdout.lock());
                        }
                        stack.messages.clear();
                    }
@@ -212,59 +211,85 @@ impl Drop for Profiler {
    }
 }
-fn print(
+fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) {
-    lvl: usize,
-    msgs: &[Message],
-    out: &mut impl Write,
-    longer_than: Duration,
-    total: Option<Duration>,
-) {
    if msgs.is_empty() {
        return;
    }
-    // The index of the first element that will be included in the slice when we recurse.
+    let children_map = idx_to_children(msgs);
-    let mut next_start = 0;
+    let root_idx = msgs.len() - 1;
-    let indent = repeat("    ").take(lvl).collect::<String>();
+    print_for_idx(root_idx, &children_map, msgs, longer_than, out);
-    // We output hierarchy for long calls, but sum up all short calls
+}
-    let mut short = Vec::new();
+fn print_for_idx(
+    current_idx: usize,
+    children_map: &[Vec<usize>],
+    msgs: &[Message],
+    longer_than: Duration,
+    out: &mut impl Write,
+) {
+    let current = &msgs[current_idx];
+    let current_indent = "    ".repeat(current.level);
+    writeln!(out, "{}{:5}ms - {}", current_indent, current.duration.as_millis(), current.message)
+        .expect("printing profiling info");
+    let longer_than_millis = longer_than.as_millis();
+    let children_indices = &children_map[current_idx];
    let mut accounted_for = Duration::default();
-    for (i, &Message { level, duration, message: ref msg }) in msgs.iter().enumerate() {
+    let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
-        if level != lvl {
-            continue;
-        }
-        accounted_for += duration;
-        if duration.as_millis() > longer_than.as_millis() {
-            writeln!(out, "{}{:5}ms - {}", indent, duration.as_millis(), msg)
-                .expect("printing profiling info to stdout");
-            print(lvl + 1, &msgs[next_start..i], out, longer_than, Some(duration));
+    for child_idx in children_indices.iter() {
+        let child = &msgs[*child_idx];
+        if child.duration.as_millis() > longer_than_millis {
+            print_for_idx(*child_idx, children_map, msgs, longer_than, out);
        } else {
-            short.push((msg, duration))
+            let pair = short_children.entry(&child.message).or_insert((Duration::default(), 0));
+            pair.0 += child.duration;
+            pair.1 += 1;
        }
+        accounted_for += child.duration;
+    }
-        next_start = i + 1;
+    for (child_msg, (duration, count)) in short_children.iter() {
+        let millis = duration.as_millis();
+        writeln!(out, "    {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count)
+            .expect("printing profiling info");
    }
-    short.sort_by_key(|(msg, _time)| *msg);
-    for (msg, entires) in short.iter().group_by(|(msg, _time)| msg).into_iter() {
+    let unaccounted_millis = (current.duration - accounted_for).as_millis();
-        let mut count = 0;
+    if !children_indices.is_empty()
-        let mut total_duration = Duration::default();
+        && unaccounted_millis > 0
-        entires.for_each(|(_msg, time)| {
+        && unaccounted_millis > longer_than_millis
-            count += 1;
+    {
-            total_duration += *time;
+        writeln!(out, "    {}{:5}ms - ???", current_indent, unaccounted_millis)
-        });
+            .expect("printing profiling info");
-        writeln!(out, "{}{:5}ms - {} ({} calls)", indent, total_duration.as_millis(), msg, count)
-            .expect("printing profiling info to stdout");
    }
+}
-    if let Some(total) = total {
+/// Returns a mapping from an index in the `msgs` to the vector with the indices of its children.
-        if let Some(unaccounted) = total.checked_sub(accounted_for) {
+///
-            let unaccounted_millis = unaccounted.as_millis();
+/// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish.
-            if unaccounted_millis > longer_than.as_millis() && unaccounted_millis > 0 {
+/// In other words, a postorder of the call graph. In particular, the root is the last element of
-                writeln!(out, "{}{:5}ms - ???", indent, unaccounted_millis)
+/// `msgs`.
-                    .expect("printing profiling info to stdout");
+fn idx_to_children(msgs: &[Message]) -> Vec<Vec<usize>> {
-            }
+    // Initialize with the index of the root; `ancestors` should be never empty.
+    let mut ancestors = vec![msgs.len() - 1];
+    let mut result: Vec<Vec<usize>> = vec![];
+    result.resize_with(msgs.len(), Default::default);
+    for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() {
+        // We need to find the parent of the current message, i.e., the last ancestor that has a
+        // level lower than the current message.
+        while msgs[*ancestors.last().unwrap()].level >= msg.level {
+            ancestors.pop();
        }
+        result[*ancestors.last().unwrap()].push(idx);
+        ancestors.push(idx);
+    }
+    // Note that above we visited all children from the last to the first one. Let's reverse vectors
+    // to get the more natural order where the first element is the first child.
+    for vec in result.iter_mut() {
+        vec.reverse();
    }
+    result
 }
 /// Prints backtrace to stderr, useful for debugging.
@@ -373,7 +398,7 @@ mod tests {
            Message { level: 1, duration: Duration::from_nanos(2), message: "bar".to_owned() },
            Message { level: 0, duration: Duration::from_millis(1), message: "foo".to_owned() },
        ];
-        print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));
+        print(&msgs, Duration::from_millis(0), &mut result);
        // The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed
        // when printing.
        assert_eq!(
@@ -389,7 +414,7 @@ mod tests {
            Message { level: 1, duration: Duration::from_millis(2), message: "bar".to_owned() },
            Message { level: 0, duration: Duration::from_millis(5), message: "foo".to_owned() },
        ];
-        print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));
+        print(&msgs, Duration::from_millis(0), &mut result);
        assert_eq!(
            std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
            vec![
@@ -411,7 +436,7 @@ mod tests {
            Message { level: 1, duration: Duration::from_millis(4), message: "bar".to_owned() },
            Message { level: 0, duration: Duration::from_millis(9), message: "foo".to_owned() },
        ];
-        print(0, &msgs, &mut result, Duration::from_millis(0), None);
+        print(&msgs, Duration::from_millis(0), &mut result);
        assert_eq!(
            std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
            vec![
author	Michal Terepeta <[email protected]>	2020-01-19 15:07:15 +0000
committer	Michal Terepeta <[email protected]>	2020-01-22 19:13:56 +0000
commit	39cbb6b620244fcf921f45669ed40cc6ebdf6520 (patch)
tree	e8516f6712b932bf51e558302cec4e1621255fd7 /crates
parent	9b1465af322e311152a23c1f61f967bda2bffcee (diff)

diff --git a/crates/ra_prof/src/lib.rs b/crates/ra_prof/src/lib.rs index da541005a..d95e7eb1b 100644 --- a/crates/ra_prof/src/lib.rs +++ b/crates/ra_prof/src/lib.rs
@@ -6,9 +6,9 @@ mod google_cpu_profiler;
6		6
7	use std::{	7	use std::{
8	cell::RefCell,	8	cell::RefCell,
		9	collections::BTreeMap,
9	collections::HashSet,	10	collections::HashSet,
10	io::{stderr, Write},	11	io::{stderr, Write},
11	iter::repeat,
12	mem,	12	mem,
13	sync::{	13	sync::{
14	atomic::{AtomicBool, Ordering},	14	atomic::{AtomicBool, Ordering},
@@ -17,7 +17,6 @@ use std::{
17	time::{Duration, Instant},	17	time::{Duration, Instant},
18	};	18	};
19		19
20	use itertools::Itertools;
21	use once_cell::sync::Lazy;	20	use once_cell::sync::Lazy;
22		21
23	pub use crate::memory_usage::{Bytes, MemoryUsage};	22	pub use crate::memory_usage::{Bytes, MemoryUsage};
@@ -201,7 +200,7 @@ impl Drop for Profiler {
201	// (otherwise we could print `0ms` despite user's `>0` filter when	200	// (otherwise we could print `0ms` despite user's `>0` filter when
202	// `duration` is just a few nanos).	201	// `duration` is just a few nanos).
203	if duration.as_millis() > longer_than.as_millis() {	202	if duration.as_millis() > longer_than.as_millis() {
204	print(0, &stack.messages, &mut stdout.lock(), longer_than, None);	203	print(&stack.messages, longer_than, &mut stdout.lock());
205	}	204	}
206	stack.messages.clear();	205	stack.messages.clear();
207	}	206	}
@@ -212,59 +211,85 @@ impl Drop for Profiler {
212	}	211	}
213	}	212	}
214		213
215	fn print(	214	fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) {
216	lvl: usize,
217	msgs: &[Message],
218	out: &mut impl Write,
219	longer_than: Duration,
220	total: Option<Duration>,
221	) {
222	if msgs.is_empty() {	215	if msgs.is_empty() {
223	return;	216	return;
224	}	217	}
225	// The index of the first element that will be included in the slice when we recurse.	218	let children_map = idx_to_children(msgs);
226	let mut next_start = 0;	219	let root_idx = msgs.len() - 1;
227	let indent = repeat(" ").take(lvl).collect::<String>();	220	print_for_idx(root_idx, &children_map, msgs, longer_than, out);
228	// We output hierarchy for long calls, but sum up all short calls	221	}
229	let mut short = Vec::new();	222
		223	fn print_for_idx(
		224	current_idx: usize,
		225	children_map: &[Vec<usize>],
		226	msgs: &[Message],
		227	longer_than: Duration,
		228	out: &mut impl Write,
		229	) {
		230	let current = &msgs[current_idx];
		231	let current_indent = " ".repeat(current.level);
		232	writeln!(out, "{}{:5}ms - {}", current_indent, current.duration.as_millis(), current.message)
		233	.expect("printing profiling info");
		234
		235	let longer_than_millis = longer_than.as_millis();
		236	let children_indices = &children_map[current_idx];
230	let mut accounted_for = Duration::default();	237	let mut accounted_for = Duration::default();
231	for (i, &Message { level, duration, message: ref msg }) in msgs.iter().enumerate() {	238	let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
232	if level != lvl {
233	continue;
234	}
235	accounted_for += duration;
236	if duration.as_millis() > longer_than.as_millis() {
237	writeln!(out, "{}{:5}ms - {}", indent, duration.as_millis(), msg)
238	.expect("printing profiling info to stdout");
239		239
240	print(lvl + 1, &msgs[next_start..i], out, longer_than, Some(duration));	240	for child_idx in children_indices.iter() {
		241	let child = &msgs[*child_idx];
		242	if child.duration.as_millis() > longer_than_millis {
		243	print_for_idx(*child_idx, children_map, msgs, longer_than, out);
241	} else {	244	} else {
242	short.push((msg, duration))	245	let pair = short_children.entry(&child.message).or_insert((Duration::default(), 0));
		246	pair.0 += child.duration;
		247	pair.1 += 1;
243	}	248	}
		249	accounted_for += child.duration;
		250	}
244		251
245	next_start = i + 1;	252	for (child_msg, (duration, count)) in short_children.iter() {
		253	let millis = duration.as_millis();
		254	writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count)
		255	.expect("printing profiling info");
246	}	256	}
247	short.sort_by_key(\|(msg, _time)\| *msg);	257
248	for (msg, entires) in short.iter().group_by(\|(msg, _time)\| msg).into_iter() {	258	let unaccounted_millis = (current.duration - accounted_for).as_millis();
249	let mut count = 0;	259	if !children_indices.is_empty()
250	let mut total_duration = Duration::default();	260	&& unaccounted_millis > 0
251	entires.for_each(\|(_msg, time)\| {	261	&& unaccounted_millis > longer_than_millis
252	count += 1;	262	{
253	total_duration += *time;	263	writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted_millis)
254	});	264	.expect("printing profiling info");
255	writeln!(out, "{}{:5}ms - {} ({} calls)", indent, total_duration.as_millis(), msg, count)
256	.expect("printing profiling info to stdout");
257	}	265	}
		266	}
258		267
259	if let Some(total) = total {	268	/// Returns a mapping from an index in the `msgs` to the vector with the indices of its children.
260	if let Some(unaccounted) = total.checked_sub(accounted_for) {	269	///
261	let unaccounted_millis = unaccounted.as_millis();	270	/// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish.
262	if unaccounted_millis > longer_than.as_millis() && unaccounted_millis > 0 {	271	/// In other words, a postorder of the call graph. In particular, the root is the last element of
263	writeln!(out, "{}{:5}ms - ???", indent, unaccounted_millis)	272	/// `msgs`.
264	.expect("printing profiling info to stdout");	273	fn idx_to_children(msgs: &[Message]) -> Vec<Vec<usize>> {
265	}	274	// Initialize with the index of the root; `ancestors` should be never empty.
		275	let mut ancestors = vec![msgs.len() - 1];
		276	let mut result: Vec<Vec<usize>> = vec![];
		277	result.resize_with(msgs.len(), Default::default);
		278	for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() {
		279	// We need to find the parent of the current message, i.e., the last ancestor that has a
		280	// level lower than the current message.
		281	while msgs[*ancestors.last().unwrap()].level >= msg.level {
		282	ancestors.pop();
266	}	283	}
		284	result[*ancestors.last().unwrap()].push(idx);
		285	ancestors.push(idx);
		286	}
		287	// Note that above we visited all children from the last to the first one. Let's reverse vectors
		288	// to get the more natural order where the first element is the first child.
		289	for vec in result.iter_mut() {
		290	vec.reverse();
267	}	291	}
		292	result
268	}	293	}
269		294
270	/// Prints backtrace to stderr, useful for debugging.	295	/// Prints backtrace to stderr, useful for debugging.
@@ -373,7 +398,7 @@ mod tests {
373	Message { level: 1, duration: Duration::from_nanos(2), message: "bar".to_owned() },	398	Message { level: 1, duration: Duration::from_nanos(2), message: "bar".to_owned() },
374	Message { level: 0, duration: Duration::from_millis(1), message: "foo".to_owned() },	399	Message { level: 0, duration: Duration::from_millis(1), message: "foo".to_owned() },
375	];	400	];
376	print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));	401	print(&msgs, Duration::from_millis(0), &mut result);
377	// The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed	402	// The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed
378	// when printing.	403	// when printing.
379	assert_eq!(	404	assert_eq!(
@@ -389,7 +414,7 @@ mod tests {
389	Message { level: 1, duration: Duration::from_millis(2), message: "bar".to_owned() },	414	Message { level: 1, duration: Duration::from_millis(2), message: "bar".to_owned() },
390	Message { level: 0, duration: Duration::from_millis(5), message: "foo".to_owned() },	415	Message { level: 0, duration: Duration::from_millis(5), message: "foo".to_owned() },
391	];	416	];
392	print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1)));	417	print(&msgs, Duration::from_millis(0), &mut result);
393	assert_eq!(	418	assert_eq!(
394	std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),	419	std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
395	vec![	420	vec![
@@ -411,7 +436,7 @@ mod tests {
411	Message { level: 1, duration: Duration::from_millis(4), message: "bar".to_owned() },	436	Message { level: 1, duration: Duration::from_millis(4), message: "bar".to_owned() },
412	Message { level: 0, duration: Duration::from_millis(9), message: "foo".to_owned() },	437	Message { level: 0, duration: Duration::from_millis(9), message: "foo".to_owned() },
413	];	438	];
414	print(0, &msgs, &mut result, Duration::from_millis(0), None);	439	print(&msgs, Duration::from_millis(0), &mut result);
415	assert_eq!(	440	assert_eq!(
416	std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),	441	std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(),
417	vec![	442	vec![