From 39cbb6b620244fcf921f45669ed40cc6ebdf6520 Mon Sep 17 00:00:00 2001 From: Michal Terepeta Date: Sun, 19 Jan 2020 16:07:15 +0100 Subject: Rewrite ra_prof's profile printing This changes the way we print things to first construct a mapping from events to the children and uses that mapping to actually print things. It should not change the actual output that we produce. The new approach two benefits: * It avoids a potential quadratic behavior of the previous approach. For instance, for a vector of N elements: ``` [Message{level: (N - 1)}, ..., Message{level: 1}, Message{level: 0}] ``` we would first do a linear scan to find entry with level 0, then another scan to find one with level 1, etc. * It makes it much easier to improve the output in the future, because we now pre-compute the children for each entry and can easily take that into account when printing. Signed-off-by: Michal Terepeta --- crates/ra_prof/src/lib.rs | 117 ++++++++++++++++++++++++++++------------------ 1 file changed, 71 insertions(+), 46 deletions(-) (limited to 'crates/ra_prof/src/lib.rs') diff --git a/crates/ra_prof/src/lib.rs b/crates/ra_prof/src/lib.rs index da541005a..d95e7eb1b 100644 --- a/crates/ra_prof/src/lib.rs +++ b/crates/ra_prof/src/lib.rs @@ -6,9 +6,9 @@ mod google_cpu_profiler; use std::{ cell::RefCell, + collections::BTreeMap, collections::HashSet, io::{stderr, Write}, - iter::repeat, mem, sync::{ atomic::{AtomicBool, Ordering}, @@ -17,7 +17,6 @@ use std::{ time::{Duration, Instant}, }; -use itertools::Itertools; use once_cell::sync::Lazy; pub use crate::memory_usage::{Bytes, MemoryUsage}; @@ -201,7 +200,7 @@ impl Drop for Profiler { // (otherwise we could print `0ms` despite user's `>0` filter when // `duration` is just a few nanos). if duration.as_millis() > longer_than.as_millis() { - print(0, &stack.messages, &mut stdout.lock(), longer_than, None); + print(&stack.messages, longer_than, &mut stdout.lock()); } stack.messages.clear(); } @@ -212,59 +211,85 @@ impl Drop for Profiler { } } -fn print( - lvl: usize, - msgs: &[Message], - out: &mut impl Write, - longer_than: Duration, - total: Option, -) { +fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) { if msgs.is_empty() { return; } - // The index of the first element that will be included in the slice when we recurse. - let mut next_start = 0; - let indent = repeat(" ").take(lvl).collect::(); - // We output hierarchy for long calls, but sum up all short calls - let mut short = Vec::new(); + let children_map = idx_to_children(msgs); + let root_idx = msgs.len() - 1; + print_for_idx(root_idx, &children_map, msgs, longer_than, out); +} + +fn print_for_idx( + current_idx: usize, + children_map: &[Vec], + msgs: &[Message], + longer_than: Duration, + out: &mut impl Write, +) { + let current = &msgs[current_idx]; + let current_indent = " ".repeat(current.level); + writeln!(out, "{}{:5}ms - {}", current_indent, current.duration.as_millis(), current.message) + .expect("printing profiling info"); + + let longer_than_millis = longer_than.as_millis(); + let children_indices = &children_map[current_idx]; let mut accounted_for = Duration::default(); - for (i, &Message { level, duration, message: ref msg }) in msgs.iter().enumerate() { - if level != lvl { - continue; - } - accounted_for += duration; - if duration.as_millis() > longer_than.as_millis() { - writeln!(out, "{}{:5}ms - {}", indent, duration.as_millis(), msg) - .expect("printing profiling info to stdout"); + let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output. - print(lvl + 1, &msgs[next_start..i], out, longer_than, Some(duration)); + for child_idx in children_indices.iter() { + let child = &msgs[*child_idx]; + if child.duration.as_millis() > longer_than_millis { + print_for_idx(*child_idx, children_map, msgs, longer_than, out); } else { - short.push((msg, duration)) + let pair = short_children.entry(&child.message).or_insert((Duration::default(), 0)); + pair.0 += child.duration; + pair.1 += 1; } + accounted_for += child.duration; + } - next_start = i + 1; + for (child_msg, (duration, count)) in short_children.iter() { + let millis = duration.as_millis(); + writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count) + .expect("printing profiling info"); } - short.sort_by_key(|(msg, _time)| *msg); - for (msg, entires) in short.iter().group_by(|(msg, _time)| msg).into_iter() { - let mut count = 0; - let mut total_duration = Duration::default(); - entires.for_each(|(_msg, time)| { - count += 1; - total_duration += *time; - }); - writeln!(out, "{}{:5}ms - {} ({} calls)", indent, total_duration.as_millis(), msg, count) - .expect("printing profiling info to stdout"); + + let unaccounted_millis = (current.duration - accounted_for).as_millis(); + if !children_indices.is_empty() + && unaccounted_millis > 0 + && unaccounted_millis > longer_than_millis + { + writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted_millis) + .expect("printing profiling info"); } +} - if let Some(total) = total { - if let Some(unaccounted) = total.checked_sub(accounted_for) { - let unaccounted_millis = unaccounted.as_millis(); - if unaccounted_millis > longer_than.as_millis() && unaccounted_millis > 0 { - writeln!(out, "{}{:5}ms - ???", indent, unaccounted_millis) - .expect("printing profiling info to stdout"); - } +/// Returns a mapping from an index in the `msgs` to the vector with the indices of its children. +/// +/// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish. +/// In other words, a postorder of the call graph. In particular, the root is the last element of +/// `msgs`. +fn idx_to_children(msgs: &[Message]) -> Vec> { + // Initialize with the index of the root; `ancestors` should be never empty. + let mut ancestors = vec![msgs.len() - 1]; + let mut result: Vec> = vec![]; + result.resize_with(msgs.len(), Default::default); + for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() { + // We need to find the parent of the current message, i.e., the last ancestor that has a + // level lower than the current message. + while msgs[*ancestors.last().unwrap()].level >= msg.level { + ancestors.pop(); } + result[*ancestors.last().unwrap()].push(idx); + ancestors.push(idx); + } + // Note that above we visited all children from the last to the first one. Let's reverse vectors + // to get the more natural order where the first element is the first child. + for vec in result.iter_mut() { + vec.reverse(); } + result } /// Prints backtrace to stderr, useful for debugging. @@ -373,7 +398,7 @@ mod tests { Message { level: 1, duration: Duration::from_nanos(2), message: "bar".to_owned() }, Message { level: 0, duration: Duration::from_millis(1), message: "foo".to_owned() }, ]; - print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1))); + print(&msgs, Duration::from_millis(0), &mut result); // The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed // when printing. assert_eq!( @@ -389,7 +414,7 @@ mod tests { Message { level: 1, duration: Duration::from_millis(2), message: "bar".to_owned() }, Message { level: 0, duration: Duration::from_millis(5), message: "foo".to_owned() }, ]; - print(0, &msgs, &mut result, Duration::from_millis(0), Some(Duration::from_millis(1))); + print(&msgs, Duration::from_millis(0), &mut result); assert_eq!( std::str::from_utf8(&result).unwrap().lines().collect::>(), vec![ @@ -411,7 +436,7 @@ mod tests { Message { level: 1, duration: Duration::from_millis(4), message: "bar".to_owned() }, Message { level: 0, duration: Duration::from_millis(9), message: "foo".to_owned() }, ]; - print(0, &msgs, &mut result, Duration::from_millis(0), None); + print(&msgs, Duration::from_millis(0), &mut result); assert_eq!( std::str::from_utf8(&result).unwrap().lines().collect::>(), vec![ -- cgit v1.2.3 From 6577a7622d5053f35c16eed47516265c98e5212e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 25 Jan 2020 11:59:11 +0100 Subject: Add print_time helper --- crates/ra_prof/src/lib.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'crates/ra_prof/src/lib.rs') diff --git a/crates/ra_prof/src/lib.rs b/crates/ra_prof/src/lib.rs index da541005a..c7973ddf4 100644 --- a/crates/ra_prof/src/lib.rs +++ b/crates/ra_prof/src/lib.rs @@ -106,6 +106,21 @@ pub fn profile(desc: &str) -> Profiler { }) } +pub fn print_time(desc: &str) -> impl Drop + '_ { + struct Guard<'a> { + desc: &'a str, + start: Instant, + } + + impl Drop for Guard<'_> { + fn drop(&mut self) { + eprintln!("{}: {:?}", self.desc, self.start.elapsed()) + } + } + + Guard { desc, start: Instant::now() } +} + pub struct Profiler { desc: Option, } -- cgit v1.2.3 From 644c383f65324b5fe1ada42680923c2529accb26 Mon Sep 17 00:00:00 2001 From: Michal Terepeta Date: Wed, 29 Jan 2020 20:55:35 +0100 Subject: A couple of small improvements to ra_prof printing Based on suggestions from @matklad. Signed-off-by: Michal Terepeta --- crates/ra_prof/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'crates/ra_prof/src/lib.rs') diff --git a/crates/ra_prof/src/lib.rs b/crates/ra_prof/src/lib.rs index d95e7eb1b..e2c32c130 100644 --- a/crates/ra_prof/src/lib.rs +++ b/crates/ra_prof/src/lib.rs @@ -271,10 +271,10 @@ fn print_for_idx( /// In other words, a postorder of the call graph. In particular, the root is the last element of /// `msgs`. fn idx_to_children(msgs: &[Message]) -> Vec> { - // Initialize with the index of the root; `ancestors` should be never empty. + // Initialize with the index of the root; `msgs` and `ancestors` should be never empty. + assert!(!msgs.is_empty()); let mut ancestors = vec![msgs.len() - 1]; - let mut result: Vec> = vec![]; - result.resize_with(msgs.len(), Default::default); + let mut result: Vec> = vec![vec![]; msgs.len()]; for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() { // We need to find the parent of the current message, i.e., the last ancestor that has a // level lower than the current message. -- cgit v1.2.3 From 24ad1cce2c3cf2c0ce8288fc02c4c55529598990 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 2 Feb 2020 18:54:26 +0100 Subject: Avoid premature pessimization The extra allocation for message should not matter here at all, but using a static string is just as ergonomic, if not more, and there's no reason to write deliberately slow code --- crates/ra_prof/src/lib.rs | 64 +++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'crates/ra_prof/src/lib.rs') diff --git a/crates/ra_prof/src/lib.rs b/crates/ra_prof/src/lib.rs index 4a49e9f95..d38ff397e 100644 --- a/crates/ra_prof/src/lib.rs +++ b/crates/ra_prof/src/lib.rs @@ -9,7 +9,6 @@ use std::{ collections::BTreeMap, collections::HashSet, io::{stderr, Write}, - mem, sync::{ atomic::{AtomicBool, Ordering}, RwLock, @@ -50,6 +49,8 @@ pub fn set_filter(f: Filter) { *old = filter_data; } +pub type Label = &'static str; + /// This function starts a profiling scope in the current execution stack with a given description. /// It returns a Profile structure and measure elapsed time between this method invocation and Profile structure drop. /// It supports nested profiling scopes in case when this function invoked multiple times at the execution stack. In this case the profiling information will be nested at the output. @@ -77,10 +78,10 @@ pub fn set_filter(f: Filter) { /// 0ms - profile /// 0ms - profile2 /// ``` -pub fn profile(desc: &str) -> Profiler { - assert!(!desc.is_empty()); +pub fn profile(label: Label) -> Profiler { + assert!(!label.is_empty()); if !PROFILING_ENABLED.load(Ordering::Relaxed) { - return Profiler { desc: None }; + return Profiler { label: None }; } PROFILE_STACK.with(|stack| { @@ -93,35 +94,35 @@ pub fn profile(desc: &str) -> Profiler { }; } if stack.starts.len() > stack.filter_data.depth { - return Profiler { desc: None }; + return Profiler { label: None }; } let allowed = &stack.filter_data.allowed; - if stack.starts.is_empty() && !allowed.is_empty() && !allowed.contains(desc) { - return Profiler { desc: None }; + if stack.starts.is_empty() && !allowed.is_empty() && !allowed.contains(label) { + return Profiler { label: None }; } stack.starts.push(Instant::now()); - Profiler { desc: Some(desc.to_string()) } + Profiler { label: Some(label) } }) } -pub fn print_time(desc: &str) -> impl Drop + '_ { - struct Guard<'a> { - desc: &'a str, +pub fn print_time(label: Label) -> impl Drop { + struct Guard { + label: Label, start: Instant, } - impl Drop for Guard<'_> { + impl Drop for Guard { fn drop(&mut self) { - eprintln!("{}: {:?}", self.desc, self.start.elapsed()) + eprintln!("{}: {:?}", self.label, self.start.elapsed()) } } - Guard { desc, start: Instant::now() } + Guard { label, start: Instant::now() } } pub struct Profiler { - desc: Option, + label: Option