//! FIXME: write short doc here mod memory_usage; #[cfg(feature = "cpu_profiler")] mod google_cpu_profiler; use std::{ cell::RefCell, collections::BTreeMap, collections::HashSet, io::{stderr, Write}, mem, sync::{ atomic::{AtomicBool, Ordering}, RwLock, }, time::{Duration, Instant}, }; use once_cell::sync::Lazy; pub use crate::memory_usage::{Bytes, MemoryUsage}; // We use jemalloc mainly to get heap usage statistics, actual performance // difference is not measures. #[cfg(all(feature = "jemalloc", not(target_env = "msvc")))] #[global_allocator] static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; /// Set profiling filter. It specifies descriptions allowed to profile. /// This is helpful when call stack has too many nested profiling scopes. /// Additionally filter can specify maximum depth of profiling scopes nesting. /// /// #Example /// ``` /// use ra_prof::{set_filter, Filter}; /// let f = Filter::from_spec("profile1|profile2@2"); /// set_filter(f); /// ``` pub fn set_filter(f: Filter) { PROFILING_ENABLED.store(f.depth > 0, Ordering::SeqCst); let set: HashSet<_> = f.allowed.iter().cloned().collect(); let mut old = FILTER.write().unwrap(); let filter_data = FilterData { depth: f.depth, allowed: set, longer_than: f.longer_than, version: old.version + 1, }; *old = filter_data; } /// This function starts a profiling scope in the current execution stack with a given description. /// It returns a Profile structure and measure elapsed time between this method invocation and Profile structure drop. /// It supports nested profiling scopes in case when this function invoked multiple times at the execution stack. In this case the profiling information will be nested at the output. /// Profiling information is being printed in the stderr. /// /// # Example /// ``` /// use ra_prof::{profile, set_filter, Filter}; /// /// let f = Filter::from_spec("profile1|profile2@2"); /// set_filter(f); /// profiling_function1(); /// /// fn profiling_function1() { /// let _p = profile("profile1"); /// profiling_function2(); /// } /// /// fn profiling_function2() { /// let _p = profile("profile2"); /// } /// ``` /// This will print in the stderr the following: /// ```text /// 0ms - profile /// 0ms - profile2 /// ``` pub fn profile(desc: &str) -> Profiler { assert!(!desc.is_empty()); if !PROFILING_ENABLED.load(Ordering::Relaxed) { return Profiler { desc: None }; } PROFILE_STACK.with(|stack| { let mut stack = stack.borrow_mut(); if stack.starts.is_empty() { if let Ok(f) = FILTER.try_read() { if f.version > stack.filter_data.version { stack.filter_data = f.clone(); } }; } if stack.starts.len() > stack.filter_data.depth { return Profiler { desc: None }; } let allowed = &stack.filter_data.allowed; if stack.starts.is_empty() && !allowed.is_empty() && !allowed.contains(desc) { return Profiler { desc: None }; } stack.starts.push(Instant::now()); Profiler { desc: Some(desc.to_string()) } }) } pub fn print_time(desc: &str) -> impl Drop + '_ { struct Guard<'a> { desc: &'a str, start: Instant, } impl Drop for Guard<'_> { fn drop(&mut self) { eprintln!("{}: {:?}", self.desc, self.start.elapsed()) } } Guard { desc, start: Instant::now() } } pub struct Profiler { desc: Option, } pub struct Filter { depth: usize, allowed: Vec, longer_than: Duration, } impl Filter { // Filtering syntax // env RA_PROFILE=* // dump everything // env RA_PROFILE=foo|bar|baz // enabled only selected entries // env RA_PROFILE=*@3>10 // dump everything, up to depth 3, if it takes more than 10 ms pub fn from_spec(mut spec: &str) -> Filter { let longer_than = if let Some(idx) = spec.rfind('>') { let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than"); spec = &spec[..idx]; Duration::from_millis(longer_than) } else { Duration::new(0, 0) }; let depth = if let Some(idx) = spec.rfind('@') { let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth"); spec = &spec[..idx]; depth } else { 999 }; let allowed = if spec == "*" { Vec::new() } else { spec.split('|').map(String::from).collect() }; Filter::new(depth, allowed, longer_than) } pub fn disabled() -> Filter { Filter::new(0, Vec::new(), Duration::new(0, 0)) } pub fn new(depth: usize, allowed: Vec, longer_than: Duration) -> Filter { Filter { depth, allowed, longer_than } } } struct ProfileStack { starts: Vec, messages: Vec, filter_data: FilterData, } struct Message { level: usize, duration: Duration, message: String, } impl ProfileStack { fn new() -> ProfileStack { ProfileStack { starts: Vec::new(), messages: Vec::new(), filter_data: Default::default() } } } #[derive(Default, Clone)] struct FilterData { depth: usize, version: usize, allowed: HashSet, longer_than: Duration, } static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false); static FILTER: Lazy> = Lazy::new(Default::default); thread_local!(static PROFILE_STACK: RefCell = RefCell::new(ProfileStack::new())); impl Drop for Profiler { fn drop(&mut self) { match self { Profiler { desc: Some(desc) } => { PROFILE_STACK.with(|stack| { let mut stack = stack.borrow_mut(); let start = stack.starts.pop().unwrap(); let duration = start.elapsed(); let level = stack.starts.len(); let message = mem::replace(desc, String::new()); stack.messages.push(Message { level, duration, message }); if level == 0 { let stdout = stderr(); let longer_than = stack.filter_data.longer_than; // Convert to millis for comparison to avoid problems with rounding // (otherwise we could print `0ms` despite user's `>0` filter when // `duration` is just a few nanos). if duration.as_millis() > longer_than.as_millis() { print(&stack.messages, longer_than, &mut stdout.lock()); } stack.messages.clear(); } }); } Profiler { desc: None } => (), } } } fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) { if msgs.is_empty() { return; } let children_map = idx_to_children(msgs); let root_idx = msgs.len() - 1; print_for_idx(root_idx, &children_map, msgs, longer_than, out); } fn print_for_idx( current_idx: usize, children_map: &[Vec], msgs: &[Message], longer_than: Duration, out: &mut impl Write, ) { let current = &msgs[current_idx]; let current_indent = " ".repeat(current.level); writeln!(out, "{}{:5}ms - {}", current_indent, current.duration.as_millis(), current.message) .expect("printing profiling info"); let longer_than_millis = longer_than.as_millis(); let children_indices = &children_map[current_idx]; let mut accounted_for = Duration::default(); let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output. for child_idx in children_indices.iter() { let child = &msgs[*child_idx]; if child.duration.as_millis() > longer_than_millis { print_for_idx(*child_idx, children_map, msgs, longer_than, out); } else { let pair = short_children.entry(&child.message).or_insert((Duration::default(), 0)); pair.0 += child.duration; pair.1 += 1; } accounted_for += child.duration; } for (child_msg, (duration, count)) in short_children.iter() { let millis = duration.as_millis(); writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count) .expect("printing profiling info"); } let unaccounted_millis = (current.duration - accounted_for).as_millis(); if !children_indices.is_empty() && unaccounted_millis > 0 && unaccounted_millis > longer_than_millis { writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted_millis) .expect("printing profiling info"); } } /// Returns a mapping from an index in the `msgs` to the vector with the indices of its children. /// /// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish. /// In other words, a postorder of the call graph. In particular, the root is the last element of /// `msgs`. fn idx_to_children(msgs: &[Message]) -> Vec> { // Initialize with the index of the root; `msgs` and `ancestors` should be never empty. assert!(!msgs.is_empty()); let mut ancestors = vec![msgs.len() - 1]; let mut result: Vec> = vec![vec![]; msgs.len()]; for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() { // We need to find the parent of the current message, i.e., the last ancestor that has a // level lower than the current message. while msgs[*ancestors.last().unwrap()].level >= msg.level { ancestors.pop(); } result[*ancestors.last().unwrap()].push(idx); ancestors.push(idx); } // Note that above we visited all children from the last to the first one. Let's reverse vectors // to get the more natural order where the first element is the first child. for vec in result.iter_mut() { vec.reverse(); } result } /// Prints backtrace to stderr, useful for debugging. pub fn print_backtrace() { let bt = backtrace::Backtrace::new(); eprintln!("{:?}", bt); } thread_local!(static IN_SCOPE: RefCell = RefCell::new(false)); /// Allows to check if the current code is withing some dynamic scope, can be /// useful during debugging to figure out why a function is called. pub struct Scope { prev: bool, } impl Scope { pub fn enter() -> Scope { let prev = IN_SCOPE.with(|slot| std::mem::replace(&mut *slot.borrow_mut(), true)); Scope { prev } } pub fn is_active() -> bool { IN_SCOPE.with(|slot| *slot.borrow()) } } impl Drop for Scope { fn drop(&mut self) { IN_SCOPE.with(|slot| *slot.borrow_mut() = self.prev); } } /// A wrapper around google_cpu_profiler. /// /// Usage: /// 1. Install gpref_tools (https://github.com/gperftools/gperftools), probably packaged with your Linux distro. /// 2. Build with `cpu_profiler` feature. /// 3. Tun the code, the *raw* output would be in the `./out.profile` file. /// 4. Install pprof for visualization (https://github.com/google/pprof). /// 5. Use something like `pprof -svg target/release/ra_cli ./out.profile` to see the results. /// /// For example, here's how I run profiling on NixOS: /// /// ```bash /// $ nix-shell -p gperftools --run \ /// 'cargo run --release -p ra_cli -- parse < ~/projects/rustbench/parser.rs > /dev/null' /// ``` #[derive(Debug)] pub struct CpuProfiler { _private: (), } pub fn cpu_profiler() -> CpuProfiler { #[cfg(feature = "cpu_profiler")] { google_cpu_profiler::start("./out.profile".as_ref()) } #[cfg(not(feature = "cpu_profiler"))] { eprintln!("cpu_profiler feature is disabled") } CpuProfiler { _private: () } } impl Drop for CpuProfiler { fn drop(&mut self) { #[cfg(feature = "cpu_profiler")] { google_cpu_profiler::stop() } } } pub fn memory_usage() -> MemoryUsage { MemoryUsage::current() } #[cfg(test)] mod tests { use super::*; #[test] fn test_basic_profile() { let s = vec!["profile1".to_string(), "profile2".to_string()]; let f = Filter::new(2, s, Duration::new(0, 0)); set_filter(f); profiling_function1(); } fn profiling_function1() { let _p = profile("profile1"); profiling_function2(); } fn profiling_function2() { let _p = profile("profile2"); } #[test] fn test_longer_than() { let mut result = vec![]; let msgs = vec![ Message { level: 1, duration: Duration::from_nanos(3), message: "bar".to_owned() }, Message { level: 1, duration: Duration::from_nanos(2), message: "bar".to_owned() }, Message { level: 0, duration: Duration::from_millis(1), message: "foo".to_owned() }, ]; print(&msgs, Duration::from_millis(0), &mut result); // The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed // when printing. assert_eq!( std::str::from_utf8(&result).unwrap(), " 1ms - foo\n 0ms - bar (2 calls)\n" ); } #[test] fn test_unaccounted_for_topmost() { let mut result = vec![]; let msgs = vec![ Message { level: 1, duration: Duration::from_millis(2), message: "bar".to_owned() }, Message { level: 0, duration: Duration::from_millis(5), message: "foo".to_owned() }, ]; print(&msgs, Duration::from_millis(0), &mut result); assert_eq!( std::str::from_utf8(&result).unwrap().lines().collect::>(), vec![ " 5ms - foo", " 2ms - bar", " 3ms - ???", // Dummy comment to improve formatting ] ); } #[test] fn test_unaccounted_for_multiple_levels() { let mut result = vec![]; let msgs = vec![ Message { level: 2, duration: Duration::from_millis(3), message: "baz".to_owned() }, Message { level: 1, duration: Duration::from_millis(5), message: "bar".to_owned() }, Message { level: 2, duration: Duration::from_millis(2), message: "baz".to_owned() }, Message { level: 1, duration: Duration::from_millis(4), message: "bar".to_owned() }, Message { level: 0, duration: Duration::from_millis(9), message: "foo".to_owned() }, ]; print(&msgs, Duration::from_millis(0), &mut result); assert_eq!( std::str::from_utf8(&result).unwrap().lines().collect::>(), vec![ " 9ms - foo", " 5ms - bar", " 3ms - baz", " 2ms - ???", " 4ms - bar", " 2ms - baz", " 2ms - ???", ] ); } }