From 208b7bd7ba687fb570feb1b89219f14c63712ce8 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 12 Aug 2020 16:32:36 +0200 Subject: Rename ra_prof -> profile --- crates/profile/src/google_cpu_profiler.rs | 39 +++++ crates/profile/src/hprof.rs | 240 ++++++++++++++++++++++++++++++ crates/profile/src/lib.rs | 109 ++++++++++++++ crates/profile/src/memory_usage.rs | 75 ++++++++++ crates/profile/src/stop_watch.rs | 86 +++++++++++ crates/profile/src/tree.rs | 84 +++++++++++ 6 files changed, 633 insertions(+) create mode 100644 crates/profile/src/google_cpu_profiler.rs create mode 100644 crates/profile/src/hprof.rs create mode 100644 crates/profile/src/lib.rs create mode 100644 crates/profile/src/memory_usage.rs create mode 100644 crates/profile/src/stop_watch.rs create mode 100644 crates/profile/src/tree.rs (limited to 'crates/profile/src') diff --git a/crates/profile/src/google_cpu_profiler.rs b/crates/profile/src/google_cpu_profiler.rs new file mode 100644 index 000000000..db865c65b --- /dev/null +++ b/crates/profile/src/google_cpu_profiler.rs @@ -0,0 +1,39 @@ +//! https://github.com/gperftools/gperftools + +use std::{ + ffi::CString, + os::raw::c_char, + path::Path, + sync::atomic::{AtomicUsize, Ordering}, +}; + +#[link(name = "profiler")] +#[allow(non_snake_case)] +extern "C" { + fn ProfilerStart(fname: *const c_char) -> i32; + fn ProfilerStop(); +} + +static PROFILER_STATE: AtomicUsize = AtomicUsize::new(OFF); +const OFF: usize = 0; +const ON: usize = 1; +const PENDING: usize = 2; + +pub fn start(path: &Path) { + if PROFILER_STATE.compare_and_swap(OFF, PENDING, Ordering::SeqCst) != OFF { + panic!("profiler already started"); + } + let path = CString::new(path.display().to_string()).unwrap(); + if unsafe { ProfilerStart(path.as_ptr()) } == 0 { + panic!("profiler failed to start") + } + assert!(PROFILER_STATE.compare_and_swap(PENDING, ON, Ordering::SeqCst) == PENDING); +} + +pub fn stop() { + if PROFILER_STATE.compare_and_swap(ON, PENDING, Ordering::SeqCst) != ON { + panic!("profiler is not started") + } + unsafe { ProfilerStop() }; + assert!(PROFILER_STATE.compare_and_swap(PENDING, OFF, Ordering::SeqCst) == PENDING); +} diff --git a/crates/profile/src/hprof.rs b/crates/profile/src/hprof.rs new file mode 100644 index 000000000..934cc8e37 --- /dev/null +++ b/crates/profile/src/hprof.rs @@ -0,0 +1,240 @@ +//! Simple hierarchical profiler +use once_cell::sync::Lazy; +use std::{ + cell::RefCell, + collections::{BTreeMap, HashSet}, + io::{stderr, Write}, + sync::{ + atomic::{AtomicBool, Ordering}, + RwLock, + }, + time::{Duration, Instant}, +}; + +use crate::tree::{Idx, Tree}; + +/// Filtering syntax +/// env RA_PROFILE=* // dump everything +/// env RA_PROFILE=foo|bar|baz // enabled only selected entries +/// env RA_PROFILE=*@3>10 // dump everything, up to depth 3, if it takes more than 10 ms +pub fn init() { + let spec = std::env::var("RA_PROFILE").unwrap_or_default(); + init_from(&spec); +} + +pub fn init_from(spec: &str) { + let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) }; + filter.install(); +} + +pub type Label = &'static str; + +/// This function starts a profiling scope in the current execution stack with a given description. +/// It returns a `Profile` struct that measures elapsed time between this method invocation and `Profile` struct drop. +/// It supports nested profiling scopes in case when this function is invoked multiple times at the execution stack. +/// In this case the profiling information will be nested at the output. +/// Profiling information is being printed in the stderr. +/// +/// # Example +/// ``` +/// profile::init_from("profile1|profile2@2"); +/// profiling_function1(); +/// +/// fn profiling_function1() { +/// let _p = profile::span("profile1"); +/// profiling_function2(); +/// } +/// +/// fn profiling_function2() { +/// let _p = profile::span("profile2"); +/// } +/// ``` +/// This will print in the stderr the following: +/// ```text +/// 0ms - profile +/// 0ms - profile2 +/// ``` +pub fn span(label: Label) -> ProfileSpan { + assert!(!label.is_empty()); + + if PROFILING_ENABLED.load(Ordering::Relaxed) + && PROFILE_STACK.with(|stack| stack.borrow_mut().push(label)) + { + ProfileSpan(Some(ProfilerImpl { label, detail: None })) + } else { + ProfileSpan(None) + } +} + +pub struct ProfileSpan(Option); + +struct ProfilerImpl { + label: Label, + detail: Option, +} + +impl ProfileSpan { + pub fn detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan { + if let Some(profiler) = &mut self.0 { + profiler.detail = Some(detail()) + } + self + } +} + +impl Drop for ProfilerImpl { + fn drop(&mut self) { + PROFILE_STACK.with(|it| it.borrow_mut().pop(self.label, self.detail.take())); + } +} + +static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false); +static FILTER: Lazy> = Lazy::new(Default::default); +thread_local!(static PROFILE_STACK: RefCell = RefCell::new(ProfileStack::new())); + +#[derive(Default, Clone, Debug)] +struct Filter { + depth: usize, + allowed: HashSet, + longer_than: Duration, + version: usize, +} + +impl Filter { + fn disabled() -> Filter { + Filter::default() + } + + fn from_spec(mut spec: &str) -> Filter { + let longer_than = if let Some(idx) = spec.rfind('>') { + let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than"); + spec = &spec[..idx]; + Duration::from_millis(longer_than) + } else { + Duration::new(0, 0) + }; + + let depth = if let Some(idx) = spec.rfind('@') { + let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth"); + spec = &spec[..idx]; + depth + } else { + 999 + }; + let allowed = + if spec == "*" { HashSet::new() } else { spec.split('|').map(String::from).collect() }; + Filter { depth, allowed, longer_than, version: 0 } + } + + fn install(mut self) { + PROFILING_ENABLED.store(self.depth > 0, Ordering::SeqCst); + let mut old = FILTER.write().unwrap(); + self.version = old.version + 1; + *old = self; + } +} + +struct ProfileStack { + starts: Vec, + filter: Filter, + messages: Tree, +} + +#[derive(Default)] +struct Message { + duration: Duration, + label: Label, + detail: Option, +} + +impl ProfileStack { + fn new() -> ProfileStack { + ProfileStack { starts: Vec::new(), messages: Tree::default(), filter: Default::default() } + } + + fn push(&mut self, label: Label) -> bool { + if self.starts.is_empty() { + if let Ok(f) = FILTER.try_read() { + if f.version > self.filter.version { + self.filter = f.clone(); + } + }; + } + if self.starts.len() > self.filter.depth { + return false; + } + let allowed = &self.filter.allowed; + if self.starts.is_empty() && !allowed.is_empty() && !allowed.contains(label) { + return false; + } + + self.starts.push(Instant::now()); + self.messages.start(); + true + } + + pub fn pop(&mut self, label: Label, detail: Option) { + let start = self.starts.pop().unwrap(); + let duration = start.elapsed(); + self.messages.finish(Message { duration, label, detail }); + if self.starts.is_empty() { + let longer_than = self.filter.longer_than; + // Convert to millis for comparison to avoid problems with rounding + // (otherwise we could print `0ms` despite user's `>0` filter when + // `duration` is just a few nanos). + if duration.as_millis() > longer_than.as_millis() { + if let Some(root) = self.messages.root() { + print(&self.messages, root, 0, longer_than, &mut stderr().lock()); + } + } + self.messages.clear(); + } + } +} + +fn print( + tree: &Tree, + curr: Idx, + level: u32, + longer_than: Duration, + out: &mut impl Write, +) { + let current_indent = " ".repeat(level as usize); + let detail = tree[curr].detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default(); + writeln!( + out, + "{}{:5}ms - {}{}", + current_indent, + tree[curr].duration.as_millis(), + tree[curr].label, + detail, + ) + .expect("printing profiling info"); + + let mut accounted_for = Duration::default(); + let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output. + for child in tree.children(curr) { + accounted_for += tree[child].duration; + + if tree[child].duration.as_millis() > longer_than.as_millis() { + print(tree, child, level + 1, longer_than, out) + } else { + let (total_duration, cnt) = + short_children.entry(tree[child].label).or_insert((Duration::default(), 0)); + *total_duration += tree[child].duration; + *cnt += 1; + } + } + + for (child_msg, (duration, count)) in short_children.iter() { + let millis = duration.as_millis(); + writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count) + .expect("printing profiling info"); + } + + let unaccounted = tree[curr].duration - accounted_for; + if tree.children(curr).next().is_some() && unaccounted > longer_than { + writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted.as_millis()) + .expect("printing profiling info"); + } +} diff --git a/crates/profile/src/lib.rs b/crates/profile/src/lib.rs new file mode 100644 index 000000000..ab19271c7 --- /dev/null +++ b/crates/profile/src/lib.rs @@ -0,0 +1,109 @@ +//! A collection of tools for profiling rust-analyzer. + +mod stop_watch; +mod memory_usage; +#[cfg(feature = "cpu_profiler")] +mod google_cpu_profiler; +mod hprof; +mod tree; + +use std::cell::RefCell; + +pub use crate::{ + hprof::{init, init_from, span}, + memory_usage::{Bytes, MemoryUsage}, + stop_watch::{StopWatch, StopWatchSpan}, +}; + +/// Prints backtrace to stderr, useful for debugging. +#[cfg(feature = "backtrace")] +pub fn print_backtrace() { + let bt = backtrace::Backtrace::new(); + eprintln!("{:?}", bt); +} +#[cfg(not(feature = "backtrace"))] +pub fn print_backtrace() { + eprintln!( + r#"enable the backtrace feature: + profile = {{ path = "../profile", features = [ "backtrace"] }} +"# + ); +} + +thread_local!(static IN_SCOPE: RefCell = RefCell::new(false)); + +/// Allows to check if the current code is withing some dynamic scope, can be +/// useful during debugging to figure out why a function is called. +pub struct Scope { + prev: bool, +} + +impl Scope { + #[must_use] + pub fn enter() -> Scope { + let prev = IN_SCOPE.with(|slot| std::mem::replace(&mut *slot.borrow_mut(), true)); + Scope { prev } + } + pub fn is_active() -> bool { + IN_SCOPE.with(|slot| *slot.borrow()) + } +} + +impl Drop for Scope { + fn drop(&mut self) { + IN_SCOPE.with(|slot| *slot.borrow_mut() = self.prev); + } +} + +/// A wrapper around google_cpu_profiler. +/// +/// Usage: +/// 1. Install gpref_tools (https://github.com/gperftools/gperftools), probably packaged with your Linux distro. +/// 2. Build with `cpu_profiler` feature. +/// 3. Tun the code, the *raw* output would be in the `./out.profile` file. +/// 4. Install pprof for visualization (https://github.com/google/pprof). +/// 5. Bump sampling frequency to once per ms: `export CPUPROFILE_FREQUENCY=1000` +/// 6. Use something like `pprof -svg target/release/rust-analyzer ./out.profile` to see the results. +/// +/// For example, here's how I run profiling on NixOS: +/// +/// ```bash +/// $ nix-shell -p gperftools --run \ +/// 'cargo run --release -p rust-analyzer -- parse < ~/projects/rustbench/parser.rs > /dev/null' +/// ``` +/// +/// See this diff for how to profile completions: +/// +/// https://github.com/rust-analyzer/rust-analyzer/pull/5306 +#[derive(Debug)] +pub struct CpuSpan { + _private: (), +} + +#[must_use] +pub fn cpu_span() -> CpuSpan { + #[cfg(feature = "cpu_profiler")] + { + google_cpu_profiler::start("./out.profile".as_ref()) + } + + #[cfg(not(feature = "cpu_profiler"))] + { + eprintln!("cpu_profiler feature is disabled") + } + + CpuSpan { _private: () } +} + +impl Drop for CpuSpan { + fn drop(&mut self) { + #[cfg(feature = "cpu_profiler")] + { + google_cpu_profiler::stop() + } + } +} + +pub fn memory_usage() -> MemoryUsage { + MemoryUsage::current() +} diff --git a/crates/profile/src/memory_usage.rs b/crates/profile/src/memory_usage.rs new file mode 100644 index 000000000..83390212a --- /dev/null +++ b/crates/profile/src/memory_usage.rs @@ -0,0 +1,75 @@ +//! FIXME: write short doc here +use std::fmt; + +use cfg_if::cfg_if; + +#[derive(Copy, Clone)] +pub struct MemoryUsage { + pub allocated: Bytes, +} + +impl fmt::Display for MemoryUsage { + fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { + write!(fmt, "{}", self.allocated) + } +} + +impl std::ops::Sub for MemoryUsage { + type Output = MemoryUsage; + fn sub(self, rhs: MemoryUsage) -> MemoryUsage { + MemoryUsage { allocated: self.allocated - rhs.allocated } + } +} + +impl MemoryUsage { + pub fn current() -> MemoryUsage { + cfg_if! { + if #[cfg(all(target_os = "linux", target_env = "gnu"))] { + // Note: This is incredibly slow. + let alloc = unsafe { libc::mallinfo() }.uordblks as isize; + MemoryUsage { allocated: Bytes(alloc) } + } else { + MemoryUsage { allocated: Bytes(0) } + } + } + } +} + +#[derive(Default, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)] +pub struct Bytes(isize); + +impl Bytes { + pub fn megabytes(self) -> isize { + self.0 / 1024 / 1024 + } +} + +impl fmt::Display for Bytes { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let bytes = self.0; + let mut value = bytes; + let mut suffix = "b"; + if value.abs() > 4096 { + value /= 1024; + suffix = "kb"; + if value.abs() > 4096 { + value /= 1024; + suffix = "mb"; + } + } + f.pad(&format!("{}{}", value, suffix)) + } +} + +impl std::ops::AddAssign for Bytes { + fn add_assign(&mut self, x: usize) { + self.0 += x as isize; + } +} + +impl std::ops::Sub for Bytes { + type Output = Bytes; + fn sub(self, rhs: Bytes) -> Bytes { + Bytes(self.0 - rhs.0) + } +} diff --git a/crates/profile/src/stop_watch.rs b/crates/profile/src/stop_watch.rs new file mode 100644 index 000000000..5e276190e --- /dev/null +++ b/crates/profile/src/stop_watch.rs @@ -0,0 +1,86 @@ +//! Like `std::time::Instant`, but also measures memory & CPU cycles. +use std::{ + fmt, + time::{Duration, Instant}, +}; + +use crate::MemoryUsage; + +pub struct StopWatch { + time: Instant, + #[cfg(target_os = "linux")] + counter: Option, + memory: Option, +} + +pub struct StopWatchSpan { + pub time: Duration, + pub instructions: Option, + pub memory: Option, +} + +impl StopWatch { + pub fn start() -> StopWatch { + #[cfg(target_os = "linux")] + let counter = { + let mut counter = perf_event::Builder::new() + .build() + .map_err(|err| eprintln!("Failed to create perf counter: {}", err)) + .ok(); + if let Some(counter) = &mut counter { + if let Err(err) = counter.enable() { + eprintln!("Failed to start perf counter: {}", err) + } + } + counter + }; + let time = Instant::now(); + StopWatch { + time, + #[cfg(target_os = "linux")] + counter, + memory: None, + } + } + pub fn memory(mut self, yes: bool) -> StopWatch { + if yes { + self.memory = Some(MemoryUsage::current()); + } + self + } + pub fn elapsed(&mut self) -> StopWatchSpan { + let time = self.time.elapsed(); + + #[cfg(target_os = "linux")] + let instructions = self.counter.as_mut().and_then(|it| { + it.read().map_err(|err| eprintln!("Failed to read perf counter: {}", err)).ok() + }); + #[cfg(not(target_os = "linux"))] + let instructions = None; + + let memory = self.memory.map(|it| MemoryUsage::current() - it); + StopWatchSpan { time, instructions, memory } + } +} + +impl fmt::Display for StopWatchSpan { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:.2?}", self.time)?; + if let Some(mut instructions) = self.instructions { + let mut prefix = ""; + if instructions > 10000 { + instructions /= 1000; + prefix = "k" + } + if instructions > 10000 { + instructions /= 1000; + prefix = "m" + } + write!(f, ", {}{}i", instructions, prefix)?; + } + if let Some(memory) = self.memory { + write!(f, ", {}", memory)?; + } + Ok(()) + } +} diff --git a/crates/profile/src/tree.rs b/crates/profile/src/tree.rs new file mode 100644 index 000000000..096f58511 --- /dev/null +++ b/crates/profile/src/tree.rs @@ -0,0 +1,84 @@ +//! A simple tree implementation which tries to not allocate all over the place. +use std::ops; + +use arena::Arena; + +#[derive(Default)] +pub struct Tree { + nodes: Arena>, + current_path: Vec<(Idx, Option>)>, +} + +pub type Idx = arena::Idx>; + +impl Tree { + pub fn start(&mut self) + where + T: Default, + { + let me = self.nodes.alloc(Node::new(T::default())); + if let Some((parent, last_child)) = self.current_path.last_mut() { + let slot = match *last_child { + Some(last_child) => &mut self.nodes[last_child].next_sibling, + None => &mut self.nodes[*parent].first_child, + }; + let prev = slot.replace(me); + assert!(prev.is_none()); + *last_child = Some(me); + } + + self.current_path.push((me, None)); + } + + pub fn finish(&mut self, data: T) { + let (me, _last_child) = self.current_path.pop().unwrap(); + self.nodes[me].data = data; + } + + pub fn root(&self) -> Option> { + self.nodes.iter().next().map(|(idx, _)| idx) + } + + pub fn children(&self, idx: Idx) -> impl Iterator> + '_ { + NodeIter { nodes: &self.nodes, next: self.nodes[idx].first_child } + } + pub fn clear(&mut self) { + self.nodes.clear(); + self.current_path.clear(); + } +} + +impl ops::Index> for Tree { + type Output = T; + fn index(&self, index: Idx) -> &T { + &self.nodes[index].data + } +} + +pub struct Node { + data: T, + first_child: Option>, + next_sibling: Option>, +} + +impl Node { + fn new(data: T) -> Node { + Node { data, first_child: None, next_sibling: None } + } +} + +struct NodeIter<'a, T> { + nodes: &'a Arena>, + next: Option>, +} + +impl<'a, T> Iterator for NodeIter<'a, T> { + type Item = Idx; + + fn next(&mut self) -> Option> { + self.next.map(|next| { + self.next = self.nodes[next].next_sibling; + next + }) + } +} -- cgit v1.2.3