Rename ra_prof -> profile

author: Aleksey Kladov <[email protected]> 2020-08-12 15:32:36 +0100
committer: Aleksey Kladov <[email protected]> 2020-08-12 15:35:29 +0100
commit: 208b7bd7ba687fb570feb1b89219f14c63712ce8 (patch)
tree: c39749d1b71b73b4017c2d0d848ebdc85e570c39 /crates/profile/src
parent: 98baa9b569b49162392ed4149dd435854fe941b8 (diff)
6 files changed, 633 insertions, 0 deletions
diff --git a/crates/profile/src/google_cpu_profiler.rs b/crates/profile/src/google_cpu_profiler.rs
new file mode 100644
index 000000000..db865c65b
--- /dev/null
+++ b/crates/profile/src/google_cpu_profiler.rs
@@ -0,0 +1,39 @@
+//! https://github.com/gperftools/gperftools
+use std::{
+    ffi::CString,
+    os::raw::c_char,
+    path::Path,
+    sync::atomic::{AtomicUsize, Ordering},
+};
+#[link(name = "profiler")]
+#[allow(non_snake_case)]
+extern "C" {
+    fn ProfilerStart(fname: *const c_char) -> i32;
+    fn ProfilerStop();
+}
+static PROFILER_STATE: AtomicUsize = AtomicUsize::new(OFF);
+const OFF: usize = 0;
+const ON: usize = 1;
+const PENDING: usize = 2;
+pub fn start(path: &Path) {
+    if PROFILER_STATE.compare_and_swap(OFF, PENDING, Ordering::SeqCst) != OFF {
+        panic!("profiler already started");
+    }
+    let path = CString::new(path.display().to_string()).unwrap();
+    if unsafe { ProfilerStart(path.as_ptr()) } == 0 {
+        panic!("profiler failed to start")
+    }
+    assert!(PROFILER_STATE.compare_and_swap(PENDING, ON, Ordering::SeqCst) == PENDING);
+}
+pub fn stop() {
+    if PROFILER_STATE.compare_and_swap(ON, PENDING, Ordering::SeqCst) != ON {
+        panic!("profiler is not started")
+    }
+    unsafe { ProfilerStop() };
+    assert!(PROFILER_STATE.compare_and_swap(PENDING, OFF, Ordering::SeqCst) == PENDING);
+}
diff --git a/crates/profile/src/hprof.rs b/crates/profile/src/hprof.rs
new file mode 100644
index 000000000..934cc8e37
--- /dev/null
+++ b/crates/profile/src/hprof.rs
@@ -0,0 +1,240 @@
+//! Simple hierarchical profiler
+use once_cell::sync::Lazy;
+use std::{
+    cell::RefCell,
+    collections::{BTreeMap, HashSet},
+    io::{stderr, Write},
+    sync::{
+        atomic::{AtomicBool, Ordering},
+        RwLock,
+    },
+    time::{Duration, Instant},
+};
+use crate::tree::{Idx, Tree};
+/// Filtering syntax
+/// env RA_PROFILE=*             // dump everything
+/// env RA_PROFILE=foo|bar|baz   // enabled only selected entries
+/// env RA_PROFILE=*@3>10        // dump everything, up to depth 3, if it takes more than 10 ms
+pub fn init() {
+    let spec = std::env::var("RA_PROFILE").unwrap_or_default();
+    init_from(&spec);
+}
+pub fn init_from(spec: &str) {
+    let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) };
+    filter.install();
+}
+pub type Label = &'static str;
+/// This function starts a profiling scope in the current execution stack with a given description.
+/// It returns a `Profile` struct that measures elapsed time between this method invocation and `Profile` struct drop.
+/// It supports nested profiling scopes in case when this function is invoked multiple times at the execution stack.
+/// In this case the profiling information will be nested at the output.
+/// Profiling information is being printed in the stderr.
+///
+/// # Example
+/// ```
+/// profile::init_from("profile1|profile2@2");
+/// profiling_function1();
+///
+/// fn profiling_function1() {
+///     let _p = profile::span("profile1");
+///     profiling_function2();
+/// }
+///
+/// fn profiling_function2() {
+///     let _p = profile::span("profile2");
+/// }
+/// ```
+/// This will print in the stderr the following:
+/// ```text
+///  0ms - profile
+///      0ms - profile2
+/// ```
+pub fn span(label: Label) -> ProfileSpan {
+    assert!(!label.is_empty());
+    if PROFILING_ENABLED.load(Ordering::Relaxed)
+        && PROFILE_STACK.with(|stack| stack.borrow_mut().push(label))
+    {
+        ProfileSpan(Some(ProfilerImpl { label, detail: None }))
+    } else {
+        ProfileSpan(None)
+    }
+}
+pub struct ProfileSpan(Option<ProfilerImpl>);
+struct ProfilerImpl {
+    label: Label,
+    detail: Option<String>,
+}
+impl ProfileSpan {
+    pub fn detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan {
+        if let Some(profiler) = &mut self.0 {
+            profiler.detail = Some(detail())
+        }
+        self
+    }
+}
+impl Drop for ProfilerImpl {
+    fn drop(&mut self) {
+        PROFILE_STACK.with(|it| it.borrow_mut().pop(self.label, self.detail.take()));
+    }
+}
+static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false);
+static FILTER: Lazy<RwLock<Filter>> = Lazy::new(Default::default);
+thread_local!(static PROFILE_STACK: RefCell<ProfileStack> = RefCell::new(ProfileStack::new()));
+#[derive(Default, Clone, Debug)]
+struct Filter {
+    depth: usize,
+    allowed: HashSet<String>,
+    longer_than: Duration,
+    version: usize,
+}
+impl Filter {
+    fn disabled() -> Filter {
+        Filter::default()
+    }
+    fn from_spec(mut spec: &str) -> Filter {
+        let longer_than = if let Some(idx) = spec.rfind('>') {
+            let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than");
+            spec = &spec[..idx];
+            Duration::from_millis(longer_than)
+        } else {
+            Duration::new(0, 0)
+        };
+        let depth = if let Some(idx) = spec.rfind('@') {
+            let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth");
+            spec = &spec[..idx];
+            depth
+        } else {
+            999
+        };
+        let allowed =
+            if spec == "*" { HashSet::new() } else { spec.split('|').map(String::from).collect() };
+        Filter { depth, allowed, longer_than, version: 0 }
+    }
+    fn install(mut self) {
+        PROFILING_ENABLED.store(self.depth > 0, Ordering::SeqCst);
+        let mut old = FILTER.write().unwrap();
+        self.version = old.version + 1;
+        *old = self;
+    }
+}
+struct ProfileStack {
+    starts: Vec<Instant>,
+    filter: Filter,
+    messages: Tree<Message>,
+}
+#[derive(Default)]
+struct Message {
+    duration: Duration,
+    label: Label,
+    detail: Option<String>,
+}
+impl ProfileStack {
+    fn new() -> ProfileStack {
+        ProfileStack { starts: Vec::new(), messages: Tree::default(), filter: Default::default() }
+    }
+    fn push(&mut self, label: Label) -> bool {
+        if self.starts.is_empty() {
+            if let Ok(f) = FILTER.try_read() {
+                if f.version > self.filter.version {
+                    self.filter = f.clone();
+                }
+            };
+        }
+        if self.starts.len() > self.filter.depth {
+            return false;
+        }
+        let allowed = &self.filter.allowed;
+        if self.starts.is_empty() && !allowed.is_empty() && !allowed.contains(label) {
+            return false;
+        }
+        self.starts.push(Instant::now());
+        self.messages.start();
+        true
+    }
+    pub fn pop(&mut self, label: Label, detail: Option<String>) {
+        let start = self.starts.pop().unwrap();
+        let duration = start.elapsed();
+        self.messages.finish(Message { duration, label, detail });
+        if self.starts.is_empty() {
+            let longer_than = self.filter.longer_than;
+            // Convert to millis for comparison to avoid problems with rounding
+            // (otherwise we could print `0ms` despite user's `>0` filter when
+            // `duration` is just a few nanos).
+            if duration.as_millis() > longer_than.as_millis() {
+                if let Some(root) = self.messages.root() {
+                    print(&self.messages, root, 0, longer_than, &mut stderr().lock());
+                }
+            }
+            self.messages.clear();
+        }
+    }
+}
+fn print(
+    tree: &Tree<Message>,
+    curr: Idx<Message>,
+    level: u32,
+    longer_than: Duration,
+    out: &mut impl Write,
+) {
+    let current_indent = "    ".repeat(level as usize);
+    let detail = tree[curr].detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default();
+    writeln!(
+        out,
+        "{}{:5}ms - {}{}",
+        current_indent,
+        tree[curr].duration.as_millis(),
+        tree[curr].label,
+        detail,
+    )
+    .expect("printing profiling info");
+    let mut accounted_for = Duration::default();
+    let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
+    for child in tree.children(curr) {
+        accounted_for += tree[child].duration;
+        if tree[child].duration.as_millis() > longer_than.as_millis() {
+            print(tree, child, level + 1, longer_than, out)
+        } else {
+            let (total_duration, cnt) =
+                short_children.entry(tree[child].label).or_insert((Duration::default(), 0));
+            *total_duration += tree[child].duration;
+            *cnt += 1;
+        }
+    }
+    for (child_msg, (duration, count)) in short_children.iter() {
+        let millis = duration.as_millis();
+        writeln!(out, "    {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count)
+            .expect("printing profiling info");
+    }
+    let unaccounted = tree[curr].duration - accounted_for;
+    if tree.children(curr).next().is_some() && unaccounted > longer_than {
+        writeln!(out, "    {}{:5}ms - ???", current_indent, unaccounted.as_millis())
+            .expect("printing profiling info");
+    }
+}
diff --git a/crates/profile/src/lib.rs b/crates/profile/src/lib.rs
new file mode 100644
index 000000000..ab19271c7
--- /dev/null
+++ b/crates/profile/src/lib.rs
@@ -0,0 +1,109 @@
+//! A collection of tools for profiling rust-analyzer.
+mod stop_watch;
+mod memory_usage;
+#[cfg(feature = "cpu_profiler")]
+mod google_cpu_profiler;
+mod hprof;
+mod tree;
+use std::cell::RefCell;
+pub use crate::{
+    hprof::{init, init_from, span},
+    memory_usage::{Bytes, MemoryUsage},
+    stop_watch::{StopWatch, StopWatchSpan},
+};
+/// Prints backtrace to stderr, useful for debugging.
+#[cfg(feature = "backtrace")]
+pub fn print_backtrace() {
+    let bt = backtrace::Backtrace::new();
+    eprintln!("{:?}", bt);
+}
+#[cfg(not(feature = "backtrace"))]
+pub fn print_backtrace() {
+    eprintln!(
+        r#"enable the backtrace feature:
+    profile = {{ path = "../profile", features = [ "backtrace"] }}
+"#
+    );
+}
+thread_local!(static IN_SCOPE: RefCell<bool> = RefCell::new(false));
+/// Allows to check if the current code is withing some dynamic scope, can be
+/// useful during debugging to figure out why a function is called.
+pub struct Scope {
+    prev: bool,
+}
+impl Scope {
+    #[must_use]
+    pub fn enter() -> Scope {
+        let prev = IN_SCOPE.with(|slot| std::mem::replace(&mut *slot.borrow_mut(), true));
+        Scope { prev }
+    }
+    pub fn is_active() -> bool {
+        IN_SCOPE.with(|slot| *slot.borrow())
+    }
+}
+impl Drop for Scope {
+    fn drop(&mut self) {
+        IN_SCOPE.with(|slot| *slot.borrow_mut() = self.prev);
+    }
+}
+/// A wrapper around google_cpu_profiler.
+///
+/// Usage:
+/// 1. Install gpref_tools (https://github.com/gperftools/gperftools), probably packaged with your Linux distro.
+/// 2. Build with `cpu_profiler` feature.
+/// 3. Tun the code, the *raw* output would be in the `./out.profile` file.
+/// 4. Install pprof for visualization (https://github.com/google/pprof).
+/// 5. Bump sampling frequency to once per ms: `export CPUPROFILE_FREQUENCY=1000`
+/// 6. Use something like `pprof -svg target/release/rust-analyzer ./out.profile` to see the results.
+///
+/// For example, here's how I run profiling on NixOS:
+///
+/// ```bash
+/// $ nix-shell -p gperftools --run \
+///     'cargo run --release -p rust-analyzer -- parse < ~/projects/rustbench/parser.rs > /dev/null'
+/// ```
+///
+/// See this diff for how to profile completions:
+///
+/// https://github.com/rust-analyzer/rust-analyzer/pull/5306
+#[derive(Debug)]
+pub struct CpuSpan {
+    _private: (),
+}
+#[must_use]
+pub fn cpu_span() -> CpuSpan {
+    #[cfg(feature = "cpu_profiler")]
+    {
+        google_cpu_profiler::start("./out.profile".as_ref())
+    }
+    #[cfg(not(feature = "cpu_profiler"))]
+    {
+        eprintln!("cpu_profiler feature is disabled")
+    }
+    CpuSpan { _private: () }
+}
+impl Drop for CpuSpan {
+    fn drop(&mut self) {
+        #[cfg(feature = "cpu_profiler")]
+        {
+            google_cpu_profiler::stop()
+        }
+    }
+}
+pub fn memory_usage() -> MemoryUsage {
+    MemoryUsage::current()
+}
diff --git a/crates/profile/src/memory_usage.rs b/crates/profile/src/memory_usage.rs
new file mode 100644
index 000000000..83390212a
--- /dev/null
+++ b/crates/profile/src/memory_usage.rs
@@ -0,0 +1,75 @@
+//! FIXME: write short doc here
+use std::fmt;
+use cfg_if::cfg_if;
+#[derive(Copy, Clone)]
+pub struct MemoryUsage {
+    pub allocated: Bytes,
+}
+impl fmt::Display for MemoryUsage {
+    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+        write!(fmt, "{}", self.allocated)
+    }
+}
+impl std::ops::Sub for MemoryUsage {
+    type Output = MemoryUsage;
+    fn sub(self, rhs: MemoryUsage) -> MemoryUsage {
+        MemoryUsage { allocated: self.allocated - rhs.allocated }
+    }
+}
+impl MemoryUsage {
+    pub fn current() -> MemoryUsage {
+        cfg_if! {
+            if #[cfg(all(target_os = "linux", target_env = "gnu"))] {
+                // Note: This is incredibly slow.
+                let alloc = unsafe { libc::mallinfo() }.uordblks as isize;
+                MemoryUsage { allocated: Bytes(alloc) }
+            } else {
+                MemoryUsage { allocated: Bytes(0) }
+            }
+        }
+    }
+}
+#[derive(Default, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
+pub struct Bytes(isize);
+impl Bytes {
+    pub fn megabytes(self) -> isize {
+        self.0 / 1024 / 1024
+    }
+}
+impl fmt::Display for Bytes {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let bytes = self.0;
+        let mut value = bytes;
+        let mut suffix = "b";
+        if value.abs() > 4096 {
+            value /= 1024;
+            suffix = "kb";
+            if value.abs() > 4096 {
+                value /= 1024;
+                suffix = "mb";
+            }
+        }
+        f.pad(&format!("{}{}", value, suffix))
+    }
+}
+impl std::ops::AddAssign<usize> for Bytes {
+    fn add_assign(&mut self, x: usize) {
+        self.0 += x as isize;
+    }
+}
+impl std::ops::Sub for Bytes {
+    type Output = Bytes;
+    fn sub(self, rhs: Bytes) -> Bytes {
+        Bytes(self.0 - rhs.0)
+    }
+}
diff --git a/crates/profile/src/stop_watch.rs b/crates/profile/src/stop_watch.rs
new file mode 100644
index 000000000..5e276190e
--- /dev/null
+++ b/crates/profile/src/stop_watch.rs
@@ -0,0 +1,86 @@
+//! Like `std::time::Instant`, but also measures memory & CPU cycles.
+use std::{
+    fmt,
+    time::{Duration, Instant},
+};
+use crate::MemoryUsage;
+pub struct StopWatch {
+    time: Instant,
+    #[cfg(target_os = "linux")]
+    counter: Option<perf_event::Counter>,
+    memory: Option<MemoryUsage>,
+}
+pub struct StopWatchSpan {
+    pub time: Duration,
+    pub instructions: Option<u64>,
+    pub memory: Option<MemoryUsage>,
+}
+impl StopWatch {
+    pub fn start() -> StopWatch {
+        #[cfg(target_os = "linux")]
+        let counter = {
+            let mut counter = perf_event::Builder::new()
+                .build()
+                .map_err(|err| eprintln!("Failed to create perf counter: {}", err))
+                .ok();
+            if let Some(counter) = &mut counter {
+                if let Err(err) = counter.enable() {
+                    eprintln!("Failed to start perf counter: {}", err)
+                }
+            }
+            counter
+        };
+        let time = Instant::now();
+        StopWatch {
+            time,
+            #[cfg(target_os = "linux")]
+            counter,
+            memory: None,
+        }
+    }
+    pub fn memory(mut self, yes: bool) -> StopWatch {
+        if yes {
+            self.memory = Some(MemoryUsage::current());
+        }
+        self
+    }
+    pub fn elapsed(&mut self) -> StopWatchSpan {
+        let time = self.time.elapsed();
+        #[cfg(target_os = "linux")]
+        let instructions = self.counter.as_mut().and_then(|it| {
+            it.read().map_err(|err| eprintln!("Failed to read perf counter: {}", err)).ok()
+        });
+        #[cfg(not(target_os = "linux"))]
+        let instructions = None;
+        let memory = self.memory.map(|it| MemoryUsage::current() - it);
+        StopWatchSpan { time, instructions, memory }
+    }
+}
+impl fmt::Display for StopWatchSpan {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{:.2?}", self.time)?;
+        if let Some(mut instructions) = self.instructions {
+            let mut prefix = "";
+            if instructions > 10000 {
+                instructions /= 1000;
+                prefix = "k"
+            }
+            if instructions > 10000 {
+                instructions /= 1000;
+                prefix = "m"
+            }
+            write!(f, ", {}{}i", instructions, prefix)?;
+        }
+        if let Some(memory) = self.memory {
+            write!(f, ", {}", memory)?;
+        }
+        Ok(())
+    }
+}
diff --git a/crates/profile/src/tree.rs b/crates/profile/src/tree.rs
new file mode 100644
index 000000000..096f58511
--- /dev/null
+++ b/crates/profile/src/tree.rs
@@ -0,0 +1,84 @@
+//! A simple tree implementation which tries to not allocate all over the place.
+use std::ops;
+use arena::Arena;
+#[derive(Default)]
+pub struct Tree<T> {
+    nodes: Arena<Node<T>>,
+    current_path: Vec<(Idx<T>, Option<Idx<T>>)>,
+}
+pub type Idx<T> = arena::Idx<Node<T>>;
+impl<T> Tree<T> {
+    pub fn start(&mut self)
+    where
+        T: Default,
+    {
+        let me = self.nodes.alloc(Node::new(T::default()));
+        if let Some((parent, last_child)) = self.current_path.last_mut() {
+            let slot = match *last_child {
+                Some(last_child) => &mut self.nodes[last_child].next_sibling,
+                None => &mut self.nodes[*parent].first_child,
+            };
+            let prev = slot.replace(me);
+            assert!(prev.is_none());
+            *last_child = Some(me);
+        }
+        self.current_path.push((me, None));
+    }
+    pub fn finish(&mut self, data: T) {
+        let (me, _last_child) = self.current_path.pop().unwrap();
+        self.nodes[me].data = data;
+    }
+    pub fn root(&self) -> Option<Idx<T>> {
+        self.nodes.iter().next().map(|(idx, _)| idx)
+    }
+    pub fn children(&self, idx: Idx<T>) -> impl Iterator<Item = Idx<T>> + '_ {
+        NodeIter { nodes: &self.nodes, next: self.nodes[idx].first_child }
+    }
+    pub fn clear(&mut self) {
+        self.nodes.clear();
+        self.current_path.clear();
+    }
+}
+impl<T> ops::Index<Idx<T>> for Tree<T> {
+    type Output = T;
+    fn index(&self, index: Idx<T>) -> &T {
+        &self.nodes[index].data
+    }
+}
+pub struct Node<T> {
+    data: T,
+    first_child: Option<Idx<T>>,
+    next_sibling: Option<Idx<T>>,
+}
+impl<T> Node<T> {
+    fn new(data: T) -> Node<T> {
+        Node { data, first_child: None, next_sibling: None }
+    }
+}
+struct NodeIter<'a, T> {
+    nodes: &'a Arena<Node<T>>,
+    next: Option<Idx<T>>,
+}
+impl<'a, T> Iterator for NodeIter<'a, T> {
+    type Item = Idx<T>;
+    fn next(&mut self) -> Option<Idx<T>> {
+        self.next.map(|next| {
+            self.next = self.nodes[next].next_sibling;
+            next
+        })
+    }
+}
author	Aleksey Kladov <[email protected]>	2020-08-12 15:32:36 +0100
committer	Aleksey Kladov <[email protected]>	2020-08-12 15:35:29 +0100
commit	208b7bd7ba687fb570feb1b89219f14c63712ce8 (patch)
tree	c39749d1b71b73b4017c2d0d848ebdc85e570c39 /crates/profile/src
parent	98baa9b569b49162392ed4149dd435854fe941b8 (diff)

diff --git a/crates/profile/src/google_cpu_profiler.rs b/crates/profile/src/google_cpu_profiler.rs new file mode 100644 index 000000000..db865c65b --- /dev/null +++ b/crates/profile/src/google_cpu_profiler.rs
@@ -0,0 +1,39 @@
	1	//! https://github.com/gperftools/gperftools
	2
	3	use std::{
	4	ffi::CString,
	5	os::raw::c_char,
	6	path::Path,
	7	sync::atomic::{AtomicUsize, Ordering},
	8	};
	9
	10	#[link(name = "profiler")]
	11	#[allow(non_snake_case)]
	12	extern "C" {
	13	fn ProfilerStart(fname: *const c_char) -> i32;
	14	fn ProfilerStop();
	15	}
	16
	17	static PROFILER_STATE: AtomicUsize = AtomicUsize::new(OFF);
	18	const OFF: usize = 0;
	19	const ON: usize = 1;
	20	const PENDING: usize = 2;
	21
	22	pub fn start(path: &Path) {
	23	if PROFILER_STATE.compare_and_swap(OFF, PENDING, Ordering::SeqCst) != OFF {
	24	panic!("profiler already started");
	25	}
	26	let path = CString::new(path.display().to_string()).unwrap();
	27	if unsafe { ProfilerStart(path.as_ptr()) } == 0 {
	28	panic!("profiler failed to start")
	29	}
	30	assert!(PROFILER_STATE.compare_and_swap(PENDING, ON, Ordering::SeqCst) == PENDING);
	31	}
	32
	33	pub fn stop() {
	34	if PROFILER_STATE.compare_and_swap(ON, PENDING, Ordering::SeqCst) != ON {
	35	panic!("profiler is not started")
	36	}
	37	unsafe { ProfilerStop() };
	38	assert!(PROFILER_STATE.compare_and_swap(PENDING, OFF, Ordering::SeqCst) == PENDING);
	39	}


diff --git a/crates/profile/src/hprof.rs b/crates/profile/src/hprof.rs new file mode 100644 index 000000000..934cc8e37 --- /dev/null +++ b/crates/profile/src/hprof.rs
@@ -0,0 +1,240 @@
	1	//! Simple hierarchical profiler
	2	use once_cell::sync::Lazy;
	3	use std::{
	4	cell::RefCell,
	5	collections::{BTreeMap, HashSet},
	6	io::{stderr, Write},
	7	sync::{
	8	atomic::{AtomicBool, Ordering},
	9	RwLock,
	10	},
	11	time::{Duration, Instant},
	12	};
	13
	14	use crate::tree::{Idx, Tree};
	15
	16	/// Filtering syntax
	17	/// env RA_PROFILE=* // dump everything
	18	/// env RA_PROFILE=foo\|bar\|baz // enabled only selected entries
	19	/// env RA_PROFILE=*@3>10 // dump everything, up to depth 3, if it takes more than 10 ms
	20	pub fn init() {
	21	let spec = std::env::var("RA_PROFILE").unwrap_or_default();
	22	init_from(&spec);
	23	}
	24
	25	pub fn init_from(spec: &str) {
	26	let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) };
	27	filter.install();
	28	}
	29
	30	pub type Label = &'static str;
	31
	32	/// This function starts a profiling scope in the current execution stack with a given description.
	33	/// It returns a `Profile` struct that measures elapsed time between this method invocation and `Profile` struct drop.
	34	/// It supports nested profiling scopes in case when this function is invoked multiple times at the execution stack.
	35	/// In this case the profiling information will be nested at the output.
	36	/// Profiling information is being printed in the stderr.
	37	///
	38	/// # Example
	39	/// ```
	40	/// profile::init_from("profile1\|profile2@2");
	41	/// profiling_function1();
	42	///
	43	/// fn profiling_function1() {
	44	/// let _p = profile::span("profile1");
	45	/// profiling_function2();
	46	/// }
	47	///
	48	/// fn profiling_function2() {
	49	/// let _p = profile::span("profile2");
	50	/// }
	51	/// ```
	52	/// This will print in the stderr the following:
	53	/// ```text
	54	/// 0ms - profile
	55	/// 0ms - profile2
	56	/// ```
	57	pub fn span(label: Label) -> ProfileSpan {
	58	assert!(!label.is_empty());
	59
	60	if PROFILING_ENABLED.load(Ordering::Relaxed)
	61	&& PROFILE_STACK.with(\|stack\| stack.borrow_mut().push(label))
	62	{
	63	ProfileSpan(Some(ProfilerImpl { label, detail: None }))
	64	} else {
	65	ProfileSpan(None)
	66	}
	67	}
	68
	69	pub struct ProfileSpan(Option<ProfilerImpl>);
	70
	71	struct ProfilerImpl {
	72	label: Label,
	73	detail: Option<String>,
	74	}
	75
	76	impl ProfileSpan {
	77	pub fn detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan {
	78	if let Some(profiler) = &mut self.0 {
	79	profiler.detail = Some(detail())
	80	}
	81	self
	82	}
	83	}
	84
	85	impl Drop for ProfilerImpl {
	86	fn drop(&mut self) {
	87	PROFILE_STACK.with(\|it\| it.borrow_mut().pop(self.label, self.detail.take()));
	88	}
	89	}
	90
	91	static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false);
	92	static FILTER: Lazy<RwLock<Filter>> = Lazy::new(Default::default);
	93	thread_local!(static PROFILE_STACK: RefCell<ProfileStack> = RefCell::new(ProfileStack::new()));
	94
	95	#[derive(Default, Clone, Debug)]
	96	struct Filter {
	97	depth: usize,
	98	allowed: HashSet<String>,
	99	longer_than: Duration,
	100	version: usize,
	101	}
	102
	103	impl Filter {
	104	fn disabled() -> Filter {
	105	Filter::default()
	106	}
	107
	108	fn from_spec(mut spec: &str) -> Filter {
	109	let longer_than = if let Some(idx) = spec.rfind('>') {
	110	let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than");
	111	spec = &spec[..idx];
	112	Duration::from_millis(longer_than)
	113	} else {
	114	Duration::new(0, 0)
	115	};
	116
	117	let depth = if let Some(idx) = spec.rfind('@') {
	118	let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth");
	119	spec = &spec[..idx];
	120	depth
	121	} else {
	122	999
	123	};
	124	let allowed =
	125	if spec == "*" { HashSet::new() } else { spec.split('\|').map(String::from).collect() };
	126	Filter { depth, allowed, longer_than, version: 0 }
	127	}
	128
	129	fn install(mut self) {
	130	PROFILING_ENABLED.store(self.depth > 0, Ordering::SeqCst);
	131	let mut old = FILTER.write().unwrap();
	132	self.version = old.version + 1;
	133	*old = self;
	134	}
	135	}
	136
	137	struct ProfileStack {
	138	starts: Vec<Instant>,
	139	filter: Filter,
	140	messages: Tree<Message>,
	141	}
	142
	143	#[derive(Default)]
	144	struct Message {
	145	duration: Duration,
	146	label: Label,
	147	detail: Option<String>,
	148	}
	149
	150	impl ProfileStack {
	151	fn new() -> ProfileStack {
	152	ProfileStack { starts: Vec::new(), messages: Tree::default(), filter: Default::default() }
	153	}
	154
	155	fn push(&mut self, label: Label) -> bool {
	156	if self.starts.is_empty() {
	157	if let Ok(f) = FILTER.try_read() {
	158	if f.version > self.filter.version {
	159	self.filter = f.clone();
	160	}
	161	};
	162	}
	163	if self.starts.len() > self.filter.depth {
	164	return false;
	165	}
	166	let allowed = &self.filter.allowed;
	167	if self.starts.is_empty() && !allowed.is_empty() && !allowed.contains(label) {
	168	return false;
	169	}
	170
	171	self.starts.push(Instant::now());
	172	self.messages.start();
	173	true
	174	}
	175
	176	pub fn pop(&mut self, label: Label, detail: Option<String>) {
	177	let start = self.starts.pop().unwrap();
	178	let duration = start.elapsed();
	179	self.messages.finish(Message { duration, label, detail });
	180	if self.starts.is_empty() {
	181	let longer_than = self.filter.longer_than;
	182	// Convert to millis for comparison to avoid problems with rounding
	183	// (otherwise we could print `0ms` despite user's `>0` filter when
	184	// `duration` is just a few nanos).
	185	if duration.as_millis() > longer_than.as_millis() {
	186	if let Some(root) = self.messages.root() {
	187	print(&self.messages, root, 0, longer_than, &mut stderr().lock());
	188	}
	189	}
	190	self.messages.clear();
	191	}
	192	}
	193	}
	194
	195	fn print(
	196	tree: &Tree<Message>,
	197	curr: Idx<Message>,
	198	level: u32,
	199	longer_than: Duration,
	200	out: &mut impl Write,
	201	) {
	202	let current_indent = " ".repeat(level as usize);
	203	let detail = tree[curr].detail.as_ref().map(\|it\| format!(" @ {}", it)).unwrap_or_default();
	204	writeln!(
	205	out,
	206	"{}{:5}ms - {}{}",
	207	current_indent,
	208	tree[curr].duration.as_millis(),
	209	tree[curr].label,
	210	detail,
	211	)
	212	.expect("printing profiling info");
	213
	214	let mut accounted_for = Duration::default();
	215	let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
	216	for child in tree.children(curr) {
	217	accounted_for += tree[child].duration;
	218
	219	if tree[child].duration.as_millis() > longer_than.as_millis() {
	220	print(tree, child, level + 1, longer_than, out)
	221	} else {
	222	let (total_duration, cnt) =
	223	short_children.entry(tree[child].label).or_insert((Duration::default(), 0));
	224	*total_duration += tree[child].duration;
	225	*cnt += 1;
	226	}
	227	}
	228
	229	for (child_msg, (duration, count)) in short_children.iter() {
	230	let millis = duration.as_millis();
	231	writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count)
	232	.expect("printing profiling info");
	233	}
	234
	235	let unaccounted = tree[curr].duration - accounted_for;
	236	if tree.children(curr).next().is_some() && unaccounted > longer_than {
	237	writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted.as_millis())
	238	.expect("printing profiling info");
	239	}
	240	}


diff --git a/crates/profile/src/lib.rs b/crates/profile/src/lib.rs new file mode 100644 index 000000000..ab19271c7 --- /dev/null +++ b/crates/profile/src/lib.rs
@@ -0,0 +1,109 @@
	1	//! A collection of tools for profiling rust-analyzer.
	2
	3	mod stop_watch;
	4	mod memory_usage;
	5	#[cfg(feature = "cpu_profiler")]
	6	mod google_cpu_profiler;
	7	mod hprof;
	8	mod tree;
	9
	10	use std::cell::RefCell;
	11
	12	pub use crate::{
	13	hprof::{init, init_from, span},
	14	memory_usage::{Bytes, MemoryUsage},
	15	stop_watch::{StopWatch, StopWatchSpan},
	16	};
	17
	18	/// Prints backtrace to stderr, useful for debugging.
	19	#[cfg(feature = "backtrace")]
	20	pub fn print_backtrace() {
	21	let bt = backtrace::Backtrace::new();
	22	eprintln!("{:?}", bt);
	23	}
	24	#[cfg(not(feature = "backtrace"))]
	25	pub fn print_backtrace() {
	26	eprintln!(
	27	r#"enable the backtrace feature:
	28	profile = {{ path = "../profile", features = [ "backtrace"] }}
	29	"#
	30	);
	31	}
	32
	33	thread_local!(static IN_SCOPE: RefCell<bool> = RefCell::new(false));
	34
	35	/// Allows to check if the current code is withing some dynamic scope, can be
	36	/// useful during debugging to figure out why a function is called.
	37	pub struct Scope {
	38	prev: bool,
	39	}
	40
	41	impl Scope {
	42	#[must_use]
	43	pub fn enter() -> Scope {
	44	let prev = IN_SCOPE.with(\|slot\| std::mem::replace(&mut *slot.borrow_mut(), true));
	45	Scope { prev }
	46	}
	47	pub fn is_active() -> bool {
	48	IN_SCOPE.with(\|slot\| *slot.borrow())
	49	}
	50	}
	51
	52	impl Drop for Scope {
	53	fn drop(&mut self) {
	54	IN_SCOPE.with(\|slot\| *slot.borrow_mut() = self.prev);
	55	}
	56	}
	57
	58	/// A wrapper around google_cpu_profiler.
	59	///
	60	/// Usage:
	61	/// 1. Install gpref_tools (https://github.com/gperftools/gperftools), probably packaged with your Linux distro.
	62	/// 2. Build with `cpu_profiler` feature.
	63	/// 3. Tun the code, the raw output would be in the `./out.profile` file.
	64	/// 4. Install pprof for visualization (https://github.com/google/pprof).
	65	/// 5. Bump sampling frequency to once per ms: `export CPUPROFILE_FREQUENCY=1000`
	66	/// 6. Use something like `pprof -svg target/release/rust-analyzer ./out.profile` to see the results.
	67	///
	68	/// For example, here's how I run profiling on NixOS:
	69	///
	70	/// ```bash
	71	/// $ nix-shell -p gperftools --run \
	72	/// 'cargo run --release -p rust-analyzer -- parse < ~/projects/rustbench/parser.rs > /dev/null'
	73	/// ```
	74	///
	75	/// See this diff for how to profile completions:
	76	///
	77	/// https://github.com/rust-analyzer/rust-analyzer/pull/5306
	78	#[derive(Debug)]
	79	pub struct CpuSpan {
	80	_private: (),
	81	}
	82
	83	#[must_use]
	84	pub fn cpu_span() -> CpuSpan {
	85	#[cfg(feature = "cpu_profiler")]
	86	{
	87	google_cpu_profiler::start("./out.profile".as_ref())
	88	}
	89
	90	#[cfg(not(feature = "cpu_profiler"))]
	91	{
	92	eprintln!("cpu_profiler feature is disabled")
	93	}
	94
	95	CpuSpan { _private: () }
	96	}
	97
	98	impl Drop for CpuSpan {
	99	fn drop(&mut self) {
	100	#[cfg(feature = "cpu_profiler")]
	101	{
	102	google_cpu_profiler::stop()
	103	}
	104	}
	105	}
	106
	107	pub fn memory_usage() -> MemoryUsage {
	108	MemoryUsage::current()
	109	}


diff --git a/crates/profile/src/memory_usage.rs b/crates/profile/src/memory_usage.rs new file mode 100644 index 000000000..83390212a --- /dev/null +++ b/crates/profile/src/memory_usage.rs
@@ -0,0 +1,75 @@
	1	//! FIXME: write short doc here
	2	use std::fmt;
	3
	4	use cfg_if::cfg_if;
	5
	6	#[derive(Copy, Clone)]
	7	pub struct MemoryUsage {
	8	pub allocated: Bytes,
	9	}
	10
	11	impl fmt::Display for MemoryUsage {
	12	fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
	13	write!(fmt, "{}", self.allocated)
	14	}
	15	}
	16
	17	impl std::ops::Sub for MemoryUsage {
	18	type Output = MemoryUsage;
	19	fn sub(self, rhs: MemoryUsage) -> MemoryUsage {
	20	MemoryUsage { allocated: self.allocated - rhs.allocated }
	21	}
	22	}
	23
	24	impl MemoryUsage {
	25	pub fn current() -> MemoryUsage {
	26	cfg_if! {
	27	if #[cfg(all(target_os = "linux", target_env = "gnu"))] {
	28	// Note: This is incredibly slow.
	29	let alloc = unsafe { libc::mallinfo() }.uordblks as isize;
	30	MemoryUsage { allocated: Bytes(alloc) }
	31	} else {
	32	MemoryUsage { allocated: Bytes(0) }
	33	}
	34	}
	35	}
	36	}
	37
	38	#[derive(Default, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
	39	pub struct Bytes(isize);
	40
	41	impl Bytes {
	42	pub fn megabytes(self) -> isize {
	43	self.0 / 1024 / 1024
	44	}
	45	}
	46
	47	impl fmt::Display for Bytes {
	48	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
	49	let bytes = self.0;
	50	let mut value = bytes;
	51	let mut suffix = "b";
	52	if value.abs() > 4096 {
	53	value /= 1024;
	54	suffix = "kb";
	55	if value.abs() > 4096 {
	56	value /= 1024;
	57	suffix = "mb";
	58	}
	59	}
	60	f.pad(&format!("{}{}", value, suffix))
	61	}
	62	}
	63
	64	impl std::ops::AddAssign<usize> for Bytes {
	65	fn add_assign(&mut self, x: usize) {
	66	self.0 += x as isize;
	67	}
	68	}
	69
	70	impl std::ops::Sub for Bytes {
	71	type Output = Bytes;
	72	fn sub(self, rhs: Bytes) -> Bytes {
	73	Bytes(self.0 - rhs.0)
	74	}
	75	}


diff --git a/crates/profile/src/stop_watch.rs b/crates/profile/src/stop_watch.rs new file mode 100644 index 000000000..5e276190e --- /dev/null +++ b/crates/profile/src/stop_watch.rs
@@ -0,0 +1,86 @@
	1	//! Like `std::time::Instant`, but also measures memory & CPU cycles.
	2	use std::{
	3	fmt,
	4	time::{Duration, Instant},
	5	};
	6
	7	use crate::MemoryUsage;
	8
	9	pub struct StopWatch {
	10	time: Instant,
	11	#[cfg(target_os = "linux")]
	12	counter: Option<perf_event::Counter>,
	13	memory: Option<MemoryUsage>,
	14	}
	15
	16	pub struct StopWatchSpan {
	17	pub time: Duration,
	18	pub instructions: Option<u64>,
	19	pub memory: Option<MemoryUsage>,
	20	}
	21
	22	impl StopWatch {
	23	pub fn start() -> StopWatch {
	24	#[cfg(target_os = "linux")]
	25	let counter = {
	26	let mut counter = perf_event::Builder::new()
	27	.build()
	28	.map_err(\|err\| eprintln!("Failed to create perf counter: {}", err))
	29	.ok();
	30	if let Some(counter) = &mut counter {
	31	if let Err(err) = counter.enable() {
	32	eprintln!("Failed to start perf counter: {}", err)
	33	}
	34	}
	35	counter
	36	};
	37	let time = Instant::now();
	38	StopWatch {
	39	time,
	40	#[cfg(target_os = "linux")]
	41	counter,
	42	memory: None,
	43	}
	44	}
	45	pub fn memory(mut self, yes: bool) -> StopWatch {
	46	if yes {
	47	self.memory = Some(MemoryUsage::current());
	48	}
	49	self
	50	}
	51	pub fn elapsed(&mut self) -> StopWatchSpan {
	52	let time = self.time.elapsed();
	53
	54	#[cfg(target_os = "linux")]
	55	let instructions = self.counter.as_mut().and_then(\|it\| {
	56	it.read().map_err(\|err\| eprintln!("Failed to read perf counter: {}", err)).ok()
	57	});
	58	#[cfg(not(target_os = "linux"))]
	59	let instructions = None;
	60
	61	let memory = self.memory.map(\|it\| MemoryUsage::current() - it);
	62	StopWatchSpan { time, instructions, memory }
	63	}
	64	}
	65
	66	impl fmt::Display for StopWatchSpan {
	67	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
	68	write!(f, "{:.2?}", self.time)?;
	69	if let Some(mut instructions) = self.instructions {
	70	let mut prefix = "";
	71	if instructions > 10000 {
	72	instructions /= 1000;
	73	prefix = "k"
	74	}
	75	if instructions > 10000 {
	76	instructions /= 1000;
	77	prefix = "m"
	78	}
	79	write!(f, ", {}{}i", instructions, prefix)?;
	80	}
	81	if let Some(memory) = self.memory {
	82	write!(f, ", {}", memory)?;
	83	}
	84	Ok(())
	85	}
	86	}


diff --git a/crates/profile/src/tree.rs b/crates/profile/src/tree.rs new file mode 100644 index 000000000..096f58511 --- /dev/null +++ b/crates/profile/src/tree.rs
@@ -0,0 +1,84 @@
	1	//! A simple tree implementation which tries to not allocate all over the place.
	2	use std::ops;
	3
	4	use arena::Arena;
	5
	6	#[derive(Default)]
	7	pub struct Tree<T> {
	8	nodes: Arena<Node<T>>,
	9	current_path: Vec<(Idx<T>, Option<Idx<T>>)>,
	10	}
	11
	12	pub type Idx<T> = arena::Idx<Node<T>>;
	13
	14	impl<T> Tree<T> {
	15	pub fn start(&mut self)
	16	where
	17	T: Default,
	18	{
	19	let me = self.nodes.alloc(Node::new(T::default()));
	20	if let Some((parent, last_child)) = self.current_path.last_mut() {
	21	let slot = match *last_child {
	22	Some(last_child) => &mut self.nodes[last_child].next_sibling,
	23	None => &mut self.nodes[*parent].first_child,
	24	};
	25	let prev = slot.replace(me);
	26	assert!(prev.is_none());
	27	*last_child = Some(me);
	28	}
	29
	30	self.current_path.push((me, None));
	31	}
	32
	33	pub fn finish(&mut self, data: T) {
	34	let (me, _last_child) = self.current_path.pop().unwrap();
	35	self.nodes[me].data = data;
	36	}
	37
	38	pub fn root(&self) -> Option<Idx<T>> {
	39	self.nodes.iter().next().map(\|(idx, _)\| idx)
	40	}
	41
	42	pub fn children(&self, idx: Idx<T>) -> impl Iterator<Item = Idx<T>> + '_ {
	43	NodeIter { nodes: &self.nodes, next: self.nodes[idx].first_child }
	44	}
	45	pub fn clear(&mut self) {
	46	self.nodes.clear();
	47	self.current_path.clear();
	48	}
	49	}
	50
	51	impl<T> ops::Index<Idx<T>> for Tree<T> {
	52	type Output = T;
	53	fn index(&self, index: Idx<T>) -> &T {
	54	&self.nodes[index].data
	55	}
	56	}
	57
	58	pub struct Node<T> {
	59	data: T,
	60	first_child: Option<Idx<T>>,
	61	next_sibling: Option<Idx<T>>,
	62	}
	63
	64	impl<T> Node<T> {
	65	fn new(data: T) -> Node<T> {
	66	Node { data, first_child: None, next_sibling: None }
	67	}
	68	}
	69
	70	struct NodeIter<'a, T> {
	71	nodes: &'a Arena<Node<T>>,
	72	next: Option<Idx<T>>,
	73	}
	74
	75	impl<'a, T> Iterator for NodeIter<'a, T> {
	76	type Item = Idx<T>;
	77
	78	fn next(&mut self) -> Option<Idx<T>> {
	79	self.next.map(\|next\| {
	80	self.next = self.nodes[next].next_sibling;
	81	next
	82	})
	83	}
	84	}