aboutsummaryrefslogtreecommitdiff
path: root/crates/profile
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2020-08-12 15:32:36 +0100
committerAleksey Kladov <[email protected]>2020-08-12 15:35:29 +0100
commit208b7bd7ba687fb570feb1b89219f14c63712ce8 (patch)
treec39749d1b71b73b4017c2d0d848ebdc85e570c39 /crates/profile
parent98baa9b569b49162392ed4149dd435854fe941b8 (diff)
Rename ra_prof -> profile
Diffstat (limited to 'crates/profile')
-rw-r--r--crates/profile/Cargo.toml27
-rw-r--r--crates/profile/src/google_cpu_profiler.rs39
-rw-r--r--crates/profile/src/hprof.rs240
-rw-r--r--crates/profile/src/lib.rs109
-rw-r--r--crates/profile/src/memory_usage.rs75
-rw-r--r--crates/profile/src/stop_watch.rs86
-rw-r--r--crates/profile/src/tree.rs84
7 files changed, 660 insertions, 0 deletions
diff --git a/crates/profile/Cargo.toml b/crates/profile/Cargo.toml
new file mode 100644
index 000000000..e271e3a56
--- /dev/null
+++ b/crates/profile/Cargo.toml
@@ -0,0 +1,27 @@
1[package]
2name = "profile"
3version = "0.0.0"
4license = "MIT OR Apache-2.0"
5authors = ["rust-analyzer developers"]
6edition = "2018"
7
8[lib]
9doctest = false
10
11[dependencies]
12once_cell = "1.3.1"
13cfg-if = "0.1.10"
14libc = "0.2.73"
15backtrace = { version = "0.3.44", optional = true }
16
17arena = { path = "../arena" }
18
19[target.'cfg(target_os = "linux")'.dependencies]
20perf-event = "0.4"
21
22[features]
23cpu_profiler = []
24
25# Uncomment to enable for the whole crate graph
26# default = [ "backtrace" ]
27# default = [ "cpu_profiler" ]
diff --git a/crates/profile/src/google_cpu_profiler.rs b/crates/profile/src/google_cpu_profiler.rs
new file mode 100644
index 000000000..db865c65b
--- /dev/null
+++ b/crates/profile/src/google_cpu_profiler.rs
@@ -0,0 +1,39 @@
1//! https://github.com/gperftools/gperftools
2
3use std::{
4 ffi::CString,
5 os::raw::c_char,
6 path::Path,
7 sync::atomic::{AtomicUsize, Ordering},
8};
9
10#[link(name = "profiler")]
11#[allow(non_snake_case)]
12extern "C" {
13 fn ProfilerStart(fname: *const c_char) -> i32;
14 fn ProfilerStop();
15}
16
17static PROFILER_STATE: AtomicUsize = AtomicUsize::new(OFF);
18const OFF: usize = 0;
19const ON: usize = 1;
20const PENDING: usize = 2;
21
22pub fn start(path: &Path) {
23 if PROFILER_STATE.compare_and_swap(OFF, PENDING, Ordering::SeqCst) != OFF {
24 panic!("profiler already started");
25 }
26 let path = CString::new(path.display().to_string()).unwrap();
27 if unsafe { ProfilerStart(path.as_ptr()) } == 0 {
28 panic!("profiler failed to start")
29 }
30 assert!(PROFILER_STATE.compare_and_swap(PENDING, ON, Ordering::SeqCst) == PENDING);
31}
32
33pub fn stop() {
34 if PROFILER_STATE.compare_and_swap(ON, PENDING, Ordering::SeqCst) != ON {
35 panic!("profiler is not started")
36 }
37 unsafe { ProfilerStop() };
38 assert!(PROFILER_STATE.compare_and_swap(PENDING, OFF, Ordering::SeqCst) == PENDING);
39}
diff --git a/crates/profile/src/hprof.rs b/crates/profile/src/hprof.rs
new file mode 100644
index 000000000..934cc8e37
--- /dev/null
+++ b/crates/profile/src/hprof.rs
@@ -0,0 +1,240 @@
1//! Simple hierarchical profiler
2use once_cell::sync::Lazy;
3use std::{
4 cell::RefCell,
5 collections::{BTreeMap, HashSet},
6 io::{stderr, Write},
7 sync::{
8 atomic::{AtomicBool, Ordering},
9 RwLock,
10 },
11 time::{Duration, Instant},
12};
13
14use crate::tree::{Idx, Tree};
15
16/// Filtering syntax
17/// env RA_PROFILE=* // dump everything
18/// env RA_PROFILE=foo|bar|baz // enabled only selected entries
19/// env RA_PROFILE=*@3>10 // dump everything, up to depth 3, if it takes more than 10 ms
20pub fn init() {
21 let spec = std::env::var("RA_PROFILE").unwrap_or_default();
22 init_from(&spec);
23}
24
25pub fn init_from(spec: &str) {
26 let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) };
27 filter.install();
28}
29
30pub type Label = &'static str;
31
32/// This function starts a profiling scope in the current execution stack with a given description.
33/// It returns a `Profile` struct that measures elapsed time between this method invocation and `Profile` struct drop.
34/// It supports nested profiling scopes in case when this function is invoked multiple times at the execution stack.
35/// In this case the profiling information will be nested at the output.
36/// Profiling information is being printed in the stderr.
37///
38/// # Example
39/// ```
40/// profile::init_from("profile1|profile2@2");
41/// profiling_function1();
42///
43/// fn profiling_function1() {
44/// let _p = profile::span("profile1");
45/// profiling_function2();
46/// }
47///
48/// fn profiling_function2() {
49/// let _p = profile::span("profile2");
50/// }
51/// ```
52/// This will print in the stderr the following:
53/// ```text
54/// 0ms - profile
55/// 0ms - profile2
56/// ```
57pub fn span(label: Label) -> ProfileSpan {
58 assert!(!label.is_empty());
59
60 if PROFILING_ENABLED.load(Ordering::Relaxed)
61 && PROFILE_STACK.with(|stack| stack.borrow_mut().push(label))
62 {
63 ProfileSpan(Some(ProfilerImpl { label, detail: None }))
64 } else {
65 ProfileSpan(None)
66 }
67}
68
69pub struct ProfileSpan(Option<ProfilerImpl>);
70
71struct ProfilerImpl {
72 label: Label,
73 detail: Option<String>,
74}
75
76impl ProfileSpan {
77 pub fn detail(mut self, detail: impl FnOnce() -> String) -> ProfileSpan {
78 if let Some(profiler) = &mut self.0 {
79 profiler.detail = Some(detail())
80 }
81 self
82 }
83}
84
85impl Drop for ProfilerImpl {
86 fn drop(&mut self) {
87 PROFILE_STACK.with(|it| it.borrow_mut().pop(self.label, self.detail.take()));
88 }
89}
90
91static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false);
92static FILTER: Lazy<RwLock<Filter>> = Lazy::new(Default::default);
93thread_local!(static PROFILE_STACK: RefCell<ProfileStack> = RefCell::new(ProfileStack::new()));
94
95#[derive(Default, Clone, Debug)]
96struct Filter {
97 depth: usize,
98 allowed: HashSet<String>,
99 longer_than: Duration,
100 version: usize,
101}
102
103impl Filter {
104 fn disabled() -> Filter {
105 Filter::default()
106 }
107
108 fn from_spec(mut spec: &str) -> Filter {
109 let longer_than = if let Some(idx) = spec.rfind('>') {
110 let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than");
111 spec = &spec[..idx];
112 Duration::from_millis(longer_than)
113 } else {
114 Duration::new(0, 0)
115 };
116
117 let depth = if let Some(idx) = spec.rfind('@') {
118 let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth");
119 spec = &spec[..idx];
120 depth
121 } else {
122 999
123 };
124 let allowed =
125 if spec == "*" { HashSet::new() } else { spec.split('|').map(String::from).collect() };
126 Filter { depth, allowed, longer_than, version: 0 }
127 }
128
129 fn install(mut self) {
130 PROFILING_ENABLED.store(self.depth > 0, Ordering::SeqCst);
131 let mut old = FILTER.write().unwrap();
132 self.version = old.version + 1;
133 *old = self;
134 }
135}
136
137struct ProfileStack {
138 starts: Vec<Instant>,
139 filter: Filter,
140 messages: Tree<Message>,
141}
142
143#[derive(Default)]
144struct Message {
145 duration: Duration,
146 label: Label,
147 detail: Option<String>,
148}
149
150impl ProfileStack {
151 fn new() -> ProfileStack {
152 ProfileStack { starts: Vec::new(), messages: Tree::default(), filter: Default::default() }
153 }
154
155 fn push(&mut self, label: Label) -> bool {
156 if self.starts.is_empty() {
157 if let Ok(f) = FILTER.try_read() {
158 if f.version > self.filter.version {
159 self.filter = f.clone();
160 }
161 };
162 }
163 if self.starts.len() > self.filter.depth {
164 return false;
165 }
166 let allowed = &self.filter.allowed;
167 if self.starts.is_empty() && !allowed.is_empty() && !allowed.contains(label) {
168 return false;
169 }
170
171 self.starts.push(Instant::now());
172 self.messages.start();
173 true
174 }
175
176 pub fn pop(&mut self, label: Label, detail: Option<String>) {
177 let start = self.starts.pop().unwrap();
178 let duration = start.elapsed();
179 self.messages.finish(Message { duration, label, detail });
180 if self.starts.is_empty() {
181 let longer_than = self.filter.longer_than;
182 // Convert to millis for comparison to avoid problems with rounding
183 // (otherwise we could print `0ms` despite user's `>0` filter when
184 // `duration` is just a few nanos).
185 if duration.as_millis() > longer_than.as_millis() {
186 if let Some(root) = self.messages.root() {
187 print(&self.messages, root, 0, longer_than, &mut stderr().lock());
188 }
189 }
190 self.messages.clear();
191 }
192 }
193}
194
195fn print(
196 tree: &Tree<Message>,
197 curr: Idx<Message>,
198 level: u32,
199 longer_than: Duration,
200 out: &mut impl Write,
201) {
202 let current_indent = " ".repeat(level as usize);
203 let detail = tree[curr].detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default();
204 writeln!(
205 out,
206 "{}{:5}ms - {}{}",
207 current_indent,
208 tree[curr].duration.as_millis(),
209 tree[curr].label,
210 detail,
211 )
212 .expect("printing profiling info");
213
214 let mut accounted_for = Duration::default();
215 let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output.
216 for child in tree.children(curr) {
217 accounted_for += tree[child].duration;
218
219 if tree[child].duration.as_millis() > longer_than.as_millis() {
220 print(tree, child, level + 1, longer_than, out)
221 } else {
222 let (total_duration, cnt) =
223 short_children.entry(tree[child].label).or_insert((Duration::default(), 0));
224 *total_duration += tree[child].duration;
225 *cnt += 1;
226 }
227 }
228
229 for (child_msg, (duration, count)) in short_children.iter() {
230 let millis = duration.as_millis();
231 writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count)
232 .expect("printing profiling info");
233 }
234
235 let unaccounted = tree[curr].duration - accounted_for;
236 if tree.children(curr).next().is_some() && unaccounted > longer_than {
237 writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted.as_millis())
238 .expect("printing profiling info");
239 }
240}
diff --git a/crates/profile/src/lib.rs b/crates/profile/src/lib.rs
new file mode 100644
index 000000000..ab19271c7
--- /dev/null
+++ b/crates/profile/src/lib.rs
@@ -0,0 +1,109 @@
1//! A collection of tools for profiling rust-analyzer.
2
3mod stop_watch;
4mod memory_usage;
5#[cfg(feature = "cpu_profiler")]
6mod google_cpu_profiler;
7mod hprof;
8mod tree;
9
10use std::cell::RefCell;
11
12pub use crate::{
13 hprof::{init, init_from, span},
14 memory_usage::{Bytes, MemoryUsage},
15 stop_watch::{StopWatch, StopWatchSpan},
16};
17
18/// Prints backtrace to stderr, useful for debugging.
19#[cfg(feature = "backtrace")]
20pub fn print_backtrace() {
21 let bt = backtrace::Backtrace::new();
22 eprintln!("{:?}", bt);
23}
24#[cfg(not(feature = "backtrace"))]
25pub fn print_backtrace() {
26 eprintln!(
27 r#"enable the backtrace feature:
28 profile = {{ path = "../profile", features = [ "backtrace"] }}
29"#
30 );
31}
32
33thread_local!(static IN_SCOPE: RefCell<bool> = RefCell::new(false));
34
35/// Allows to check if the current code is withing some dynamic scope, can be
36/// useful during debugging to figure out why a function is called.
37pub struct Scope {
38 prev: bool,
39}
40
41impl Scope {
42 #[must_use]
43 pub fn enter() -> Scope {
44 let prev = IN_SCOPE.with(|slot| std::mem::replace(&mut *slot.borrow_mut(), true));
45 Scope { prev }
46 }
47 pub fn is_active() -> bool {
48 IN_SCOPE.with(|slot| *slot.borrow())
49 }
50}
51
52impl Drop for Scope {
53 fn drop(&mut self) {
54 IN_SCOPE.with(|slot| *slot.borrow_mut() = self.prev);
55 }
56}
57
58/// A wrapper around google_cpu_profiler.
59///
60/// Usage:
61/// 1. Install gpref_tools (https://github.com/gperftools/gperftools), probably packaged with your Linux distro.
62/// 2. Build with `cpu_profiler` feature.
63/// 3. Tun the code, the *raw* output would be in the `./out.profile` file.
64/// 4. Install pprof for visualization (https://github.com/google/pprof).
65/// 5. Bump sampling frequency to once per ms: `export CPUPROFILE_FREQUENCY=1000`
66/// 6. Use something like `pprof -svg target/release/rust-analyzer ./out.profile` to see the results.
67///
68/// For example, here's how I run profiling on NixOS:
69///
70/// ```bash
71/// $ nix-shell -p gperftools --run \
72/// 'cargo run --release -p rust-analyzer -- parse < ~/projects/rustbench/parser.rs > /dev/null'
73/// ```
74///
75/// See this diff for how to profile completions:
76///
77/// https://github.com/rust-analyzer/rust-analyzer/pull/5306
78#[derive(Debug)]
79pub struct CpuSpan {
80 _private: (),
81}
82
83#[must_use]
84pub fn cpu_span() -> CpuSpan {
85 #[cfg(feature = "cpu_profiler")]
86 {
87 google_cpu_profiler::start("./out.profile".as_ref())
88 }
89
90 #[cfg(not(feature = "cpu_profiler"))]
91 {
92 eprintln!("cpu_profiler feature is disabled")
93 }
94
95 CpuSpan { _private: () }
96}
97
98impl Drop for CpuSpan {
99 fn drop(&mut self) {
100 #[cfg(feature = "cpu_profiler")]
101 {
102 google_cpu_profiler::stop()
103 }
104 }
105}
106
107pub fn memory_usage() -> MemoryUsage {
108 MemoryUsage::current()
109}
diff --git a/crates/profile/src/memory_usage.rs b/crates/profile/src/memory_usage.rs
new file mode 100644
index 000000000..83390212a
--- /dev/null
+++ b/crates/profile/src/memory_usage.rs
@@ -0,0 +1,75 @@
1//! FIXME: write short doc here
2use std::fmt;
3
4use cfg_if::cfg_if;
5
6#[derive(Copy, Clone)]
7pub struct MemoryUsage {
8 pub allocated: Bytes,
9}
10
11impl fmt::Display for MemoryUsage {
12 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
13 write!(fmt, "{}", self.allocated)
14 }
15}
16
17impl std::ops::Sub for MemoryUsage {
18 type Output = MemoryUsage;
19 fn sub(self, rhs: MemoryUsage) -> MemoryUsage {
20 MemoryUsage { allocated: self.allocated - rhs.allocated }
21 }
22}
23
24impl MemoryUsage {
25 pub fn current() -> MemoryUsage {
26 cfg_if! {
27 if #[cfg(all(target_os = "linux", target_env = "gnu"))] {
28 // Note: This is incredibly slow.
29 let alloc = unsafe { libc::mallinfo() }.uordblks as isize;
30 MemoryUsage { allocated: Bytes(alloc) }
31 } else {
32 MemoryUsage { allocated: Bytes(0) }
33 }
34 }
35 }
36}
37
38#[derive(Default, PartialEq, Eq, PartialOrd, Ord, Hash, Clone, Copy)]
39pub struct Bytes(isize);
40
41impl Bytes {
42 pub fn megabytes(self) -> isize {
43 self.0 / 1024 / 1024
44 }
45}
46
47impl fmt::Display for Bytes {
48 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
49 let bytes = self.0;
50 let mut value = bytes;
51 let mut suffix = "b";
52 if value.abs() > 4096 {
53 value /= 1024;
54 suffix = "kb";
55 if value.abs() > 4096 {
56 value /= 1024;
57 suffix = "mb";
58 }
59 }
60 f.pad(&format!("{}{}", value, suffix))
61 }
62}
63
64impl std::ops::AddAssign<usize> for Bytes {
65 fn add_assign(&mut self, x: usize) {
66 self.0 += x as isize;
67 }
68}
69
70impl std::ops::Sub for Bytes {
71 type Output = Bytes;
72 fn sub(self, rhs: Bytes) -> Bytes {
73 Bytes(self.0 - rhs.0)
74 }
75}
diff --git a/crates/profile/src/stop_watch.rs b/crates/profile/src/stop_watch.rs
new file mode 100644
index 000000000..5e276190e
--- /dev/null
+++ b/crates/profile/src/stop_watch.rs
@@ -0,0 +1,86 @@
1//! Like `std::time::Instant`, but also measures memory & CPU cycles.
2use std::{
3 fmt,
4 time::{Duration, Instant},
5};
6
7use crate::MemoryUsage;
8
9pub struct StopWatch {
10 time: Instant,
11 #[cfg(target_os = "linux")]
12 counter: Option<perf_event::Counter>,
13 memory: Option<MemoryUsage>,
14}
15
16pub struct StopWatchSpan {
17 pub time: Duration,
18 pub instructions: Option<u64>,
19 pub memory: Option<MemoryUsage>,
20}
21
22impl StopWatch {
23 pub fn start() -> StopWatch {
24 #[cfg(target_os = "linux")]
25 let counter = {
26 let mut counter = perf_event::Builder::new()
27 .build()
28 .map_err(|err| eprintln!("Failed to create perf counter: {}", err))
29 .ok();
30 if let Some(counter) = &mut counter {
31 if let Err(err) = counter.enable() {
32 eprintln!("Failed to start perf counter: {}", err)
33 }
34 }
35 counter
36 };
37 let time = Instant::now();
38 StopWatch {
39 time,
40 #[cfg(target_os = "linux")]
41 counter,
42 memory: None,
43 }
44 }
45 pub fn memory(mut self, yes: bool) -> StopWatch {
46 if yes {
47 self.memory = Some(MemoryUsage::current());
48 }
49 self
50 }
51 pub fn elapsed(&mut self) -> StopWatchSpan {
52 let time = self.time.elapsed();
53
54 #[cfg(target_os = "linux")]
55 let instructions = self.counter.as_mut().and_then(|it| {
56 it.read().map_err(|err| eprintln!("Failed to read perf counter: {}", err)).ok()
57 });
58 #[cfg(not(target_os = "linux"))]
59 let instructions = None;
60
61 let memory = self.memory.map(|it| MemoryUsage::current() - it);
62 StopWatchSpan { time, instructions, memory }
63 }
64}
65
66impl fmt::Display for StopWatchSpan {
67 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
68 write!(f, "{:.2?}", self.time)?;
69 if let Some(mut instructions) = self.instructions {
70 let mut prefix = "";
71 if instructions > 10000 {
72 instructions /= 1000;
73 prefix = "k"
74 }
75 if instructions > 10000 {
76 instructions /= 1000;
77 prefix = "m"
78 }
79 write!(f, ", {}{}i", instructions, prefix)?;
80 }
81 if let Some(memory) = self.memory {
82 write!(f, ", {}", memory)?;
83 }
84 Ok(())
85 }
86}
diff --git a/crates/profile/src/tree.rs b/crates/profile/src/tree.rs
new file mode 100644
index 000000000..096f58511
--- /dev/null
+++ b/crates/profile/src/tree.rs
@@ -0,0 +1,84 @@
1//! A simple tree implementation which tries to not allocate all over the place.
2use std::ops;
3
4use arena::Arena;
5
6#[derive(Default)]
7pub struct Tree<T> {
8 nodes: Arena<Node<T>>,
9 current_path: Vec<(Idx<T>, Option<Idx<T>>)>,
10}
11
12pub type Idx<T> = arena::Idx<Node<T>>;
13
14impl<T> Tree<T> {
15 pub fn start(&mut self)
16 where
17 T: Default,
18 {
19 let me = self.nodes.alloc(Node::new(T::default()));
20 if let Some((parent, last_child)) = self.current_path.last_mut() {
21 let slot = match *last_child {
22 Some(last_child) => &mut self.nodes[last_child].next_sibling,
23 None => &mut self.nodes[*parent].first_child,
24 };
25 let prev = slot.replace(me);
26 assert!(prev.is_none());
27 *last_child = Some(me);
28 }
29
30 self.current_path.push((me, None));
31 }
32
33 pub fn finish(&mut self, data: T) {
34 let (me, _last_child) = self.current_path.pop().unwrap();
35 self.nodes[me].data = data;
36 }
37
38 pub fn root(&self) -> Option<Idx<T>> {
39 self.nodes.iter().next().map(|(idx, _)| idx)
40 }
41
42 pub fn children(&self, idx: Idx<T>) -> impl Iterator<Item = Idx<T>> + '_ {
43 NodeIter { nodes: &self.nodes, next: self.nodes[idx].first_child }
44 }
45 pub fn clear(&mut self) {
46 self.nodes.clear();
47 self.current_path.clear();
48 }
49}
50
51impl<T> ops::Index<Idx<T>> for Tree<T> {
52 type Output = T;
53 fn index(&self, index: Idx<T>) -> &T {
54 &self.nodes[index].data
55 }
56}
57
58pub struct Node<T> {
59 data: T,
60 first_child: Option<Idx<T>>,
61 next_sibling: Option<Idx<T>>,
62}
63
64impl<T> Node<T> {
65 fn new(data: T) -> Node<T> {
66 Node { data, first_child: None, next_sibling: None }
67 }
68}
69
70struct NodeIter<'a, T> {
71 nodes: &'a Arena<Node<T>>,
72 next: Option<Idx<T>>,
73}
74
75impl<'a, T> Iterator for NodeIter<'a, T> {
76 type Item = Idx<T>;
77
78 fn next(&mut self) -> Option<Idx<T>> {
79 self.next.map(|next| {
80 self.next = self.nodes[next].next_sibling;
81 next
82 })
83 }
84}