diff options
-rw-r--r-- | crates/ra_prof/src/hprof.rs | 391 | ||||
-rw-r--r-- | crates/ra_prof/src/lib.rs | 400 |
2 files changed, 398 insertions, 393 deletions
diff --git a/crates/ra_prof/src/hprof.rs b/crates/ra_prof/src/hprof.rs new file mode 100644 index 000000000..79268513d --- /dev/null +++ b/crates/ra_prof/src/hprof.rs | |||
@@ -0,0 +1,391 @@ | |||
1 | //! Simple hierarchical profiler | ||
2 | use std::{ | ||
3 | cell::RefCell, | ||
4 | collections::{BTreeMap, HashSet}, | ||
5 | io::{stderr, Write}, | ||
6 | sync::{ | ||
7 | atomic::{AtomicBool, Ordering}, | ||
8 | RwLock, | ||
9 | }, | ||
10 | time::{Duration, Instant}, | ||
11 | }; | ||
12 | |||
13 | use once_cell::sync::Lazy; | ||
14 | |||
15 | /// Filtering syntax | ||
16 | /// env RA_PROFILE=* // dump everything | ||
17 | /// env RA_PROFILE=foo|bar|baz // enabled only selected entries | ||
18 | /// env RA_PROFILE=*@3>10 // dump everything, up to depth 3, if it takes more than 10 ms | ||
19 | pub fn init() { | ||
20 | let spec = std::env::var("RA_PROFILE").unwrap_or_default(); | ||
21 | init_from(&spec); | ||
22 | } | ||
23 | |||
24 | pub fn init_from(spec: &str) { | ||
25 | let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) }; | ||
26 | set_filter(filter); | ||
27 | } | ||
28 | |||
29 | pub type Label = &'static str; | ||
30 | |||
31 | /// This function starts a profiling scope in the current execution stack with a given description. | ||
32 | /// It returns a Profile structure and measure elapsed time between this method invocation and Profile structure drop. | ||
33 | /// It supports nested profiling scopes in case when this function invoked multiple times at the execution stack. In this case the profiling information will be nested at the output. | ||
34 | /// Profiling information is being printed in the stderr. | ||
35 | /// | ||
36 | /// # Example | ||
37 | /// ``` | ||
38 | /// use ra_prof::{profile, set_filter, Filter}; | ||
39 | /// | ||
40 | /// let f = Filter::from_spec("profile1|profile2@2"); | ||
41 | /// set_filter(f); | ||
42 | /// profiling_function1(); | ||
43 | /// | ||
44 | /// fn profiling_function1() { | ||
45 | /// let _p = profile("profile1"); | ||
46 | /// profiling_function2(); | ||
47 | /// } | ||
48 | /// | ||
49 | /// fn profiling_function2() { | ||
50 | /// let _p = profile("profile2"); | ||
51 | /// } | ||
52 | /// ``` | ||
53 | /// This will print in the stderr the following: | ||
54 | /// ```text | ||
55 | /// 0ms - profile | ||
56 | /// 0ms - profile2 | ||
57 | /// ``` | ||
58 | pub fn profile(label: Label) -> Profiler { | ||
59 | assert!(!label.is_empty()); | ||
60 | if !PROFILING_ENABLED.load(Ordering::Relaxed) { | ||
61 | return Profiler { label: None, detail: None }; | ||
62 | } | ||
63 | |||
64 | PROFILE_STACK.with(|stack| { | ||
65 | let mut stack = stack.borrow_mut(); | ||
66 | if stack.starts.is_empty() { | ||
67 | if let Ok(f) = FILTER.try_read() { | ||
68 | if f.version > stack.filter_data.version { | ||
69 | stack.filter_data = f.clone(); | ||
70 | } | ||
71 | }; | ||
72 | } | ||
73 | if stack.starts.len() > stack.filter_data.depth { | ||
74 | return Profiler { label: None, detail: None }; | ||
75 | } | ||
76 | let allowed = &stack.filter_data.allowed; | ||
77 | if stack.starts.is_empty() && !allowed.is_empty() && !allowed.contains(label) { | ||
78 | return Profiler { label: None, detail: None }; | ||
79 | } | ||
80 | |||
81 | stack.starts.push(Instant::now()); | ||
82 | Profiler { label: Some(label), detail: None } | ||
83 | }) | ||
84 | } | ||
85 | |||
86 | pub struct Profiler { | ||
87 | label: Option<Label>, | ||
88 | detail: Option<String>, | ||
89 | } | ||
90 | |||
91 | impl Profiler { | ||
92 | pub fn detail(mut self, detail: impl FnOnce() -> String) -> Profiler { | ||
93 | if self.label.is_some() { | ||
94 | self.detail = Some(detail()) | ||
95 | } | ||
96 | self | ||
97 | } | ||
98 | } | ||
99 | |||
100 | /// Set profiling filter. It specifies descriptions allowed to profile. | ||
101 | /// This is helpful when call stack has too many nested profiling scopes. | ||
102 | /// Additionally filter can specify maximum depth of profiling scopes nesting. | ||
103 | /// | ||
104 | /// #Example | ||
105 | /// ``` | ||
106 | /// use ra_prof::{set_filter, Filter}; | ||
107 | /// let f = Filter::from_spec("profile1|profile2@2"); | ||
108 | /// set_filter(f); | ||
109 | /// ``` | ||
110 | fn set_filter(f: Filter) { | ||
111 | PROFILING_ENABLED.store(f.depth > 0, Ordering::SeqCst); | ||
112 | let set: HashSet<_> = f.allowed.iter().cloned().collect(); | ||
113 | let mut old = FILTER.write().unwrap(); | ||
114 | let filter_data = FilterData { | ||
115 | depth: f.depth, | ||
116 | allowed: set, | ||
117 | longer_than: f.longer_than, | ||
118 | version: old.version + 1, | ||
119 | }; | ||
120 | *old = filter_data; | ||
121 | } | ||
122 | |||
123 | struct Filter { | ||
124 | depth: usize, | ||
125 | allowed: Vec<String>, | ||
126 | longer_than: Duration, | ||
127 | } | ||
128 | |||
129 | impl Filter { | ||
130 | fn from_spec(mut spec: &str) -> Filter { | ||
131 | let longer_than = if let Some(idx) = spec.rfind('>') { | ||
132 | let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than"); | ||
133 | spec = &spec[..idx]; | ||
134 | Duration::from_millis(longer_than) | ||
135 | } else { | ||
136 | Duration::new(0, 0) | ||
137 | }; | ||
138 | |||
139 | let depth = if let Some(idx) = spec.rfind('@') { | ||
140 | let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth"); | ||
141 | spec = &spec[..idx]; | ||
142 | depth | ||
143 | } else { | ||
144 | 999 | ||
145 | }; | ||
146 | let allowed = | ||
147 | if spec == "*" { Vec::new() } else { spec.split('|').map(String::from).collect() }; | ||
148 | Filter::new(depth, allowed, longer_than) | ||
149 | } | ||
150 | |||
151 | pub fn disabled() -> Filter { | ||
152 | Filter::new(0, Vec::new(), Duration::new(0, 0)) | ||
153 | } | ||
154 | |||
155 | pub fn new(depth: usize, allowed: Vec<String>, longer_than: Duration) -> Filter { | ||
156 | Filter { depth, allowed, longer_than } | ||
157 | } | ||
158 | } | ||
159 | |||
160 | struct ProfileStack { | ||
161 | starts: Vec<Instant>, | ||
162 | messages: Vec<Message>, | ||
163 | filter_data: FilterData, | ||
164 | } | ||
165 | |||
166 | struct Message { | ||
167 | level: usize, | ||
168 | duration: Duration, | ||
169 | label: Label, | ||
170 | detail: Option<String>, | ||
171 | } | ||
172 | |||
173 | impl ProfileStack { | ||
174 | fn new() -> ProfileStack { | ||
175 | ProfileStack { starts: Vec::new(), messages: Vec::new(), filter_data: Default::default() } | ||
176 | } | ||
177 | } | ||
178 | |||
179 | #[derive(Default, Clone)] | ||
180 | struct FilterData { | ||
181 | depth: usize, | ||
182 | version: usize, | ||
183 | allowed: HashSet<String>, | ||
184 | longer_than: Duration, | ||
185 | } | ||
186 | |||
187 | static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false); | ||
188 | |||
189 | static FILTER: Lazy<RwLock<FilterData>> = Lazy::new(Default::default); | ||
190 | |||
191 | thread_local!(static PROFILE_STACK: RefCell<ProfileStack> = RefCell::new(ProfileStack::new())); | ||
192 | |||
193 | impl Drop for Profiler { | ||
194 | fn drop(&mut self) { | ||
195 | match self { | ||
196 | Profiler { label: Some(label), detail } => { | ||
197 | PROFILE_STACK.with(|stack| { | ||
198 | let mut stack = stack.borrow_mut(); | ||
199 | let start = stack.starts.pop().unwrap(); | ||
200 | let duration = start.elapsed(); | ||
201 | let level = stack.starts.len(); | ||
202 | stack.messages.push(Message { level, duration, label, detail: detail.take() }); | ||
203 | if level == 0 { | ||
204 | let stdout = stderr(); | ||
205 | let longer_than = stack.filter_data.longer_than; | ||
206 | // Convert to millis for comparison to avoid problems with rounding | ||
207 | // (otherwise we could print `0ms` despite user's `>0` filter when | ||
208 | // `duration` is just a few nanos). | ||
209 | if duration.as_millis() > longer_than.as_millis() { | ||
210 | print(&stack.messages, longer_than, &mut stdout.lock()); | ||
211 | } | ||
212 | stack.messages.clear(); | ||
213 | } | ||
214 | }); | ||
215 | } | ||
216 | Profiler { label: None, .. } => (), | ||
217 | } | ||
218 | } | ||
219 | } | ||
220 | |||
221 | fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) { | ||
222 | if msgs.is_empty() { | ||
223 | return; | ||
224 | } | ||
225 | let children_map = idx_to_children(msgs); | ||
226 | let root_idx = msgs.len() - 1; | ||
227 | print_for_idx(root_idx, &children_map, msgs, longer_than, out); | ||
228 | } | ||
229 | |||
230 | fn print_for_idx( | ||
231 | current_idx: usize, | ||
232 | children_map: &[Vec<usize>], | ||
233 | msgs: &[Message], | ||
234 | longer_than: Duration, | ||
235 | out: &mut impl Write, | ||
236 | ) { | ||
237 | let current = &msgs[current_idx]; | ||
238 | let current_indent = " ".repeat(current.level); | ||
239 | let detail = current.detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default(); | ||
240 | writeln!( | ||
241 | out, | ||
242 | "{}{:5}ms - {}{}", | ||
243 | current_indent, | ||
244 | current.duration.as_millis(), | ||
245 | current.label, | ||
246 | detail, | ||
247 | ) | ||
248 | .expect("printing profiling info"); | ||
249 | |||
250 | let longer_than_millis = longer_than.as_millis(); | ||
251 | let children_indices = &children_map[current_idx]; | ||
252 | let mut accounted_for = Duration::default(); | ||
253 | let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output. | ||
254 | |||
255 | for child_idx in children_indices.iter() { | ||
256 | let child = &msgs[*child_idx]; | ||
257 | if child.duration.as_millis() > longer_than_millis { | ||
258 | print_for_idx(*child_idx, children_map, msgs, longer_than, out); | ||
259 | } else { | ||
260 | let pair = short_children.entry(child.label).or_insert((Duration::default(), 0)); | ||
261 | pair.0 += child.duration; | ||
262 | pair.1 += 1; | ||
263 | } | ||
264 | accounted_for += child.duration; | ||
265 | } | ||
266 | |||
267 | for (child_msg, (duration, count)) in short_children.iter() { | ||
268 | let millis = duration.as_millis(); | ||
269 | writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count) | ||
270 | .expect("printing profiling info"); | ||
271 | } | ||
272 | |||
273 | let unaccounted_millis = (current.duration - accounted_for).as_millis(); | ||
274 | if !children_indices.is_empty() | ||
275 | && unaccounted_millis > 0 | ||
276 | && unaccounted_millis > longer_than_millis | ||
277 | { | ||
278 | writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted_millis) | ||
279 | .expect("printing profiling info"); | ||
280 | } | ||
281 | } | ||
282 | |||
283 | /// Returns a mapping from an index in the `msgs` to the vector with the indices of its children. | ||
284 | /// | ||
285 | /// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish. | ||
286 | /// In other words, a postorder of the call graph. In particular, the root is the last element of | ||
287 | /// `msgs`. | ||
288 | fn idx_to_children(msgs: &[Message]) -> Vec<Vec<usize>> { | ||
289 | // Initialize with the index of the root; `msgs` and `ancestors` should be never empty. | ||
290 | assert!(!msgs.is_empty()); | ||
291 | let mut ancestors = vec![msgs.len() - 1]; | ||
292 | let mut result: Vec<Vec<usize>> = vec![vec![]; msgs.len()]; | ||
293 | for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() { | ||
294 | // We need to find the parent of the current message, i.e., the last ancestor that has a | ||
295 | // level lower than the current message. | ||
296 | while msgs[*ancestors.last().unwrap()].level >= msg.level { | ||
297 | ancestors.pop(); | ||
298 | } | ||
299 | result[*ancestors.last().unwrap()].push(idx); | ||
300 | ancestors.push(idx); | ||
301 | } | ||
302 | // Note that above we visited all children from the last to the first one. Let's reverse vectors | ||
303 | // to get the more natural order where the first element is the first child. | ||
304 | for vec in result.iter_mut() { | ||
305 | vec.reverse(); | ||
306 | } | ||
307 | result | ||
308 | } | ||
309 | |||
310 | #[cfg(test)] | ||
311 | mod tests { | ||
312 | use super::*; | ||
313 | |||
314 | #[test] | ||
315 | fn test_basic_profile() { | ||
316 | let s = vec!["profile1".to_string(), "profile2".to_string()]; | ||
317 | let f = Filter::new(2, s, Duration::new(0, 0)); | ||
318 | set_filter(f); | ||
319 | profiling_function1(); | ||
320 | } | ||
321 | |||
322 | fn profiling_function1() { | ||
323 | let _p = profile("profile1"); | ||
324 | profiling_function2(); | ||
325 | } | ||
326 | |||
327 | fn profiling_function2() { | ||
328 | let _p = profile("profile2"); | ||
329 | } | ||
330 | |||
331 | #[test] | ||
332 | fn test_longer_than() { | ||
333 | let mut result = vec![]; | ||
334 | let msgs = vec![ | ||
335 | Message { level: 1, duration: Duration::from_nanos(3), label: "bar", detail: None }, | ||
336 | Message { level: 1, duration: Duration::from_nanos(2), label: "bar", detail: None }, | ||
337 | Message { level: 0, duration: Duration::from_millis(1), label: "foo", detail: None }, | ||
338 | ]; | ||
339 | print(&msgs, Duration::from_millis(0), &mut result); | ||
340 | // The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed | ||
341 | // when printing. | ||
342 | assert_eq!( | ||
343 | std::str::from_utf8(&result).unwrap(), | ||
344 | " 1ms - foo\n 0ms - bar (2 calls)\n" | ||
345 | ); | ||
346 | } | ||
347 | |||
348 | #[test] | ||
349 | fn test_unaccounted_for_topmost() { | ||
350 | let mut result = vec![]; | ||
351 | let msgs = vec![ | ||
352 | Message { level: 1, duration: Duration::from_millis(2), label: "bar", detail: None }, | ||
353 | Message { level: 0, duration: Duration::from_millis(5), label: "foo", detail: None }, | ||
354 | ]; | ||
355 | print(&msgs, Duration::from_millis(0), &mut result); | ||
356 | assert_eq!( | ||
357 | std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(), | ||
358 | vec![ | ||
359 | " 5ms - foo", | ||
360 | " 2ms - bar", | ||
361 | " 3ms - ???", | ||
362 | // Dummy comment to improve formatting | ||
363 | ] | ||
364 | ); | ||
365 | } | ||
366 | |||
367 | #[test] | ||
368 | fn test_unaccounted_for_multiple_levels() { | ||
369 | let mut result = vec![]; | ||
370 | let msgs = vec![ | ||
371 | Message { level: 2, duration: Duration::from_millis(3), label: "baz", detail: None }, | ||
372 | Message { level: 1, duration: Duration::from_millis(5), label: "bar", detail: None }, | ||
373 | Message { level: 2, duration: Duration::from_millis(2), label: "baz", detail: None }, | ||
374 | Message { level: 1, duration: Duration::from_millis(4), label: "bar", detail: None }, | ||
375 | Message { level: 0, duration: Duration::from_millis(9), label: "foo", detail: None }, | ||
376 | ]; | ||
377 | print(&msgs, Duration::from_millis(0), &mut result); | ||
378 | assert_eq!( | ||
379 | std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(), | ||
380 | vec![ | ||
381 | " 9ms - foo", | ||
382 | " 5ms - bar", | ||
383 | " 3ms - baz", | ||
384 | " 2ms - ???", | ||
385 | " 4ms - bar", | ||
386 | " 2ms - baz", | ||
387 | " 2ms - ???", | ||
388 | ] | ||
389 | ); | ||
390 | } | ||
391 | } | ||
diff --git a/crates/ra_prof/src/lib.rs b/crates/ra_prof/src/lib.rs index d95ad3107..e6d672ef5 100644 --- a/crates/ra_prof/src/lib.rs +++ b/crates/ra_prof/src/lib.rs | |||
@@ -1,24 +1,16 @@ | |||
1 | //! FIXME: write short doc here | 1 | //! A collection of tools for profiling rust-analyzer. |
2 | 2 | ||
3 | mod memory_usage; | 3 | mod memory_usage; |
4 | #[cfg(feature = "cpu_profiler")] | 4 | #[cfg(feature = "cpu_profiler")] |
5 | mod google_cpu_profiler; | 5 | mod google_cpu_profiler; |
6 | mod hprof; | ||
6 | 7 | ||
7 | use std::{ | 8 | use std::cell::RefCell; |
8 | cell::RefCell, | ||
9 | collections::BTreeMap, | ||
10 | collections::HashSet, | ||
11 | io::{stderr, Write}, | ||
12 | sync::{ | ||
13 | atomic::{AtomicBool, Ordering}, | ||
14 | RwLock, | ||
15 | }, | ||
16 | time::{Duration, Instant}, | ||
17 | }; | ||
18 | |||
19 | use once_cell::sync::Lazy; | ||
20 | 9 | ||
21 | pub use crate::memory_usage::{Bytes, MemoryUsage}; | 10 | pub use crate::{ |
11 | hprof::{init, init_from, profile}, | ||
12 | memory_usage::{Bytes, MemoryUsage}, | ||
13 | }; | ||
22 | 14 | ||
23 | // We use jemalloc mainly to get heap usage statistics, actual performance | 15 | // We use jemalloc mainly to get heap usage statistics, actual performance |
24 | // difference is not measures. | 16 | // difference is not measures. |
@@ -26,301 +18,6 @@ pub use crate::memory_usage::{Bytes, MemoryUsage}; | |||
26 | #[global_allocator] | 18 | #[global_allocator] |
27 | static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; | 19 | static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; |
28 | 20 | ||
29 | /// Filtering syntax | ||
30 | /// env RA_PROFILE=* // dump everything | ||
31 | /// env RA_PROFILE=foo|bar|baz // enabled only selected entries | ||
32 | /// env RA_PROFILE=*@3>10 // dump everything, up to depth 3, if it takes more than 10 ms | ||
33 | pub fn init() { | ||
34 | let spec = std::env::var("RA_PROFILE").unwrap_or_default(); | ||
35 | init_from(&spec); | ||
36 | } | ||
37 | |||
38 | pub fn init_from(spec: &str) { | ||
39 | let filter = if spec.is_empty() { Filter::disabled() } else { Filter::from_spec(spec) }; | ||
40 | set_filter(filter); | ||
41 | } | ||
42 | |||
43 | /// Set profiling filter. It specifies descriptions allowed to profile. | ||
44 | /// This is helpful when call stack has too many nested profiling scopes. | ||
45 | /// Additionally filter can specify maximum depth of profiling scopes nesting. | ||
46 | /// | ||
47 | /// #Example | ||
48 | /// ``` | ||
49 | /// use ra_prof::{set_filter, Filter}; | ||
50 | /// let f = Filter::from_spec("profile1|profile2@2"); | ||
51 | /// set_filter(f); | ||
52 | /// ``` | ||
53 | fn set_filter(f: Filter) { | ||
54 | PROFILING_ENABLED.store(f.depth > 0, Ordering::SeqCst); | ||
55 | let set: HashSet<_> = f.allowed.iter().cloned().collect(); | ||
56 | let mut old = FILTER.write().unwrap(); | ||
57 | let filter_data = FilterData { | ||
58 | depth: f.depth, | ||
59 | allowed: set, | ||
60 | longer_than: f.longer_than, | ||
61 | version: old.version + 1, | ||
62 | }; | ||
63 | *old = filter_data; | ||
64 | } | ||
65 | |||
66 | pub type Label = &'static str; | ||
67 | |||
68 | /// This function starts a profiling scope in the current execution stack with a given description. | ||
69 | /// It returns a Profile structure and measure elapsed time between this method invocation and Profile structure drop. | ||
70 | /// It supports nested profiling scopes in case when this function invoked multiple times at the execution stack. In this case the profiling information will be nested at the output. | ||
71 | /// Profiling information is being printed in the stderr. | ||
72 | /// | ||
73 | /// # Example | ||
74 | /// ``` | ||
75 | /// use ra_prof::{profile, set_filter, Filter}; | ||
76 | /// | ||
77 | /// let f = Filter::from_spec("profile1|profile2@2"); | ||
78 | /// set_filter(f); | ||
79 | /// profiling_function1(); | ||
80 | /// | ||
81 | /// fn profiling_function1() { | ||
82 | /// let _p = profile("profile1"); | ||
83 | /// profiling_function2(); | ||
84 | /// } | ||
85 | /// | ||
86 | /// fn profiling_function2() { | ||
87 | /// let _p = profile("profile2"); | ||
88 | /// } | ||
89 | /// ``` | ||
90 | /// This will print in the stderr the following: | ||
91 | /// ```text | ||
92 | /// 0ms - profile | ||
93 | /// 0ms - profile2 | ||
94 | /// ``` | ||
95 | pub fn profile(label: Label) -> Profiler { | ||
96 | assert!(!label.is_empty()); | ||
97 | if !PROFILING_ENABLED.load(Ordering::Relaxed) { | ||
98 | return Profiler { label: None, detail: None }; | ||
99 | } | ||
100 | |||
101 | PROFILE_STACK.with(|stack| { | ||
102 | let mut stack = stack.borrow_mut(); | ||
103 | if stack.starts.is_empty() { | ||
104 | if let Ok(f) = FILTER.try_read() { | ||
105 | if f.version > stack.filter_data.version { | ||
106 | stack.filter_data = f.clone(); | ||
107 | } | ||
108 | }; | ||
109 | } | ||
110 | if stack.starts.len() > stack.filter_data.depth { | ||
111 | return Profiler { label: None, detail: None }; | ||
112 | } | ||
113 | let allowed = &stack.filter_data.allowed; | ||
114 | if stack.starts.is_empty() && !allowed.is_empty() && !allowed.contains(label) { | ||
115 | return Profiler { label: None, detail: None }; | ||
116 | } | ||
117 | |||
118 | stack.starts.push(Instant::now()); | ||
119 | Profiler { label: Some(label), detail: None } | ||
120 | }) | ||
121 | } | ||
122 | |||
123 | pub struct Profiler { | ||
124 | label: Option<Label>, | ||
125 | detail: Option<String>, | ||
126 | } | ||
127 | |||
128 | impl Profiler { | ||
129 | pub fn detail(mut self, detail: impl FnOnce() -> String) -> Profiler { | ||
130 | if self.label.is_some() { | ||
131 | self.detail = Some(detail()) | ||
132 | } | ||
133 | self | ||
134 | } | ||
135 | } | ||
136 | |||
137 | struct Filter { | ||
138 | depth: usize, | ||
139 | allowed: Vec<String>, | ||
140 | longer_than: Duration, | ||
141 | } | ||
142 | |||
143 | impl Filter { | ||
144 | fn from_spec(mut spec: &str) -> Filter { | ||
145 | let longer_than = if let Some(idx) = spec.rfind('>') { | ||
146 | let longer_than = spec[idx + 1..].parse().expect("invalid profile longer_than"); | ||
147 | spec = &spec[..idx]; | ||
148 | Duration::from_millis(longer_than) | ||
149 | } else { | ||
150 | Duration::new(0, 0) | ||
151 | }; | ||
152 | |||
153 | let depth = if let Some(idx) = spec.rfind('@') { | ||
154 | let depth: usize = spec[idx + 1..].parse().expect("invalid profile depth"); | ||
155 | spec = &spec[..idx]; | ||
156 | depth | ||
157 | } else { | ||
158 | 999 | ||
159 | }; | ||
160 | let allowed = | ||
161 | if spec == "*" { Vec::new() } else { spec.split('|').map(String::from).collect() }; | ||
162 | Filter::new(depth, allowed, longer_than) | ||
163 | } | ||
164 | |||
165 | pub fn disabled() -> Filter { | ||
166 | Filter::new(0, Vec::new(), Duration::new(0, 0)) | ||
167 | } | ||
168 | |||
169 | pub fn new(depth: usize, allowed: Vec<String>, longer_than: Duration) -> Filter { | ||
170 | Filter { depth, allowed, longer_than } | ||
171 | } | ||
172 | } | ||
173 | |||
174 | struct ProfileStack { | ||
175 | starts: Vec<Instant>, | ||
176 | messages: Vec<Message>, | ||
177 | filter_data: FilterData, | ||
178 | } | ||
179 | |||
180 | struct Message { | ||
181 | level: usize, | ||
182 | duration: Duration, | ||
183 | label: Label, | ||
184 | detail: Option<String>, | ||
185 | } | ||
186 | |||
187 | impl ProfileStack { | ||
188 | fn new() -> ProfileStack { | ||
189 | ProfileStack { starts: Vec::new(), messages: Vec::new(), filter_data: Default::default() } | ||
190 | } | ||
191 | } | ||
192 | |||
193 | #[derive(Default, Clone)] | ||
194 | struct FilterData { | ||
195 | depth: usize, | ||
196 | version: usize, | ||
197 | allowed: HashSet<String>, | ||
198 | longer_than: Duration, | ||
199 | } | ||
200 | |||
201 | static PROFILING_ENABLED: AtomicBool = AtomicBool::new(false); | ||
202 | |||
203 | static FILTER: Lazy<RwLock<FilterData>> = Lazy::new(Default::default); | ||
204 | |||
205 | thread_local!(static PROFILE_STACK: RefCell<ProfileStack> = RefCell::new(ProfileStack::new())); | ||
206 | |||
207 | impl Drop for Profiler { | ||
208 | fn drop(&mut self) { | ||
209 | match self { | ||
210 | Profiler { label: Some(label), detail } => { | ||
211 | PROFILE_STACK.with(|stack| { | ||
212 | let mut stack = stack.borrow_mut(); | ||
213 | let start = stack.starts.pop().unwrap(); | ||
214 | let duration = start.elapsed(); | ||
215 | let level = stack.starts.len(); | ||
216 | stack.messages.push(Message { level, duration, label, detail: detail.take() }); | ||
217 | if level == 0 { | ||
218 | let stdout = stderr(); | ||
219 | let longer_than = stack.filter_data.longer_than; | ||
220 | // Convert to millis for comparison to avoid problems with rounding | ||
221 | // (otherwise we could print `0ms` despite user's `>0` filter when | ||
222 | // `duration` is just a few nanos). | ||
223 | if duration.as_millis() > longer_than.as_millis() { | ||
224 | print(&stack.messages, longer_than, &mut stdout.lock()); | ||
225 | } | ||
226 | stack.messages.clear(); | ||
227 | } | ||
228 | }); | ||
229 | } | ||
230 | Profiler { label: None, .. } => (), | ||
231 | } | ||
232 | } | ||
233 | } | ||
234 | |||
235 | fn print(msgs: &[Message], longer_than: Duration, out: &mut impl Write) { | ||
236 | if msgs.is_empty() { | ||
237 | return; | ||
238 | } | ||
239 | let children_map = idx_to_children(msgs); | ||
240 | let root_idx = msgs.len() - 1; | ||
241 | print_for_idx(root_idx, &children_map, msgs, longer_than, out); | ||
242 | } | ||
243 | |||
244 | fn print_for_idx( | ||
245 | current_idx: usize, | ||
246 | children_map: &[Vec<usize>], | ||
247 | msgs: &[Message], | ||
248 | longer_than: Duration, | ||
249 | out: &mut impl Write, | ||
250 | ) { | ||
251 | let current = &msgs[current_idx]; | ||
252 | let current_indent = " ".repeat(current.level); | ||
253 | let detail = current.detail.as_ref().map(|it| format!(" @ {}", it)).unwrap_or_default(); | ||
254 | writeln!( | ||
255 | out, | ||
256 | "{}{:5}ms - {}{}", | ||
257 | current_indent, | ||
258 | current.duration.as_millis(), | ||
259 | current.label, | ||
260 | detail, | ||
261 | ) | ||
262 | .expect("printing profiling info"); | ||
263 | |||
264 | let longer_than_millis = longer_than.as_millis(); | ||
265 | let children_indices = &children_map[current_idx]; | ||
266 | let mut accounted_for = Duration::default(); | ||
267 | let mut short_children = BTreeMap::new(); // Use `BTreeMap` to get deterministic output. | ||
268 | |||
269 | for child_idx in children_indices.iter() { | ||
270 | let child = &msgs[*child_idx]; | ||
271 | if child.duration.as_millis() > longer_than_millis { | ||
272 | print_for_idx(*child_idx, children_map, msgs, longer_than, out); | ||
273 | } else { | ||
274 | let pair = short_children.entry(child.label).or_insert((Duration::default(), 0)); | ||
275 | pair.0 += child.duration; | ||
276 | pair.1 += 1; | ||
277 | } | ||
278 | accounted_for += child.duration; | ||
279 | } | ||
280 | |||
281 | for (child_msg, (duration, count)) in short_children.iter() { | ||
282 | let millis = duration.as_millis(); | ||
283 | writeln!(out, " {}{:5}ms - {} ({} calls)", current_indent, millis, child_msg, count) | ||
284 | .expect("printing profiling info"); | ||
285 | } | ||
286 | |||
287 | let unaccounted_millis = (current.duration - accounted_for).as_millis(); | ||
288 | if !children_indices.is_empty() | ||
289 | && unaccounted_millis > 0 | ||
290 | && unaccounted_millis > longer_than_millis | ||
291 | { | ||
292 | writeln!(out, " {}{:5}ms - ???", current_indent, unaccounted_millis) | ||
293 | .expect("printing profiling info"); | ||
294 | } | ||
295 | } | ||
296 | |||
297 | /// Returns a mapping from an index in the `msgs` to the vector with the indices of its children. | ||
298 | /// | ||
299 | /// This assumes that the entries in `msgs` are in the order of when the calls to `profile` finish. | ||
300 | /// In other words, a postorder of the call graph. In particular, the root is the last element of | ||
301 | /// `msgs`. | ||
302 | fn idx_to_children(msgs: &[Message]) -> Vec<Vec<usize>> { | ||
303 | // Initialize with the index of the root; `msgs` and `ancestors` should be never empty. | ||
304 | assert!(!msgs.is_empty()); | ||
305 | let mut ancestors = vec![msgs.len() - 1]; | ||
306 | let mut result: Vec<Vec<usize>> = vec![vec![]; msgs.len()]; | ||
307 | for (idx, msg) in msgs[..msgs.len() - 1].iter().enumerate().rev() { | ||
308 | // We need to find the parent of the current message, i.e., the last ancestor that has a | ||
309 | // level lower than the current message. | ||
310 | while msgs[*ancestors.last().unwrap()].level >= msg.level { | ||
311 | ancestors.pop(); | ||
312 | } | ||
313 | result[*ancestors.last().unwrap()].push(idx); | ||
314 | ancestors.push(idx); | ||
315 | } | ||
316 | // Note that above we visited all children from the last to the first one. Let's reverse vectors | ||
317 | // to get the more natural order where the first element is the first child. | ||
318 | for vec in result.iter_mut() { | ||
319 | vec.reverse(); | ||
320 | } | ||
321 | result | ||
322 | } | ||
323 | |||
324 | /// Prints backtrace to stderr, useful for debugging. | 21 | /// Prints backtrace to stderr, useful for debugging. |
325 | #[cfg(feature = "backtrace")] | 22 | #[cfg(feature = "backtrace")] |
326 | pub fn print_backtrace() { | 23 | pub fn print_backtrace() { |
@@ -406,86 +103,3 @@ impl Drop for CpuProfiler { | |||
406 | pub fn memory_usage() -> MemoryUsage { | 103 | pub fn memory_usage() -> MemoryUsage { |
407 | MemoryUsage::current() | 104 | MemoryUsage::current() |
408 | } | 105 | } |
409 | |||
410 | #[cfg(test)] | ||
411 | mod tests { | ||
412 | use super::*; | ||
413 | |||
414 | #[test] | ||
415 | fn test_basic_profile() { | ||
416 | let s = vec!["profile1".to_string(), "profile2".to_string()]; | ||
417 | let f = Filter::new(2, s, Duration::new(0, 0)); | ||
418 | set_filter(f); | ||
419 | profiling_function1(); | ||
420 | } | ||
421 | |||
422 | fn profiling_function1() { | ||
423 | let _p = profile("profile1"); | ||
424 | profiling_function2(); | ||
425 | } | ||
426 | |||
427 | fn profiling_function2() { | ||
428 | let _p = profile("profile2"); | ||
429 | } | ||
430 | |||
431 | #[test] | ||
432 | fn test_longer_than() { | ||
433 | let mut result = vec![]; | ||
434 | let msgs = vec![ | ||
435 | Message { level: 1, duration: Duration::from_nanos(3), label: "bar", detail: None }, | ||
436 | Message { level: 1, duration: Duration::from_nanos(2), label: "bar", detail: None }, | ||
437 | Message { level: 0, duration: Duration::from_millis(1), label: "foo", detail: None }, | ||
438 | ]; | ||
439 | print(&msgs, Duration::from_millis(0), &mut result); | ||
440 | // The calls to `bar` are so short that they'll be rounded to 0ms and should get collapsed | ||
441 | // when printing. | ||
442 | assert_eq!( | ||
443 | std::str::from_utf8(&result).unwrap(), | ||
444 | " 1ms - foo\n 0ms - bar (2 calls)\n" | ||
445 | ); | ||
446 | } | ||
447 | |||
448 | #[test] | ||
449 | fn test_unaccounted_for_topmost() { | ||
450 | let mut result = vec![]; | ||
451 | let msgs = vec![ | ||
452 | Message { level: 1, duration: Duration::from_millis(2), label: "bar", detail: None }, | ||
453 | Message { level: 0, duration: Duration::from_millis(5), label: "foo", detail: None }, | ||
454 | ]; | ||
455 | print(&msgs, Duration::from_millis(0), &mut result); | ||
456 | assert_eq!( | ||
457 | std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(), | ||
458 | vec![ | ||
459 | " 5ms - foo", | ||
460 | " 2ms - bar", | ||
461 | " 3ms - ???", | ||
462 | // Dummy comment to improve formatting | ||
463 | ] | ||
464 | ); | ||
465 | } | ||
466 | |||
467 | #[test] | ||
468 | fn test_unaccounted_for_multiple_levels() { | ||
469 | let mut result = vec![]; | ||
470 | let msgs = vec![ | ||
471 | Message { level: 2, duration: Duration::from_millis(3), label: "baz", detail: None }, | ||
472 | Message { level: 1, duration: Duration::from_millis(5), label: "bar", detail: None }, | ||
473 | Message { level: 2, duration: Duration::from_millis(2), label: "baz", detail: None }, | ||
474 | Message { level: 1, duration: Duration::from_millis(4), label: "bar", detail: None }, | ||
475 | Message { level: 0, duration: Duration::from_millis(9), label: "foo", detail: None }, | ||
476 | ]; | ||
477 | print(&msgs, Duration::from_millis(0), &mut result); | ||
478 | assert_eq!( | ||
479 | std::str::from_utf8(&result).unwrap().lines().collect::<Vec<_>>(), | ||
480 | vec![ | ||
481 | " 9ms - foo", | ||
482 | " 5ms - bar", | ||
483 | " 3ms - baz", | ||
484 | " 2ms - ???", | ||
485 | " 4ms - bar", | ||
486 | " 2ms - baz", | ||
487 | " 2ms - ???", | ||
488 | ] | ||
489 | ); | ||
490 | } | ||
491 | } | ||