diff options
Diffstat (limited to 'crates/ide_db/src/symbol_index.rs')
-rw-r--r-- | crates/ide_db/src/symbol_index.rs | 429 |
1 files changed, 429 insertions, 0 deletions
diff --git a/crates/ide_db/src/symbol_index.rs b/crates/ide_db/src/symbol_index.rs new file mode 100644 index 000000000..654df898e --- /dev/null +++ b/crates/ide_db/src/symbol_index.rs | |||
@@ -0,0 +1,429 @@ | |||
1 | //! This module handles fuzzy-searching of functions, structs and other symbols | ||
2 | //! by name across the whole workspace and dependencies. | ||
3 | //! | ||
4 | //! It works by building an incrementally-updated text-search index of all | ||
5 | //! symbols. The backbone of the index is the **awesome** `fst` crate by | ||
6 | //! @BurntSushi. | ||
7 | //! | ||
8 | //! In a nutshell, you give a set of strings to `fst`, and it builds a | ||
9 | //! finite state machine describing this set of strings. The strings which | ||
10 | //! could fuzzy-match a pattern can also be described by a finite state machine. | ||
11 | //! What is freaking cool is that you can now traverse both state machines in | ||
12 | //! lock-step to enumerate the strings which are both in the input set and | ||
13 | //! fuzzy-match the query. Or, more formally, given two languages described by | ||
14 | //! FSTs, one can build a product FST which describes the intersection of the | ||
15 | //! languages. | ||
16 | //! | ||
17 | //! `fst` does not support cheap updating of the index, but it supports unioning | ||
18 | //! of state machines. So, to account for changing source code, we build an FST | ||
19 | //! for each library (which is assumed to never change) and an FST for each Rust | ||
20 | //! file in the current workspace, and run a query against the union of all | ||
21 | //! those FSTs. | ||
22 | |||
23 | use std::{ | ||
24 | cmp::Ordering, | ||
25 | fmt, | ||
26 | hash::{Hash, Hasher}, | ||
27 | mem, | ||
28 | sync::Arc, | ||
29 | }; | ||
30 | |||
31 | use base_db::{ | ||
32 | salsa::{self, ParallelDatabase}, | ||
33 | CrateId, FileId, SourceDatabaseExt, SourceRootId, | ||
34 | }; | ||
35 | use fst::{self, Streamer}; | ||
36 | use hir::db::DefDatabase; | ||
37 | use rayon::prelude::*; | ||
38 | use rustc_hash::{FxHashMap, FxHashSet}; | ||
39 | use syntax::{ | ||
40 | ast::{self, NameOwner}, | ||
41 | match_ast, AstNode, Parse, SmolStr, SourceFile, | ||
42 | SyntaxKind::{self, *}, | ||
43 | SyntaxNode, SyntaxNodePtr, TextRange, WalkEvent, | ||
44 | }; | ||
45 | |||
46 | use crate::RootDatabase; | ||
47 | |||
/// A symbol search query, built via [`Query::new`] and refined with the
/// setter methods before being run through `world_symbols`/`crate_symbols`.
#[derive(Debug)]
pub struct Query {
    query: String,      // the raw pattern as typed by the user
    lowercased: String, // lowercased copy, used for the case-insensitive fst search
    only_types: bool,   // restrict results to type-like symbols (struct/enum/trait/type alias)
    libs: bool,         // search library (dependency) roots instead of the local workspace
    exact: bool,        // require an exact (case-sensitive) name match
    limit: usize,       // maximum number of results to return
}
57 | |||
58 | impl Query { | ||
59 | pub fn new(query: String) -> Query { | ||
60 | let lowercased = query.to_lowercase(); | ||
61 | Query { | ||
62 | query, | ||
63 | lowercased, | ||
64 | only_types: false, | ||
65 | libs: false, | ||
66 | exact: false, | ||
67 | limit: usize::max_value(), | ||
68 | } | ||
69 | } | ||
70 | |||
71 | pub fn only_types(&mut self) { | ||
72 | self.only_types = true; | ||
73 | } | ||
74 | |||
75 | pub fn libs(&mut self) { | ||
76 | self.libs = true; | ||
77 | } | ||
78 | |||
79 | pub fn exact(&mut self) { | ||
80 | self.exact = true; | ||
81 | } | ||
82 | |||
83 | pub fn limit(&mut self, limit: usize) { | ||
84 | self.limit = limit | ||
85 | } | ||
86 | } | ||
87 | |||
/// Salsa database queries backing the symbol index.
///
/// `file_symbols` and `library_symbols` are derived (memoized) queries whose
/// implementations are the free functions of the same name below;
/// `local_roots` and `library_roots` are inputs set by the IDE host.
#[salsa::query_group(SymbolsDatabaseStorage)]
pub trait SymbolsDatabase: hir::db::HirDatabase + SourceDatabaseExt {
    /// Symbol index for a single workspace file; recomputed when it changes.
    fn file_symbols(&self, file_id: FileId) -> Arc<SymbolIndex>;
    /// One symbol index per library source root, built for all roots at once.
    fn library_symbols(&self) -> Arc<FxHashMap<SourceRootId, SymbolIndex>>;
    /// The set of "local" (that is, from the current workspace) roots.
    /// Files in local roots are assumed to change frequently.
    #[salsa::input]
    fn local_roots(&self) -> Arc<FxHashSet<SourceRootId>>;
    /// The set of roots for crates.io libraries.
    /// Files in libraries are assumed to never change.
    #[salsa::input]
    fn library_roots(&self) -> Arc<FxHashSet<SourceRootId>>;
}
101 | |||
102 | fn library_symbols(db: &dyn SymbolsDatabase) -> Arc<FxHashMap<SourceRootId, SymbolIndex>> { | ||
103 | let _p = profile::span("library_symbols"); | ||
104 | |||
105 | let roots = db.library_roots(); | ||
106 | let res = roots | ||
107 | .iter() | ||
108 | .map(|&root_id| { | ||
109 | let root = db.source_root(root_id); | ||
110 | let files = root | ||
111 | .iter() | ||
112 | .map(|it| (it, SourceDatabaseExt::file_text(db, it))) | ||
113 | .collect::<Vec<_>>(); | ||
114 | let symbol_index = SymbolIndex::for_files( | ||
115 | files.into_par_iter().map(|(file, text)| (file, SourceFile::parse(&text))), | ||
116 | ); | ||
117 | (root_id, symbol_index) | ||
118 | }) | ||
119 | .collect(); | ||
120 | Arc::new(res) | ||
121 | } | ||
122 | |||
123 | fn file_symbols(db: &dyn SymbolsDatabase, file_id: FileId) -> Arc<SymbolIndex> { | ||
124 | db.check_canceled(); | ||
125 | let parse = db.parse(file_id); | ||
126 | |||
127 | let symbols = source_file_to_file_symbols(&parse.tree(), file_id); | ||
128 | |||
129 | // FIXME: add macros here | ||
130 | |||
131 | Arc::new(SymbolIndex::new(symbols)) | ||
132 | } | ||
133 | |||
/// Need to wrap Snapshot to provide `Clone` impl for `map_with`
///
/// `salsa::Snapshot` itself is not `Clone`, but rayon's `map_with` requires a
/// `Clone` seed value; cloning here takes a fresh database snapshot instead.
struct Snap<DB>(DB);
impl<DB: ParallelDatabase> Clone for Snap<salsa::Snapshot<DB>> {
    fn clone(&self) -> Snap<salsa::Snapshot<DB>> {
        Snap(self.0.snapshot())
    }
}
141 | |||
142 | // Feature: Workspace Symbol | ||
143 | // | ||
144 | // Uses fuzzy-search to find types, modules and functions by name across your | ||
145 | // project and dependencies. This is **the** most useful feature, which improves code | ||
146 | // navigation tremendously. It mostly works on top of the built-in LSP | ||
147 | // functionality, however `#` and `*` symbols can be used to narrow down the | ||
148 | // search. Specifically, | ||
149 | // | ||
150 | // - `Foo` searches for `Foo` type in the current workspace | ||
151 | // - `foo#` searches for `foo` function in the current workspace | ||
152 | // - `Foo*` searches for `Foo` type among dependencies, including `stdlib` | ||
153 | // - `foo#*` searches for `foo` function among dependencies | ||
154 | // | ||
155 | // That is, `#` switches from "types" to all symbols, `*` switches from the current | ||
156 | // workspace to dependencies. | ||
157 | // | ||
158 | // |=== | ||
159 | // | Editor | Shortcut | ||
160 | // | ||
161 | // | VS Code | kbd:[Ctrl+T] | ||
162 | // |=== | ||
163 | pub fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> { | ||
164 | let _p = profile::span("world_symbols").detail(|| query.query.clone()); | ||
165 | |||
166 | let tmp1; | ||
167 | let tmp2; | ||
168 | let buf: Vec<&SymbolIndex> = if query.libs { | ||
169 | tmp1 = db.library_symbols(); | ||
170 | tmp1.values().collect() | ||
171 | } else { | ||
172 | let mut files = Vec::new(); | ||
173 | for &root in db.local_roots().iter() { | ||
174 | let sr = db.source_root(root); | ||
175 | files.extend(sr.iter()) | ||
176 | } | ||
177 | |||
178 | let snap = Snap(db.snapshot()); | ||
179 | tmp2 = files | ||
180 | .par_iter() | ||
181 | .map_with(snap, |db, &file_id| db.0.file_symbols(file_id)) | ||
182 | .collect::<Vec<_>>(); | ||
183 | tmp2.iter().map(|it| &**it).collect() | ||
184 | }; | ||
185 | query.search(&buf) | ||
186 | } | ||
187 | |||
188 | pub fn crate_symbols(db: &RootDatabase, krate: CrateId, query: Query) -> Vec<FileSymbol> { | ||
189 | // FIXME(#4842): This now depends on CrateDefMap, why not build the entire symbol index from | ||
190 | // that instead? | ||
191 | |||
192 | let def_map = db.crate_def_map(krate); | ||
193 | let mut files = Vec::new(); | ||
194 | let mut modules = vec![def_map.root]; | ||
195 | while let Some(module) = modules.pop() { | ||
196 | let data = &def_map[module]; | ||
197 | files.extend(data.origin.file_id()); | ||
198 | modules.extend(data.children.values()); | ||
199 | } | ||
200 | |||
201 | let snap = Snap(db.snapshot()); | ||
202 | |||
203 | let buf = files | ||
204 | .par_iter() | ||
205 | .map_with(snap, |db, &file_id| db.0.file_symbols(file_id)) | ||
206 | .collect::<Vec<_>>(); | ||
207 | let buf = buf.iter().map(|it| &**it).collect::<Vec<_>>(); | ||
208 | |||
209 | query.search(&buf) | ||
210 | } | ||
211 | |||
212 | pub fn index_resolve(db: &RootDatabase, name_ref: &ast::NameRef) -> Vec<FileSymbol> { | ||
213 | let name = name_ref.text(); | ||
214 | let mut query = Query::new(name.to_string()); | ||
215 | query.exact(); | ||
216 | query.limit(4); | ||
217 | world_symbols(db, query) | ||
218 | } | ||
219 | |||
/// A searchable index over the symbols of a set of files.
///
/// `map` is an `fst` map from lowercased symbol name to a `(start, end)`
/// range (packed into a `u64`) of the name-sorted `symbols` vector.
#[derive(Default)]
pub struct SymbolIndex {
    symbols: Vec<FileSymbol>, // sorted case-insensitively by name (see `SymbolIndex::new`)
    map: fst::Map<Vec<u8>>,   // lowercased name -> packed range into `symbols`
}
225 | |||
impl fmt::Debug for SymbolIndex {
    // Only report the symbol count; dumping every symbol would be unreadable.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish()
    }
}

// Equality and hashing look only at `symbols`: the fst `map` is derived from
// `symbols` in `SymbolIndex::new`, so comparing it would be redundant.
impl PartialEq for SymbolIndex {
    fn eq(&self, other: &SymbolIndex) -> bool {
        self.symbols == other.symbols
    }
}

impl Eq for SymbolIndex {}

impl Hash for SymbolIndex {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        self.symbols.hash(hasher)
    }
}
245 | |||
246 | impl SymbolIndex { | ||
247 | fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex { | ||
248 | fn cmp(lhs: &FileSymbol, rhs: &FileSymbol) -> Ordering { | ||
249 | let lhs_chars = lhs.name.chars().map(|c| c.to_ascii_lowercase()); | ||
250 | let rhs_chars = rhs.name.chars().map(|c| c.to_ascii_lowercase()); | ||
251 | lhs_chars.cmp(rhs_chars) | ||
252 | } | ||
253 | |||
254 | symbols.par_sort_by(cmp); | ||
255 | |||
256 | let mut builder = fst::MapBuilder::memory(); | ||
257 | |||
258 | let mut last_batch_start = 0; | ||
259 | |||
260 | for idx in 0..symbols.len() { | ||
261 | if let Some(next_symbol) = symbols.get(idx + 1) { | ||
262 | if cmp(&symbols[last_batch_start], next_symbol) == Ordering::Equal { | ||
263 | continue; | ||
264 | } | ||
265 | } | ||
266 | |||
267 | let start = last_batch_start; | ||
268 | let end = idx + 1; | ||
269 | last_batch_start = end; | ||
270 | |||
271 | let key = symbols[start].name.as_str().to_ascii_lowercase(); | ||
272 | let value = SymbolIndex::range_to_map_value(start, end); | ||
273 | |||
274 | builder.insert(key, value).unwrap(); | ||
275 | } | ||
276 | |||
277 | let map = fst::Map::new(builder.into_inner().unwrap()).unwrap(); | ||
278 | SymbolIndex { symbols, map } | ||
279 | } | ||
280 | |||
281 | pub fn len(&self) -> usize { | ||
282 | self.symbols.len() | ||
283 | } | ||
284 | |||
285 | pub fn memory_size(&self) -> usize { | ||
286 | self.map.as_fst().size() + self.symbols.len() * mem::size_of::<FileSymbol>() | ||
287 | } | ||
288 | |||
289 | pub(crate) fn for_files( | ||
290 | files: impl ParallelIterator<Item = (FileId, Parse<ast::SourceFile>)>, | ||
291 | ) -> SymbolIndex { | ||
292 | let symbols = files | ||
293 | .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id)) | ||
294 | .collect::<Vec<_>>(); | ||
295 | SymbolIndex::new(symbols) | ||
296 | } | ||
297 | |||
298 | fn range_to_map_value(start: usize, end: usize) -> u64 { | ||
299 | debug_assert![start <= (std::u32::MAX as usize)]; | ||
300 | debug_assert![end <= (std::u32::MAX as usize)]; | ||
301 | |||
302 | ((start as u64) << 32) | end as u64 | ||
303 | } | ||
304 | |||
305 | fn map_value_to_range(value: u64) -> (usize, usize) { | ||
306 | let end = value as u32 as usize; | ||
307 | let start = (value >> 32) as usize; | ||
308 | (start, end) | ||
309 | } | ||
310 | } | ||
311 | |||
impl Query {
    /// Runs the query against a set of symbol indices.
    ///
    /// Builds a subsequence (fuzzy) automaton from the lowercased query and
    /// intersects it with the union of all the indices' fsts; `only_types`
    /// and `exact` filters are then applied per symbol. Returns early once
    /// `limit` results have been collected.
    pub(crate) fn search(self, indices: &[&SymbolIndex]) -> Vec<FileSymbol> {
        let mut op = fst::map::OpBuilder::new();
        for file_symbols in indices.iter() {
            let automaton = fst::automaton::Subsequence::new(&self.lowercased);
            op = op.add(file_symbols.map.search(automaton))
        }
        let mut stream = op.union();
        let mut res = Vec::new();
        while let Some((_, indexed_values)) = stream.next() {
            for indexed_value in indexed_values {
                // `index` identifies which input fst matched; `value` is the
                // packed range into that index's sorted `symbols` vector.
                let symbol_index = &indices[indexed_value.index];
                let (start, end) = SymbolIndex::map_value_to_range(indexed_value.value);

                for symbol in &symbol_index.symbols[start..end] {
                    if self.only_types && !is_type(symbol.kind) {
                        continue;
                    }
                    // The fst match above is case-insensitive, but `exact`
                    // compares against the original-case query.
                    if self.exact && symbol.name != self.query {
                        continue;
                    }

                    res.push(symbol.clone());
                    if res.len() >= self.limit {
                        return res;
                    }
                }
            }
        }
        res
    }
}
344 | |||
345 | fn is_type(kind: SyntaxKind) -> bool { | ||
346 | matches!(kind, STRUCT | ENUM | TRAIT | TYPE_ALIAS) | ||
347 | } | ||
348 | |||
/// The actual data that is stored in the index. It should be as compact as
/// possible.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FileSymbol {
    pub file_id: FileId,                 // file in which the symbol is declared
    pub name: SmolStr,                   // symbol name as written in source
    pub kind: SyntaxKind,                // syntax kind of the declaration node (STRUCT, FN, ...)
    pub range: TextRange,                // range of the whole declaration node
    pub ptr: SyntaxNodePtr,              // stable pointer back to the declaration node
    pub name_range: Option<TextRange>,   // range of just the name token, when known
    pub container_name: Option<SmolStr>, // name of the enclosing declaration, if any
}
361 | |||
362 | fn source_file_to_file_symbols(source_file: &SourceFile, file_id: FileId) -> Vec<FileSymbol> { | ||
363 | let mut symbols = Vec::new(); | ||
364 | let mut stack = Vec::new(); | ||
365 | |||
366 | for event in source_file.syntax().preorder() { | ||
367 | match event { | ||
368 | WalkEvent::Enter(node) => { | ||
369 | if let Some(mut symbol) = to_file_symbol(&node, file_id) { | ||
370 | symbol.container_name = stack.last().cloned(); | ||
371 | |||
372 | stack.push(symbol.name.clone()); | ||
373 | symbols.push(symbol); | ||
374 | } | ||
375 | } | ||
376 | |||
377 | WalkEvent::Leave(node) => { | ||
378 | if to_symbol(&node).is_some() { | ||
379 | stack.pop(); | ||
380 | } | ||
381 | } | ||
382 | } | ||
383 | } | ||
384 | |||
385 | symbols | ||
386 | } | ||
387 | |||
/// Extracts `(name, pointer, name-range)` from a declaration node, or `None`
/// if the node is not a declaration kind we index (or lacks a name).
fn to_symbol(node: &SyntaxNode) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
    // Shared extraction for any node implementing `NameOwner`.
    fn decl<N: NameOwner>(node: N) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
        let name = node.name()?;
        let name_range = name.syntax().text_range();
        let name = name.text().clone();
        let ptr = SyntaxNodePtr::new(node.syntax());

        Some((name, ptr, name_range))
    }
    match_ast! {
        match node {
            ast::Fn(it) => decl(it),
            ast::Struct(it) => decl(it),
            ast::Enum(it) => decl(it),
            ast::Trait(it) => decl(it),
            ast::Module(it) => decl(it),
            ast::TypeAlias(it) => decl(it),
            ast::Const(it) => decl(it),
            ast::Static(it) => decl(it),
            // Macro calls are indexed only when they define a macro
            // (i.e. `macro_rules!`), not at every invocation site.
            ast::MacroCall(it) => {
                if it.is_macro_rules().is_some() {
                    decl(it)
                } else {
                    None
                }
            },
            _ => None,
        }
    }
}
418 | |||
419 | fn to_file_symbol(node: &SyntaxNode, file_id: FileId) -> Option<FileSymbol> { | ||
420 | to_symbol(node).map(move |(name, ptr, name_range)| FileSymbol { | ||
421 | name, | ||
422 | kind: node.kind(), | ||
423 | range: node.text_range(), | ||
424 | ptr, | ||
425 | file_id, | ||
426 | name_range: Some(name_range), | ||
427 | container_name: None, | ||
428 | }) | ||
429 | } | ||