aboutsummaryrefslogtreecommitdiff
path: root/crates/ide_db/src/symbol_index.rs
diff options
context:
space:
mode:
authorZac Pullar-Strecker <[email protected]>2020-08-24 10:19:53 +0100
committerZac Pullar-Strecker <[email protected]>2020-08-24 10:20:13 +0100
commit7bbca7a1b3f9293d2f5cc5745199bc5f8396f2f0 (patch)
treebdb47765991cb973b2cd5481a088fac636bd326c /crates/ide_db/src/symbol_index.rs
parentca464650eeaca6195891199a93f4f76cf3e7e697 (diff)
parente65d48d1fb3d4d91d9dc1148a7a836ff5c9a3c87 (diff)
Merge remote-tracking branch 'upstream/master' into 503-hover-doc-links
Diffstat (limited to 'crates/ide_db/src/symbol_index.rs')
-rw-r--r--crates/ide_db/src/symbol_index.rs429
1 files changed, 429 insertions, 0 deletions
diff --git a/crates/ide_db/src/symbol_index.rs b/crates/ide_db/src/symbol_index.rs
new file mode 100644
index 000000000..654df898e
--- /dev/null
+++ b/crates/ide_db/src/symbol_index.rs
@@ -0,0 +1,429 @@
1//! This module handles fuzzy-searching of functions, structs and other symbols
2//! by name across the whole workspace and dependencies.
3//!
4//! It works by building an incrementally-updated text-search index of all
5//! symbols. The backbone of the index is the **awesome** `fst` crate by
6//! @BurntSushi.
7//!
8//! In a nutshell, you give a set of strings to `fst`, and it builds a
9//! finite state machine describing this set of strings. The strings which
10//! could fuzzy-match a pattern can also be described by a finite state machine.
11//! What is freaking cool is that you can now traverse both state machines in
12//! lock-step to enumerate the strings which are both in the input set and
13//! fuzz-match the query. Or, more formally, given two languages described by
14//! FSTs, one can build a product FST which describes the intersection of the
15//! languages.
16//!
17//! `fst` does not support cheap updating of the index, but it supports unioning
18//! of state machines. So, to account for changing source code, we build an FST
19//! for each library (which is assumed to never change) and an FST for each Rust
20//! file in the current workspace, and run a query against the union of all
21//! those FSTs.
22
23use std::{
24 cmp::Ordering,
25 fmt,
26 hash::{Hash, Hasher},
27 mem,
28 sync::Arc,
29};
30
31use base_db::{
32 salsa::{self, ParallelDatabase},
33 CrateId, FileId, SourceDatabaseExt, SourceRootId,
34};
35use fst::{self, Streamer};
36use hir::db::DefDatabase;
37use rayon::prelude::*;
38use rustc_hash::{FxHashMap, FxHashSet};
39use syntax::{
40 ast::{self, NameOwner},
41 match_ast, AstNode, Parse, SmolStr, SourceFile,
42 SyntaxKind::{self, *},
43 SyntaxNode, SyntaxNodePtr, TextRange, WalkEvent,
44};
45
46use crate::RootDatabase;
47
48#[derive(Debug)]
49pub struct Query {
50 query: String,
51 lowercased: String,
52 only_types: bool,
53 libs: bool,
54 exact: bool,
55 limit: usize,
56}
57
58impl Query {
59 pub fn new(query: String) -> Query {
60 let lowercased = query.to_lowercase();
61 Query {
62 query,
63 lowercased,
64 only_types: false,
65 libs: false,
66 exact: false,
67 limit: usize::max_value(),
68 }
69 }
70
71 pub fn only_types(&mut self) {
72 self.only_types = true;
73 }
74
75 pub fn libs(&mut self) {
76 self.libs = true;
77 }
78
79 pub fn exact(&mut self) {
80 self.exact = true;
81 }
82
83 pub fn limit(&mut self, limit: usize) {
84 self.limit = limit
85 }
86}
87
88#[salsa::query_group(SymbolsDatabaseStorage)]
89pub trait SymbolsDatabase: hir::db::HirDatabase + SourceDatabaseExt {
90 fn file_symbols(&self, file_id: FileId) -> Arc<SymbolIndex>;
91 fn library_symbols(&self) -> Arc<FxHashMap<SourceRootId, SymbolIndex>>;
92 /// The set of "local" (that is, from the current workspace) roots.
93 /// Files in local roots are assumed to change frequently.
94 #[salsa::input]
95 fn local_roots(&self) -> Arc<FxHashSet<SourceRootId>>;
96 /// The set of roots for crates.io libraries.
97 /// Files in libraries are assumed to never change.
98 #[salsa::input]
99 fn library_roots(&self) -> Arc<FxHashSet<SourceRootId>>;
100}
101
102fn library_symbols(db: &dyn SymbolsDatabase) -> Arc<FxHashMap<SourceRootId, SymbolIndex>> {
103 let _p = profile::span("library_symbols");
104
105 let roots = db.library_roots();
106 let res = roots
107 .iter()
108 .map(|&root_id| {
109 let root = db.source_root(root_id);
110 let files = root
111 .iter()
112 .map(|it| (it, SourceDatabaseExt::file_text(db, it)))
113 .collect::<Vec<_>>();
114 let symbol_index = SymbolIndex::for_files(
115 files.into_par_iter().map(|(file, text)| (file, SourceFile::parse(&text))),
116 );
117 (root_id, symbol_index)
118 })
119 .collect();
120 Arc::new(res)
121}
122
123fn file_symbols(db: &dyn SymbolsDatabase, file_id: FileId) -> Arc<SymbolIndex> {
124 db.check_canceled();
125 let parse = db.parse(file_id);
126
127 let symbols = source_file_to_file_symbols(&parse.tree(), file_id);
128
129 // FIXME: add macros here
130
131 Arc::new(SymbolIndex::new(symbols))
132}
133
134/// Need to wrap Snapshot to provide `Clone` impl for `map_with`
135struct Snap<DB>(DB);
136impl<DB: ParallelDatabase> Clone for Snap<salsa::Snapshot<DB>> {
137 fn clone(&self) -> Snap<salsa::Snapshot<DB>> {
138 Snap(self.0.snapshot())
139 }
140}
141
142// Feature: Workspace Symbol
143//
144// Uses fuzzy-search to find types, modules and functions by name across your
145// project and dependencies. This is **the** most useful feature, which improves code
146// navigation tremendously. It mostly works on top of the built-in LSP
147// functionality, however `#` and `*` symbols can be used to narrow down the
148// search. Specifically,
149//
150// - `Foo` searches for `Foo` type in the current workspace
151// - `foo#` searches for `foo` function in the current workspace
152// - `Foo*` searches for `Foo` type among dependencies, including `stdlib`
153// - `foo#*` searches for `foo` function among dependencies
154//
155// That is, `#` switches from "types" to all symbols, `*` switches from the current
156// workspace to dependencies.
157//
158// |===
159// | Editor | Shortcut
160//
161// | VS Code | kbd:[Ctrl+T]
162// |===
163pub fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> {
164 let _p = profile::span("world_symbols").detail(|| query.query.clone());
165
166 let tmp1;
167 let tmp2;
168 let buf: Vec<&SymbolIndex> = if query.libs {
169 tmp1 = db.library_symbols();
170 tmp1.values().collect()
171 } else {
172 let mut files = Vec::new();
173 for &root in db.local_roots().iter() {
174 let sr = db.source_root(root);
175 files.extend(sr.iter())
176 }
177
178 let snap = Snap(db.snapshot());
179 tmp2 = files
180 .par_iter()
181 .map_with(snap, |db, &file_id| db.0.file_symbols(file_id))
182 .collect::<Vec<_>>();
183 tmp2.iter().map(|it| &**it).collect()
184 };
185 query.search(&buf)
186}
187
188pub fn crate_symbols(db: &RootDatabase, krate: CrateId, query: Query) -> Vec<FileSymbol> {
189 // FIXME(#4842): This now depends on CrateDefMap, why not build the entire symbol index from
190 // that instead?
191
192 let def_map = db.crate_def_map(krate);
193 let mut files = Vec::new();
194 let mut modules = vec![def_map.root];
195 while let Some(module) = modules.pop() {
196 let data = &def_map[module];
197 files.extend(data.origin.file_id());
198 modules.extend(data.children.values());
199 }
200
201 let snap = Snap(db.snapshot());
202
203 let buf = files
204 .par_iter()
205 .map_with(snap, |db, &file_id| db.0.file_symbols(file_id))
206 .collect::<Vec<_>>();
207 let buf = buf.iter().map(|it| &**it).collect::<Vec<_>>();
208
209 query.search(&buf)
210}
211
212pub fn index_resolve(db: &RootDatabase, name_ref: &ast::NameRef) -> Vec<FileSymbol> {
213 let name = name_ref.text();
214 let mut query = Query::new(name.to_string());
215 query.exact();
216 query.limit(4);
217 world_symbols(db, query)
218}
219
220#[derive(Default)]
221pub struct SymbolIndex {
222 symbols: Vec<FileSymbol>,
223 map: fst::Map<Vec<u8>>,
224}
225
226impl fmt::Debug for SymbolIndex {
227 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
228 f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish()
229 }
230}
231
232impl PartialEq for SymbolIndex {
233 fn eq(&self, other: &SymbolIndex) -> bool {
234 self.symbols == other.symbols
235 }
236}
237
238impl Eq for SymbolIndex {}
239
240impl Hash for SymbolIndex {
241 fn hash<H: Hasher>(&self, hasher: &mut H) {
242 self.symbols.hash(hasher)
243 }
244}
245
246impl SymbolIndex {
247 fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex {
248 fn cmp(lhs: &FileSymbol, rhs: &FileSymbol) -> Ordering {
249 let lhs_chars = lhs.name.chars().map(|c| c.to_ascii_lowercase());
250 let rhs_chars = rhs.name.chars().map(|c| c.to_ascii_lowercase());
251 lhs_chars.cmp(rhs_chars)
252 }
253
254 symbols.par_sort_by(cmp);
255
256 let mut builder = fst::MapBuilder::memory();
257
258 let mut last_batch_start = 0;
259
260 for idx in 0..symbols.len() {
261 if let Some(next_symbol) = symbols.get(idx + 1) {
262 if cmp(&symbols[last_batch_start], next_symbol) == Ordering::Equal {
263 continue;
264 }
265 }
266
267 let start = last_batch_start;
268 let end = idx + 1;
269 last_batch_start = end;
270
271 let key = symbols[start].name.as_str().to_ascii_lowercase();
272 let value = SymbolIndex::range_to_map_value(start, end);
273
274 builder.insert(key, value).unwrap();
275 }
276
277 let map = fst::Map::new(builder.into_inner().unwrap()).unwrap();
278 SymbolIndex { symbols, map }
279 }
280
281 pub fn len(&self) -> usize {
282 self.symbols.len()
283 }
284
285 pub fn memory_size(&self) -> usize {
286 self.map.as_fst().size() + self.symbols.len() * mem::size_of::<FileSymbol>()
287 }
288
289 pub(crate) fn for_files(
290 files: impl ParallelIterator<Item = (FileId, Parse<ast::SourceFile>)>,
291 ) -> SymbolIndex {
292 let symbols = files
293 .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id))
294 .collect::<Vec<_>>();
295 SymbolIndex::new(symbols)
296 }
297
298 fn range_to_map_value(start: usize, end: usize) -> u64 {
299 debug_assert![start <= (std::u32::MAX as usize)];
300 debug_assert![end <= (std::u32::MAX as usize)];
301
302 ((start as u64) << 32) | end as u64
303 }
304
305 fn map_value_to_range(value: u64) -> (usize, usize) {
306 let end = value as u32 as usize;
307 let start = (value >> 32) as usize;
308 (start, end)
309 }
310}
311
312impl Query {
313 pub(crate) fn search(self, indices: &[&SymbolIndex]) -> Vec<FileSymbol> {
314 let mut op = fst::map::OpBuilder::new();
315 for file_symbols in indices.iter() {
316 let automaton = fst::automaton::Subsequence::new(&self.lowercased);
317 op = op.add(file_symbols.map.search(automaton))
318 }
319 let mut stream = op.union();
320 let mut res = Vec::new();
321 while let Some((_, indexed_values)) = stream.next() {
322 for indexed_value in indexed_values {
323 let symbol_index = &indices[indexed_value.index];
324 let (start, end) = SymbolIndex::map_value_to_range(indexed_value.value);
325
326 for symbol in &symbol_index.symbols[start..end] {
327 if self.only_types && !is_type(symbol.kind) {
328 continue;
329 }
330 if self.exact && symbol.name != self.query {
331 continue;
332 }
333
334 res.push(symbol.clone());
335 if res.len() >= self.limit {
336 return res;
337 }
338 }
339 }
340 }
341 res
342 }
343}
344
345fn is_type(kind: SyntaxKind) -> bool {
346 matches!(kind, STRUCT | ENUM | TRAIT | TYPE_ALIAS)
347}
348
349/// The actual data that is stored in the index. It should be as compact as
350/// possible.
351#[derive(Debug, Clone, PartialEq, Eq, Hash)]
352pub struct FileSymbol {
353 pub file_id: FileId,
354 pub name: SmolStr,
355 pub kind: SyntaxKind,
356 pub range: TextRange,
357 pub ptr: SyntaxNodePtr,
358 pub name_range: Option<TextRange>,
359 pub container_name: Option<SmolStr>,
360}
361
362fn source_file_to_file_symbols(source_file: &SourceFile, file_id: FileId) -> Vec<FileSymbol> {
363 let mut symbols = Vec::new();
364 let mut stack = Vec::new();
365
366 for event in source_file.syntax().preorder() {
367 match event {
368 WalkEvent::Enter(node) => {
369 if let Some(mut symbol) = to_file_symbol(&node, file_id) {
370 symbol.container_name = stack.last().cloned();
371
372 stack.push(symbol.name.clone());
373 symbols.push(symbol);
374 }
375 }
376
377 WalkEvent::Leave(node) => {
378 if to_symbol(&node).is_some() {
379 stack.pop();
380 }
381 }
382 }
383 }
384
385 symbols
386}
387
388fn to_symbol(node: &SyntaxNode) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
389 fn decl<N: NameOwner>(node: N) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
390 let name = node.name()?;
391 let name_range = name.syntax().text_range();
392 let name = name.text().clone();
393 let ptr = SyntaxNodePtr::new(node.syntax());
394
395 Some((name, ptr, name_range))
396 }
397 match_ast! {
398 match node {
399 ast::Fn(it) => decl(it),
400 ast::Struct(it) => decl(it),
401 ast::Enum(it) => decl(it),
402 ast::Trait(it) => decl(it),
403 ast::Module(it) => decl(it),
404 ast::TypeAlias(it) => decl(it),
405 ast::Const(it) => decl(it),
406 ast::Static(it) => decl(it),
407 ast::MacroCall(it) => {
408 if it.is_macro_rules().is_some() {
409 decl(it)
410 } else {
411 None
412 }
413 },
414 _ => None,
415 }
416 }
417}
418
419fn to_file_symbol(node: &SyntaxNode, file_id: FileId) -> Option<FileSymbol> {
420 to_symbol(node).map(move |(name, ptr, name_range)| FileSymbol {
421 name,
422 kind: node.kind(),
423 range: node.text_range(),
424 ptr,
425 file_id,
426 name_range: Some(name_range),
427 container_name: None,
428 })
429}