aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_ide_db/src/symbol_index.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_ide_db/src/symbol_index.rs')
-rw-r--r--crates/ra_ide_db/src/symbol_index.rs445
1 files changed, 445 insertions, 0 deletions
diff --git a/crates/ra_ide_db/src/symbol_index.rs b/crates/ra_ide_db/src/symbol_index.rs
new file mode 100644
index 000000000..33f042d88
--- /dev/null
+++ b/crates/ra_ide_db/src/symbol_index.rs
@@ -0,0 +1,445 @@
1//! This module handles fuzzy-searching of functions, structs and other symbols
2//! by name across the whole workspace and dependencies.
3//!
4//! It works by building an incrementally-updated text-search index of all
5//! symbols. The backbone of the index is the **awesome** `fst` crate by
6//! @BurntSushi.
7//!
8//! In a nutshell, you give a set of strings to `fst`, and it builds a
9//! finite state machine describing this set of strings. The strings which
10//! could fuzzy-match a pattern can also be described by a finite state machine.
11//! What is freaking cool is that you can now traverse both state machines in
12//! lock-step to enumerate the strings which are both in the input set and
13//! fuzz-match the query. Or, more formally, given two languages described by
14//! FSTs, one can build a product FST which describes the intersection of the
15//! languages.
16//!
17//! `fst` does not support cheap updating of the index, but it supports unioning
18//! of state machines. So, to account for changing source code, we build an FST
19//! for each library (which is assumed to never change) and an FST for each Rust
20//! file in the current workspace, and run a query against the union of all
21//! those FSTs.
22use std::{
23 fmt,
24 hash::{Hash, Hasher},
25 mem,
26 sync::Arc,
27};
28
29use fst::{self, Streamer};
30use ra_db::{
31 salsa::{self, ParallelDatabase},
32 FileId, SourceDatabaseExt, SourceRootId,
33};
34use ra_syntax::{
35 ast::{self, NameOwner},
36 match_ast, AstNode, Parse, SmolStr, SourceFile,
37 SyntaxKind::{self, *},
38 SyntaxNode, SyntaxNodePtr, TextRange, WalkEvent,
39};
40#[cfg(not(feature = "wasm"))]
41use rayon::prelude::*;
42
43use crate::RootDatabase;
44
45#[derive(Debug)]
46pub struct Query {
47 query: String,
48 lowercased: String,
49 only_types: bool,
50 libs: bool,
51 exact: bool,
52 limit: usize,
53}
54
55impl Query {
56 pub fn new(query: String) -> Query {
57 let lowercased = query.to_lowercase();
58 Query {
59 query,
60 lowercased,
61 only_types: false,
62 libs: false,
63 exact: false,
64 limit: usize::max_value(),
65 }
66 }
67
68 pub fn only_types(&mut self) {
69 self.only_types = true;
70 }
71
72 pub fn libs(&mut self) {
73 self.libs = true;
74 }
75
76 pub fn exact(&mut self) {
77 self.exact = true;
78 }
79
80 pub fn limit(&mut self, limit: usize) {
81 self.limit = limit
82 }
83}
84
85#[salsa::query_group(SymbolsDatabaseStorage)]
86pub trait SymbolsDatabase: hir::db::HirDatabase {
87 fn file_symbols(&self, file_id: FileId) -> Arc<SymbolIndex>;
88 #[salsa::input]
89 fn library_symbols(&self, id: SourceRootId) -> Arc<SymbolIndex>;
90 /// The set of "local" (that is, from the current workspace) roots.
91 /// Files in local roots are assumed to change frequently.
92 #[salsa::input]
93 fn local_roots(&self) -> Arc<Vec<SourceRootId>>;
94 /// The set of roots for crates.io libraries.
95 /// Files in libraries are assumed to never change.
96 #[salsa::input]
97 fn library_roots(&self) -> Arc<Vec<SourceRootId>>;
98}
99
100fn file_symbols(db: &impl SymbolsDatabase, file_id: FileId) -> Arc<SymbolIndex> {
101 db.check_canceled();
102 let parse = db.parse(file_id);
103
104 let symbols = source_file_to_file_symbols(&parse.tree(), file_id);
105
106 // FIXME: add macros here
107
108 Arc::new(SymbolIndex::new(symbols))
109}
110
111pub fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> {
112 /// Need to wrap Snapshot to provide `Clone` impl for `map_with`
113 struct Snap(salsa::Snapshot<RootDatabase>);
114 impl Clone for Snap {
115 fn clone(&self) -> Snap {
116 Snap(self.0.snapshot())
117 }
118 }
119
120 let buf: Vec<Arc<SymbolIndex>> = if query.libs {
121 let snap = Snap(db.snapshot());
122 #[cfg(not(feature = "wasm"))]
123 let buf = db
124 .library_roots()
125 .par_iter()
126 .map_with(snap, |db, &lib_id| db.0.library_symbols(lib_id))
127 .collect();
128
129 #[cfg(feature = "wasm")]
130 let buf = db.library_roots().iter().map(|&lib_id| snap.0.library_symbols(lib_id)).collect();
131
132 buf
133 } else {
134 let mut files = Vec::new();
135 for &root in db.local_roots().iter() {
136 let sr = db.source_root(root);
137 files.extend(sr.walk())
138 }
139
140 let snap = Snap(db.snapshot());
141 #[cfg(not(feature = "wasm"))]
142 let buf =
143 files.par_iter().map_with(snap, |db, &file_id| db.0.file_symbols(file_id)).collect();
144
145 #[cfg(feature = "wasm")]
146 let buf = files.iter().map(|&file_id| snap.0.file_symbols(file_id)).collect();
147
148 buf
149 };
150 query.search(&buf)
151}
152
153pub fn index_resolve(db: &RootDatabase, name_ref: &ast::NameRef) -> Vec<FileSymbol> {
154 let name = name_ref.text();
155 let mut query = Query::new(name.to_string());
156 query.exact();
157 query.limit(4);
158 world_symbols(db, query)
159}
160
161#[derive(Default)]
162pub struct SymbolIndex {
163 symbols: Vec<FileSymbol>,
164 map: fst::Map,
165}
166
167impl fmt::Debug for SymbolIndex {
168 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
169 f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish()
170 }
171}
172
173impl PartialEq for SymbolIndex {
174 fn eq(&self, other: &SymbolIndex) -> bool {
175 self.symbols == other.symbols
176 }
177}
178
179impl Eq for SymbolIndex {}
180
181impl Hash for SymbolIndex {
182 fn hash<H: Hasher>(&self, hasher: &mut H) {
183 self.symbols.hash(hasher)
184 }
185}
186
187impl SymbolIndex {
188 fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex {
189 fn cmp_key<'a>(s1: &'a FileSymbol) -> impl Ord + 'a {
190 unicase::Ascii::new(s1.name.as_str())
191 }
192 #[cfg(not(feature = "wasm"))]
193 symbols.par_sort_by(|s1, s2| cmp_key(s1).cmp(&cmp_key(s2)));
194
195 #[cfg(feature = "wasm")]
196 symbols.sort_by(|s1, s2| cmp_key(s1).cmp(&cmp_key(s2)));
197
198 let mut builder = fst::MapBuilder::memory();
199
200 let mut last_batch_start = 0;
201
202 for idx in 0..symbols.len() {
203 if symbols.get(last_batch_start).map(cmp_key) == symbols.get(idx + 1).map(cmp_key) {
204 continue;
205 }
206
207 let start = last_batch_start;
208 let end = idx + 1;
209 last_batch_start = end;
210
211 let key = symbols[start].name.as_str().to_lowercase();
212 let value = SymbolIndex::range_to_map_value(start, end);
213
214 builder.insert(key, value).unwrap();
215 }
216
217 let map = fst::Map::from_bytes(builder.into_inner().unwrap()).unwrap();
218 SymbolIndex { symbols, map }
219 }
220
221 pub fn len(&self) -> usize {
222 self.symbols.len()
223 }
224
225 pub fn memory_size(&self) -> usize {
226 self.map.as_fst().size() + self.symbols.len() * mem::size_of::<FileSymbol>()
227 }
228
229 #[cfg(not(feature = "wasm"))]
230 pub(crate) fn for_files(
231 files: impl ParallelIterator<Item = (FileId, Parse<ast::SourceFile>)>,
232 ) -> SymbolIndex {
233 let symbols = files
234 .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id))
235 .collect::<Vec<_>>();
236 SymbolIndex::new(symbols)
237 }
238
239 #[cfg(feature = "wasm")]
240 pub(crate) fn for_files(
241 files: impl Iterator<Item = (FileId, Parse<ast::SourceFile>)>,
242 ) -> SymbolIndex {
243 let symbols = files
244 .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id))
245 .collect::<Vec<_>>();
246 SymbolIndex::new(symbols)
247 }
248
249 fn range_to_map_value(start: usize, end: usize) -> u64 {
250 debug_assert![start <= (std::u32::MAX as usize)];
251 debug_assert![end <= (std::u32::MAX as usize)];
252
253 ((start as u64) << 32) | end as u64
254 }
255
256 fn map_value_to_range(value: u64) -> (usize, usize) {
257 let end = value as u32 as usize;
258 let start = (value >> 32) as usize;
259 (start, end)
260 }
261}
262
263impl Query {
264 pub(crate) fn search(self, indices: &[Arc<SymbolIndex>]) -> Vec<FileSymbol> {
265 let mut op = fst::map::OpBuilder::new();
266 for file_symbols in indices.iter() {
267 let automaton = fst::automaton::Subsequence::new(&self.lowercased);
268 op = op.add(file_symbols.map.search(automaton))
269 }
270 let mut stream = op.union();
271 let mut res = Vec::new();
272 while let Some((_, indexed_values)) = stream.next() {
273 if res.len() >= self.limit {
274 break;
275 }
276 for indexed_value in indexed_values {
277 let symbol_index = &indices[indexed_value.index];
278 let (start, end) = SymbolIndex::map_value_to_range(indexed_value.value);
279
280 for symbol in &symbol_index.symbols[start..end] {
281 if self.only_types && !is_type(symbol.ptr.kind()) {
282 continue;
283 }
284 if self.exact && symbol.name != self.query {
285 continue;
286 }
287 res.push(symbol.clone());
288 }
289 }
290 }
291 res
292 }
293}
294
295fn is_type(kind: SyntaxKind) -> bool {
296 match kind {
297 STRUCT_DEF | ENUM_DEF | TRAIT_DEF | TYPE_ALIAS_DEF => true,
298 _ => false,
299 }
300}
301
302/// The actual data that is stored in the index. It should be as compact as
303/// possible.
304#[derive(Debug, Clone, PartialEq, Eq, Hash)]
305pub struct FileSymbol {
306 pub file_id: FileId,
307 pub name: SmolStr,
308 pub ptr: SyntaxNodePtr,
309 pub name_range: Option<TextRange>,
310 pub container_name: Option<SmolStr>,
311}
312
313fn source_file_to_file_symbols(source_file: &SourceFile, file_id: FileId) -> Vec<FileSymbol> {
314 let mut symbols = Vec::new();
315 let mut stack = Vec::new();
316
317 for event in source_file.syntax().preorder() {
318 match event {
319 WalkEvent::Enter(node) => {
320 if let Some(mut symbol) = to_file_symbol(&node, file_id) {
321 symbol.container_name = stack.last().cloned();
322
323 stack.push(symbol.name.clone());
324 symbols.push(symbol);
325 }
326 }
327
328 WalkEvent::Leave(node) => {
329 if to_symbol(&node).is_some() {
330 stack.pop();
331 }
332 }
333 }
334 }
335
336 symbols
337}
338
339fn to_symbol(node: &SyntaxNode) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
340 fn decl<N: NameOwner>(node: N) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
341 let name = node.name()?;
342 let name_range = name.syntax().text_range();
343 let name = name.text().clone();
344 let ptr = SyntaxNodePtr::new(node.syntax());
345
346 Some((name, ptr, name_range))
347 }
348 match_ast! {
349 match node {
350 ast::FnDef(it) => { decl(it) },
351 ast::StructDef(it) => { decl(it) },
352 ast::EnumDef(it) => { decl(it) },
353 ast::TraitDef(it) => { decl(it) },
354 ast::Module(it) => { decl(it) },
355 ast::TypeAliasDef(it) => { decl(it) },
356 ast::ConstDef(it) => { decl(it) },
357 ast::StaticDef(it) => { decl(it) },
358 _ => None,
359 }
360 }
361}
362
363fn to_file_symbol(node: &SyntaxNode, file_id: FileId) -> Option<FileSymbol> {
364 to_symbol(node).map(move |(name, ptr, name_range)| FileSymbol {
365 name,
366 ptr,
367 file_id,
368 name_range: Some(name_range),
369 container_name: None,
370 })
371}
372
373#[cfg(test)]
374mod tests {
375 use crate::{display::NavigationTarget, mock_analysis::single_file, Query};
376 use ra_syntax::{
377 SmolStr,
378 SyntaxKind::{FN_DEF, STRUCT_DEF},
379 };
380
381 #[test]
382 fn test_world_symbols_with_no_container() {
383 let code = r#"
384 enum FooInner { }
385 "#;
386
387 let mut symbols = get_symbols_matching(code, "FooInner");
388
389 let s = symbols.pop().unwrap();
390
391 assert_eq!(s.name(), "FooInner");
392 assert!(s.container_name().is_none());
393 }
394
395 #[test]
396 fn test_world_symbols_include_container_name() {
397 let code = r#"
398fn foo() {
399 enum FooInner { }
400}
401 "#;
402
403 let mut symbols = get_symbols_matching(code, "FooInner");
404
405 let s = symbols.pop().unwrap();
406
407 assert_eq!(s.name(), "FooInner");
408 assert_eq!(s.container_name(), Some(&SmolStr::new("foo")));
409
410 let code = r#"
411mod foo {
412 struct FooInner;
413}
414 "#;
415
416 let mut symbols = get_symbols_matching(code, "FooInner");
417
418 let s = symbols.pop().unwrap();
419
420 assert_eq!(s.name(), "FooInner");
421 assert_eq!(s.container_name(), Some(&SmolStr::new("foo")));
422 }
423
424 #[test]
425 fn test_world_symbols_are_case_sensitive() {
426 let code = r#"
427fn foo() {}
428
429struct Foo;
430 "#;
431
432 let symbols = get_symbols_matching(code, "Foo");
433
434 let fn_match = symbols.iter().find(|s| s.name() == "foo").map(|s| s.kind());
435 let struct_match = symbols.iter().find(|s| s.name() == "Foo").map(|s| s.kind());
436
437 assert_eq!(fn_match, Some(FN_DEF));
438 assert_eq!(struct_match, Some(STRUCT_DEF));
439 }
440
441 fn get_symbols_matching(text: &str, query: &str) -> Vec<NavigationTarget> {
442 let (analysis, _) = single_file(text);
443 analysis.symbol_search(Query::new(query.into())).unwrap()
444 }
445}