diff options
Diffstat (limited to 'crates/ra_ide_api/src/symbol_index.rs')
-rw-r--r-- | crates/ra_ide_api/src/symbol_index.rs | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/crates/ra_ide_api/src/symbol_index.rs b/crates/ra_ide_api/src/symbol_index.rs new file mode 100644 index 000000000..8dd15b40e --- /dev/null +++ b/crates/ra_ide_api/src/symbol_index.rs | |||
@@ -0,0 +1,222 @@ | |||
1 | //! This module handles fuzzy-searching of functions, structs and other symbols | ||
2 | //! by name across the whole workspace and dependencies. | ||
3 | //! | ||
4 | //! It works by building an incrementally-updated text-search index of all | ||
5 | //! symbols. The backbone of the index is the **awesome** `fst` crate by | ||
6 | //! @BurntSushi. | ||
7 | //! | ||
8 | //! In a nutshell, you give a set of strings to the `fst`, and it builds a | ||
9 | //! finite state machine describing this set of strtings. The strings which | ||
10 | //! could fuzzy-match a pattern can also be described by a finite state machine. | ||
11 | //! What is freakingly cool is that you can now traverse both state machines in | ||
12 | //! lock-step to enumerate the strings which are both in the input set and | ||
13 | //! fuzz-match the query. Or, more formally, given two langauges described by | ||
14 | //! fsts, one can build an product fst which describes the intersection of the | ||
15 | //! languages. | ||
16 | //! | ||
17 | //! `fst` does not support cheap updating of the index, but it supports unioning | ||
18 | //! of state machines. So, to account for changing source code, we build an fst | ||
19 | //! for each library (which is assumed to never change) and an fst for each rust | ||
20 | //! file in the current workspace, and run a query aginst the union of all | ||
21 | //! thouse fsts. | ||
22 | use std::{ | ||
23 | cmp::Ordering, | ||
24 | hash::{Hash, Hasher}, | ||
25 | sync::Arc, | ||
26 | }; | ||
27 | |||
28 | use fst::{self, Streamer}; | ||
29 | use ra_syntax::{ | ||
30 | SyntaxNode, SourceFile, SmolStr, TreePtr, AstNode, | ||
31 | algo::{visit::{visitor, Visitor}, find_covering_node}, | ||
32 | SyntaxKind::{self, *}, | ||
33 | ast::{self, NameOwner}, | ||
34 | }; | ||
35 | use ra_db::{SourceRootId, FilesDatabase, LocalSyntaxPtr}; | ||
36 | use salsa::ParallelDatabase; | ||
37 | use rayon::prelude::*; | ||
38 | |||
39 | use crate::{ | ||
40 | Cancelable, FileId, Query, | ||
41 | db::RootDatabase, | ||
42 | }; | ||
43 | |||
44 | salsa::query_group! { | ||
45 | pub(crate) trait SymbolsDatabase: hir::db::HirDatabase { | ||
46 | fn file_symbols(file_id: FileId) -> Cancelable<Arc<SymbolIndex>> { | ||
47 | type FileSymbolsQuery; | ||
48 | } | ||
49 | fn library_symbols(id: SourceRootId) -> Arc<SymbolIndex> { | ||
50 | type LibrarySymbolsQuery; | ||
51 | storage input; | ||
52 | } | ||
53 | } | ||
54 | } | ||
55 | |||
56 | fn file_symbols(db: &impl SymbolsDatabase, file_id: FileId) -> Cancelable<Arc<SymbolIndex>> { | ||
57 | db.check_canceled()?; | ||
58 | let source_file = db.source_file(file_id); | ||
59 | let mut symbols = source_file | ||
60 | .syntax() | ||
61 | .descendants() | ||
62 | .filter_map(to_symbol) | ||
63 | .map(move |(name, ptr)| FileSymbol { name, ptr, file_id }) | ||
64 | .collect::<Vec<_>>(); | ||
65 | |||
66 | for (name, text_range) in hir::source_binder::macro_symbols(db, file_id)? { | ||
67 | let node = find_covering_node(source_file.syntax(), text_range); | ||
68 | let ptr = LocalSyntaxPtr::new(node); | ||
69 | symbols.push(FileSymbol { file_id, name, ptr }) | ||
70 | } | ||
71 | |||
72 | Ok(Arc::new(SymbolIndex::new(symbols))) | ||
73 | } | ||
74 | |||
75 | pub(crate) fn world_symbols(db: &RootDatabase, query: Query) -> Cancelable<Vec<FileSymbol>> { | ||
76 | /// Need to wrap Snapshot to provide `Clone` impl for `map_with` | ||
77 | struct Snap(salsa::Snapshot<RootDatabase>); | ||
78 | impl Clone for Snap { | ||
79 | fn clone(&self) -> Snap { | ||
80 | Snap(self.0.snapshot()) | ||
81 | } | ||
82 | } | ||
83 | |||
84 | let buf: Vec<Arc<SymbolIndex>> = if query.libs { | ||
85 | let snap = Snap(db.snapshot()); | ||
86 | db.library_roots() | ||
87 | .par_iter() | ||
88 | .map_with(snap, |db, &lib_id| db.0.library_symbols(lib_id)) | ||
89 | .collect() | ||
90 | } else { | ||
91 | let mut files = Vec::new(); | ||
92 | for &root in db.local_roots().iter() { | ||
93 | let sr = db.source_root(root); | ||
94 | files.extend(sr.files.values().map(|&it| it)) | ||
95 | } | ||
96 | |||
97 | let snap = Snap(db.snapshot()); | ||
98 | files | ||
99 | .par_iter() | ||
100 | .map_with(snap, |db, &file_id| db.0.file_symbols(file_id)) | ||
101 | .filter_map(|it| it.ok()) | ||
102 | .collect() | ||
103 | }; | ||
104 | Ok(query.search(&buf)) | ||
105 | } | ||
106 | |||
107 | #[derive(Default, Debug)] | ||
108 | pub(crate) struct SymbolIndex { | ||
109 | symbols: Vec<FileSymbol>, | ||
110 | map: fst::Map, | ||
111 | } | ||
112 | |||
113 | impl PartialEq for SymbolIndex { | ||
114 | fn eq(&self, other: &SymbolIndex) -> bool { | ||
115 | self.symbols == other.symbols | ||
116 | } | ||
117 | } | ||
118 | |||
119 | impl Eq for SymbolIndex {} | ||
120 | |||
121 | impl Hash for SymbolIndex { | ||
122 | fn hash<H: Hasher>(&self, hasher: &mut H) { | ||
123 | self.symbols.hash(hasher) | ||
124 | } | ||
125 | } | ||
126 | |||
127 | impl SymbolIndex { | ||
128 | fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex { | ||
129 | fn cmp(s1: &FileSymbol, s2: &FileSymbol) -> Ordering { | ||
130 | unicase::Ascii::new(s1.name.as_str()).cmp(&unicase::Ascii::new(s2.name.as_str())) | ||
131 | } | ||
132 | symbols.par_sort_by(cmp); | ||
133 | symbols.dedup_by(|s1, s2| cmp(s1, s2) == Ordering::Equal); | ||
134 | let names = symbols.iter().map(|it| it.name.as_str().to_lowercase()); | ||
135 | let map = fst::Map::from_iter(names.into_iter().zip(0u64..)).unwrap(); | ||
136 | SymbolIndex { symbols, map } | ||
137 | } | ||
138 | |||
139 | pub(crate) fn len(&self) -> usize { | ||
140 | self.symbols.len() | ||
141 | } | ||
142 | |||
143 | pub(crate) fn for_files( | ||
144 | files: impl ParallelIterator<Item = (FileId, TreePtr<SourceFile>)>, | ||
145 | ) -> SymbolIndex { | ||
146 | let symbols = files | ||
147 | .flat_map(|(file_id, file)| { | ||
148 | file.syntax() | ||
149 | .descendants() | ||
150 | .filter_map(to_symbol) | ||
151 | .map(move |(name, ptr)| FileSymbol { name, ptr, file_id }) | ||
152 | .collect::<Vec<_>>() | ||
153 | }) | ||
154 | .collect::<Vec<_>>(); | ||
155 | SymbolIndex::new(symbols) | ||
156 | } | ||
157 | } | ||
158 | |||
159 | impl Query { | ||
160 | pub(crate) fn search(self, indices: &[Arc<SymbolIndex>]) -> Vec<FileSymbol> { | ||
161 | let mut op = fst::map::OpBuilder::new(); | ||
162 | for file_symbols in indices.iter() { | ||
163 | let automaton = fst::automaton::Subsequence::new(&self.lowercased); | ||
164 | op = op.add(file_symbols.map.search(automaton)) | ||
165 | } | ||
166 | let mut stream = op.union(); | ||
167 | let mut res = Vec::new(); | ||
168 | while let Some((_, indexed_values)) = stream.next() { | ||
169 | if res.len() >= self.limit { | ||
170 | break; | ||
171 | } | ||
172 | for indexed_value in indexed_values { | ||
173 | let file_symbols = &indices[indexed_value.index]; | ||
174 | let idx = indexed_value.value as usize; | ||
175 | |||
176 | let symbol = &file_symbols.symbols[idx]; | ||
177 | if self.only_types && !is_type(symbol.ptr.kind()) { | ||
178 | continue; | ||
179 | } | ||
180 | if self.exact && symbol.name != self.query { | ||
181 | continue; | ||
182 | } | ||
183 | res.push(symbol.clone()); | ||
184 | } | ||
185 | } | ||
186 | res | ||
187 | } | ||
188 | } | ||
189 | |||
190 | fn is_type(kind: SyntaxKind) -> bool { | ||
191 | match kind { | ||
192 | STRUCT_DEF | ENUM_DEF | TRAIT_DEF | TYPE_DEF => true, | ||
193 | _ => false, | ||
194 | } | ||
195 | } | ||
196 | |||
197 | /// The actual data that is stored in the index. It should be as compact as | ||
198 | /// possible. | ||
199 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
200 | pub(crate) struct FileSymbol { | ||
201 | pub(crate) file_id: FileId, | ||
202 | pub(crate) name: SmolStr, | ||
203 | pub(crate) ptr: LocalSyntaxPtr, | ||
204 | } | ||
205 | |||
206 | fn to_symbol(node: &SyntaxNode) -> Option<(SmolStr, LocalSyntaxPtr)> { | ||
207 | fn decl<N: NameOwner>(node: &N) -> Option<(SmolStr, LocalSyntaxPtr)> { | ||
208 | let name = node.name()?.text().clone(); | ||
209 | let ptr = LocalSyntaxPtr::new(node.syntax()); | ||
210 | Some((name, ptr)) | ||
211 | } | ||
212 | visitor() | ||
213 | .visit(decl::<ast::FnDef>) | ||
214 | .visit(decl::<ast::StructDef>) | ||
215 | .visit(decl::<ast::EnumDef>) | ||
216 | .visit(decl::<ast::TraitDef>) | ||
217 | .visit(decl::<ast::Module>) | ||
218 | .visit(decl::<ast::TypeDef>) | ||
219 | .visit(decl::<ast::ConstDef>) | ||
220 | .visit(decl::<ast::StaticDef>) | ||
221 | .accept(node)? | ||
222 | } | ||