diff options
Diffstat (limited to 'crates/ra_ide_db/src/symbol_index.rs')
-rw-r--r-- | crates/ra_ide_db/src/symbol_index.rs | 372 |
1 files changed, 372 insertions, 0 deletions
diff --git a/crates/ra_ide_db/src/symbol_index.rs b/crates/ra_ide_db/src/symbol_index.rs new file mode 100644 index 000000000..64ddf2f95 --- /dev/null +++ b/crates/ra_ide_db/src/symbol_index.rs | |||
@@ -0,0 +1,372 @@ | |||
1 | //! This module handles fuzzy-searching of functions, structs and other symbols | ||
2 | //! by name across the whole workspace and dependencies. | ||
3 | //! | ||
4 | //! It works by building an incrementally-updated text-search index of all | ||
5 | //! symbols. The backbone of the index is the **awesome** `fst` crate by | ||
6 | //! @BurntSushi. | ||
7 | //! | ||
8 | //! In a nutshell, you give a set of strings to `fst`, and it builds a | ||
9 | //! finite state machine describing this set of strings. The strings which | ||
10 | //! could fuzzy-match a pattern can also be described by a finite state machine. | ||
11 | //! What is freaking cool is that you can now traverse both state machines in | ||
12 | //! lock-step to enumerate the strings which are both in the input set and | ||
13 | //! fuzz-match the query. Or, more formally, given two languages described by | ||
14 | //! FSTs, one can build a product FST which describes the intersection of the | ||
15 | //! languages. | ||
16 | //! | ||
17 | //! `fst` does not support cheap updating of the index, but it supports unioning | ||
18 | //! of state machines. So, to account for changing source code, we build an FST | ||
19 | //! for each library (which is assumed to never change) and an FST for each Rust | ||
20 | //! file in the current workspace, and run a query against the union of all | ||
21 | //! those FSTs. | ||
22 | |||
23 | use std::{ | ||
24 | fmt, | ||
25 | hash::{Hash, Hasher}, | ||
26 | mem, | ||
27 | sync::Arc, | ||
28 | }; | ||
29 | |||
30 | use fst::{self, Streamer}; | ||
31 | use ra_db::{ | ||
32 | salsa::{self, ParallelDatabase}, | ||
33 | FileId, SourceDatabaseExt, SourceRootId, | ||
34 | }; | ||
35 | use ra_syntax::{ | ||
36 | ast::{self, NameOwner}, | ||
37 | match_ast, AstNode, Parse, SmolStr, SourceFile, | ||
38 | SyntaxKind::{self, *}, | ||
39 | SyntaxNode, SyntaxNodePtr, TextRange, WalkEvent, | ||
40 | }; | ||
41 | #[cfg(not(feature = "wasm"))] | ||
42 | use rayon::prelude::*; | ||
43 | |||
44 | use crate::RootDatabase; | ||
45 | |||
46 | #[derive(Debug)] | ||
47 | pub struct Query { | ||
48 | query: String, | ||
49 | lowercased: String, | ||
50 | only_types: bool, | ||
51 | libs: bool, | ||
52 | exact: bool, | ||
53 | limit: usize, | ||
54 | } | ||
55 | |||
56 | impl Query { | ||
57 | pub fn new(query: String) -> Query { | ||
58 | let lowercased = query.to_lowercase(); | ||
59 | Query { | ||
60 | query, | ||
61 | lowercased, | ||
62 | only_types: false, | ||
63 | libs: false, | ||
64 | exact: false, | ||
65 | limit: usize::max_value(), | ||
66 | } | ||
67 | } | ||
68 | |||
69 | pub fn only_types(&mut self) { | ||
70 | self.only_types = true; | ||
71 | } | ||
72 | |||
73 | pub fn libs(&mut self) { | ||
74 | self.libs = true; | ||
75 | } | ||
76 | |||
77 | pub fn exact(&mut self) { | ||
78 | self.exact = true; | ||
79 | } | ||
80 | |||
81 | pub fn limit(&mut self, limit: usize) { | ||
82 | self.limit = limit | ||
83 | } | ||
84 | } | ||
85 | |||
86 | #[salsa::query_group(SymbolsDatabaseStorage)] | ||
87 | pub trait SymbolsDatabase: hir::db::HirDatabase { | ||
88 | fn file_symbols(&self, file_id: FileId) -> Arc<SymbolIndex>; | ||
89 | #[salsa::input] | ||
90 | fn library_symbols(&self, id: SourceRootId) -> Arc<SymbolIndex>; | ||
91 | /// The set of "local" (that is, from the current workspace) roots. | ||
92 | /// Files in local roots are assumed to change frequently. | ||
93 | #[salsa::input] | ||
94 | fn local_roots(&self) -> Arc<Vec<SourceRootId>>; | ||
95 | /// The set of roots for crates.io libraries. | ||
96 | /// Files in libraries are assumed to never change. | ||
97 | #[salsa::input] | ||
98 | fn library_roots(&self) -> Arc<Vec<SourceRootId>>; | ||
99 | } | ||
100 | |||
101 | fn file_symbols(db: &impl SymbolsDatabase, file_id: FileId) -> Arc<SymbolIndex> { | ||
102 | db.check_canceled(); | ||
103 | let parse = db.parse(file_id); | ||
104 | |||
105 | let symbols = source_file_to_file_symbols(&parse.tree(), file_id); | ||
106 | |||
107 | // FIXME: add macros here | ||
108 | |||
109 | Arc::new(SymbolIndex::new(symbols)) | ||
110 | } | ||
111 | |||
112 | pub fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> { | ||
113 | /// Need to wrap Snapshot to provide `Clone` impl for `map_with` | ||
114 | struct Snap(salsa::Snapshot<RootDatabase>); | ||
115 | impl Clone for Snap { | ||
116 | fn clone(&self) -> Snap { | ||
117 | Snap(self.0.snapshot()) | ||
118 | } | ||
119 | } | ||
120 | |||
121 | let buf: Vec<Arc<SymbolIndex>> = if query.libs { | ||
122 | let snap = Snap(db.snapshot()); | ||
123 | #[cfg(not(feature = "wasm"))] | ||
124 | let buf = db | ||
125 | .library_roots() | ||
126 | .par_iter() | ||
127 | .map_with(snap, |db, &lib_id| db.0.library_symbols(lib_id)) | ||
128 | .collect(); | ||
129 | |||
130 | #[cfg(feature = "wasm")] | ||
131 | let buf = db.library_roots().iter().map(|&lib_id| snap.0.library_symbols(lib_id)).collect(); | ||
132 | |||
133 | buf | ||
134 | } else { | ||
135 | let mut files = Vec::new(); | ||
136 | for &root in db.local_roots().iter() { | ||
137 | let sr = db.source_root(root); | ||
138 | files.extend(sr.walk()) | ||
139 | } | ||
140 | |||
141 | let snap = Snap(db.snapshot()); | ||
142 | #[cfg(not(feature = "wasm"))] | ||
143 | let buf = | ||
144 | files.par_iter().map_with(snap, |db, &file_id| db.0.file_symbols(file_id)).collect(); | ||
145 | |||
146 | #[cfg(feature = "wasm")] | ||
147 | let buf = files.iter().map(|&file_id| snap.0.file_symbols(file_id)).collect(); | ||
148 | |||
149 | buf | ||
150 | }; | ||
151 | query.search(&buf) | ||
152 | } | ||
153 | |||
154 | pub fn index_resolve(db: &RootDatabase, name_ref: &ast::NameRef) -> Vec<FileSymbol> { | ||
155 | let name = name_ref.text(); | ||
156 | let mut query = Query::new(name.to_string()); | ||
157 | query.exact(); | ||
158 | query.limit(4); | ||
159 | world_symbols(db, query) | ||
160 | } | ||
161 | |||
162 | #[derive(Default)] | ||
163 | pub struct SymbolIndex { | ||
164 | symbols: Vec<FileSymbol>, | ||
165 | map: fst::Map, | ||
166 | } | ||
167 | |||
168 | impl fmt::Debug for SymbolIndex { | ||
169 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
170 | f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish() | ||
171 | } | ||
172 | } | ||
173 | |||
174 | impl PartialEq for SymbolIndex { | ||
175 | fn eq(&self, other: &SymbolIndex) -> bool { | ||
176 | self.symbols == other.symbols | ||
177 | } | ||
178 | } | ||
179 | |||
180 | impl Eq for SymbolIndex {} | ||
181 | |||
182 | impl Hash for SymbolIndex { | ||
183 | fn hash<H: Hasher>(&self, hasher: &mut H) { | ||
184 | self.symbols.hash(hasher) | ||
185 | } | ||
186 | } | ||
187 | |||
188 | impl SymbolIndex { | ||
189 | fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex { | ||
190 | fn cmp_key<'a>(s1: &'a FileSymbol) -> impl Ord + 'a { | ||
191 | unicase::Ascii::new(s1.name.as_str()) | ||
192 | } | ||
193 | #[cfg(not(feature = "wasm"))] | ||
194 | symbols.par_sort_by(|s1, s2| cmp_key(s1).cmp(&cmp_key(s2))); | ||
195 | |||
196 | #[cfg(feature = "wasm")] | ||
197 | symbols.sort_by(|s1, s2| cmp_key(s1).cmp(&cmp_key(s2))); | ||
198 | |||
199 | let mut builder = fst::MapBuilder::memory(); | ||
200 | |||
201 | let mut last_batch_start = 0; | ||
202 | |||
203 | for idx in 0..symbols.len() { | ||
204 | if symbols.get(last_batch_start).map(cmp_key) == symbols.get(idx + 1).map(cmp_key) { | ||
205 | continue; | ||
206 | } | ||
207 | |||
208 | let start = last_batch_start; | ||
209 | let end = idx + 1; | ||
210 | last_batch_start = end; | ||
211 | |||
212 | let key = symbols[start].name.as_str().to_lowercase(); | ||
213 | let value = SymbolIndex::range_to_map_value(start, end); | ||
214 | |||
215 | builder.insert(key, value).unwrap(); | ||
216 | } | ||
217 | |||
218 | let map = fst::Map::from_bytes(builder.into_inner().unwrap()).unwrap(); | ||
219 | SymbolIndex { symbols, map } | ||
220 | } | ||
221 | |||
222 | pub fn len(&self) -> usize { | ||
223 | self.symbols.len() | ||
224 | } | ||
225 | |||
226 | pub fn memory_size(&self) -> usize { | ||
227 | self.map.as_fst().size() + self.symbols.len() * mem::size_of::<FileSymbol>() | ||
228 | } | ||
229 | |||
230 | #[cfg(not(feature = "wasm"))] | ||
231 | pub(crate) fn for_files( | ||
232 | files: impl ParallelIterator<Item = (FileId, Parse<ast::SourceFile>)>, | ||
233 | ) -> SymbolIndex { | ||
234 | let symbols = files | ||
235 | .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id)) | ||
236 | .collect::<Vec<_>>(); | ||
237 | SymbolIndex::new(symbols) | ||
238 | } | ||
239 | |||
240 | #[cfg(feature = "wasm")] | ||
241 | pub(crate) fn for_files( | ||
242 | files: impl Iterator<Item = (FileId, Parse<ast::SourceFile>)>, | ||
243 | ) -> SymbolIndex { | ||
244 | let symbols = files | ||
245 | .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id)) | ||
246 | .collect::<Vec<_>>(); | ||
247 | SymbolIndex::new(symbols) | ||
248 | } | ||
249 | |||
250 | fn range_to_map_value(start: usize, end: usize) -> u64 { | ||
251 | debug_assert![start <= (std::u32::MAX as usize)]; | ||
252 | debug_assert![end <= (std::u32::MAX as usize)]; | ||
253 | |||
254 | ((start as u64) << 32) | end as u64 | ||
255 | } | ||
256 | |||
257 | fn map_value_to_range(value: u64) -> (usize, usize) { | ||
258 | let end = value as u32 as usize; | ||
259 | let start = (value >> 32) as usize; | ||
260 | (start, end) | ||
261 | } | ||
262 | } | ||
263 | |||
264 | impl Query { | ||
265 | pub(crate) fn search(self, indices: &[Arc<SymbolIndex>]) -> Vec<FileSymbol> { | ||
266 | let mut op = fst::map::OpBuilder::new(); | ||
267 | for file_symbols in indices.iter() { | ||
268 | let automaton = fst::automaton::Subsequence::new(&self.lowercased); | ||
269 | op = op.add(file_symbols.map.search(automaton)) | ||
270 | } | ||
271 | let mut stream = op.union(); | ||
272 | let mut res = Vec::new(); | ||
273 | while let Some((_, indexed_values)) = stream.next() { | ||
274 | if res.len() >= self.limit { | ||
275 | break; | ||
276 | } | ||
277 | for indexed_value in indexed_values { | ||
278 | let symbol_index = &indices[indexed_value.index]; | ||
279 | let (start, end) = SymbolIndex::map_value_to_range(indexed_value.value); | ||
280 | |||
281 | for symbol in &symbol_index.symbols[start..end] { | ||
282 | if self.only_types && !is_type(symbol.ptr.kind()) { | ||
283 | continue; | ||
284 | } | ||
285 | if self.exact && symbol.name != self.query { | ||
286 | continue; | ||
287 | } | ||
288 | res.push(symbol.clone()); | ||
289 | } | ||
290 | } | ||
291 | } | ||
292 | res | ||
293 | } | ||
294 | } | ||
295 | |||
296 | fn is_type(kind: SyntaxKind) -> bool { | ||
297 | match kind { | ||
298 | STRUCT_DEF | ENUM_DEF | TRAIT_DEF | TYPE_ALIAS_DEF => true, | ||
299 | _ => false, | ||
300 | } | ||
301 | } | ||
302 | |||
303 | /// The actual data that is stored in the index. It should be as compact as | ||
304 | /// possible. | ||
305 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
306 | pub struct FileSymbol { | ||
307 | pub file_id: FileId, | ||
308 | pub name: SmolStr, | ||
309 | pub ptr: SyntaxNodePtr, | ||
310 | pub name_range: Option<TextRange>, | ||
311 | pub container_name: Option<SmolStr>, | ||
312 | } | ||
313 | |||
314 | fn source_file_to_file_symbols(source_file: &SourceFile, file_id: FileId) -> Vec<FileSymbol> { | ||
315 | let mut symbols = Vec::new(); | ||
316 | let mut stack = Vec::new(); | ||
317 | |||
318 | for event in source_file.syntax().preorder() { | ||
319 | match event { | ||
320 | WalkEvent::Enter(node) => { | ||
321 | if let Some(mut symbol) = to_file_symbol(&node, file_id) { | ||
322 | symbol.container_name = stack.last().cloned(); | ||
323 | |||
324 | stack.push(symbol.name.clone()); | ||
325 | symbols.push(symbol); | ||
326 | } | ||
327 | } | ||
328 | |||
329 | WalkEvent::Leave(node) => { | ||
330 | if to_symbol(&node).is_some() { | ||
331 | stack.pop(); | ||
332 | } | ||
333 | } | ||
334 | } | ||
335 | } | ||
336 | |||
337 | symbols | ||
338 | } | ||
339 | |||
340 | fn to_symbol(node: &SyntaxNode) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> { | ||
341 | fn decl<N: NameOwner>(node: N) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> { | ||
342 | let name = node.name()?; | ||
343 | let name_range = name.syntax().text_range(); | ||
344 | let name = name.text().clone(); | ||
345 | let ptr = SyntaxNodePtr::new(node.syntax()); | ||
346 | |||
347 | Some((name, ptr, name_range)) | ||
348 | } | ||
349 | match_ast! { | ||
350 | match node { | ||
351 | ast::FnDef(it) => { decl(it) }, | ||
352 | ast::StructDef(it) => { decl(it) }, | ||
353 | ast::EnumDef(it) => { decl(it) }, | ||
354 | ast::TraitDef(it) => { decl(it) }, | ||
355 | ast::Module(it) => { decl(it) }, | ||
356 | ast::TypeAliasDef(it) => { decl(it) }, | ||
357 | ast::ConstDef(it) => { decl(it) }, | ||
358 | ast::StaticDef(it) => { decl(it) }, | ||
359 | _ => None, | ||
360 | } | ||
361 | } | ||
362 | } | ||
363 | |||
364 | fn to_file_symbol(node: &SyntaxNode, file_id: FileId) -> Option<FileSymbol> { | ||
365 | to_symbol(node).map(move |(name, ptr, name_range)| FileSymbol { | ||
366 | name, | ||
367 | ptr, | ||
368 | file_id, | ||
369 | name_range: Some(name_range), | ||
370 | container_name: None, | ||
371 | }) | ||
372 | } | ||