diff options
Diffstat (limited to 'crates/ra_ide/src/symbol_index.rs')
-rw-r--r-- | crates/ra_ide/src/symbol_index.rs | 405 |
1 files changed, 405 insertions, 0 deletions
diff --git a/crates/ra_ide/src/symbol_index.rs b/crates/ra_ide/src/symbol_index.rs new file mode 100644 index 000000000..5729eb5b3 --- /dev/null +++ b/crates/ra_ide/src/symbol_index.rs | |||
@@ -0,0 +1,405 @@ | |||
1 | //! This module handles fuzzy-searching of functions, structs and other symbols | ||
2 | //! by name across the whole workspace and dependencies. | ||
3 | //! | ||
4 | //! It works by building an incrementally-updated text-search index of all | ||
5 | //! symbols. The backbone of the index is the **awesome** `fst` crate by | ||
6 | //! @BurntSushi. | ||
7 | //! | ||
8 | //! In a nutshell, you give a set of strings to `fst`, and it builds a | ||
9 | //! finite state machine describing this set of strings. The strings which | ||
10 | //! could fuzzy-match a pattern can also be described by a finite state machine. | ||
11 | //! What is freaking cool is that you can now traverse both state machines in | ||
12 | //! lock-step to enumerate the strings which are both in the input set and | ||
13 | //! fuzz-match the query. Or, more formally, given two languages described by | ||
14 | //! FSTs, one can build a product FST which describes the intersection of the | ||
15 | //! languages. | ||
16 | //! | ||
17 | //! `fst` does not support cheap updating of the index, but it supports unioning | ||
18 | //! of state machines. So, to account for changing source code, we build an FST | ||
19 | //! for each library (which is assumed to never change) and an FST for each Rust | ||
20 | //! file in the current workspace, and run a query against the union of all | ||
21 | //! those FSTs. | ||
22 | use std::{ | ||
23 | fmt, | ||
24 | hash::{Hash, Hasher}, | ||
25 | mem, | ||
26 | sync::Arc, | ||
27 | }; | ||
28 | |||
29 | use fst::{self, Streamer}; | ||
30 | use ra_db::{ | ||
31 | salsa::{self, ParallelDatabase}, | ||
32 | SourceDatabaseExt, SourceRootId, | ||
33 | }; | ||
34 | use ra_syntax::{ | ||
35 | ast::{self, NameOwner}, | ||
36 | match_ast, AstNode, Parse, SmolStr, SourceFile, | ||
37 | SyntaxKind::{self, *}, | ||
38 | SyntaxNode, SyntaxNodePtr, TextRange, WalkEvent, | ||
39 | }; | ||
40 | #[cfg(not(feature = "wasm"))] | ||
41 | use rayon::prelude::*; | ||
42 | |||
43 | use crate::{db::RootDatabase, FileId, Query}; | ||
44 | |||
45 | #[salsa::query_group(SymbolsDatabaseStorage)] | ||
46 | pub(crate) trait SymbolsDatabase: hir::db::HirDatabase { | ||
47 | fn file_symbols(&self, file_id: FileId) -> Arc<SymbolIndex>; | ||
48 | #[salsa::input] | ||
49 | fn library_symbols(&self, id: SourceRootId) -> Arc<SymbolIndex>; | ||
50 | /// The set of "local" (that is, from the current workspace) roots. | ||
51 | /// Files in local roots are assumed to change frequently. | ||
52 | #[salsa::input] | ||
53 | fn local_roots(&self) -> Arc<Vec<SourceRootId>>; | ||
54 | /// The set of roots for crates.io libraries. | ||
55 | /// Files in libraries are assumed to never change. | ||
56 | #[salsa::input] | ||
57 | fn library_roots(&self) -> Arc<Vec<SourceRootId>>; | ||
58 | } | ||
59 | |||
60 | fn file_symbols(db: &impl SymbolsDatabase, file_id: FileId) -> Arc<SymbolIndex> { | ||
61 | db.check_canceled(); | ||
62 | let parse = db.parse(file_id); | ||
63 | |||
64 | let symbols = source_file_to_file_symbols(&parse.tree(), file_id); | ||
65 | |||
66 | // FIXME: add macros here | ||
67 | |||
68 | Arc::new(SymbolIndex::new(symbols)) | ||
69 | } | ||
70 | |||
71 | pub(crate) fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> { | ||
72 | /// Need to wrap Snapshot to provide `Clone` impl for `map_with` | ||
73 | struct Snap(salsa::Snapshot<RootDatabase>); | ||
74 | impl Clone for Snap { | ||
75 | fn clone(&self) -> Snap { | ||
76 | Snap(self.0.snapshot()) | ||
77 | } | ||
78 | } | ||
79 | |||
80 | let buf: Vec<Arc<SymbolIndex>> = if query.libs { | ||
81 | let snap = Snap(db.snapshot()); | ||
82 | #[cfg(not(feature = "wasm"))] | ||
83 | let buf = db | ||
84 | .library_roots() | ||
85 | .par_iter() | ||
86 | .map_with(snap, |db, &lib_id| db.0.library_symbols(lib_id)) | ||
87 | .collect(); | ||
88 | |||
89 | #[cfg(feature = "wasm")] | ||
90 | let buf = db.library_roots().iter().map(|&lib_id| snap.0.library_symbols(lib_id)).collect(); | ||
91 | |||
92 | buf | ||
93 | } else { | ||
94 | let mut files = Vec::new(); | ||
95 | for &root in db.local_roots().iter() { | ||
96 | let sr = db.source_root(root); | ||
97 | files.extend(sr.walk()) | ||
98 | } | ||
99 | |||
100 | let snap = Snap(db.snapshot()); | ||
101 | #[cfg(not(feature = "wasm"))] | ||
102 | let buf = | ||
103 | files.par_iter().map_with(snap, |db, &file_id| db.0.file_symbols(file_id)).collect(); | ||
104 | |||
105 | #[cfg(feature = "wasm")] | ||
106 | let buf = files.iter().map(|&file_id| snap.0.file_symbols(file_id)).collect(); | ||
107 | |||
108 | buf | ||
109 | }; | ||
110 | query.search(&buf) | ||
111 | } | ||
112 | |||
113 | pub(crate) fn index_resolve(db: &RootDatabase, name_ref: &ast::NameRef) -> Vec<FileSymbol> { | ||
114 | let name = name_ref.text(); | ||
115 | let mut query = Query::new(name.to_string()); | ||
116 | query.exact(); | ||
117 | query.limit(4); | ||
118 | crate::symbol_index::world_symbols(db, query) | ||
119 | } | ||
120 | |||
121 | #[derive(Default)] | ||
122 | pub(crate) struct SymbolIndex { | ||
123 | symbols: Vec<FileSymbol>, | ||
124 | map: fst::Map, | ||
125 | } | ||
126 | |||
127 | impl fmt::Debug for SymbolIndex { | ||
128 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
129 | f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish() | ||
130 | } | ||
131 | } | ||
132 | |||
133 | impl PartialEq for SymbolIndex { | ||
134 | fn eq(&self, other: &SymbolIndex) -> bool { | ||
135 | self.symbols == other.symbols | ||
136 | } | ||
137 | } | ||
138 | |||
139 | impl Eq for SymbolIndex {} | ||
140 | |||
141 | impl Hash for SymbolIndex { | ||
142 | fn hash<H: Hasher>(&self, hasher: &mut H) { | ||
143 | self.symbols.hash(hasher) | ||
144 | } | ||
145 | } | ||
146 | |||
147 | impl SymbolIndex { | ||
148 | fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex { | ||
149 | fn cmp_key<'a>(s1: &'a FileSymbol) -> impl Ord + 'a { | ||
150 | unicase::Ascii::new(s1.name.as_str()) | ||
151 | } | ||
152 | #[cfg(not(feature = "wasm"))] | ||
153 | symbols.par_sort_by(|s1, s2| cmp_key(s1).cmp(&cmp_key(s2))); | ||
154 | |||
155 | #[cfg(feature = "wasm")] | ||
156 | symbols.sort_by(|s1, s2| cmp_key(s1).cmp(&cmp_key(s2))); | ||
157 | |||
158 | let mut builder = fst::MapBuilder::memory(); | ||
159 | |||
160 | let mut last_batch_start = 0; | ||
161 | |||
162 | for idx in 0..symbols.len() { | ||
163 | if symbols.get(last_batch_start).map(cmp_key) == symbols.get(idx + 1).map(cmp_key) { | ||
164 | continue; | ||
165 | } | ||
166 | |||
167 | let start = last_batch_start; | ||
168 | let end = idx + 1; | ||
169 | last_batch_start = end; | ||
170 | |||
171 | let key = symbols[start].name.as_str().to_lowercase(); | ||
172 | let value = SymbolIndex::range_to_map_value(start, end); | ||
173 | |||
174 | builder.insert(key, value).unwrap(); | ||
175 | } | ||
176 | |||
177 | let map = fst::Map::from_bytes(builder.into_inner().unwrap()).unwrap(); | ||
178 | SymbolIndex { symbols, map } | ||
179 | } | ||
180 | |||
181 | pub(crate) fn len(&self) -> usize { | ||
182 | self.symbols.len() | ||
183 | } | ||
184 | |||
185 | pub(crate) fn memory_size(&self) -> usize { | ||
186 | self.map.as_fst().size() + self.symbols.len() * mem::size_of::<FileSymbol>() | ||
187 | } | ||
188 | |||
189 | #[cfg(not(feature = "wasm"))] | ||
190 | pub(crate) fn for_files( | ||
191 | files: impl ParallelIterator<Item = (FileId, Parse<ast::SourceFile>)>, | ||
192 | ) -> SymbolIndex { | ||
193 | let symbols = files | ||
194 | .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id)) | ||
195 | .collect::<Vec<_>>(); | ||
196 | SymbolIndex::new(symbols) | ||
197 | } | ||
198 | |||
199 | #[cfg(feature = "wasm")] | ||
200 | pub(crate) fn for_files( | ||
201 | files: impl Iterator<Item = (FileId, Parse<ast::SourceFile>)>, | ||
202 | ) -> SymbolIndex { | ||
203 | let symbols = files | ||
204 | .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id)) | ||
205 | .collect::<Vec<_>>(); | ||
206 | SymbolIndex::new(symbols) | ||
207 | } | ||
208 | |||
209 | fn range_to_map_value(start: usize, end: usize) -> u64 { | ||
210 | debug_assert![start <= (std::u32::MAX as usize)]; | ||
211 | debug_assert![end <= (std::u32::MAX as usize)]; | ||
212 | |||
213 | ((start as u64) << 32) | end as u64 | ||
214 | } | ||
215 | |||
216 | fn map_value_to_range(value: u64) -> (usize, usize) { | ||
217 | let end = value as u32 as usize; | ||
218 | let start = (value >> 32) as usize; | ||
219 | (start, end) | ||
220 | } | ||
221 | } | ||
222 | |||
223 | impl Query { | ||
224 | pub(crate) fn search(self, indices: &[Arc<SymbolIndex>]) -> Vec<FileSymbol> { | ||
225 | let mut op = fst::map::OpBuilder::new(); | ||
226 | for file_symbols in indices.iter() { | ||
227 | let automaton = fst::automaton::Subsequence::new(&self.lowercased); | ||
228 | op = op.add(file_symbols.map.search(automaton)) | ||
229 | } | ||
230 | let mut stream = op.union(); | ||
231 | let mut res = Vec::new(); | ||
232 | while let Some((_, indexed_values)) = stream.next() { | ||
233 | if res.len() >= self.limit { | ||
234 | break; | ||
235 | } | ||
236 | for indexed_value in indexed_values { | ||
237 | let symbol_index = &indices[indexed_value.index]; | ||
238 | let (start, end) = SymbolIndex::map_value_to_range(indexed_value.value); | ||
239 | |||
240 | for symbol in &symbol_index.symbols[start..end] { | ||
241 | if self.only_types && !is_type(symbol.ptr.kind()) { | ||
242 | continue; | ||
243 | } | ||
244 | if self.exact && symbol.name != self.query { | ||
245 | continue; | ||
246 | } | ||
247 | res.push(symbol.clone()); | ||
248 | } | ||
249 | } | ||
250 | } | ||
251 | res | ||
252 | } | ||
253 | } | ||
254 | |||
255 | fn is_type(kind: SyntaxKind) -> bool { | ||
256 | match kind { | ||
257 | STRUCT_DEF | ENUM_DEF | TRAIT_DEF | TYPE_ALIAS_DEF => true, | ||
258 | _ => false, | ||
259 | } | ||
260 | } | ||
261 | |||
262 | /// The actual data that is stored in the index. It should be as compact as | ||
263 | /// possible. | ||
264 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
265 | pub(crate) struct FileSymbol { | ||
266 | pub(crate) file_id: FileId, | ||
267 | pub(crate) name: SmolStr, | ||
268 | pub(crate) ptr: SyntaxNodePtr, | ||
269 | pub(crate) name_range: Option<TextRange>, | ||
270 | pub(crate) container_name: Option<SmolStr>, | ||
271 | } | ||
272 | |||
273 | fn source_file_to_file_symbols(source_file: &SourceFile, file_id: FileId) -> Vec<FileSymbol> { | ||
274 | let mut symbols = Vec::new(); | ||
275 | let mut stack = Vec::new(); | ||
276 | |||
277 | for event in source_file.syntax().preorder() { | ||
278 | match event { | ||
279 | WalkEvent::Enter(node) => { | ||
280 | if let Some(mut symbol) = to_file_symbol(&node, file_id) { | ||
281 | symbol.container_name = stack.last().cloned(); | ||
282 | |||
283 | stack.push(symbol.name.clone()); | ||
284 | symbols.push(symbol); | ||
285 | } | ||
286 | } | ||
287 | |||
288 | WalkEvent::Leave(node) => { | ||
289 | if to_symbol(&node).is_some() { | ||
290 | stack.pop(); | ||
291 | } | ||
292 | } | ||
293 | } | ||
294 | } | ||
295 | |||
296 | symbols | ||
297 | } | ||
298 | |||
299 | fn to_symbol(node: &SyntaxNode) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> { | ||
300 | fn decl<N: NameOwner>(node: N) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> { | ||
301 | let name = node.name()?; | ||
302 | let name_range = name.syntax().text_range(); | ||
303 | let name = name.text().clone(); | ||
304 | let ptr = SyntaxNodePtr::new(node.syntax()); | ||
305 | |||
306 | Some((name, ptr, name_range)) | ||
307 | } | ||
308 | match_ast! { | ||
309 | match node { | ||
310 | ast::FnDef(it) => { decl(it) }, | ||
311 | ast::StructDef(it) => { decl(it) }, | ||
312 | ast::EnumDef(it) => { decl(it) }, | ||
313 | ast::TraitDef(it) => { decl(it) }, | ||
314 | ast::Module(it) => { decl(it) }, | ||
315 | ast::TypeAliasDef(it) => { decl(it) }, | ||
316 | ast::ConstDef(it) => { decl(it) }, | ||
317 | ast::StaticDef(it) => { decl(it) }, | ||
318 | _ => None, | ||
319 | } | ||
320 | } | ||
321 | } | ||
322 | |||
323 | fn to_file_symbol(node: &SyntaxNode, file_id: FileId) -> Option<FileSymbol> { | ||
324 | to_symbol(node).map(move |(name, ptr, name_range)| FileSymbol { | ||
325 | name, | ||
326 | ptr, | ||
327 | file_id, | ||
328 | name_range: Some(name_range), | ||
329 | container_name: None, | ||
330 | }) | ||
331 | } | ||
332 | |||
333 | #[cfg(test)] | ||
334 | mod tests { | ||
335 | use crate::{display::NavigationTarget, mock_analysis::single_file, Query}; | ||
336 | use ra_syntax::{ | ||
337 | SmolStr, | ||
338 | SyntaxKind::{FN_DEF, STRUCT_DEF}, | ||
339 | }; | ||
340 | |||
341 | #[test] | ||
342 | fn test_world_symbols_with_no_container() { | ||
343 | let code = r#" | ||
344 | enum FooInner { } | ||
345 | "#; | ||
346 | |||
347 | let mut symbols = get_symbols_matching(code, "FooInner"); | ||
348 | |||
349 | let s = symbols.pop().unwrap(); | ||
350 | |||
351 | assert_eq!(s.name(), "FooInner"); | ||
352 | assert!(s.container_name().is_none()); | ||
353 | } | ||
354 | |||
355 | #[test] | ||
356 | fn test_world_symbols_include_container_name() { | ||
357 | let code = r#" | ||
358 | fn foo() { | ||
359 | enum FooInner { } | ||
360 | } | ||
361 | "#; | ||
362 | |||
363 | let mut symbols = get_symbols_matching(code, "FooInner"); | ||
364 | |||
365 | let s = symbols.pop().unwrap(); | ||
366 | |||
367 | assert_eq!(s.name(), "FooInner"); | ||
368 | assert_eq!(s.container_name(), Some(&SmolStr::new("foo"))); | ||
369 | |||
370 | let code = r#" | ||
371 | mod foo { | ||
372 | struct FooInner; | ||
373 | } | ||
374 | "#; | ||
375 | |||
376 | let mut symbols = get_symbols_matching(code, "FooInner"); | ||
377 | |||
378 | let s = symbols.pop().unwrap(); | ||
379 | |||
380 | assert_eq!(s.name(), "FooInner"); | ||
381 | assert_eq!(s.container_name(), Some(&SmolStr::new("foo"))); | ||
382 | } | ||
383 | |||
384 | #[test] | ||
385 | fn test_world_symbols_are_case_sensitive() { | ||
386 | let code = r#" | ||
387 | fn foo() {} | ||
388 | |||
389 | struct Foo; | ||
390 | "#; | ||
391 | |||
392 | let symbols = get_symbols_matching(code, "Foo"); | ||
393 | |||
394 | let fn_match = symbols.iter().find(|s| s.name() == "foo").map(|s| s.kind()); | ||
395 | let struct_match = symbols.iter().find(|s| s.name() == "Foo").map(|s| s.kind()); | ||
396 | |||
397 | assert_eq!(fn_match, Some(FN_DEF)); | ||
398 | assert_eq!(struct_match, Some(STRUCT_DEF)); | ||
399 | } | ||
400 | |||
401 | fn get_symbols_matching(text: &str, query: &str) -> Vec<NavigationTarget> { | ||
402 | let (analysis, _) = single_file(text); | ||
403 | analysis.symbol_search(Query::new(query.into())).unwrap() | ||
404 | } | ||
405 | } | ||