diff options
Diffstat (limited to 'crates/ra_ide/src/ide_db/symbol_index.rs')
-rw-r--r-- | crates/ra_ide/src/ide_db/symbol_index.rs | 445 |
1 files changed, 0 insertions, 445 deletions
diff --git a/crates/ra_ide/src/ide_db/symbol_index.rs b/crates/ra_ide/src/ide_db/symbol_index.rs deleted file mode 100644 index c66eeb8e2..000000000 --- a/crates/ra_ide/src/ide_db/symbol_index.rs +++ /dev/null | |||
@@ -1,445 +0,0 @@ | |||
1 | //! This module handles fuzzy-searching of functions, structs and other symbols | ||
2 | //! by name across the whole workspace and dependencies. | ||
3 | //! | ||
4 | //! It works by building an incrementally-updated text-search index of all | ||
5 | //! symbols. The backbone of the index is the **awesome** `fst` crate by | ||
6 | //! @BurntSushi. | ||
7 | //! | ||
8 | //! In a nutshell, you give a set of strings to `fst`, and it builds a | ||
9 | //! finite state machine describing this set of strings. The strings which | ||
10 | //! could fuzzy-match a pattern can also be described by a finite state machine. | ||
11 | //! What is freaking cool is that you can now traverse both state machines in | ||
12 | //! lock-step to enumerate the strings which are both in the input set and | ||
13 | //! fuzz-match the query. Or, more formally, given two languages described by | ||
14 | //! FSTs, one can build a product FST which describes the intersection of the | ||
15 | //! languages. | ||
16 | //! | ||
17 | //! `fst` does not support cheap updating of the index, but it supports unioning | ||
18 | //! of state machines. So, to account for changing source code, we build an FST | ||
19 | //! for each library (which is assumed to never change) and an FST for each Rust | ||
20 | //! file in the current workspace, and run a query against the union of all | ||
21 | //! those FSTs. | ||
22 | use std::{ | ||
23 | fmt, | ||
24 | hash::{Hash, Hasher}, | ||
25 | mem, | ||
26 | sync::Arc, | ||
27 | }; | ||
28 | |||
29 | use fst::{self, Streamer}; | ||
30 | use ra_db::{ | ||
31 | salsa::{self, ParallelDatabase}, | ||
32 | FileId, SourceDatabaseExt, SourceRootId, | ||
33 | }; | ||
34 | use ra_syntax::{ | ||
35 | ast::{self, NameOwner}, | ||
36 | match_ast, AstNode, Parse, SmolStr, SourceFile, | ||
37 | SyntaxKind::{self, *}, | ||
38 | SyntaxNode, SyntaxNodePtr, TextRange, WalkEvent, | ||
39 | }; | ||
40 | #[cfg(not(feature = "wasm"))] | ||
41 | use rayon::prelude::*; | ||
42 | |||
43 | use crate::ide_db::RootDatabase; | ||
44 | |||
45 | #[derive(Debug)] | ||
46 | pub struct Query { | ||
47 | query: String, | ||
48 | lowercased: String, | ||
49 | only_types: bool, | ||
50 | libs: bool, | ||
51 | exact: bool, | ||
52 | limit: usize, | ||
53 | } | ||
54 | |||
55 | impl Query { | ||
56 | pub fn new(query: String) -> Query { | ||
57 | let lowercased = query.to_lowercase(); | ||
58 | Query { | ||
59 | query, | ||
60 | lowercased, | ||
61 | only_types: false, | ||
62 | libs: false, | ||
63 | exact: false, | ||
64 | limit: usize::max_value(), | ||
65 | } | ||
66 | } | ||
67 | |||
68 | pub fn only_types(&mut self) { | ||
69 | self.only_types = true; | ||
70 | } | ||
71 | |||
72 | pub fn libs(&mut self) { | ||
73 | self.libs = true; | ||
74 | } | ||
75 | |||
76 | pub fn exact(&mut self) { | ||
77 | self.exact = true; | ||
78 | } | ||
79 | |||
80 | pub fn limit(&mut self, limit: usize) { | ||
81 | self.limit = limit | ||
82 | } | ||
83 | } | ||
84 | |||
85 | #[salsa::query_group(SymbolsDatabaseStorage)] | ||
86 | pub(crate) trait SymbolsDatabase: hir::db::HirDatabase { | ||
87 | fn file_symbols(&self, file_id: FileId) -> Arc<SymbolIndex>; | ||
88 | #[salsa::input] | ||
89 | fn library_symbols(&self, id: SourceRootId) -> Arc<SymbolIndex>; | ||
90 | /// The set of "local" (that is, from the current workspace) roots. | ||
91 | /// Files in local roots are assumed to change frequently. | ||
92 | #[salsa::input] | ||
93 | fn local_roots(&self) -> Arc<Vec<SourceRootId>>; | ||
94 | /// The set of roots for crates.io libraries. | ||
95 | /// Files in libraries are assumed to never change. | ||
96 | #[salsa::input] | ||
97 | fn library_roots(&self) -> Arc<Vec<SourceRootId>>; | ||
98 | } | ||
99 | |||
100 | fn file_symbols(db: &impl SymbolsDatabase, file_id: FileId) -> Arc<SymbolIndex> { | ||
101 | db.check_canceled(); | ||
102 | let parse = db.parse(file_id); | ||
103 | |||
104 | let symbols = source_file_to_file_symbols(&parse.tree(), file_id); | ||
105 | |||
106 | // FIXME: add macros here | ||
107 | |||
108 | Arc::new(SymbolIndex::new(symbols)) | ||
109 | } | ||
110 | |||
111 | pub(crate) fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> { | ||
112 | /// Need to wrap Snapshot to provide `Clone` impl for `map_with` | ||
113 | struct Snap(salsa::Snapshot<RootDatabase>); | ||
114 | impl Clone for Snap { | ||
115 | fn clone(&self) -> Snap { | ||
116 | Snap(self.0.snapshot()) | ||
117 | } | ||
118 | } | ||
119 | |||
120 | let buf: Vec<Arc<SymbolIndex>> = if query.libs { | ||
121 | let snap = Snap(db.snapshot()); | ||
122 | #[cfg(not(feature = "wasm"))] | ||
123 | let buf = db | ||
124 | .library_roots() | ||
125 | .par_iter() | ||
126 | .map_with(snap, |db, &lib_id| db.0.library_symbols(lib_id)) | ||
127 | .collect(); | ||
128 | |||
129 | #[cfg(feature = "wasm")] | ||
130 | let buf = db.library_roots().iter().map(|&lib_id| snap.0.library_symbols(lib_id)).collect(); | ||
131 | |||
132 | buf | ||
133 | } else { | ||
134 | let mut files = Vec::new(); | ||
135 | for &root in db.local_roots().iter() { | ||
136 | let sr = db.source_root(root); | ||
137 | files.extend(sr.walk()) | ||
138 | } | ||
139 | |||
140 | let snap = Snap(db.snapshot()); | ||
141 | #[cfg(not(feature = "wasm"))] | ||
142 | let buf = | ||
143 | files.par_iter().map_with(snap, |db, &file_id| db.0.file_symbols(file_id)).collect(); | ||
144 | |||
145 | #[cfg(feature = "wasm")] | ||
146 | let buf = files.iter().map(|&file_id| snap.0.file_symbols(file_id)).collect(); | ||
147 | |||
148 | buf | ||
149 | }; | ||
150 | query.search(&buf) | ||
151 | } | ||
152 | |||
153 | pub(crate) fn index_resolve(db: &RootDatabase, name_ref: &ast::NameRef) -> Vec<FileSymbol> { | ||
154 | let name = name_ref.text(); | ||
155 | let mut query = Query::new(name.to_string()); | ||
156 | query.exact(); | ||
157 | query.limit(4); | ||
158 | world_symbols(db, query) | ||
159 | } | ||
160 | |||
161 | #[derive(Default)] | ||
162 | pub(crate) struct SymbolIndex { | ||
163 | symbols: Vec<FileSymbol>, | ||
164 | map: fst::Map, | ||
165 | } | ||
166 | |||
167 | impl fmt::Debug for SymbolIndex { | ||
168 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
169 | f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish() | ||
170 | } | ||
171 | } | ||
172 | |||
173 | impl PartialEq for SymbolIndex { | ||
174 | fn eq(&self, other: &SymbolIndex) -> bool { | ||
175 | self.symbols == other.symbols | ||
176 | } | ||
177 | } | ||
178 | |||
179 | impl Eq for SymbolIndex {} | ||
180 | |||
181 | impl Hash for SymbolIndex { | ||
182 | fn hash<H: Hasher>(&self, hasher: &mut H) { | ||
183 | self.symbols.hash(hasher) | ||
184 | } | ||
185 | } | ||
186 | |||
187 | impl SymbolIndex { | ||
188 | fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex { | ||
189 | fn cmp_key<'a>(s1: &'a FileSymbol) -> impl Ord + 'a { | ||
190 | unicase::Ascii::new(s1.name.as_str()) | ||
191 | } | ||
192 | #[cfg(not(feature = "wasm"))] | ||
193 | symbols.par_sort_by(|s1, s2| cmp_key(s1).cmp(&cmp_key(s2))); | ||
194 | |||
195 | #[cfg(feature = "wasm")] | ||
196 | symbols.sort_by(|s1, s2| cmp_key(s1).cmp(&cmp_key(s2))); | ||
197 | |||
198 | let mut builder = fst::MapBuilder::memory(); | ||
199 | |||
200 | let mut last_batch_start = 0; | ||
201 | |||
202 | for idx in 0..symbols.len() { | ||
203 | if symbols.get(last_batch_start).map(cmp_key) == symbols.get(idx + 1).map(cmp_key) { | ||
204 | continue; | ||
205 | } | ||
206 | |||
207 | let start = last_batch_start; | ||
208 | let end = idx + 1; | ||
209 | last_batch_start = end; | ||
210 | |||
211 | let key = symbols[start].name.as_str().to_lowercase(); | ||
212 | let value = SymbolIndex::range_to_map_value(start, end); | ||
213 | |||
214 | builder.insert(key, value).unwrap(); | ||
215 | } | ||
216 | |||
217 | let map = fst::Map::from_bytes(builder.into_inner().unwrap()).unwrap(); | ||
218 | SymbolIndex { symbols, map } | ||
219 | } | ||
220 | |||
221 | pub(crate) fn len(&self) -> usize { | ||
222 | self.symbols.len() | ||
223 | } | ||
224 | |||
225 | pub(crate) fn memory_size(&self) -> usize { | ||
226 | self.map.as_fst().size() + self.symbols.len() * mem::size_of::<FileSymbol>() | ||
227 | } | ||
228 | |||
229 | #[cfg(not(feature = "wasm"))] | ||
230 | pub(crate) fn for_files( | ||
231 | files: impl ParallelIterator<Item = (FileId, Parse<ast::SourceFile>)>, | ||
232 | ) -> SymbolIndex { | ||
233 | let symbols = files | ||
234 | .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id)) | ||
235 | .collect::<Vec<_>>(); | ||
236 | SymbolIndex::new(symbols) | ||
237 | } | ||
238 | |||
239 | #[cfg(feature = "wasm")] | ||
240 | pub(crate) fn for_files( | ||
241 | files: impl Iterator<Item = (FileId, Parse<ast::SourceFile>)>, | ||
242 | ) -> SymbolIndex { | ||
243 | let symbols = files | ||
244 | .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id)) | ||
245 | .collect::<Vec<_>>(); | ||
246 | SymbolIndex::new(symbols) | ||
247 | } | ||
248 | |||
249 | fn range_to_map_value(start: usize, end: usize) -> u64 { | ||
250 | debug_assert![start <= (std::u32::MAX as usize)]; | ||
251 | debug_assert![end <= (std::u32::MAX as usize)]; | ||
252 | |||
253 | ((start as u64) << 32) | end as u64 | ||
254 | } | ||
255 | |||
256 | fn map_value_to_range(value: u64) -> (usize, usize) { | ||
257 | let end = value as u32 as usize; | ||
258 | let start = (value >> 32) as usize; | ||
259 | (start, end) | ||
260 | } | ||
261 | } | ||
262 | |||
263 | impl Query { | ||
264 | pub(crate) fn search(self, indices: &[Arc<SymbolIndex>]) -> Vec<FileSymbol> { | ||
265 | let mut op = fst::map::OpBuilder::new(); | ||
266 | for file_symbols in indices.iter() { | ||
267 | let automaton = fst::automaton::Subsequence::new(&self.lowercased); | ||
268 | op = op.add(file_symbols.map.search(automaton)) | ||
269 | } | ||
270 | let mut stream = op.union(); | ||
271 | let mut res = Vec::new(); | ||
272 | while let Some((_, indexed_values)) = stream.next() { | ||
273 | if res.len() >= self.limit { | ||
274 | break; | ||
275 | } | ||
276 | for indexed_value in indexed_values { | ||
277 | let symbol_index = &indices[indexed_value.index]; | ||
278 | let (start, end) = SymbolIndex::map_value_to_range(indexed_value.value); | ||
279 | |||
280 | for symbol in &symbol_index.symbols[start..end] { | ||
281 | if self.only_types && !is_type(symbol.ptr.kind()) { | ||
282 | continue; | ||
283 | } | ||
284 | if self.exact && symbol.name != self.query { | ||
285 | continue; | ||
286 | } | ||
287 | res.push(symbol.clone()); | ||
288 | } | ||
289 | } | ||
290 | } | ||
291 | res | ||
292 | } | ||
293 | } | ||
294 | |||
295 | fn is_type(kind: SyntaxKind) -> bool { | ||
296 | match kind { | ||
297 | STRUCT_DEF | ENUM_DEF | TRAIT_DEF | TYPE_ALIAS_DEF => true, | ||
298 | _ => false, | ||
299 | } | ||
300 | } | ||
301 | |||
302 | /// The actual data that is stored in the index. It should be as compact as | ||
303 | /// possible. | ||
304 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
305 | pub(crate) struct FileSymbol { | ||
306 | pub(crate) file_id: FileId, | ||
307 | pub(crate) name: SmolStr, | ||
308 | pub(crate) ptr: SyntaxNodePtr, | ||
309 | pub(crate) name_range: Option<TextRange>, | ||
310 | pub(crate) container_name: Option<SmolStr>, | ||
311 | } | ||
312 | |||
313 | fn source_file_to_file_symbols(source_file: &SourceFile, file_id: FileId) -> Vec<FileSymbol> { | ||
314 | let mut symbols = Vec::new(); | ||
315 | let mut stack = Vec::new(); | ||
316 | |||
317 | for event in source_file.syntax().preorder() { | ||
318 | match event { | ||
319 | WalkEvent::Enter(node) => { | ||
320 | if let Some(mut symbol) = to_file_symbol(&node, file_id) { | ||
321 | symbol.container_name = stack.last().cloned(); | ||
322 | |||
323 | stack.push(symbol.name.clone()); | ||
324 | symbols.push(symbol); | ||
325 | } | ||
326 | } | ||
327 | |||
328 | WalkEvent::Leave(node) => { | ||
329 | if to_symbol(&node).is_some() { | ||
330 | stack.pop(); | ||
331 | } | ||
332 | } | ||
333 | } | ||
334 | } | ||
335 | |||
336 | symbols | ||
337 | } | ||
338 | |||
339 | fn to_symbol(node: &SyntaxNode) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> { | ||
340 | fn decl<N: NameOwner>(node: N) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> { | ||
341 | let name = node.name()?; | ||
342 | let name_range = name.syntax().text_range(); | ||
343 | let name = name.text().clone(); | ||
344 | let ptr = SyntaxNodePtr::new(node.syntax()); | ||
345 | |||
346 | Some((name, ptr, name_range)) | ||
347 | } | ||
348 | match_ast! { | ||
349 | match node { | ||
350 | ast::FnDef(it) => { decl(it) }, | ||
351 | ast::StructDef(it) => { decl(it) }, | ||
352 | ast::EnumDef(it) => { decl(it) }, | ||
353 | ast::TraitDef(it) => { decl(it) }, | ||
354 | ast::Module(it) => { decl(it) }, | ||
355 | ast::TypeAliasDef(it) => { decl(it) }, | ||
356 | ast::ConstDef(it) => { decl(it) }, | ||
357 | ast::StaticDef(it) => { decl(it) }, | ||
358 | _ => None, | ||
359 | } | ||
360 | } | ||
361 | } | ||
362 | |||
363 | fn to_file_symbol(node: &SyntaxNode, file_id: FileId) -> Option<FileSymbol> { | ||
364 | to_symbol(node).map(move |(name, ptr, name_range)| FileSymbol { | ||
365 | name, | ||
366 | ptr, | ||
367 | file_id, | ||
368 | name_range: Some(name_range), | ||
369 | container_name: None, | ||
370 | }) | ||
371 | } | ||
372 | |||
373 | #[cfg(test)] | ||
374 | mod tests { | ||
375 | use crate::{display::NavigationTarget, mock_analysis::single_file, Query}; | ||
376 | use ra_syntax::{ | ||
377 | SmolStr, | ||
378 | SyntaxKind::{FN_DEF, STRUCT_DEF}, | ||
379 | }; | ||
380 | |||
381 | #[test] | ||
382 | fn test_world_symbols_with_no_container() { | ||
383 | let code = r#" | ||
384 | enum FooInner { } | ||
385 | "#; | ||
386 | |||
387 | let mut symbols = get_symbols_matching(code, "FooInner"); | ||
388 | |||
389 | let s = symbols.pop().unwrap(); | ||
390 | |||
391 | assert_eq!(s.name(), "FooInner"); | ||
392 | assert!(s.container_name().is_none()); | ||
393 | } | ||
394 | |||
395 | #[test] | ||
396 | fn test_world_symbols_include_container_name() { | ||
397 | let code = r#" | ||
398 | fn foo() { | ||
399 | enum FooInner { } | ||
400 | } | ||
401 | "#; | ||
402 | |||
403 | let mut symbols = get_symbols_matching(code, "FooInner"); | ||
404 | |||
405 | let s = symbols.pop().unwrap(); | ||
406 | |||
407 | assert_eq!(s.name(), "FooInner"); | ||
408 | assert_eq!(s.container_name(), Some(&SmolStr::new("foo"))); | ||
409 | |||
410 | let code = r#" | ||
411 | mod foo { | ||
412 | struct FooInner; | ||
413 | } | ||
414 | "#; | ||
415 | |||
416 | let mut symbols = get_symbols_matching(code, "FooInner"); | ||
417 | |||
418 | let s = symbols.pop().unwrap(); | ||
419 | |||
420 | assert_eq!(s.name(), "FooInner"); | ||
421 | assert_eq!(s.container_name(), Some(&SmolStr::new("foo"))); | ||
422 | } | ||
423 | |||
424 | #[test] | ||
425 | fn test_world_symbols_are_case_sensitive() { | ||
426 | let code = r#" | ||
427 | fn foo() {} | ||
428 | |||
429 | struct Foo; | ||
430 | "#; | ||
431 | |||
432 | let symbols = get_symbols_matching(code, "Foo"); | ||
433 | |||
434 | let fn_match = symbols.iter().find(|s| s.name() == "foo").map(|s| s.kind()); | ||
435 | let struct_match = symbols.iter().find(|s| s.name() == "Foo").map(|s| s.kind()); | ||
436 | |||
437 | assert_eq!(fn_match, Some(FN_DEF)); | ||
438 | assert_eq!(struct_match, Some(STRUCT_DEF)); | ||
439 | } | ||
440 | |||
441 | fn get_symbols_matching(text: &str, query: &str) -> Vec<NavigationTarget> { | ||
442 | let (analysis, _) = single_file(text); | ||
443 | analysis.symbol_search(Query::new(query.into())).unwrap() | ||
444 | } | ||
445 | } | ||