diff options
Diffstat (limited to 'crates/ra_ide_db/src/symbol_index.rs')
-rw-r--r-- | crates/ra_ide_db/src/symbol_index.rs | 430 |
1 files changed, 0 insertions, 430 deletions
diff --git a/crates/ra_ide_db/src/symbol_index.rs b/crates/ra_ide_db/src/symbol_index.rs deleted file mode 100644 index 35a2c5be3..000000000 --- a/crates/ra_ide_db/src/symbol_index.rs +++ /dev/null | |||
@@ -1,430 +0,0 @@ | |||
1 | //! This module handles fuzzy-searching of functions, structs and other symbols | ||
2 | //! by name across the whole workspace and dependencies. | ||
3 | //! | ||
4 | //! It works by building an incrementally-updated text-search index of all | ||
5 | //! symbols. The backbone of the index is the **awesome** `fst` crate by | ||
6 | //! @BurntSushi. | ||
7 | //! | ||
8 | //! In a nutshell, you give a set of strings to `fst`, and it builds a | ||
9 | //! finite state machine describing this set of strings. The strings which | ||
10 | //! could fuzzy-match a pattern can also be described by a finite state machine. | ||
11 | //! What is freaking cool is that you can now traverse both state machines in | ||
12 | //! lock-step to enumerate the strings which are both in the input set and | ||
//! fuzzy-match the query. Or, more formally, given two languages described by
14 | //! FSTs, one can build a product FST which describes the intersection of the | ||
15 | //! languages. | ||
16 | //! | ||
17 | //! `fst` does not support cheap updating of the index, but it supports unioning | ||
18 | //! of state machines. So, to account for changing source code, we build an FST | ||
19 | //! for each library (which is assumed to never change) and an FST for each Rust | ||
20 | //! file in the current workspace, and run a query against the union of all | ||
21 | //! those FSTs. | ||
22 | |||
23 | use std::{ | ||
24 | cmp::Ordering, | ||
25 | fmt, | ||
26 | hash::{Hash, Hasher}, | ||
27 | mem, | ||
28 | sync::Arc, | ||
29 | }; | ||
30 | |||
31 | use fst::{self, Streamer}; | ||
32 | use hir::db::DefDatabase; | ||
33 | use ra_db::{ | ||
34 | salsa::{self, ParallelDatabase}, | ||
35 | CrateId, FileId, SourceDatabaseExt, SourceRootId, | ||
36 | }; | ||
37 | use ra_prof::profile; | ||
38 | use ra_syntax::{ | ||
39 | ast::{self, NameOwner}, | ||
40 | match_ast, AstNode, Parse, SmolStr, SourceFile, | ||
41 | SyntaxKind::{self, *}, | ||
42 | SyntaxNode, SyntaxNodePtr, TextRange, WalkEvent, | ||
43 | }; | ||
44 | use rayon::prelude::*; | ||
45 | use rustc_hash::{FxHashMap, FxHashSet}; | ||
46 | |||
47 | use crate::RootDatabase; | ||
48 | |||
/// A fuzzy-search request against the symbol index, configured via the
/// setter methods below before being run by `world_symbols`/`crate_symbols`.
#[derive(Debug)]
pub struct Query {
    query: String,
    lowercased: String,
    only_types: bool,
    libs: bool,
    exact: bool,
    limit: usize,
}

impl Query {
    /// Creates a query that matches everything: all symbol kinds, workspace
    /// only, fuzzy matching, no result cap.
    pub fn new(query: String) -> Query {
        Query {
            lowercased: query.to_lowercase(),
            query,
            only_types: false,
            libs: false,
            exact: false,
            limit: usize::MAX,
        }
    }

    /// Restricts results to type declarations (structs, enums, traits, aliases).
    pub fn only_types(&mut self) {
        self.only_types = true;
    }

    /// Searches library roots instead of the local workspace.
    pub fn libs(&mut self) {
        self.libs = true;
    }

    /// Requires the symbol name to equal the query exactly (case-sensitive),
    /// instead of fuzzy subsequence matching.
    pub fn exact(&mut self) {
        self.exact = true;
    }

    /// Caps the number of returned symbols.
    pub fn limit(&mut self, limit: usize) {
        self.limit = limit;
    }
}
88 | |||
/// Salsa query group holding the symbol indices: a per-file index for
/// workspace files and a per-source-root index for libraries.
#[salsa::query_group(SymbolsDatabaseStorage)]
pub trait SymbolsDatabase: hir::db::HirDatabase + SourceDatabaseExt {
    /// Symbol index for a single workspace file; recomputed when the file changes.
    fn file_symbols(&self, file_id: FileId) -> Arc<SymbolIndex>;
    /// Symbol indices for all library source roots, built in bulk.
    fn library_symbols(&self) -> Arc<FxHashMap<SourceRootId, SymbolIndex>>;
    /// The set of "local" (that is, from the current workspace) roots.
    /// Files in local roots are assumed to change frequently.
    #[salsa::input]
    fn local_roots(&self) -> Arc<FxHashSet<SourceRootId>>;
    /// The set of roots for crates.io libraries.
    /// Files in libraries are assumed to never change.
    #[salsa::input]
    fn library_roots(&self) -> Arc<FxHashSet<SourceRootId>>;
}
102 | |||
103 | fn library_symbols(db: &dyn SymbolsDatabase) -> Arc<FxHashMap<SourceRootId, SymbolIndex>> { | ||
104 | let _p = profile("library_symbols"); | ||
105 | |||
106 | let roots = db.library_roots(); | ||
107 | let res = roots | ||
108 | .iter() | ||
109 | .map(|&root_id| { | ||
110 | let root = db.source_root(root_id); | ||
111 | let files = root | ||
112 | .iter() | ||
113 | .map(|it| (it, SourceDatabaseExt::file_text(db, it))) | ||
114 | .collect::<Vec<_>>(); | ||
115 | let symbol_index = SymbolIndex::for_files( | ||
116 | files.into_par_iter().map(|(file, text)| (file, SourceFile::parse(&text))), | ||
117 | ); | ||
118 | (root_id, symbol_index) | ||
119 | }) | ||
120 | .collect(); | ||
121 | Arc::new(res) | ||
122 | } | ||
123 | |||
/// Computes the symbol index for a single workspace file.
///
/// Checks for cancellation first so an obsolete query bails out before
/// paying for the parse.
fn file_symbols(db: &dyn SymbolsDatabase, file_id: FileId) -> Arc<SymbolIndex> {
    db.check_canceled();
    let parse = db.parse(file_id);

    let symbols = source_file_to_file_symbols(&parse.tree(), file_id);

    // FIXME: add macros here

    Arc::new(SymbolIndex::new(symbols))
}
134 | |||
/// Need to wrap Snapshot to provide `Clone` impl for `map_with`
struct Snap<DB>(DB);
impl<DB: ParallelDatabase> Clone for Snap<salsa::Snapshot<DB>> {
    fn clone(&self) -> Snap<salsa::Snapshot<DB>> {
        // Each clone takes a fresh salsa snapshot, giving every rayon
        // worker its own read-only view of the database.
        Snap(self.0.snapshot())
    }
}
142 | |||
// Feature: Workspace Symbol
//
// Uses fuzzy-search to find types, modules and functions by name across your
// project and dependencies. This is **the** most useful feature, which improves code
// navigation tremendously. It mostly works on top of the built-in LSP
// functionality, however `#` and `*` symbols can be used to narrow down the
// search. Specifically,
//
// - `Foo` searches for `Foo` type in the current workspace
// - `foo#` searches for `foo` function in the current workspace
// - `Foo*` searches for `Foo` type among dependencies, including `stdlib`
// - `foo#*` searches for `foo` function among dependencies
//
// That is, `#` switches from "types" to all symbols, `*` switches from the current
// workspace to dependencies.
//
// |===
// | Editor | Shortcut
//
// | VS Code | kbd:[Ctrl+T]
// |===
pub fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> {
    let _p = ra_prof::profile("world_symbols").detail(|| query.query.clone());

    // `tmp1`/`tmp2` keep the owned index data alive for the duration of the
    // search, while `buf` borrows `&SymbolIndex` out of whichever branch ran.
    let tmp1;
    let tmp2;
    let buf: Vec<&SymbolIndex> = if query.libs {
        // Library search: use the prebuilt per-source-root indices.
        tmp1 = db.library_symbols();
        tmp1.values().collect()
    } else {
        // Workspace search: gather every file in the local source roots...
        let mut files = Vec::new();
        for &root in db.local_roots().iter() {
            let sr = db.source_root(root);
            files.extend(sr.iter())
        }

        // ...and compute their per-file indices in parallel, each rayon
        // worker using its own database snapshot (see `Snap`).
        let snap = Snap(db.snapshot());
        tmp2 = files
            .par_iter()
            .map_with(snap, |db, &file_id| db.0.file_symbols(file_id))
            .collect::<Vec<_>>();
        tmp2.iter().map(|it| &**it).collect()
    };
    query.search(&buf)
}
188 | |||
189 | pub fn crate_symbols(db: &RootDatabase, krate: CrateId, query: Query) -> Vec<FileSymbol> { | ||
190 | // FIXME(#4842): This now depends on CrateDefMap, why not build the entire symbol index from | ||
191 | // that instead? | ||
192 | |||
193 | let def_map = db.crate_def_map(krate); | ||
194 | let mut files = Vec::new(); | ||
195 | let mut modules = vec![def_map.root]; | ||
196 | while let Some(module) = modules.pop() { | ||
197 | let data = &def_map[module]; | ||
198 | files.extend(data.origin.file_id()); | ||
199 | modules.extend(data.children.values()); | ||
200 | } | ||
201 | |||
202 | let snap = Snap(db.snapshot()); | ||
203 | |||
204 | let buf = files | ||
205 | .par_iter() | ||
206 | .map_with(snap, |db, &file_id| db.0.file_symbols(file_id)) | ||
207 | .collect::<Vec<_>>(); | ||
208 | let buf = buf.iter().map(|it| &**it).collect::<Vec<_>>(); | ||
209 | |||
210 | query.search(&buf) | ||
211 | } | ||
212 | |||
213 | pub fn index_resolve(db: &RootDatabase, name_ref: &ast::NameRef) -> Vec<FileSymbol> { | ||
214 | let name = name_ref.text(); | ||
215 | let mut query = Query::new(name.to_string()); | ||
216 | query.exact(); | ||
217 | query.limit(4); | ||
218 | world_symbols(db, query) | ||
219 | } | ||
220 | |||
#[derive(Default)]
pub struct SymbolIndex {
    // Symbols sorted case-insensitively by name (see `SymbolIndex::new`).
    symbols: Vec<FileSymbol>,
    // fst map from lowercased name to a packed (start, end) range into `symbols`.
    map: fst::Map<Vec<u8>>,
}
226 | |||
impl fmt::Debug for SymbolIndex {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Report only the symbol count; dumping all symbols would be huge.
        f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish()
    }
}
232 | |||
// Equality ignores the fst `map`, which is derived deterministically from
// `symbols` and therefore redundant for comparison.
impl PartialEq for SymbolIndex {
    fn eq(&self, other: &SymbolIndex) -> bool {
        self.symbols == other.symbols
    }
}

impl Eq for SymbolIndex {}
240 | |||
impl Hash for SymbolIndex {
    fn hash<H: Hasher>(&self, hasher: &mut H) {
        // Consistent with `PartialEq`: hash only `symbols`, not the derived `map`.
        self.symbols.hash(hasher)
    }
}
246 | |||
impl SymbolIndex {
    /// Builds an index: sorts `symbols` case-insensitively (ASCII) by name
    /// and constructs an fst map from each lowercased name to the range of
    /// symbols sharing that name.
    fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex {
        // Ordering used both for the sort and, below, to detect runs of
        // symbols whose names compare equal.
        fn cmp(lhs: &FileSymbol, rhs: &FileSymbol) -> Ordering {
            let lhs_chars = lhs.name.chars().map(|c| c.to_ascii_lowercase());
            let rhs_chars = rhs.name.chars().map(|c| c.to_ascii_lowercase());
            lhs_chars.cmp(rhs_chars)
        }

        symbols.par_sort_by(cmp);

        let mut builder = fst::MapBuilder::memory();

        let mut last_batch_start = 0;

        // After the sort, equal names are contiguous. Group each run into
        // one fst entry (fst keys must be unique and inserted in order);
        // the value encodes the run's index range into `symbols`.
        for idx in 0..symbols.len() {
            if let Some(next_symbol) = symbols.get(idx + 1) {
                // Next symbol still belongs to the current run; keep scanning.
                if cmp(&symbols[last_batch_start], next_symbol) == Ordering::Equal {
                    continue;
                }
            }

            let start = last_batch_start;
            let end = idx + 1;
            last_batch_start = end;

            let key = symbols[start].name.as_str().to_ascii_lowercase();
            let value = SymbolIndex::range_to_map_value(start, end);

            builder.insert(key, value).unwrap();
        }

        let map = fst::Map::new(builder.into_inner().unwrap()).unwrap();
        SymbolIndex { symbols, map }
    }

    /// Number of symbols in the index.
    pub fn len(&self) -> usize {
        self.symbols.len()
    }

    /// Approximate memory footprint: the fst's size plus the symbol vector.
    pub fn memory_size(&self) -> usize {
        self.map.as_fst().size() + self.symbols.len() * mem::size_of::<FileSymbol>()
    }

    /// Builds a single index covering all `files`, extracting the symbols
    /// from each parse tree in parallel.
    pub(crate) fn for_files(
        files: impl ParallelIterator<Item = (FileId, Parse<ast::SourceFile>)>,
    ) -> SymbolIndex {
        let symbols = files
            .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id))
            .collect::<Vec<_>>();
        SymbolIndex::new(symbols)
    }

    /// Packs a `symbols` index range into the u64 payload of an fst entry:
    /// `start` in the high 32 bits, `end` in the low 32 bits.
    fn range_to_map_value(start: usize, end: usize) -> u64 {
        debug_assert![start <= (std::u32::MAX as usize)];
        debug_assert![end <= (std::u32::MAX as usize)];

        ((start as u64) << 32) | end as u64
    }

    /// Inverse of `range_to_map_value`.
    fn map_value_to_range(value: u64) -> (usize, usize) {
        let end = value as u32 as usize;
        let start = (value >> 32) as usize;
        (start, end)
    }
}
312 | |||
impl Query {
    /// Runs the query over `indices` and returns the matching symbols.
    ///
    /// Builds a union stream over every index's fst map, each searched with
    /// a subsequence automaton over the lowercased query, then applies the
    /// `only_types`/`exact`/`limit` filters to each candidate symbol.
    pub(crate) fn search(self, indices: &[&SymbolIndex]) -> Vec<FileSymbol> {
        let mut op = fst::map::OpBuilder::new();
        for file_symbols in indices.iter() {
            let automaton = fst::automaton::Subsequence::new(&self.lowercased);
            op = op.add(file_symbols.map.search(automaton))
        }
        let mut stream = op.union();
        let mut res = Vec::new();
        while let Some((_, indexed_values)) = stream.next() {
            for indexed_value in indexed_values {
                // `index` identifies which SymbolIndex produced this hit;
                // `value` is a packed range into that index's `symbols`.
                let symbol_index = &indices[indexed_value.index];
                let (start, end) = SymbolIndex::map_value_to_range(indexed_value.value);

                for symbol in &symbol_index.symbols[start..end] {
                    if self.only_types && !is_type(symbol.kind) {
                        continue;
                    }
                    // Exact matching is case-sensitive, unlike the fst lookup.
                    if self.exact && symbol.name != self.query {
                        continue;
                    }

                    res.push(symbol.clone());
                    // Bail out as soon as the result cap is reached.
                    if res.len() >= self.limit {
                        return res;
                    }
                }
            }
        }
        res
    }
}
345 | |||
346 | fn is_type(kind: SyntaxKind) -> bool { | ||
347 | matches!(kind, STRUCT | ENUM | TRAIT | TYPE_ALIAS) | ||
348 | } | ||
349 | |||
/// The actual data that is stored in the index. It should be as compact as
/// possible.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct FileSymbol {
    /// File the symbol is declared in.
    pub file_id: FileId,
    /// The symbol's name, as written in the declaration.
    pub name: SmolStr,
    /// Syntax kind of the declaring node (`STRUCT`, `ENUM`, ...).
    pub kind: SyntaxKind,
    /// Range of the whole declaration node.
    pub range: TextRange,
    /// Pointer back to the declaring syntax node.
    pub ptr: SyntaxNodePtr,
    /// Range of just the name token, when one exists.
    pub name_range: Option<TextRange>,
    /// Name of the enclosing declaration, if any (see
    /// `source_file_to_file_symbols`).
    pub container_name: Option<SmolStr>,
}
362 | |||
/// Walks `source_file`'s syntax tree and collects a `FileSymbol` for every
/// declaration, recording each symbol's innermost enclosing declaration as
/// its `container_name`.
fn source_file_to_file_symbols(source_file: &SourceFile, file_id: FileId) -> Vec<FileSymbol> {
    let mut symbols = Vec::new();
    // Names of the declarations currently entered but not yet left; the top
    // of the stack is the innermost container.
    let mut stack = Vec::new();

    for event in source_file.syntax().preorder() {
        match event {
            WalkEvent::Enter(node) => {
                if let Some(mut symbol) = to_file_symbol(&node, file_id) {
                    symbol.container_name = stack.last().cloned();

                    stack.push(symbol.name.clone());
                    symbols.push(symbol);
                }
            }

            WalkEvent::Leave(node) => {
                // Pop exactly when Enter pushed: `to_file_symbol` returns
                // Some iff `to_symbol` does for the same node.
                if to_symbol(&node).is_some() {
                    stack.pop();
                }
            }
        }
    }

    symbols
}
388 | |||
/// Extracts `(name, pointer, name-range)` from a declaration node, or
/// `None` if the node is not one of the indexed declaration kinds.
fn to_symbol(node: &SyntaxNode) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
    // Shared extraction for any AST node that implements `NameOwner`;
    // returns `None` for anonymous declarations (no name token).
    fn decl<N: NameOwner>(node: N) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
        let name = node.name()?;
        let name_range = name.syntax().text_range();
        let name = name.text().clone();
        let ptr = SyntaxNodePtr::new(node.syntax());

        Some((name, ptr, name_range))
    }
    match_ast! {
        match node {
            ast::Fn(it) => decl(it),
            ast::Struct(it) => decl(it),
            ast::Enum(it) => decl(it),
            ast::Trait(it) => decl(it),
            ast::Module(it) => decl(it),
            ast::TypeAlias(it) => decl(it),
            ast::Const(it) => decl(it),
            ast::Static(it) => decl(it),
            ast::MacroCall(it) => {
                // Index `macro_rules!` definitions only, not ordinary macro calls.
                if it.is_macro_rules().is_some() {
                    decl(it)
                } else {
                    None
                }
            },
            _ => None,
        }
    }
}
419 | |||
420 | fn to_file_symbol(node: &SyntaxNode, file_id: FileId) -> Option<FileSymbol> { | ||
421 | to_symbol(node).map(move |(name, ptr, name_range)| FileSymbol { | ||
422 | name, | ||
423 | kind: node.kind(), | ||
424 | range: node.text_range(), | ||
425 | ptr, | ||
426 | file_id, | ||
427 | name_range: Some(name_range), | ||
428 | container_name: None, | ||
429 | }) | ||
430 | } | ||