aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_ide_db/src/symbol_index.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_ide_db/src/symbol_index.rs')
-rw-r--r--crates/ra_ide_db/src/symbol_index.rs372
1 files changed, 372 insertions, 0 deletions
diff --git a/crates/ra_ide_db/src/symbol_index.rs b/crates/ra_ide_db/src/symbol_index.rs
new file mode 100644
index 000000000..64ddf2f95
--- /dev/null
+++ b/crates/ra_ide_db/src/symbol_index.rs
@@ -0,0 +1,372 @@
1//! This module handles fuzzy-searching of functions, structs and other symbols
2//! by name across the whole workspace and dependencies.
3//!
4//! It works by building an incrementally-updated text-search index of all
5//! symbols. The backbone of the index is the **awesome** `fst` crate by
6//! @BurntSushi.
7//!
8//! In a nutshell, you give a set of strings to `fst`, and it builds a
9//! finite state machine describing this set of strings. The strings which
10//! could fuzzy-match a pattern can also be described by a finite state machine.
11//! What is freaking cool is that you can now traverse both state machines in
12//! lock-step to enumerate the strings which are both in the input set and
13//! fuzz-match the query. Or, more formally, given two languages described by
14//! FSTs, one can build a product FST which describes the intersection of the
15//! languages.
16//!
17//! `fst` does not support cheap updating of the index, but it supports unioning
18//! of state machines. So, to account for changing source code, we build an FST
19//! for each library (which is assumed to never change) and an FST for each Rust
20//! file in the current workspace, and run a query against the union of all
21//! those FSTs.
22
23use std::{
24 fmt,
25 hash::{Hash, Hasher},
26 mem,
27 sync::Arc,
28};
29
30use fst::{self, Streamer};
31use ra_db::{
32 salsa::{self, ParallelDatabase},
33 FileId, SourceDatabaseExt, SourceRootId,
34};
35use ra_syntax::{
36 ast::{self, NameOwner},
37 match_ast, AstNode, Parse, SmolStr, SourceFile,
38 SyntaxKind::{self, *},
39 SyntaxNode, SyntaxNodePtr, TextRange, WalkEvent,
40};
41#[cfg(not(feature = "wasm"))]
42use rayon::prelude::*;
43
44use crate::RootDatabase;
45
46#[derive(Debug)]
47pub struct Query {
48 query: String,
49 lowercased: String,
50 only_types: bool,
51 libs: bool,
52 exact: bool,
53 limit: usize,
54}
55
56impl Query {
57 pub fn new(query: String) -> Query {
58 let lowercased = query.to_lowercase();
59 Query {
60 query,
61 lowercased,
62 only_types: false,
63 libs: false,
64 exact: false,
65 limit: usize::max_value(),
66 }
67 }
68
69 pub fn only_types(&mut self) {
70 self.only_types = true;
71 }
72
73 pub fn libs(&mut self) {
74 self.libs = true;
75 }
76
77 pub fn exact(&mut self) {
78 self.exact = true;
79 }
80
81 pub fn limit(&mut self, limit: usize) {
82 self.limit = limit
83 }
84}
85
86#[salsa::query_group(SymbolsDatabaseStorage)]
87pub trait SymbolsDatabase: hir::db::HirDatabase {
88 fn file_symbols(&self, file_id: FileId) -> Arc<SymbolIndex>;
89 #[salsa::input]
90 fn library_symbols(&self, id: SourceRootId) -> Arc<SymbolIndex>;
91 /// The set of "local" (that is, from the current workspace) roots.
92 /// Files in local roots are assumed to change frequently.
93 #[salsa::input]
94 fn local_roots(&self) -> Arc<Vec<SourceRootId>>;
95 /// The set of roots for crates.io libraries.
96 /// Files in libraries are assumed to never change.
97 #[salsa::input]
98 fn library_roots(&self) -> Arc<Vec<SourceRootId>>;
99}
100
101fn file_symbols(db: &impl SymbolsDatabase, file_id: FileId) -> Arc<SymbolIndex> {
102 db.check_canceled();
103 let parse = db.parse(file_id);
104
105 let symbols = source_file_to_file_symbols(&parse.tree(), file_id);
106
107 // FIXME: add macros here
108
109 Arc::new(SymbolIndex::new(symbols))
110}
111
112pub fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> {
113 /// Need to wrap Snapshot to provide `Clone` impl for `map_with`
114 struct Snap(salsa::Snapshot<RootDatabase>);
115 impl Clone for Snap {
116 fn clone(&self) -> Snap {
117 Snap(self.0.snapshot())
118 }
119 }
120
121 let buf: Vec<Arc<SymbolIndex>> = if query.libs {
122 let snap = Snap(db.snapshot());
123 #[cfg(not(feature = "wasm"))]
124 let buf = db
125 .library_roots()
126 .par_iter()
127 .map_with(snap, |db, &lib_id| db.0.library_symbols(lib_id))
128 .collect();
129
130 #[cfg(feature = "wasm")]
131 let buf = db.library_roots().iter().map(|&lib_id| snap.0.library_symbols(lib_id)).collect();
132
133 buf
134 } else {
135 let mut files = Vec::new();
136 for &root in db.local_roots().iter() {
137 let sr = db.source_root(root);
138 files.extend(sr.walk())
139 }
140
141 let snap = Snap(db.snapshot());
142 #[cfg(not(feature = "wasm"))]
143 let buf =
144 files.par_iter().map_with(snap, |db, &file_id| db.0.file_symbols(file_id)).collect();
145
146 #[cfg(feature = "wasm")]
147 let buf = files.iter().map(|&file_id| snap.0.file_symbols(file_id)).collect();
148
149 buf
150 };
151 query.search(&buf)
152}
153
154pub fn index_resolve(db: &RootDatabase, name_ref: &ast::NameRef) -> Vec<FileSymbol> {
155 let name = name_ref.text();
156 let mut query = Query::new(name.to_string());
157 query.exact();
158 query.limit(4);
159 world_symbols(db, query)
160}
161
162#[derive(Default)]
163pub struct SymbolIndex {
164 symbols: Vec<FileSymbol>,
165 map: fst::Map,
166}
167
168impl fmt::Debug for SymbolIndex {
169 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
170 f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish()
171 }
172}
173
174impl PartialEq for SymbolIndex {
175 fn eq(&self, other: &SymbolIndex) -> bool {
176 self.symbols == other.symbols
177 }
178}
179
180impl Eq for SymbolIndex {}
181
182impl Hash for SymbolIndex {
183 fn hash<H: Hasher>(&self, hasher: &mut H) {
184 self.symbols.hash(hasher)
185 }
186}
187
188impl SymbolIndex {
189 fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex {
190 fn cmp_key<'a>(s1: &'a FileSymbol) -> impl Ord + 'a {
191 unicase::Ascii::new(s1.name.as_str())
192 }
193 #[cfg(not(feature = "wasm"))]
194 symbols.par_sort_by(|s1, s2| cmp_key(s1).cmp(&cmp_key(s2)));
195
196 #[cfg(feature = "wasm")]
197 symbols.sort_by(|s1, s2| cmp_key(s1).cmp(&cmp_key(s2)));
198
199 let mut builder = fst::MapBuilder::memory();
200
201 let mut last_batch_start = 0;
202
203 for idx in 0..symbols.len() {
204 if symbols.get(last_batch_start).map(cmp_key) == symbols.get(idx + 1).map(cmp_key) {
205 continue;
206 }
207
208 let start = last_batch_start;
209 let end = idx + 1;
210 last_batch_start = end;
211
212 let key = symbols[start].name.as_str().to_lowercase();
213 let value = SymbolIndex::range_to_map_value(start, end);
214
215 builder.insert(key, value).unwrap();
216 }
217
218 let map = fst::Map::from_bytes(builder.into_inner().unwrap()).unwrap();
219 SymbolIndex { symbols, map }
220 }
221
222 pub fn len(&self) -> usize {
223 self.symbols.len()
224 }
225
226 pub fn memory_size(&self) -> usize {
227 self.map.as_fst().size() + self.symbols.len() * mem::size_of::<FileSymbol>()
228 }
229
230 #[cfg(not(feature = "wasm"))]
231 pub(crate) fn for_files(
232 files: impl ParallelIterator<Item = (FileId, Parse<ast::SourceFile>)>,
233 ) -> SymbolIndex {
234 let symbols = files
235 .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id))
236 .collect::<Vec<_>>();
237 SymbolIndex::new(symbols)
238 }
239
240 #[cfg(feature = "wasm")]
241 pub(crate) fn for_files(
242 files: impl Iterator<Item = (FileId, Parse<ast::SourceFile>)>,
243 ) -> SymbolIndex {
244 let symbols = files
245 .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id))
246 .collect::<Vec<_>>();
247 SymbolIndex::new(symbols)
248 }
249
250 fn range_to_map_value(start: usize, end: usize) -> u64 {
251 debug_assert![start <= (std::u32::MAX as usize)];
252 debug_assert![end <= (std::u32::MAX as usize)];
253
254 ((start as u64) << 32) | end as u64
255 }
256
257 fn map_value_to_range(value: u64) -> (usize, usize) {
258 let end = value as u32 as usize;
259 let start = (value >> 32) as usize;
260 (start, end)
261 }
262}
263
264impl Query {
265 pub(crate) fn search(self, indices: &[Arc<SymbolIndex>]) -> Vec<FileSymbol> {
266 let mut op = fst::map::OpBuilder::new();
267 for file_symbols in indices.iter() {
268 let automaton = fst::automaton::Subsequence::new(&self.lowercased);
269 op = op.add(file_symbols.map.search(automaton))
270 }
271 let mut stream = op.union();
272 let mut res = Vec::new();
273 while let Some((_, indexed_values)) = stream.next() {
274 if res.len() >= self.limit {
275 break;
276 }
277 for indexed_value in indexed_values {
278 let symbol_index = &indices[indexed_value.index];
279 let (start, end) = SymbolIndex::map_value_to_range(indexed_value.value);
280
281 for symbol in &symbol_index.symbols[start..end] {
282 if self.only_types && !is_type(symbol.ptr.kind()) {
283 continue;
284 }
285 if self.exact && symbol.name != self.query {
286 continue;
287 }
288 res.push(symbol.clone());
289 }
290 }
291 }
292 res
293 }
294}
295
296fn is_type(kind: SyntaxKind) -> bool {
297 match kind {
298 STRUCT_DEF | ENUM_DEF | TRAIT_DEF | TYPE_ALIAS_DEF => true,
299 _ => false,
300 }
301}
302
303/// The actual data that is stored in the index. It should be as compact as
304/// possible.
305#[derive(Debug, Clone, PartialEq, Eq, Hash)]
306pub struct FileSymbol {
307 pub file_id: FileId,
308 pub name: SmolStr,
309 pub ptr: SyntaxNodePtr,
310 pub name_range: Option<TextRange>,
311 pub container_name: Option<SmolStr>,
312}
313
314fn source_file_to_file_symbols(source_file: &SourceFile, file_id: FileId) -> Vec<FileSymbol> {
315 let mut symbols = Vec::new();
316 let mut stack = Vec::new();
317
318 for event in source_file.syntax().preorder() {
319 match event {
320 WalkEvent::Enter(node) => {
321 if let Some(mut symbol) = to_file_symbol(&node, file_id) {
322 symbol.container_name = stack.last().cloned();
323
324 stack.push(symbol.name.clone());
325 symbols.push(symbol);
326 }
327 }
328
329 WalkEvent::Leave(node) => {
330 if to_symbol(&node).is_some() {
331 stack.pop();
332 }
333 }
334 }
335 }
336
337 symbols
338}
339
340fn to_symbol(node: &SyntaxNode) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
341 fn decl<N: NameOwner>(node: N) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
342 let name = node.name()?;
343 let name_range = name.syntax().text_range();
344 let name = name.text().clone();
345 let ptr = SyntaxNodePtr::new(node.syntax());
346
347 Some((name, ptr, name_range))
348 }
349 match_ast! {
350 match node {
351 ast::FnDef(it) => { decl(it) },
352 ast::StructDef(it) => { decl(it) },
353 ast::EnumDef(it) => { decl(it) },
354 ast::TraitDef(it) => { decl(it) },
355 ast::Module(it) => { decl(it) },
356 ast::TypeAliasDef(it) => { decl(it) },
357 ast::ConstDef(it) => { decl(it) },
358 ast::StaticDef(it) => { decl(it) },
359 _ => None,
360 }
361 }
362}
363
364fn to_file_symbol(node: &SyntaxNode, file_id: FileId) -> Option<FileSymbol> {
365 to_symbol(node).map(move |(name, ptr, name_range)| FileSymbol {
366 name,
367 ptr,
368 file_id,
369 name_range: Some(name_range),
370 container_name: None,
371 })
372}