From bb5c189b7dae1ea63ccd5d7a0c2e097d7c676f77 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Thu, 13 Aug 2020 16:39:16 +0200
Subject: Rename ra_ide_db -> ide_db

---
 crates/ide_db/src/symbol_index.rs | 429 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 429 insertions(+)
 create mode 100644 crates/ide_db/src/symbol_index.rs

diff --git a/crates/ide_db/src/symbol_index.rs b/crates/ide_db/src/symbol_index.rs
new file mode 100644
index 000000000..654df898e
--- /dev/null
+++ b/crates/ide_db/src/symbol_index.rs
@@ -0,0 +1,429 @@
+//! This module handles fuzzy-searching of functions, structs and other symbols
+//! by name across the whole workspace and dependencies.
+//!
+//! It works by building an incrementally-updated text-search index of all
+//! symbols. The backbone of the index is the **awesome** `fst` crate by
+//! @BurntSushi.
+//!
+//! In a nutshell, you give a set of strings to `fst`, and it builds a
+//! finite state machine describing this set of strings. The strings which
+//! could fuzzy-match a pattern can also be described by a finite state machine.
+//! What is freaking cool is that you can now traverse both state machines in
+//! lock-step to enumerate the strings which are both in the input set and
+//! fuzzy-match the query. Or, more formally, given two languages described by
+//! FSTs, one can build a product FST which describes the intersection of the
+//! languages.
+//!
+//! `fst` does not support cheap updating of the index, but it supports unioning
+//! of state machines. So, to account for changing source code, we build an FST
+//! for each library (which is assumed to never change) and an FST for each Rust
+//! file in the current workspace, and run a query against the union of all
+//! those FSTs.
+
+use std::{
+    cmp::Ordering,
+    fmt,
+    hash::{Hash, Hasher},
+    mem,
+    sync::Arc,
+};
+
+use base_db::{
+    salsa::{self, ParallelDatabase},
+    CrateId, FileId, SourceDatabaseExt, SourceRootId,
+};
+use fst::{self, Streamer};
+use hir::db::DefDatabase;
+use rayon::prelude::*;
+use rustc_hash::{FxHashMap, FxHashSet};
+use syntax::{
+    ast::{self, NameOwner},
+    match_ast, AstNode, Parse, SmolStr, SourceFile,
+    SyntaxKind::{self, *},
+    SyntaxNode, SyntaxNodePtr, TextRange, WalkEvent,
+};
+
+use crate::RootDatabase;
+
+#[derive(Debug)]
+pub struct Query {
+    query: String,
+    lowercased: String,
+    only_types: bool,
+    libs: bool,
+    exact: bool,
+    limit: usize,
+}
+
+impl Query {
+    pub fn new(query: String) -> Query {
+        let lowercased = query.to_lowercase();
+        Query {
+            query,
+            lowercased,
+            only_types: false,
+            libs: false,
+            exact: false,
+            limit: usize::max_value(),
+        }
+    }
+
+    pub fn only_types(&mut self) {
+        self.only_types = true;
+    }
+
+    pub fn libs(&mut self) {
+        self.libs = true;
+    }
+
+    pub fn exact(&mut self) {
+        self.exact = true;
+    }
+
+    pub fn limit(&mut self, limit: usize) {
+        self.limit = limit
+    }
+}
+
+#[salsa::query_group(SymbolsDatabaseStorage)]
+pub trait SymbolsDatabase: hir::db::HirDatabase + SourceDatabaseExt {
+    fn file_symbols(&self, file_id: FileId) -> Arc<SymbolIndex>;
+    fn library_symbols(&self) -> Arc<FxHashMap<SourceRootId, SymbolIndex>>;
+    /// The set of "local" (that is, from the current workspace) roots.
+    /// Files in local roots are assumed to change frequently.
+    #[salsa::input]
+    fn local_roots(&self) -> Arc<FxHashSet<SourceRootId>>;
+    /// The set of roots for crates.io libraries.
+    /// Files in libraries are assumed to never change.
+    #[salsa::input]
+    fn library_roots(&self) -> Arc<FxHashSet<SourceRootId>>;
+}
+
+fn library_symbols(db: &dyn SymbolsDatabase) -> Arc<FxHashMap<SourceRootId, SymbolIndex>> {
+    let _p = profile::span("library_symbols");
+
+    let roots = db.library_roots();
+    let res = roots
+        .iter()
+        .map(|&root_id| {
+            let root = db.source_root(root_id);
+            let files = root
+                .iter()
+                .map(|it| (it, SourceDatabaseExt::file_text(db, it)))
+                .collect::<Vec<_>>();
+            let symbol_index = SymbolIndex::for_files(
+                files.into_par_iter().map(|(file, text)| (file, SourceFile::parse(&text))),
+            );
+            (root_id, symbol_index)
+        })
+        .collect();
+    Arc::new(res)
+}
+
+fn file_symbols(db: &dyn SymbolsDatabase, file_id: FileId) -> Arc<SymbolIndex> {
+    db.check_canceled();
+    let parse = db.parse(file_id);
+
+    let symbols = source_file_to_file_symbols(&parse.tree(), file_id);
+
+    // FIXME: add macros here
+
+    Arc::new(SymbolIndex::new(symbols))
+}
+
+/// Need to wrap Snapshot to provide `Clone` impl for `map_with`
+struct Snap<DB>(DB);
+impl<DB: ParallelDatabase> Clone for Snap<salsa::Snapshot<DB>> {
+    fn clone(&self) -> Snap<salsa::Snapshot<DB>> {
+        Snap(self.0.snapshot())
+    }
+}
+
+// Feature: Workspace Symbol
+//
+// Uses fuzzy-search to find types, modules and functions by name across your
+// project and dependencies. This is **the** most useful feature, which improves code
+// navigation tremendously. It mostly works on top of the built-in LSP
+// functionality, however `#` and `*` symbols can be used to narrow down the
+// search. Specifically,
+//
+// - `Foo` searches for `Foo` type in the current workspace
+// - `foo#` searches for `foo` function in the current workspace
+// - `Foo*` searches for `Foo` type among dependencies, including `stdlib`
+// - `foo#*` searches for `foo` function among dependencies
+//
+// That is, `#` switches from "types" to all symbols, `*` switches from the current
+// workspace to dependencies.
+//
+// |===
+// | Editor  | Shortcut
+//
+// | VS Code | kbd:[Ctrl+T]
+// |===
+pub fn world_symbols(db: &RootDatabase, query: Query) -> Vec<FileSymbol> {
+    let _p = profile::span("world_symbols").detail(|| query.query.clone());
+
+    let tmp1;
+    let tmp2;
+    let buf: Vec<&SymbolIndex> = if query.libs {
+        tmp1 = db.library_symbols();
+        tmp1.values().collect()
+    } else {
+        let mut files = Vec::new();
+        for &root in db.local_roots().iter() {
+            let sr = db.source_root(root);
+            files.extend(sr.iter())
+        }
+
+        let snap = Snap(db.snapshot());
+        tmp2 = files
+            .par_iter()
+            .map_with(snap, |db, &file_id| db.0.file_symbols(file_id))
+            .collect::<Vec<_>>();
+        tmp2.iter().map(|it| &**it).collect()
+    };
+    query.search(&buf)
+}
+
+pub fn crate_symbols(db: &RootDatabase, krate: CrateId, query: Query) -> Vec<FileSymbol> {
+    // FIXME(#4842): This now depends on CrateDefMap, why not build the entire symbol
+    // index from that instead?
+
+    let def_map = db.crate_def_map(krate);
+    let mut files = Vec::new();
+    let mut modules = vec![def_map.root];
+    while let Some(module) = modules.pop() {
+        let data = &def_map[module];
+        files.extend(data.origin.file_id());
+        modules.extend(data.children.values());
+    }
+
+    let snap = Snap(db.snapshot());
+
+    let buf = files
+        .par_iter()
+        .map_with(snap, |db, &file_id| db.0.file_symbols(file_id))
+        .collect::<Vec<_>>();
+    let buf = buf.iter().map(|it| &**it).collect::<Vec<_>>();
+
+    query.search(&buf)
+}
+
+pub fn index_resolve(db: &RootDatabase, name_ref: &ast::NameRef) -> Vec<FileSymbol> {
+    let name = name_ref.text();
+    let mut query = Query::new(name.to_string());
+    query.exact();
+    query.limit(4);
+    world_symbols(db, query)
+}
+
+#[derive(Default)]
+pub struct SymbolIndex {
+    symbols: Vec<FileSymbol>,
+    map: fst::Map<Vec<u8>>,
+}
+
+impl fmt::Debug for SymbolIndex {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        f.debug_struct("SymbolIndex").field("n_symbols", &self.symbols.len()).finish()
+    }
+}
+
+impl PartialEq for SymbolIndex {
+    fn eq(&self, other: &SymbolIndex) -> bool {
+        self.symbols == other.symbols
+    }
+}
+
+impl Eq for SymbolIndex {}
+
+impl Hash for SymbolIndex {
+    fn hash<H: Hasher>(&self, hasher: &mut H) {
+        self.symbols.hash(hasher)
+    }
+}
+
+impl SymbolIndex {
+    fn new(mut symbols: Vec<FileSymbol>) -> SymbolIndex {
+        fn cmp(lhs: &FileSymbol, rhs: &FileSymbol) -> Ordering {
+            let lhs_chars = lhs.name.chars().map(|c| c.to_ascii_lowercase());
+            let rhs_chars = rhs.name.chars().map(|c| c.to_ascii_lowercase());
+            lhs_chars.cmp(rhs_chars)
+        }
+
+        symbols.par_sort_by(cmp);
+
+        let mut builder = fst::MapBuilder::memory();
+
+        let mut last_batch_start = 0;
+
+        for idx in 0..symbols.len() {
+            if let Some(next_symbol) = symbols.get(idx + 1) {
+                if cmp(&symbols[last_batch_start], next_symbol) == Ordering::Equal {
+                    continue;
+                }
+            }
+
+            let start = last_batch_start;
+            let end = idx + 1;
+            last_batch_start = end;
+
+            let key = symbols[start].name.as_str().to_ascii_lowercase();
+            let value = SymbolIndex::range_to_map_value(start, end);
+
+            builder.insert(key, value).unwrap();
+        }
+
+        let map = fst::Map::new(builder.into_inner().unwrap()).unwrap();
+        SymbolIndex { symbols, map }
+    }
+
+    pub fn len(&self) -> usize {
+        self.symbols.len()
+    }
+
+    pub fn memory_size(&self) -> usize {
+        self.map.as_fst().size() + self.symbols.len() * mem::size_of::<FileSymbol>()
+    }
+
+    pub(crate) fn for_files(
+        files: impl ParallelIterator<Item = (FileId, Parse<ast::SourceFile>)>,
+    ) -> SymbolIndex {
+        let symbols = files
+            .flat_map(|(file_id, file)| source_file_to_file_symbols(&file.tree(), file_id))
+            .collect::<Vec<_>>();
+        SymbolIndex::new(symbols)
+    }
+
+    fn range_to_map_value(start: usize, end: usize) -> u64 {
+        debug_assert![start <= (std::u32::MAX as usize)];
+        debug_assert![end <= (std::u32::MAX as usize)];
+
+        ((start as u64) << 32) | end as u64
+    }
+
+    fn map_value_to_range(value: u64) -> (usize, usize) {
+        let end = value as u32 as usize;
+        let start = (value >> 32) as usize;
+        (start, end)
+    }
+}
+
+impl Query {
+    pub(crate) fn search(self, indices: &[&SymbolIndex]) -> Vec<FileSymbol> {
+        let mut op = fst::map::OpBuilder::new();
+        for file_symbols in indices.iter() {
+            let automaton = fst::automaton::Subsequence::new(&self.lowercased);
+            op = op.add(file_symbols.map.search(automaton))
+        }
+        let mut stream = op.union();
+        let mut res = Vec::new();
+        while let Some((_, indexed_values)) = stream.next() {
+            for indexed_value in indexed_values {
+                let symbol_index = &indices[indexed_value.index];
+                let (start, end) = SymbolIndex::map_value_to_range(indexed_value.value);
+
+                for symbol in &symbol_index.symbols[start..end] {
+                    if self.only_types && !is_type(symbol.kind) {
+                        continue;
+                    }
+                    if self.exact && symbol.name != self.query {
+                        continue;
+                    }
+
+                    res.push(symbol.clone());
+                    if res.len() >= self.limit {
+                        return res;
+                    }
+                }
+            }
+        }
+        res
+    }
+}
+
+fn is_type(kind: SyntaxKind) -> bool {
+    matches!(kind, STRUCT | ENUM | TRAIT | TYPE_ALIAS)
+}
+
+/// The actual data that is stored in the index. It should be as compact as
+/// possible.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct FileSymbol {
+    pub file_id: FileId,
+    pub name: SmolStr,
+    pub kind: SyntaxKind,
+    pub range: TextRange,
+    pub ptr: SyntaxNodePtr,
+    pub name_range: Option<TextRange>,
+    pub container_name: Option<SmolStr>,
+}
+
+fn source_file_to_file_symbols(source_file: &SourceFile, file_id: FileId) -> Vec<FileSymbol> {
+    let mut symbols = Vec::new();
+    let mut stack = Vec::new();
+
+    for event in source_file.syntax().preorder() {
+        match event {
+            WalkEvent::Enter(node) => {
+                if let Some(mut symbol) = to_file_symbol(&node, file_id) {
+                    symbol.container_name = stack.last().cloned();
+
+                    stack.push(symbol.name.clone());
+                    symbols.push(symbol);
+                }
+            }
+
+            WalkEvent::Leave(node) => {
+                if to_symbol(&node).is_some() {
+                    stack.pop();
+                }
+            }
+        }
+    }
+
+    symbols
+}
+
+fn to_symbol(node: &SyntaxNode) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
+    fn decl<N: NameOwner>(node: N) -> Option<(SmolStr, SyntaxNodePtr, TextRange)> {
+        let name = node.name()?;
+        let name_range = name.syntax().text_range();
+        let name = name.text().clone();
+        let ptr = SyntaxNodePtr::new(node.syntax());
+
+        Some((name, ptr, name_range))
+    }
+    match_ast! {
+        match node {
+            ast::Fn(it) => decl(it),
+            ast::Struct(it) => decl(it),
+            ast::Enum(it) => decl(it),
+            ast::Trait(it) => decl(it),
+            ast::Module(it) => decl(it),
+            ast::TypeAlias(it) => decl(it),
+            ast::Const(it) => decl(it),
+            ast::Static(it) => decl(it),
+            ast::MacroCall(it) => {
+                if it.is_macro_rules().is_some() {
+                    decl(it)
+                } else {
+                    None
+                }
+            },
+            _ => None,
+        }
+    }
+}
+
+fn to_file_symbol(node: &SyntaxNode, file_id: FileId) -> Option<FileSymbol> {
+    to_symbol(node).map(move |(name, ptr, name_range)| FileSymbol {
+        name,
+        kind: node.kind(),
+        range: node.text_range(),
+        ptr,
+        file_id,
+        name_range: Some(name_range),
+        container_name: None,
+    })
+}
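
The module docs describe the core trick: every index is an FST, the query is an automaton, and searching means walking their product, with changing code handled by unioning per-root FSTs. The standalone sketch below shows that technique with the `fst` crate directly, assuming roughly the fst 0.4 API that the patch itself calls; the `build_index` and `pack` helpers and the example keys are invented for illustration and are not part of rust-analyzer.

    use fst::Streamer;

    // Build an in-memory `fst::Map`, mirroring `SymbolIndex::new`: keys must be
    // inserted in sorted order; values are arbitrary u64 payloads.
    fn build_index(mut entries: Vec<(String, u64)>) -> fst::Map<Vec<u8>> {
        entries.sort_by(|a, b| a.0.cmp(&b.0));
        let mut builder = fst::MapBuilder::memory();
        for (key, value) in entries {
            builder.insert(key, value).unwrap();
        }
        fst::Map::new(builder.into_inner().unwrap()).unwrap()
    }

    // Pack a `start..end` range the same way `SymbolIndex::range_to_map_value` does.
    fn pack(start: u64, end: u64) -> u64 {
        (start << 32) | end
    }

    fn main() {
        // Two independent indices, standing in for a library FST and a local-file FST.
        let lib = build_index(vec![("hashmap".into(), pack(0, 1)), ("hashset".into(), pack(1, 2))]);
        let local = build_index(vec![("myhashmap".into(), pack(0, 1))]);

        // The query is itself an automaton: `Subsequence` accepts every key that
        // contains the query's characters in order, which is the fuzzy part.
        let mut op = fst::map::OpBuilder::new();
        for index in [&lib, &local].iter() {
            let automaton = fst::automaton::Subsequence::new("hmap");
            op = op.add(index.search(automaton));
        }

        // Walking the union of all indices in lock-step with the query automaton
        // yields exactly the keys that are both indexed and fuzzy-matched.
        let mut stream = op.union();
        while let Some((key, indexed_values)) = stream.next() {
            for iv in indexed_values {
                let (start, end) = ((iv.value >> 32) as usize, iv.value as u32 as usize);
                println!(
                    "{} (index #{}, symbols {}..{})",
                    String::from_utf8_lossy(key),
                    iv.index,
                    start,
                    end
                );
            }
        }
    }

Because the stored value is a packed range into the sorted symbol vector, a single fst entry can cover every symbol whose lowercased name is identical, which keeps the map small.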
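
The `Snap` newtype exists because rayon's `map_with` wants a `Clone`-able per-worker state, and cloning a `Snap` takes a fresh salsa snapshot for that worker. Here is a self-contained sketch of the same pattern with an `Arc<Vec<String>>` standing in for `salsa::Snapshot<RootDatabase>`; the fake "database" and the computed lengths are assumptions made purely for illustration.

    use std::sync::Arc;

    use rayon::prelude::*;

    // Stand-in for `Snap<salsa::Snapshot<RootDatabase>>`.
    struct Snap(Arc<Vec<String>>);

    impl Clone for Snap {
        fn clone(&self) -> Snap {
            // The real impl is `Snap(self.0.snapshot())`: one consistent database
            // view per rayon worker. Here we just share the Arc.
            Snap(Arc::clone(&self.0))
        }
    }

    fn main() {
        let snap = Snap(Arc::new(vec!["fn foo() {}".to_string(), "struct Bar;".to_string()]));
        let file_ids = vec![0usize, 1];

        // One "file_symbols"-style computation per file, fanned out over rayon's
        // thread pool, just like `world_symbols` and `crate_symbols` do.
        let lens: Vec<usize> = file_ids
            .par_iter()
            .map_with(snap, |snap, &file_id| snap.0[file_id].len())
            .collect();

        assert_eq!(lens, vec![11, 11]);
    }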
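
The "Feature: Workspace Symbol" comment defines the `#` and `*` query suffixes, but translating them into `Query` flags happens in the editor-facing layer, outside this file. The following is a hedged sketch of how a client might do that translation; `parse_query` is a hypothetical helper, not an API from this patch.

    // Hypothetical helper: splits the trailing `#` / `*` markers off a workspace
    // symbol query. Returns (text, all_symbols, search_dependencies).
    fn parse_query(input: &str) -> (String, bool, bool) {
        let mut text = input.to_string();
        let mut all_symbols = false; // `#`: include functions etc., not just types
        let mut libs = false; // `*`: also search dependencies
        while let Some(last) = text.chars().last() {
            match last {
                '#' => {
                    all_symbols = true;
                    text.pop();
                }
                '*' => {
                    libs = true;
                    text.pop();
                }
                _ => break,
            }
        }
        (text, all_symbols, libs)
    }

    fn main() {
        assert_eq!(parse_query("Foo"), ("Foo".to_string(), false, false));
        assert_eq!(parse_query("foo#*"), ("foo".to_string(), true, true));
        // With those flags the query would then be built roughly as:
        //     let mut q = Query::new(text);
        //     if !all_symbols { q.only_types(); }
        //     if libs { q.libs(); }
        // before calling `world_symbols(db, q)`.
    }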