From 133d001d8296e51bcb4d0dc0982671f55c2c77d9 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 13 Aug 2018 15:10:20 +0300 Subject: world symbols --- crates/libanalysis/Cargo.toml | 1 + crates/libanalysis/idx.rs | 84 ++++++++++++++++++++++++++++++++++ crates/libanalysis/src/lib.rs | 47 ++++++++++++++++--- crates/libanalysis/src/symbol_index.rs | 74 ++++++++++++++++++++++++++++++ 4 files changed, 199 insertions(+), 7 deletions(-) create mode 100644 crates/libanalysis/idx.rs create mode 100644 crates/libanalysis/src/symbol_index.rs (limited to 'crates') diff --git a/crates/libanalysis/Cargo.toml b/crates/libanalysis/Cargo.toml index c773f4211..88b1099f2 100644 --- a/crates/libanalysis/Cargo.toml +++ b/crates/libanalysis/Cargo.toml @@ -8,5 +8,6 @@ log = "0.4.2" failure = "0.1.2" parking_lot = "0.6.3" once_cell = "0.1.4" +fst = { git = "https://github.com/matklad/fst", branch = "subsequence"} libsyntax2 = { path = "../libsyntax2" } libeditor = { path = "../libeditor" } diff --git a/crates/libanalysis/idx.rs b/crates/libanalysis/idx.rs new file mode 100644 index 000000000..69a635aef --- /dev/null +++ b/crates/libanalysis/idx.rs @@ -0,0 +1,84 @@ +use std::path::PathBuf; + +use fst; +use fst::IntoStreamer; +use file; + +use fall_tree::{TextRange, NodeType}; +use indxr::{FileIndex, IndexableFileSet}; + +use editor::line_index::{LineCol, LineIndex}; +use editor::fst_subseq::FstSubSeq; +use editor::file_symbols::process_symbols; + +use syntax::{STRUCT_DEF, ENUM_DEF, TRAIT_DEF, TYPE_DEF}; + + +pub struct SymbolIndex { + index: FileIndex, +} + +impl SymbolIndex { + pub fn new(roots: Vec) -> SymbolIndex { + let file_set = IndexableFileSet::new(roots, "rs"); + let index = FileIndex::new(file_set, Box::new(|path| { + let text = file::get_text(path).ok()?; + Some(FileSymbols::new(text)) + })); + SymbolIndex { index } + } + + pub fn query(&self, query: &str) -> Vec<(PathBuf, Symbol)> { + let mut query = Query::new(query); + let mut result = Vec::new(); + self.process_query(&query, &mut result); + if result.is_empty() && !query.all_symbols { + query.all_symbols = true; + self.process_query(&query, &mut result); + } + result + } + + fn process_query(&self, query: &Query, acc: &mut Vec<(PathBuf, Symbol)>) { + self.index.process_files(&mut |file| { + query.process(&file.value, &mut |symbol| { + acc.push((file.path.clone(), symbol)) + }); + acc.len() > 512 + }); + } +} + +struct Query { + query: String, + all_symbols: bool, +} + +impl Query { + fn new(query: &str) -> Query { + let all_symbols = query.contains("#"); + let query: String = query.chars() + .filter(|&c| c != '#') + .flat_map(char::to_lowercase) + .collect(); + Query { query, all_symbols } + } + + fn process(&self, file: &FileSymbols, acc: &mut FnMut(Symbol)) { + fn is_type(ty: NodeType) -> bool { + match ty { + STRUCT_DEF | ENUM_DEF | TRAIT_DEF| TYPE_DEF => true, + _ => false, + } + } + + let a = FstSubSeq::new(&self.query); + for idx in file.map.search(a).into_stream().into_values() { + let idx = idx as usize; + let symbol = file.symbols[idx].clone(); + if self.all_symbols || is_type(symbol.ty) { + acc(symbol) + } + } + } +} diff --git a/crates/libanalysis/src/lib.rs b/crates/libanalysis/src/lib.rs index e4df3de2e..f0d0cf0a4 100644 --- a/crates/libanalysis/src/lib.rs +++ b/crates/libanalysis/src/lib.rs @@ -6,6 +6,9 @@ extern crate log; extern crate once_cell; extern crate libsyntax2; extern crate libeditor; +extern crate fst; + +mod symbol_index; use once_cell::sync::OnceCell; @@ -14,8 +17,11 @@ use std::{ collections::hash_map::HashMap, path::{PathBuf, Path}, }; + use libsyntax2::ast; -use libeditor::LineIndex; +use libeditor::{LineIndex, FileSymbol}; + +use self::symbol_index::{FileSymbols, Query}; pub type Result = ::std::result::Result; @@ -70,12 +76,7 @@ impl WorldState { impl World { pub fn file_syntax(&self, path: &Path) -> Result { let data = self.file_data(path)?; - let syntax = data.syntax - .get_or_init(|| { - trace!("parsing: {}", path.display()); - ast::File::parse(&data.text) - }).clone(); - Ok(syntax) + Ok(data.syntax(path).clone()) } pub fn file_line_index(&self, path: &Path) -> Result { @@ -88,6 +89,16 @@ impl World { Ok(index.clone()) } + pub fn world_symbols(&self, query: &str, f: &mut FnMut(&Path, &FileSymbol) -> Search) { + let q = Query::new(query); + for (path, data) in self.data.file_map.iter() { + let symbols = data.symbols(path.as_path()); + if q.process(symbols, &mut |symbol| f(path, symbol)) == Search::Break { + break; + } + } + } + fn file_data(&self, path: &Path) -> Result> { match self.data.file_map.get(path) { Some(data) => Ok(data.clone()), @@ -96,6 +107,12 @@ impl World { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Search { + Continue, + Break, +} + #[derive(Default, Debug)] struct WorldData { @@ -105,6 +122,7 @@ struct WorldData { #[derive(Debug)] struct FileData { text: String, + symbols: OnceCell, syntax: OnceCell, lines: OnceCell, } @@ -113,8 +131,23 @@ impl FileData { fn new(text: String) -> FileData { FileData { text, + symbols: OnceCell::new(), syntax: OnceCell::new(), lines: OnceCell::new(), } } + + fn syntax(&self, path: &Path) -> &ast::File { + self.syntax + .get_or_init(|| { + trace!("parsing: {}", path.display()); + ast::File::parse(&self.text) + }) + } + + fn symbols(&self, path: &Path) -> &FileSymbols { + let syntax = self.syntax(path); + self.symbols + .get_or_init(|| FileSymbols::new(syntax)) + } } diff --git a/crates/libanalysis/src/symbol_index.rs b/crates/libanalysis/src/symbol_index.rs new file mode 100644 index 000000000..1878fae99 --- /dev/null +++ b/crates/libanalysis/src/symbol_index.rs @@ -0,0 +1,74 @@ +use libeditor::{FileSymbol, file_symbols}; +use libsyntax2::{ + ast, + SyntaxKind::{self, *}, +}; +use fst::{self, IntoStreamer}; + +use Search; + +#[derive(Debug)] +pub(crate) struct FileSymbols { + symbols: Vec, + map: fst::Map, +} + +impl FileSymbols { + pub(crate) fn new(file: &ast::File) -> FileSymbols { + let mut symbols = file_symbols(file) + .into_iter() + .map(|s| (s.name.as_str().to_lowercase(), s)) + .collect::>(); + + symbols.sort_by(|s1, s2| s1.0.cmp(&s2.0)); + symbols.dedup_by(|s1, s2| s1.0 == s2.0); + let (names, symbols): (Vec, Vec) = + symbols.into_iter().unzip(); + + let map = fst::Map::from_iter( + names.into_iter().zip(0u64..) + ).unwrap(); + FileSymbols { symbols, map } + } +} + +pub(crate) struct Query { + query: String, + all_symbols: bool, +} + +impl Query { + pub(crate) fn new(query: &str) -> Query { + let all_symbols = query.contains("#"); + let query: String = query.chars() + .filter(|&c| c != '#') + .flat_map(char::to_lowercase) + .collect(); + Query { query, all_symbols } + } + + pub(crate) fn process( + &self, + file: &FileSymbols, + acc: &mut FnMut(&FileSymbol) -> Search, + ) -> Search { + fn is_type(kind: SyntaxKind) -> bool { + match kind { + STRUCT | ENUM | TRAIT | TYPE_ITEM => true, + _ => false, + } + } + let automaton = fst::automaton::Subsequence::new(&self.query); + for idx in file.map.search(automaton).into_stream().into_values() { + let idx = idx as usize; + let symbol = &file.symbols[idx]; + if self.all_symbols || is_type(symbol.kind) { + if acc(&symbol) == Search::Break { + return Search::Break; + } + } + } + Search::Continue + } +} + -- cgit v1.2.3