From 4bcf8c8c68bd791f295aa06ef7903c006be3f356 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Tue, 9 Jun 2020 17:32:42 +0200 Subject: Add an FST index to `ImportMap` --- crates/ra_hir_def/src/import_map.rs | 253 +++++++++++++++++++++++++++++++++++- 1 file changed, 250 insertions(+), 3 deletions(-) (limited to 'crates/ra_hir_def/src') diff --git a/crates/ra_hir_def/src/import_map.rs b/crates/ra_hir_def/src/import_map.rs index 4284a0a91..e9b2fe26e 100644 --- a/crates/ra_hir_def/src/import_map.rs +++ b/crates/ra_hir_def/src/import_map.rs @@ -1,7 +1,10 @@ //! A map of all publicly exported items in a crate. +use std::cmp::Ordering; use std::{collections::hash_map::Entry, fmt, sync::Arc}; +use fst::{self, Streamer}; +use itertools::Itertools; use ra_db::CrateId; use rustc_hash::FxHashMap; @@ -21,9 +24,17 @@ use crate::{ /// /// Note that all paths are relative to the containing crate's root, so the crate name still needs /// to be prepended to the `ModPath` before the path is valid. -#[derive(Eq, PartialEq)] pub struct ImportMap { map: FxHashMap, + + /// List of keys stored in `map`, sorted lexicographically by their `ModPath`. Indexed by the + /// values returned by running `fst`. + /// + /// Since a path can refer to multiple items due to namespacing, we store all items with the + /// same path right after each other. This allows us to find all items after the FST gives us + /// the index of the first one. + importables: Vec, + fst: fst::Map>, } impl ImportMap { @@ -88,7 +99,34 @@ impl ImportMap { } } - Arc::new(Self { map: import_map }) + let mut importables = import_map.iter().collect::>(); + + importables.sort_by(cmp); + + // Build the FST, taking care not to insert duplicate values. + + let mut builder = fst::MapBuilder::memory(); + let mut last_batch_start = 0; + + for idx in 0..importables.len() { + if let Some(next_item) = importables.get(idx + 1) { + if cmp(&importables[last_batch_start], next_item) == Ordering::Equal { + continue; + } + } + + let start = last_batch_start; + last_batch_start = idx + 1; + + let key: String = fst_path(&importables[start].1).collect(); + + builder.insert(key, start as u64).unwrap(); + } + + let fst = fst::Map::new(builder.into_inner().unwrap()).unwrap(); + let importables = importables.iter().map(|(item, _)| **item).collect(); + + Arc::new(Self { map: import_map, fst, importables }) } /// Returns the `ModPath` needed to import/mention `item`, relative to this crate's root. @@ -97,6 +135,14 @@ impl ImportMap { } } +impl PartialEq for ImportMap { + fn eq(&self, other: &Self) -> bool { + self.importables == other.importables + } +} + +impl Eq for ImportMap {} + impl fmt::Debug for ImportMap { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let mut importable_paths: Vec<_> = self @@ -117,13 +163,97 @@ impl fmt::Debug for ImportMap { } } +fn fst_path(path: &ModPath) -> impl Iterator + '_ { + path.segments + .iter() + .map(|name| name.as_text().unwrap()) + .intersperse("::") + .flat_map(|s| s.chars().map(|c| c.to_ascii_lowercase())) +} + +fn cmp((_, lhs): &(&ItemInNs, &ModPath), (_, rhs): &(&ItemInNs, &ModPath)) -> Ordering { + let lhs_chars = fst_path(lhs); + let rhs_chars = fst_path(rhs); + lhs_chars.cmp(rhs_chars) +} + +#[derive(Debug)] +pub struct Query { + query: String, + anchor_end: bool, +} + +impl Query { + pub fn new(query: impl AsRef) -> Self { + Self { query: query.as_ref().to_lowercase(), anchor_end: false } + } + + /// Only returns items whose paths end with the (case-insensitive) query string as their last + /// segment. + pub fn anchor_end(self) -> Self { + Self { anchor_end: true, ..self } + } +} + +/// Searches dependencies of `krate` for an importable path matching `query`. +/// +/// This returns all items that could be imported from within `krate`, excluding paths inside +/// `krate` itself. +pub fn search_dependencies<'a>( + db: &'a dyn DefDatabase, + krate: CrateId, + query: Query, +) -> Vec { + let _p = ra_prof::profile("import_map::global_search").detail(|| format!("{:?}", query)); + + let graph = db.crate_graph(); + let import_maps: Vec<_> = + graph[krate].dependencies.iter().map(|dep| db.import_map(dep.crate_id)).collect(); + + let automaton = fst::automaton::Subsequence::new(&query.query); + + let mut op = fst::map::OpBuilder::new(); + for map in &import_maps { + op = op.add(map.fst.search(&automaton)); + } + + let mut stream = op.union(); + let mut res = Vec::new(); + while let Some((_, indexed_values)) = stream.next() { + for indexed_value in indexed_values { + let import_map = &import_maps[indexed_value.index]; + let importables = &import_map.importables[indexed_value.value as usize..]; + + // Path shared by the importable items in this group. + let path = &import_map.map[&importables[0]]; + + if query.anchor_end { + // Last segment must match query. + let last = path.segments.last().unwrap().to_string(); + if last.to_lowercase() != query.query { + continue; + } + } + + // Add the items from this `ModPath` group. Those are all subsequent items in + // `importables` whose paths match `path`. + res.extend(importables.iter().copied().take_while(|item| { + let item_path = &import_map.map[item]; + fst_path(item_path).eq(fst_path(path)) + })); + } + } + + res +} + #[cfg(test)] mod tests { use super::*; use crate::test_db::TestDB; use insta::assert_snapshot; use ra_db::fixture::WithFixture; - use ra_db::SourceDatabase; + use ra_db::{SourceDatabase, Upcast}; fn import_map(ra_fixture: &str) -> String { let db = TestDB::with_files(ra_fixture); @@ -144,6 +274,40 @@ mod tests { import_maps.join("\n") } + fn search_dependencies_of(ra_fixture: &str, krate_name: &str, query: Query) -> String { + let db = TestDB::with_files(ra_fixture); + let crate_graph = db.crate_graph(); + let krate = crate_graph + .iter() + .find(|krate| { + crate_graph[*krate].display_name.as_ref().map(|n| n.to_string()) + == Some(krate_name.to_string()) + }) + .unwrap(); + + search_dependencies(db.upcast(), krate, query) + .into_iter() + .filter_map(|item| { + let mark = match item { + ItemInNs::Types(_) => "t", + ItemInNs::Values(_) => "v", + ItemInNs::Macros(_) => "m", + }; + item.krate(db.upcast()).map(|krate| { + let map = db.import_map(krate); + let path = map.path_of(item).unwrap(); + format!( + "{}::{} ({})", + crate_graph[krate].display_name.as_ref().unwrap(), + path, + mark + ) + }) + }) + .collect::>() + .join("\n") + } + #[test] fn smoke() { let map = import_map( @@ -328,4 +492,87 @@ mod tests { lib: "###); } + + #[test] + fn namespacing() { + let map = import_map( + r" + //- /lib.rs crate:lib + pub struct Thing; // t + v + #[macro_export] + macro_rules! Thing { // m + () => {}; + } + ", + ); + + assert_snapshot!(map, @r###" + lib: + - Thing (m) + - Thing (t) + - Thing (v) + "###); + + let map = import_map( + r" + //- /lib.rs crate:lib + pub mod Thing {} // t + #[macro_export] + macro_rules! Thing { // m + () => {}; + } + ", + ); + + assert_snapshot!(map, @r###" + lib: + - Thing (m) + - Thing (t) + "###); + } + + #[test] + fn search() { + let ra_fixture = r#" + //- /main.rs crate:main deps:dep + //- /dep.rs crate:dep deps:tdep + use tdep::fmt as fmt_dep; + pub mod fmt { + pub trait Display { + fn fmt(); + } + } + #[macro_export] + macro_rules! Fmt { + () => {}; + } + pub struct Fmt; + + pub fn format() {} + pub fn no() {} + + //- /tdep.rs crate:tdep + pub mod fmt { + pub struct NotImportableFromMain; + } + "#; + + let res = search_dependencies_of(ra_fixture, "main", Query::new("fmt")); + assert_snapshot!(res, @r###" + dep::Fmt (v) + dep::fmt (t) + dep::Fmt (t) + dep::Fmt (m) + dep::fmt::Display (t) + dep::format (v) + "###); + + let res = search_dependencies_of(ra_fixture, "main", Query::new("fmt").anchor_end()); + assert_snapshot!(res, @r###" + dep::Fmt (v) + dep::fmt (t) + dep::Fmt (t) + dep::Fmt (m) + "###); + } } -- cgit v1.2.3 From bcf875f46ae5142c42ddac8094e1b6652182d4be Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Wed, 10 Jun 2020 11:52:00 +0200 Subject: Clean up import_map.rs --- crates/ra_hir_def/src/import_map.rs | 45 +++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 25 deletions(-) (limited to 'crates/ra_hir_def/src') diff --git a/crates/ra_hir_def/src/import_map.rs b/crates/ra_hir_def/src/import_map.rs index e9b2fe26e..f2e4ca2db 100644 --- a/crates/ra_hir_def/src/import_map.rs +++ b/crates/ra_hir_def/src/import_map.rs @@ -1,10 +1,8 @@ //! A map of all publicly exported items in a crate. -use std::cmp::Ordering; -use std::{collections::hash_map::Entry, fmt, sync::Arc}; +use std::{cmp::Ordering, collections::hash_map::Entry, fmt, sync::Arc}; use fst::{self, Streamer}; -use itertools::Itertools; use ra_db::CrateId; use rustc_hash::FxHashMap; @@ -118,7 +116,7 @@ impl ImportMap { let start = last_batch_start; last_batch_start = idx + 1; - let key: String = fst_path(&importables[start].1).collect(); + let key = fst_path(&importables[start].1); builder.insert(key, start as u64).unwrap(); } @@ -137,7 +135,8 @@ impl ImportMap { impl PartialEq for ImportMap { fn eq(&self, other: &Self) -> bool { - self.importables == other.importables + // `fst` and `importables` are built from `map`, so we don't need to compare them. + self.map == other.map } } @@ -163,18 +162,16 @@ impl fmt::Debug for ImportMap { } } -fn fst_path(path: &ModPath) -> impl Iterator + '_ { - path.segments - .iter() - .map(|name| name.as_text().unwrap()) - .intersperse("::") - .flat_map(|s| s.chars().map(|c| c.to_ascii_lowercase())) +fn fst_path(path: &ModPath) -> String { + let mut s = path.to_string(); + s.make_ascii_lowercase(); + s } fn cmp((_, lhs): &(&ItemInNs, &ModPath), (_, rhs): &(&ItemInNs, &ModPath)) -> Ordering { - let lhs_chars = fst_path(lhs); - let rhs_chars = fst_path(rhs); - lhs_chars.cmp(rhs_chars) + let lhs_str = fst_path(lhs); + let rhs_str = fst_path(rhs); + lhs_str.cmp(&rhs_str) } #[derive(Debug)] @@ -184,8 +181,8 @@ pub struct Query { } impl Query { - pub fn new(query: impl AsRef) -> Self { - Self { query: query.as_ref().to_lowercase(), anchor_end: false } + pub fn new(query: &str) -> Self { + Self { query: query.to_lowercase(), anchor_end: false } } /// Only returns items whose paths end with the (case-insensitive) query string as their last @@ -197,14 +194,13 @@ impl Query { /// Searches dependencies of `krate` for an importable path matching `query`. /// -/// This returns all items that could be imported from within `krate`, excluding paths inside -/// `krate` itself. +/// This returns a list of items that could be imported from dependencies of `krate`. pub fn search_dependencies<'a>( db: &'a dyn DefDatabase, krate: CrateId, query: Query, ) -> Vec { - let _p = ra_prof::profile("import_map::global_search").detail(|| format!("{:?}", query)); + let _p = ra_prof::profile("search_dependencies").detail(|| format!("{:?}", query)); let graph = db.crate_graph(); let import_maps: Vec<_> = @@ -239,7 +235,7 @@ pub fn search_dependencies<'a>( // `importables` whose paths match `path`. res.extend(importables.iter().copied().take_while(|item| { let item_path = &import_map.map[item]; - fst_path(item_path).eq(fst_path(path)) + fst_path(item_path) == fst_path(path) })); } } @@ -252,6 +248,7 @@ mod tests { use super::*; use crate::test_db::TestDB; use insta::assert_snapshot; + use itertools::Itertools; use ra_db::fixture::WithFixture; use ra_db::{SourceDatabase, Upcast}; @@ -259,7 +256,7 @@ mod tests { let db = TestDB::with_files(ra_fixture); let crate_graph = db.crate_graph(); - let import_maps: Vec<_> = crate_graph + let s = crate_graph .iter() .filter_map(|krate| { let cdata = &crate_graph[krate]; @@ -269,9 +266,8 @@ mod tests { Some(format!("{}:\n{:?}", name, map)) }) - .collect(); - - import_maps.join("\n") + .join("\n"); + s } fn search_dependencies_of(ra_fixture: &str, krate_name: &str, query: Query) -> String { @@ -304,7 +300,6 @@ mod tests { ) }) }) - .collect::>() .join("\n") } -- cgit v1.2.3 From 56c7145993f94a12bf923f08cbd62d963e62bbd1 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Wed, 10 Jun 2020 12:30:33 +0200 Subject: Limit import map queries --- crates/ra_hir_def/src/import_map.rs | 42 ++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) (limited to 'crates/ra_hir_def/src') diff --git a/crates/ra_hir_def/src/import_map.rs b/crates/ra_hir_def/src/import_map.rs index f2e4ca2db..70368d8df 100644 --- a/crates/ra_hir_def/src/import_map.rs +++ b/crates/ra_hir_def/src/import_map.rs @@ -178,11 +178,12 @@ fn cmp((_, lhs): &(&ItemInNs, &ModPath), (_, rhs): &(&ItemInNs, &ModPath)) -> Or pub struct Query { query: String, anchor_end: bool, + limit: usize, } impl Query { pub fn new(query: &str) -> Self { - Self { query: query.to_lowercase(), anchor_end: false } + Self { query: query.to_lowercase(), anchor_end: false, limit: usize::max_value() } } /// Only returns items whose paths end with the (case-insensitive) query string as their last @@ -190,6 +191,11 @@ impl Query { pub fn anchor_end(self) -> Self { Self { anchor_end: true, ..self } } + + /// Limits the returned number of items to `limit`. + pub fn limit(self, limit: usize) -> Self { + Self { limit, ..self } + } } /// Searches dependencies of `krate` for an importable path matching `query`. @@ -237,6 +243,11 @@ pub fn search_dependencies<'a>( let item_path = &import_map.map[item]; fst_path(item_path) == fst_path(path) })); + + if res.len() >= query.limit { + res.truncate(query.limit); + return res; + } } } @@ -570,4 +581,33 @@ mod tests { dep::Fmt (m) "###); } + + #[test] + fn search_limit() { + let res = search_dependencies_of( + r#" + //- /main.rs crate:main deps:dep + //- /dep.rs crate:dep + pub mod fmt { + pub trait Display { + fn fmt(); + } + } + #[macro_export] + macro_rules! Fmt { + () => {}; + } + pub struct Fmt; + + pub fn format() {} + pub fn no() {} + "#, + "main", + Query::new("").limit(2), + ); + assert_snapshot!(res, @r###" + dep::fmt (t) + dep::Fmt (t) + "###); + } } -- cgit v1.2.3 From 7e83ed99a887f959bd4cf97357faf373a09f9269 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Wed, 10 Jun 2020 16:04:55 +0200 Subject: Respect casing when searching for imports --- crates/ra_hir_def/src/import_map.rs | 60 +++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 5 deletions(-) (limited to 'crates/ra_hir_def/src') diff --git a/crates/ra_hir_def/src/import_map.rs b/crates/ra_hir_def/src/import_map.rs index 70368d8df..a55d7d83b 100644 --- a/crates/ra_hir_def/src/import_map.rs +++ b/crates/ra_hir_def/src/import_map.rs @@ -177,13 +177,21 @@ fn cmp((_, lhs): &(&ItemInNs, &ModPath), (_, rhs): &(&ItemInNs, &ModPath)) -> Or #[derive(Debug)] pub struct Query { query: String, + lowercased: String, anchor_end: bool, + case_sensitive: bool, limit: usize, } impl Query { pub fn new(query: &str) -> Self { - Self { query: query.to_lowercase(), anchor_end: false, limit: usize::max_value() } + Self { + lowercased: query.to_lowercase(), + query: query.to_string(), + anchor_end: false, + case_sensitive: false, + limit: usize::max_value(), + } } /// Only returns items whose paths end with the (case-insensitive) query string as their last @@ -196,6 +204,11 @@ impl Query { pub fn limit(self, limit: usize) -> Self { Self { limit, ..self } } + + /// Respect casing of the query string when matching. + pub fn case_sensitive(self) -> Self { + Self { case_sensitive: true, ..self } + } } /// Searches dependencies of `krate` for an importable path matching `query`. @@ -212,7 +225,7 @@ pub fn search_dependencies<'a>( let import_maps: Vec<_> = graph[krate].dependencies.iter().map(|dep| db.import_map(dep.crate_id)).collect(); - let automaton = fst::automaton::Subsequence::new(&query.query); + let automaton = fst::automaton::Subsequence::new(&query.lowercased); let mut op = fst::map::OpBuilder::new(); for map in &import_maps { @@ -232,17 +245,27 @@ pub fn search_dependencies<'a>( if query.anchor_end { // Last segment must match query. let last = path.segments.last().unwrap().to_string(); - if last.to_lowercase() != query.query { + if last.to_lowercase() != query.lowercased { continue; } } // Add the items from this `ModPath` group. Those are all subsequent items in // `importables` whose paths match `path`. - res.extend(importables.iter().copied().take_while(|item| { + let iter = importables.iter().copied().take_while(|item| { let item_path = &import_map.map[item]; fst_path(item_path) == fst_path(path) - })); + }); + + if query.case_sensitive { + // FIXME: This does not do a subsequence match. + res.extend(iter.filter(|item| { + let item_path = &import_map.map[item]; + item_path.to_string().contains(&query.query) + })); + } else { + res.extend(iter); + } if res.len() >= query.limit { res.truncate(query.limit); @@ -582,6 +605,33 @@ mod tests { "###); } + #[test] + fn search_casing() { + let ra_fixture = r#" + //- /main.rs crate:main deps:dep + //- /dep.rs crate:dep + + pub struct fmt; + pub struct FMT; + "#; + + let res = search_dependencies_of(ra_fixture, "main", Query::new("FMT")); + + assert_snapshot!(res, @r###" + dep::FMT (v) + dep::FMT (t) + dep::fmt (t) + dep::fmt (v) + "###); + + let res = search_dependencies_of(ra_fixture, "main", Query::new("FMT").case_sensitive()); + + assert_snapshot!(res, @r###" + dep::FMT (v) + dep::FMT (t) + "###); + } + #[test] fn search_limit() { let res = search_dependencies_of( -- cgit v1.2.3 From dd22657407bb0ab24d141275fd4f0d87269262c8 Mon Sep 17 00:00:00 2001 From: Jonas Schievink Date: Wed, 10 Jun 2020 16:15:49 +0200 Subject: ImportMap: use IndexMap internally It iterates in insertion order, which makes the ordering more predictable. --- crates/ra_hir_def/src/import_map.rs | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'crates/ra_hir_def/src') diff --git a/crates/ra_hir_def/src/import_map.rs b/crates/ra_hir_def/src/import_map.rs index a55d7d83b..36b4fdd81 100644 --- a/crates/ra_hir_def/src/import_map.rs +++ b/crates/ra_hir_def/src/import_map.rs @@ -1,10 +1,11 @@ //! A map of all publicly exported items in a crate. -use std::{cmp::Ordering, collections::hash_map::Entry, fmt, sync::Arc}; +use std::{cmp::Ordering, fmt, hash::BuildHasherDefault, sync::Arc}; use fst::{self, Streamer}; +use indexmap::{map::Entry, IndexMap}; use ra_db::CrateId; -use rustc_hash::FxHashMap; +use rustc_hash::FxHasher; use crate::{ db::DefDatabase, @@ -14,6 +15,8 @@ use crate::{ ModuleDefId, ModuleId, }; +type FxIndexMap = IndexMap>; + /// A map from publicly exported items to the path needed to import/name them from a downstream /// crate. /// @@ -23,7 +26,7 @@ use crate::{ /// Note that all paths are relative to the containing crate's root, so the crate name still needs /// to be prepended to the `ModPath` before the path is valid. pub struct ImportMap { - map: FxHashMap, + map: FxIndexMap, /// List of keys stored in `map`, sorted lexicographically by their `ModPath`. Indexed by the /// values returned by running `fst`. @@ -39,7 +42,7 @@ impl ImportMap { pub fn import_map_query(db: &dyn DefDatabase, krate: CrateId) -> Arc { let _p = ra_prof::profile("import_map_query"); let def_map = db.crate_def_map(krate); - let mut import_map = FxHashMap::with_capacity_and_hasher(64, Default::default()); + let mut import_map = FxIndexMap::with_capacity_and_hasher(64, Default::default()); // We look only into modules that are public(ly reexported), starting with the crate root. let empty = ModPath { kind: PathKind::Plain, segments: vec![] }; @@ -588,9 +591,9 @@ mod tests { let res = search_dependencies_of(ra_fixture, "main", Query::new("fmt")); assert_snapshot!(res, @r###" - dep::Fmt (v) dep::fmt (t) dep::Fmt (t) + dep::Fmt (v) dep::Fmt (m) dep::fmt::Display (t) dep::format (v) @@ -598,9 +601,9 @@ mod tests { let res = search_dependencies_of(ra_fixture, "main", Query::new("fmt").anchor_end()); assert_snapshot!(res, @r###" - dep::Fmt (v) dep::fmt (t) dep::Fmt (t) + dep::Fmt (v) dep::Fmt (m) "###); } @@ -618,17 +621,17 @@ mod tests { let res = search_dependencies_of(ra_fixture, "main", Query::new("FMT")); assert_snapshot!(res, @r###" - dep::FMT (v) - dep::FMT (t) dep::fmt (t) dep::fmt (v) + dep::FMT (t) + dep::FMT (v) "###); let res = search_dependencies_of(ra_fixture, "main", Query::new("FMT").case_sensitive()); assert_snapshot!(res, @r###" - dep::FMT (v) dep::FMT (t) + dep::FMT (v) "###); } -- cgit v1.2.3