aboutsummaryrefslogtreecommitdiff
path: root/crates/vfs/src/file_set.rs
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2020-07-07 16:38:02 +0100
committerAleksey Kladov <[email protected]>2020-07-07 16:38:02 +0100
commit5d2225f4bc82c4cd551db5a53500e7a076bf5586 (patch)
tree31bb591b935992d7f30e0fabe8633018ddbf6d02 /crates/vfs/src/file_set.rs
parent7407636568c791b39cd12f9176f667dac2deef1b (diff)
Fix symbol search in salsa
Previous solution for binning paths into disjoint directories was simple and fast -- just a single binary search. Unfortunatelly, it wasn't coorrect: if the ditr are /d /d/a /d/c then partitioning the file /d/b/lib.rs won't pick /d as a correct directory. The correct solution here is a trie, but it requires exposing path components. So, we use a poor man's substitution -- a *vector* of sorted paths, such that each bucket is prefix-free closes #5246
Diffstat (limited to 'crates/vfs/src/file_set.rs')
-rw-r--r--crates/vfs/src/file_set.rs109
1 files changed, 90 insertions, 19 deletions
diff --git a/crates/vfs/src/file_set.rs b/crates/vfs/src/file_set.rs
index 977ba3010..b0130017e 100644
--- a/crates/vfs/src/file_set.rs
+++ b/crates/vfs/src/file_set.rs
@@ -2,7 +2,7 @@
2//! 2//!
3//! Files which do not belong to any explicitly configured `FileSet` belong to 3//! Files which do not belong to any explicitly configured `FileSet` belong to
4//! the default `FileSet`. 4//! the default `FileSet`.
5use std::{fmt, iter}; 5use std::{fmt, mem};
6 6
7use rustc_hash::FxHashMap; 7use rustc_hash::FxHashMap;
8 8
@@ -15,6 +15,9 @@ pub struct FileSet {
15} 15}
16 16
17impl FileSet { 17impl FileSet {
18 pub fn len(&self) -> usize {
19 self.files.len()
20 }
18 pub fn resolve_path(&self, anchor: FileId, path: &str) -> Option<FileId> { 21 pub fn resolve_path(&self, anchor: FileId, path: &str) -> Option<FileId> {
19 let mut base = self.paths[&anchor].clone(); 22 let mut base = self.paths[&anchor].clone();
20 base.pop(); 23 base.pop();
@@ -37,10 +40,15 @@ impl fmt::Debug for FileSet {
37 } 40 }
38} 41}
39 42
43// Invariant: if k1 is a prefix of k2, then they are in different buckets (k2
44// is closer to 0th bucket).
45// FIXME: replace with an actual trie some day.
46type BadTrie<K, V> = Vec<Vec<(K, V)>>;
47
40#[derive(Debug)] 48#[derive(Debug)]
41pub struct FileSetConfig { 49pub struct FileSetConfig {
42 n_file_sets: usize, 50 n_file_sets: usize,
43 roots: Vec<(VfsPath, usize)>, 51 trie: BadTrie<VfsPath, usize>,
44} 52}
45 53
46impl Default for FileSetConfig { 54impl Default for FileSetConfig {
@@ -65,15 +73,7 @@ impl FileSetConfig {
65 self.n_file_sets 73 self.n_file_sets
66 } 74 }
67 fn classify(&self, path: &VfsPath) -> usize { 75 fn classify(&self, path: &VfsPath) -> usize {
68 let idx = match self.roots.binary_search_by(|(p, _)| p.cmp(path)) { 76 find_ancestor(&self.trie, path, is_prefix).copied().unwrap_or(self.len() - 1)
69 Ok(it) => it,
70 Err(it) => it.saturating_sub(1),
71 };
72 if !self.roots.is_empty() && path.starts_with(&self.roots[idx].0) {
73 self.roots[idx].1
74 } else {
75 self.len() - 1
76 }
77 } 77 }
78} 78}
79 79
@@ -96,13 +96,84 @@ impl FileSetConfigBuilder {
96 } 96 }
97 pub fn build(self) -> FileSetConfig { 97 pub fn build(self) -> FileSetConfig {
98 let n_file_sets = self.roots.len() + 1; 98 let n_file_sets = self.roots.len() + 1;
99 let mut roots: Vec<(VfsPath, usize)> = self 99
100 .roots 100 let mut trie = BadTrie::new();
101 .into_iter() 101
102 .enumerate() 102 for (i, paths) in self.roots.into_iter().enumerate() {
103 .flat_map(|(i, paths)| paths.into_iter().zip(iter::repeat(i))) 103 for p in paths {
104 .collect(); 104 insert(&mut trie, p, i, is_prefix);
105 roots.sort(); 105 }
106 FileSetConfig { n_file_sets, roots } 106 }
107 trie.iter_mut().for_each(|it| it.sort());
108 FileSetConfig { n_file_sets, trie }
109 }
110}
111
112fn is_prefix(short: &VfsPath, long: &VfsPath) -> bool {
113 long.starts_with(short)
114}
115
116fn insert<K: Ord, V, P: Fn(&K, &K) -> bool>(
117 trie: &mut BadTrie<K, V>,
118 mut key: K,
119 mut value: V,
120 is_prefix: P,
121) {
122 'outer: for level in 0.. {
123 if trie.len() == level {
124 trie.push(Vec::new())
125 }
126 for (k, v) in trie[level].iter_mut() {
127 if is_prefix(&key, k) {
128 continue 'outer;
129 }
130 if is_prefix(k, &key) {
131 mem::swap(k, &mut key);
132 mem::swap(v, &mut value);
133 continue 'outer;
134 }
135 }
136 trie[level].push((key, value));
137 return;
138 }
139}
140
141fn find_ancestor<'t, K: Ord, V, P: Fn(&K, &K) -> bool>(
142 trie: &'t BadTrie<K, V>,
143 key: &K,
144 is_prefix: P,
145) -> Option<&'t V> {
146 for bucket in trie {
147 let idx = match bucket.binary_search_by(|(k, _)| k.cmp(key)) {
148 Ok(it) => it,
149 Err(it) => it.saturating_sub(1),
150 };
151 if !bucket.is_empty() && is_prefix(&bucket[idx].0, key) {
152 return Some(&bucket[idx].1);
153 }
107 } 154 }
155 None
156}
157
158#[test]
159fn test_partitioning() {
160 let mut file_set = FileSetConfig::builder();
161 file_set.add_file_set(vec![VfsPath::new_virtual_path("/foo".into())]);
162 file_set.add_file_set(vec![VfsPath::new_virtual_path("/foo/bar/baz".into())]);
163 let file_set = file_set.build();
164
165 let mut vfs = Vfs::default();
166 vfs.set_file_contents(VfsPath::new_virtual_path("/foo/src/lib.rs".into()), Some(Vec::new()));
167 vfs.set_file_contents(
168 VfsPath::new_virtual_path("/foo/src/bar/baz/lib.rs".into()),
169 Some(Vec::new()),
170 );
171 vfs.set_file_contents(
172 VfsPath::new_virtual_path("/foo/bar/baz/lib.rs".into()),
173 Some(Vec::new()),
174 );
175 vfs.set_file_contents(VfsPath::new_virtual_path("/quux/lib.rs".into()), Some(Vec::new()));
176
177 let partition = file_set.partition(&vfs).into_iter().map(|it| it.len()).collect::<Vec<_>>();
178 assert_eq!(partition, vec![2, 1, 1]);
108} 179}