aboutsummaryrefslogtreecommitdiff
path: root/crates/vfs
diff options
context:
space:
mode:
authorbors[bot] <26634292+bors[bot]@users.noreply.github.com>2020-07-07 22:32:08 +0100
committerGitHub <[email protected]>2020-07-07 22:32:08 +0100
commit41feb816c98894d48df97193eb085738e405a3c2 (patch)
tree0bfb9d622da4eb9b5bb2748f47b3de7ec24ad8f9 /crates/vfs
parent56ade20380a028026eeb71af2a8a81e37ede7efb (diff)
parent69b79e3a73f9a1b820cf6d5ebc9968d8b08d4e68 (diff)
Merge #5255
5255: Replace ad hocery with science r=matklad a=matklad bors r+ 🤖 Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/vfs')
-rw-r--r--crates/vfs/Cargo.toml1
-rw-r--r--crates/vfs/src/file_set.rs105
-rw-r--r--crates/vfs/src/vfs_path.rs31
3 files changed, 81 insertions, 56 deletions
diff --git a/crates/vfs/Cargo.toml b/crates/vfs/Cargo.toml
index 263069002..b985c4c10 100644
--- a/crates/vfs/Cargo.toml
+++ b/crates/vfs/Cargo.toml
@@ -6,5 +6,6 @@ edition = "2018"
6 6
7[dependencies] 7[dependencies]
8rustc-hash = "1.0" 8rustc-hash = "1.0"
9fst = "0.4"
9 10
10paths = { path = "../paths" } 11paths = { path = "../paths" }
diff --git a/crates/vfs/src/file_set.rs b/crates/vfs/src/file_set.rs
index b0130017e..37c479306 100644
--- a/crates/vfs/src/file_set.rs
+++ b/crates/vfs/src/file_set.rs
@@ -2,8 +2,9 @@
2//! 2//!
3//! Files which do not belong to any explicitly configured `FileSet` belong to 3//! Files which do not belong to any explicitly configured `FileSet` belong to
4//! the default `FileSet`. 4//! the default `FileSet`.
5use std::{fmt, mem}; 5use std::fmt;
6 6
7use fst::{IntoStreamer, Streamer};
7use rustc_hash::FxHashMap; 8use rustc_hash::FxHashMap;
8 9
9use crate::{FileId, Vfs, VfsPath}; 10use crate::{FileId, Vfs, VfsPath};
@@ -40,15 +41,10 @@ impl fmt::Debug for FileSet {
40 } 41 }
41} 42}
42 43
43// Invariant: if k1 is a prefix of k2, then they are in different buckets (k2
44// is closer to 0th bucket).
45// FIXME: replace with an actual trie some day.
46type BadTrie<K, V> = Vec<Vec<(K, V)>>;
47
48#[derive(Debug)] 44#[derive(Debug)]
49pub struct FileSetConfig { 45pub struct FileSetConfig {
50 n_file_sets: usize, 46 n_file_sets: usize,
51 trie: BadTrie<VfsPath, usize>, 47 map: fst::Map<Vec<u8>>,
52} 48}
53 49
54impl Default for FileSetConfig { 50impl Default for FileSetConfig {
@@ -62,9 +58,10 @@ impl FileSetConfig {
62 FileSetConfigBuilder::default() 58 FileSetConfigBuilder::default()
63 } 59 }
64 pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> { 60 pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> {
61 let mut scratch_space = Vec::new();
65 let mut res = vec![FileSet::default(); self.len()]; 62 let mut res = vec![FileSet::default(); self.len()];
66 for (file_id, path) in vfs.iter() { 63 for (file_id, path) in vfs.iter() {
67 let root = self.classify(&path); 64 let root = self.classify(&path, &mut scratch_space);
68 res[root].insert(file_id, path) 65 res[root].insert(file_id, path)
69 } 66 }
70 res 67 res
@@ -72,8 +69,16 @@ impl FileSetConfig {
72 fn len(&self) -> usize { 69 fn len(&self) -> usize {
73 self.n_file_sets 70 self.n_file_sets
74 } 71 }
75 fn classify(&self, path: &VfsPath) -> usize { 72 fn classify(&self, path: &VfsPath, scratch_space: &mut Vec<u8>) -> usize {
76 find_ancestor(&self.trie, path, is_prefix).copied().unwrap_or(self.len() - 1) 73 scratch_space.clear();
74 path.encode(scratch_space);
75 let automaton = PrefixOf::new(scratch_space.as_slice());
76 let mut longest_prefix = self.len() - 1;
77 let mut stream = self.map.search(automaton).into_stream();
78 while let Some((_, v)) = stream.next() {
79 longest_prefix = v as usize;
80 }
81 longest_prefix
77 } 82 }
78} 83}
79 84
@@ -96,63 +101,51 @@ impl FileSetConfigBuilder {
96 } 101 }
97 pub fn build(self) -> FileSetConfig { 102 pub fn build(self) -> FileSetConfig {
98 let n_file_sets = self.roots.len() + 1; 103 let n_file_sets = self.roots.len() + 1;
99 104 let map = {
100 let mut trie = BadTrie::new(); 105 let mut entries = Vec::new();
101 106 for (i, paths) in self.roots.into_iter().enumerate() {
102 for (i, paths) in self.roots.into_iter().enumerate() { 107 for p in paths {
103 for p in paths { 108 let mut buf = Vec::new();
104 insert(&mut trie, p, i, is_prefix); 109 p.encode(&mut buf);
110 entries.push((buf, i as u64));
111 }
105 } 112 }
106 } 113 entries.sort();
107 trie.iter_mut().for_each(|it| it.sort()); 114 entries.dedup_by(|(a, _), (b, _)| a == b);
108 FileSetConfig { n_file_sets, trie } 115 fst::Map::from_iter(entries).unwrap()
116 };
117 FileSetConfig { n_file_sets, map }
109 } 118 }
110} 119}
111 120
112fn is_prefix(short: &VfsPath, long: &VfsPath) -> bool { 121struct PrefixOf<'a> {
113 long.starts_with(short) 122 prefix_of: &'a [u8],
114} 123}
115 124
116fn insert<K: Ord, V, P: Fn(&K, &K) -> bool>( 125impl<'a> PrefixOf<'a> {
117 trie: &mut BadTrie<K, V>, 126 fn new(prefix_of: &'a [u8]) -> Self {
118 mut key: K, 127 Self { prefix_of }
119 mut value: V,
120 is_prefix: P,
121) {
122 'outer: for level in 0.. {
123 if trie.len() == level {
124 trie.push(Vec::new())
125 }
126 for (k, v) in trie[level].iter_mut() {
127 if is_prefix(&key, k) {
128 continue 'outer;
129 }
130 if is_prefix(k, &key) {
131 mem::swap(k, &mut key);
132 mem::swap(v, &mut value);
133 continue 'outer;
134 }
135 }
136 trie[level].push((key, value));
137 return;
138 } 128 }
139} 129}
140 130
141fn find_ancestor<'t, K: Ord, V, P: Fn(&K, &K) -> bool>( 131impl fst::Automaton for PrefixOf<'_> {
142 trie: &'t BadTrie<K, V>, 132 type State = usize;
143 key: &K, 133 fn start(&self) -> usize {
144 is_prefix: P, 134 0
145) -> Option<&'t V> { 135 }
146 for bucket in trie { 136 fn is_match(&self, &state: &usize) -> bool {
147 let idx = match bucket.binary_search_by(|(k, _)| k.cmp(key)) { 137 state != !0
148 Ok(it) => it, 138 }
149 Err(it) => it.saturating_sub(1), 139 fn can_match(&self, &state: &usize) -> bool {
150 }; 140 state != !0
151 if !bucket.is_empty() && is_prefix(&bucket[idx].0, key) { 141 }
152 return Some(&bucket[idx].1); 142 fn accept(&self, &state: &usize, byte: u8) -> usize {
143 if self.prefix_of.get(state) == Some(&byte) {
144 state + 1
145 } else {
146 !0
153 } 147 }
154 } 148 }
155 None
156} 149}
157 150
158#[test] 151#[test]
diff --git a/crates/vfs/src/vfs_path.rs b/crates/vfs/src/vfs_path.rs
index dc3031ada..04a42264e 100644
--- a/crates/vfs/src/vfs_path.rs
+++ b/crates/vfs/src/vfs_path.rs
@@ -48,6 +48,37 @@ impl VfsPath {
48 (VfsPathRepr::VirtualPath(_), _) => false, 48 (VfsPathRepr::VirtualPath(_), _) => false,
49 } 49 }
50 } 50 }
51
52 // Don't make this `pub`
53 pub(crate) fn encode(&self, buf: &mut Vec<u8>) {
54 let tag = match &self.0 {
55 VfsPathRepr::PathBuf(_) => 0,
56 VfsPathRepr::VirtualPath(_) => 1,
57 };
58 buf.push(tag);
59 match &self.0 {
60 VfsPathRepr::PathBuf(it) => {
61 let path: &std::ffi::OsStr = it.as_os_str();
62 #[cfg(windows)]
63 {
64 use std::os::windows::ffi::OsStrExt;
65 for wchar in path.encode_wide() {
66 buf.extend(wchar.to_le_bytes().iter().copied());
67 }
68 }
69 #[cfg(unix)]
70 {
71 use std::os::unix::ffi::OsStrExt;
72 buf.extend(path.as_bytes());
73 }
74 #[cfg(not(any(windows, unix)))]
75 {
76 buf.extend(path.to_string_lossy().as_bytes());
77 }
78 }
79 VfsPathRepr::VirtualPath(VirtualPath(s)) => buf.extend(s.as_bytes()),
80 }
81 }
51} 82}
52 83
53#[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] 84#[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]