diff options
author | Aleksey Kladov <[email protected]> | 2020-07-07 21:53:12 +0100 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2020-07-07 22:28:48 +0100 |
commit | 69b79e3a73f9a1b820cf6d5ebc9968d8b08d4e68 (patch) | |
tree | 774b3fed621311b5d1db620dc2b5f2a4597d2bfa /crates/vfs/src | |
parent | 980a67f44629ed67a54b603aaf9d015a81d61f7a (diff) |
Replace ad hocery with science
Diffstat (limited to 'crates/vfs/src')
-rw-r--r-- | crates/vfs/src/file_set.rs | 105 | ||||
-rw-r--r-- | crates/vfs/src/vfs_path.rs | 31 |
2 files changed, 80 insertions, 56 deletions
diff --git a/crates/vfs/src/file_set.rs b/crates/vfs/src/file_set.rs index b0130017e..37c479306 100644 --- a/crates/vfs/src/file_set.rs +++ b/crates/vfs/src/file_set.rs | |||
@@ -2,8 +2,9 @@ | |||
2 | //! | 2 | //! |
3 | //! Files which do not belong to any explicitly configured `FileSet` belong to | 3 | //! Files which do not belong to any explicitly configured `FileSet` belong to |
4 | //! the default `FileSet`. | 4 | //! the default `FileSet`. |
5 | use std::{fmt, mem}; | 5 | use std::fmt; |
6 | 6 | ||
7 | use fst::{IntoStreamer, Streamer}; | ||
7 | use rustc_hash::FxHashMap; | 8 | use rustc_hash::FxHashMap; |
8 | 9 | ||
9 | use crate::{FileId, Vfs, VfsPath}; | 10 | use crate::{FileId, Vfs, VfsPath}; |
@@ -40,15 +41,10 @@ impl fmt::Debug for FileSet { | |||
40 | } | 41 | } |
41 | } | 42 | } |
42 | 43 | ||
43 | // Invariant: if k1 is a prefix of k2, then they are in different buckets (k2 | ||
44 | // is closer to 0th bucket). | ||
45 | // FIXME: replace with an actual trie some day. | ||
46 | type BadTrie<K, V> = Vec<Vec<(K, V)>>; | ||
47 | |||
48 | #[derive(Debug)] | 44 | #[derive(Debug)] |
49 | pub struct FileSetConfig { | 45 | pub struct FileSetConfig { |
50 | n_file_sets: usize, | 46 | n_file_sets: usize, |
51 | trie: BadTrie<VfsPath, usize>, | 47 | map: fst::Map<Vec<u8>>, |
52 | } | 48 | } |
53 | 49 | ||
54 | impl Default for FileSetConfig { | 50 | impl Default for FileSetConfig { |
@@ -62,9 +58,10 @@ impl FileSetConfig { | |||
62 | FileSetConfigBuilder::default() | 58 | FileSetConfigBuilder::default() |
63 | } | 59 | } |
64 | pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> { | 60 | pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> { |
61 | let mut scratch_space = Vec::new(); | ||
65 | let mut res = vec![FileSet::default(); self.len()]; | 62 | let mut res = vec![FileSet::default(); self.len()]; |
66 | for (file_id, path) in vfs.iter() { | 63 | for (file_id, path) in vfs.iter() { |
67 | let root = self.classify(&path); | 64 | let root = self.classify(&path, &mut scratch_space); |
68 | res[root].insert(file_id, path) | 65 | res[root].insert(file_id, path) |
69 | } | 66 | } |
70 | res | 67 | res |
@@ -72,8 +69,16 @@ impl FileSetConfig { | |||
72 | fn len(&self) -> usize { | 69 | fn len(&self) -> usize { |
73 | self.n_file_sets | 70 | self.n_file_sets |
74 | } | 71 | } |
75 | fn classify(&self, path: &VfsPath) -> usize { | 72 | fn classify(&self, path: &VfsPath, scratch_space: &mut Vec<u8>) -> usize { |
76 | find_ancestor(&self.trie, path, is_prefix).copied().unwrap_or(self.len() - 1) | 73 | scratch_space.clear(); |
74 | path.encode(scratch_space); | ||
75 | let automaton = PrefixOf::new(scratch_space.as_slice()); | ||
76 | let mut longest_prefix = self.len() - 1; | ||
77 | let mut stream = self.map.search(automaton).into_stream(); | ||
78 | while let Some((_, v)) = stream.next() { | ||
79 | longest_prefix = v as usize; | ||
80 | } | ||
81 | longest_prefix | ||
77 | } | 82 | } |
78 | } | 83 | } |
79 | 84 | ||
@@ -96,63 +101,51 @@ impl FileSetConfigBuilder { | |||
96 | } | 101 | } |
97 | pub fn build(self) -> FileSetConfig { | 102 | pub fn build(self) -> FileSetConfig { |
98 | let n_file_sets = self.roots.len() + 1; | 103 | let n_file_sets = self.roots.len() + 1; |
99 | 104 | let map = { | |
100 | let mut trie = BadTrie::new(); | 105 | let mut entries = Vec::new(); |
101 | 106 | for (i, paths) in self.roots.into_iter().enumerate() { | |
102 | for (i, paths) in self.roots.into_iter().enumerate() { | 107 | for p in paths { |
103 | for p in paths { | 108 | let mut buf = Vec::new(); |
104 | insert(&mut trie, p, i, is_prefix); | 109 | p.encode(&mut buf); |
110 | entries.push((buf, i as u64)); | ||
111 | } | ||
105 | } | 112 | } |
106 | } | 113 | entries.sort(); |
107 | trie.iter_mut().for_each(|it| it.sort()); | 114 | entries.dedup_by(|(a, _), (b, _)| a == b); |
108 | FileSetConfig { n_file_sets, trie } | 115 | fst::Map::from_iter(entries).unwrap() |
116 | }; | ||
117 | FileSetConfig { n_file_sets, map } | ||
109 | } | 118 | } |
110 | } | 119 | } |
111 | 120 | ||
112 | fn is_prefix(short: &VfsPath, long: &VfsPath) -> bool { | 121 | struct PrefixOf<'a> { |
113 | long.starts_with(short) | 122 | prefix_of: &'a [u8], |
114 | } | 123 | } |
115 | 124 | ||
116 | fn insert<K: Ord, V, P: Fn(&K, &K) -> bool>( | 125 | impl<'a> PrefixOf<'a> { |
117 | trie: &mut BadTrie<K, V>, | 126 | fn new(prefix_of: &'a [u8]) -> Self { |
118 | mut key: K, | 127 | Self { prefix_of } |
119 | mut value: V, | ||
120 | is_prefix: P, | ||
121 | ) { | ||
122 | 'outer: for level in 0.. { | ||
123 | if trie.len() == level { | ||
124 | trie.push(Vec::new()) | ||
125 | } | ||
126 | for (k, v) in trie[level].iter_mut() { | ||
127 | if is_prefix(&key, k) { | ||
128 | continue 'outer; | ||
129 | } | ||
130 | if is_prefix(k, &key) { | ||
131 | mem::swap(k, &mut key); | ||
132 | mem::swap(v, &mut value); | ||
133 | continue 'outer; | ||
134 | } | ||
135 | } | ||
136 | trie[level].push((key, value)); | ||
137 | return; | ||
138 | } | 128 | } |
139 | } | 129 | } |
140 | 130 | ||
141 | fn find_ancestor<'t, K: Ord, V, P: Fn(&K, &K) -> bool>( | 131 | impl fst::Automaton for PrefixOf<'_> { |
142 | trie: &'t BadTrie<K, V>, | 132 | type State = usize; |
143 | key: &K, | 133 | fn start(&self) -> usize { |
144 | is_prefix: P, | 134 | 0 |
145 | ) -> Option<&'t V> { | 135 | } |
146 | for bucket in trie { | 136 | fn is_match(&self, &state: &usize) -> bool { |
147 | let idx = match bucket.binary_search_by(|(k, _)| k.cmp(key)) { | 137 | state != !0 |
148 | Ok(it) => it, | 138 | } |
149 | Err(it) => it.saturating_sub(1), | 139 | fn can_match(&self, &state: &usize) -> bool { |
150 | }; | 140 | state != !0 |
151 | if !bucket.is_empty() && is_prefix(&bucket[idx].0, key) { | 141 | } |
152 | return Some(&bucket[idx].1); | 142 | fn accept(&self, &state: &usize, byte: u8) -> usize { |
143 | if self.prefix_of.get(state) == Some(&byte) { | ||
144 | state + 1 | ||
145 | } else { | ||
146 | !0 | ||
153 | } | 147 | } |
154 | } | 148 | } |
155 | None | ||
156 | } | 149 | } |
157 | 150 | ||
158 | #[test] | 151 | #[test] |
diff --git a/crates/vfs/src/vfs_path.rs b/crates/vfs/src/vfs_path.rs index dc3031ada..04a42264e 100644 --- a/crates/vfs/src/vfs_path.rs +++ b/crates/vfs/src/vfs_path.rs | |||
@@ -48,6 +48,37 @@ impl VfsPath { | |||
48 | (VfsPathRepr::VirtualPath(_), _) => false, | 48 | (VfsPathRepr::VirtualPath(_), _) => false, |
49 | } | 49 | } |
50 | } | 50 | } |
51 | |||
52 | // Don't make this `pub` | ||
53 | pub(crate) fn encode(&self, buf: &mut Vec<u8>) { | ||
54 | let tag = match &self.0 { | ||
55 | VfsPathRepr::PathBuf(_) => 0, | ||
56 | VfsPathRepr::VirtualPath(_) => 1, | ||
57 | }; | ||
58 | buf.push(tag); | ||
59 | match &self.0 { | ||
60 | VfsPathRepr::PathBuf(it) => { | ||
61 | let path: &std::ffi::OsStr = it.as_os_str(); | ||
62 | #[cfg(windows)] | ||
63 | { | ||
64 | use std::os::windows::ffi::OsStrExt; | ||
65 | for wchar in path.encode_wide() { | ||
66 | buf.extend(wchar.to_le_bytes().iter().copied()); | ||
67 | } | ||
68 | } | ||
69 | #[cfg(unix)] | ||
70 | { | ||
71 | use std::os::unix::ffi::OsStrExt; | ||
72 | buf.extend(path.as_bytes()); | ||
73 | } | ||
74 | #[cfg(not(any(windows, unix)))] | ||
75 | { | ||
76 | buf.extend(path.to_string_lossy().as_bytes()); | ||
77 | } | ||
78 | } | ||
79 | VfsPathRepr::VirtualPath(VirtualPath(s)) => buf.extend(s.as_bytes()), | ||
80 | } | ||
81 | } | ||
51 | } | 82 | } |
52 | 83 | ||
53 | #[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] | 84 | #[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] |