aboutsummaryrefslogtreecommitdiff
path: root/crates/vfs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/vfs')
-rw-r--r--crates/vfs/Cargo.toml15
-rw-r--r--crates/vfs/src/file_set.rs172
-rw-r--r--crates/vfs/src/lib.rs140
-rw-r--r--crates/vfs/src/loader.rs71
-rw-r--r--crates/vfs/src/path_interner.rs31
-rw-r--r--crates/vfs/src/vfs_path.rs146
6 files changed, 575 insertions, 0 deletions
diff --git a/crates/vfs/Cargo.toml b/crates/vfs/Cargo.toml
new file mode 100644
index 000000000..b74cdb7ff
--- /dev/null
+++ b/crates/vfs/Cargo.toml
@@ -0,0 +1,15 @@
1[package]
2name = "vfs"
3version = "0.1.0"
4authors = ["rust-analyzer developers"]
5edition = "2018"
6license = "MIT OR Apache-2.0"
7
8[lib]
9doctest = false
10
11[dependencies]
12rustc-hash = "1.0"
13fst = "0.4"
14
15paths = { path = "../paths" }
diff --git a/crates/vfs/src/file_set.rs b/crates/vfs/src/file_set.rs
new file mode 100644
index 000000000..e5e2ef530
--- /dev/null
+++ b/crates/vfs/src/file_set.rs
@@ -0,0 +1,172 @@
1//! Partitions a list of files into disjoint subsets.
2//!
3//! Files which do not belong to any explicitly configured `FileSet` belong to
4//! the default `FileSet`.
5use std::fmt;
6
7use fst::{IntoStreamer, Streamer};
8use rustc_hash::FxHashMap;
9
10use crate::{FileId, Vfs, VfsPath};
11
12#[derive(Default, Clone, Eq, PartialEq)]
13pub struct FileSet {
14 files: FxHashMap<VfsPath, FileId>,
15 paths: FxHashMap<FileId, VfsPath>,
16}
17
18impl FileSet {
19 pub fn len(&self) -> usize {
20 self.files.len()
21 }
22 pub fn resolve_path(&self, anchor: FileId, path: &str) -> Option<FileId> {
23 let mut base = self.paths[&anchor].clone();
24 base.pop();
25 let path = base.join(path)?;
26 let res = self.files.get(&path).copied();
27 res
28 }
29 pub fn insert(&mut self, file_id: FileId, path: VfsPath) {
30 self.files.insert(path.clone(), file_id);
31 self.paths.insert(file_id, path);
32 }
33 pub fn iter(&self) -> impl Iterator<Item = FileId> + '_ {
34 self.paths.keys().copied()
35 }
36}
37
38impl fmt::Debug for FileSet {
39 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
40 f.debug_struct("FileSet").field("n_files", &self.files.len()).finish()
41 }
42}
43
44#[derive(Debug)]
45pub struct FileSetConfig {
46 n_file_sets: usize,
47 map: fst::Map<Vec<u8>>,
48}
49
50impl Default for FileSetConfig {
51 fn default() -> Self {
52 FileSetConfig::builder().build()
53 }
54}
55
56impl FileSetConfig {
57 pub fn builder() -> FileSetConfigBuilder {
58 FileSetConfigBuilder::default()
59 }
60 pub fn partition(&self, vfs: &Vfs) -> Vec<FileSet> {
61 let mut scratch_space = Vec::new();
62 let mut res = vec![FileSet::default(); self.len()];
63 for (file_id, path) in vfs.iter() {
64 let root = self.classify(&path, &mut scratch_space);
65 res[root].insert(file_id, path.clone())
66 }
67 res
68 }
69 fn len(&self) -> usize {
70 self.n_file_sets
71 }
72 fn classify(&self, path: &VfsPath, scratch_space: &mut Vec<u8>) -> usize {
73 scratch_space.clear();
74 path.encode(scratch_space);
75 let automaton = PrefixOf::new(scratch_space.as_slice());
76 let mut longest_prefix = self.len() - 1;
77 let mut stream = self.map.search(automaton).into_stream();
78 while let Some((_, v)) = stream.next() {
79 longest_prefix = v as usize;
80 }
81 longest_prefix
82 }
83}
84
85pub struct FileSetConfigBuilder {
86 roots: Vec<Vec<VfsPath>>,
87}
88
89impl Default for FileSetConfigBuilder {
90 fn default() -> Self {
91 FileSetConfigBuilder { roots: Vec::new() }
92 }
93}
94
95impl FileSetConfigBuilder {
96 pub fn len(&self) -> usize {
97 self.roots.len()
98 }
99 pub fn add_file_set(&mut self, roots: Vec<VfsPath>) {
100 self.roots.push(roots)
101 }
102 pub fn build(self) -> FileSetConfig {
103 let n_file_sets = self.roots.len() + 1;
104 let map = {
105 let mut entries = Vec::new();
106 for (i, paths) in self.roots.into_iter().enumerate() {
107 for p in paths {
108 let mut buf = Vec::new();
109 p.encode(&mut buf);
110 entries.push((buf, i as u64));
111 }
112 }
113 entries.sort();
114 entries.dedup_by(|(a, _), (b, _)| a == b);
115 fst::Map::from_iter(entries).unwrap()
116 };
117 FileSetConfig { n_file_sets, map }
118 }
119}
120
121struct PrefixOf<'a> {
122 prefix_of: &'a [u8],
123}
124
125impl<'a> PrefixOf<'a> {
126 fn new(prefix_of: &'a [u8]) -> Self {
127 Self { prefix_of }
128 }
129}
130
131impl fst::Automaton for PrefixOf<'_> {
132 type State = usize;
133 fn start(&self) -> usize {
134 0
135 }
136 fn is_match(&self, &state: &usize) -> bool {
137 state != !0
138 }
139 fn can_match(&self, &state: &usize) -> bool {
140 state != !0
141 }
142 fn accept(&self, &state: &usize, byte: u8) -> usize {
143 if self.prefix_of.get(state) == Some(&byte) {
144 state + 1
145 } else {
146 !0
147 }
148 }
149}
150
151#[test]
152fn test_partitioning() {
153 let mut file_set = FileSetConfig::builder();
154 file_set.add_file_set(vec![VfsPath::new_virtual_path("/foo".into())]);
155 file_set.add_file_set(vec![VfsPath::new_virtual_path("/foo/bar/baz".into())]);
156 let file_set = file_set.build();
157
158 let mut vfs = Vfs::default();
159 vfs.set_file_contents(VfsPath::new_virtual_path("/foo/src/lib.rs".into()), Some(Vec::new()));
160 vfs.set_file_contents(
161 VfsPath::new_virtual_path("/foo/src/bar/baz/lib.rs".into()),
162 Some(Vec::new()),
163 );
164 vfs.set_file_contents(
165 VfsPath::new_virtual_path("/foo/bar/baz/lib.rs".into()),
166 Some(Vec::new()),
167 );
168 vfs.set_file_contents(VfsPath::new_virtual_path("/quux/lib.rs".into()), Some(Vec::new()));
169
170 let partition = file_set.partition(&vfs).into_iter().map(|it| it.len()).collect::<Vec<_>>();
171 assert_eq!(partition, vec![2, 1, 1]);
172}
diff --git a/crates/vfs/src/lib.rs b/crates/vfs/src/lib.rs
new file mode 100644
index 000000000..cdf6f1fd0
--- /dev/null
+++ b/crates/vfs/src/lib.rs
@@ -0,0 +1,140 @@
1//! # Virtual File System
2//!
3//! VFS stores all files read by rust-analyzer. Reading file contents from VFS
4//! always returns the same contents, unless VFS was explicitly modified with
5//! `set_file_contents`. All changes to VFS are logged, and can be retrieved via
6//! `take_changes` method. The pack of changes is then pushed to `salsa` and
7//! triggers incremental recomputation.
8//!
9//! Files in VFS are identified with `FileId`s -- interned paths. The notion of
10//! the path, `VfsPath` is somewhat abstract: at the moment, it is represented
11//! as an `std::path::PathBuf` internally, but this is an implementation detail.
12//!
13//! VFS doesn't do IO or file watching itself. For that, see the `loader`
14//! module. `loader::Handle` is an object-safe trait which abstracts both file
15//! loading and file watching. `Handle` is dynamically configured with a set of
16//! directory entries which should be scanned and watched. `Handle` then
17//! asynchronously pushes file changes. Directory entries are configured in
18//! free-form via list of globs, it's up to the `Handle` to interpret the globs
19//! in any specific way.
20//!
21//! A simple `WalkdirLoaderHandle` is provided, which doesn't implement watching
22//! and just scans the directory using walkdir.
23//!
24//! VFS stores a flat list of files. `FileSet` can partition this list of files
25//! into disjoint sets of files. Traversal-like operations (including getting
26//! the neighbor file by the relative path) are handled by the `FileSet`.
27//! `FileSet`s are also pushed to salsa and cause it to re-check `mod foo;`
28//! declarations when files are created or deleted.
29//!
30//! `file_set::FileSet` and `loader::Entry` play similar, but different roles.
31//! Both specify the "set of paths/files", one is geared towards file watching,
32//! the other towards salsa changes. In particular, single `file_set::FileSet`
33//! may correspond to several `loader::Entry`. For example, a crate from
34//! crates.io which uses code generation would have two `Entries` -- for sources
35//! in `~/.cargo`, and for generated code in `./target/debug/build`. It will
36//! have a single `FileSet` which unions the two sources.
37mod vfs_path;
38mod path_interner;
39pub mod file_set;
40pub mod loader;
41
42use std::{fmt, mem};
43
44use crate::path_interner::PathInterner;
45
46pub use crate::vfs_path::VfsPath;
47pub use paths::{AbsPath, AbsPathBuf};
48
49#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)]
50pub struct FileId(pub u32);
51
52#[derive(Default)]
53pub struct Vfs {
54 interner: PathInterner,
55 data: Vec<Option<Vec<u8>>>,
56 changes: Vec<ChangedFile>,
57}
58
59pub struct ChangedFile {
60 pub file_id: FileId,
61 pub change_kind: ChangeKind,
62}
63
64impl ChangedFile {
65 pub fn exists(&self) -> bool {
66 self.change_kind != ChangeKind::Delete
67 }
68 pub fn is_created_or_deleted(&self) -> bool {
69 matches!(self.change_kind, ChangeKind::Create | ChangeKind::Delete)
70 }
71}
72
73#[derive(Eq, PartialEq, Copy, Clone, Debug)]
74pub enum ChangeKind {
75 Create,
76 Modify,
77 Delete,
78}
79
80impl Vfs {
81 pub fn len(&self) -> usize {
82 self.data.len()
83 }
84 pub fn file_id(&self, path: &VfsPath) -> Option<FileId> {
85 self.interner.get(path).filter(|&it| self.get(it).is_some())
86 }
87 pub fn file_path(&self, file_id: FileId) -> VfsPath {
88 self.interner.lookup(file_id).clone()
89 }
90 pub fn file_contents(&self, file_id: FileId) -> &[u8] {
91 self.get(file_id).as_deref().unwrap()
92 }
93 pub fn iter(&self) -> impl Iterator<Item = (FileId, &VfsPath)> + '_ {
94 (0..self.data.len())
95 .map(|it| FileId(it as u32))
96 .filter(move |&file_id| self.get(file_id).is_some())
97 .map(move |file_id| {
98 let path = self.interner.lookup(file_id);
99 (file_id, path)
100 })
101 }
102 pub fn set_file_contents(&mut self, path: VfsPath, contents: Option<Vec<u8>>) {
103 let file_id = self.alloc_file_id(path);
104 let change_kind = match (&self.get(file_id), &contents) {
105 (None, None) => return,
106 (None, Some(_)) => ChangeKind::Create,
107 (Some(_), None) => ChangeKind::Delete,
108 (Some(old), Some(new)) if old == new => return,
109 (Some(_), Some(_)) => ChangeKind::Modify,
110 };
111
112 *self.get_mut(file_id) = contents;
113 self.changes.push(ChangedFile { file_id, change_kind })
114 }
115 pub fn has_changes(&self) -> bool {
116 !self.changes.is_empty()
117 }
118 pub fn take_changes(&mut self) -> Vec<ChangedFile> {
119 mem::take(&mut self.changes)
120 }
121 fn alloc_file_id(&mut self, path: VfsPath) -> FileId {
122 let file_id = self.interner.intern(path);
123 let idx = file_id.0 as usize;
124 let len = self.data.len().max(idx + 1);
125 self.data.resize_with(len, || None);
126 file_id
127 }
128 fn get(&self, file_id: FileId) -> &Option<Vec<u8>> {
129 &self.data[file_id.0 as usize]
130 }
131 fn get_mut(&mut self, file_id: FileId) -> &mut Option<Vec<u8>> {
132 &mut self.data[file_id.0 as usize]
133 }
134}
135
136impl fmt::Debug for Vfs {
137 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
138 f.debug_struct("Vfs").field("n_files", &self.data.len()).finish()
139 }
140}
diff --git a/crates/vfs/src/loader.rs b/crates/vfs/src/loader.rs
new file mode 100644
index 000000000..6de2e5b3f
--- /dev/null
+++ b/crates/vfs/src/loader.rs
@@ -0,0 +1,71 @@
1//! Object safe interface for file watching and reading.
2use std::fmt;
3
4use paths::{AbsPath, AbsPathBuf};
5
6#[derive(Debug)]
7pub enum Entry {
8 Files(Vec<AbsPathBuf>),
9 Directory { path: AbsPathBuf, include: Vec<String> },
10}
11
12#[derive(Debug)]
13pub struct Config {
14 pub load: Vec<Entry>,
15 pub watch: Vec<usize>,
16}
17
18pub enum Message {
19 Progress { n_total: usize, n_done: usize },
20 Loaded { files: Vec<(AbsPathBuf, Option<Vec<u8>>)> },
21}
22
23pub type Sender = Box<dyn Fn(Message) + Send>;
24
25pub trait Handle: fmt::Debug {
26 fn spawn(sender: Sender) -> Self
27 where
28 Self: Sized;
29 fn set_config(&mut self, config: Config);
30 fn invalidate(&mut self, path: AbsPathBuf);
31 fn load_sync(&mut self, path: &AbsPath) -> Option<Vec<u8>>;
32}
33
34impl Entry {
35 pub fn rs_files_recursively(base: AbsPathBuf) -> Entry {
36 Entry::Directory { path: base, include: globs(&["*.rs", "!/.git/"]) }
37 }
38 pub fn local_cargo_package(base: AbsPathBuf) -> Entry {
39 Entry::Directory { path: base, include: globs(&["*.rs", "!/target/", "!/.git/"]) }
40 }
41 pub fn cargo_package_dependency(base: AbsPathBuf) -> Entry {
42 Entry::Directory {
43 path: base,
44 include: globs(&["*.rs", "!/tests/", "!/examples/", "!/benches/", "!/.git/"]),
45 }
46 }
47}
48
49fn globs(globs: &[&str]) -> Vec<String> {
50 globs.iter().map(|it| it.to_string()).collect()
51}
52
53impl fmt::Debug for Message {
54 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55 match self {
56 Message::Loaded { files } => {
57 f.debug_struct("Loaded").field("n_files", &files.len()).finish()
58 }
59 Message::Progress { n_total, n_done } => f
60 .debug_struct("Progress")
61 .field("n_total", n_total)
62 .field("n_done", n_done)
63 .finish(),
64 }
65 }
66}
67
68#[test]
69fn handle_is_object_safe() {
70 fn _assert(_: &dyn Handle) {}
71}
diff --git a/crates/vfs/src/path_interner.rs b/crates/vfs/src/path_interner.rs
new file mode 100644
index 000000000..4f70d61e8
--- /dev/null
+++ b/crates/vfs/src/path_interner.rs
@@ -0,0 +1,31 @@
1//! Maps paths to compact integer ids. We don't care about clearings paths which
2//! no longer exist -- the assumption is total size of paths we ever look at is
3//! not too big.
4use rustc_hash::FxHashMap;
5
6use crate::{FileId, VfsPath};
7
8#[derive(Default)]
9pub(crate) struct PathInterner {
10 map: FxHashMap<VfsPath, FileId>,
11 vec: Vec<VfsPath>,
12}
13
14impl PathInterner {
15 pub(crate) fn get(&self, path: &VfsPath) -> Option<FileId> {
16 self.map.get(path).copied()
17 }
18 pub(crate) fn intern(&mut self, path: VfsPath) -> FileId {
19 if let Some(id) = self.get(&path) {
20 return id;
21 }
22 let id = FileId(self.vec.len() as u32);
23 self.map.insert(path.clone(), id);
24 self.vec.push(path);
25 id
26 }
27
28 pub(crate) fn lookup(&self, id: FileId) -> &VfsPath {
29 &self.vec[id.0 as usize]
30 }
31}
diff --git a/crates/vfs/src/vfs_path.rs b/crates/vfs/src/vfs_path.rs
new file mode 100644
index 000000000..04a42264e
--- /dev/null
+++ b/crates/vfs/src/vfs_path.rs
@@ -0,0 +1,146 @@
1//! Abstract-ish representation of paths for VFS.
2use std::fmt;
3
4use paths::{AbsPath, AbsPathBuf};
5
6/// Long-term, we want to support files which do not reside in the file-system,
7/// so we treat VfsPaths as opaque identifiers.
8#[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
9pub struct VfsPath(VfsPathRepr);
10
11impl VfsPath {
12 /// Creates an "in-memory" path from `/`-separates string.
13 /// This is most useful for testing, to avoid windows/linux differences
14 pub fn new_virtual_path(path: String) -> VfsPath {
15 assert!(path.starts_with('/'));
16 VfsPath(VfsPathRepr::VirtualPath(VirtualPath(path)))
17 }
18
19 pub fn as_path(&self) -> Option<&AbsPath> {
20 match &self.0 {
21 VfsPathRepr::PathBuf(it) => Some(it.as_path()),
22 VfsPathRepr::VirtualPath(_) => None,
23 }
24 }
25 pub fn join(&self, path: &str) -> Option<VfsPath> {
26 match &self.0 {
27 VfsPathRepr::PathBuf(it) => {
28 let res = it.join(path).normalize();
29 Some(VfsPath(VfsPathRepr::PathBuf(res)))
30 }
31 VfsPathRepr::VirtualPath(it) => {
32 let res = it.join(path)?;
33 Some(VfsPath(VfsPathRepr::VirtualPath(res)))
34 }
35 }
36 }
37 pub fn pop(&mut self) -> bool {
38 match &mut self.0 {
39 VfsPathRepr::PathBuf(it) => it.pop(),
40 VfsPathRepr::VirtualPath(it) => it.pop(),
41 }
42 }
43 pub fn starts_with(&self, other: &VfsPath) -> bool {
44 match (&self.0, &other.0) {
45 (VfsPathRepr::PathBuf(lhs), VfsPathRepr::PathBuf(rhs)) => lhs.starts_with(rhs),
46 (VfsPathRepr::PathBuf(_), _) => false,
47 (VfsPathRepr::VirtualPath(lhs), VfsPathRepr::VirtualPath(rhs)) => lhs.starts_with(rhs),
48 (VfsPathRepr::VirtualPath(_), _) => false,
49 }
50 }
51
52 // Don't make this `pub`
53 pub(crate) fn encode(&self, buf: &mut Vec<u8>) {
54 let tag = match &self.0 {
55 VfsPathRepr::PathBuf(_) => 0,
56 VfsPathRepr::VirtualPath(_) => 1,
57 };
58 buf.push(tag);
59 match &self.0 {
60 VfsPathRepr::PathBuf(it) => {
61 let path: &std::ffi::OsStr = it.as_os_str();
62 #[cfg(windows)]
63 {
64 use std::os::windows::ffi::OsStrExt;
65 for wchar in path.encode_wide() {
66 buf.extend(wchar.to_le_bytes().iter().copied());
67 }
68 }
69 #[cfg(unix)]
70 {
71 use std::os::unix::ffi::OsStrExt;
72 buf.extend(path.as_bytes());
73 }
74 #[cfg(not(any(windows, unix)))]
75 {
76 buf.extend(path.to_string_lossy().as_bytes());
77 }
78 }
79 VfsPathRepr::VirtualPath(VirtualPath(s)) => buf.extend(s.as_bytes()),
80 }
81 }
82}
83
84#[derive(Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
85enum VfsPathRepr {
86 PathBuf(AbsPathBuf),
87 VirtualPath(VirtualPath),
88}
89
90impl From<AbsPathBuf> for VfsPath {
91 fn from(v: AbsPathBuf) -> Self {
92 VfsPath(VfsPathRepr::PathBuf(v.normalize()))
93 }
94}
95
96impl fmt::Display for VfsPath {
97 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
98 match &self.0 {
99 VfsPathRepr::PathBuf(it) => fmt::Display::fmt(&it.display(), f),
100 VfsPathRepr::VirtualPath(VirtualPath(it)) => fmt::Display::fmt(it, f),
101 }
102 }
103}
104
105impl fmt::Debug for VfsPath {
106 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
107 fmt::Debug::fmt(&self.0, f)
108 }
109}
110
111impl fmt::Debug for VfsPathRepr {
112 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
113 match &self {
114 VfsPathRepr::PathBuf(it) => fmt::Debug::fmt(&it.display(), f),
115 VfsPathRepr::VirtualPath(VirtualPath(it)) => fmt::Debug::fmt(&it, f),
116 }
117 }
118}
119
120#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
121struct VirtualPath(String);
122
123impl VirtualPath {
124 fn starts_with(&self, other: &VirtualPath) -> bool {
125 self.0.starts_with(&other.0)
126 }
127 fn pop(&mut self) -> bool {
128 let pos = match self.0.rfind('/') {
129 Some(pos) => pos,
130 None => return false,
131 };
132 self.0 = self.0[..pos].to_string();
133 true
134 }
135 fn join(&self, mut path: &str) -> Option<VirtualPath> {
136 let mut res = self.clone();
137 while path.starts_with("../") {
138 if !res.pop() {
139 return None;
140 }
141 path = &path["../".len()..]
142 }
143 res.0 = format!("{}/{}", res.0, path);
144 Some(res)
145 }
146}