From b9f3c5d585ee266f0fd5db77c2a3f331a0bddf2d Mon Sep 17 00:00:00 2001
From: Aleksey Kladov <aleksey.kladov@gmail.com>
Date: Fri, 19 Jun 2020 15:07:32 +0200
Subject: Speedup VFS::partition

The task of `partition` function is to bin the flat list of paths into
disjoint filesets. Ideally, it should be incremental -- each new file
should be added to a specific fileset.

However, preliminary measurnments show that it is actually fast enough
if we just optimize this to use a binary search instead of a linear
scan.
---
 crates/vfs/src/file_set.rs | 32 +++++++++++++++++++++++---------
 crates/vfs/src/lib.rs      |  3 +++
 2 files changed, 26 insertions(+), 9 deletions(-)

(limited to 'crates/vfs')

diff --git a/crates/vfs/src/file_set.rs b/crates/vfs/src/file_set.rs
index 7dc721f7e..724606a3d 100644
--- a/crates/vfs/src/file_set.rs
+++ b/crates/vfs/src/file_set.rs
@@ -2,7 +2,7 @@
 //!
 //! Files which do not belong to any explicitly configured `FileSet` belong to
 //! the default `FileSet`.
-use std::{cmp, fmt, iter};
+use std::{fmt, iter};
 
 use paths::AbsPathBuf;
 use rustc_hash::FxHashMap;
@@ -44,6 +44,12 @@ pub struct FileSetConfig {
     roots: Vec<(AbsPathBuf, usize)>,
 }
 
+impl Default for FileSetConfig {
+    fn default() -> Self {
+        FileSetConfig::builder().build()
+    }
+}
+
 impl FileSetConfig {
     pub fn builder() -> FileSetConfigBuilder {
         FileSetConfigBuilder::default()
@@ -60,14 +66,19 @@ impl FileSetConfig {
         self.n_file_sets
     }
     fn classify(&self, path: &VfsPath) -> usize {
-        for (root, idx) in self.roots.iter() {
-            if let Some(path) = path.as_path() {
-                if path.starts_with(root) {
-                    return *idx;
-                }
-            }
+        let path = match path.as_path() {
+            Some(it) => it,
+            None => return self.len() - 1,
+        };
+        let idx = match self.roots.binary_search_by(|(p, _)| p.as_path().cmp(path)) {
+            Ok(it) => it,
+            Err(it) => it.saturating_sub(1),
+        };
+        if path.starts_with(&self.roots[idx].0) {
+            self.roots[idx].1
+        } else {
+            self.len() - 1
         }
-        self.len() - 1
     }
 }
 
@@ -82,6 +93,9 @@ impl Default for FileSetConfigBuilder {
 }
 
 impl FileSetConfigBuilder {
+    pub fn len(&self) -> usize {
+        self.roots.len()
+    }
     pub fn add_file_set(&mut self, roots: Vec<AbsPathBuf>) {
         self.roots.push(roots)
     }
@@ -93,7 +107,7 @@ impl FileSetConfigBuilder {
             .enumerate()
             .flat_map(|(i, paths)| paths.into_iter().zip(iter::repeat(i)))
             .collect();
-        roots.sort_by_key(|(path, _)| cmp::Reverse(path.to_string_lossy().len()));
+        roots.sort();
         FileSetConfig { n_file_sets, roots }
     }
 }
diff --git a/crates/vfs/src/lib.rs b/crates/vfs/src/lib.rs
index 75ce61cf9..055219b0c 100644
--- a/crates/vfs/src/lib.rs
+++ b/crates/vfs/src/lib.rs
@@ -79,6 +79,9 @@ pub enum ChangeKind {
 }
 
 impl Vfs {
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
     pub fn file_id(&self, path: &VfsPath) -> Option<FileId> {
         self.interner.get(path).filter(|&it| self.get(it).is_some())
     }
-- 
cgit v1.2.3