From 02fc3d50ee4d179cc5a443a790544c2a5e439cb0 Mon Sep 17 00:00:00 2001
From: David Lattimore <dml@google.com>
Date: Wed, 22 Jul 2020 16:48:12 +1000
Subject: SSR: Refactor to not rely on recursive search for nesting of matches

Previously, submatches were handled simply by searching in placeholders
for more matches. That only works if we search all nodes in the tree
recursively. In a subsequent commit, I intend to make search not always
be recursive. This commit prepares for that by finding all
matches, even if they overlap, then nesting them and removing
overlapping matches.
---
 crates/ra_ssr/src/lib.rs      |  7 +++-
 crates/ra_ssr/src/matching.rs |  4 ++
 crates/ra_ssr/src/nester.rs   | 98 +++++++++++++++++++++++++++++++++++++++++++
 crates/ra_ssr/src/search.rs   | 38 ++++++-----------
 4 files changed, 120 insertions(+), 27 deletions(-)
 create mode 100644 crates/ra_ssr/src/nester.rs

(limited to 'crates/ra_ssr/src')
diff --git a/crates/ra_ssr/src/lib.rs b/crates/ra_ssr/src/lib.rs
index 7b6409806..6d578610b 100644
--- a/crates/ra_ssr/src/lib.rs
+++ b/crates/ra_ssr/src/lib.rs
@@ -4,6 +4,7 @@
 //! based on a template.
 
 mod matching;
+mod nester;
 mod parsing;
 mod replacing;
 mod search;
@@ -90,8 +91,10 @@ impl<'db> MatchFinder<'db> {
     /// Returns matches for all added rules.
     pub fn matches(&self) -> SsrMatches {
         let mut matches = Vec::new();
-        self.find_all_matches(&mut matches);
-        SsrMatches { matches }
+        for rule in &self.rules {
+            self.find_matches_for_rule(rule, &mut matches);
+        }
+        nester::nest_and_remove_collisions(matches, &self.sema)
     }
 
     /// Finds all nodes in `file_id` whose text is exactly equal to `snippet` and attempts to match
diff --git a/crates/ra_ssr/src/matching.rs b/crates/ra_ssr/src/matching.rs
index 064e3a204..005569f6f 100644
--- a/crates/ra_ssr/src/matching.rs
+++ b/crates/ra_ssr/src/matching.rs
@@ -49,6 +49,8 @@ pub struct Match {
     pub(crate) placeholder_values: FxHashMap<Var, PlaceholderMatch>,
     pub(crate) ignored_comments: Vec<ast::Comment>,
     pub(crate) rule_index: usize,
+    /// The depth of matched_node.
+    pub(crate) depth: usize,
 }
 
 /// Represents a `$var` in an SSR query.
@@ -130,10 +132,12 @@ impl<'db, 'sema> Matcher<'db, 'sema> {
             placeholder_values: FxHashMap::default(),
             ignored_comments: Vec::new(),
             rule_index: rule.index,
+            depth: 0,
         };
         // Second matching pass, where we record placeholder matches, ignored comments and maybe do
         // any other more expensive checks that we didn't want to do on the first pass.
         match_state.attempt_match_node(&mut Phase::Second(&mut the_match), &rule.pattern, code)?;
+        the_match.depth = sema.ancestors_with_macros(the_match.matched_node.clone()).count();
         Ok(the_match)
     }
 
diff --git a/crates/ra_ssr/src/nester.rs b/crates/ra_ssr/src/nester.rs
new file mode 100644
index 000000000..b3e20579b
--- /dev/null
+++ b/crates/ra_ssr/src/nester.rs
@@ -0,0 +1,98 @@
+//! Converts a flat collection of matches into a nested form suitable for replacement. When there
+//! are multiple matches for a node, or that overlap, priority is given to the earlier rule. Nested
+//! matches are only permitted if the inner match is contained entirely within a placeholder of an
+//! outer match.
+//!
+//! For example, if our search pattern is `foo(foo($a))` and the code had `foo(foo(foo(foo(42))))`,
+//! then we'll get 3 matches, however only the outermost and innermost matches can be accepted. The
+//! middle match would take the second `foo` from the outer match.
+
+use crate::{Match, SsrMatches};
+use ra_syntax::SyntaxNode;
+use rustc_hash::FxHashMap;
+
+pub(crate) fn nest_and_remove_collisions(
+    mut matches: Vec<Match>,
+    sema: &hir::Semantics<ra_ide_db::RootDatabase>,
+) -> SsrMatches {
+    // We sort the matches by depth then by rule index. Sorting by depth means that by the time we
+    // see a match, any parent matches or conflicting matches will have already been seen. Sorting
+    // by rule_index means that if there are two matches for the same node, the rule added first
+    // will take precedence.
+    matches.sort_by(|a, b| a.depth.cmp(&b.depth).then_with(|| a.rule_index.cmp(&b.rule_index)));
+    let mut collector = MatchCollector::default();
+    for m in matches {
+        collector.add_match(m, sema);
+    }
+    collector.into()
+}
+
+#[derive(Default)]
+struct MatchCollector {
+    matches_by_node: FxHashMap<SyntaxNode, Match>,
+}
+
+impl MatchCollector {
+    /// Attempts to add `m` to matches. If it conflicts with an existing match, it is discarded. If
+    /// it is entirely within a placeholder of an existing match, then it is added as a child
+    /// match of the existing match.
+    fn add_match(&mut self, m: Match, sema: &hir::Semantics<ra_ide_db::RootDatabase>) {
+        let matched_node = m.matched_node.clone();
+        if let Some(existing) = self.matches_by_node.get_mut(&matched_node) {
+            try_add_sub_match(m, existing, sema);
+            return;
+        }
+        for ancestor in sema.ancestors_with_macros(m.matched_node.clone()) {
+            if let Some(existing) = self.matches_by_node.get_mut(&ancestor) {
+                try_add_sub_match(m, existing, sema);
+                return;
+            }
+        }
+        self.matches_by_node.insert(matched_node, m);
+    }
+}
+
+/// Attempts to add `m` as a sub-match of `existing`.
+fn try_add_sub_match(
+    m: Match,
+    existing: &mut Match,
+    sema: &hir::Semantics<ra_ide_db::RootDatabase>,
+) {
+    for p in existing.placeholder_values.values_mut() {
+        // Note, no need to check if p.range.file is equal to m.range.file, since we
+        // already know we're within `existing`.
+        if p.range.range.contains_range(m.range.range) {
+            // Convert the inner matches in `p` into a temporary MatchCollector. When
+            // we're done, we then convert it back into an SsrMatches. If we expected
+            // lots of inner matches, it might be worthwhile keeping a MatchCollector
+            // around for each placeholder match. However we expect most placeholders
+            // will have 0 and a few will have 1. More than that should hopefully be
+            // exceptional.
+            let mut collector = MatchCollector::default();
+            for m in std::mem::replace(&mut p.inner_matches.matches, Vec::new()) {
+                collector.matches_by_node.insert(m.matched_node.clone(), m);
+            }
+            collector.add_match(m, sema);
+            p.inner_matches = collector.into();
+            break;
+        }
+    }
+}
+
+impl From<MatchCollector> for SsrMatches {
+    fn from(mut match_collector: MatchCollector) -> Self {
+        let mut matches = SsrMatches::default();
+        for (_, m) in match_collector.matches_by_node.drain() {
+            matches.matches.push(m);
+        }
+        matches.matches.sort_by(|a, b| {
+            // Order matches by file_id then by start range. This should be sufficient since ranges
+            // shouldn't be overlapping.
+            a.range
+                .file_id
+                .cmp(&b.range.file_id)
+                .then_with(|| a.range.range.start().cmp(&b.range.range.start()))
+        });
+        matches
+    }
+}
diff --git a/crates/ra_ssr/src/search.rs b/crates/ra_ssr/src/search.rs
index ec3addcf8..a28e9f341 100644
--- a/crates/ra_ssr/src/search.rs
+++ b/crates/ra_ssr/src/search.rs
@@ -1,17 +1,20 @@
 //! Searching for matches.
 
-use crate::{matching, Match, MatchFinder};
+use crate::{matching, parsing::ParsedRule, Match, MatchFinder};
 use ra_db::FileRange;
 use ra_syntax::{ast, AstNode, SyntaxNode};
 
 impl<'db> MatchFinder<'db> {
-    pub(crate) fn find_all_matches(&self, matches_out: &mut Vec<Match>) {
+    /// Adds all matches for `rule` to `matches_out`. Matches may overlap in ways that make
+    /// replacement impossible, so further processing is required in order to properly nest matches
+    /// and remove overlapping matches. This is done in the `nester` module.
+    pub(crate) fn find_matches_for_rule(&self, rule: &ParsedRule, matches_out: &mut Vec<Match>) {
         // FIXME: Use resolved paths in the pattern to find places to search instead of always
         // scanning every node.
-        self.slow_scan(matches_out);
+        self.slow_scan(rule, matches_out);
     }
 
-    fn slow_scan(&self, matches_out: &mut Vec<Match>) {
+    fn slow_scan(&self, rule: &ParsedRule, matches_out: &mut Vec<Match>) {
         use ra_db::SourceDatabaseExt;
         use ra_ide_db::symbol_index::SymbolsDatabase;
         for &root in self.sema.db.local_roots().iter() {
@@ -19,7 +22,7 @@ impl<'db> MatchFinder<'db> {
             for file_id in sr.iter() {
                 let file = self.sema.parse(file_id);
                 let code = file.syntax();
-                self.slow_scan_node(code, &None, matches_out);
+                self.slow_scan_node(code, rule, &None, matches_out);
             }
         }
     }
@@ -27,28 +30,12 @@ impl<'db> MatchFinder<'db> {
     fn slow_scan_node(
         &self,
         code: &SyntaxNode,
+        rule: &ParsedRule,
         restrict_range: &Option<FileRange>,
         matches_out: &mut Vec<Match>,
     ) {
-        for rule in &self.rules {
-            if let Ok(mut m) = matching::get_match(false, rule, &code, restrict_range, &self.sema) {
-                // Continue searching in each of our placeholders.
-                for placeholder_value in m.placeholder_values.values_mut() {
-                    if let Some(placeholder_node) = &placeholder_value.node {
-                        // Don't search our placeholder if it's the entire matched node, otherwise we'd
-                        // find the same match over and over until we got a stack overflow.
-                        if placeholder_node != code {
-                            self.slow_scan_node(
-                                placeholder_node,
-                                restrict_range,
-                                &mut placeholder_value.inner_matches.matches,
-                            );
-                        }
-                    }
-                }
-                matches_out.push(m);
-                return;
-            }
+        if let Ok(m) = matching::get_match(false, rule, &code, restrict_range, &self.sema) {
+            matches_out.push(m);
         }
         // If we've got a macro call, we already tried matching it pre-expansion, which is the only
         // way to match the whole macro, now try expanding it and matching the expansion.
@@ -60,6 +47,7 @@ impl<'db> MatchFinder<'db> {
                     // i.e. we don't want to match something that came from the macro itself.
                     self.slow_scan_node(
                         &expanded,
+                        rule,
                         &Some(self.sema.original_range(tt.syntax())),
                         matches_out,
                     );
@@ -67,7 +55,7 @@ impl<'db> MatchFinder<'db> {
             }
         }
         for child in code.children() {
-            self.slow_scan_node(&child, restrict_range, matches_out);
+            self.slow_scan_node(&child, rule, restrict_range, matches_out);
         }
     }
 }
-- 
cgit v1.2.3