diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2020-06-22 12:50:34 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2020-06-22 12:50:34 +0100 |
commit | d144d69d2eded43a59c8edb59419b1b9e85c10a5 (patch) | |
tree | 0d52bdbb15723d25b7d3fff9ad25274c72e43434 /crates/ra_ssr/src/matching.rs | |
parent | 19701b39ac232b023ff9ab077a33c743df96d178 (diff) | |
parent | 662ab2ecc8e29eb5995b3c162fac869838bea9a2 (diff) |
Merge #4921
4921: Allow SSR to match type references, items, paths and patterns r=davidlattimore a=davidlattimore
Part of #3186
Co-authored-by: David Lattimore <[email protected]>
Diffstat (limited to 'crates/ra_ssr/src/matching.rs')
-rw-r--r-- | crates/ra_ssr/src/matching.rs | 494 |
1 files changed, 494 insertions, 0 deletions
diff --git a/crates/ra_ssr/src/matching.rs b/crates/ra_ssr/src/matching.rs new file mode 100644 index 000000000..265b6d793 --- /dev/null +++ b/crates/ra_ssr/src/matching.rs | |||
@@ -0,0 +1,494 @@ | |||
1 | //! This module is responsible for matching a search pattern against a node in the AST. In the | ||
2 | //! process of matching, placeholder values are recorded. | ||
3 | |||
4 | use crate::{ | ||
5 | parsing::{Placeholder, SsrTemplate}, | ||
6 | SsrMatches, SsrPattern, SsrRule, | ||
7 | }; | ||
8 | use hir::Semantics; | ||
9 | use ra_db::FileRange; | ||
10 | use ra_syntax::ast::{AstNode, AstToken}; | ||
11 | use ra_syntax::{ | ||
12 | ast, SyntaxElement, SyntaxElementChildren, SyntaxKind, SyntaxNode, SyntaxToken, TextRange, | ||
13 | }; | ||
14 | use rustc_hash::FxHashMap; | ||
15 | use std::{cell::Cell, iter::Peekable}; | ||
16 | |||
17 | // Creates a match error. If we're currently attempting to match some code that we thought we were | ||
18 | // going to match, as indicated by the --debug-snippet flag, then populate the reason field. | ||
19 | macro_rules! match_error { | ||
20 | ($e:expr) => {{ | ||
21 | MatchFailed { | ||
22 | reason: if recording_match_fail_reasons() { | ||
23 | Some(format!("{}", $e)) | ||
24 | } else { | ||
25 | None | ||
26 | } | ||
27 | } | ||
28 | }}; | ||
29 | ($fmt:expr, $($arg:tt)+) => {{ | ||
30 | MatchFailed { | ||
31 | reason: if recording_match_fail_reasons() { | ||
32 | Some(format!($fmt, $($arg)+)) | ||
33 | } else { | ||
34 | None | ||
35 | } | ||
36 | } | ||
37 | }}; | ||
38 | } | ||
39 | |||
40 | // Fails the current match attempt, recording the supplied reason if we're recording match fail reasons. | ||
41 | macro_rules! fail_match { | ||
42 | ($($args:tt)*) => {return Err(match_error!($($args)*))}; | ||
43 | } | ||
44 | |||
45 | /// Information about a match that was found. | ||
46 | #[derive(Debug)] | ||
47 | pub(crate) struct Match { | ||
48 | pub(crate) range: TextRange, | ||
49 | pub(crate) matched_node: SyntaxNode, | ||
50 | pub(crate) placeholder_values: FxHashMap<Var, PlaceholderMatch>, | ||
51 | pub(crate) ignored_comments: Vec<ast::Comment>, | ||
52 | // A copy of the template for the rule that produced this match. We store this on the match for | ||
53 | // if/when we do replacement. | ||
54 | pub(crate) template: SsrTemplate, | ||
55 | } | ||
56 | |||
57 | /// Represents a `$var` in an SSR query. | ||
58 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
59 | pub(crate) struct Var(pub String); | ||
60 | |||
61 | /// Information about a placeholder bound in a match. | ||
62 | #[derive(Debug)] | ||
63 | pub(crate) struct PlaceholderMatch { | ||
64 | /// The node that the placeholder matched to. | ||
65 | pub(crate) node: SyntaxNode, | ||
66 | pub(crate) range: FileRange, | ||
67 | /// More matches, found within `node`. | ||
68 | pub(crate) inner_matches: SsrMatches, | ||
69 | } | ||
70 | |||
71 | #[derive(Debug)] | ||
72 | pub(crate) struct MatchFailureReason { | ||
73 | pub(crate) reason: String, | ||
74 | } | ||
75 | |||
76 | /// An "error" indicating that matching failed. Use the fail_match! macro to create and return this. | ||
77 | #[derive(Clone)] | ||
78 | pub(crate) struct MatchFailed { | ||
79 | /// The reason why we failed to match. Only present when debug_active true in call to | ||
80 | /// `get_match`. | ||
81 | pub(crate) reason: Option<String>, | ||
82 | } | ||
83 | |||
84 | /// Checks if `code` matches the search pattern found in `search_scope`, returning information about | ||
85 | /// the match, if it does. Since we only do matching in this module and searching is done by the | ||
86 | /// parent module, we don't populate nested matches. | ||
87 | pub(crate) fn get_match( | ||
88 | debug_active: bool, | ||
89 | rule: &SsrRule, | ||
90 | code: &SyntaxNode, | ||
91 | restrict_range: &Option<FileRange>, | ||
92 | sema: &Semantics<ra_ide_db::RootDatabase>, | ||
93 | ) -> Result<Match, MatchFailed> { | ||
94 | record_match_fails_reasons_scope(debug_active, || { | ||
95 | MatchState::try_match(rule, code, restrict_range, sema) | ||
96 | }) | ||
97 | } | ||
98 | |||
99 | /// Inputs to matching. This cannot be part of `MatchState`, since we mutate `MatchState` and in at | ||
100 | /// least one case need to hold a borrow of a placeholder from the input pattern while calling a | ||
101 | /// mutable `MatchState` method. | ||
102 | struct MatchInputs<'pattern> { | ||
103 | ssr_pattern: &'pattern SsrPattern, | ||
104 | } | ||
105 | |||
106 | /// State used while attempting to match our search pattern against a particular node of the AST. | ||
107 | struct MatchState<'db, 'sema> { | ||
108 | sema: &'sema Semantics<'db, ra_ide_db::RootDatabase>, | ||
109 | /// If any placeholders come from anywhere outside of this range, then the match will be | ||
110 | /// rejected. | ||
111 | restrict_range: Option<FileRange>, | ||
112 | /// The match that we're building. We do two passes for a successful match. On the first pass, | ||
113 | /// this is None so that we can avoid doing things like storing copies of what placeholders | ||
114 | /// matched to. If that pass succeeds, then we do a second pass where we collect those details. | ||
115 | /// This means that if we have a pattern like `$a.foo()` we won't do an insert into the | ||
116 | /// placeholders map for every single method call in the codebase. Instead we'll discard all the | ||
117 | /// method calls that aren't calls to `foo` on the first pass and only insert into the | ||
118 | /// placeholders map on the second pass. Likewise for ignored comments. | ||
119 | match_out: Option<Match>, | ||
120 | } | ||
121 | |||
122 | impl<'db, 'sema> MatchState<'db, 'sema> { | ||
123 | fn try_match( | ||
124 | rule: &SsrRule, | ||
125 | code: &SyntaxNode, | ||
126 | restrict_range: &Option<FileRange>, | ||
127 | sema: &'sema Semantics<'db, ra_ide_db::RootDatabase>, | ||
128 | ) -> Result<Match, MatchFailed> { | ||
129 | let mut match_state = | ||
130 | MatchState { sema, restrict_range: restrict_range.clone(), match_out: None }; | ||
131 | let match_inputs = MatchInputs { ssr_pattern: &rule.pattern }; | ||
132 | let pattern_tree = rule.pattern.tree_for_kind(code.kind())?; | ||
133 | // First pass at matching, where we check that node types and idents match. | ||
134 | match_state.attempt_match_node(&match_inputs, &pattern_tree, code)?; | ||
135 | match_state.validate_range(&sema.original_range(code))?; | ||
136 | match_state.match_out = Some(Match { | ||
137 | range: sema.original_range(code).range, | ||
138 | matched_node: code.clone(), | ||
139 | placeholder_values: FxHashMap::default(), | ||
140 | ignored_comments: Vec::new(), | ||
141 | template: rule.template.clone(), | ||
142 | }); | ||
143 | // Second matching pass, where we record placeholder matches, ignored comments and maybe do | ||
144 | // any other more expensive checks that we didn't want to do on the first pass. | ||
145 | match_state.attempt_match_node(&match_inputs, &pattern_tree, code)?; | ||
146 | Ok(match_state.match_out.unwrap()) | ||
147 | } | ||
148 | |||
149 | /// Checks that `range` is within the permitted range if any. This is applicable when we're | ||
150 | /// processing a macro expansion and we want to fail the match if we're working with a node that | ||
151 | /// didn't originate from the token tree of the macro call. | ||
152 | fn validate_range(&self, range: &FileRange) -> Result<(), MatchFailed> { | ||
153 | if let Some(restrict_range) = &self.restrict_range { | ||
154 | if restrict_range.file_id != range.file_id | ||
155 | || !restrict_range.range.contains_range(range.range) | ||
156 | { | ||
157 | fail_match!("Node originated from a macro"); | ||
158 | } | ||
159 | } | ||
160 | Ok(()) | ||
161 | } | ||
162 | |||
163 | fn attempt_match_node( | ||
164 | &mut self, | ||
165 | match_inputs: &MatchInputs, | ||
166 | pattern: &SyntaxNode, | ||
167 | code: &SyntaxNode, | ||
168 | ) -> Result<(), MatchFailed> { | ||
169 | // Handle placeholders. | ||
170 | if let Some(placeholder) = | ||
171 | match_inputs.get_placeholder(&SyntaxElement::Node(pattern.clone())) | ||
172 | { | ||
173 | if self.match_out.is_none() { | ||
174 | return Ok(()); | ||
175 | } | ||
176 | let original_range = self.sema.original_range(code); | ||
177 | // We validated the range for the node when we started the match, so the placeholder | ||
178 | // probably can't fail range validation, but just to be safe... | ||
179 | self.validate_range(&original_range)?; | ||
180 | if let Some(match_out) = &mut self.match_out { | ||
181 | match_out.placeholder_values.insert( | ||
182 | Var(placeholder.ident.to_string()), | ||
183 | PlaceholderMatch::new(code, original_range), | ||
184 | ); | ||
185 | } | ||
186 | return Ok(()); | ||
187 | } | ||
188 | // Non-placeholders. | ||
189 | if pattern.kind() != code.kind() { | ||
190 | fail_match!("Pattern had a {:?}, code had {:?}", pattern.kind(), code.kind()); | ||
191 | } | ||
192 | // Some kinds of nodes have special handling. For everything else, we fall back to default | ||
193 | // matching. | ||
194 | match code.kind() { | ||
195 | SyntaxKind::RECORD_FIELD_LIST => { | ||
196 | self.attempt_match_record_field_list(match_inputs, pattern, code) | ||
197 | } | ||
198 | _ => self.attempt_match_node_children(match_inputs, pattern, code), | ||
199 | } | ||
200 | } | ||
201 | |||
202 | fn attempt_match_node_children( | ||
203 | &mut self, | ||
204 | match_inputs: &MatchInputs, | ||
205 | pattern: &SyntaxNode, | ||
206 | code: &SyntaxNode, | ||
207 | ) -> Result<(), MatchFailed> { | ||
208 | self.attempt_match_sequences( | ||
209 | match_inputs, | ||
210 | PatternIterator::new(pattern), | ||
211 | code.children_with_tokens(), | ||
212 | ) | ||
213 | } | ||
214 | |||
215 | fn attempt_match_sequences( | ||
216 | &mut self, | ||
217 | match_inputs: &MatchInputs, | ||
218 | pattern_it: PatternIterator, | ||
219 | mut code_it: SyntaxElementChildren, | ||
220 | ) -> Result<(), MatchFailed> { | ||
221 | let mut pattern_it = pattern_it.peekable(); | ||
222 | loop { | ||
223 | match self.next_non_trivial(&mut code_it) { | ||
224 | None => { | ||
225 | if let Some(p) = pattern_it.next() { | ||
226 | fail_match!("Part of the pattern was unmached: {:?}", p); | ||
227 | } | ||
228 | return Ok(()); | ||
229 | } | ||
230 | Some(SyntaxElement::Token(c)) => { | ||
231 | self.attempt_match_token(&mut pattern_it, &c)?; | ||
232 | } | ||
233 | Some(SyntaxElement::Node(c)) => match pattern_it.next() { | ||
234 | Some(SyntaxElement::Node(p)) => { | ||
235 | self.attempt_match_node(match_inputs, &p, &c)?; | ||
236 | } | ||
237 | Some(p) => fail_match!("Pattern wanted '{}', code has {}", p, c.text()), | ||
238 | None => fail_match!("Pattern reached end, code has {}", c.text()), | ||
239 | }, | ||
240 | } | ||
241 | } | ||
242 | } | ||
243 | |||
244 | fn attempt_match_token( | ||
245 | &mut self, | ||
246 | pattern: &mut Peekable<PatternIterator>, | ||
247 | code: &ra_syntax::SyntaxToken, | ||
248 | ) -> Result<(), MatchFailed> { | ||
249 | self.record_ignored_comments(code); | ||
250 | // Ignore whitespace and comments. | ||
251 | if code.kind().is_trivia() { | ||
252 | return Ok(()); | ||
253 | } | ||
254 | if let Some(SyntaxElement::Token(p)) = pattern.peek() { | ||
255 | // If the code has a comma and the pattern is about to close something, then accept the | ||
256 | // comma without advancing the pattern. i.e. ignore trailing commas. | ||
257 | if code.kind() == SyntaxKind::COMMA && is_closing_token(p.kind()) { | ||
258 | return Ok(()); | ||
259 | } | ||
260 | // Conversely, if the pattern has a comma and the code doesn't, skip that part of the | ||
261 | // pattern and continue to match the code. | ||
262 | if p.kind() == SyntaxKind::COMMA && is_closing_token(code.kind()) { | ||
263 | pattern.next(); | ||
264 | } | ||
265 | } | ||
266 | // Consume an element from the pattern and make sure it matches. | ||
267 | match pattern.next() { | ||
268 | Some(SyntaxElement::Token(p)) => { | ||
269 | if p.kind() != code.kind() || p.text() != code.text() { | ||
270 | fail_match!( | ||
271 | "Pattern wanted token '{}' ({:?}), but code had token '{}' ({:?})", | ||
272 | p.text(), | ||
273 | p.kind(), | ||
274 | code.text(), | ||
275 | code.kind() | ||
276 | ) | ||
277 | } | ||
278 | } | ||
279 | Some(SyntaxElement::Node(p)) => { | ||
280 | // Not sure if this is actually reachable. | ||
281 | fail_match!( | ||
282 | "Pattern wanted {:?}, but code had token '{}' ({:?})", | ||
283 | p, | ||
284 | code.text(), | ||
285 | code.kind() | ||
286 | ); | ||
287 | } | ||
288 | None => { | ||
289 | fail_match!("Pattern exhausted, while code remains: `{}`", code.text()); | ||
290 | } | ||
291 | } | ||
292 | Ok(()) | ||
293 | } | ||
294 | |||
295 | /// We want to allow the records to match in any order, so we have special matching logic for | ||
296 | /// them. | ||
297 | fn attempt_match_record_field_list( | ||
298 | &mut self, | ||
299 | match_inputs: &MatchInputs, | ||
300 | pattern: &SyntaxNode, | ||
301 | code: &SyntaxNode, | ||
302 | ) -> Result<(), MatchFailed> { | ||
303 | // Build a map keyed by field name. | ||
304 | let mut fields_by_name = FxHashMap::default(); | ||
305 | for child in code.children() { | ||
306 | if let Some(record) = ast::RecordField::cast(child.clone()) { | ||
307 | if let Some(name) = record.field_name() { | ||
308 | fields_by_name.insert(name.text().clone(), child.clone()); | ||
309 | } | ||
310 | } | ||
311 | } | ||
312 | for p in pattern.children_with_tokens() { | ||
313 | if let SyntaxElement::Node(p) = p { | ||
314 | if let Some(name_element) = p.first_child_or_token() { | ||
315 | if match_inputs.get_placeholder(&name_element).is_some() { | ||
316 | // If the pattern is using placeholders for field names then order | ||
317 | // independence doesn't make sense. Fall back to regular ordered | ||
318 | // matching. | ||
319 | return self.attempt_match_node_children(match_inputs, pattern, code); | ||
320 | } | ||
321 | if let Some(ident) = only_ident(name_element) { | ||
322 | let code_record = fields_by_name.remove(ident.text()).ok_or_else(|| { | ||
323 | match_error!( | ||
324 | "Placeholder has record field '{}', but code doesn't", | ||
325 | ident | ||
326 | ) | ||
327 | })?; | ||
328 | self.attempt_match_node(match_inputs, &p, &code_record)?; | ||
329 | } | ||
330 | } | ||
331 | } | ||
332 | } | ||
333 | if let Some(unmatched_fields) = fields_by_name.keys().next() { | ||
334 | fail_match!( | ||
335 | "{} field(s) of a record literal failed to match, starting with {}", | ||
336 | fields_by_name.len(), | ||
337 | unmatched_fields | ||
338 | ); | ||
339 | } | ||
340 | Ok(()) | ||
341 | } | ||
342 | |||
343 | fn next_non_trivial(&mut self, code_it: &mut SyntaxElementChildren) -> Option<SyntaxElement> { | ||
344 | loop { | ||
345 | let c = code_it.next(); | ||
346 | if let Some(SyntaxElement::Token(t)) = &c { | ||
347 | self.record_ignored_comments(t); | ||
348 | if t.kind().is_trivia() { | ||
349 | continue; | ||
350 | } | ||
351 | } | ||
352 | return c; | ||
353 | } | ||
354 | } | ||
355 | |||
356 | fn record_ignored_comments(&mut self, token: &SyntaxToken) { | ||
357 | if token.kind() == SyntaxKind::COMMENT { | ||
358 | if let Some(match_out) = &mut self.match_out { | ||
359 | if let Some(comment) = ast::Comment::cast(token.clone()) { | ||
360 | match_out.ignored_comments.push(comment); | ||
361 | } | ||
362 | } | ||
363 | } | ||
364 | } | ||
365 | } | ||
366 | |||
367 | impl MatchInputs<'_> { | ||
368 | fn get_placeholder(&self, element: &SyntaxElement) -> Option<&Placeholder> { | ||
369 | only_ident(element.clone()) | ||
370 | .and_then(|ident| self.ssr_pattern.placeholders_by_stand_in.get(ident.text())) | ||
371 | } | ||
372 | } | ||
373 | |||
374 | fn is_closing_token(kind: SyntaxKind) -> bool { | ||
375 | kind == SyntaxKind::R_PAREN || kind == SyntaxKind::R_CURLY || kind == SyntaxKind::R_BRACK | ||
376 | } | ||
377 | |||
378 | pub(crate) fn record_match_fails_reasons_scope<F, T>(debug_active: bool, f: F) -> T | ||
379 | where | ||
380 | F: Fn() -> T, | ||
381 | { | ||
382 | RECORDING_MATCH_FAIL_REASONS.with(|c| c.set(debug_active)); | ||
383 | let res = f(); | ||
384 | RECORDING_MATCH_FAIL_REASONS.with(|c| c.set(false)); | ||
385 | res | ||
386 | } | ||
387 | |||
388 | // For performance reasons, we don't want to record the reason why every match fails, only the bit | ||
389 | // of code that the user indicated they thought would match. We use a thread local to indicate when | ||
390 | // we are trying to match that bit of code. This saves us having to pass a boolean into all the bits | ||
391 | // of code that can make the decision to not match. | ||
392 | thread_local! { | ||
393 | pub static RECORDING_MATCH_FAIL_REASONS: Cell<bool> = Cell::new(false); | ||
394 | } | ||
395 | |||
396 | fn recording_match_fail_reasons() -> bool { | ||
397 | RECORDING_MATCH_FAIL_REASONS.with(|c| c.get()) | ||
398 | } | ||
399 | |||
400 | impl PlaceholderMatch { | ||
401 | fn new(node: &SyntaxNode, range: FileRange) -> Self { | ||
402 | Self { node: node.clone(), range, inner_matches: SsrMatches::default() } | ||
403 | } | ||
404 | } | ||
405 | |||
406 | impl SsrPattern { | ||
407 | pub(crate) fn tree_for_kind(&self, kind: SyntaxKind) -> Result<&SyntaxNode, MatchFailed> { | ||
408 | let (tree, kind_name) = if ast::Expr::can_cast(kind) { | ||
409 | (&self.expr, "expression") | ||
410 | } else if ast::TypeRef::can_cast(kind) { | ||
411 | (&self.type_ref, "type reference") | ||
412 | } else if ast::ModuleItem::can_cast(kind) { | ||
413 | (&self.item, "item") | ||
414 | } else if ast::Path::can_cast(kind) { | ||
415 | (&self.path, "path") | ||
416 | } else if ast::Pat::can_cast(kind) { | ||
417 | (&self.pattern, "pattern") | ||
418 | } else { | ||
419 | fail_match!("Matching nodes of kind {:?} is not supported", kind); | ||
420 | }; | ||
421 | match tree { | ||
422 | Some(tree) => Ok(tree), | ||
423 | None => fail_match!("Pattern cannot be parsed as a {}", kind_name), | ||
424 | } | ||
425 | } | ||
426 | } | ||
427 | |||
428 | // If `node` contains nothing but an ident then return it, otherwise return None. | ||
429 | fn only_ident(element: SyntaxElement) -> Option<SyntaxToken> { | ||
430 | match element { | ||
431 | SyntaxElement::Token(t) => { | ||
432 | if t.kind() == SyntaxKind::IDENT { | ||
433 | return Some(t); | ||
434 | } | ||
435 | } | ||
436 | SyntaxElement::Node(n) => { | ||
437 | let mut children = n.children_with_tokens(); | ||
438 | if let (Some(only_child), None) = (children.next(), children.next()) { | ||
439 | return only_ident(only_child); | ||
440 | } | ||
441 | } | ||
442 | } | ||
443 | None | ||
444 | } | ||
445 | |||
446 | struct PatternIterator { | ||
447 | iter: SyntaxElementChildren, | ||
448 | } | ||
449 | |||
450 | impl Iterator for PatternIterator { | ||
451 | type Item = SyntaxElement; | ||
452 | |||
453 | fn next(&mut self) -> Option<SyntaxElement> { | ||
454 | while let Some(element) = self.iter.next() { | ||
455 | if !element.kind().is_trivia() { | ||
456 | return Some(element); | ||
457 | } | ||
458 | } | ||
459 | None | ||
460 | } | ||
461 | } | ||
462 | |||
463 | impl PatternIterator { | ||
464 | fn new(parent: &SyntaxNode) -> Self { | ||
465 | Self { iter: parent.children_with_tokens() } | ||
466 | } | ||
467 | } | ||
468 | |||
469 | #[cfg(test)] | ||
470 | mod tests { | ||
471 | use super::*; | ||
472 | use crate::MatchFinder; | ||
473 | |||
474 | #[test] | ||
475 | fn parse_match_replace() { | ||
476 | let rule: SsrRule = "foo($x) ==>> bar($x)".parse().unwrap(); | ||
477 | let input = "fn main() { foo(1+2); }"; | ||
478 | |||
479 | use ra_db::fixture::WithFixture; | ||
480 | let (db, file_id) = ra_ide_db::RootDatabase::with_single_file(input); | ||
481 | let mut match_finder = MatchFinder::new(&db); | ||
482 | match_finder.add_rule(rule); | ||
483 | let matches = match_finder.find_matches_in_file(file_id); | ||
484 | assert_eq!(matches.matches.len(), 1); | ||
485 | assert_eq!(matches.matches[0].matched_node.text(), "foo(1+2)"); | ||
486 | assert_eq!(matches.matches[0].placeholder_values.len(), 1); | ||
487 | assert_eq!(matches.matches[0].placeholder_values[&Var("x".to_string())].node.text(), "1+2"); | ||
488 | |||
489 | let edit = crate::replacing::matches_to_edit(&matches); | ||
490 | let mut after = input.to_string(); | ||
491 | edit.apply(&mut after); | ||
492 | assert_eq!(after, "fn main() { bar(1+2); }"); | ||
493 | } | ||
494 | } | ||