diff options
Diffstat (limited to 'crates/ssr/src/parsing.rs')
-rw-r--r-- | crates/ssr/src/parsing.rs | 389 |
1 file changed, 389 insertions, 0 deletions
diff --git a/crates/ssr/src/parsing.rs b/crates/ssr/src/parsing.rs new file mode 100644 index 000000000..9570e96e3 --- /dev/null +++ b/crates/ssr/src/parsing.rs | |||
@@ -0,0 +1,389 @@ | |||
1 | //! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`. | ||
2 | //! We first split everything before and after the separator `==>>`. Next, both the search pattern | ||
3 | //! and the replacement template get tokenized by the Rust tokenizer. Tokens are then searched for | ||
4 | //! placeholders, which start with `$`. For replacement templates, this is the final form. For | ||
5 | //! search patterns, we go further and parse the pattern as each kind of thing that we can match. | ||
6 | //! e.g. expressions, type references etc. | ||
7 | |||
8 | use crate::errors::bail; | ||
9 | use crate::{SsrError, SsrPattern, SsrRule}; | ||
10 | use rustc_hash::{FxHashMap, FxHashSet}; | ||
11 | use std::str::FromStr; | ||
12 | use syntax::{ast, AstNode, SmolStr, SyntaxKind, SyntaxNode, T}; | ||
13 | use test_utils::mark; | ||
14 | |||
/// A search/replace rule after the pattern (and optional template) have been parsed as real Rust
/// syntax. A single source rule can yield several `ParsedRule`s, one per syntactic kind the
/// pattern successfully parsed as (expression, type, item, path, pattern).
#[derive(Debug)]
pub(crate) struct ParsedRule {
    /// Maps the unique stand-in identifiers (substituted for `$placeholders` before parsing as
    /// Rust) back to the placeholders they represent.
    pub(crate) placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    /// The search pattern as a parsed syntax node.
    pub(crate) pattern: SyntaxNode,
    /// The replacement template as a parsed syntax node; `None` when only searching.
    pub(crate) template: Option<SyntaxNode>,
}
21 | |||
/// A search or replace pattern in its raw, tokenized form: a flat sequence of tokens and
/// placeholders, before any attempt to parse it as Rust syntax.
#[derive(Debug)]
pub(crate) struct RawPattern {
    // The tokenized pattern, in source order.
    tokens: Vec<PatternElement>,
}
26 | |||
// Part of a search or replace pattern.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum PatternElement {
    /// A literal token that must appear as-is (search) or be emitted as-is (replace).
    Token(Token),
    /// A `$name` / `${name:constraints}` placeholder.
    Placeholder(Placeholder),
}
33 | |||
/// A `$`-placeholder within a search or replace pattern.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Placeholder {
    /// The name of this placeholder. e.g. for "$a", this would be "a"
    pub(crate) ident: SmolStr,
    /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
    stand_in_name: String,
    /// Constraints given in the `${name:constraint}` form; empty for plain `$name`.
    pub(crate) constraints: Vec<Constraint>,
}
42 | |||
/// A restriction on what a placeholder may match, written as `${name:constraint}`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum Constraint {
    /// `kind(...)` — the matched node must be of the given kind.
    Kind(NodeKind),
    /// `not(...)` — negates the inner constraint.
    Not(Box<Constraint>),
}
48 | |||
/// The node kinds that a `kind(...)` constraint can name. Currently only `literal`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum NodeKind {
    Literal,
}
53 | |||
/// A single lexed token from a pattern string: its syntax kind plus the exact source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Token {
    kind: SyntaxKind,
    pub(crate) text: SmolStr,
}
59 | |||
impl ParsedRule {
    /// Attempts to parse `pattern` (and `template`, if supplied) as each syntactic kind we can
    /// match — expression, type, item, path, pattern — returning one `ParsedRule` per kind that
    /// parsed successfully. Errors if nothing parsed.
    fn new(
        pattern: &RawPattern,
        template: Option<&RawPattern>,
    ) -> Result<Vec<ParsedRule>, SsrError> {
        // Render placeholders as their stand-in identifiers so the result is (hopefully)
        // parseable Rust source.
        let raw_pattern = pattern.as_rust_code();
        let raw_template = template.map(|t| t.as_rust_code());
        let raw_template = raw_template.as_ref().map(|s| s.as_str());
        let mut builder = RuleBuilder {
            placeholders_by_stand_in: pattern.placeholders_by_stand_in(),
            rules: Vec::new(),
        };
        // Try each syntactic kind in turn; each successful parse adds one rule.
        builder.try_add(ast::Expr::parse(&raw_pattern), raw_template.map(ast::Expr::parse));
        builder.try_add(ast::Type::parse(&raw_pattern), raw_template.map(ast::Type::parse));
        builder.try_add(ast::Item::parse(&raw_pattern), raw_template.map(ast::Item::parse));
        builder.try_add(ast::Path::parse(&raw_pattern), raw_template.map(ast::Path::parse));
        builder.try_add(ast::Pat::parse(&raw_pattern), raw_template.map(ast::Pat::parse));
        builder.build()
    }
}
80 | |||
/// Accumulates the `ParsedRule`s produced while trying to parse a pattern as each syntactic kind.
struct RuleBuilder {
    // Shared by every rule this builder produces (cloned into each).
    placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    rules: Vec<ParsedRule>,
}
85 | |||
86 | impl RuleBuilder { | ||
87 | fn try_add<T: AstNode>(&mut self, pattern: Result<T, ()>, template: Option<Result<T, ()>>) { | ||
88 | match (pattern, template) { | ||
89 | (Ok(pattern), Some(Ok(template))) => self.rules.push(ParsedRule { | ||
90 | placeholders_by_stand_in: self.placeholders_by_stand_in.clone(), | ||
91 | pattern: pattern.syntax().clone(), | ||
92 | template: Some(template.syntax().clone()), | ||
93 | }), | ||
94 | (Ok(pattern), None) => self.rules.push(ParsedRule { | ||
95 | placeholders_by_stand_in: self.placeholders_by_stand_in.clone(), | ||
96 | pattern: pattern.syntax().clone(), | ||
97 | template: None, | ||
98 | }), | ||
99 | _ => {} | ||
100 | } | ||
101 | } | ||
102 | |||
103 | fn build(mut self) -> Result<Vec<ParsedRule>, SsrError> { | ||
104 | if self.rules.is_empty() { | ||
105 | bail!("Not a valid Rust expression, type, item, path or pattern"); | ||
106 | } | ||
107 | // If any rules contain paths, then we reject any rules that don't contain paths. Allowing a | ||
108 | // mix leads to strange semantics, since the path-based rules only match things where the | ||
109 | // path refers to semantically the same thing, whereas the non-path-based rules could match | ||
110 | // anything. Specifically, if we have a rule like `foo ==>> bar` we only want to match the | ||
111 | // `foo` that is in the current scope, not any `foo`. However "foo" can be parsed as a | ||
112 | // pattern (IDENT_PAT -> NAME -> IDENT). Allowing such a rule through would result in | ||
113 | // renaming everything called `foo` to `bar`. It'd also be slow, since without a path, we'd | ||
114 | // have to use the slow-scan search mechanism. | ||
115 | if self.rules.iter().any(|rule| contains_path(&rule.pattern)) { | ||
116 | let old_len = self.rules.len(); | ||
117 | self.rules.retain(|rule| contains_path(&rule.pattern)); | ||
118 | if self.rules.len() < old_len { | ||
119 | mark::hit!(pattern_is_a_single_segment_path); | ||
120 | } | ||
121 | } | ||
122 | Ok(self.rules) | ||
123 | } | ||
124 | } | ||
125 | |||
126 | /// Returns whether there are any paths in `node`. | ||
127 | fn contains_path(node: &SyntaxNode) -> bool { | ||
128 | node.kind() == SyntaxKind::PATH | ||
129 | || node.descendants().any(|node| node.kind() == SyntaxKind::PATH) | ||
130 | } | ||
131 | |||
132 | impl FromStr for SsrRule { | ||
133 | type Err = SsrError; | ||
134 | |||
135 | fn from_str(query: &str) -> Result<SsrRule, SsrError> { | ||
136 | let mut it = query.split("==>>"); | ||
137 | let pattern = it.next().expect("at least empty string").trim(); | ||
138 | let template = it | ||
139 | .next() | ||
140 | .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))? | ||
141 | .trim() | ||
142 | .to_string(); | ||
143 | if it.next().is_some() { | ||
144 | return Err(SsrError("More than one delimiter found".into())); | ||
145 | } | ||
146 | let raw_pattern = pattern.parse()?; | ||
147 | let raw_template = template.parse()?; | ||
148 | let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?; | ||
149 | let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules }; | ||
150 | validate_rule(&rule)?; | ||
151 | Ok(rule) | ||
152 | } | ||
153 | } | ||
154 | |||
155 | impl FromStr for RawPattern { | ||
156 | type Err = SsrError; | ||
157 | |||
158 | fn from_str(pattern_str: &str) -> Result<RawPattern, SsrError> { | ||
159 | Ok(RawPattern { tokens: parse_pattern(pattern_str)? }) | ||
160 | } | ||
161 | } | ||
162 | |||
163 | impl RawPattern { | ||
164 | /// Returns this search pattern as Rust source code that we can feed to the Rust parser. | ||
165 | fn as_rust_code(&self) -> String { | ||
166 | let mut res = String::new(); | ||
167 | for t in &self.tokens { | ||
168 | res.push_str(match t { | ||
169 | PatternElement::Token(token) => token.text.as_str(), | ||
170 | PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(), | ||
171 | }); | ||
172 | } | ||
173 | res | ||
174 | } | ||
175 | |||
176 | pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> { | ||
177 | let mut res = FxHashMap::default(); | ||
178 | for t in &self.tokens { | ||
179 | if let PatternElement::Placeholder(placeholder) = t { | ||
180 | res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone()); | ||
181 | } | ||
182 | } | ||
183 | res | ||
184 | } | ||
185 | } | ||
186 | |||
187 | impl FromStr for SsrPattern { | ||
188 | type Err = SsrError; | ||
189 | |||
190 | fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> { | ||
191 | let raw_pattern = pattern_str.parse()?; | ||
192 | let parsed_rules = ParsedRule::new(&raw_pattern, None)?; | ||
193 | Ok(SsrPattern { raw: raw_pattern, parsed_rules }) | ||
194 | } | ||
195 | } | ||
196 | |||
197 | /// Returns `pattern_str`, parsed as a search or replace pattern. If `remove_whitespace` is true, | ||
198 | /// then any whitespace tokens will be removed, which we do for the search pattern, but not for the | ||
199 | /// replace pattern. | ||
200 | fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> { | ||
201 | let mut res = Vec::new(); | ||
202 | let mut placeholder_names = FxHashSet::default(); | ||
203 | let mut tokens = tokenize(pattern_str)?.into_iter(); | ||
204 | while let Some(token) = tokens.next() { | ||
205 | if token.kind == T![$] { | ||
206 | let placeholder = parse_placeholder(&mut tokens)?; | ||
207 | if !placeholder_names.insert(placeholder.ident.clone()) { | ||
208 | bail!("Name `{}` repeats more than once", placeholder.ident); | ||
209 | } | ||
210 | res.push(PatternElement::Placeholder(placeholder)); | ||
211 | } else { | ||
212 | res.push(PatternElement::Token(token)); | ||
213 | } | ||
214 | } | ||
215 | Ok(res) | ||
216 | } | ||
217 | |||
218 | /// Checks for errors in a rule. e.g. the replace pattern referencing placeholders that the search | ||
219 | /// pattern didn't define. | ||
220 | fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> { | ||
221 | let mut defined_placeholders = FxHashSet::default(); | ||
222 | for p in &rule.pattern.tokens { | ||
223 | if let PatternElement::Placeholder(placeholder) = p { | ||
224 | defined_placeholders.insert(&placeholder.ident); | ||
225 | } | ||
226 | } | ||
227 | let mut undefined = Vec::new(); | ||
228 | for p in &rule.template.tokens { | ||
229 | if let PatternElement::Placeholder(placeholder) = p { | ||
230 | if !defined_placeholders.contains(&placeholder.ident) { | ||
231 | undefined.push(format!("${}", placeholder.ident)); | ||
232 | } | ||
233 | if !placeholder.constraints.is_empty() { | ||
234 | bail!("Replacement placeholders cannot have constraints"); | ||
235 | } | ||
236 | } | ||
237 | } | ||
238 | if !undefined.is_empty() { | ||
239 | bail!("Replacement contains undefined placeholders: {}", undefined.join(", ")); | ||
240 | } | ||
241 | Ok(()) | ||
242 | } | ||
243 | |||
244 | fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> { | ||
245 | let mut start = 0; | ||
246 | let (raw_tokens, errors) = syntax::tokenize(source); | ||
247 | if let Some(first_error) = errors.first() { | ||
248 | bail!("Failed to parse pattern: {}", first_error); | ||
249 | } | ||
250 | let mut tokens: Vec<Token> = Vec::new(); | ||
251 | for raw_token in raw_tokens { | ||
252 | let token_len = usize::from(raw_token.len); | ||
253 | tokens.push(Token { | ||
254 | kind: raw_token.kind, | ||
255 | text: SmolStr::new(&source[start..start + token_len]), | ||
256 | }); | ||
257 | start += token_len; | ||
258 | } | ||
259 | Ok(tokens) | ||
260 | } | ||
261 | |||
/// Parses the placeholder that follows a `$` token. Accepts the bare form `$name` and the braced
/// form `${name:constraint1:constraint2}`; consumes exactly the tokens belonging to the
/// placeholder from `tokens`.
fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> {
    let mut name = None;
    let mut constraints = Vec::new();
    if let Some(token) = tokens.next() {
        match token.kind {
            // Bare form: `$name`.
            SyntaxKind::IDENT => {
                name = Some(token.text);
            }
            // Braced form: `${name:constraints}`.
            T!['{'] => {
                let token =
                    tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?;
                if token.kind == SyntaxKind::IDENT {
                    name = Some(token.text);
                }
                // NOTE(review): when the token after `{` is not an identifier it is consumed and
                // dropped here; the loop below then reports a later token as unexpected — confirm
                // this error path is intended.
                loop {
                    let token = tokens
                        .next()
                        .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?;
                    match token.kind {
                        // Each `:` introduces one constraint, e.g. `:kind(literal)`.
                        T![:] => {
                            constraints.push(parse_constraint(tokens)?);
                        }
                        T!['}'] => break,
                        _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text),
                    }
                }
            }
            _ => {
                bail!("Placeholders should either be $name or ${{name:constraints}}");
            }
        }
    }
    // Also reached when `$` was the last token of the pattern (`name` stays `None`).
    let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?;
    Ok(Placeholder::new(name, constraints))
}
297 | |||
298 | fn parse_constraint(tokens: &mut std::vec::IntoIter<Token>) -> Result<Constraint, SsrError> { | ||
299 | let constraint_type = tokens | ||
300 | .next() | ||
301 | .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))? | ||
302 | .text | ||
303 | .to_string(); | ||
304 | match constraint_type.as_str() { | ||
305 | "kind" => { | ||
306 | expect_token(tokens, "(")?; | ||
307 | let t = tokens.next().ok_or_else(|| { | ||
308 | SsrError::new("Unexpected end of constraint while looking for kind") | ||
309 | })?; | ||
310 | if t.kind != SyntaxKind::IDENT { | ||
311 | bail!("Expected ident, found {:?} while parsing kind constraint", t.kind); | ||
312 | } | ||
313 | expect_token(tokens, ")")?; | ||
314 | Ok(Constraint::Kind(NodeKind::from(&t.text)?)) | ||
315 | } | ||
316 | "not" => { | ||
317 | expect_token(tokens, "(")?; | ||
318 | let sub = parse_constraint(tokens)?; | ||
319 | expect_token(tokens, ")")?; | ||
320 | Ok(Constraint::Not(Box::new(sub))) | ||
321 | } | ||
322 | x => bail!("Unsupported constraint type '{}'", x), | ||
323 | } | ||
324 | } | ||
325 | |||
326 | fn expect_token(tokens: &mut std::vec::IntoIter<Token>, expected: &str) -> Result<(), SsrError> { | ||
327 | if let Some(t) = tokens.next() { | ||
328 | if t.text == expected { | ||
329 | return Ok(()); | ||
330 | } | ||
331 | bail!("Expected {} found {}", expected, t.text); | ||
332 | } | ||
333 | bail!("Expected {} found end of stream", expected); | ||
334 | } | ||
335 | |||
336 | impl NodeKind { | ||
337 | fn from(name: &SmolStr) -> Result<NodeKind, SsrError> { | ||
338 | Ok(match name.as_str() { | ||
339 | "literal" => NodeKind::Literal, | ||
340 | _ => bail!("Unknown node kind '{}'", name), | ||
341 | }) | ||
342 | } | ||
343 | } | ||
344 | |||
345 | impl Placeholder { | ||
346 | fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self { | ||
347 | Self { stand_in_name: format!("__placeholder_{}", name), constraints, ident: name } | ||
348 | } | ||
349 | } | ||
350 | |||
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a simple rule and checks the exact token/placeholder sequence on both sides.
    #[test]
    fn parser_happy_case() {
        // Shorthand for building an expected literal token.
        fn token(kind: SyntaxKind, text: &str) -> PatternElement {
            PatternElement::Token(Token { kind, text: SmolStr::new(text) })
        }
        // Shorthand for building an expected unconstrained placeholder.
        fn placeholder(name: &str) -> PatternElement {
            PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new()))
        }
        let result: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();
        // Search side: whitespace around the delimiter is trimmed, interior whitespace kept.
        assert_eq!(
            result.pattern.tokens,
            vec![
                token(SyntaxKind::IDENT, "foo"),
                token(T!['('], "("),
                placeholder("a"),
                token(T![,], ","),
                token(SyntaxKind::WHITESPACE, " "),
                placeholder("b"),
                token(T![')'], ")"),
            ]
        );
        // Replace side mirrors the same structure with the placeholders swapped.
        assert_eq!(
            result.template.tokens,
            vec![
                token(SyntaxKind::IDENT, "bar"),
                token(T!['('], "("),
                placeholder("b"),
                token(T![,], ","),
                token(SyntaxKind::WHITESPACE, " "),
                placeholder("a"),
                token(T![')'], ")"),
            ]
        );
    }
}