diff options
Diffstat (limited to 'crates/ssr/src/parsing.rs')
-rw-r--r-- | crates/ssr/src/parsing.rs | 414 |
1 files changed, 0 insertions, 414 deletions
diff --git a/crates/ssr/src/parsing.rs b/crates/ssr/src/parsing.rs deleted file mode 100644 index 3d5e4feb7..000000000 --- a/crates/ssr/src/parsing.rs +++ /dev/null | |||
@@ -1,414 +0,0 @@ | |||
1 | //! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`. | ||
2 | //! We first split everything before and after the separator `==>>`. Next, both the search pattern | ||
3 | //! and the replacement template get tokenized by the Rust tokenizer. Tokens are then searched for | ||
4 | //! placeholders, which start with `$`. For replacement templates, this is the final form. For | ||
5 | //! search patterns, we go further and parse the pattern as each kind of thing that we can match. | ||
6 | //! e.g. expressions, type references etc. | ||
7 | |||
8 | use crate::errors::bail; | ||
9 | use crate::{SsrError, SsrPattern, SsrRule}; | ||
10 | use rustc_hash::{FxHashMap, FxHashSet}; | ||
11 | use std::{fmt::Display, str::FromStr}; | ||
12 | use syntax::{ast, AstNode, SmolStr, SyntaxKind, SyntaxNode, T}; | ||
13 | use test_utils::mark; | ||
14 | |||
/// A rule that has been parsed as a specific syntactic kind (expression, type, item, path,
/// pattern or statement). A single source rule may produce several `ParsedRule`s, one per kind
/// that it successfully parses as.
#[derive(Debug)]
pub(crate) struct ParsedRule {
    /// Maps the stand-in identifier substituted into the Rust source (e.g. `__placeholder_a`)
    /// back to the placeholder it represents.
    pub(crate) placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    /// The search pattern, parsed as a syntax tree.
    pub(crate) pattern: SyntaxNode,
    /// The replacement template, parsed as a syntax tree. `None` for search-only patterns.
    pub(crate) template: Option<SyntaxNode>,
}
21 | |||
/// A search or replace pattern as written by the user, tokenized but not yet parsed as Rust
/// syntax: a flat sequence of plain tokens and placeholders.
#[derive(Debug)]
pub(crate) struct RawPattern {
    tokens: Vec<PatternElement>,
}
26 | |||
// Part of a search or replace pattern.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum PatternElement {
    /// A verbatim token from the pattern source.
    Token(Token),
    /// A `$name` / `${name:constraints}` placeholder.
    Placeholder(Placeholder),
}
33 | |||
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Placeholder {
    /// The name of this placeholder. e.g. for "$a", this would be "a"
    pub(crate) ident: Var,
    /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
    stand_in_name: String,
    /// Constraints (e.g. `kind(literal)`, `not(...)`) attached via `${name:constraints}` syntax.
    pub(crate) constraints: Vec<Constraint>,
}
42 | |||
/// Represents a `$var` in an SSR query. The stored string is the bare name without the `$`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct Var(pub(crate) String);
46 | |||
/// A restriction on what a placeholder is allowed to match; see `parse_constraint`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum Constraint {
    /// Matches only nodes of the given kind, e.g. `kind(literal)`.
    Kind(NodeKind),
    /// Inverts the inner constraint, e.g. `not(kind(literal))`.
    Not(Box<Constraint>),
}
52 | |||
/// Node kinds that can be named in a `kind(...)` constraint. Currently only `literal`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum NodeKind {
    Literal,
}
57 | |||
/// A single lexed token from a pattern: its syntax kind plus the exact source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Token {
    kind: SyntaxKind,
    pub(crate) text: SmolStr,
}
63 | |||
impl ParsedRule {
    /// Converts a raw pattern (and optional template) into zero or more `ParsedRule`s by
    /// attempting to parse the pattern text as each syntactic kind SSR supports. Only the
    /// kinds for which both pattern and template parse successfully produce a rule.
    ///
    /// Returns an error if the pattern parses as none of the supported kinds (see
    /// `RuleBuilder::build`).
    fn new(
        pattern: &RawPattern,
        template: Option<&RawPattern>,
    ) -> Result<Vec<ParsedRule>, SsrError> {
        // Substitute stand-in identifiers for placeholders so the text is valid Rust.
        let raw_pattern = pattern.as_rust_code();
        let raw_template = template.map(|t| t.as_rust_code());
        let raw_template = raw_template.as_ref().map(|s| s.as_str());
        let mut builder = RuleBuilder {
            placeholders_by_stand_in: pattern.placeholders_by_stand_in(),
            rules: Vec::new(),
        };

        // Special case for the expression pattern: prefer pairing it with the template parsed
        // as an expression; if the template doesn't parse as an expression, fall back to the
        // template parsed as a statement.
        let raw_template_stmt = raw_template.map(ast::Stmt::parse);
        if let raw_template_expr @ Some(Ok(_)) = raw_template.map(ast::Expr::parse) {
            builder.try_add(ast::Expr::parse(&raw_pattern), raw_template_expr);
        } else {
            builder.try_add(ast::Expr::parse(&raw_pattern), raw_template_stmt.clone());
        }
        builder.try_add(ast::Type::parse(&raw_pattern), raw_template.map(ast::Type::parse));
        builder.try_add(ast::Item::parse(&raw_pattern), raw_template.map(ast::Item::parse));
        builder.try_add(ast::Path::parse(&raw_pattern), raw_template.map(ast::Path::parse));
        builder.try_add(ast::Pat::parse(&raw_pattern), raw_template.map(ast::Pat::parse));
        builder.try_add(ast::Stmt::parse(&raw_pattern), raw_template_stmt);
        builder.build()
    }
}
91 | |||
/// Accumulates the `ParsedRule`s produced while trying each syntactic kind in turn.
struct RuleBuilder {
    placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    rules: Vec<ParsedRule>,
}
96 | |||
impl RuleBuilder {
    /// Records a rule if the pattern parsed successfully and the template (when present) did
    /// too. Failed parses (`Err`) for either side are silently skipped — other syntactic
    /// kinds may still succeed.
    fn try_add<T: AstNode, T2: AstNode>(
        &mut self,
        pattern: Result<T, ()>,
        template: Option<Result<T2, ()>>,
    ) {
        match (pattern, template) {
            (Ok(pattern), Some(Ok(template))) => self.rules.push(ParsedRule {
                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
                pattern: pattern.syntax().clone(),
                template: Some(template.syntax().clone()),
            }),
            (Ok(pattern), None) => self.rules.push(ParsedRule {
                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
                pattern: pattern.syntax().clone(),
                template: None,
            }),
            _ => {}
        }
    }

    /// Finalizes the accumulated rules, erroring if none parsed.
    fn build(mut self) -> Result<Vec<ParsedRule>, SsrError> {
        if self.rules.is_empty() {
            bail!("Not a valid Rust expression, type, item, path or pattern");
        }
        // If any rules contain paths, then we reject any rules that don't contain paths. Allowing a
        // mix leads to strange semantics, since the path-based rules only match things where the
        // path refers to semantically the same thing, whereas the non-path-based rules could match
        // anything. Specifically, if we have a rule like `foo ==>> bar` we only want to match the
        // `foo` that is in the current scope, not any `foo`. However "foo" can be parsed as a
        // pattern (IDENT_PAT -> NAME -> IDENT). Allowing such a rule through would result in
        // renaming everything called `foo` to `bar`. It'd also be slow, since without a path, we'd
        // have to use the slow-scan search mechanism.
        if self.rules.iter().any(|rule| contains_path(&rule.pattern)) {
            let old_len = self.rules.len();
            self.rules.retain(|rule| contains_path(&rule.pattern));
            if self.rules.len() < old_len {
                // Test coverage marker: a non-path parse of a single-segment path was dropped.
                mark::hit!(pattern_is_a_single_segment_path);
            }
        }
        Ok(self.rules)
    }
}
140 | |||
141 | /// Returns whether there are any paths in `node`. | ||
142 | fn contains_path(node: &SyntaxNode) -> bool { | ||
143 | node.kind() == SyntaxKind::PATH | ||
144 | || node.descendants().any(|node| node.kind() == SyntaxKind::PATH) | ||
145 | } | ||
146 | |||
impl FromStr for SsrRule {
    type Err = SsrError;

    /// Parses a full SSR rule of the form `pattern ==>> template`, validating that exactly
    /// one `==>>` delimiter is present and that the template only uses placeholders the
    /// pattern defines.
    fn from_str(query: &str) -> Result<SsrRule, SsrError> {
        let mut it = query.split("==>>");
        // `split` always yields at least one item, even for the empty string.
        let pattern = it.next().expect("at least empty string").trim();
        let template = it
            .next()
            .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))?
            .trim()
            .to_string();
        if it.next().is_some() {
            return Err(SsrError("More than one delimiter found".into()));
        }
        let raw_pattern = pattern.parse()?;
        let raw_template = template.parse()?;
        let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?;
        let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules };
        validate_rule(&rule)?;
        Ok(rule)
    }
}
169 | |||
170 | impl FromStr for RawPattern { | ||
171 | type Err = SsrError; | ||
172 | |||
173 | fn from_str(pattern_str: &str) -> Result<RawPattern, SsrError> { | ||
174 | Ok(RawPattern { tokens: parse_pattern(pattern_str)? }) | ||
175 | } | ||
176 | } | ||
177 | |||
178 | impl RawPattern { | ||
179 | /// Returns this search pattern as Rust source code that we can feed to the Rust parser. | ||
180 | fn as_rust_code(&self) -> String { | ||
181 | let mut res = String::new(); | ||
182 | for t in &self.tokens { | ||
183 | res.push_str(match t { | ||
184 | PatternElement::Token(token) => token.text.as_str(), | ||
185 | PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(), | ||
186 | }); | ||
187 | } | ||
188 | res | ||
189 | } | ||
190 | |||
191 | pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> { | ||
192 | let mut res = FxHashMap::default(); | ||
193 | for t in &self.tokens { | ||
194 | if let PatternElement::Placeholder(placeholder) = t { | ||
195 | res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone()); | ||
196 | } | ||
197 | } | ||
198 | res | ||
199 | } | ||
200 | } | ||
201 | |||
202 | impl FromStr for SsrPattern { | ||
203 | type Err = SsrError; | ||
204 | |||
205 | fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> { | ||
206 | let raw_pattern = pattern_str.parse()?; | ||
207 | let parsed_rules = ParsedRule::new(&raw_pattern, None)?; | ||
208 | Ok(SsrPattern { raw: raw_pattern, parsed_rules }) | ||
209 | } | ||
210 | } | ||
211 | |||
/// Returns `pattern_str`, parsed as a search or replace pattern: a `$` token introduces a
/// placeholder (handed off to `parse_placeholder`); every other token is kept verbatim.
/// Errors if the same placeholder name appears more than once.
fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> {
    let mut res = Vec::new();
    // Tracks names already seen, so duplicates can be rejected.
    let mut placeholder_names = FxHashSet::default();
    // `while let` rather than `for`, because `parse_placeholder` needs to advance the same
    // iterator to consume the placeholder's tokens.
    let mut tokens = tokenize(pattern_str)?.into_iter();
    while let Some(token) = tokens.next() {
        if token.kind == T![$] {
            let placeholder = parse_placeholder(&mut tokens)?;
            if !placeholder_names.insert(placeholder.ident.clone()) {
                bail!("Placeholder `{}` repeats more than once", placeholder.ident);
            }
            res.push(PatternElement::Placeholder(placeholder));
        } else {
            res.push(PatternElement::Token(token));
        }
    }
    Ok(res)
}
232 | |||
233 | /// Checks for errors in a rule. e.g. the replace pattern referencing placeholders that the search | ||
234 | /// pattern didn't define. | ||
235 | fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> { | ||
236 | let mut defined_placeholders = FxHashSet::default(); | ||
237 | for p in &rule.pattern.tokens { | ||
238 | if let PatternElement::Placeholder(placeholder) = p { | ||
239 | defined_placeholders.insert(&placeholder.ident); | ||
240 | } | ||
241 | } | ||
242 | let mut undefined = Vec::new(); | ||
243 | for p in &rule.template.tokens { | ||
244 | if let PatternElement::Placeholder(placeholder) = p { | ||
245 | if !defined_placeholders.contains(&placeholder.ident) { | ||
246 | undefined.push(placeholder.ident.to_string()); | ||
247 | } | ||
248 | if !placeholder.constraints.is_empty() { | ||
249 | bail!("Replacement placeholders cannot have constraints"); | ||
250 | } | ||
251 | } | ||
252 | } | ||
253 | if !undefined.is_empty() { | ||
254 | bail!("Replacement contains undefined placeholders: {}", undefined.join(", ")); | ||
255 | } | ||
256 | Ok(()) | ||
257 | } | ||
258 | |||
259 | fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> { | ||
260 | let mut start = 0; | ||
261 | let (raw_tokens, errors) = syntax::tokenize(source); | ||
262 | if let Some(first_error) = errors.first() { | ||
263 | bail!("Failed to parse pattern: {}", first_error); | ||
264 | } | ||
265 | let mut tokens: Vec<Token> = Vec::new(); | ||
266 | for raw_token in raw_tokens { | ||
267 | let token_len = usize::from(raw_token.len); | ||
268 | tokens.push(Token { | ||
269 | kind: raw_token.kind, | ||
270 | text: SmolStr::new(&source[start..start + token_len]), | ||
271 | }); | ||
272 | start += token_len; | ||
273 | } | ||
274 | Ok(tokens) | ||
275 | } | ||
276 | |||
/// Parses the remainder of a placeholder after its leading `$` has been consumed. Accepts
/// either `$name` (a bare identifier) or `${name}` / `${name:constraint:...}` (braced form,
/// where each `:` introduces one constraint). Errors on any other shape.
fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> {
    let mut name = None;
    let mut constraints = Vec::new();
    if let Some(token) = tokens.next() {
        match token.kind {
            // Bare form: `$name`.
            SyntaxKind::IDENT => {
                name = Some(token.text);
            }
            // Braced form: `${name...}`.
            T!['{'] => {
                let token =
                    tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?;
                if token.kind == SyntaxKind::IDENT {
                    name = Some(token.text);
                }
                // Consume `:constraint` pairs until the closing brace.
                loop {
                    let token = tokens
                        .next()
                        .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?;
                    match token.kind {
                        T![:] => {
                            constraints.push(parse_constraint(tokens)?);
                        }
                        T!['}'] => break,
                        _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text),
                    }
                }
            }
            _ => {
                bail!("Placeholders should either be $name or ${{name:constraints}}");
            }
        }
    }
    // Covers both a trailing `$` and `${...}` with no identifier inside.
    let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?;
    Ok(Placeholder::new(name, constraints))
}
312 | |||
/// Parses a single constraint from a placeholder, e.g. `kind(literal)` or `not(<constraint>)`.
/// `not` recurses, so constraints may nest arbitrarily deep.
fn parse_constraint(tokens: &mut std::vec::IntoIter<Token>) -> Result<Constraint, SsrError> {
    let constraint_type = tokens
        .next()
        .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))?
        .text
        .to_string();
    match constraint_type.as_str() {
        "kind" => {
            expect_token(tokens, "(")?;
            let t = tokens.next().ok_or_else(|| {
                SsrError::new("Unexpected end of constraint while looking for kind")
            })?;
            if t.kind != SyntaxKind::IDENT {
                bail!("Expected ident, found {:?} while parsing kind constraint", t.kind);
            }
            expect_token(tokens, ")")?;
            Ok(Constraint::Kind(NodeKind::from(&t.text)?))
        }
        "not" => {
            expect_token(tokens, "(")?;
            // Recursive case: the argument of `not` is itself a constraint.
            let sub = parse_constraint(tokens)?;
            expect_token(tokens, ")")?;
            Ok(Constraint::Not(Box::new(sub)))
        }
        x => bail!("Unsupported constraint type '{}'", x),
    }
}
340 | |||
341 | fn expect_token(tokens: &mut std::vec::IntoIter<Token>, expected: &str) -> Result<(), SsrError> { | ||
342 | if let Some(t) = tokens.next() { | ||
343 | if t.text == expected { | ||
344 | return Ok(()); | ||
345 | } | ||
346 | bail!("Expected {} found {}", expected, t.text); | ||
347 | } | ||
348 | bail!("Expected {} found end of stream", expected); | ||
349 | } | ||
350 | |||
351 | impl NodeKind { | ||
352 | fn from(name: &SmolStr) -> Result<NodeKind, SsrError> { | ||
353 | Ok(match name.as_str() { | ||
354 | "literal" => NodeKind::Literal, | ||
355 | _ => bail!("Unknown node kind '{}'", name), | ||
356 | }) | ||
357 | } | ||
358 | } | ||
359 | |||
360 | impl Placeholder { | ||
361 | fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self { | ||
362 | Self { | ||
363 | stand_in_name: format!("__placeholder_{}", name), | ||
364 | constraints, | ||
365 | ident: Var(name.to_string()), | ||
366 | } | ||
367 | } | ||
368 | } | ||
369 | |||
370 | impl Display for Var { | ||
371 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
372 | write!(f, "${}", self.0) | ||
373 | } | ||
374 | } | ||
375 | |||
#[cfg(test)]
mod tests {
    use super::*;

    /// Round-trips a simple rule through `SsrRule::from_str` and checks that both sides were
    /// tokenized into the expected mix of tokens and placeholders. Note that whitespace
    /// tokens are preserved in the raw token streams.
    #[test]
    fn parser_happy_case() {
        // Helper: a verbatim token element.
        fn token(kind: SyntaxKind, text: &str) -> PatternElement {
            PatternElement::Token(Token { kind, text: SmolStr::new(text) })
        }
        // Helper: an unconstrained placeholder element.
        fn placeholder(name: &str) -> PatternElement {
            PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new()))
        }
        let result: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();
        assert_eq!(
            result.pattern.tokens,
            vec![
                token(SyntaxKind::IDENT, "foo"),
                token(T!['('], "("),
                placeholder("a"),
                token(T![,], ","),
                token(SyntaxKind::WHITESPACE, " "),
                placeholder("b"),
                token(T![')'], ")"),
            ]
        );
        assert_eq!(
            result.template.tokens,
            vec![
                token(SyntaxKind::IDENT, "bar"),
                token(T!['('], "("),
                placeholder("b"),
                token(T![,], ","),
                token(SyntaxKind::WHITESPACE, " "),
                placeholder("a"),
                token(T![')'], ")"),
            ]
        );
    }
}