diff options
author | Zac Pullar-Strecker <[email protected]> | 2020-08-24 10:19:53 +0100 |
---|---|---|
committer | Zac Pullar-Strecker <[email protected]> | 2020-08-24 10:20:13 +0100 |
commit | 7bbca7a1b3f9293d2f5cc5745199bc5f8396f2f0 (patch) | |
tree | bdb47765991cb973b2cd5481a088fac636bd326c /crates/ssr/src/parsing.rs | |
parent | ca464650eeaca6195891199a93f4f76cf3e7e697 (diff) | |
parent | e65d48d1fb3d4d91d9dc1148a7a836ff5c9a3c87 (diff) |
Merge remote-tracking branch 'upstream/master' into 503-hover-doc-links
Diffstat (limited to 'crates/ssr/src/parsing.rs')
-rw-r--r-- | crates/ssr/src/parsing.rs | 403 |
1 files changed, 403 insertions, 0 deletions
diff --git a/crates/ssr/src/parsing.rs b/crates/ssr/src/parsing.rs new file mode 100644 index 000000000..05b66dcd7 --- /dev/null +++ b/crates/ssr/src/parsing.rs | |||
@@ -0,0 +1,403 @@ | |||
1 | //! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`. | ||
2 | //! We first split everything before and after the separator `==>>`. Next, both the search pattern | ||
3 | //! and the replacement template get tokenized by the Rust tokenizer. Tokens are then searched for | ||
4 | //! placeholders, which start with `$`. For replacement templates, this is the final form. For | ||
5 | //! search patterns, we go further and parse the pattern as each kind of thing that we can match. | ||
6 | //! e.g. expressions, type references etc. | ||
7 | |||
8 | use crate::errors::bail; | ||
9 | use crate::{SsrError, SsrPattern, SsrRule}; | ||
10 | use rustc_hash::{FxHashMap, FxHashSet}; | ||
11 | use std::{fmt::Display, str::FromStr}; | ||
12 | use syntax::{ast, AstNode, SmolStr, SyntaxKind, SyntaxNode, T}; | ||
13 | use test_utils::mark; | ||
14 | |||
/// A rule that has already been parsed as a particular kind of Rust fragment
/// (expression, type, item, path or pattern).
#[derive(Debug)]
pub(crate) struct ParsedRule {
    // Maps the unique stand-in names (as they appear in the parsed Rust code)
    // back to the placeholders they were substituted for.
    pub(crate) placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    // The search pattern, parsed as a syntax tree.
    pub(crate) pattern: SyntaxNode,
    // The replacement template; `None` when this came from a bare search
    // pattern rather than a full `a ==>> b` rule.
    pub(crate) template: Option<SyntaxNode>,
}
21 | |||
/// A search or replace pattern as written by the user: a flat sequence of
/// tokens and placeholders, before any attempt to parse it as Rust code.
#[derive(Debug)]
pub(crate) struct RawPattern {
    tokens: Vec<PatternElement>,
}
26 | |||
/// Part of a search or replace pattern: either a plain source token or a
/// `$`-placeholder.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum PatternElement {
    Token(Token),
    Placeholder(Placeholder),
}
33 | |||
/// A `$`-placeholder within a search or replace pattern.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Placeholder {
    /// The name of this placeholder. e.g. for "$a", this would be "a"
    pub(crate) ident: Var,
    /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
    stand_in_name: String,
    // Constraints parsed from the braced form, e.g. `${a:kind(literal)}`;
    // empty for a bare `$a`.
    pub(crate) constraints: Vec<Constraint>,
}
42 | |||
/// Represents a `$var` in an SSR query. The inner `String` is the name without
/// the leading `$` (see `Placeholder::new` / the `Display` impl).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct Var(pub String);
46 | |||
/// A constraint attached to a placeholder, written after a `:` in the braced
/// form, e.g. the `kind(literal)` in `${a:kind(literal)}`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum Constraint {
    // Restricts the placeholder to nodes of a given kind.
    Kind(NodeKind),
    // Negates the inner constraint, written `not(...)`.
    Not(Box<Constraint>),
}
52 | |||
/// Node kinds that can be named inside a `kind(...)` constraint.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum NodeKind {
    // Spelled `kind(literal)` in queries (see `NodeKind::from`).
    Literal,
}
57 | |||
/// A single token produced by the Rust tokenizer: its kind plus the source
/// text it covers (see `tokenize`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Token {
    kind: SyntaxKind,
    pub(crate) text: SmolStr,
}
63 | |||
64 | impl ParsedRule { | ||
65 | fn new( | ||
66 | pattern: &RawPattern, | ||
67 | template: Option<&RawPattern>, | ||
68 | ) -> Result<Vec<ParsedRule>, SsrError> { | ||
69 | let raw_pattern = pattern.as_rust_code(); | ||
70 | let raw_template = template.map(|t| t.as_rust_code()); | ||
71 | let raw_template = raw_template.as_ref().map(|s| s.as_str()); | ||
72 | let mut builder = RuleBuilder { | ||
73 | placeholders_by_stand_in: pattern.placeholders_by_stand_in(), | ||
74 | rules: Vec::new(), | ||
75 | }; | ||
76 | builder.try_add(ast::Expr::parse(&raw_pattern), raw_template.map(ast::Expr::parse)); | ||
77 | builder.try_add(ast::Type::parse(&raw_pattern), raw_template.map(ast::Type::parse)); | ||
78 | builder.try_add(ast::Item::parse(&raw_pattern), raw_template.map(ast::Item::parse)); | ||
79 | builder.try_add(ast::Path::parse(&raw_pattern), raw_template.map(ast::Path::parse)); | ||
80 | builder.try_add(ast::Pat::parse(&raw_pattern), raw_template.map(ast::Pat::parse)); | ||
81 | builder.build() | ||
82 | } | ||
83 | } | ||
84 | |||
/// Accumulates the successfully-parsed variants of a rule while
/// `ParsedRule::new` tries each kind of Rust fragment in turn.
struct RuleBuilder {
    placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    rules: Vec<ParsedRule>,
}
89 | |||
90 | impl RuleBuilder { | ||
91 | fn try_add<T: AstNode>(&mut self, pattern: Result<T, ()>, template: Option<Result<T, ()>>) { | ||
92 | match (pattern, template) { | ||
93 | (Ok(pattern), Some(Ok(template))) => self.rules.push(ParsedRule { | ||
94 | placeholders_by_stand_in: self.placeholders_by_stand_in.clone(), | ||
95 | pattern: pattern.syntax().clone(), | ||
96 | template: Some(template.syntax().clone()), | ||
97 | }), | ||
98 | (Ok(pattern), None) => self.rules.push(ParsedRule { | ||
99 | placeholders_by_stand_in: self.placeholders_by_stand_in.clone(), | ||
100 | pattern: pattern.syntax().clone(), | ||
101 | template: None, | ||
102 | }), | ||
103 | _ => {} | ||
104 | } | ||
105 | } | ||
106 | |||
107 | fn build(mut self) -> Result<Vec<ParsedRule>, SsrError> { | ||
108 | if self.rules.is_empty() { | ||
109 | bail!("Not a valid Rust expression, type, item, path or pattern"); | ||
110 | } | ||
111 | // If any rules contain paths, then we reject any rules that don't contain paths. Allowing a | ||
112 | // mix leads to strange semantics, since the path-based rules only match things where the | ||
113 | // path refers to semantically the same thing, whereas the non-path-based rules could match | ||
114 | // anything. Specifically, if we have a rule like `foo ==>> bar` we only want to match the | ||
115 | // `foo` that is in the current scope, not any `foo`. However "foo" can be parsed as a | ||
116 | // pattern (IDENT_PAT -> NAME -> IDENT). Allowing such a rule through would result in | ||
117 | // renaming everything called `foo` to `bar`. It'd also be slow, since without a path, we'd | ||
118 | // have to use the slow-scan search mechanism. | ||
119 | if self.rules.iter().any(|rule| contains_path(&rule.pattern)) { | ||
120 | let old_len = self.rules.len(); | ||
121 | self.rules.retain(|rule| contains_path(&rule.pattern)); | ||
122 | if self.rules.len() < old_len { | ||
123 | mark::hit!(pattern_is_a_single_segment_path); | ||
124 | } | ||
125 | } | ||
126 | Ok(self.rules) | ||
127 | } | ||
128 | } | ||
129 | |||
130 | /// Returns whether there are any paths in `node`. | ||
131 | fn contains_path(node: &SyntaxNode) -> bool { | ||
132 | node.kind() == SyntaxKind::PATH | ||
133 | || node.descendants().any(|node| node.kind() == SyntaxKind::PATH) | ||
134 | } | ||
135 | |||
136 | impl FromStr for SsrRule { | ||
137 | type Err = SsrError; | ||
138 | |||
139 | fn from_str(query: &str) -> Result<SsrRule, SsrError> { | ||
140 | let mut it = query.split("==>>"); | ||
141 | let pattern = it.next().expect("at least empty string").trim(); | ||
142 | let template = it | ||
143 | .next() | ||
144 | .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))? | ||
145 | .trim() | ||
146 | .to_string(); | ||
147 | if it.next().is_some() { | ||
148 | return Err(SsrError("More than one delimiter found".into())); | ||
149 | } | ||
150 | let raw_pattern = pattern.parse()?; | ||
151 | let raw_template = template.parse()?; | ||
152 | let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?; | ||
153 | let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules }; | ||
154 | validate_rule(&rule)?; | ||
155 | Ok(rule) | ||
156 | } | ||
157 | } | ||
158 | |||
159 | impl FromStr for RawPattern { | ||
160 | type Err = SsrError; | ||
161 | |||
162 | fn from_str(pattern_str: &str) -> Result<RawPattern, SsrError> { | ||
163 | Ok(RawPattern { tokens: parse_pattern(pattern_str)? }) | ||
164 | } | ||
165 | } | ||
166 | |||
167 | impl RawPattern { | ||
168 | /// Returns this search pattern as Rust source code that we can feed to the Rust parser. | ||
169 | fn as_rust_code(&self) -> String { | ||
170 | let mut res = String::new(); | ||
171 | for t in &self.tokens { | ||
172 | res.push_str(match t { | ||
173 | PatternElement::Token(token) => token.text.as_str(), | ||
174 | PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(), | ||
175 | }); | ||
176 | } | ||
177 | res | ||
178 | } | ||
179 | |||
180 | pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> { | ||
181 | let mut res = FxHashMap::default(); | ||
182 | for t in &self.tokens { | ||
183 | if let PatternElement::Placeholder(placeholder) = t { | ||
184 | res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone()); | ||
185 | } | ||
186 | } | ||
187 | res | ||
188 | } | ||
189 | } | ||
190 | |||
191 | impl FromStr for SsrPattern { | ||
192 | type Err = SsrError; | ||
193 | |||
194 | fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> { | ||
195 | let raw_pattern = pattern_str.parse()?; | ||
196 | let parsed_rules = ParsedRule::new(&raw_pattern, None)?; | ||
197 | Ok(SsrPattern { raw: raw_pattern, parsed_rules }) | ||
198 | } | ||
199 | } | ||
200 | |||
201 | /// Returns `pattern_str`, parsed as a search or replace pattern. If `remove_whitespace` is true, | ||
202 | /// then any whitespace tokens will be removed, which we do for the search pattern, but not for the | ||
203 | /// replace pattern. | ||
204 | fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> { | ||
205 | let mut res = Vec::new(); | ||
206 | let mut placeholder_names = FxHashSet::default(); | ||
207 | let mut tokens = tokenize(pattern_str)?.into_iter(); | ||
208 | while let Some(token) = tokens.next() { | ||
209 | if token.kind == T![$] { | ||
210 | let placeholder = parse_placeholder(&mut tokens)?; | ||
211 | if !placeholder_names.insert(placeholder.ident.clone()) { | ||
212 | bail!("Placeholder `{}` repeats more than once", placeholder.ident); | ||
213 | } | ||
214 | res.push(PatternElement::Placeholder(placeholder)); | ||
215 | } else { | ||
216 | res.push(PatternElement::Token(token)); | ||
217 | } | ||
218 | } | ||
219 | Ok(res) | ||
220 | } | ||
221 | |||
222 | /// Checks for errors in a rule. e.g. the replace pattern referencing placeholders that the search | ||
223 | /// pattern didn't define. | ||
224 | fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> { | ||
225 | let mut defined_placeholders = FxHashSet::default(); | ||
226 | for p in &rule.pattern.tokens { | ||
227 | if let PatternElement::Placeholder(placeholder) = p { | ||
228 | defined_placeholders.insert(&placeholder.ident); | ||
229 | } | ||
230 | } | ||
231 | let mut undefined = Vec::new(); | ||
232 | for p in &rule.template.tokens { | ||
233 | if let PatternElement::Placeholder(placeholder) = p { | ||
234 | if !defined_placeholders.contains(&placeholder.ident) { | ||
235 | undefined.push(placeholder.ident.to_string()); | ||
236 | } | ||
237 | if !placeholder.constraints.is_empty() { | ||
238 | bail!("Replacement placeholders cannot have constraints"); | ||
239 | } | ||
240 | } | ||
241 | } | ||
242 | if !undefined.is_empty() { | ||
243 | bail!("Replacement contains undefined placeholders: {}", undefined.join(", ")); | ||
244 | } | ||
245 | Ok(()) | ||
246 | } | ||
247 | |||
248 | fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> { | ||
249 | let mut start = 0; | ||
250 | let (raw_tokens, errors) = syntax::tokenize(source); | ||
251 | if let Some(first_error) = errors.first() { | ||
252 | bail!("Failed to parse pattern: {}", first_error); | ||
253 | } | ||
254 | let mut tokens: Vec<Token> = Vec::new(); | ||
255 | for raw_token in raw_tokens { | ||
256 | let token_len = usize::from(raw_token.len); | ||
257 | tokens.push(Token { | ||
258 | kind: raw_token.kind, | ||
259 | text: SmolStr::new(&source[start..start + token_len]), | ||
260 | }); | ||
261 | start += token_len; | ||
262 | } | ||
263 | Ok(tokens) | ||
264 | } | ||
265 | |||
/// Parses what follows a `$`: either a bare name (`$name`) or a braced form with
/// optional constraints (`${name:constraint:...}`). In the braced form this consumes
/// tokens up to and including the closing `}`.
fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> {
    let mut name = None;
    let mut constraints = Vec::new();
    if let Some(token) = tokens.next() {
        match token.kind {
            SyntaxKind::IDENT => {
                name = Some(token.text);
            }
            T!['{'] => {
                let token =
                    tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?;
                if token.kind == SyntaxKind::IDENT {
                    name = Some(token.text);
                }
                // NOTE(review): if the token right after `{` is not an IDENT it is
                // silently discarded and the loop below starts from the next token —
                // confirm whether that case should instead be an error.
                loop {
                    let token = tokens
                        .next()
                        .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?;
                    match token.kind {
                        // Each `:` introduces one constraint, e.g. `:kind(literal)`.
                        T![:] => {
                            constraints.push(parse_constraint(tokens)?);
                        }
                        T!['}'] => break,
                        _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text),
                    }
                }
            }
            _ => {
                bail!("Placeholders should either be $name or ${{name:constraints}}");
            }
        }
    }
    // Reached on `$` at end of input or `${}` with no identifier.
    let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?;
    Ok(Placeholder::new(name, constraints))
}
301 | |||
302 | fn parse_constraint(tokens: &mut std::vec::IntoIter<Token>) -> Result<Constraint, SsrError> { | ||
303 | let constraint_type = tokens | ||
304 | .next() | ||
305 | .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))? | ||
306 | .text | ||
307 | .to_string(); | ||
308 | match constraint_type.as_str() { | ||
309 | "kind" => { | ||
310 | expect_token(tokens, "(")?; | ||
311 | let t = tokens.next().ok_or_else(|| { | ||
312 | SsrError::new("Unexpected end of constraint while looking for kind") | ||
313 | })?; | ||
314 | if t.kind != SyntaxKind::IDENT { | ||
315 | bail!("Expected ident, found {:?} while parsing kind constraint", t.kind); | ||
316 | } | ||
317 | expect_token(tokens, ")")?; | ||
318 | Ok(Constraint::Kind(NodeKind::from(&t.text)?)) | ||
319 | } | ||
320 | "not" => { | ||
321 | expect_token(tokens, "(")?; | ||
322 | let sub = parse_constraint(tokens)?; | ||
323 | expect_token(tokens, ")")?; | ||
324 | Ok(Constraint::Not(Box::new(sub))) | ||
325 | } | ||
326 | x => bail!("Unsupported constraint type '{}'", x), | ||
327 | } | ||
328 | } | ||
329 | |||
330 | fn expect_token(tokens: &mut std::vec::IntoIter<Token>, expected: &str) -> Result<(), SsrError> { | ||
331 | if let Some(t) = tokens.next() { | ||
332 | if t.text == expected { | ||
333 | return Ok(()); | ||
334 | } | ||
335 | bail!("Expected {} found {}", expected, t.text); | ||
336 | } | ||
337 | bail!("Expected {} found end of stream", expected); | ||
338 | } | ||
339 | |||
340 | impl NodeKind { | ||
341 | fn from(name: &SmolStr) -> Result<NodeKind, SsrError> { | ||
342 | Ok(match name.as_str() { | ||
343 | "literal" => NodeKind::Literal, | ||
344 | _ => bail!("Unknown node kind '{}'", name), | ||
345 | }) | ||
346 | } | ||
347 | } | ||
348 | |||
349 | impl Placeholder { | ||
350 | fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self { | ||
351 | Self { | ||
352 | stand_in_name: format!("__placeholder_{}", name), | ||
353 | constraints, | ||
354 | ident: Var(name.to_string()), | ||
355 | } | ||
356 | } | ||
357 | } | ||
358 | |||
359 | impl Display for Var { | ||
360 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
361 | write!(f, "${}", self.0) | ||
362 | } | ||
363 | } | ||
364 | |||
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that a simple rule splits into the expected token/placeholder streams
    /// on both sides of `==>>`.
    #[test]
    fn parser_happy_case() {
        // Helpers to keep the expected streams readable.
        fn token(kind: SyntaxKind, text: &str) -> PatternElement {
            PatternElement::Token(Token { kind, text: SmolStr::new(text) })
        }
        fn placeholder(name: &str) -> PatternElement {
            PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new()))
        }
        let rule: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();
        let expected_pattern = vec![
            token(SyntaxKind::IDENT, "foo"),
            token(T!['('], "("),
            placeholder("a"),
            token(T![,], ","),
            token(SyntaxKind::WHITESPACE, " "),
            placeholder("b"),
            token(T![')'], ")"),
        ];
        assert_eq!(rule.pattern.tokens, expected_pattern);
        let expected_template = vec![
            token(SyntaxKind::IDENT, "bar"),
            token(T!['('], "("),
            placeholder("b"),
            token(T![,], ","),
            token(SyntaxKind::WHITESPACE, " "),
            placeholder("a"),
            token(T![')'], ")"),
        ];
        assert_eq!(rule.template.tokens, expected_template);
    }
}