aboutsummaryrefslogtreecommitdiff
path: root/crates/ssr/src/parsing.rs
diff options
context:
space:
mode:
authorZac Pullar-Strecker <[email protected]>2020-08-24 10:19:53 +0100
committerZac Pullar-Strecker <[email protected]>2020-08-24 10:20:13 +0100
commit7bbca7a1b3f9293d2f5cc5745199bc5f8396f2f0 (patch)
treebdb47765991cb973b2cd5481a088fac636bd326c /crates/ssr/src/parsing.rs
parentca464650eeaca6195891199a93f4f76cf3e7e697 (diff)
parente65d48d1fb3d4d91d9dc1148a7a836ff5c9a3c87 (diff)
Merge remote-tracking branch 'upstream/master' into 503-hover-doc-links
Diffstat (limited to 'crates/ssr/src/parsing.rs')
-rw-r--r--crates/ssr/src/parsing.rs403
1 files changed, 403 insertions, 0 deletions
diff --git a/crates/ssr/src/parsing.rs b/crates/ssr/src/parsing.rs
new file mode 100644
index 000000000..05b66dcd7
--- /dev/null
+++ b/crates/ssr/src/parsing.rs
@@ -0,0 +1,403 @@
1//! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`.
2//! We first split everything before and after the separator `==>>`. Next, both the search pattern
3//! and the replacement template get tokenized by the Rust tokenizer. Tokens are then searched for
4//! placeholders, which start with `$`. For replacement templates, this is the final form. For
5//! search patterns, we go further and parse the pattern as each kind of thing that we can match.
6//! e.g. expressions, type references etc.
7
8use crate::errors::bail;
9use crate::{SsrError, SsrPattern, SsrRule};
10use rustc_hash::{FxHashMap, FxHashSet};
11use std::{fmt::Display, str::FromStr};
12use syntax::{ast, AstNode, SmolStr, SyntaxKind, SyntaxNode, T};
13use test_utils::mark;
14
/// An SSR rule parsed as one specific syntactic kind (expression, type, item,
/// path or pattern). A single textual rule may yield several `ParsedRule`s,
/// one per kind it successfully parses as.
#[derive(Debug)]
pub(crate) struct ParsedRule {
    // Maps a placeholder's stand-in identifier (e.g. `__placeholder_a`) back
    // to the placeholder it was generated from.
    pub(crate) placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    // Syntax tree of the search pattern.
    pub(crate) pattern: SyntaxNode,
    // Syntax tree of the replacement template; `None` for search-only patterns.
    pub(crate) template: Option<SyntaxNode>,
}
21
/// A search or replace pattern as a flat token stream, before it has been
/// parsed as Rust syntax.
#[derive(Debug)]
pub(crate) struct RawPattern {
    tokens: Vec<PatternElement>,
}
26
// Part of a search or replace pattern: either a literal token or a `$`
// placeholder.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum PatternElement {
    Token(Token),
    Placeholder(Placeholder),
}
33
/// A `$`-prefixed placeholder within a search or replace pattern.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Placeholder {
    /// The name of this placeholder. e.g. for "$a", this would be "a"
    pub(crate) ident: Var,
    /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
    stand_in_name: String,
    // Constraints parsed from the `${name:constraint}` long form; empty for
    // the `$name` short form.
    pub(crate) constraints: Vec<Constraint>,
}
42
/// Represents a `$var` in an SSR query. The inner string is the name without
/// the leading `$`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct Var(pub String);
46
/// A restriction on what a placeholder may match, written as
/// `${name:constraint}` in the query (e.g. `kind(literal)` or `not(...)`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum Constraint {
    Kind(NodeKind),
    Not(Box<Constraint>),
}
52
/// Syntax-node kinds usable in a `kind(...)` constraint. Currently only
/// literals are supported.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum NodeKind {
    Literal,
}
57
/// A single lexed token: its syntax kind and the exact source text it covers.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Token {
    kind: SyntaxKind,
    pub(crate) text: SmolStr,
}
63
64impl ParsedRule {
65 fn new(
66 pattern: &RawPattern,
67 template: Option<&RawPattern>,
68 ) -> Result<Vec<ParsedRule>, SsrError> {
69 let raw_pattern = pattern.as_rust_code();
70 let raw_template = template.map(|t| t.as_rust_code());
71 let raw_template = raw_template.as_ref().map(|s| s.as_str());
72 let mut builder = RuleBuilder {
73 placeholders_by_stand_in: pattern.placeholders_by_stand_in(),
74 rules: Vec::new(),
75 };
76 builder.try_add(ast::Expr::parse(&raw_pattern), raw_template.map(ast::Expr::parse));
77 builder.try_add(ast::Type::parse(&raw_pattern), raw_template.map(ast::Type::parse));
78 builder.try_add(ast::Item::parse(&raw_pattern), raw_template.map(ast::Item::parse));
79 builder.try_add(ast::Path::parse(&raw_pattern), raw_template.map(ast::Path::parse));
80 builder.try_add(ast::Pat::parse(&raw_pattern), raw_template.map(ast::Pat::parse));
81 builder.build()
82 }
83}
84
/// Accumulates the per-syntax-kind `ParsedRule`s produced while trying each
/// way of parsing a pattern.
struct RuleBuilder {
    placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    rules: Vec<ParsedRule>,
}
89
90impl RuleBuilder {
91 fn try_add<T: AstNode>(&mut self, pattern: Result<T, ()>, template: Option<Result<T, ()>>) {
92 match (pattern, template) {
93 (Ok(pattern), Some(Ok(template))) => self.rules.push(ParsedRule {
94 placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
95 pattern: pattern.syntax().clone(),
96 template: Some(template.syntax().clone()),
97 }),
98 (Ok(pattern), None) => self.rules.push(ParsedRule {
99 placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
100 pattern: pattern.syntax().clone(),
101 template: None,
102 }),
103 _ => {}
104 }
105 }
106
107 fn build(mut self) -> Result<Vec<ParsedRule>, SsrError> {
108 if self.rules.is_empty() {
109 bail!("Not a valid Rust expression, type, item, path or pattern");
110 }
111 // If any rules contain paths, then we reject any rules that don't contain paths. Allowing a
112 // mix leads to strange semantics, since the path-based rules only match things where the
113 // path refers to semantically the same thing, whereas the non-path-based rules could match
114 // anything. Specifically, if we have a rule like `foo ==>> bar` we only want to match the
115 // `foo` that is in the current scope, not any `foo`. However "foo" can be parsed as a
116 // pattern (IDENT_PAT -> NAME -> IDENT). Allowing such a rule through would result in
117 // renaming everything called `foo` to `bar`. It'd also be slow, since without a path, we'd
118 // have to use the slow-scan search mechanism.
119 if self.rules.iter().any(|rule| contains_path(&rule.pattern)) {
120 let old_len = self.rules.len();
121 self.rules.retain(|rule| contains_path(&rule.pattern));
122 if self.rules.len() < old_len {
123 mark::hit!(pattern_is_a_single_segment_path);
124 }
125 }
126 Ok(self.rules)
127 }
128}
129
130/// Returns whether there are any paths in `node`.
131fn contains_path(node: &SyntaxNode) -> bool {
132 node.kind() == SyntaxKind::PATH
133 || node.descendants().any(|node| node.kind() == SyntaxKind::PATH)
134}
135
136impl FromStr for SsrRule {
137 type Err = SsrError;
138
139 fn from_str(query: &str) -> Result<SsrRule, SsrError> {
140 let mut it = query.split("==>>");
141 let pattern = it.next().expect("at least empty string").trim();
142 let template = it
143 .next()
144 .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))?
145 .trim()
146 .to_string();
147 if it.next().is_some() {
148 return Err(SsrError("More than one delimiter found".into()));
149 }
150 let raw_pattern = pattern.parse()?;
151 let raw_template = template.parse()?;
152 let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?;
153 let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules };
154 validate_rule(&rule)?;
155 Ok(rule)
156 }
157}
158
159impl FromStr for RawPattern {
160 type Err = SsrError;
161
162 fn from_str(pattern_str: &str) -> Result<RawPattern, SsrError> {
163 Ok(RawPattern { tokens: parse_pattern(pattern_str)? })
164 }
165}
166
167impl RawPattern {
168 /// Returns this search pattern as Rust source code that we can feed to the Rust parser.
169 fn as_rust_code(&self) -> String {
170 let mut res = String::new();
171 for t in &self.tokens {
172 res.push_str(match t {
173 PatternElement::Token(token) => token.text.as_str(),
174 PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(),
175 });
176 }
177 res
178 }
179
180 pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> {
181 let mut res = FxHashMap::default();
182 for t in &self.tokens {
183 if let PatternElement::Placeholder(placeholder) = t {
184 res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone());
185 }
186 }
187 res
188 }
189}
190
191impl FromStr for SsrPattern {
192 type Err = SsrError;
193
194 fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> {
195 let raw_pattern = pattern_str.parse()?;
196 let parsed_rules = ParsedRule::new(&raw_pattern, None)?;
197 Ok(SsrPattern { raw: raw_pattern, parsed_rules })
198 }
199}
200
201/// Returns `pattern_str`, parsed as a search or replace pattern. If `remove_whitespace` is true,
202/// then any whitespace tokens will be removed, which we do for the search pattern, but not for the
203/// replace pattern.
204fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> {
205 let mut res = Vec::new();
206 let mut placeholder_names = FxHashSet::default();
207 let mut tokens = tokenize(pattern_str)?.into_iter();
208 while let Some(token) = tokens.next() {
209 if token.kind == T![$] {
210 let placeholder = parse_placeholder(&mut tokens)?;
211 if !placeholder_names.insert(placeholder.ident.clone()) {
212 bail!("Placeholder `{}` repeats more than once", placeholder.ident);
213 }
214 res.push(PatternElement::Placeholder(placeholder));
215 } else {
216 res.push(PatternElement::Token(token));
217 }
218 }
219 Ok(res)
220}
221
222/// Checks for errors in a rule. e.g. the replace pattern referencing placeholders that the search
223/// pattern didn't define.
224fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
225 let mut defined_placeholders = FxHashSet::default();
226 for p in &rule.pattern.tokens {
227 if let PatternElement::Placeholder(placeholder) = p {
228 defined_placeholders.insert(&placeholder.ident);
229 }
230 }
231 let mut undefined = Vec::new();
232 for p in &rule.template.tokens {
233 if let PatternElement::Placeholder(placeholder) = p {
234 if !defined_placeholders.contains(&placeholder.ident) {
235 undefined.push(placeholder.ident.to_string());
236 }
237 if !placeholder.constraints.is_empty() {
238 bail!("Replacement placeholders cannot have constraints");
239 }
240 }
241 }
242 if !undefined.is_empty() {
243 bail!("Replacement contains undefined placeholders: {}", undefined.join(", "));
244 }
245 Ok(())
246}
247
248fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> {
249 let mut start = 0;
250 let (raw_tokens, errors) = syntax::tokenize(source);
251 if let Some(first_error) = errors.first() {
252 bail!("Failed to parse pattern: {}", first_error);
253 }
254 let mut tokens: Vec<Token> = Vec::new();
255 for raw_token in raw_tokens {
256 let token_len = usize::from(raw_token.len);
257 tokens.push(Token {
258 kind: raw_token.kind,
259 text: SmolStr::new(&source[start..start + token_len]),
260 });
261 start += token_len;
262 }
263 Ok(tokens)
264}
265
266fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> {
267 let mut name = None;
268 let mut constraints = Vec::new();
269 if let Some(token) = tokens.next() {
270 match token.kind {
271 SyntaxKind::IDENT => {
272 name = Some(token.text);
273 }
274 T!['{'] => {
275 let token =
276 tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?;
277 if token.kind == SyntaxKind::IDENT {
278 name = Some(token.text);
279 }
280 loop {
281 let token = tokens
282 .next()
283 .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?;
284 match token.kind {
285 T![:] => {
286 constraints.push(parse_constraint(tokens)?);
287 }
288 T!['}'] => break,
289 _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text),
290 }
291 }
292 }
293 _ => {
294 bail!("Placeholders should either be $name or ${{name:constraints}}");
295 }
296 }
297 }
298 let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?;
299 Ok(Placeholder::new(name, constraints))
300}
301
302fn parse_constraint(tokens: &mut std::vec::IntoIter<Token>) -> Result<Constraint, SsrError> {
303 let constraint_type = tokens
304 .next()
305 .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))?
306 .text
307 .to_string();
308 match constraint_type.as_str() {
309 "kind" => {
310 expect_token(tokens, "(")?;
311 let t = tokens.next().ok_or_else(|| {
312 SsrError::new("Unexpected end of constraint while looking for kind")
313 })?;
314 if t.kind != SyntaxKind::IDENT {
315 bail!("Expected ident, found {:?} while parsing kind constraint", t.kind);
316 }
317 expect_token(tokens, ")")?;
318 Ok(Constraint::Kind(NodeKind::from(&t.text)?))
319 }
320 "not" => {
321 expect_token(tokens, "(")?;
322 let sub = parse_constraint(tokens)?;
323 expect_token(tokens, ")")?;
324 Ok(Constraint::Not(Box::new(sub)))
325 }
326 x => bail!("Unsupported constraint type '{}'", x),
327 }
328}
329
330fn expect_token(tokens: &mut std::vec::IntoIter<Token>, expected: &str) -> Result<(), SsrError> {
331 if let Some(t) = tokens.next() {
332 if t.text == expected {
333 return Ok(());
334 }
335 bail!("Expected {} found {}", expected, t.text);
336 }
337 bail!("Expected {} found end of stream", expected);
338}
339
340impl NodeKind {
341 fn from(name: &SmolStr) -> Result<NodeKind, SsrError> {
342 Ok(match name.as_str() {
343 "literal" => NodeKind::Literal,
344 _ => bail!("Unknown node kind '{}'", name),
345 })
346 }
347}
348
349impl Placeholder {
350 fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self {
351 Self {
352 stand_in_name: format!("__placeholder_{}", name),
353 constraints,
354 ident: Var(name.to_string()),
355 }
356 }
357}
358
359impl Display for Var {
360 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
361 write!(f, "${}", self.0)
362 }
363}
364
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a simple rewrite rule and checks that both sides tokenize into
    /// the expected mix of literal tokens and placeholders. Note: whitespace
    /// is preserved in the raw token streams.
    #[test]
    fn parser_happy_case() {
        // Helper: a literal token element.
        fn token(kind: SyntaxKind, text: &str) -> PatternElement {
            PatternElement::Token(Token { kind, text: SmolStr::new(text) })
        }
        // Helper: an unconstrained placeholder element.
        fn placeholder(name: &str) -> PatternElement {
            PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new()))
        }
        let result: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();
        assert_eq!(
            result.pattern.tokens,
            vec![
                token(SyntaxKind::IDENT, "foo"),
                token(T!['('], "("),
                placeholder("a"),
                token(T![,], ","),
                token(SyntaxKind::WHITESPACE, " "),
                placeholder("b"),
                token(T![')'], ")"),
            ]
        );
        assert_eq!(
            result.template.tokens,
            vec![
                token(SyntaxKind::IDENT, "bar"),
                token(T!['('], "("),
                placeholder("b"),
                token(T![,], ","),
                token(SyntaxKind::WHITESPACE, " "),
                placeholder("a"),
                token(T![')'], ")"),
            ]
        );
    }
}