author    Dmitry <[email protected]>    2020-08-14 19:32:05 +0100
committer Dmitry <[email protected]>    2020-08-14 19:32:05 +0100
commit    178c3e135a2a249692f7784712492e7884ae0c00 (patch)
tree      ac6b769dbf7162150caa0c1624786a4dd79ff3be /crates/ssr/src/parsing.rs
parent    06ff8e6c760ff05f10e868b5d1f9d79e42fbb49c (diff)
parent    c2594daf2974dbd4ce3d9b7ec72481764abaceb5 (diff)
Merge remote-tracking branch 'origin/master'
Diffstat (limited to 'crates/ssr/src/parsing.rs')
-rw-r--r--  crates/ssr/src/parsing.rs  389
1 file changed, 389 insertions, 0 deletions
diff --git a/crates/ssr/src/parsing.rs b/crates/ssr/src/parsing.rs
new file mode 100644
index 000000000..9570e96e3
--- /dev/null
+++ b/crates/ssr/src/parsing.rs
@@ -0,0 +1,389 @@
//! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`.
//! We first split everything before and after the separator `==>>`. Next, both the search pattern
//! and the replacement template get tokenized by the Rust tokenizer. Tokens are then searched for
//! placeholders, which start with `$`. For replacement templates, this is the final form. For
//! search patterns, we go further and parse the pattern as each kind of thing that we can match,
//! e.g. expressions, type references, etc.
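//!
//! For example, the rule `foo($a, $b) ==>> bar($b, $a)` splits at `==>>` into the
//! search pattern `foo($a, $b)` and the replacement template `bar($b, $a)`; every
//! placeholder used in the template must be defined by the search pattern.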

use crate::errors::bail;
use crate::{SsrError, SsrPattern, SsrRule};
use rustc_hash::{FxHashMap, FxHashSet};
use std::str::FromStr;
use syntax::{ast, AstNode, SmolStr, SyntaxKind, SyntaxNode, T};
use test_utils::mark;

#[derive(Debug)]
pub(crate) struct ParsedRule {
    pub(crate) placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    pub(crate) pattern: SyntaxNode,
    pub(crate) template: Option<SyntaxNode>,
}

#[derive(Debug)]
pub(crate) struct RawPattern {
    tokens: Vec<PatternElement>,
}

// Part of a search or replace pattern.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum PatternElement {
    Token(Token),
    Placeholder(Placeholder),
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Placeholder {
    /// The name of this placeholder, e.g. for "$a" this would be "a".
    pub(crate) ident: SmolStr,
    /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
    stand_in_name: String,
    pub(crate) constraints: Vec<Constraint>,
}
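
// For example, the placeholder `$a` has `ident == "a"` and (see `Placeholder::new`
// near the bottom of this file) the stand-in name `__placeholder_a`, which is what
// actually appears in the source text handed to the Rust parser.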

#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum Constraint {
    Kind(NodeKind),
    Not(Box<Constraint>),
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum NodeKind {
    Literal,
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Token {
    kind: SyntaxKind,
    pub(crate) text: SmolStr,
}

impl ParsedRule {
    fn new(
        pattern: &RawPattern,
        template: Option<&RawPattern>,
    ) -> Result<Vec<ParsedRule>, SsrError> {
        let raw_pattern = pattern.as_rust_code();
        let raw_template = template.map(|t| t.as_rust_code());
        let raw_template = raw_template.as_ref().map(|s| s.as_str());
        let mut builder = RuleBuilder {
            placeholders_by_stand_in: pattern.placeholders_by_stand_in(),
            rules: Vec::new(),
        };
        builder.try_add(ast::Expr::parse(&raw_pattern), raw_template.map(ast::Expr::parse));
        builder.try_add(ast::Type::parse(&raw_pattern), raw_template.map(ast::Type::parse));
        builder.try_add(ast::Item::parse(&raw_pattern), raw_template.map(ast::Item::parse));
        builder.try_add(ast::Path::parse(&raw_pattern), raw_template.map(ast::Path::parse));
        builder.try_add(ast::Pat::parse(&raw_pattern), raw_template.map(ast::Pat::parse));
        builder.build()
    }
}
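
// Note that the same pattern text may parse as more than one fragment kind:
// `foo($a)`, for instance, is both a valid call expression and a valid
// tuple-struct pattern, so `ParsedRule::new` can return multiple `ParsedRule`s
// for a single SSR rule.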

struct RuleBuilder {
    placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    rules: Vec<ParsedRule>,
}

impl RuleBuilder {
    fn try_add<T: AstNode>(&mut self, pattern: Result<T, ()>, template: Option<Result<T, ()>>) {
        match (pattern, template) {
            (Ok(pattern), Some(Ok(template))) => self.rules.push(ParsedRule {
                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
                pattern: pattern.syntax().clone(),
                template: Some(template.syntax().clone()),
            }),
            (Ok(pattern), None) => self.rules.push(ParsedRule {
                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
                pattern: pattern.syntax().clone(),
                template: None,
            }),
            _ => {}
        }
    }

    fn build(mut self) -> Result<Vec<ParsedRule>, SsrError> {
        if self.rules.is_empty() {
            bail!("Not a valid Rust expression, type, item, path or pattern");
        }
        // If any rules contain paths, then we reject any rules that don't contain paths. Allowing a
        // mix leads to strange semantics, since the path-based rules only match things where the
        // path refers to semantically the same thing, whereas the non-path-based rules could match
        // anything. Specifically, if we have a rule like `foo ==>> bar` we only want to match the
        // `foo` that is in the current scope, not any `foo`. However "foo" can be parsed as a
        // pattern (IDENT_PAT -> NAME -> IDENT). Allowing such a rule through would result in
        // renaming everything called `foo` to `bar`. It'd also be slow, since without a path, we'd
        // have to use the slow-scan search mechanism.
        if self.rules.iter().any(|rule| contains_path(&rule.pattern)) {
            let old_len = self.rules.len();
            self.rules.retain(|rule| contains_path(&rule.pattern));
            if self.rules.len() < old_len {
                mark::hit!(pattern_is_a_single_segment_path);
            }
        }
        Ok(self.rules)
    }
}

/// Returns whether there are any paths in `node`.
fn contains_path(node: &SyntaxNode) -> bool {
    node.kind() == SyntaxKind::PATH
        || node.descendants().any(|node| node.kind() == SyntaxKind::PATH)
}

impl FromStr for SsrRule {
    type Err = SsrError;

    fn from_str(query: &str) -> Result<SsrRule, SsrError> {
        let mut it = query.split("==>>");
        let pattern = it.next().expect("at least empty string").trim();
        let template = it
            .next()
            .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))?
            .trim()
            .to_string();
        if it.next().is_some() {
            return Err(SsrError("More than one delimiter found".into()));
        }
        let raw_pattern = pattern.parse()?;
        let raw_template = template.parse()?;
        let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?;
        let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules };
        validate_rule(&rule)?;
        Ok(rule)
    }
}
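
// Usage sketch: an `SsrRule` is obtained by parsing its textual form, e.g.
//
//     let rule: SsrRule = "foo($a) ==>> bar($a)".parse()?;
//
// A missing or duplicated `==>>` delimiter is reported as an `SsrError`, as is a
// template that references placeholders the search pattern does not define.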

impl FromStr for RawPattern {
    type Err = SsrError;

    fn from_str(pattern_str: &str) -> Result<RawPattern, SsrError> {
        Ok(RawPattern { tokens: parse_pattern(pattern_str)? })
    }
}

impl RawPattern {
    /// Returns this search pattern as Rust source code that we can feed to the Rust parser.
    fn as_rust_code(&self) -> String {
        let mut res = String::new();
        for t in &self.tokens {
            res.push_str(match t {
                PatternElement::Token(token) => token.text.as_str(),
                PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(),
            });
        }
        res
    }

    pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> {
        let mut res = FxHashMap::default();
        for t in &self.tokens {
            if let PatternElement::Placeholder(placeholder) = t {
                res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone());
            }
        }
        res
    }
}
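
// For the pattern `foo($a)`, `as_rust_code()` produces the string
// `"foo(__placeholder_a)"`, while `placeholders_by_stand_in()` maps
// `"__placeholder_a"` back to the placeholder whose ident is `"a"`.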

impl FromStr for SsrPattern {
    type Err = SsrError;

    fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> {
        let raw_pattern = pattern_str.parse()?;
        let parsed_rules = ParsedRule::new(&raw_pattern, None)?;
        Ok(SsrPattern { raw: raw_pattern, parsed_rules })
    }
}

/// Returns `pattern_str`, parsed as a search or replace pattern.
fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> {
    let mut res = Vec::new();
    let mut placeholder_names = FxHashSet::default();
    let mut tokens = tokenize(pattern_str)?.into_iter();
    while let Some(token) = tokens.next() {
        if token.kind == T![$] {
            let placeholder = parse_placeholder(&mut tokens)?;
            if !placeholder_names.insert(placeholder.ident.clone()) {
                bail!("Name `{}` repeats more than once", placeholder.ident);
            }
            res.push(PatternElement::Placeholder(placeholder));
        } else {
            res.push(PatternElement::Token(token));
        }
    }
    Ok(res)
}
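
// For example, parsing `foo($a)` produces, in order: a token `foo`, a token `(`,
// a placeholder for `a`, and a token `)`. Repeating the same placeholder name
// within a single pattern is rejected with an error.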

/// Checks for errors in a rule, e.g. the replace pattern referencing placeholders that the search
/// pattern didn't define.
fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
    let mut defined_placeholders = FxHashSet::default();
    for p in &rule.pattern.tokens {
        if let PatternElement::Placeholder(placeholder) = p {
            defined_placeholders.insert(&placeholder.ident);
        }
    }
    let mut undefined = Vec::new();
    for p in &rule.template.tokens {
        if let PatternElement::Placeholder(placeholder) = p {
            if !defined_placeholders.contains(&placeholder.ident) {
                undefined.push(format!("${}", placeholder.ident));
            }
            if !placeholder.constraints.is_empty() {
                bail!("Replacement placeholders cannot have constraints");
            }
        }
    }
    if !undefined.is_empty() {
        bail!("Replacement contains undefined placeholders: {}", undefined.join(", "));
    }
    Ok(())
}
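
// For example, `foo($a) ==>> bar($b)` fails validation because `$b` is not
// defined by the search pattern, and a rule whose template contains
// `${a:kind(literal)}` fails because replacement placeholders may not carry
// constraints.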

fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> {
    let mut start = 0;
    let (raw_tokens, errors) = syntax::tokenize(source);
    if let Some(first_error) = errors.first() {
        bail!("Failed to parse pattern: {}", first_error);
    }
    let mut tokens: Vec<Token> = Vec::new();
    for raw_token in raw_tokens {
        let token_len = usize::from(raw_token.len);
        tokens.push(Token {
            kind: raw_token.kind,
            text: SmolStr::new(&source[start..start + token_len]),
        });
        start += token_len;
    }
    Ok(tokens)
}
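
// `tokenize("foo($a)")` yields the raw tokens `foo`, `(`, `$`, `a` and `)`;
// `parse_pattern` above then folds each `$`/ident pair into a placeholder.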

fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> {
    let mut name = None;
    let mut constraints = Vec::new();
    if let Some(token) = tokens.next() {
        match token.kind {
            SyntaxKind::IDENT => {
                name = Some(token.text);
            }
            T!['{'] => {
                let token =
                    tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?;
                if token.kind == SyntaxKind::IDENT {
                    name = Some(token.text);
                }
                loop {
                    let token = tokens
                        .next()
                        .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?;
                    match token.kind {
                        T![:] => {
                            constraints.push(parse_constraint(tokens)?);
                        }
                        T!['}'] => break,
                        _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text),
                    }
                }
            }
            _ => {
                bail!("Placeholders should either be $name or ${{name:constraints}}");
            }
        }
    }
    let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?;
    Ok(Placeholder::new(name, constraints))
}
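
// Accepted placeholder forms, per the match above:
//
//     $name
//     ${name}
//     ${name:constraint}
//     ${name:constraint1:constraint2}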

fn parse_constraint(tokens: &mut std::vec::IntoIter<Token>) -> Result<Constraint, SsrError> {
    let constraint_type = tokens
        .next()
        .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))?
        .text
        .to_string();
    match constraint_type.as_str() {
        "kind" => {
            expect_token(tokens, "(")?;
            let t = tokens.next().ok_or_else(|| {
                SsrError::new("Unexpected end of constraint while looking for kind")
            })?;
            if t.kind != SyntaxKind::IDENT {
                bail!("Expected ident, found {:?} while parsing kind constraint", t.kind);
            }
            expect_token(tokens, ")")?;
            Ok(Constraint::Kind(NodeKind::from(&t.text)?))
        }
        "not" => {
            expect_token(tokens, "(")?;
            let sub = parse_constraint(tokens)?;
            expect_token(tokens, ")")?;
            Ok(Constraint::Not(Box::new(sub)))
        }
        x => bail!("Unsupported constraint type '{}'", x),
    }
}
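
// Constraint examples: `${a:kind(literal)}` restricts `$a` to literal nodes (the
// only `NodeKind` currently supported), while `${a:not(kind(literal))}` matches
// anything except a literal.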

fn expect_token(tokens: &mut std::vec::IntoIter<Token>, expected: &str) -> Result<(), SsrError> {
    if let Some(t) = tokens.next() {
        if t.text == expected {
            return Ok(());
        }
        bail!("Expected {} found {}", expected, t.text);
    }
    bail!("Expected {} found end of stream", expected);
}

impl NodeKind {
    fn from(name: &SmolStr) -> Result<NodeKind, SsrError> {
        Ok(match name.as_str() {
            "literal" => NodeKind::Literal,
            _ => bail!("Unknown node kind '{}'", name),
        })
    }
}

impl Placeholder {
    fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self {
        Self { stand_in_name: format!("__placeholder_{}", name), constraints, ident: name }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn parser_happy_case() {
        fn token(kind: SyntaxKind, text: &str) -> PatternElement {
            PatternElement::Token(Token { kind, text: SmolStr::new(text) })
        }
        fn placeholder(name: &str) -> PatternElement {
            PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new()))
        }
        let result: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();
        assert_eq!(
            result.pattern.tokens,
            vec![
                token(SyntaxKind::IDENT, "foo"),
                token(T!['('], "("),
                placeholder("a"),
                token(T![,], ","),
                token(SyntaxKind::WHITESPACE, " "),
                placeholder("b"),
                token(T![')'], ")"),
            ]
        );
        assert_eq!(
            result.template.tokens,
            vec![
                token(SyntaxKind::IDENT, "bar"),
                token(T!['('], "("),
                placeholder("b"),
                token(T![,], ","),
                token(SyntaxKind::WHITESPACE, " "),
                placeholder("a"),
                token(T![')'], ")"),
            ]
        );
    }
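
    // A sketch of an additional check (editor's example, not part of the
    // original commit): repeating a placeholder name within the search pattern
    // is rejected by `parse_pattern`.
    #[test]
    fn parser_repeated_placeholder_name() {
        assert!("foo($a, $a) ==>> foo($a)".parse::<SsrRule>().is_err());
    }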
}