aboutsummaryrefslogtreecommitdiff
path: root/crates/ssr/src/parsing.rs
diff options
context:
space:
mode:
authorChetan Khilosiya <[email protected]>2021-02-22 19:14:58 +0000
committerChetan Khilosiya <[email protected]>2021-02-22 19:29:16 +0000
commiteb6cfa7f157690480fca5d55c69dba3fae87ad4f (patch)
treea49a763fee848041fd607f449ad13a0b1040636e /crates/ssr/src/parsing.rs
parente4756cb4f6e66097638b9d101589358976be2ba8 (diff)
7526: Renamed crate ssr to ide_ssr.
Diffstat (limited to 'crates/ssr/src/parsing.rs')
-rw-r--r--crates/ssr/src/parsing.rs414
1 files changed, 0 insertions, 414 deletions
diff --git a/crates/ssr/src/parsing.rs b/crates/ssr/src/parsing.rs
deleted file mode 100644
index 3d5e4feb7..000000000
--- a/crates/ssr/src/parsing.rs
+++ /dev/null
@@ -1,414 +0,0 @@
1//! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`.
2//! We first split everything before and after the separator `==>>`. Next, both the search pattern
3//! and the replacement template get tokenized by the Rust tokenizer. Tokens are then searched for
4//! placeholders, which start with `$`. For replacement templates, this is the final form. For
5//! search patterns, we go further and parse the pattern as each kind of thing that we can match.
6//! e.g. expressions, type references etc.
7
8use crate::errors::bail;
9use crate::{SsrError, SsrPattern, SsrRule};
10use rustc_hash::{FxHashMap, FxHashSet};
11use std::{fmt::Display, str::FromStr};
12use syntax::{ast, AstNode, SmolStr, SyntaxKind, SyntaxNode, T};
13use test_utils::mark;
14
/// A rule that has been parsed as one particular syntactic kind (expression, type, item, etc.).
/// A single SSR rule string can yield several `ParsedRule`s — one per kind it parses as.
#[derive(Debug)]
pub(crate) struct ParsedRule {
    // Placeholders, keyed by the unique stand-in identifier substituted into the Rust source.
    pub(crate) placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    // The search pattern, parsed as Rust syntax.
    pub(crate) pattern: SyntaxNode,
    // The replacement template, parsed as Rust syntax. `None` for search-only patterns.
    pub(crate) template: Option<SyntaxNode>,
}
21
/// A search or replace pattern as written by the user: a flat sequence of tokens and
/// placeholders, before being parsed as Rust syntax.
#[derive(Debug)]
pub(crate) struct RawPattern {
    tokens: Vec<PatternElement>,
}
26
// Part of a search or replace pattern.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum PatternElement {
    // A literal token copied through from the input.
    Token(Token),
    // A `$name` or `${name:constraints}` placeholder.
    Placeholder(Placeholder),
}
33
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct Placeholder {
    /// The name of this placeholder. e.g. for "$a", this would be "a"
    pub(crate) ident: Var,
    /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
    stand_in_name: String,
    /// Constraints parsed from the `${name:constraint}` form, e.g. `kind(literal)`.
    pub(crate) constraints: Vec<Constraint>,
}
42
/// Represents a `$var` in an SSR query. Holds the name without the leading `$`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub(crate) struct Var(pub(crate) String);
46
/// A restriction on what a placeholder may match, written as `${name:constraint}`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum Constraint {
    // Restricts the placeholder to nodes of the given kind, e.g. `kind(literal)`.
    Kind(NodeKind),
    // Negates the inner constraint, e.g. `not(kind(literal))`.
    Not(Box<Constraint>),
}
52
/// Syntax-node kinds that a `kind(...)` constraint can name (parsed in `NodeKind::from`).
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) enum NodeKind {
    Literal,
}
57
/// A single lexed token of a pattern: its syntax kind plus the exact source text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Token {
    kind: SyntaxKind,
    pub(crate) text: SmolStr,
}
63
impl ParsedRule {
    /// Attempts to parse `pattern` (and `template`, if supplied) as each kind of syntax we can
    /// match — expression, type, item, path, pattern, statement — producing one rule per kind
    /// for which both sides parse. Fails (via `build`) if no kind parses.
    fn new(
        pattern: &RawPattern,
        template: Option<&RawPattern>,
    ) -> Result<Vec<ParsedRule>, SsrError> {
        // Substitute stand-in identifiers for placeholders so both sides are valid Rust.
        let raw_pattern = pattern.as_rust_code();
        let raw_template = template.map(|t| t.as_rust_code());
        let raw_template = raw_template.as_ref().map(|s| s.as_str());
        let mut builder = RuleBuilder {
            placeholders_by_stand_in: pattern.placeholders_by_stand_in(),
            rules: Vec::new(),
        };

        let raw_template_stmt = raw_template.map(ast::Stmt::parse);
        // Prefer pairing an expression pattern with an expression template; fall back to the
        // statement parse of the template only when it doesn't parse as an expression.
        if let raw_template_expr @ Some(Ok(_)) = raw_template.map(ast::Expr::parse) {
            builder.try_add(ast::Expr::parse(&raw_pattern), raw_template_expr);
        } else {
            builder.try_add(ast::Expr::parse(&raw_pattern), raw_template_stmt.clone());
        }
        builder.try_add(ast::Type::parse(&raw_pattern), raw_template.map(ast::Type::parse));
        builder.try_add(ast::Item::parse(&raw_pattern), raw_template.map(ast::Item::parse));
        builder.try_add(ast::Path::parse(&raw_pattern), raw_template.map(ast::Path::parse));
        builder.try_add(ast::Pat::parse(&raw_pattern), raw_template.map(ast::Pat::parse));
        builder.try_add(ast::Stmt::parse(&raw_pattern), raw_template_stmt);
        builder.build()
    }
}
91
/// Accumulates the `ParsedRule`s produced while trying each syntactic kind in `ParsedRule::new`.
struct RuleBuilder {
    placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
    rules: Vec<ParsedRule>,
}
96
impl RuleBuilder {
    /// Records a rule when the pattern parsed successfully and the template either parsed
    /// successfully or was absent (search-only). Any parse failure silently skips this kind —
    /// other kinds may still succeed.
    fn try_add<T: AstNode, T2: AstNode>(
        &mut self,
        pattern: Result<T, ()>,
        template: Option<Result<T2, ()>>,
    ) {
        match (pattern, template) {
            (Ok(pattern), Some(Ok(template))) => self.rules.push(ParsedRule {
                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
                pattern: pattern.syntax().clone(),
                template: Some(template.syntax().clone()),
            }),
            (Ok(pattern), None) => self.rules.push(ParsedRule {
                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
                pattern: pattern.syntax().clone(),
                template: None,
            }),
            _ => {}
        }
    }

    /// Finalizes the accumulated rules, erroring if nothing parsed and filtering out
    /// non-path rules when any path-based rule exists (see comment below).
    fn build(mut self) -> Result<Vec<ParsedRule>, SsrError> {
        if self.rules.is_empty() {
            bail!("Not a valid Rust expression, type, item, path or pattern");
        }
        // If any rules contain paths, then we reject any rules that don't contain paths. Allowing a
        // mix leads to strange semantics, since the path-based rules only match things where the
        // path refers to semantically the same thing, whereas the non-path-based rules could match
        // anything. Specifically, if we have a rule like `foo ==>> bar` we only want to match the
        // `foo` that is in the current scope, not any `foo`. However "foo" can be parsed as a
        // pattern (IDENT_PAT -> NAME -> IDENT). Allowing such a rule through would result in
        // renaming everything called `foo` to `bar`. It'd also be slow, since without a path, we'd
        // have to use the slow-scan search mechanism.
        if self.rules.iter().any(|rule| contains_path(&rule.pattern)) {
            let old_len = self.rules.len();
            self.rules.retain(|rule| contains_path(&rule.pattern));
            if self.rules.len() < old_len {
                // Test coverage marker: we actually dropped a non-path rule.
                mark::hit!(pattern_is_a_single_segment_path);
            }
        }
        Ok(self.rules)
    }
}
140
141/// Returns whether there are any paths in `node`.
142fn contains_path(node: &SyntaxNode) -> bool {
143 node.kind() == SyntaxKind::PATH
144 || node.descendants().any(|node| node.kind() == SyntaxKind::PATH)
145}
146
impl FromStr for SsrRule {
    type Err = SsrError;

    /// Parses a full rule of the form `search_pattern ==>> replacement_template`.
    /// Errors if the `==>>` delimiter is missing or appears more than once, if either side
    /// fails to parse, or if the rule fails validation (see `validate_rule`).
    fn from_str(query: &str) -> Result<SsrRule, SsrError> {
        let mut it = query.split("==>>");
        // `split` always yields at least one item, even for an empty input.
        let pattern = it.next().expect("at least empty string").trim();
        let template = it
            .next()
            .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))?
            .trim()
            .to_string();
        // A third item means the delimiter occurred twice.
        if it.next().is_some() {
            return Err(SsrError("More than one delimiter found".into()));
        }
        let raw_pattern = pattern.parse()?;
        let raw_template = template.parse()?;
        let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?;
        let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules };
        validate_rule(&rule)?;
        Ok(rule)
    }
}
169
170impl FromStr for RawPattern {
171 type Err = SsrError;
172
173 fn from_str(pattern_str: &str) -> Result<RawPattern, SsrError> {
174 Ok(RawPattern { tokens: parse_pattern(pattern_str)? })
175 }
176}
177
178impl RawPattern {
179 /// Returns this search pattern as Rust source code that we can feed to the Rust parser.
180 fn as_rust_code(&self) -> String {
181 let mut res = String::new();
182 for t in &self.tokens {
183 res.push_str(match t {
184 PatternElement::Token(token) => token.text.as_str(),
185 PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(),
186 });
187 }
188 res
189 }
190
191 pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> {
192 let mut res = FxHashMap::default();
193 for t in &self.tokens {
194 if let PatternElement::Placeholder(placeholder) = t {
195 res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone());
196 }
197 }
198 res
199 }
200}
201
impl FromStr for SsrPattern {
    type Err = SsrError;

    /// Parses a search-only pattern: no `==>>` delimiter and no replacement template.
    fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> {
        let raw_pattern = pattern_str.parse()?;
        let parsed_rules = ParsedRule::new(&raw_pattern, None)?;
        Ok(SsrPattern { raw: raw_pattern, parsed_rules })
    }
}
211
/// Returns `pattern_str`, parsed as a search or replace pattern: a sequence of literal tokens
/// and placeholders. Fails if tokenization fails, a placeholder is malformed, or the same
/// placeholder name is used more than once.
fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> {
    let mut res = Vec::new();
    // Placeholder names seen so far, for duplicate detection.
    let mut placeholder_names = FxHashSet::default();
    let mut tokens = tokenize(pattern_str)?.into_iter();
    while let Some(token) = tokens.next() {
        if token.kind == T![$] {
            // `$` introduces a placeholder; `parse_placeholder` consumes its remaining tokens.
            let placeholder = parse_placeholder(&mut tokens)?;
            if !placeholder_names.insert(placeholder.ident.clone()) {
                bail!("Placeholder `{}` repeats more than once", placeholder.ident);
            }
            res.push(PatternElement::Placeholder(placeholder));
        } else {
            res.push(PatternElement::Token(token));
        }
    }
    Ok(res)
}
232
/// Checks for errors in a rule. e.g. the replace pattern referencing placeholders that the search
/// pattern didn't define.
fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
    // Collect every placeholder name that appears in the search pattern.
    let mut defined_placeholders = FxHashSet::default();
    for p in &rule.pattern.tokens {
        if let PatternElement::Placeholder(placeholder) = p {
            defined_placeholders.insert(&placeholder.ident);
        }
    }
    // Then check that every placeholder in the template is defined and unconstrained.
    let mut undefined = Vec::new();
    for p in &rule.template.tokens {
        if let PatternElement::Placeholder(placeholder) = p {
            if !defined_placeholders.contains(&placeholder.ident) {
                undefined.push(placeholder.ident.to_string());
            }
            // Constraints only make sense on the search side, where matching happens.
            if !placeholder.constraints.is_empty() {
                bail!("Replacement placeholders cannot have constraints");
            }
        }
    }
    if !undefined.is_empty() {
        bail!("Replacement contains undefined placeholders: {}", undefined.join(", "));
    }
    Ok(())
}
258
/// Lexes `source` with the Rust tokenizer, pairing each token's kind with its source text.
/// Fails on the first lexer error.
fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> {
    let mut start = 0;
    let (raw_tokens, errors) = syntax::tokenize(source);
    if let Some(first_error) = errors.first() {
        bail!("Failed to parse pattern: {}", first_error);
    }
    let mut tokens: Vec<Token> = Vec::new();
    for raw_token in raw_tokens {
        // Raw tokens carry only lengths; slice the corresponding text back out of `source`.
        let token_len = usize::from(raw_token.len);
        tokens.push(Token {
            kind: raw_token.kind,
            text: SmolStr::new(&source[start..start + token_len]),
        });
        start += token_len;
    }
    Ok(tokens)
}
276
/// Parses the tokens following a `$`, which must be either a bare name (`$a`) or a braced form
/// with optional constraints (`${a}` / `${a:kind(literal)}`). Consumes exactly the tokens that
/// belong to the placeholder.
fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> {
    let mut name = None;
    let mut constraints = Vec::new();
    if let Some(token) = tokens.next() {
        match token.kind {
            // `$name` — the simple form.
            SyntaxKind::IDENT => {
                name = Some(token.text);
            }
            // `${name:constraints}` — the braced form.
            T!['{'] => {
                let token =
                    tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?;
                if token.kind == SyntaxKind::IDENT {
                    name = Some(token.text);
                }
                // Consume `:constraint` pairs until the closing brace.
                loop {
                    let token = tokens
                        .next()
                        .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?;
                    match token.kind {
                        T![:] => {
                            constraints.push(parse_constraint(tokens)?);
                        }
                        T!['}'] => break,
                        _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text),
                    }
                }
            }
            _ => {
                bail!("Placeholders should either be $name or ${{name:constraints}}");
            }
        }
    }
    // Reached when `$` was the last token, or `${` wasn't followed by an identifier.
    let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?;
    Ok(Placeholder::new(name, constraints))
}
312
/// Parses one constraint, e.g. `kind(literal)` or `not(kind(literal))`, consuming its tokens.
/// `not` recurses, so constraints may nest arbitrarily deep.
fn parse_constraint(tokens: &mut std::vec::IntoIter<Token>) -> Result<Constraint, SsrError> {
    let constraint_type = tokens
        .next()
        .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))?
        .text
        .to_string();
    match constraint_type.as_str() {
        "kind" => {
            expect_token(tokens, "(")?;
            let t = tokens.next().ok_or_else(|| {
                SsrError::new("Unexpected end of constraint while looking for kind")
            })?;
            if t.kind != SyntaxKind::IDENT {
                bail!("Expected ident, found {:?} while parsing kind constraint", t.kind);
            }
            expect_token(tokens, ")")?;
            Ok(Constraint::Kind(NodeKind::from(&t.text)?))
        }
        "not" => {
            expect_token(tokens, "(")?;
            let sub = parse_constraint(tokens)?;
            expect_token(tokens, ")")?;
            Ok(Constraint::Not(Box::new(sub)))
        }
        x => bail!("Unsupported constraint type '{}'", x),
    }
}
340
341fn expect_token(tokens: &mut std::vec::IntoIter<Token>, expected: &str) -> Result<(), SsrError> {
342 if let Some(t) = tokens.next() {
343 if t.text == expected {
344 return Ok(());
345 }
346 bail!("Expected {} found {}", expected, t.text);
347 }
348 bail!("Expected {} found end of stream", expected);
349}
350
351impl NodeKind {
352 fn from(name: &SmolStr) -> Result<NodeKind, SsrError> {
353 Ok(match name.as_str() {
354 "literal" => NodeKind::Literal,
355 _ => bail!("Unknown node kind '{}'", name),
356 })
357 }
358}
359
360impl Placeholder {
361 fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self {
362 Self {
363 stand_in_name: format!("__placeholder_{}", name),
364 constraints,
365 ident: Var(name.to_string()),
366 }
367 }
368}
369
impl Display for Var {
    /// Renders the variable as it appears in queries, e.g. `$a`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "${}", self.0)
    }
}
375
#[cfg(test)]
mod tests {
    use super::*;

    // Round-trips a simple rule through `SsrRule::from_str` and checks the exact token
    // sequences produced for both the search pattern and the replacement template.
    #[test]
    fn parser_happy_case() {
        fn token(kind: SyntaxKind, text: &str) -> PatternElement {
            PatternElement::Token(Token { kind, text: SmolStr::new(text) })
        }
        fn placeholder(name: &str) -> PatternElement {
            PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new()))
        }
        let result: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();
        assert_eq!(
            result.pattern.tokens,
            vec![
                token(SyntaxKind::IDENT, "foo"),
                token(T!['('], "("),
                placeholder("a"),
                token(T![,], ","),
                token(SyntaxKind::WHITESPACE, " "),
                placeholder("b"),
                token(T![')'], ")"),
            ]
        );
        assert_eq!(
            result.template.tokens,
            vec![
                token(SyntaxKind::IDENT, "bar"),
                token(T!['('], "("),
                placeholder("b"),
                token(T![,], ","),
                token(SyntaxKind::WHITESPACE, " "),
                placeholder("a"),
                token(T![')'], ")"),
            ]
        );
    }
}