Diffstat (limited to 'crates/ra_ssr/src/parsing.rs')
-rw-r--r-- | crates/ra_ssr/src/parsing.rs | 259
1 file changed, 188 insertions, 71 deletions
diff --git a/crates/ra_ssr/src/parsing.rs b/crates/ra_ssr/src/parsing.rs
index 1ae166d19..78e03f394 100644
--- a/crates/ra_ssr/src/parsing.rs
+++ b/crates/ra_ssr/src/parsing.rs
@@ -5,24 +5,22 @@
 //! search patterns, we go further and parse the pattern as each kind of thing that we can match.
 //! e.g. expressions, type references etc.
 
+use crate::errors::bail;
 use crate::{SsrError, SsrPattern, SsrRule};
-use ra_syntax::{ast, AstNode, SmolStr, SyntaxKind};
+use ra_syntax::{ast, AstNode, SmolStr, SyntaxKind, SyntaxNode, T};
 use rustc_hash::{FxHashMap, FxHashSet};
 use std::str::FromStr;
+use test_utils::mark;
 
-/// Returns from the current function with an error, supplied by arguments as for format!
-macro_rules! bail {
-    ($e:expr) => {return Err($crate::SsrError::new($e))};
-    ($fmt:expr, $($arg:tt)+) => {return Err($crate::SsrError::new(format!($fmt, $($arg)+)))}
-}
-
-#[derive(Clone, Debug)]
-pub(crate) struct SsrTemplate {
-    pub(crate) tokens: Vec<PatternElement>,
+#[derive(Debug)]
+pub(crate) struct ParsedRule {
+    pub(crate) placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
+    pub(crate) pattern: SyntaxNode,
+    pub(crate) template: Option<SyntaxNode>,
 }
 
 #[derive(Debug)]
-pub(crate) struct RawSearchPattern {
+pub(crate) struct RawPattern {
     tokens: Vec<PatternElement>,
 }
 
@@ -39,6 +37,18 @@ pub(crate) struct Placeholder {
     pub(crate) ident: SmolStr,
     /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
     stand_in_name: String,
+    pub(crate) constraints: Vec<Constraint>,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) enum Constraint {
+    Kind(NodeKind),
+    Not(Box<Constraint>),
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) enum NodeKind {
+    Literal,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
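(Editorial aside, not part of the patch: a minimal sketch of how a written constraint maps onto the new types, assuming the `${name:constraint}` placeholder syntax introduced later in this diff.)

    // `${a:not(kind(literal))}` is represented, roughly, as:
    let _c = Constraint::Not(Box::new(Constraint::Kind(NodeKind::Literal)));
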
@@ -47,6 +57,78 @@ pub(crate) struct Token {
     pub(crate) text: SmolStr,
 }
 
+impl ParsedRule {
+    fn new(
+        pattern: &RawPattern,
+        template: Option<&RawPattern>,
+    ) -> Result<Vec<ParsedRule>, SsrError> {
+        let raw_pattern = pattern.as_rust_code();
+        let raw_template = template.map(|t| t.as_rust_code());
+        let raw_template = raw_template.as_ref().map(|s| s.as_str());
+        let mut builder = RuleBuilder {
+            placeholders_by_stand_in: pattern.placeholders_by_stand_in(),
+            rules: Vec::new(),
+        };
+        builder.try_add(ast::Expr::parse(&raw_pattern), raw_template.map(ast::Expr::parse));
+        builder.try_add(ast::TypeRef::parse(&raw_pattern), raw_template.map(ast::TypeRef::parse));
+        builder.try_add(ast::Item::parse(&raw_pattern), raw_template.map(ast::Item::parse));
+        builder.try_add(ast::Path::parse(&raw_pattern), raw_template.map(ast::Path::parse));
+        builder.try_add(ast::Pat::parse(&raw_pattern), raw_template.map(ast::Pat::parse));
+        builder.build()
+    }
+}
+
+struct RuleBuilder {
+    placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
+    rules: Vec<ParsedRule>,
+}
+
+impl RuleBuilder {
+    fn try_add<T: AstNode>(&mut self, pattern: Result<T, ()>, template: Option<Result<T, ()>>) {
+        match (pattern, template) {
+            (Ok(pattern), Some(Ok(template))) => self.rules.push(ParsedRule {
+                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
+                pattern: pattern.syntax().clone(),
+                template: Some(template.syntax().clone()),
+            }),
+            (Ok(pattern), None) => self.rules.push(ParsedRule {
+                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
+                pattern: pattern.syntax().clone(),
+                template: None,
+            }),
+            _ => {}
+        }
+    }
+
+    fn build(mut self) -> Result<Vec<ParsedRule>, SsrError> {
+        if self.rules.is_empty() {
+            bail!("Not a valid Rust expression, type, item, path or pattern");
+        }
+        // If any rules contain paths, then we reject any rules that don't contain paths. Allowing a
+        // mix leads to strange semantics, since the path-based rules only match things where the
+        // path refers to semantically the same thing, whereas the non-path-based rules could match
+        // anything. Specifically, if we have a rule like `foo ==>> bar` we only want to match the
+        // `foo` that is in the current scope, not any `foo`. However "foo" can be parsed as a
+        // pattern (BIND_PAT -> NAME -> IDENT). Allowing such a rule through would result in
+        // renaming everything called `foo` to `bar`. It'd also be slow, since without a path, we'd
+        // have to use the slow-scan search mechanism.
+        if self.rules.iter().any(|rule| contains_path(&rule.pattern)) {
+            let old_len = self.rules.len();
+            self.rules.retain(|rule| contains_path(&rule.pattern));
+            if self.rules.len() < old_len {
+                mark::hit!(pattern_is_a_single_segment_path);
+            }
+        }
+        Ok(self.rules)
+    }
+}
+
+/// Returns whether there are any paths in `node`.
+fn contains_path(node: &SyntaxNode) -> bool {
+    node.kind() == SyntaxKind::PATH
+        || node.descendants().any(|node| node.kind() == SyntaxKind::PATH)
+}
+
 impl FromStr for SsrRule {
     type Err = SsrError;
 
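(Editorial aside, not part of the patch: the path filter above, exercised as a hypothetical in-module test; the test name and assertions are illustrative, only `SsrRule`, `parsed_rules` and `contains_path` come from this file.)

    #[test]
    fn single_segment_path_keeps_only_path_rules() {
        // "foo" parses both as a path and as a pattern (BIND_PAT -> NAME -> IDENT),
        // so RuleBuilder::build should retain only the path-based rules rather than
        // letting the rule rename every binding called `foo`.
        let rule: SsrRule = "foo ==>> bar".parse().unwrap();
        assert!(!rule.parsed_rules.is_empty());
        assert!(rule.parsed_rules.iter().all(|r| contains_path(&r.pattern)));
    }
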
@@ -55,27 +137,30 @@ impl FromStr for SsrRule {
         let pattern = it.next().expect("at least empty string").trim();
         let template = it
             .next()
-            .ok_or_else(|| SsrError("Cannot find delemiter `==>>`".into()))?
+            .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))?
             .trim()
             .to_string();
         if it.next().is_some() {
             return Err(SsrError("More than one delimiter found".into()));
         }
-        let rule = SsrRule { pattern: pattern.parse()?, template: template.parse()? };
+        let raw_pattern = pattern.parse()?;
+        let raw_template = template.parse()?;
+        let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?;
+        let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules };
         validate_rule(&rule)?;
         Ok(rule)
     }
 }
 
-impl FromStr for RawSearchPattern {
+impl FromStr for RawPattern {
     type Err = SsrError;
 
-    fn from_str(pattern_str: &str) -> Result<RawSearchPattern, SsrError> {
-        Ok(RawSearchPattern { tokens: parse_pattern(pattern_str)? })
+    fn from_str(pattern_str: &str) -> Result<RawPattern, SsrError> {
+        Ok(RawPattern { tokens: parse_pattern(pattern_str)? })
     }
 }
 
-impl RawSearchPattern {
+impl RawPattern {
     /// Returns this search pattern as Rust source code that we can feed to the Rust parser.
     fn as_rust_code(&self) -> String {
         let mut res = String::new();
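(Editorial aside, not part of the patch: the delimiter handling above in use, sketched assuming in-crate access to `SsrRule`.)

    // A missing or repeated `==>>` is rejected before any Rust parsing happens.
    assert!("foo($a)".parse::<SsrRule>().is_err()); // "Cannot find delimiter `==>>`"
    assert!("a ==>> b ==>> c".parse::<SsrRule>().is_err()); // "More than one delimiter found"
    let _rule: SsrRule = "foo($a) ==>> bar($a)".parse().unwrap();
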
@@ -88,7 +173,7 @@ impl RawSearchPattern {
         res
     }
 
-    fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> {
+    pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> {
         let mut res = FxHashMap::default();
         for t in &self.tokens {
             if let PatternElement::Placeholder(placeholder) = t {
@@ -103,41 +188,9 @@ impl FromStr for SsrPattern {
     type Err = SsrError;
 
     fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> {
-        let raw: RawSearchPattern = pattern_str.parse()?;
-        let raw_str = raw.as_rust_code();
-        let res = SsrPattern {
-            expr: ast::Expr::parse(&raw_str).ok().map(|n| n.syntax().clone()),
-            type_ref: ast::TypeRef::parse(&raw_str).ok().map(|n| n.syntax().clone()),
-            item: ast::ModuleItem::parse(&raw_str).ok().map(|n| n.syntax().clone()),
-            path: ast::Path::parse(&raw_str).ok().map(|n| n.syntax().clone()),
-            pattern: ast::Pat::parse(&raw_str).ok().map(|n| n.syntax().clone()),
-            placeholders_by_stand_in: raw.placeholders_by_stand_in(),
-            raw,
-        };
-        if res.expr.is_none()
-            && res.type_ref.is_none()
-            && res.item.is_none()
-            && res.path.is_none()
-            && res.pattern.is_none()
-        {
-            bail!("Pattern is not a valid Rust expression, type, item, path or pattern");
-        }
-        Ok(res)
-    }
-}
-
-impl FromStr for SsrTemplate {
-    type Err = SsrError;
-
-    fn from_str(pattern_str: &str) -> Result<SsrTemplate, SsrError> {
-        let tokens = parse_pattern(pattern_str)?;
-        // Validate that the template is a valid fragment of Rust code. We reuse the validation
-        // logic for search patterns since the only thing that differs is the error message.
-        if SsrPattern::from_str(pattern_str).is_err() {
-            bail!("Replacement is not a valid Rust expression, type, item, path or pattern");
-        }
-        // Our actual template needs to preserve whitespace, so we can't reuse `tokens`.
-        Ok(SsrTemplate { tokens })
+        let raw_pattern = pattern_str.parse()?;
+        let parsed_rules = ParsedRule::new(&raw_pattern, None)?;
+        Ok(SsrPattern { raw: raw_pattern, parsed_rules })
     }
 }
 
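(Editorial aside, not part of the patch: a search-only pattern now goes through the same `ParsedRule::new` machinery, just without a template; sketch assuming in-crate access.)

    let pattern: SsrPattern = "foo($a, $b)".parse().unwrap();
    assert!(!pattern.parsed_rules.is_empty()); // at least the expression parse succeeds
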
@@ -149,7 +202,7 @@ fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> {
     let mut placeholder_names = FxHashSet::default();
     let mut tokens = tokenize(pattern_str)?.into_iter();
     while let Some(token) = tokens.next() {
-        if token.kind == SyntaxKind::DOLLAR {
+        if token.kind == T![$] {
             let placeholder = parse_placeholder(&mut tokens)?;
             if !placeholder_names.insert(placeholder.ident.clone()) {
                 bail!("Name `{}` repeats more than once", placeholder.ident);
@@ -166,7 +219,7 @@ fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> {
 /// pattern didn't define.
 fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
     let mut defined_placeholders = FxHashSet::default();
-    for p in &rule.pattern.raw.tokens {
+    for p in &rule.pattern.tokens {
         if let PatternElement::Placeholder(placeholder) = p {
             defined_placeholders.insert(&placeholder.ident);
         }
@@ -177,6 +230,9 @@ fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
             if !defined_placeholders.contains(&placeholder.ident) {
                 undefined.push(format!("${}", placeholder.ident));
             }
+            if !placeholder.constraints.is_empty() {
+                bail!("Replacement placeholders cannot have constraints");
+            }
         }
     }
     if !undefined.is_empty() {
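(Editorial aside, not part of the patch: the new constraint check above, sketched; `kind(literal)` is the only constraint this diff defines.)

    // A constraint on a replacement-side placeholder is rejected by validate_rule...
    assert!("foo($a) ==>> bar(${a:kind(literal)})".parse::<SsrRule>().is_err());
    // ...while the same constraint on the search side passes validation.
    assert!("foo(${a:kind(literal)}) ==>> bar($a)".parse::<SsrRule>().is_ok());
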
@@ -205,29 +261,90 @@ fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> {
 
 fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> {
     let mut name = None;
+    let mut constraints = Vec::new();
     if let Some(token) = tokens.next() {
         match token.kind {
             SyntaxKind::IDENT => {
                 name = Some(token.text);
             }
+            T!['{'] => {
+                let token =
+                    tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?;
+                if token.kind == SyntaxKind::IDENT {
+                    name = Some(token.text);
+                }
+                loop {
+                    let token = tokens
+                        .next()
+                        .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?;
+                    match token.kind {
+                        T![:] => {
+                            constraints.push(parse_constraint(tokens)?);
+                        }
+                        T!['}'] => break,
+                        _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text),
+                    }
+                }
+            }
             _ => {
-                bail!("Placeholders should be $name");
+                bail!("Placeholders should either be $name or ${{name:constraints}}");
             }
         }
     }
     let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?;
-    Ok(Placeholder::new(name))
+    Ok(Placeholder::new(name, constraints))
 }
 
-impl Placeholder {
-    fn new(name: SmolStr) -> Self {
-        Self { stand_in_name: format!("__placeholder_{}", name), ident: name }
+fn parse_constraint(tokens: &mut std::vec::IntoIter<Token>) -> Result<Constraint, SsrError> {
+    let constraint_type = tokens
+        .next()
+        .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))?
+        .text
+        .to_string();
+    match constraint_type.as_str() {
+        "kind" => {
+            expect_token(tokens, "(")?;
+            let t = tokens.next().ok_or_else(|| {
+                SsrError::new("Unexpected end of constraint while looking for kind")
+            })?;
+            if t.kind != SyntaxKind::IDENT {
+                bail!("Expected ident, found {:?} while parsing kind constraint", t.kind);
+            }
+            expect_token(tokens, ")")?;
+            Ok(Constraint::Kind(NodeKind::from(&t.text)?))
+        }
+        "not" => {
+            expect_token(tokens, "(")?;
+            let sub = parse_constraint(tokens)?;
+            expect_token(tokens, ")")?;
+            Ok(Constraint::Not(Box::new(sub)))
+        }
+        x => bail!("Unsupported constraint type '{}'", x),
+    }
+}
+
+fn expect_token(tokens: &mut std::vec::IntoIter<Token>, expected: &str) -> Result<(), SsrError> {
+    if let Some(t) = tokens.next() {
+        if t.text == expected {
+            return Ok(());
+        }
+        bail!("Expected {} found {}", expected, t.text);
+    }
+    bail!("Expected {} found end of stream", expected);
+}
+
+impl NodeKind {
+    fn from(name: &SmolStr) -> Result<NodeKind, SsrError> {
+        Ok(match name.as_str() {
+            "literal" => NodeKind::Literal,
+            _ => bail!("Unknown node kind '{}'", name),
+        })
     }
 }
 
-impl SsrError {
-    fn new(message: impl Into<String>) -> SsrError {
-        SsrError(message.into())
+impl Placeholder {
+    fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self {
+        Self { stand_in_name: format!("__placeholder_{}", name), constraints, ident: name }
     }
 }
 
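(Editorial aside, not part of the patch: the placeholder forms accepted by `parse_placeholder` and `parse_constraint` above, with hypothetical names.)

    // $a                       -> ident "a", no constraints
    // ${a}                     -> same as $a
    // ${a:kind(literal)}       -> Constraint::Kind(NodeKind::Literal)
    // ${a:not(kind(literal))}  -> Constraint::Not(Box::new(Constraint::Kind(NodeKind::Literal)))
    let _rule: SsrRule = "foo(${x:kind(literal)}) ==>> bar($x)".parse().unwrap();
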
@@ -241,31 +358,31 @@ mod tests {
         PatternElement::Token(Token { kind, text: SmolStr::new(text) })
     }
     fn placeholder(name: &str) -> PatternElement {
-        PatternElement::Placeholder(Placeholder::new(SmolStr::new(name)))
+        PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new()))
     }
     let result: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();
     assert_eq!(
-        result.pattern.raw.tokens,
+        result.pattern.tokens,
         vec![
             token(SyntaxKind::IDENT, "foo"),
-            token(SyntaxKind::L_PAREN, "("),
+            token(T!['('], "("),
             placeholder("a"),
-            token(SyntaxKind::COMMA, ","),
+            token(T![,], ","),
             token(SyntaxKind::WHITESPACE, " "),
             placeholder("b"),
-            token(SyntaxKind::R_PAREN, ")"),
+            token(T![')'], ")"),
         ]
     );
     assert_eq!(
         result.template.tokens,
         vec![
             token(SyntaxKind::IDENT, "bar"),
-            token(SyntaxKind::L_PAREN, "("),
+            token(T!['('], "("),
             placeholder("b"),
-            token(SyntaxKind::COMMA, ","),
+            token(T![,], ","),
             token(SyntaxKind::WHITESPACE, " "),
             placeholder("a"),
-            token(SyntaxKind::R_PAREN, ")"),
+            token(T![')'], ")"),
         ]
     );
 }