diff options
Diffstat (limited to 'crates/ra_ssr/src/parsing.rs')
-rw-r--r-- | crates/ra_ssr/src/parsing.rs | 272 |
1 files changed, 272 insertions, 0 deletions
diff --git a/crates/ra_ssr/src/parsing.rs b/crates/ra_ssr/src/parsing.rs new file mode 100644 index 000000000..90c13dbc2 --- /dev/null +++ b/crates/ra_ssr/src/parsing.rs | |||
@@ -0,0 +1,272 @@ | |||
1 | //! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`. | ||
2 | //! We first split everything before and after the separator `==>>`. Next, both the search pattern | ||
3 | //! and the replacement template get tokenized by the Rust tokenizer. Tokens are then searched for | ||
4 | //! placeholders, which start with `$`. For replacement templates, this is the final form. For | ||
5 | //! search patterns, we go further and parse the pattern as each kind of thing that we can match. | ||
6 | //! e.g. expressions, type references etc. | ||
7 | |||
8 | use crate::{SsrError, SsrPattern, SsrRule}; | ||
9 | use ra_syntax::{ast, AstNode, SmolStr, SyntaxKind}; | ||
10 | use rustc_hash::{FxHashMap, FxHashSet}; | ||
11 | use std::str::FromStr; | ||
12 | |||
/// Early-returns `Err(SsrError)` from the enclosing function. Takes either a single message
/// expression, or a format string followed by arguments as accepted by `format!`.
macro_rules! bail {
    ($msg:expr) => {
        return Err($crate::SsrError::new($msg))
    };
    ($fmt:expr, $($arg:tt)+) => {
        return Err($crate::SsrError::new(format!($fmt, $($arg)+)))
    };
}
18 | |||
19 | #[derive(Clone, Debug)] | ||
20 | pub(crate) struct SsrTemplate { | ||
21 | pub(crate) tokens: Vec<PatternElement>, | ||
22 | } | ||
23 | |||
24 | #[derive(Debug)] | ||
25 | pub(crate) struct RawSearchPattern { | ||
26 | tokens: Vec<PatternElement>, | ||
27 | } | ||
28 | |||
29 | // Part of a search or replace pattern. | ||
30 | #[derive(Clone, Debug, PartialEq, Eq)] | ||
31 | pub(crate) enum PatternElement { | ||
32 | Token(Token), | ||
33 | Placeholder(Placeholder), | ||
34 | } | ||
35 | |||
36 | #[derive(Clone, Debug, PartialEq, Eq)] | ||
37 | pub(crate) struct Placeholder { | ||
38 | /// The name of this placeholder. e.g. for "$a", this would be "a" | ||
39 | pub(crate) ident: SmolStr, | ||
40 | /// A unique name used in place of this placeholder when we parse the pattern as Rust code. | ||
41 | stand_in_name: String, | ||
42 | } | ||
43 | |||
44 | #[derive(Debug, Clone, PartialEq, Eq)] | ||
45 | pub(crate) struct Token { | ||
46 | kind: SyntaxKind, | ||
47 | pub(crate) text: SmolStr, | ||
48 | } | ||
49 | |||
50 | impl FromStr for SsrRule { | ||
51 | type Err = SsrError; | ||
52 | |||
53 | fn from_str(query: &str) -> Result<SsrRule, SsrError> { | ||
54 | let mut it = query.split("==>>"); | ||
55 | let pattern = it.next().expect("at least empty string").trim(); | ||
56 | let template = it | ||
57 | .next() | ||
58 | .ok_or_else(|| SsrError("Cannot find delemiter `==>>`".into()))? | ||
59 | .trim() | ||
60 | .to_string(); | ||
61 | if it.next().is_some() { | ||
62 | return Err(SsrError("More than one delimiter found".into())); | ||
63 | } | ||
64 | let rule = SsrRule { pattern: pattern.parse()?, template: template.parse()? }; | ||
65 | validate_rule(&rule)?; | ||
66 | Ok(rule) | ||
67 | } | ||
68 | } | ||
69 | |||
70 | impl FromStr for RawSearchPattern { | ||
71 | type Err = SsrError; | ||
72 | |||
73 | fn from_str(pattern_str: &str) -> Result<RawSearchPattern, SsrError> { | ||
74 | Ok(RawSearchPattern { tokens: parse_pattern(pattern_str)? }) | ||
75 | } | ||
76 | } | ||
77 | |||
78 | impl RawSearchPattern { | ||
79 | /// Returns this search pattern as Rust source code that we can feed to the Rust parser. | ||
80 | fn as_rust_code(&self) -> String { | ||
81 | let mut res = String::new(); | ||
82 | for t in &self.tokens { | ||
83 | res.push_str(match t { | ||
84 | PatternElement::Token(token) => token.text.as_str(), | ||
85 | PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(), | ||
86 | }); | ||
87 | } | ||
88 | res | ||
89 | } | ||
90 | |||
91 | fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> { | ||
92 | let mut res = FxHashMap::default(); | ||
93 | for t in &self.tokens { | ||
94 | if let PatternElement::Placeholder(placeholder) = t { | ||
95 | res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone()); | ||
96 | } | ||
97 | } | ||
98 | res | ||
99 | } | ||
100 | } | ||
101 | |||
102 | impl FromStr for SsrPattern { | ||
103 | type Err = SsrError; | ||
104 | |||
105 | fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> { | ||
106 | let raw: RawSearchPattern = pattern_str.parse()?; | ||
107 | let raw_str = raw.as_rust_code(); | ||
108 | let res = SsrPattern { | ||
109 | expr: ast::Expr::parse(&raw_str).ok().map(|n| n.syntax().clone()), | ||
110 | type_ref: ast::TypeRef::parse(&raw_str).ok().map(|n| n.syntax().clone()), | ||
111 | item: ast::ModuleItem::parse(&raw_str).ok().map(|n| n.syntax().clone()), | ||
112 | path: ast::Path::parse(&raw_str).ok().map(|n| n.syntax().clone()), | ||
113 | pattern: ast::Pat::parse(&raw_str).ok().map(|n| n.syntax().clone()), | ||
114 | placeholders_by_stand_in: raw.placeholders_by_stand_in(), | ||
115 | raw, | ||
116 | }; | ||
117 | if res.expr.is_none() | ||
118 | && res.type_ref.is_none() | ||
119 | && res.item.is_none() | ||
120 | && res.path.is_none() | ||
121 | && res.pattern.is_none() | ||
122 | { | ||
123 | bail!("Pattern is not a valid Rust expression, type, item, path or pattern"); | ||
124 | } | ||
125 | Ok(res) | ||
126 | } | ||
127 | } | ||
128 | |||
129 | impl FromStr for SsrTemplate { | ||
130 | type Err = SsrError; | ||
131 | |||
132 | fn from_str(pattern_str: &str) -> Result<SsrTemplate, SsrError> { | ||
133 | let tokens = parse_pattern(pattern_str)?; | ||
134 | // Validate that the template is a valid fragment of Rust code. We reuse the validation | ||
135 | // logic for search patterns since the only thing that differs is the error message. | ||
136 | if SsrPattern::from_str(pattern_str).is_err() { | ||
137 | bail!("Replacement is not a valid Rust expression, type, item, path or pattern"); | ||
138 | } | ||
139 | // Our actual template needs to preserve whitespace, so we can't reuse `tokens`. | ||
140 | Ok(SsrTemplate { tokens }) | ||
141 | } | ||
142 | } | ||
143 | |||
144 | /// Returns `pattern_str`, parsed as a search or replace pattern. If `remove_whitespace` is true, | ||
145 | /// then any whitespace tokens will be removed, which we do for the search pattern, but not for the | ||
146 | /// replace pattern. | ||
147 | fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> { | ||
148 | let mut res = Vec::new(); | ||
149 | let mut placeholder_names = FxHashSet::default(); | ||
150 | let mut tokens = tokenize(pattern_str)?.into_iter(); | ||
151 | while let Some(token) = tokens.next() { | ||
152 | if token.kind == SyntaxKind::DOLLAR { | ||
153 | let placeholder = parse_placeholder(&mut tokens)?; | ||
154 | if !placeholder_names.insert(placeholder.ident.clone()) { | ||
155 | bail!("Name `{}` repeats more than once", placeholder.ident); | ||
156 | } | ||
157 | res.push(PatternElement::Placeholder(placeholder)); | ||
158 | } else { | ||
159 | res.push(PatternElement::Token(token)); | ||
160 | } | ||
161 | } | ||
162 | Ok(res) | ||
163 | } | ||
164 | |||
165 | /// Checks for errors in a rule. e.g. the replace pattern referencing placeholders that the search | ||
166 | /// pattern didn't define. | ||
167 | fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> { | ||
168 | let mut defined_placeholders = std::collections::HashSet::new(); | ||
169 | for p in &rule.pattern.raw.tokens { | ||
170 | if let PatternElement::Placeholder(placeholder) = p { | ||
171 | defined_placeholders.insert(&placeholder.ident); | ||
172 | } | ||
173 | } | ||
174 | let mut undefined = Vec::new(); | ||
175 | for p in &rule.template.tokens { | ||
176 | if let PatternElement::Placeholder(placeholder) = p { | ||
177 | if !defined_placeholders.contains(&placeholder.ident) { | ||
178 | undefined.push(format!("${}", placeholder.ident)); | ||
179 | } | ||
180 | } | ||
181 | } | ||
182 | if !undefined.is_empty() { | ||
183 | bail!("Replacement contains undefined placeholders: {}", undefined.join(", ")); | ||
184 | } | ||
185 | Ok(()) | ||
186 | } | ||
187 | |||
188 | fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> { | ||
189 | let mut start = 0; | ||
190 | let (raw_tokens, errors) = ra_syntax::tokenize(source); | ||
191 | if let Some(first_error) = errors.first() { | ||
192 | bail!("Failed to parse pattern: {}", first_error); | ||
193 | } | ||
194 | let mut tokens: Vec<Token> = Vec::new(); | ||
195 | for raw_token in raw_tokens { | ||
196 | let token_len = usize::from(raw_token.len); | ||
197 | tokens.push(Token { | ||
198 | kind: raw_token.kind, | ||
199 | text: SmolStr::new(&source[start..start + token_len]), | ||
200 | }); | ||
201 | start += token_len; | ||
202 | } | ||
203 | Ok(tokens) | ||
204 | } | ||
205 | |||
206 | fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> { | ||
207 | let mut name = None; | ||
208 | if let Some(token) = tokens.next() { | ||
209 | match token.kind { | ||
210 | SyntaxKind::IDENT => { | ||
211 | name = Some(token.text); | ||
212 | } | ||
213 | _ => { | ||
214 | bail!("Placeholders should be $name"); | ||
215 | } | ||
216 | } | ||
217 | } | ||
218 | let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?; | ||
219 | Ok(Placeholder::new(name)) | ||
220 | } | ||
221 | |||
222 | impl Placeholder { | ||
223 | fn new(name: SmolStr) -> Self { | ||
224 | Self { stand_in_name: format!("__placeholder_{}", name), ident: name } | ||
225 | } | ||
226 | } | ||
227 | |||
228 | impl SsrError { | ||
229 | fn new(message: impl Into<String>) -> SsrError { | ||
230 | SsrError(message.into()) | ||
231 | } | ||
232 | } | ||
233 | |||
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected element for an ordinary token.
    fn token(kind: SyntaxKind, text: &str) -> PatternElement {
        PatternElement::Token(Token { kind, text: SmolStr::new(text) })
    }

    /// Builds the expected element for a `$name` placeholder.
    fn placeholder(name: &str) -> PatternElement {
        PatternElement::Placeholder(Placeholder::new(SmolStr::new(name)))
    }

    #[test]
    fn parser_happy_case() {
        let rule: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();

        let expected_pattern = vec![
            token(SyntaxKind::IDENT, "foo"),
            token(SyntaxKind::L_PAREN, "("),
            placeholder("a"),
            token(SyntaxKind::COMMA, ","),
            token(SyntaxKind::WHITESPACE, " "),
            placeholder("b"),
            token(SyntaxKind::R_PAREN, ")"),
        ];
        assert_eq!(rule.pattern.raw.tokens, expected_pattern);

        let expected_template = vec![
            token(SyntaxKind::IDENT, "bar"),
            token(SyntaxKind::L_PAREN, "("),
            placeholder("b"),
            token(SyntaxKind::COMMA, ","),
            token(SyntaxKind::WHITESPACE, " "),
            placeholder("a"),
            token(SyntaxKind::R_PAREN, ")"),
        ];
        assert_eq!(rule.template.tokens, expected_template);
    }
}