diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2019-10-23 16:57:47 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2019-10-23 16:57:47 +0100 |
commit | 2197205885f43441f14861f34449426295397dd9 (patch) | |
tree | 3af21aaefe1efdabafeb5702959e1094504813e7 /xtask/src/codegen | |
parent | edf4d8e555c6847fb9e6e61d727c4def11789bfc (diff) | |
parent | 6048d294009f0f58593747e0870aa174e29a32af (diff) |
Merge #2050
2050: xtask: don't depend on itertools r=matklad a=matklad
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'xtask/src/codegen')
-rw-r--r-- | xtask/src/codegen/gen_parser_tests.rs | 155 | ||||
-rw-r--r-- | xtask/src/codegen/gen_syntax.rs | 354 |
2 files changed, 509 insertions, 0 deletions
diff --git a/xtask/src/codegen/gen_parser_tests.rs b/xtask/src/codegen/gen_parser_tests.rs new file mode 100644 index 000000000..0f550d948 --- /dev/null +++ b/xtask/src/codegen/gen_parser_tests.rs | |||
@@ -0,0 +1,155 @@ | |||
//! This module greps the parser's code for specially formatted comments and turns
//! them into tests.
3 | |||
4 | use std::{ | ||
5 | collections::HashMap, | ||
6 | fs, | ||
7 | path::{Path, PathBuf}, | ||
8 | }; | ||
9 | |||
10 | use crate::{ | ||
11 | codegen::{self, update, Mode}, | ||
12 | project_root, Result, | ||
13 | }; | ||
14 | |||
15 | pub fn generate_parser_tests(mode: Mode) -> Result<()> { | ||
16 | let tests = tests_from_dir(&project_root().join(Path::new(codegen::GRAMMAR_DIR)))?; | ||
17 | fn install_tests(tests: &HashMap<String, Test>, into: &str, mode: Mode) -> Result<()> { | ||
18 | let tests_dir = project_root().join(into); | ||
19 | if !tests_dir.is_dir() { | ||
20 | fs::create_dir_all(&tests_dir)?; | ||
21 | } | ||
22 | // ok is never actually read, but it needs to be specified to create a Test in existing_tests | ||
23 | let existing = existing_tests(&tests_dir, true)?; | ||
24 | for t in existing.keys().filter(|&t| !tests.contains_key(t)) { | ||
25 | panic!("Test is deleted: {}", t); | ||
26 | } | ||
27 | |||
28 | let mut new_idx = existing.len() + 1; | ||
29 | for (name, test) in tests { | ||
30 | let path = match existing.get(name) { | ||
31 | Some((path, _test)) => path.clone(), | ||
32 | None => { | ||
33 | let file_name = format!("{:04}_{}.rs", new_idx, name); | ||
34 | new_idx += 1; | ||
35 | tests_dir.join(file_name) | ||
36 | } | ||
37 | }; | ||
38 | update(&path, &test.text, mode)?; | ||
39 | } | ||
40 | Ok(()) | ||
41 | } | ||
42 | install_tests(&tests.ok, codegen::OK_INLINE_TESTS_DIR, mode)?; | ||
43 | install_tests(&tests.err, codegen::ERR_INLINE_TESTS_DIR, mode) | ||
44 | } | ||
45 | |||
/// A single inline parser test extracted from a `// test` comment block.
#[derive(Debug)]
struct Test {
    /// Test name: the word following `test ` / `test_err ` in the comment.
    pub name: String,
    /// The test's source text; always ends with a newline.
    pub text: String,
    /// `true` for `test` blocks, `false` for `test_err` blocks.
    pub ok: bool,
}
52 | |||
/// All collected inline tests, bucketed by expected parse outcome.
#[derive(Default, Debug)]
struct Tests {
    /// Tests expected to parse cleanly (`// test`), keyed by name.
    pub ok: HashMap<String, Test>,
    /// Tests expected to produce parse errors (`// test_err`), keyed by name.
    pub err: HashMap<String, Test>,
}
58 | |||
59 | fn collect_tests(s: &str) -> Vec<(usize, Test)> { | ||
60 | let mut res = vec![]; | ||
61 | let prefix = "// "; | ||
62 | let lines = s.lines().map(str::trim_start).enumerate(); | ||
63 | |||
64 | let mut block = vec![]; | ||
65 | for (line_idx, line) in lines { | ||
66 | let is_comment = line.starts_with(prefix); | ||
67 | if is_comment { | ||
68 | block.push((line_idx, &line[prefix.len()..])); | ||
69 | } else { | ||
70 | process_block(&mut res, &block); | ||
71 | block.clear(); | ||
72 | } | ||
73 | } | ||
74 | process_block(&mut res, &block); | ||
75 | return res; | ||
76 | |||
77 | fn process_block(acc: &mut Vec<(usize, Test)>, block: &[(usize, &str)]) { | ||
78 | if block.is_empty() { | ||
79 | return; | ||
80 | } | ||
81 | let mut ok = true; | ||
82 | let mut block = block.iter(); | ||
83 | let (start_line, name) = loop { | ||
84 | match block.next() { | ||
85 | Some(&(idx, line)) if line.starts_with("test ") => { | ||
86 | break (idx, line["test ".len()..].to_string()); | ||
87 | } | ||
88 | Some(&(idx, line)) if line.starts_with("test_err ") => { | ||
89 | ok = false; | ||
90 | break (idx, line["test_err ".len()..].to_string()); | ||
91 | } | ||
92 | Some(_) => (), | ||
93 | None => return, | ||
94 | } | ||
95 | }; | ||
96 | let text: String = | ||
97 | block.map(|(_, line)| *line).chain(std::iter::once("")).collect::<Vec<_>>().join("\n"); | ||
98 | assert!(!text.trim().is_empty() && text.ends_with('\n')); | ||
99 | acc.push((start_line, Test { name, text, ok })) | ||
100 | } | ||
101 | } | ||
102 | |||
103 | fn tests_from_dir(dir: &Path) -> Result<Tests> { | ||
104 | let mut res = Tests::default(); | ||
105 | for entry in ::walkdir::WalkDir::new(dir) { | ||
106 | let entry = entry.unwrap(); | ||
107 | if !entry.file_type().is_file() { | ||
108 | continue; | ||
109 | } | ||
110 | if entry.path().extension().unwrap_or_default() != "rs" { | ||
111 | continue; | ||
112 | } | ||
113 | process_file(&mut res, entry.path())?; | ||
114 | } | ||
115 | let grammar_rs = dir.parent().unwrap().join("grammar.rs"); | ||
116 | process_file(&mut res, &grammar_rs)?; | ||
117 | return Ok(res); | ||
118 | fn process_file(res: &mut Tests, path: &Path) -> Result<()> { | ||
119 | let text = fs::read_to_string(path)?; | ||
120 | |||
121 | for (_, test) in collect_tests(&text) { | ||
122 | if test.ok { | ||
123 | if let Some(old_test) = res.ok.insert(test.name.clone(), test) { | ||
124 | Err(format!("Duplicate test: {}", old_test.name))? | ||
125 | } | ||
126 | } else { | ||
127 | if let Some(old_test) = res.err.insert(test.name.clone(), test) { | ||
128 | Err(format!("Duplicate test: {}", old_test.name))? | ||
129 | } | ||
130 | } | ||
131 | } | ||
132 | Ok(()) | ||
133 | } | ||
134 | } | ||
135 | |||
136 | fn existing_tests(dir: &Path, ok: bool) -> Result<HashMap<String, (PathBuf, Test)>> { | ||
137 | let mut res = HashMap::new(); | ||
138 | for file in fs::read_dir(dir)? { | ||
139 | let file = file?; | ||
140 | let path = file.path(); | ||
141 | if path.extension().unwrap_or_default() != "rs" { | ||
142 | continue; | ||
143 | } | ||
144 | let name = { | ||
145 | let file_name = path.file_name().unwrap().to_str().unwrap(); | ||
146 | file_name[5..file_name.len() - 3].to_string() | ||
147 | }; | ||
148 | let text = fs::read_to_string(&path)?; | ||
149 | let test = Test { name: name.clone(), text, ok }; | ||
150 | if let Some(old) = res.insert(name, (path, test)) { | ||
151 | println!("Duplicate test: {:?}", old); | ||
152 | } | ||
153 | } | ||
154 | Ok(res) | ||
155 | } | ||
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs new file mode 100644 index 000000000..6a81c0e4d --- /dev/null +++ b/xtask/src/codegen/gen_syntax.rs | |||
@@ -0,0 +1,354 @@ | |||
//! This module generates the AST datatypes used by rust-analyzer.
//!
//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
5 | |||
6 | use std::{ | ||
7 | collections::BTreeMap, | ||
8 | fs, | ||
9 | io::Write, | ||
10 | process::{Command, Stdio}, | ||
11 | }; | ||
12 | |||
13 | use proc_macro2::{Punct, Spacing}; | ||
14 | use quote::{format_ident, quote}; | ||
15 | use ron; | ||
16 | use serde::Deserialize; | ||
17 | |||
18 | use crate::{ | ||
19 | codegen::{self, update, Mode}, | ||
20 | project_root, Result, | ||
21 | }; | ||
22 | |||
23 | pub fn generate_syntax(mode: Mode) -> Result<()> { | ||
24 | let grammar = project_root().join(codegen::GRAMMAR); | ||
25 | let grammar: Grammar = { | ||
26 | let text = fs::read_to_string(grammar)?; | ||
27 | ron::de::from_str(&text)? | ||
28 | }; | ||
29 | |||
30 | let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); | ||
31 | let syntax_kinds = generate_syntax_kinds(&grammar)?; | ||
32 | update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; | ||
33 | |||
34 | let ast_file = project_root().join(codegen::AST); | ||
35 | let ast = generate_ast(&grammar)?; | ||
36 | update(ast_file.as_path(), &ast, mode)?; | ||
37 | |||
38 | Ok(()) | ||
39 | } | ||
40 | |||
/// Renders the `ast.rs` source: for every node in the grammar, a newtype
/// wrapper (or an enum over variant wrappers) around `SyntaxNode` with an
/// `AstNode` impl, trait impls, and typed child-accessor methods. The result
/// is passed through `reformat` (rustfmt) before being returned.
fn generate_ast(grammar: &Grammar) -> Result<String> {
    let nodes = grammar.ast.iter().map(|(name, ast_node)| {
        let variants =
            ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::<Vec<_>>();
        let name = format_ident!("{}", name);

        // A node without variants becomes a plain struct; one with variants
        // becomes an enum that delegates to per-variant wrapper structs.
        let adt = if variants.is_empty() {
            let kind = format_ident!("{}", to_upper_snake_case(&name.to_string()));
            quote! {
                #[derive(Debug, Clone, PartialEq, Eq, Hash)]
                pub struct #name {
                    pub(crate) syntax: SyntaxNode,
                }

                impl AstNode for #name {
                    fn can_cast(kind: SyntaxKind) -> bool {
                        match kind {
                            #kind => true,
                            _ => false,
                        }
                    }
                    fn cast(syntax: SyntaxNode) -> Option<Self> {
                        if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
                    }
                    fn syntax(&self) -> &SyntaxNode { &self.syntax }
                }
            }
        } else {
            // One SyntaxKind constant per variant, matched in can_cast/cast.
            let kinds = variants
                .iter()
                .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
                .collect::<Vec<_>>();

            quote! {
                #[derive(Debug, Clone, PartialEq, Eq, Hash)]
                pub enum #name {
                    #(#variants(#variants),)*
                }

                #(
                impl From<#variants> for #name {
                    fn from(node: #variants) -> #name {
                        #name::#variants(node)
                    }
                }
                )*

                impl AstNode for #name {
                    fn can_cast(kind: SyntaxKind) -> bool {
                        match kind {
                            #(#kinds)|* => true,
                            _ => false,
                        }
                    }
                    fn cast(syntax: SyntaxNode) -> Option<Self> {
                        let res = match syntax.kind() {
                            #(
                            #kinds => #name::#variants(#variants { syntax }),
                            )*
                            _ => return None,
                        };
                        Some(res)
                    }
                    fn syntax(&self) -> &SyntaxNode {
                        match self {
                            #(
                            #name::#variants(it) => &it.syntax,
                            )*
                        }
                    }
                }
            }
        };

        // Marker-trait impls requested by the grammar (`impl ast::Trait for Node {}`).
        let traits = ast_node.traits.iter().map(|trait_name| {
            let trait_name = format_ident!("{}", trait_name);
            quote!(impl ast::#trait_name for #name {})
        });

        // Accessors returning all children of a given kind.
        let collections = ast_node.collections.iter().map(|(name, kind)| {
            let method_name = format_ident!("{}", name);
            let kind = format_ident!("{}", kind);
            quote! {
                pub fn #method_name(&self) -> AstChildren<#kind> {
                    AstChildren::new(&self.syntax)
                }
            }
        });

        // Accessors returning an optional single child; the method name is
        // either derived from the type or given explicitly (see `Attr`).
        let options = ast_node.options.iter().map(|attr| {
            let method_name = match attr {
                Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)),
                Attr::NameType(n, _) => format_ident!("{}", n),
            };
            let ty = match attr {
                Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t),
            };
            quote! {
                pub fn #method_name(&self) -> Option<#ty> {
                    AstChildren::new(&self.syntax).next()
                }
            }
        });

        quote! {
            #adt

            #(#traits)*

            impl #name {
                #(#collections)*
                #(#options)*
            }
        }
    });

    let ast = quote! {
        use crate::{
            SyntaxNode, SyntaxKind::{self, *},
            ast::{self, AstNode, AstChildren},
        };

        #(#nodes)*
    };

    let pretty = reformat(ast)?;
    Ok(pretty)
}
169 | |||
/// Renders the `SyntaxKind` enum, its classification/conversion helpers, and
/// the `T!` shorthand macro from the token, keyword, literal, and node lists
/// in the grammar. The result is rustfmt-formatted via `reformat`.
fn generate_syntax_kinds(grammar: &Grammar) -> Result<String> {
    // Single-character punctuation split into parallel (char, ident) vectors,
    // used to generate `SyntaxKind::from_char`.
    let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
        .punct
        .iter()
        .filter(|(token, _name)| token.len() == 1)
        .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
        .unzip();

    // Token spellings for the `T!` macro arms. Bracket characters are emitted
    // as single char tokens; multi-char operators as joint punct sequences so
    // e.g. `==` stays one matcher.
    let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
        if "{}[]()".contains(token) {
            let c = token.chars().next().unwrap();
            quote! { #c }
        } else {
            let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
            quote! { #(#cs)* }
        }
    });
    let punctuation =
        grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();

    // `from_keyword` covers only proper keywords, not contextual ones.
    let full_keywords_values = &grammar.keywords;
    let full_keywords =
        full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw)));

    // The enum and `is_keyword` include contextual keywords as well.
    let all_keywords_values =
        grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
    let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
    let all_keywords = all_keywords_values
        .iter()
        .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name)))
        .collect::<Vec<_>>();

    let literals =
        grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let ast = quote! {
        #![allow(bad_style, missing_docs, unreachable_pub)]
        /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
        #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
        #[repr(u16)]
        pub enum SyntaxKind {
            // Technical SyntaxKinds: they appear temporally during parsing,
            // but never end up in the final tree
            #[doc(hidden)]
            TOMBSTONE,
            #[doc(hidden)]
            EOF,
            #(#punctuation,)*
            #(#all_keywords,)*
            #(#literals,)*
            #(#tokens,)*
            #(#nodes,)*

            // Technical kind so that we can cast from u16 safely
            #[doc(hidden)]
            __LAST,
        }
        use self::SyntaxKind::*;

        impl SyntaxKind {
            pub fn is_keyword(self) -> bool {
                match self {
                    #(#all_keywords)|* => true,
                    _ => false,
                }
            }

            pub fn is_punct(self) -> bool {
                match self {
                    #(#punctuation)|* => true,
                    _ => false,
                }
            }

            pub fn is_literal(self) -> bool {
                match self {
                    #(#literals)|* => true,
                    _ => false,
                }
            }

            pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
                let kw = match ident {
                    #(#full_keywords_values => #full_keywords,)*
                    _ => return None,
                };
                Some(kw)
            }

            pub fn from_char(c: char) -> Option<SyntaxKind> {
                let tok = match c {
                    #(#single_byte_tokens_values => #single_byte_tokens,)*
                    _ => return None,
                };
                Some(tok)
            }
        }

        #[macro_export]
        macro_rules! T {
            #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)*
            #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)*
        }
    };

    reformat(ast)
}
281 | |||
282 | fn reformat(text: impl std::fmt::Display) -> Result<String> { | ||
283 | let mut rustfmt = Command::new("rustfmt") | ||
284 | .arg("--config-path") | ||
285 | .arg(project_root().join("rustfmt.toml")) | ||
286 | .stdin(Stdio::piped()) | ||
287 | .stdout(Stdio::piped()) | ||
288 | .spawn()?; | ||
289 | write!(rustfmt.stdin.take().unwrap(), "{}", text)?; | ||
290 | let output = rustfmt.wait_with_output()?; | ||
291 | let stdout = String::from_utf8(output.stdout)?; | ||
292 | let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`"; | ||
293 | Ok(format!("//! {}\n\n{}", preamble, stdout)) | ||
294 | } | ||
295 | |||
/// Deserialized form of the RON grammar description file.
#[derive(Deserialize, Debug)]
struct Grammar {
    /// Punctuation as `(token text, SyntaxKind name)` pairs, e.g. `("==", "EQEQ")`.
    punct: Vec<(String, String)>,
    /// Proper keywords.
    keywords: Vec<String>,
    /// Identifiers that act as keywords only in certain positions.
    contextual_keywords: Vec<String>,
    /// Literal token kind names.
    literals: Vec<String>,
    /// Remaining token kind names.
    tokens: Vec<String>,
    /// Syntax node kind names.
    nodes: Vec<String>,
    /// AST node descriptions keyed by type name; `BTreeMap` gives a
    /// deterministic iteration order, so generated output is stable.
    ast: BTreeMap<String, AstNode>,
}
306 | |||
/// Description of a single AST node as written in the grammar file.
#[derive(Deserialize, Debug)]
struct AstNode {
    /// For enum-like nodes: the variant type names (serialized as `enum`).
    #[serde(default)]
    #[serde(rename = "enum")]
    variants: Vec<String>,

    /// `ast::` traits this node implements (as empty marker impls).
    #[serde(default)]
    traits: Vec<String>,
    /// `(method name, child type)` pairs; each generates an `AstChildren`
    /// accessor returning all children of that type.
    #[serde(default)]
    collections: Vec<(String, String)>,
    /// Optional single children; each generates an `Option<_>` accessor.
    #[serde(default)]
    options: Vec<Attr>,
}
320 | |||
/// Spec for an optional single child: either just a type name (the accessor
/// name is derived by `to_lower_snake_case`) or an explicit
/// `(accessor name, type name)` pair. `untagged` lets RON write either form.
#[derive(Deserialize, Debug)]
#[serde(untagged)]
enum Attr {
    Type(String),
    NameType(String, String),
}
327 | |||
/// Converts a camel-case identifier to `UPPER_SNAKE_CASE`,
/// e.g. `StructDef` -> `STRUCT_DEF`.
fn to_upper_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut prev_upper: Option<bool> = None;
    for ch in s.chars() {
        let upper = ch.is_ascii_uppercase();
        // A lowercase-to-uppercase transition marks a word boundary.
        if upper && prev_upper == Some(false) {
            out.push('_');
        }
        prev_upper = Some(upper);
        out.push(ch.to_ascii_uppercase());
    }
    out
}
341 | |||
/// Converts a camel-case identifier to `lower_snake_case`,
/// e.g. `TypeParam` -> `type_param`.
fn to_lower_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut prev_upper: Option<bool> = None;
    for ch in s.chars() {
        let upper = ch.is_ascii_uppercase();
        // A lowercase-to-uppercase transition marks a word boundary.
        if upper && prev_upper == Some(false) {
            out.push('_');
        }
        prev_upper = Some(upper);
        out.push(ch.to_ascii_lowercase());
    }
    out
}