aboutsummaryrefslogtreecommitdiff
path: root/xtask/src/codegen
diff options
context:
space:
mode:
Diffstat (limited to 'xtask/src/codegen')
-rw-r--r--xtask/src/codegen/gen_parser_tests.rs155
-rw-r--r--xtask/src/codegen/gen_syntax.rs354
2 files changed, 509 insertions, 0 deletions
diff --git a/xtask/src/codegen/gen_parser_tests.rs b/xtask/src/codegen/gen_parser_tests.rs
new file mode 100644
index 000000000..0f550d948
--- /dev/null
+++ b/xtask/src/codegen/gen_parser_tests.rs
@@ -0,0 +1,155 @@
1//! This module greps the parser's code for specially formatted comments and turns
2//! them into tests.
3
4use std::{
5 collections::HashMap,
6 fs,
7 path::{Path, PathBuf},
8};
9
10use crate::{
11 codegen::{self, update, Mode},
12 project_root, Result,
13};
14
15pub fn generate_parser_tests(mode: Mode) -> Result<()> {
16 let tests = tests_from_dir(&project_root().join(Path::new(codegen::GRAMMAR_DIR)))?;
17 fn install_tests(tests: &HashMap<String, Test>, into: &str, mode: Mode) -> Result<()> {
18 let tests_dir = project_root().join(into);
19 if !tests_dir.is_dir() {
20 fs::create_dir_all(&tests_dir)?;
21 }
22 // ok is never actually read, but it needs to be specified to create a Test in existing_tests
23 let existing = existing_tests(&tests_dir, true)?;
24 for t in existing.keys().filter(|&t| !tests.contains_key(t)) {
25 panic!("Test is deleted: {}", t);
26 }
27
28 let mut new_idx = existing.len() + 1;
29 for (name, test) in tests {
30 let path = match existing.get(name) {
31 Some((path, _test)) => path.clone(),
32 None => {
33 let file_name = format!("{:04}_{}.rs", new_idx, name);
34 new_idx += 1;
35 tests_dir.join(file_name)
36 }
37 };
38 update(&path, &test.text, mode)?;
39 }
40 Ok(())
41 }
42 install_tests(&tests.ok, codegen::OK_INLINE_TESTS_DIR, mode)?;
43 install_tests(&tests.err, codegen::ERR_INLINE_TESTS_DIR, mode)
44}
45
/// A single parser test: either extracted from a comment block in the
/// grammar sources or read back from an already generated test file.
#[derive(Debug)]
struct Test {
    /// Name of the test; used as the map key and as part of the file name.
    pub name: String,
    /// Source text of the test (comment body or contents of the test file).
    pub text: String,
    /// `true` for a `test` block, `false` for a `test_err` block.
    pub ok: bool,
}
52
/// All inline tests collected from the grammar sources, keyed by test name.
#[derive(Default, Debug)]
struct Tests {
    /// Tests whose `ok` flag is `true`.
    pub ok: HashMap<String, Test>,
    /// Tests whose `ok` flag is `false`.
    pub err: HashMap<String, Test>,
}
58
59fn collect_tests(s: &str) -> Vec<(usize, Test)> {
60 let mut res = vec![];
61 let prefix = "// ";
62 let lines = s.lines().map(str::trim_start).enumerate();
63
64 let mut block = vec![];
65 for (line_idx, line) in lines {
66 let is_comment = line.starts_with(prefix);
67 if is_comment {
68 block.push((line_idx, &line[prefix.len()..]));
69 } else {
70 process_block(&mut res, &block);
71 block.clear();
72 }
73 }
74 process_block(&mut res, &block);
75 return res;
76
77 fn process_block(acc: &mut Vec<(usize, Test)>, block: &[(usize, &str)]) {
78 if block.is_empty() {
79 return;
80 }
81 let mut ok = true;
82 let mut block = block.iter();
83 let (start_line, name) = loop {
84 match block.next() {
85 Some(&(idx, line)) if line.starts_with("test ") => {
86 break (idx, line["test ".len()..].to_string());
87 }
88 Some(&(idx, line)) if line.starts_with("test_err ") => {
89 ok = false;
90 break (idx, line["test_err ".len()..].to_string());
91 }
92 Some(_) => (),
93 None => return,
94 }
95 };
96 let text: String =
97 block.map(|(_, line)| *line).chain(std::iter::once("")).collect::<Vec<_>>().join("\n");
98 assert!(!text.trim().is_empty() && text.ends_with('\n'));
99 acc.push((start_line, Test { name, text, ok }))
100 }
101}
102
103fn tests_from_dir(dir: &Path) -> Result<Tests> {
104 let mut res = Tests::default();
105 for entry in ::walkdir::WalkDir::new(dir) {
106 let entry = entry.unwrap();
107 if !entry.file_type().is_file() {
108 continue;
109 }
110 if entry.path().extension().unwrap_or_default() != "rs" {
111 continue;
112 }
113 process_file(&mut res, entry.path())?;
114 }
115 let grammar_rs = dir.parent().unwrap().join("grammar.rs");
116 process_file(&mut res, &grammar_rs)?;
117 return Ok(res);
118 fn process_file(res: &mut Tests, path: &Path) -> Result<()> {
119 let text = fs::read_to_string(path)?;
120
121 for (_, test) in collect_tests(&text) {
122 if test.ok {
123 if let Some(old_test) = res.ok.insert(test.name.clone(), test) {
124 Err(format!("Duplicate test: {}", old_test.name))?
125 }
126 } else {
127 if let Some(old_test) = res.err.insert(test.name.clone(), test) {
128 Err(format!("Duplicate test: {}", old_test.name))?
129 }
130 }
131 }
132 Ok(())
133 }
134}
135
136fn existing_tests(dir: &Path, ok: bool) -> Result<HashMap<String, (PathBuf, Test)>> {
137 let mut res = HashMap::new();
138 for file in fs::read_dir(dir)? {
139 let file = file?;
140 let path = file.path();
141 if path.extension().unwrap_or_default() != "rs" {
142 continue;
143 }
144 let name = {
145 let file_name = path.file_name().unwrap().to_str().unwrap();
146 file_name[5..file_name.len() - 3].to_string()
147 };
148 let text = fs::read_to_string(&path)?;
149 let test = Test { name: name.clone(), text, ok };
150 if let Some(old) = res.insert(name, (path, test)) {
151 println!("Duplicate test: {:?}", old);
152 }
153 }
154 Ok(res)
155}
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs
new file mode 100644
index 000000000..6a81c0e4d
--- /dev/null
+++ b/xtask/src/codegen/gen_syntax.rs
@@ -0,0 +1,354 @@
1//! This module generates the AST datatypes used by rust-analyzer.
2//!
3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
4//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
5
6use std::{
7 collections::BTreeMap,
8 fs,
9 io::Write,
10 process::{Command, Stdio},
11};
12
13use proc_macro2::{Punct, Spacing};
14use quote::{format_ident, quote};
15use ron;
16use serde::Deserialize;
17
18use crate::{
19 codegen::{self, update, Mode},
20 project_root, Result,
21};
22
23pub fn generate_syntax(mode: Mode) -> Result<()> {
24 let grammar = project_root().join(codegen::GRAMMAR);
25 let grammar: Grammar = {
26 let text = fs::read_to_string(grammar)?;
27 ron::de::from_str(&text)?
28 };
29
30 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
31 let syntax_kinds = generate_syntax_kinds(&grammar)?;
32 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
33
34 let ast_file = project_root().join(codegen::AST);
35 let ast = generate_ast(&grammar)?;
36 update(ast_file.as_path(), &ast, mode)?;
37
38 Ok(())
39}
40
41fn generate_ast(grammar: &Grammar) -> Result<String> {
42 let nodes = grammar.ast.iter().map(|(name, ast_node)| {
43 let variants =
44 ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::<Vec<_>>();
45 let name = format_ident!("{}", name);
46
47 let adt = if variants.is_empty() {
48 let kind = format_ident!("{}", to_upper_snake_case(&name.to_string()));
49 quote! {
50 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
51 pub struct #name {
52 pub(crate) syntax: SyntaxNode,
53 }
54
55 impl AstNode for #name {
56 fn can_cast(kind: SyntaxKind) -> bool {
57 match kind {
58 #kind => true,
59 _ => false,
60 }
61 }
62 fn cast(syntax: SyntaxNode) -> Option<Self> {
63 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
64 }
65 fn syntax(&self) -> &SyntaxNode { &self.syntax }
66 }
67 }
68 } else {
69 let kinds = variants
70 .iter()
71 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
72 .collect::<Vec<_>>();
73
74 quote! {
75 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
76 pub enum #name {
77 #(#variants(#variants),)*
78 }
79
80 #(
81 impl From<#variants> for #name {
82 fn from(node: #variants) -> #name {
83 #name::#variants(node)
84 }
85 }
86 )*
87
88 impl AstNode for #name {
89 fn can_cast(kind: SyntaxKind) -> bool {
90 match kind {
91 #(#kinds)|* => true,
92 _ => false,
93 }
94 }
95 fn cast(syntax: SyntaxNode) -> Option<Self> {
96 let res = match syntax.kind() {
97 #(
98 #kinds => #name::#variants(#variants { syntax }),
99 )*
100 _ => return None,
101 };
102 Some(res)
103 }
104 fn syntax(&self) -> &SyntaxNode {
105 match self {
106 #(
107 #name::#variants(it) => &it.syntax,
108 )*
109 }
110 }
111 }
112 }
113 };
114
115 let traits = ast_node.traits.iter().map(|trait_name| {
116 let trait_name = format_ident!("{}", trait_name);
117 quote!(impl ast::#trait_name for #name {})
118 });
119
120 let collections = ast_node.collections.iter().map(|(name, kind)| {
121 let method_name = format_ident!("{}", name);
122 let kind = format_ident!("{}", kind);
123 quote! {
124 pub fn #method_name(&self) -> AstChildren<#kind> {
125 AstChildren::new(&self.syntax)
126 }
127 }
128 });
129
130 let options = ast_node.options.iter().map(|attr| {
131 let method_name = match attr {
132 Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)),
133 Attr::NameType(n, _) => format_ident!("{}", n),
134 };
135 let ty = match attr {
136 Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t),
137 };
138 quote! {
139 pub fn #method_name(&self) -> Option<#ty> {
140 AstChildren::new(&self.syntax).next()
141 }
142 }
143 });
144
145 quote! {
146 #adt
147
148 #(#traits)*
149
150 impl #name {
151 #(#collections)*
152 #(#options)*
153 }
154 }
155 });
156
157 let ast = quote! {
158 use crate::{
159 SyntaxNode, SyntaxKind::{self, *},
160 ast::{self, AstNode, AstChildren},
161 };
162
163 #(#nodes)*
164 };
165
166 let pretty = reformat(ast)?;
167 Ok(pretty)
168}
169
170fn generate_syntax_kinds(grammar: &Grammar) -> Result<String> {
171 let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
172 .punct
173 .iter()
174 .filter(|(token, _name)| token.len() == 1)
175 .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
176 .unzip();
177
178 let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
179 if "{}[]()".contains(token) {
180 let c = token.chars().next().unwrap();
181 quote! { #c }
182 } else {
183 let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
184 quote! { #(#cs)* }
185 }
186 });
187 let punctuation =
188 grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
189
190 let full_keywords_values = &grammar.keywords;
191 let full_keywords =
192 full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw)));
193
194 let all_keywords_values =
195 grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
196 let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
197 let all_keywords = all_keywords_values
198 .iter()
199 .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name)))
200 .collect::<Vec<_>>();
201
202 let literals =
203 grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
204
205 let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
206
207 let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
208
209 let ast = quote! {
210 #![allow(bad_style, missing_docs, unreachable_pub)]
211 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
212 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
213 #[repr(u16)]
214 pub enum SyntaxKind {
215 // Technical SyntaxKinds: they appear temporally during parsing,
216 // but never end up in the final tree
217 #[doc(hidden)]
218 TOMBSTONE,
219 #[doc(hidden)]
220 EOF,
221 #(#punctuation,)*
222 #(#all_keywords,)*
223 #(#literals,)*
224 #(#tokens,)*
225 #(#nodes,)*
226
227 // Technical kind so that we can cast from u16 safely
228 #[doc(hidden)]
229 __LAST,
230 }
231 use self::SyntaxKind::*;
232
233 impl SyntaxKind {
234 pub fn is_keyword(self) -> bool {
235 match self {
236 #(#all_keywords)|* => true,
237 _ => false,
238 }
239 }
240
241 pub fn is_punct(self) -> bool {
242 match self {
243 #(#punctuation)|* => true,
244 _ => false,
245 }
246 }
247
248 pub fn is_literal(self) -> bool {
249 match self {
250 #(#literals)|* => true,
251 _ => false,
252 }
253 }
254
255 pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
256 let kw = match ident {
257 #(#full_keywords_values => #full_keywords,)*
258 _ => return None,
259 };
260 Some(kw)
261 }
262
263 pub fn from_char(c: char) -> Option<SyntaxKind> {
264 let tok = match c {
265 #(#single_byte_tokens_values => #single_byte_tokens,)*
266 _ => return None,
267 };
268 Some(tok)
269 }
270 }
271
272 #[macro_export]
273 macro_rules! T {
274 #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)*
275 #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)*
276 }
277 };
278
279 reformat(ast)
280}
281
282fn reformat(text: impl std::fmt::Display) -> Result<String> {
283 let mut rustfmt = Command::new("rustfmt")
284 .arg("--config-path")
285 .arg(project_root().join("rustfmt.toml"))
286 .stdin(Stdio::piped())
287 .stdout(Stdio::piped())
288 .spawn()?;
289 write!(rustfmt.stdin.take().unwrap(), "{}", text)?;
290 let output = rustfmt.wait_with_output()?;
291 let stdout = String::from_utf8(output.stdout)?;
292 let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`";
293 Ok(format!("//! {}\n\n{}", preamble, stdout))
294}
295
/// The grammar description deserialized from the RON grammar file.
#[derive(Deserialize, Debug)]
struct Grammar {
    /// Punctuation as `(token text, SyntaxKind name)` pairs.
    punct: Vec<(String, String)>,
    /// Keywords; each gets a `*_KW` kind and is recognised by `from_keyword`.
    keywords: Vec<String>,
    /// Contextual keywords; each gets a `*_KW` kind but is not recognised by
    /// `from_keyword`.
    contextual_keywords: Vec<String>,
    /// Names of the literal kinds (reported by `is_literal`).
    literals: Vec<String>,
    /// Names of the remaining token kinds.
    tokens: Vec<String>,
    /// Names of the node kinds.
    nodes: Vec<String>,
    /// AST types to generate, keyed by type name.
    ast: BTreeMap<String, AstNode>,
}
306
/// Description of one generated AST type. Every field defaults to empty when
/// absent from the grammar file.
#[derive(Deserialize, Debug)]
struct AstNode {
    /// Variant type names (spelled `enum` in the grammar file). When
    /// non-empty the type is generated as an enum, otherwise as a struct.
    #[serde(default)]
    #[serde(rename = "enum")]
    variants: Vec<String>,

    /// Names of `ast::` traits for which an empty impl is generated.
    #[serde(default)]
    traits: Vec<String>,
    /// `(method name, child type)` pairs; each becomes a method returning
    /// `AstChildren<child type>`.
    #[serde(default)]
    collections: Vec<(String, String)>,
    /// Optional children; each becomes a method returning `Option<type>`.
    #[serde(default)]
    options: Vec<Attr>,
}
320
/// An optional-child accessor, deserialized without a tag: either a bare
/// type name or a `(method name, type name)` pair.
#[derive(Deserialize, Debug)]
#[serde(untagged)]
enum Attr {
    /// Type name only; the method name is the type name in lower snake case.
    Type(String),
    /// Explicit method name followed by the type name.
    NameType(String, String),
}
327
/// Converts `s` to `UPPER_SNAKE_CASE`.
///
/// An underscore is inserted before every ASCII uppercase character that
/// directly follows a character that is not ASCII uppercase; every character
/// is then ASCII-uppercased.
fn to_upper_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // True once the previous character exists and was not ASCII uppercase.
    let mut after_non_upper = false;
    for ch in s.chars() {
        let is_upper = ch.is_ascii_uppercase();
        if is_upper && after_non_upper {
            out.push('_');
        }
        out.push(ch.to_ascii_uppercase());
        after_non_upper = !is_upper;
    }
    out
}
341
/// Converts `s` to `lower_snake_case`.
///
/// An underscore is inserted before every ASCII uppercase character that
/// directly follows a character that is not ASCII uppercase; every character
/// is then ASCII-lowercased.
fn to_lower_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // True once the previous character exists and was not ASCII uppercase.
    let mut after_non_upper = false;
    for ch in s.chars() {
        let is_upper = ch.is_ascii_uppercase();
        if is_upper && after_non_upper {
            out.push('_');
        }
        out.push(ch.to_ascii_lowercase());
        after_non_upper = !is_upper;
    }
    out
}