aboutsummaryrefslogtreecommitdiff
path: root/xtask/src/codegen
diff options
context:
space:
mode:
Diffstat (limited to 'xtask/src/codegen')
-rw-r--r--xtask/src/codegen/gen_parser_tests.rs150
-rw-r--r--xtask/src/codegen/gen_syntax.rs354
2 files changed, 504 insertions, 0 deletions
diff --git a/xtask/src/codegen/gen_parser_tests.rs b/xtask/src/codegen/gen_parser_tests.rs
new file mode 100644
index 000000000..e09b6fcfe
--- /dev/null
+++ b/xtask/src/codegen/gen_parser_tests.rs
@@ -0,0 +1,150 @@
1//! This module greps the parser's code for specially formatted comments and turns
2//! them into tests.
3
4use std::{
5 collections::HashMap,
6 fs,
7 path::{Path, PathBuf},
8};
9
10use itertools::Itertools;
11
12use crate::{
13 codegen::{self, update, Mode},
14 project_root, Result,
15};
16
17pub fn generate_parser_tests(mode: Mode) -> Result<()> {
18 let tests = tests_from_dir(&project_root().join(Path::new(codegen::GRAMMAR_DIR)))?;
19 fn install_tests(tests: &HashMap<String, Test>, into: &str, mode: Mode) -> Result<()> {
20 let tests_dir = project_root().join(into);
21 if !tests_dir.is_dir() {
22 fs::create_dir_all(&tests_dir)?;
23 }
24 // ok is never actually read, but it needs to be specified to create a Test in existing_tests
25 let existing = existing_tests(&tests_dir, true)?;
26 for t in existing.keys().filter(|&t| !tests.contains_key(t)) {
27 panic!("Test is deleted: {}", t);
28 }
29
30 let mut new_idx = existing.len() + 1;
31 for (name, test) in tests {
32 let path = match existing.get(name) {
33 Some((path, _test)) => path.clone(),
34 None => {
35 let file_name = format!("{:04}_{}.rs", new_idx, name);
36 new_idx += 1;
37 tests_dir.join(file_name)
38 }
39 };
40 update(&path, &test.text, mode)?;
41 }
42 Ok(())
43 }
44 install_tests(&tests.ok, codegen::OK_INLINE_TESTS_DIR, mode)?;
45 install_tests(&tests.err, codegen::ERR_INLINE_TESTS_DIR, mode)
46}
47
/// A single inline parser test.
#[derive(Debug)]
struct Test {
    /// Name of the test; it is also used as the key in the test maps and as
    /// part of the generated file name.
    pub name: String,
    /// Source text of the test.
    pub text: String,
    /// `true` for a `test` block, `false` for a `test_err` block.
    pub ok: bool,
}
54
55#[derive(Default, Debug)]
56struct Tests {
57 pub ok: HashMap<String, Test>,
58 pub err: HashMap<String, Test>,
59}
60
61fn collect_tests(s: &str) -> Vec<(usize, Test)> {
62 let mut res = vec![];
63 let prefix = "// ";
64 let comment_blocks = s
65 .lines()
66 .map(str::trim_start)
67 .enumerate()
68 .group_by(|(_idx, line)| line.starts_with(prefix));
69
70 'outer: for (is_comment, block) in comment_blocks.into_iter() {
71 if !is_comment {
72 continue;
73 }
74 let mut block = block.map(|(idx, line)| (idx, &line[prefix.len()..]));
75
76 let mut ok = true;
77 let (start_line, name) = loop {
78 match block.next() {
79 Some((idx, line)) if line.starts_with("test ") => {
80 break (idx, line["test ".len()..].to_string());
81 }
82 Some((idx, line)) if line.starts_with("test_err ") => {
83 ok = false;
84 break (idx, line["test_err ".len()..].to_string());
85 }
86 Some(_) => (),
87 None => continue 'outer,
88 }
89 };
90 let text: String =
91 itertools::join(block.map(|(_, line)| line).chain(::std::iter::once("")), "\n");
92 assert!(!text.trim().is_empty() && text.ends_with('\n'));
93 res.push((start_line, Test { name, text, ok }))
94 }
95 res
96}
97
98fn tests_from_dir(dir: &Path) -> Result<Tests> {
99 let mut res = Tests::default();
100 for entry in ::walkdir::WalkDir::new(dir) {
101 let entry = entry.unwrap();
102 if !entry.file_type().is_file() {
103 continue;
104 }
105 if entry.path().extension().unwrap_or_default() != "rs" {
106 continue;
107 }
108 process_file(&mut res, entry.path())?;
109 }
110 let grammar_rs = dir.parent().unwrap().join("grammar.rs");
111 process_file(&mut res, &grammar_rs)?;
112 return Ok(res);
113 fn process_file(res: &mut Tests, path: &Path) -> Result<()> {
114 let text = fs::read_to_string(path)?;
115
116 for (_, test) in collect_tests(&text) {
117 if test.ok {
118 if let Some(old_test) = res.ok.insert(test.name.clone(), test) {
119 Err(format!("Duplicate test: {}", old_test.name))?
120 }
121 } else {
122 if let Some(old_test) = res.err.insert(test.name.clone(), test) {
123 Err(format!("Duplicate test: {}", old_test.name))?
124 }
125 }
126 }
127 Ok(())
128 }
129}
130
131fn existing_tests(dir: &Path, ok: bool) -> Result<HashMap<String, (PathBuf, Test)>> {
132 let mut res = HashMap::new();
133 for file in fs::read_dir(dir)? {
134 let file = file?;
135 let path = file.path();
136 if path.extension().unwrap_or_default() != "rs" {
137 continue;
138 }
139 let name = {
140 let file_name = path.file_name().unwrap().to_str().unwrap();
141 file_name[5..file_name.len() - 3].to_string()
142 };
143 let text = fs::read_to_string(&path)?;
144 let test = Test { name: name.clone(), text, ok };
145 if let Some(old) = res.insert(name, (path, test)) {
146 println!("Duplicate test: {:?}", old);
147 }
148 }
149 Ok(res)
150}
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs
new file mode 100644
index 000000000..6a81c0e4d
--- /dev/null
+++ b/xtask/src/codegen/gen_syntax.rs
@@ -0,0 +1,354 @@
1//! This module generates the AST datatypes used by rust-analyzer.
2//!
3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
4//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
5
6use std::{
7 collections::BTreeMap,
8 fs,
9 io::Write,
10 process::{Command, Stdio},
11};
12
13use proc_macro2::{Punct, Spacing};
14use quote::{format_ident, quote};
15use ron;
16use serde::Deserialize;
17
18use crate::{
19 codegen::{self, update, Mode},
20 project_root, Result,
21};
22
23pub fn generate_syntax(mode: Mode) -> Result<()> {
24 let grammar = project_root().join(codegen::GRAMMAR);
25 let grammar: Grammar = {
26 let text = fs::read_to_string(grammar)?;
27 ron::de::from_str(&text)?
28 };
29
30 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
31 let syntax_kinds = generate_syntax_kinds(&grammar)?;
32 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
33
34 let ast_file = project_root().join(codegen::AST);
35 let ast = generate_ast(&grammar)?;
36 update(ast_file.as_path(), &ast, mode)?;
37
38 Ok(())
39}
40
41fn generate_ast(grammar: &Grammar) -> Result<String> {
42 let nodes = grammar.ast.iter().map(|(name, ast_node)| {
43 let variants =
44 ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::<Vec<_>>();
45 let name = format_ident!("{}", name);
46
47 let adt = if variants.is_empty() {
48 let kind = format_ident!("{}", to_upper_snake_case(&name.to_string()));
49 quote! {
50 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
51 pub struct #name {
52 pub(crate) syntax: SyntaxNode,
53 }
54
55 impl AstNode for #name {
56 fn can_cast(kind: SyntaxKind) -> bool {
57 match kind {
58 #kind => true,
59 _ => false,
60 }
61 }
62 fn cast(syntax: SyntaxNode) -> Option<Self> {
63 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
64 }
65 fn syntax(&self) -> &SyntaxNode { &self.syntax }
66 }
67 }
68 } else {
69 let kinds = variants
70 .iter()
71 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
72 .collect::<Vec<_>>();
73
74 quote! {
75 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
76 pub enum #name {
77 #(#variants(#variants),)*
78 }
79
80 #(
81 impl From<#variants> for #name {
82 fn from(node: #variants) -> #name {
83 #name::#variants(node)
84 }
85 }
86 )*
87
88 impl AstNode for #name {
89 fn can_cast(kind: SyntaxKind) -> bool {
90 match kind {
91 #(#kinds)|* => true,
92 _ => false,
93 }
94 }
95 fn cast(syntax: SyntaxNode) -> Option<Self> {
96 let res = match syntax.kind() {
97 #(
98 #kinds => #name::#variants(#variants { syntax }),
99 )*
100 _ => return None,
101 };
102 Some(res)
103 }
104 fn syntax(&self) -> &SyntaxNode {
105 match self {
106 #(
107 #name::#variants(it) => &it.syntax,
108 )*
109 }
110 }
111 }
112 }
113 };
114
115 let traits = ast_node.traits.iter().map(|trait_name| {
116 let trait_name = format_ident!("{}", trait_name);
117 quote!(impl ast::#trait_name for #name {})
118 });
119
120 let collections = ast_node.collections.iter().map(|(name, kind)| {
121 let method_name = format_ident!("{}", name);
122 let kind = format_ident!("{}", kind);
123 quote! {
124 pub fn #method_name(&self) -> AstChildren<#kind> {
125 AstChildren::new(&self.syntax)
126 }
127 }
128 });
129
130 let options = ast_node.options.iter().map(|attr| {
131 let method_name = match attr {
132 Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)),
133 Attr::NameType(n, _) => format_ident!("{}", n),
134 };
135 let ty = match attr {
136 Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t),
137 };
138 quote! {
139 pub fn #method_name(&self) -> Option<#ty> {
140 AstChildren::new(&self.syntax).next()
141 }
142 }
143 });
144
145 quote! {
146 #adt
147
148 #(#traits)*
149
150 impl #name {
151 #(#collections)*
152 #(#options)*
153 }
154 }
155 });
156
157 let ast = quote! {
158 use crate::{
159 SyntaxNode, SyntaxKind::{self, *},
160 ast::{self, AstNode, AstChildren},
161 };
162
163 #(#nodes)*
164 };
165
166 let pretty = reformat(ast)?;
167 Ok(pretty)
168}
169
170fn generate_syntax_kinds(grammar: &Grammar) -> Result<String> {
171 let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
172 .punct
173 .iter()
174 .filter(|(token, _name)| token.len() == 1)
175 .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
176 .unzip();
177
178 let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
179 if "{}[]()".contains(token) {
180 let c = token.chars().next().unwrap();
181 quote! { #c }
182 } else {
183 let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
184 quote! { #(#cs)* }
185 }
186 });
187 let punctuation =
188 grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
189
190 let full_keywords_values = &grammar.keywords;
191 let full_keywords =
192 full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw)));
193
194 let all_keywords_values =
195 grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
196 let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
197 let all_keywords = all_keywords_values
198 .iter()
199 .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name)))
200 .collect::<Vec<_>>();
201
202 let literals =
203 grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
204
205 let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
206
207 let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
208
209 let ast = quote! {
210 #![allow(bad_style, missing_docs, unreachable_pub)]
211 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
212 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
213 #[repr(u16)]
214 pub enum SyntaxKind {
215 // Technical SyntaxKinds: they appear temporally during parsing,
216 // but never end up in the final tree
217 #[doc(hidden)]
218 TOMBSTONE,
219 #[doc(hidden)]
220 EOF,
221 #(#punctuation,)*
222 #(#all_keywords,)*
223 #(#literals,)*
224 #(#tokens,)*
225 #(#nodes,)*
226
227 // Technical kind so that we can cast from u16 safely
228 #[doc(hidden)]
229 __LAST,
230 }
231 use self::SyntaxKind::*;
232
233 impl SyntaxKind {
234 pub fn is_keyword(self) -> bool {
235 match self {
236 #(#all_keywords)|* => true,
237 _ => false,
238 }
239 }
240
241 pub fn is_punct(self) -> bool {
242 match self {
243 #(#punctuation)|* => true,
244 _ => false,
245 }
246 }
247
248 pub fn is_literal(self) -> bool {
249 match self {
250 #(#literals)|* => true,
251 _ => false,
252 }
253 }
254
255 pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
256 let kw = match ident {
257 #(#full_keywords_values => #full_keywords,)*
258 _ => return None,
259 };
260 Some(kw)
261 }
262
263 pub fn from_char(c: char) -> Option<SyntaxKind> {
264 let tok = match c {
265 #(#single_byte_tokens_values => #single_byte_tokens,)*
266 _ => return None,
267 };
268 Some(tok)
269 }
270 }
271
272 #[macro_export]
273 macro_rules! T {
274 #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)*
275 #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)*
276 }
277 };
278
279 reformat(ast)
280}
281
282fn reformat(text: impl std::fmt::Display) -> Result<String> {
283 let mut rustfmt = Command::new("rustfmt")
284 .arg("--config-path")
285 .arg(project_root().join("rustfmt.toml"))
286 .stdin(Stdio::piped())
287 .stdout(Stdio::piped())
288 .spawn()?;
289 write!(rustfmt.stdin.take().unwrap(), "{}", text)?;
290 let output = rustfmt.wait_with_output()?;
291 let stdout = String::from_utf8(output.stdout)?;
292 let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`";
293 Ok(format!("//! {}\n\n{}", preamble, stdout))
294}
295
296#[derive(Deserialize, Debug)]
297struct Grammar {
298 punct: Vec<(String, String)>,
299 keywords: Vec<String>,
300 contextual_keywords: Vec<String>,
301 literals: Vec<String>,
302 tokens: Vec<String>,
303 nodes: Vec<String>,
304 ast: BTreeMap<String, AstNode>,
305}
306
307#[derive(Deserialize, Debug)]
308struct AstNode {
309 #[serde(default)]
310 #[serde(rename = "enum")]
311 variants: Vec<String>,
312
313 #[serde(default)]
314 traits: Vec<String>,
315 #[serde(default)]
316 collections: Vec<(String, String)>,
317 #[serde(default)]
318 options: Vec<Attr>,
319}
320
321#[derive(Deserialize, Debug)]
322#[serde(untagged)]
323enum Attr {
324 Type(String),
325 NameType(String, String),
326}
327
/// Converts `s` to `UPPER_SNAKE_CASE`.
///
/// An underscore is inserted in front of every ASCII uppercase character
/// whose predecessor exists and is not ASCII uppercase; every character is
/// then ASCII-uppercased.
fn to_upper_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut prev: Option<char> = None;
    for ch in s.chars() {
        let starts_word =
            ch.is_ascii_uppercase() && prev.map_or(false, |p| !p.is_ascii_uppercase());
        if starts_word {
            out.push('_');
        }
        out.push(ch.to_ascii_uppercase());
        prev = Some(ch);
    }
    out
}
341
/// Converts `s` to `lower_snake_case`.
///
/// An underscore is inserted in front of every ASCII uppercase character
/// whose predecessor exists and is not ASCII uppercase; every character is
/// then ASCII-lowercased.
fn to_lower_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut prev: Option<char> = None;
    for ch in s.chars() {
        let starts_word =
            ch.is_ascii_uppercase() && prev.map_or(false, |p| !p.is_ascii_uppercase());
        if starts_word {
            out.push('_');
        }
        out.push(ch.to_ascii_lowercase());
        prev = Some(ch);
    }
    out
}