aboutsummaryrefslogtreecommitdiff
path: root/xtask/src/codegen/gen_syntax.rs
diff options
context:
space:
mode:
Diffstat (limited to 'xtask/src/codegen/gen_syntax.rs')
-rw-r--r--xtask/src/codegen/gen_syntax.rs354
1 files changed, 354 insertions, 0 deletions
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs
new file mode 100644
index 000000000..6a81c0e4d
--- /dev/null
+++ b/xtask/src/codegen/gen_syntax.rs
@@ -0,0 +1,354 @@
//! This module generates the AST data types used by rust-analyzer.
//!
//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
5
6use std::{
7 collections::BTreeMap,
8 fs,
9 io::Write,
10 process::{Command, Stdio},
11};
12
13use proc_macro2::{Punct, Spacing};
14use quote::{format_ident, quote};
15use ron;
16use serde::Deserialize;
17
18use crate::{
19 codegen::{self, update, Mode},
20 project_root, Result,
21};
22
23pub fn generate_syntax(mode: Mode) -> Result<()> {
24 let grammar = project_root().join(codegen::GRAMMAR);
25 let grammar: Grammar = {
26 let text = fs::read_to_string(grammar)?;
27 ron::de::from_str(&text)?
28 };
29
30 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
31 let syntax_kinds = generate_syntax_kinds(&grammar)?;
32 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
33
34 let ast_file = project_root().join(codegen::AST);
35 let ast = generate_ast(&grammar)?;
36 update(ast_file.as_path(), &ast, mode)?;
37
38 Ok(())
39}
40
41fn generate_ast(grammar: &Grammar) -> Result<String> {
42 let nodes = grammar.ast.iter().map(|(name, ast_node)| {
43 let variants =
44 ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::<Vec<_>>();
45 let name = format_ident!("{}", name);
46
47 let adt = if variants.is_empty() {
48 let kind = format_ident!("{}", to_upper_snake_case(&name.to_string()));
49 quote! {
50 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
51 pub struct #name {
52 pub(crate) syntax: SyntaxNode,
53 }
54
55 impl AstNode for #name {
56 fn can_cast(kind: SyntaxKind) -> bool {
57 match kind {
58 #kind => true,
59 _ => false,
60 }
61 }
62 fn cast(syntax: SyntaxNode) -> Option<Self> {
63 if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
64 }
65 fn syntax(&self) -> &SyntaxNode { &self.syntax }
66 }
67 }
68 } else {
69 let kinds = variants
70 .iter()
71 .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
72 .collect::<Vec<_>>();
73
74 quote! {
75 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
76 pub enum #name {
77 #(#variants(#variants),)*
78 }
79
80 #(
81 impl From<#variants> for #name {
82 fn from(node: #variants) -> #name {
83 #name::#variants(node)
84 }
85 }
86 )*
87
88 impl AstNode for #name {
89 fn can_cast(kind: SyntaxKind) -> bool {
90 match kind {
91 #(#kinds)|* => true,
92 _ => false,
93 }
94 }
95 fn cast(syntax: SyntaxNode) -> Option<Self> {
96 let res = match syntax.kind() {
97 #(
98 #kinds => #name::#variants(#variants { syntax }),
99 )*
100 _ => return None,
101 };
102 Some(res)
103 }
104 fn syntax(&self) -> &SyntaxNode {
105 match self {
106 #(
107 #name::#variants(it) => &it.syntax,
108 )*
109 }
110 }
111 }
112 }
113 };
114
115 let traits = ast_node.traits.iter().map(|trait_name| {
116 let trait_name = format_ident!("{}", trait_name);
117 quote!(impl ast::#trait_name for #name {})
118 });
119
120 let collections = ast_node.collections.iter().map(|(name, kind)| {
121 let method_name = format_ident!("{}", name);
122 let kind = format_ident!("{}", kind);
123 quote! {
124 pub fn #method_name(&self) -> AstChildren<#kind> {
125 AstChildren::new(&self.syntax)
126 }
127 }
128 });
129
130 let options = ast_node.options.iter().map(|attr| {
131 let method_name = match attr {
132 Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)),
133 Attr::NameType(n, _) => format_ident!("{}", n),
134 };
135 let ty = match attr {
136 Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t),
137 };
138 quote! {
139 pub fn #method_name(&self) -> Option<#ty> {
140 AstChildren::new(&self.syntax).next()
141 }
142 }
143 });
144
145 quote! {
146 #adt
147
148 #(#traits)*
149
150 impl #name {
151 #(#collections)*
152 #(#options)*
153 }
154 }
155 });
156
157 let ast = quote! {
158 use crate::{
159 SyntaxNode, SyntaxKind::{self, *},
160 ast::{self, AstNode, AstChildren},
161 };
162
163 #(#nodes)*
164 };
165
166 let pretty = reformat(ast)?;
167 Ok(pretty)
168}
169
170fn generate_syntax_kinds(grammar: &Grammar) -> Result<String> {
171 let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
172 .punct
173 .iter()
174 .filter(|(token, _name)| token.len() == 1)
175 .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
176 .unzip();
177
178 let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
179 if "{}[]()".contains(token) {
180 let c = token.chars().next().unwrap();
181 quote! { #c }
182 } else {
183 let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
184 quote! { #(#cs)* }
185 }
186 });
187 let punctuation =
188 grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
189
190 let full_keywords_values = &grammar.keywords;
191 let full_keywords =
192 full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw)));
193
194 let all_keywords_values =
195 grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
196 let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
197 let all_keywords = all_keywords_values
198 .iter()
199 .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name)))
200 .collect::<Vec<_>>();
201
202 let literals =
203 grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
204
205 let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
206
207 let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
208
209 let ast = quote! {
210 #![allow(bad_style, missing_docs, unreachable_pub)]
211 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
212 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
213 #[repr(u16)]
214 pub enum SyntaxKind {
215 // Technical SyntaxKinds: they appear temporally during parsing,
216 // but never end up in the final tree
217 #[doc(hidden)]
218 TOMBSTONE,
219 #[doc(hidden)]
220 EOF,
221 #(#punctuation,)*
222 #(#all_keywords,)*
223 #(#literals,)*
224 #(#tokens,)*
225 #(#nodes,)*
226
227 // Technical kind so that we can cast from u16 safely
228 #[doc(hidden)]
229 __LAST,
230 }
231 use self::SyntaxKind::*;
232
233 impl SyntaxKind {
234 pub fn is_keyword(self) -> bool {
235 match self {
236 #(#all_keywords)|* => true,
237 _ => false,
238 }
239 }
240
241 pub fn is_punct(self) -> bool {
242 match self {
243 #(#punctuation)|* => true,
244 _ => false,
245 }
246 }
247
248 pub fn is_literal(self) -> bool {
249 match self {
250 #(#literals)|* => true,
251 _ => false,
252 }
253 }
254
255 pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
256 let kw = match ident {
257 #(#full_keywords_values => #full_keywords,)*
258 _ => return None,
259 };
260 Some(kw)
261 }
262
263 pub fn from_char(c: char) -> Option<SyntaxKind> {
264 let tok = match c {
265 #(#single_byte_tokens_values => #single_byte_tokens,)*
266 _ => return None,
267 };
268 Some(tok)
269 }
270 }
271
272 #[macro_export]
273 macro_rules! T {
274 #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)*
275 #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)*
276 }
277 };
278
279 reformat(ast)
280}
281
282fn reformat(text: impl std::fmt::Display) -> Result<String> {
283 let mut rustfmt = Command::new("rustfmt")
284 .arg("--config-path")
285 .arg(project_root().join("rustfmt.toml"))
286 .stdin(Stdio::piped())
287 .stdout(Stdio::piped())
288 .spawn()?;
289 write!(rustfmt.stdin.take().unwrap(), "{}", text)?;
290 let output = rustfmt.wait_with_output()?;
291 let stdout = String::from_utf8(output.stdout)?;
292 let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`";
293 Ok(format!("//! {}\n\n{}", preamble, stdout))
294}
295
/// In-memory form of the grammar description that drives code generation.
///
/// Deserialized from the RON grammar file by `generate_syntax`.
#[derive(Deserialize, Debug)]
struct Grammar {
    /// Punctuation as `(token text, kind name)` pairs. The kind name becomes a
    /// `SyntaxKind` variant; the token text is what the `T!` macro matches.
    punct: Vec<(String, String)>,
    /// Keywords. Each gets a `<UPPER_SNAKE>_KW` kind and is recognised by the
    /// generated `SyntaxKind::from_keyword`.
    keywords: Vec<String>,
    /// Contextual keywords. Each gets a `<UPPER_SNAKE>_KW` kind and a `T!`
    /// arm, but is not included in the generated `from_keyword`.
    contextual_keywords: Vec<String>,
    /// Names of literal kinds, emitted verbatim as `SyntaxKind` variants and
    /// matched by the generated `is_literal`.
    literals: Vec<String>,
    /// Names of the remaining token kinds, emitted verbatim as variants.
    tokens: Vec<String>,
    /// Names of node kinds, emitted verbatim as variants.
    nodes: Vec<String>,
    /// AST node descriptions keyed by node type name. Being a `BTreeMap`, it
    /// is iterated in key order when generating the AST.
    ast: BTreeMap<String, AstNode>,
}
306
/// Description of a single AST node type in the grammar.
///
/// Every field is optional in the grammar file and defaults to an empty list.
#[derive(Deserialize, Debug)]
struct AstNode {
    /// Variant type names, read from the `enum` key. When non-empty the node
    /// is generated as an enum over these types; otherwise as a struct.
    #[serde(default)]
    #[serde(rename = "enum")]
    variants: Vec<String>,

    /// Names of traits (from the `ast` module) for which an empty impl is
    /// generated.
    #[serde(default)]
    traits: Vec<String>,
    /// `(method name, child type)` pairs; each produces an accessor returning
    /// `AstChildren<child type>`.
    #[serde(default)]
    collections: Vec<(String, String)>,
    /// Optional-child accessors; each produces a method returning
    /// `Option<child type>`.
    #[serde(default)]
    options: Vec<Attr>,
}
320
/// One entry of an AST node's `options` list.
///
/// Untagged: the variant is selected by the shape of the data (a single
/// string versus a pair of strings), not by an explicit tag.
#[derive(Deserialize, Debug)]
#[serde(untagged)]
enum Attr {
    /// Only the child type is given; the accessor is named after the
    /// lower_snake_case form of the type name.
    Type(String),
    /// Explicit `(method name, child type)`.
    NameType(String, String),
}
327
/// Converts a `CamelCase` name to `UPPER_SNAKE_CASE`.
///
/// An underscore is inserted in front of every ASCII uppercase letter that
/// directly follows a character which is not an ASCII uppercase letter; every
/// character is then ASCII-uppercased. Runs of uppercase letters are therefore
/// kept together (`"HTTPServer"` becomes `"HTTPSERVER"`).
fn to_upper_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // True once we have seen a character and that character was not an ASCII
    // uppercase letter; a word boundary can only occur right after such one.
    let mut after_non_upper = false;
    for ch in s.chars() {
        let is_upper = ch.is_ascii_uppercase();
        if is_upper && after_non_upper {
            out.push('_');
        }
        out.push(ch.to_ascii_uppercase());
        after_non_upper = !is_upper;
    }
    out
}
341
/// Converts a `CamelCase` name to `lower_snake_case`.
///
/// An underscore is inserted in front of every ASCII uppercase letter that
/// directly follows a character which is not an ASCII uppercase letter; every
/// character is then ASCII-lowercased. Runs of uppercase letters are therefore
/// kept together (`"HTTPServer"` becomes `"httpserver"`).
fn to_lower_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    // True once we have seen a character and that character was not an ASCII
    // uppercase letter; a word boundary can only occur right after such one.
    let mut after_non_upper = false;
    for ch in s.chars() {
        let is_upper = ch.is_ascii_uppercase();
        if is_upper && after_non_upper {
            out.push('_');
        }
        out.push(ch.to_ascii_lowercase());
        after_non_upper = !is_upper;
    }
    out
}