diff options
author | Luca Barbieri <[email protected]> | 2020-04-03 20:12:08 +0100 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2020-04-08 16:15:12 +0100 |
commit | 68196ccc10c60de52bb771d295879456f73ede95 (patch) | |
tree | cec60917f9c4475c49d4842c05469f19ad2bff86 /xtask/src | |
parent | 4762c6d9c66dc1b6be9b9010dbe787ef8d69530a (diff) |
Add AstElement trait, generate tokens, support tokens in enums
- Adds a new AstElement trait that is implemented by all generated
node, token and enum structs
- Overhauls the code generators to code-generate all tokens, and
also enhances enums to support including tokens, nodes, and nested
enums
Diffstat (limited to 'xtask/src')
-rw-r--r-- | xtask/src/codegen/gen_syntax.rs | 298 |
1 files changed, 269 insertions, 29 deletions
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 32afd47bc..c730c75ee 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs | |||
@@ -5,6 +5,8 @@ | |||
5 | 5 | ||
6 | use proc_macro2::{Punct, Spacing}; | 6 | use proc_macro2::{Punct, Spacing}; |
7 | use quote::{format_ident, quote}; | 7 | use quote::{format_ident, quote}; |
8 | use std::borrow::Cow; | ||
9 | use std::collections::{BTreeSet, HashMap, HashSet}; | ||
8 | 10 | ||
9 | use crate::{ | 11 | use crate::{ |
10 | ast_src::{AstSrc, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC}, | 12 | ast_src::{AstSrc, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC}, |
@@ -18,13 +20,125 @@ pub fn generate_syntax(mode: Mode) -> Result<()> { | |||
18 | update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; | 20 | update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; |
19 | 21 | ||
20 | let ast_file = project_root().join(codegen::AST); | 22 | let ast_file = project_root().join(codegen::AST); |
21 | let ast = generate_ast(AST_SRC)?; | 23 | let ast = generate_ast(KINDS_SRC, AST_SRC)?; |
22 | update(ast_file.as_path(), &ast, mode)?; | 24 | update(ast_file.as_path(), &ast, mode)?; |
23 | 25 | ||
24 | Ok(()) | 26 | Ok(()) |
25 | } | 27 | } |
26 | 28 | ||
27 | fn generate_ast(grammar: AstSrc<'_>) -> Result<String> { | 29 | #[derive(Debug, Default, Clone)] |
30 | struct ElementKinds { | ||
31 | kinds: BTreeSet<proc_macro2::Ident>, | ||
32 | has_nodes: bool, | ||
33 | has_tokens: bool, | ||
34 | } | ||
35 | |||
36 | fn generate_ast(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> { | ||
37 | let all_token_kinds: Vec<_> = kinds | ||
38 | .punct | ||
39 | .into_iter() | ||
40 | .map(|(_, kind)| kind) | ||
41 | .copied() | ||
42 | .map(|x| x.into()) | ||
43 | .chain( | ||
44 | kinds | ||
45 | .keywords | ||
46 | .into_iter() | ||
47 | .chain(kinds.contextual_keywords.into_iter()) | ||
48 | .map(|name| Cow::Owned(format!("{}_KW", to_upper_snake_case(&name)))), | ||
49 | ) | ||
50 | .chain(kinds.literals.into_iter().copied().map(|x| x.into())) | ||
51 | .chain(kinds.tokens.into_iter().copied().map(|x| x.into())) | ||
52 | .collect(); | ||
53 | |||
54 | let mut element_kinds_map = HashMap::new(); | ||
55 | for kind in &all_token_kinds { | ||
56 | let kind = &**kind; | ||
57 | let name = to_pascal_case(kind); | ||
58 | element_kinds_map.insert( | ||
59 | name, | ||
60 | ElementKinds { | ||
61 | kinds: Some(format_ident!("{}", kind)).into_iter().collect(), | ||
62 | has_nodes: false, | ||
63 | has_tokens: true, | ||
64 | }, | ||
65 | ); | ||
66 | } | ||
67 | |||
68 | for kind in kinds.nodes { | ||
69 | let name = to_pascal_case(kind); | ||
70 | element_kinds_map.insert( | ||
71 | name, | ||
72 | ElementKinds { | ||
73 | kinds: Some(format_ident!("{}", *kind)).into_iter().collect(), | ||
74 | has_nodes: true, | ||
75 | has_tokens: false, | ||
76 | }, | ||
77 | ); | ||
78 | } | ||
79 | |||
80 | for en in grammar.enums { | ||
81 | let mut element_kinds: ElementKinds = Default::default(); | ||
82 | for variant in en.variants { | ||
83 | if let Some(variant_element_kinds) = element_kinds_map.get(*variant) { | ||
84 | element_kinds.kinds.extend(variant_element_kinds.kinds.iter().cloned()); | ||
85 | element_kinds.has_tokens |= variant_element_kinds.has_tokens; | ||
86 | element_kinds.has_nodes |= variant_element_kinds.has_nodes; | ||
87 | } else { | ||
88 | panic!("Enum variant has type that does not exist or was not declared before the enum: {}", *variant); | ||
89 | } | ||
90 | } | ||
91 | element_kinds_map.insert(en.name.to_string(), element_kinds); | ||
92 | } | ||
93 | |||
94 | let tokens = all_token_kinds.iter().map(|kind_str| { | ||
95 | let kind_str = &**kind_str; | ||
96 | let kind = format_ident!("{}", kind_str); | ||
97 | let name = format_ident!("{}", to_pascal_case(kind_str)); | ||
98 | quote! { | ||
99 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
100 | pub struct #name(SyntaxToken); | ||
101 | |||
102 | impl std::fmt::Display for #name { | ||
103 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { | ||
104 | std::fmt::Display::fmt(self.syntax(), f) | ||
105 | } | ||
106 | } | ||
107 | |||
108 | impl AstToken for #name { | ||
109 | fn can_cast(kind: SyntaxKind) -> bool { | ||
110 | match kind { | ||
111 | #kind => true, | ||
112 | _ => false, | ||
113 | } | ||
114 | } | ||
115 | fn cast_or_return(syntax: SyntaxToken) -> Result<Self, SyntaxToken> { | ||
116 | if Self::can_cast(syntax.kind()) { Ok(Self(syntax)) } else { Err(syntax) } | ||
117 | } | ||
118 | fn syntax(&self) -> &SyntaxToken { &self.0 } | ||
119 | fn into_syntax(self) -> SyntaxToken { self.0 } | ||
120 | } | ||
121 | |||
122 | impl AstElement for #name { | ||
123 | fn can_cast_element(kind: SyntaxKind) -> bool { | ||
124 | match kind { | ||
125 | #kind => true, | ||
126 | _ => false, | ||
127 | } | ||
128 | } | ||
129 | fn cast_or_return_element(syntax: SyntaxElement) -> Result<Self, SyntaxElement> { | ||
130 | if Self::can_cast_element(syntax.kind()) { Ok(Self(syntax.into_token().unwrap())) } else { Err(syntax) } | ||
131 | } | ||
132 | fn syntax_element(&self) -> NodeOrToken<&SyntaxNode, &SyntaxToken> { | ||
133 | NodeOrToken::Token(&self.0) | ||
134 | } | ||
135 | fn into_syntax_element(self) -> SyntaxElement { | ||
136 | NodeOrToken::Token(self.0) | ||
137 | } | ||
138 | } | ||
139 | } | ||
140 | }); | ||
141 | |||
28 | let nodes = grammar.nodes.iter().map(|node| { | 142 | let nodes = grammar.nodes.iter().map(|node| { |
29 | let name = format_ident!("{}", node.name); | 143 | let name = format_ident!("{}", node.name); |
30 | let kind = format_ident!("{}", to_upper_snake_case(&name.to_string())); | 144 | let kind = format_ident!("{}", to_upper_snake_case(&name.to_string())); |
@@ -42,20 +156,28 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result<String> { | |||
42 | FieldSrc::Optional(ty) | FieldSrc::Many(ty) => ty, | 156 | FieldSrc::Optional(ty) | FieldSrc::Many(ty) => ty, |
43 | FieldSrc::Shorthand => name, | 157 | FieldSrc::Shorthand => name, |
44 | }; | 158 | }; |
159 | let element_kinds = &element_kinds_map.get(*ty).unwrap_or_else(|| panic!("type not found: {}", *ty)); | ||
160 | let iter = if !element_kinds.has_tokens { | ||
161 | format_ident!("AstChildren") | ||
162 | } else if !element_kinds.has_nodes { | ||
163 | format_ident!("AstChildTokens") | ||
164 | } else { | ||
165 | format_ident!("AstChildElements") | ||
166 | }; | ||
45 | let ty = format_ident!("{}", ty); | 167 | let ty = format_ident!("{}", ty); |
46 | 168 | ||
47 | match field { | 169 | match field { |
48 | FieldSrc::Many(_) => { | 170 | FieldSrc::Many(_) => { |
49 | quote! { | 171 | quote! { |
50 | pub fn #method_name(&self) -> AstChildren<#ty> { | 172 | pub fn #method_name(&self) -> #iter<#ty> { |
51 | AstChildren::new(&self.syntax) | 173 | #iter::new(&self.syntax) |
52 | } | 174 | } |
53 | } | 175 | } |
54 | } | 176 | } |
55 | FieldSrc::Optional(_) | FieldSrc::Shorthand => { | 177 | FieldSrc::Optional(_) | FieldSrc::Shorthand => { |
56 | quote! { | 178 | quote! { |
57 | pub fn #method_name(&self) -> Option<#ty> { | 179 | pub fn #method_name(&self) -> Option<#ty> { |
58 | AstChildren::new(&self.syntax).next() | 180 | #iter::new(&self.syntax).next() |
59 | } | 181 | } |
60 | } | 182 | } |
61 | } | 183 | } |
@@ -81,11 +203,31 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result<String> { | |||
81 | _ => false, | 203 | _ => false, |
82 | } | 204 | } |
83 | } | 205 | } |
84 | fn cast(syntax: SyntaxNode) -> Option<Self> { | 206 | fn cast_or_return(syntax: SyntaxNode) -> Result<Self, SyntaxNode> { |
85 | if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None } | 207 | if Self::can_cast(syntax.kind()) { Ok(Self { syntax }) } else { Err(syntax) } |
86 | } | 208 | } |
87 | fn syntax(&self) -> &SyntaxNode { &self.syntax } | 209 | fn syntax(&self) -> &SyntaxNode { &self.syntax } |
210 | fn into_syntax(self) -> SyntaxNode { self.syntax } | ||
88 | } | 211 | } |
212 | |||
213 | impl AstElement for #name { | ||
214 | fn can_cast_element(kind: SyntaxKind) -> bool { | ||
215 | match kind { | ||
216 | #kind => true, | ||
217 | _ => false, | ||
218 | } | ||
219 | } | ||
220 | fn cast_or_return_element(syntax: SyntaxElement) -> Result<Self, SyntaxElement> { | ||
221 | if Self::can_cast_element(syntax.kind()) { Ok(Self { syntax: syntax.into_node().unwrap() }) } else { Err(syntax) } | ||
222 | } | ||
223 | fn syntax_element(&self) -> NodeOrToken<&SyntaxNode, &SyntaxToken> { | ||
224 | NodeOrToken::Node(&self.syntax) | ||
225 | } | ||
226 | fn into_syntax_element(self) -> SyntaxElement { | ||
227 | NodeOrToken::Node(self.syntax) | ||
228 | } | ||
229 | } | ||
230 | |||
89 | #(#traits)* | 231 | #(#traits)* |
90 | 232 | ||
91 | impl #name { | 233 | impl #name { |
@@ -96,16 +238,71 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result<String> { | |||
96 | 238 | ||
97 | let enums = grammar.enums.iter().map(|en| { | 239 | let enums = grammar.enums.iter().map(|en| { |
98 | let variants = en.variants.iter().map(|var| format_ident!("{}", var)).collect::<Vec<_>>(); | 240 | let variants = en.variants.iter().map(|var| format_ident!("{}", var)).collect::<Vec<_>>(); |
241 | let element_kinds = &element_kinds_map[&en.name.to_string()]; | ||
99 | let name = format_ident!("{}", en.name); | 242 | let name = format_ident!("{}", en.name); |
100 | let kinds = variants | 243 | let kinds = en.variants |
101 | .iter() | 244 | .iter() |
102 | .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string()))) | 245 | .map(|name| { |
246 | element_kinds_map[*name].kinds.iter().collect::<Vec<_>>() | ||
247 | }) | ||
103 | .collect::<Vec<_>>(); | 248 | .collect::<Vec<_>>(); |
104 | let traits = en.traits.iter().map(|trait_name| { | 249 | let traits = en.traits.iter().map(|trait_name| { |
105 | let trait_name = format_ident!("{}", trait_name); | 250 | let trait_name = format_ident!("{}", trait_name); |
106 | quote!(impl ast::#trait_name for #name {}) | 251 | quote!(impl ast::#trait_name for #name {}) |
107 | }); | 252 | }); |
108 | 253 | ||
254 | let all_kinds = &element_kinds.kinds; | ||
255 | |||
256 | let specific_ast_trait = if element_kinds.has_nodes != element_kinds.has_tokens { | ||
257 | let (ast_trait, syntax_type) = if element_kinds.has_tokens { | ||
258 | ( | ||
259 | quote!(AstToken), | ||
260 | quote!(SyntaxToken), | ||
261 | ) | ||
262 | } else { | ||
263 | ( | ||
264 | quote!(AstNode), | ||
265 | quote!(SyntaxNode), | ||
266 | ) | ||
267 | }; | ||
268 | |||
269 | quote! { | ||
270 | impl #ast_trait for #name { | ||
271 | fn can_cast(kind: SyntaxKind) -> bool { | ||
272 | match kind { | ||
273 | #(#all_kinds)|* => true, | ||
274 | _ => false, | ||
275 | } | ||
276 | } | ||
277 | #[allow(unreachable_patterns)] | ||
278 | fn cast_or_return(syntax: #syntax_type) -> Result<Self, #syntax_type> { | ||
279 | match syntax.kind() { | ||
280 | #( | ||
281 | #(#kinds)|* => #variants::cast_or_return(syntax).map(|x| #name::#variants(x)), | ||
282 | )* | ||
283 | _ => Err(syntax), | ||
284 | } | ||
285 | } | ||
286 | fn syntax(&self) -> &#syntax_type { | ||
287 | match self { | ||
288 | #( | ||
289 | #name::#variants(it) => it.syntax(), | ||
290 | )* | ||
291 | } | ||
292 | } | ||
293 | fn into_syntax(self) -> #syntax_type { | ||
294 | match self { | ||
295 | #( | ||
296 | #name::#variants(it) => it.into_syntax(), | ||
297 | )* | ||
298 | } | ||
299 | } | ||
300 | } | ||
301 | } | ||
302 | } else { | ||
303 | Default::default() | ||
304 | }; | ||
305 | |||
109 | quote! { | 306 | quote! { |
110 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | 307 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] |
111 | pub enum #name { | 308 | pub enum #name { |
@@ -122,44 +319,71 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result<String> { | |||
122 | 319 | ||
123 | impl std::fmt::Display for #name { | 320 | impl std::fmt::Display for #name { |
124 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { | 321 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
125 | std::fmt::Display::fmt(self.syntax(), f) | 322 | match self { |
323 | #( | ||
324 | #name::#variants(it) => std::fmt::Display::fmt(it, f), | ||
325 | )* | ||
326 | } | ||
126 | } | 327 | } |
127 | } | 328 | } |
128 | 329 | ||
129 | impl AstNode for #name { | 330 | #specific_ast_trait |
130 | fn can_cast(kind: SyntaxKind) -> bool { | 331 | |
332 | impl AstElement for #name { | ||
333 | fn can_cast_element(kind: SyntaxKind) -> bool { | ||
131 | match kind { | 334 | match kind { |
132 | #(#kinds)|* => true, | 335 | #(#all_kinds)|* => true, |
133 | _ => false, | 336 | _ => false, |
134 | } | 337 | } |
135 | } | 338 | } |
136 | fn cast(syntax: SyntaxNode) -> Option<Self> { | 339 | #[allow(unreachable_patterns)] |
137 | let res = match syntax.kind() { | 340 | fn cast_or_return_element(syntax: SyntaxElement) -> Result<Self, SyntaxElement> { |
341 | match syntax.kind() { | ||
342 | #( | ||
343 | #(#kinds)|* => #variants::cast_or_return_element(syntax).map(|x| #name::#variants(x)), | ||
344 | )* | ||
345 | _ => Err(syntax), | ||
346 | } | ||
347 | } | ||
348 | fn syntax_element(&self) -> NodeOrToken<&SyntaxNode, &SyntaxToken> { | ||
349 | match self { | ||
138 | #( | 350 | #( |
139 | #kinds => #name::#variants(#variants { syntax }), | 351 | #name::#variants(it) => it.syntax_element(), |
140 | )* | 352 | )* |
141 | _ => return None, | 353 | } |
142 | }; | ||
143 | Some(res) | ||
144 | } | 354 | } |
145 | fn syntax(&self) -> &SyntaxNode { | 355 | fn into_syntax_element(self) -> SyntaxElement { |
146 | match self { | 356 | match self { |
147 | #( | 357 | #( |
148 | #name::#variants(it) => &it.syntax, | 358 | #name::#variants(it) => it.into_syntax_element(), |
149 | )* | 359 | )* |
150 | } | 360 | } |
151 | } | 361 | } |
152 | } | 362 | } |
363 | |||
153 | #(#traits)* | 364 | #(#traits)* |
154 | } | 365 | } |
155 | }); | 366 | }); |
156 | 367 | ||
368 | let defined_nodes: HashSet<_> = grammar.nodes.iter().map(|node| node.name).collect(); | ||
369 | |||
370 | for node in kinds | ||
371 | .nodes | ||
372 | .iter() | ||
373 | .map(|kind| to_pascal_case(*kind)) | ||
374 | .filter(|name| !defined_nodes.contains(&**name)) | ||
375 | { | ||
376 | eprintln!("Warning: node {} not defined in ast source", node); | ||
377 | } | ||
378 | |||
157 | let ast = quote! { | 379 | let ast = quote! { |
380 | #[allow(unused_imports)] | ||
158 | use crate::{ | 381 | use crate::{ |
159 | SyntaxNode, SyntaxKind::{self, *}, | 382 | SyntaxNode, SyntaxToken, SyntaxElement, NodeOrToken, SyntaxKind::{self, *}, |
160 | ast::{self, AstNode, AstChildren}, | 383 | ast::{self, AstNode, AstToken, AstElement, AstChildren, AstChildTokens, AstChildElements}, |
161 | }; | 384 | }; |
162 | 385 | ||
386 | #(#tokens)* | ||
163 | #(#nodes)* | 387 | #(#nodes)* |
164 | #(#enums)* | 388 | #(#enums)* |
165 | }; | 389 | }; |
@@ -282,12 +506,12 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result<String> { | |||
282 | 506 | ||
283 | fn to_upper_snake_case(s: &str) -> String { | 507 | fn to_upper_snake_case(s: &str) -> String { |
284 | let mut buf = String::with_capacity(s.len()); | 508 | let mut buf = String::with_capacity(s.len()); |
285 | let mut prev_is_upper = None; | 509 | let mut prev = false; |
286 | for c in s.chars() { | 510 | for c in s.chars() { |
287 | if c.is_ascii_uppercase() && prev_is_upper == Some(false) { | 511 | if c.is_ascii_uppercase() && prev { |
288 | buf.push('_') | 512 | buf.push('_') |
289 | } | 513 | } |
290 | prev_is_upper = Some(c.is_ascii_uppercase()); | 514 | prev = true; |
291 | 515 | ||
292 | buf.push(c.to_ascii_uppercase()); | 516 | buf.push(c.to_ascii_uppercase()); |
293 | } | 517 | } |
@@ -296,14 +520,30 @@ fn to_upper_snake_case(s: &str) -> String { | |||
296 | 520 | ||
297 | fn to_lower_snake_case(s: &str) -> String { | 521 | fn to_lower_snake_case(s: &str) -> String { |
298 | let mut buf = String::with_capacity(s.len()); | 522 | let mut buf = String::with_capacity(s.len()); |
299 | let mut prev_is_upper = None; | 523 | let mut prev = false; |
300 | for c in s.chars() { | 524 | for c in s.chars() { |
301 | if c.is_ascii_uppercase() && prev_is_upper == Some(false) { | 525 | if c.is_ascii_uppercase() && prev { |
302 | buf.push('_') | 526 | buf.push('_') |
303 | } | 527 | } |
304 | prev_is_upper = Some(c.is_ascii_uppercase()); | 528 | prev = true; |
305 | 529 | ||
306 | buf.push(c.to_ascii_lowercase()); | 530 | buf.push(c.to_ascii_lowercase()); |
307 | } | 531 | } |
308 | buf | 532 | buf |
309 | } | 533 | } |
534 | |||
535 | fn to_pascal_case(s: &str) -> String { | ||
536 | let mut buf = String::with_capacity(s.len()); | ||
537 | let mut prev_is_underscore = true; | ||
538 | for c in s.chars() { | ||
539 | if c == '_' { | ||
540 | prev_is_underscore = true; | ||
541 | } else if prev_is_underscore { | ||
542 | buf.push(c.to_ascii_uppercase()); | ||
543 | prev_is_underscore = false; | ||
544 | } else { | ||
545 | buf.push(c.to_ascii_lowercase()); | ||
546 | } | ||
547 | } | ||
548 | buf | ||
549 | } | ||