diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2020-04-09 10:24:19 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2020-04-09 10:24:19 +0100 |
commit | 85956932872481cf4813c5e7794d981a9edb4623 (patch) | |
tree | f915426894d00e918bfb2ad475633370baa6adaa /xtask/src | |
parent | 412eda73877c7a897561a70b83f55ee346e18a2c (diff) | |
parent | 8f01e62bb962fbe282344125f6ace54326efcaa3 (diff) |
Merge #3909
3909: Generate tokens r=matklad a=matklad
bors r+
🤖
Co-authored-by: Luca Barbieri <[email protected]>
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'xtask/src')
-rw-r--r-- | xtask/src/codegen/gen_syntax.rs | 148 |
1 files changed, 138 insertions, 10 deletions
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 32afd47bc..2dfb68371 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs | |||
@@ -5,6 +5,8 @@ | |||
5 | 5 | ||
6 | use proc_macro2::{Punct, Spacing}; | 6 | use proc_macro2::{Punct, Spacing}; |
7 | use quote::{format_ident, quote}; | 7 | use quote::{format_ident, quote}; |
8 | use std::borrow::Cow; | ||
9 | use std::collections::{BTreeSet, HashMap, HashSet}; | ||
8 | 10 | ||
9 | use crate::{ | 11 | use crate::{ |
10 | ast_src::{AstSrc, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC}, | 12 | ast_src::{AstSrc, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC}, |
@@ -18,13 +20,108 @@ pub fn generate_syntax(mode: Mode) -> Result<()> { | |||
18 | update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; | 20 | update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; |
19 | 21 | ||
20 | let ast_file = project_root().join(codegen::AST); | 22 | let ast_file = project_root().join(codegen::AST); |
21 | let ast = generate_ast(AST_SRC)?; | 23 | let ast = generate_ast(KINDS_SRC, AST_SRC)?; |
22 | update(ast_file.as_path(), &ast, mode)?; | 24 | update(ast_file.as_path(), &ast, mode)?; |
23 | 25 | ||
24 | Ok(()) | 26 | Ok(()) |
25 | } | 27 | } |
26 | 28 | ||
27 | fn generate_ast(grammar: AstSrc<'_>) -> Result<String> { | 29 | #[derive(Debug, Default, Clone)] |
30 | struct ElementKinds { | ||
31 | kinds: BTreeSet<proc_macro2::Ident>, | ||
32 | has_nodes: bool, | ||
33 | has_tokens: bool, | ||
34 | } | ||
35 | |||
36 | fn generate_ast(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> { | ||
37 | let all_token_kinds: Vec<_> = kinds | ||
38 | .punct | ||
39 | .into_iter() | ||
40 | .map(|(_, kind)| kind) | ||
41 | .copied() | ||
42 | .map(|x| x.into()) | ||
43 | .chain( | ||
44 | kinds | ||
45 | .keywords | ||
46 | .into_iter() | ||
47 | .chain(kinds.contextual_keywords.into_iter()) | ||
48 | .map(|name| Cow::Owned(format!("{}_KW", to_upper_snake_case(&name)))), | ||
49 | ) | ||
50 | .chain(kinds.literals.into_iter().copied().map(|x| x.into())) | ||
51 | .chain(kinds.tokens.into_iter().copied().map(|x| x.into())) | ||
52 | .collect(); | ||
53 | |||
54 | let mut element_kinds_map = HashMap::new(); | ||
55 | for kind in &all_token_kinds { | ||
56 | let kind = &**kind; | ||
57 | let name = to_pascal_case(kind); | ||
58 | element_kinds_map.insert( | ||
59 | name, | ||
60 | ElementKinds { | ||
61 | kinds: Some(format_ident!("{}", kind)).into_iter().collect(), | ||
62 | has_nodes: false, | ||
63 | has_tokens: true, | ||
64 | }, | ||
65 | ); | ||
66 | } | ||
67 | |||
68 | for kind in kinds.nodes { | ||
69 | let name = to_pascal_case(kind); | ||
70 | element_kinds_map.insert( | ||
71 | name, | ||
72 | ElementKinds { | ||
73 | kinds: Some(format_ident!("{}", *kind)).into_iter().collect(), | ||
74 | has_nodes: true, | ||
75 | has_tokens: false, | ||
76 | }, | ||
77 | ); | ||
78 | } | ||
79 | |||
80 | for en in grammar.enums { | ||
81 | let mut element_kinds: ElementKinds = Default::default(); | ||
82 | for variant in en.variants { | ||
83 | if let Some(variant_element_kinds) = element_kinds_map.get(*variant) { | ||
84 | element_kinds.kinds.extend(variant_element_kinds.kinds.iter().cloned()); | ||
85 | element_kinds.has_tokens |= variant_element_kinds.has_tokens; | ||
86 | element_kinds.has_nodes |= variant_element_kinds.has_nodes; | ||
87 | } else { | ||
88 | panic!("Enum variant has type that does not exist or was not declared before the enum: {}", *variant); | ||
89 | } | ||
90 | } | ||
91 | element_kinds_map.insert(en.name.to_string(), element_kinds); | ||
92 | } | ||
93 | |||
94 | let tokens = all_token_kinds.iter().map(|kind_str| { | ||
95 | let kind_str = &**kind_str; | ||
96 | let kind = format_ident!("{}", kind_str); | ||
97 | let name = format_ident!("{}", to_pascal_case(kind_str)); | ||
98 | quote! { | ||
99 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
100 | pub struct #name { | ||
101 | pub(crate) syntax: SyntaxToken, | ||
102 | } | ||
103 | |||
104 | impl std::fmt::Display for #name { | ||
105 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { | ||
106 | std::fmt::Display::fmt(&self.syntax, f) | ||
107 | } | ||
108 | } | ||
109 | |||
110 | impl AstToken for #name { | ||
111 | fn can_cast(kind: SyntaxKind) -> bool { | ||
112 | match kind { | ||
113 | #kind => true, | ||
114 | _ => false, | ||
115 | } | ||
116 | } | ||
117 | fn cast(syntax: SyntaxToken) -> Option<Self> { | ||
118 | if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None } | ||
119 | } | ||
120 | fn syntax(&self) -> &SyntaxToken { &self.syntax } | ||
121 | } | ||
122 | } | ||
123 | }); | ||
124 | |||
28 | let nodes = grammar.nodes.iter().map(|node| { | 125 | let nodes = grammar.nodes.iter().map(|node| { |
29 | let name = format_ident!("{}", node.name); | 126 | let name = format_ident!("{}", node.name); |
30 | let kind = format_ident!("{}", to_upper_snake_case(&name.to_string())); | 127 | let kind = format_ident!("{}", to_upper_snake_case(&name.to_string())); |
@@ -42,6 +139,7 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result<String> { | |||
42 | FieldSrc::Optional(ty) | FieldSrc::Many(ty) => ty, | 139 | FieldSrc::Optional(ty) | FieldSrc::Many(ty) => ty, |
43 | FieldSrc::Shorthand => name, | 140 | FieldSrc::Shorthand => name, |
44 | }; | 141 | }; |
142 | |||
45 | let ty = format_ident!("{}", ty); | 143 | let ty = format_ident!("{}", ty); |
46 | 144 | ||
47 | match field { | 145 | match field { |
@@ -86,6 +184,7 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result<String> { | |||
86 | } | 184 | } |
87 | fn syntax(&self) -> &SyntaxNode { &self.syntax } | 185 | fn syntax(&self) -> &SyntaxNode { &self.syntax } |
88 | } | 186 | } |
187 | |||
89 | #(#traits)* | 188 | #(#traits)* |
90 | 189 | ||
91 | impl #name { | 190 | impl #name { |
@@ -154,12 +253,25 @@ fn generate_ast(grammar: AstSrc<'_>) -> Result<String> { | |||
154 | } | 253 | } |
155 | }); | 254 | }); |
156 | 255 | ||
256 | let defined_nodes: HashSet<_> = grammar.nodes.iter().map(|node| node.name).collect(); | ||
257 | |||
258 | for node in kinds | ||
259 | .nodes | ||
260 | .iter() | ||
261 | .map(|kind| to_pascal_case(*kind)) | ||
262 | .filter(|name| !defined_nodes.contains(&**name)) | ||
263 | { | ||
264 | eprintln!("Warning: node {} not defined in ast source", node); | ||
265 | } | ||
266 | |||
157 | let ast = quote! { | 267 | let ast = quote! { |
268 | #[allow(unused_imports)] | ||
158 | use crate::{ | 269 | use crate::{ |
159 | SyntaxNode, SyntaxKind::{self, *}, | 270 | SyntaxNode, SyntaxToken, SyntaxElement, NodeOrToken, SyntaxKind::{self, *}, |
160 | ast::{self, AstNode, AstChildren}, | 271 | ast::{self, AstNode, AstToken, AstChildren}, |
161 | }; | 272 | }; |
162 | 273 | ||
274 | #(#tokens)* | ||
163 | #(#nodes)* | 275 | #(#nodes)* |
164 | #(#enums)* | 276 | #(#enums)* |
165 | }; | 277 | }; |
@@ -282,12 +394,12 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result<String> { | |||
282 | 394 | ||
283 | fn to_upper_snake_case(s: &str) -> String { | 395 | fn to_upper_snake_case(s: &str) -> String { |
284 | let mut buf = String::with_capacity(s.len()); | 396 | let mut buf = String::with_capacity(s.len()); |
285 | let mut prev_is_upper = None; | 397 | let mut prev = false; |
286 | for c in s.chars() { | 398 | for c in s.chars() { |
287 | if c.is_ascii_uppercase() && prev_is_upper == Some(false) { | 399 | if c.is_ascii_uppercase() && prev { |
288 | buf.push('_') | 400 | buf.push('_') |
289 | } | 401 | } |
290 | prev_is_upper = Some(c.is_ascii_uppercase()); | 402 | prev = true; |
291 | 403 | ||
292 | buf.push(c.to_ascii_uppercase()); | 404 | buf.push(c.to_ascii_uppercase()); |
293 | } | 405 | } |
@@ -296,14 +408,30 @@ fn to_upper_snake_case(s: &str) -> String { | |||
296 | 408 | ||
297 | fn to_lower_snake_case(s: &str) -> String { | 409 | fn to_lower_snake_case(s: &str) -> String { |
298 | let mut buf = String::with_capacity(s.len()); | 410 | let mut buf = String::with_capacity(s.len()); |
299 | let mut prev_is_upper = None; | 411 | let mut prev = false; |
300 | for c in s.chars() { | 412 | for c in s.chars() { |
301 | if c.is_ascii_uppercase() && prev_is_upper == Some(false) { | 413 | if c.is_ascii_uppercase() && prev { |
302 | buf.push('_') | 414 | buf.push('_') |
303 | } | 415 | } |
304 | prev_is_upper = Some(c.is_ascii_uppercase()); | 416 | prev = true; |
305 | 417 | ||
306 | buf.push(c.to_ascii_lowercase()); | 418 | buf.push(c.to_ascii_lowercase()); |
307 | } | 419 | } |
308 | buf | 420 | buf |
309 | } | 421 | } |
422 | |||
423 | fn to_pascal_case(s: &str) -> String { | ||
424 | let mut buf = String::with_capacity(s.len()); | ||
425 | let mut prev_is_underscore = true; | ||
426 | for c in s.chars() { | ||
427 | if c == '_' { | ||
428 | prev_is_underscore = true; | ||
429 | } else if prev_is_underscore { | ||
430 | buf.push(c.to_ascii_uppercase()); | ||
431 | prev_is_underscore = false; | ||
432 | } else { | ||
433 | buf.push(c.to_ascii_lowercase()); | ||
434 | } | ||
435 | } | ||
436 | buf | ||
437 | } | ||