about summary refs log tree commit diff
path: root/xtask/src/codegen/gen_syntax.rs
diff options
context:
space:
mode:
Diffstat (limited to 'xtask/src/codegen/gen_syntax.rs')
-rw-r--r--  xtask/src/codegen/gen_syntax.rs  353
1 file changed, 309 insertions, 44 deletions
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs
index 745a25862..cafad8070 100644
--- a/xtask/src/codegen/gen_syntax.rs
+++ b/xtask/src/codegen/gen_syntax.rs
@@ -3,34 +3,41 @@
3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype 3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
4//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. 4//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
5 5
6use std::{collections::HashSet, fmt::Write}; 6use std::{
7 collections::{BTreeSet, HashSet},
8 fmt::Write,
9};
7 10
8use proc_macro2::{Punct, Spacing}; 11use proc_macro2::{Punct, Spacing};
9use quote::{format_ident, quote}; 12use quote::{format_ident, quote};
13use ungrammar::{rust_grammar, Grammar, Rule};
10 14
11use crate::{ 15use crate::{
12 ast_src::{AstSrc, Field, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC}, 16 ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC},
13 codegen::{self, update, Mode}, 17 codegen::{self, update, Mode},
14 project_root, Result, 18 project_root, Result,
15}; 19};
16 20
17pub fn generate_syntax(mode: Mode) -> Result<()> { 21pub fn generate_syntax(mode: Mode) -> Result<()> {
22 let grammar = rust_grammar();
23 let ast = lower(&grammar);
24
18 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); 25 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
19 let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; 26 let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?;
20 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; 27 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
21 28
22 let ast_tokens_file = project_root().join(codegen::AST_TOKENS); 29 let ast_tokens_file = project_root().join(codegen::AST_TOKENS);
23 let contents = generate_tokens(AST_SRC)?; 30 let contents = generate_tokens(&ast)?;
24 update(ast_tokens_file.as_path(), &contents, mode)?; 31 update(ast_tokens_file.as_path(), &contents, mode)?;
25 32
26 let ast_nodes_file = project_root().join(codegen::AST_NODES); 33 let ast_nodes_file = project_root().join(codegen::AST_NODES);
27 let contents = generate_nodes(KINDS_SRC, AST_SRC)?; 34 let contents = generate_nodes(KINDS_SRC, &ast)?;
28 update(ast_nodes_file.as_path(), &contents, mode)?; 35 update(ast_nodes_file.as_path(), &contents, mode)?;
29 36
30 Ok(()) 37 Ok(())
31} 38}
32 39
33fn generate_tokens(grammar: AstSrc<'_>) -> Result<String> { 40fn generate_tokens(grammar: &AstSrc) -> Result<String> {
34 let tokens = grammar.tokens.iter().map(|token| { 41 let tokens = grammar.tokens.iter().map(|token| {
35 let name = format_ident!("{}", token); 42 let name = format_ident!("{}", token);
36 let kind = format_ident!("{}", to_upper_snake_case(token)); 43 let kind = format_ident!("{}", to_upper_snake_case(token));
@@ -62,13 +69,13 @@ fn generate_tokens(grammar: AstSrc<'_>) -> Result<String> {
62 Ok(pretty) 69 Ok(pretty)
63} 70}
64 71
65fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> { 72fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result<String> {
66 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar 73 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
67 .nodes 74 .nodes
68 .iter() 75 .iter()
69 .map(|node| { 76 .map(|node| {
70 let name = format_ident!("{}", node.name); 77 let name = format_ident!("{}", node.name);
71 let kind = format_ident!("{}", to_upper_snake_case(node.name)); 78 let kind = format_ident!("{}", to_upper_snake_case(&node.name));
72 let traits = node.traits.iter().map(|trait_name| { 79 let traits = node.traits.iter().map(|trait_name| {
73 let trait_name = format_ident!("{}", trait_name); 80 let trait_name = format_ident!("{}", trait_name);
74 quote!(impl ast::#trait_name for #name {}) 81 quote!(impl ast::#trait_name for #name {})
@@ -144,25 +151,10 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
144 quote!(impl ast::#trait_name for #name {}) 151 quote!(impl ast::#trait_name for #name {})
145 }); 152 });
146 153
147 ( 154 let ast_node = if en.name == "Stmt" {
148 quote! { 155 quote! {}
149 #[pretty_doc_comment_placeholder_workaround] 156 } else {
150 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
151 pub enum #name {
152 #(#variants(#variants),)*
153 }
154
155 #(#traits)*
156 },
157 quote! { 157 quote! {
158 #(
159 impl From<#variants> for #name {
160 fn from(node: #variants) -> #name {
161 #name::#variants(node)
162 }
163 }
164 )*
165
166 impl AstNode for #name { 158 impl AstNode for #name {
167 fn can_cast(kind: SyntaxKind) -> bool { 159 fn can_cast(kind: SyntaxKind) -> bool {
168 match kind { 160 match kind {
@@ -187,13 +179,35 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
187 } 179 }
188 } 180 }
189 } 181 }
182 }
183 };
184
185 (
186 quote! {
187 #[pretty_doc_comment_placeholder_workaround]
188 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
189 pub enum #name {
190 #(#variants(#variants),)*
191 }
192
193 #(#traits)*
194 },
195 quote! {
196 #(
197 impl From<#variants> for #name {
198 fn from(node: #variants) -> #name {
199 #name::#variants(node)
200 }
201 }
202 )*
203 #ast_node
190 }, 204 },
191 ) 205 )
192 }) 206 })
193 .unzip(); 207 .unzip();
194 208
195 let enum_names = grammar.enums.iter().map(|it| it.name); 209 let enum_names = grammar.enums.iter().map(|it| &it.name);
196 let node_names = grammar.nodes.iter().map(|it| it.name); 210 let node_names = grammar.nodes.iter().map(|it| &it.name);
197 211
198 let display_impls = 212 let display_impls =
199 enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| { 213 enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
@@ -212,9 +226,11 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
212 .nodes 226 .nodes
213 .iter() 227 .iter()
214 .map(|kind| to_pascal_case(kind)) 228 .map(|kind| to_pascal_case(kind))
215 .filter(|name| !defined_nodes.contains(name.as_str())) 229 .filter(|name| !defined_nodes.iter().any(|&it| it == name))
216 { 230 {
217 eprintln!("Warning: node {} not defined in ast source", node); 231 drop(node)
232 // TODO: restore this
233 // eprintln!("Warning: node {} not defined in ast source", node);
218 } 234 }
219 235
220 let ast = quote! { 236 let ast = quote! {
@@ -236,12 +252,12 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
236 let mut res = String::with_capacity(ast.len() * 2); 252 let mut res = String::with_capacity(ast.len() * 2);
237 253
238 let mut docs = 254 let mut docs =
239 grammar.nodes.iter().map(|it| it.doc).chain(grammar.enums.iter().map(|it| it.doc)); 255 grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));
240 256
241 for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") { 257 for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") {
242 res.push_str(chunk); 258 res.push_str(chunk);
243 if let Some(doc) = docs.next() { 259 if let Some(doc) = docs.next() {
244 write_doc_comment(doc, &mut res); 260 write_doc_comment(&doc, &mut res);
245 } 261 }
246 } 262 }
247 263
@@ -249,7 +265,7 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
249 Ok(pretty) 265 Ok(pretty)
250} 266}
251 267
252fn write_doc_comment(contents: &[&str], dest: &mut String) { 268fn write_doc_comment(contents: &[String], dest: &mut String) {
253 for line in contents { 269 for line in contents {
254 writeln!(dest, "///{}", line).unwrap(); 270 writeln!(dest, "///{}", line).unwrap();
255 } 271 }
@@ -296,7 +312,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result<String> {
296 312
297 let ast = quote! { 313 let ast = quote! {
298 #![allow(bad_style, missing_docs, unreachable_pub)] 314 #![allow(bad_style, missing_docs, unreachable_pub)]
299 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. 315 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.
300 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 316 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
301 #[repr(u16)] 317 #[repr(u16)]
302 pub enum SyntaxKind { 318 pub enum SyntaxKind {
@@ -363,6 +379,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result<String> {
363 #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)* 379 #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)*
364 [lifetime] => { $crate::SyntaxKind::LIFETIME }; 380 [lifetime] => { $crate::SyntaxKind::LIFETIME };
365 [ident] => { $crate::SyntaxKind::IDENT }; 381 [ident] => { $crate::SyntaxKind::IDENT };
382 [shebang] => { $crate::SyntaxKind::SHEBANG };
366 } 383 }
367 }; 384 };
368 385
@@ -413,9 +430,13 @@ fn to_pascal_case(s: &str) -> String {
413 buf 430 buf
414} 431}
415 432
416impl Field<'_> { 433fn pluralize(s: &str) -> String {
434 format!("{}s", s)
435}
436
437impl Field {
417 fn is_many(&self) -> bool { 438 fn is_many(&self) -> bool {
418 matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) 439 matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
419 } 440 }
420 fn token_kind(&self) -> Option<proc_macro2::TokenStream> { 441 fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
421 match self { 442 match self {
@@ -429,7 +450,7 @@ impl Field<'_> {
429 fn method_name(&self) -> proc_macro2::Ident { 450 fn method_name(&self) -> proc_macro2::Ident {
430 match self { 451 match self {
431 Field::Token(name) => { 452 Field::Token(name) => {
432 let name = match *name { 453 let name = match name.as_str() {
433 ";" => "semicolon", 454 ";" => "semicolon",
434 "->" => "thin_arrow", 455 "->" => "thin_arrow",
435 "'{'" => "l_curly", 456 "'{'" => "l_curly",
@@ -448,29 +469,273 @@ impl Field<'_> {
448 "." => "dot", 469 "." => "dot",
449 ".." => "dotdot", 470 ".." => "dotdot",
450 "..." => "dotdotdot", 471 "..." => "dotdotdot",
472 "..=" => "dotdoteq",
451 "=>" => "fat_arrow", 473 "=>" => "fat_arrow",
452 "@" => "at", 474 "@" => "at",
453 ":" => "colon", 475 ":" => "colon",
454 "::" => "coloncolon", 476 "::" => "coloncolon",
455 "#" => "pound", 477 "#" => "pound",
456 "?" => "question_mark", 478 "?" => "question_mark",
479 "," => "comma",
457 _ => name, 480 _ => name,
458 }; 481 };
459 format_ident!("{}_token", name) 482 format_ident!("{}_token", name)
460 } 483 }
461 Field::Node { name, src } => match src { 484 Field::Node { name, .. } => {
462 FieldSrc::Shorthand => format_ident!("{}", to_lower_snake_case(name)), 485 if name == "type" {
463 _ => format_ident!("{}", name), 486 format_ident!("ty")
464 }, 487 } else {
488 format_ident!("{}", name)
489 }
490 }
465 } 491 }
466 } 492 }
467 fn ty(&self) -> proc_macro2::Ident { 493 fn ty(&self) -> proc_macro2::Ident {
468 match self { 494 match self {
469 Field::Token(_) => format_ident!("SyntaxToken"), 495 Field::Token(_) => format_ident!("SyntaxToken"),
470 Field::Node { name, src } => match src { 496 Field::Node { ty, .. } => format_ident!("{}", ty),
471 FieldSrc::Optional(ty) | FieldSrc::Many(ty) => format_ident!("{}", ty),
472 FieldSrc::Shorthand => format_ident!("{}", name),
473 },
474 } 497 }
475 } 498 }
476} 499}
500
501fn lower(grammar: &Grammar) -> AstSrc {
502 let mut res = AstSrc::default();
503 res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()];
504
505 let nodes = grammar.iter().collect::<Vec<_>>();
506
507 for &node in &nodes {
508 let name = grammar[node].name.clone();
509 let rule = &grammar[node].rule;
510 match lower_enum(grammar, rule) {
511 Some(variants) => {
512 let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
513 res.enums.push(enum_src);
514 }
515 None => {
516 let mut fields = Vec::new();
517 lower_rule(&mut fields, grammar, None, rule);
518 res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
519 }
520 }
521 }
522
523 deduplicate_fields(&mut res);
524 extract_enums(&mut res);
525 extract_struct_traits(&mut res);
526 extract_enum_traits(&mut res);
527 res
528}
529
530fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
531 let alternatives = match rule {
532 Rule::Alt(it) => it,
533 _ => return None,
534 };
535 let mut variants = Vec::new();
536 for alternative in alternatives {
537 match alternative {
538 Rule::Node(it) => variants.push(grammar[*it].name.clone()),
539 Rule::Token(it) if grammar[*it].name == ";" => (),
540 _ => return None,
541 }
542 }
543 Some(variants)
544}
545
546fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule) {
547 if lower_comma_list(acc, grammar, label, rule) {
548 return;
549 }
550
551 match rule {
552 Rule::Node(node) => {
553 let ty = grammar[*node].name.clone();
554 let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty));
555 let field = Field::Node { name, ty, cardinality: Cardinality::Optional };
556 acc.push(field);
557 }
558 Rule::Token(token) => {
559 assert!(label.is_none());
560 let mut name = grammar[*token].name.clone();
561 if name != "int_number" && name != "string" {
562 if "[]{}()".contains(&name) {
563 name = format!("'{}'", name);
564 }
565 let field = Field::Token(name);
566 acc.push(field);
567 }
568 }
569 Rule::Rep(inner) => {
570 if let Rule::Node(node) = &**inner {
571 let ty = grammar[*node].name.clone();
572 let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
573 let field = Field::Node { name, ty, cardinality: Cardinality::Many };
574 acc.push(field);
575 return;
576 }
577 todo!("{:?}", rule)
578 }
579 Rule::Labeled { label: l, rule } => {
580 assert!(label.is_none());
581 let manually_implemented = matches!(
582 l.as_str(),
583 "lhs"
584 | "rhs"
585 | "then_branch"
586 | "else_branch"
587 | "start"
588 | "end"
589 | "op"
590 | "index"
591 | "base"
592 | "value"
593 | "trait"
594 | "self_ty"
595 );
596 if manually_implemented {
597 return;
598 }
599 lower_rule(acc, grammar, Some(l), rule);
600 }
601 Rule::Seq(rules) | Rule::Alt(rules) => {
602 for rule in rules {
603 lower_rule(acc, grammar, label, rule)
604 }
605 }
606 Rule::Opt(rule) => lower_rule(acc, grammar, label, rule),
607 }
608}
609
610// (T (',' T)* ','?)
611fn lower_comma_list(
612 acc: &mut Vec<Field>,
613 grammar: &Grammar,
614 label: Option<&String>,
615 rule: &Rule,
616) -> bool {
617 let rule = match rule {
618 Rule::Seq(it) => it,
619 _ => return false,
620 };
621 let (node, repeat, trailing_comma) = match rule.as_slice() {
622 [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_comma)] => {
623 (node, repeat, trailing_comma)
624 }
625 _ => return false,
626 };
627 let repeat = match &**repeat {
628 Rule::Seq(it) => it,
629 _ => return false,
630 };
631 match repeat.as_slice() {
632 [comma, Rule::Node(n)] if comma == &**trailing_comma && n == node => (),
633 _ => return false,
634 }
635 let ty = grammar[*node].name.clone();
636 let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
637 let field = Field::Node { name, ty, cardinality: Cardinality::Many };
638 acc.push(field);
639 true
640}
641
642fn deduplicate_fields(ast: &mut AstSrc) {
643 for node in &mut ast.nodes {
644 let mut i = 0;
645 'outer: while i < node.fields.len() {
646 for j in 0..i {
647 let f1 = &node.fields[i];
648 let f2 = &node.fields[j];
649 if f1 == f2 {
650 node.fields.remove(i);
651 continue 'outer;
652 }
653 }
654 i += 1;
655 }
656 }
657}
658
659fn extract_enums(ast: &mut AstSrc) {
660 for node in &mut ast.nodes {
661 for enm in &ast.enums {
662 let mut to_remove = Vec::new();
663 for (i, field) in node.fields.iter().enumerate() {
664 let ty = field.ty().to_string();
665 if enm.variants.iter().any(|it| it == &ty) {
666 to_remove.push(i);
667 }
668 }
669 if to_remove.len() == enm.variants.len() {
670 node.remove_field(to_remove);
671 let ty = enm.name.clone();
672 let name = to_lower_snake_case(&ty);
673 node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional });
674 }
675 }
676 }
677}
678
679fn extract_struct_traits(ast: &mut AstSrc) {
680 let traits: &[(&str, &[&str])] = &[
681 ("AttrsOwner", &["attrs"]),
682 ("NameOwner", &["name"]),
683 ("VisibilityOwner", &["visibility"]),
684 ("GenericParamsOwner", &["generic_param_list", "where_clause"]),
685 ("TypeBoundsOwner", &["type_bound_list", "colon_token"]),
686 ("ModuleItemOwner", &["items"]),
687 ("LoopBodyOwner", &["label", "loop_body"]),
688 ("ArgListOwner", &["arg_list"]),
689 ];
690
691 for node in &mut ast.nodes {
692 for (name, methods) in traits {
693 extract_struct_trait(node, name, methods);
694 }
695 }
696}
697
698fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
699 let mut to_remove = Vec::new();
700 for (i, field) in node.fields.iter().enumerate() {
701 let method_name = field.method_name().to_string();
702 if methods.iter().any(|&it| it == &method_name) {
703 to_remove.push(i);
704 }
705 }
706 if to_remove.len() == methods.len() {
707 node.traits.push(trait_name.to_string());
708 node.remove_field(to_remove);
709 }
710}
711
712fn extract_enum_traits(ast: &mut AstSrc) {
713 for enm in &mut ast.enums {
714 if enm.name == "Stmt" {
715 continue;
716 }
717 let nodes = &ast.nodes;
718 let mut variant_traits = enm
719 .variants
720 .iter()
721 .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
722 .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
723
724 let mut enum_traits = match variant_traits.next() {
725 Some(it) => it,
726 None => continue,
727 };
728 for traits in variant_traits {
729 enum_traits = enum_traits.intersection(&traits).cloned().collect();
730 }
731 enm.traits = enum_traits.into_iter().collect();
732 }
733}
734
735impl AstNodeSrc {
736 fn remove_field(&mut self, to_remove: Vec<usize>) {
737 to_remove.into_iter().rev().for_each(|idx| {
738 self.fields.remove(idx);
739 });
740 }
741}