aboutsummaryrefslogtreecommitdiff
path: root/xtask/src/codegen/gen_syntax.rs
diff options
context:
space:
mode:
Diffstat (limited to 'xtask/src/codegen/gen_syntax.rs')
-rw-r--r--xtask/src/codegen/gen_syntax.rs290
1 files changed, 264 insertions, 26 deletions
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs
index 745a25862..45b788bdb 100644
--- a/xtask/src/codegen/gen_syntax.rs
+++ b/xtask/src/codegen/gen_syntax.rs
@@ -3,34 +3,43 @@
3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype 3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
4//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. 4//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
5 5
6use std::{collections::HashSet, fmt::Write}; 6use std::{
7 collections::{BTreeSet, HashSet},
8 fmt::Write,
9};
7 10
8use proc_macro2::{Punct, Spacing}; 11use proc_macro2::{Punct, Spacing};
9use quote::{format_ident, quote}; 12use quote::{format_ident, quote};
13use ungrammar::{Grammar, Rule};
10 14
11use crate::{ 15use crate::{
12 ast_src::{AstSrc, Field, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC}, 16 ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC},
13 codegen::{self, update, Mode}, 17 codegen::{self, update, Mode},
14 project_root, Result, 18 project_root, Result,
15}; 19};
16 20
17pub fn generate_syntax(mode: Mode) -> Result<()> { 21pub fn generate_syntax(mode: Mode) -> Result<()> {
22 let grammar = include_str!("rust.ungram")
23 .parse::<Grammar>()
24 .unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err));
25 let ast = lower(&grammar);
26
18 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); 27 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
19 let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; 28 let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?;
20 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; 29 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
21 30
22 let ast_tokens_file = project_root().join(codegen::AST_TOKENS); 31 let ast_tokens_file = project_root().join(codegen::AST_TOKENS);
23 let contents = generate_tokens(AST_SRC)?; 32 let contents = generate_tokens(&ast)?;
24 update(ast_tokens_file.as_path(), &contents, mode)?; 33 update(ast_tokens_file.as_path(), &contents, mode)?;
25 34
26 let ast_nodes_file = project_root().join(codegen::AST_NODES); 35 let ast_nodes_file = project_root().join(codegen::AST_NODES);
27 let contents = generate_nodes(KINDS_SRC, AST_SRC)?; 36 let contents = generate_nodes(KINDS_SRC, &ast)?;
28 update(ast_nodes_file.as_path(), &contents, mode)?; 37 update(ast_nodes_file.as_path(), &contents, mode)?;
29 38
30 Ok(()) 39 Ok(())
31} 40}
32 41
33fn generate_tokens(grammar: AstSrc<'_>) -> Result<String> { 42fn generate_tokens(grammar: &AstSrc) -> Result<String> {
34 let tokens = grammar.tokens.iter().map(|token| { 43 let tokens = grammar.tokens.iter().map(|token| {
35 let name = format_ident!("{}", token); 44 let name = format_ident!("{}", token);
36 let kind = format_ident!("{}", to_upper_snake_case(token)); 45 let kind = format_ident!("{}", to_upper_snake_case(token));
@@ -62,13 +71,13 @@ fn generate_tokens(grammar: AstSrc<'_>) -> Result<String> {
62 Ok(pretty) 71 Ok(pretty)
63} 72}
64 73
65fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> { 74fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result<String> {
66 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar 75 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
67 .nodes 76 .nodes
68 .iter() 77 .iter()
69 .map(|node| { 78 .map(|node| {
70 let name = format_ident!("{}", node.name); 79 let name = format_ident!("{}", node.name);
71 let kind = format_ident!("{}", to_upper_snake_case(node.name)); 80 let kind = format_ident!("{}", to_upper_snake_case(&node.name));
72 let traits = node.traits.iter().map(|trait_name| { 81 let traits = node.traits.iter().map(|trait_name| {
73 let trait_name = format_ident!("{}", trait_name); 82 let trait_name = format_ident!("{}", trait_name);
74 quote!(impl ast::#trait_name for #name {}) 83 quote!(impl ast::#trait_name for #name {})
@@ -192,8 +201,8 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
192 }) 201 })
193 .unzip(); 202 .unzip();
194 203
195 let enum_names = grammar.enums.iter().map(|it| it.name); 204 let enum_names = grammar.enums.iter().map(|it| &it.name);
196 let node_names = grammar.nodes.iter().map(|it| it.name); 205 let node_names = grammar.nodes.iter().map(|it| &it.name);
197 206
198 let display_impls = 207 let display_impls =
199 enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| { 208 enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
@@ -212,9 +221,11 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
212 .nodes 221 .nodes
213 .iter() 222 .iter()
214 .map(|kind| to_pascal_case(kind)) 223 .map(|kind| to_pascal_case(kind))
215 .filter(|name| !defined_nodes.contains(name.as_str())) 224 .filter(|name| !defined_nodes.iter().any(|&it| it == name))
216 { 225 {
217 eprintln!("Warning: node {} not defined in ast source", node); 226 drop(node)
227 // TODO: restore this
228 // eprintln!("Warning: node {} not defined in ast source", node);
218 } 229 }
219 230
220 let ast = quote! { 231 let ast = quote! {
@@ -236,12 +247,12 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
236 let mut res = String::with_capacity(ast.len() * 2); 247 let mut res = String::with_capacity(ast.len() * 2);
237 248
238 let mut docs = 249 let mut docs =
239 grammar.nodes.iter().map(|it| it.doc).chain(grammar.enums.iter().map(|it| it.doc)); 250 grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));
240 251
241 for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") { 252 for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") {
242 res.push_str(chunk); 253 res.push_str(chunk);
243 if let Some(doc) = docs.next() { 254 if let Some(doc) = docs.next() {
244 write_doc_comment(doc, &mut res); 255 write_doc_comment(&doc, &mut res);
245 } 256 }
246 } 257 }
247 258
@@ -249,7 +260,7 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
249 Ok(pretty) 260 Ok(pretty)
250} 261}
251 262
252fn write_doc_comment(contents: &[&str], dest: &mut String) { 263fn write_doc_comment(contents: &[String], dest: &mut String) {
253 for line in contents { 264 for line in contents {
254 writeln!(dest, "///{}", line).unwrap(); 265 writeln!(dest, "///{}", line).unwrap();
255 } 266 }
@@ -296,7 +307,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result<String> {
296 307
297 let ast = quote! { 308 let ast = quote! {
298 #![allow(bad_style, missing_docs, unreachable_pub)] 309 #![allow(bad_style, missing_docs, unreachable_pub)]
299 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. 310 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.
300 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 311 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
301 #[repr(u16)] 312 #[repr(u16)]
302 pub enum SyntaxKind { 313 pub enum SyntaxKind {
@@ -363,6 +374,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result<String> {
363 #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)* 374 #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)*
364 [lifetime] => { $crate::SyntaxKind::LIFETIME }; 375 [lifetime] => { $crate::SyntaxKind::LIFETIME };
365 [ident] => { $crate::SyntaxKind::IDENT }; 376 [ident] => { $crate::SyntaxKind::IDENT };
377 [shebang] => { $crate::SyntaxKind::SHEBANG };
366 } 378 }
367 }; 379 };
368 380
@@ -413,9 +425,13 @@ fn to_pascal_case(s: &str) -> String {
413 buf 425 buf
414} 426}
415 427
416impl Field<'_> { 428fn pluralize(s: &str) -> String {
429 format!("{}s", s)
430}
431
432impl Field {
417 fn is_many(&self) -> bool { 433 fn is_many(&self) -> bool {
418 matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) 434 matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
419 } 435 }
420 fn token_kind(&self) -> Option<proc_macro2::TokenStream> { 436 fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
421 match self { 437 match self {
@@ -429,7 +445,7 @@ impl Field<'_> {
429 fn method_name(&self) -> proc_macro2::Ident { 445 fn method_name(&self) -> proc_macro2::Ident {
430 match self { 446 match self {
431 Field::Token(name) => { 447 Field::Token(name) => {
432 let name = match *name { 448 let name = match name.as_str() {
433 ";" => "semicolon", 449 ";" => "semicolon",
434 "->" => "thin_arrow", 450 "->" => "thin_arrow",
435 "'{'" => "l_curly", 451 "'{'" => "l_curly",
@@ -448,29 +464,251 @@ impl Field<'_> {
448 "." => "dot", 464 "." => "dot",
449 ".." => "dotdot", 465 ".." => "dotdot",
450 "..." => "dotdotdot", 466 "..." => "dotdotdot",
467 "..=" => "dotdoteq",
451 "=>" => "fat_arrow", 468 "=>" => "fat_arrow",
452 "@" => "at", 469 "@" => "at",
453 ":" => "colon", 470 ":" => "colon",
454 "::" => "coloncolon", 471 "::" => "coloncolon",
455 "#" => "pound", 472 "#" => "pound",
456 "?" => "question_mark", 473 "?" => "question_mark",
474 "," => "comma",
457 _ => name, 475 _ => name,
458 }; 476 };
459 format_ident!("{}_token", name) 477 format_ident!("{}_token", name)
460 } 478 }
461 Field::Node { name, src } => match src { 479 Field::Node { name, .. } => format_ident!("{}", name),
462 FieldSrc::Shorthand => format_ident!("{}", to_lower_snake_case(name)),
463 _ => format_ident!("{}", name),
464 },
465 } 480 }
466 } 481 }
467 fn ty(&self) -> proc_macro2::Ident { 482 fn ty(&self) -> proc_macro2::Ident {
468 match self { 483 match self {
469 Field::Token(_) => format_ident!("SyntaxToken"), 484 Field::Token(_) => format_ident!("SyntaxToken"),
470 Field::Node { name, src } => match src { 485 Field::Node { ty, .. } => format_ident!("{}", ty),
471 FieldSrc::Optional(ty) | FieldSrc::Many(ty) => format_ident!("{}", ty),
472 FieldSrc::Shorthand => format_ident!("{}", name),
473 },
474 } 486 }
475 } 487 }
476} 488}
489
490fn lower(grammar: &Grammar) -> AstSrc {
491 let mut res = AstSrc::default();
492 res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()];
493
494 let nodes = grammar
495 .iter()
496 .filter(|&node| match grammar[node].rule {
497 Rule::Node(it) if it == node => false,
498 _ => true,
499 })
500 .collect::<Vec<_>>();
501
502 for &node in &nodes {
503 let name = grammar[node].name.clone();
504 let rule = &grammar[node].rule;
505 match lower_enum(grammar, rule) {
506 Some(variants) => {
507 let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
508 res.enums.push(enum_src);
509 }
510 None => {
511 let mut fields = Vec::new();
512 lower_rule(&mut fields, grammar, None, rule);
513 res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
514 }
515 }
516 }
517
518 deduplicate_fields(&mut res);
519 extract_enums(&mut res);
520 extract_struct_traits(&mut res);
521 extract_enum_traits(&mut res);
522 res
523}
524
525fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
526 let alternatives = match rule {
527 Rule::Alt(it) => it,
528 _ => return None,
529 };
530 let mut variants = Vec::new();
531 for alternative in alternatives {
532 match alternative {
533 Rule::Node(it) => variants.push(grammar[*it].name.clone()),
534 _ => return None,
535 }
536 }
537 Some(variants)
538}
539
/// Recursively lowers a grammar `rule` into accessor `Field`s pushed onto
/// `acc`. `label` is the explicit name given by a `Labeled` rule wrapping
/// this one, if any; it overrides the name derived from the node type.
fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule) {
    // Special-case the `(T (',' T)* ','?)` shape first, so comma-separated
    // lists become a single `Many` field instead of several fields.
    if lower_comma_list(acc, grammar, label, rule) {
        return;
    }

    match rule {
        // A bare node reference becomes an optional child accessor.
        Rule::Node(node) => {
            let ty = grammar[*node].name.clone();
            let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty));
            let field = Field::Node { name, ty, cardinality: Cardinality::Optional };
            acc.push(field);
        }
        Rule::Token(token) => {
            // Labels on bare tokens are not supported by this lowering.
            assert!(label.is_none());
            let mut name = grammar[*token].name.clone();
            // "int_number" and "string" tokens get no accessor here
            // (presumably handled elsewhere — note they are excluded).
            if name != "int_number" && name != "string" {
                // Bracket tokens are quoted so they match the quoted
                // spellings `method_name` maps to l_curly/r_paren/etc.
                if "[]{}()".contains(&name) {
                    name = format!("'{}'", name);
                }
                let field = Field::Token(name);
                acc.push(field);
            }
        }
        // A repetition of a single node becomes a `Many` field; any other
        // repeated shape is not yet supported.
        Rule::Rep(inner) => {
            if let Rule::Node(node) = &**inner {
                let ty = grammar[*node].name.clone();
                let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
                let field = Field::Node { name, ty, cardinality: Cardinality::Many };
                acc.push(field);
                return;
            }
            todo!("{:?}", rule)
        }
        // A label applies to exactly one unlabeled rule; nesting labels
        // is a grammar error caught by the assert.
        Rule::Labeled { label: l, rule } => {
            assert!(label.is_none());
            lower_rule(acc, grammar, Some(l), rule);
        }
        // Sequences and alternations contribute the union of their
        // children's fields, all inheriting the same label.
        Rule::Seq(rules) | Rule::Alt(rules) => {
            for rule in rules {
                lower_rule(acc, grammar, label, rule)
            }
        }
        // Optionality is already the default cardinality, so just recurse.
        Rule::Opt(rule) => lower_rule(acc, grammar, label, rule),
    }
}
585
// (T (',' T)* ','?)
/// Recognizes the comma-separated-list shape shown above. On a match,
/// pushes a single `Many` field for the element node onto `acc` and
/// returns `true`; otherwise leaves `acc` untouched and returns `false`.
fn lower_comma_list(
    acc: &mut Vec<Field>,
    grammar: &Grammar,
    label: Option<&String>,
    rule: &Rule,
) -> bool {
    // The whole rule must be a three-element sequence:
    // element, repetition, optional trailing separator.
    let rule = match rule {
        Rule::Seq(it) => it,
        _ => return false,
    };
    let (node, repeat, trailing_comma) = match rule.as_slice() {
        [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_comma)] => {
            (node, repeat, trailing_comma)
        }
        _ => return false,
    };
    // The repeated part must itself be a (separator, element) sequence.
    let repeat = match &**repeat {
        Rule::Seq(it) => it,
        _ => return false,
    };
    // The separator inside the repetition must be the same rule as the
    // optional trailing separator, and the element must be the same node.
    match repeat.as_slice() {
        [comma, Rule::Node(n)] if comma == &**trailing_comma && n == node => (),
        _ => return false,
    }
    let ty = grammar[*node].name.clone();
    // Label wins; otherwise derive a pluralized snake_case accessor name.
    let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
    let field = Field::Node { name, ty, cardinality: Cardinality::Many };
    acc.push(field);
    true
}
617
618fn deduplicate_fields(ast: &mut AstSrc) {
619 for node in &mut ast.nodes {
620 let mut i = 0;
621 'outer: while i < node.fields.len() {
622 for j in 0..i {
623 let f1 = &node.fields[i];
624 let f2 = &node.fields[j];
625 if f1 == f2 {
626 node.fields.remove(i);
627 continue 'outer;
628 }
629 }
630 i += 1;
631 }
632 }
633}
634
635fn extract_enums(ast: &mut AstSrc) {
636 for node in &mut ast.nodes {
637 for enm in &ast.enums {
638 let mut to_remove = Vec::new();
639 for (i, field) in node.fields.iter().enumerate() {
640 let ty = field.ty().to_string();
641 if enm.variants.iter().any(|it| it == &ty) {
642 to_remove.push(i);
643 }
644 }
645 if to_remove.len() == enm.variants.len() {
646 node.remove_field(to_remove);
647 let ty = enm.name.clone();
648 let name = to_lower_snake_case(&ty);
649 node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional });
650 }
651 }
652 }
653}
654
655fn extract_struct_traits(ast: &mut AstSrc) {
656 let traits: &[(&str, &[&str])] = &[
657 ("AttrsOwner", &["attrs"]),
658 ("NameOwner", &["name"]),
659 ("VisibilityOwner", &["visibility"]),
660 ("GenericParamsOwner", &["generic_param_list", "where_clause"]),
661 ("TypeBoundsOwner", &["type_bound_list", "colon_token"]),
662 ("ModuleItemOwner", &["items"]),
663 ("LoopBodyOwner", &["label", "loop_body"]),
664 ("ArgListOwner", &["arg_list"]),
665 ];
666
667 for node in &mut ast.nodes {
668 for (name, methods) in traits {
669 extract_struct_trait(node, name, methods);
670 }
671 }
672}
673
674fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
675 let mut to_remove = Vec::new();
676 for (i, field) in node.fields.iter().enumerate() {
677 let method_name = field.method_name().to_string();
678 if methods.iter().any(|&it| it == &method_name) {
679 to_remove.push(i);
680 }
681 }
682 if to_remove.len() == methods.len() {
683 node.traits.push(trait_name.to_string());
684 node.remove_field(to_remove);
685 }
686}
687
688fn extract_enum_traits(ast: &mut AstSrc) {
689 for enm in &mut ast.enums {
690 let nodes = &ast.nodes;
691 let mut variant_traits = enm
692 .variants
693 .iter()
694 .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
695 .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
696
697 let mut enum_traits = match variant_traits.next() {
698 Some(it) => it,
699 None => continue,
700 };
701 for traits in variant_traits {
702 enum_traits = enum_traits.intersection(&traits).cloned().collect();
703 }
704 enm.traits = enum_traits.into_iter().collect();
705 }
706}
707
708impl AstNodeSrc {
709 fn remove_field(&mut self, to_remove: Vec<usize>) {
710 to_remove.into_iter().rev().for_each(|idx| {
711 self.fields.remove(idx);
712 });
713 }
714}