path: root/xtask/src/codegen/gen_syntax.rs
Diffstat (limited to 'xtask/src/codegen/gen_syntax.rs')
 -rw-r--r--  xtask/src/codegen/gen_syntax.rs  249
 1 file changed, 233 insertions(+), 16 deletions(-)
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs
index 745a25862..24e8be1fb 100644
--- a/xtask/src/codegen/gen_syntax.rs
+++ b/xtask/src/codegen/gen_syntax.rs
@@ -3,34 +3,43 @@
 //! Specifically, it generates the `SyntaxKind` enum and a number of newtype
 //! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
 
-use std::{collections::HashSet, fmt::Write};
+use std::{
+    collections::{BTreeSet, HashSet},
+    fmt::Write,
+};
 
 use proc_macro2::{Punct, Spacing};
 use quote::{format_ident, quote};
+use ungrammar::{Grammar, Rule};
 
 use crate::{
-    ast_src::{AstSrc, Field, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC},
+    ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC},
     codegen::{self, update, Mode},
     project_root, Result,
 };
 
 pub fn generate_syntax(mode: Mode) -> Result<()> {
+    let grammar = include_str!("rust.ungram")
+        .parse::<Grammar>()
+        .unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err));
+    let ast = lower(&grammar);
+
     let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
     let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?;
     update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
 
     let ast_tokens_file = project_root().join(codegen::AST_TOKENS);
-    let contents = generate_tokens(AST_SRC)?;
+    let contents = generate_tokens(&ast)?;
     update(ast_tokens_file.as_path(), &contents, mode)?;
 
     let ast_nodes_file = project_root().join(codegen::AST_NODES);
-    let contents = generate_nodes(KINDS_SRC, AST_SRC)?;
+    let contents = generate_nodes(KINDS_SRC, &ast)?;
     update(ast_nodes_file.as_path(), &contents, mode)?;
 
     Ok(())
 }
 
-fn generate_tokens(grammar: AstSrc<'_>) -> Result<String> {
+fn generate_tokens(grammar: &AstSrc) -> Result<String> {
     let tokens = grammar.tokens.iter().map(|token| {
         let name = format_ident!("{}", token);
         let kind = format_ident!("{}", to_upper_snake_case(token));
@@ -62,13 +71,13 @@ fn generate_tokens(grammar: AstSrc<'_>) -> Result<String> {
     Ok(pretty)
 }
 
-fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
+fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result<String> {
     let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
         .nodes
         .iter()
         .map(|node| {
             let name = format_ident!("{}", node.name);
-            let kind = format_ident!("{}", to_upper_snake_case(node.name));
+            let kind = format_ident!("{}", to_upper_snake_case(&node.name));
             let traits = node.traits.iter().map(|trait_name| {
                 let trait_name = format_ident!("{}", trait_name);
                 quote!(impl ast::#trait_name for #name {})
@@ -192,8 +201,8 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
         })
         .unzip();
 
-    let enum_names = grammar.enums.iter().map(|it| it.name);
-    let node_names = grammar.nodes.iter().map(|it| it.name);
+    let enum_names = grammar.enums.iter().map(|it| &it.name);
+    let node_names = grammar.nodes.iter().map(|it| &it.name);
 
     let display_impls =
         enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
@@ -212,9 +221,11 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
         .nodes
         .iter()
         .map(|kind| to_pascal_case(kind))
-        .filter(|name| !defined_nodes.contains(name.as_str()))
+        .filter(|name| !defined_nodes.iter().any(|&it| it == name))
     {
-        eprintln!("Warning: node {} not defined in ast source", node);
+        drop(node)
+        // TODO: restore this
+        // eprintln!("Warning: node {} not defined in ast source", node);
     }
 
     let ast = quote! {
@@ -236,12 +247,12 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
     let mut res = String::with_capacity(ast.len() * 2);
 
     let mut docs =
-        grammar.nodes.iter().map(|it| it.doc).chain(grammar.enums.iter().map(|it| it.doc));
+        grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));
 
     for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") {
         res.push_str(chunk);
         if let Some(doc) = docs.next() {
-            write_doc_comment(doc, &mut res);
+            write_doc_comment(&doc, &mut res);
         }
     }
 
@@ -249,7 +260,7 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
     Ok(pretty)
 }
 
-fn write_doc_comment(contents: &[&str], dest: &mut String) {
+fn write_doc_comment(contents: &[String], dest: &mut String) {
     for line in contents {
         writeln!(dest, "///{}", line).unwrap();
     }
@@ -413,7 +424,11 @@ fn to_pascal_case(s: &str) -> String {
     buf
 }
 
-impl Field<'_> {
+fn pluralize(s: &str) -> String {
+    format!("{}s", s)
+}
+
+impl Field {
     fn is_many(&self) -> bool {
         matches!(self, Field::Node { src: FieldSrc::Many(_), .. })
     }
@@ -429,7 +444,7 @@ impl Field<'_> {
     fn method_name(&self) -> proc_macro2::Ident {
         match self {
             Field::Token(name) => {
-                let name = match *name {
+                let name = match name.as_str() {
                     ";" => "semicolon",
                     "->" => "thin_arrow",
                     "'{'" => "l_curly",
@@ -448,6 +463,7 @@ impl Field<'_> {
                     "." => "dot",
                     ".." => "dotdot",
                     "..." => "dotdotdot",
+                    "..=" => "dotdoteq",
                     "=>" => "fat_arrow",
                     "@" => "at",
                     ":" => "colon",
@@ -474,3 +490,204 @@ impl Field<'_> {
         }
     }
 }
+
+fn lower(grammar: &Grammar) -> AstSrc {
+    let mut res = AstSrc::default();
+    res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()];
+
+    let nodes = grammar
+        .iter()
+        .filter(|&node| match grammar[node].rule {
+            Rule::Node(it) if it == node => false,
+            _ => true,
+        })
+        .collect::<Vec<_>>();
+
+    for &node in &nodes {
+        let name = grammar[node].name.clone();
+        let rule = &grammar[node].rule;
+        match lower_enum(grammar, rule) {
+            Some(variants) => {
+                let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
+                res.enums.push(enum_src);
+            }
+            None => {
+                let mut fields = Vec::new();
+                lower_rule(&mut fields, grammar, rule);
+                res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
+            }
+        }
+    }
+
+    deduplicate_fields(&mut res);
+    extract_enums(&mut res);
+    extract_struct_traits(&mut res);
+    extract_enum_traits(&mut res);
+    res
+}
+
+fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
+    let alternatives = match rule {
+        Rule::Alt(it) => it,
+        _ => return None,
+    };
+    let mut variants = Vec::new();
+    for alternative in alternatives {
+        match alternative {
+            Rule::Node(it) => variants.push(grammar[*it].name.clone()),
+            _ => return None,
+        }
+    }
+    Some(variants)
+}
+
+fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, rule: &Rule) {
+    match rule {
+        Rule::Node(node) => {
+            let field = Field::Node { name: grammar[*node].name.clone(), src: FieldSrc::Shorthand };
+            acc.push(field);
+        }
+        Rule::Token(token) => {
+            let mut name = grammar[*token].name.clone();
+            if name != "int_number" && name != "string" {
+                if "[]{}()".contains(&name) {
+                    name = format!("'{}'", name);
+                }
+                let field = Field::Token(name);
+                acc.push(field);
+            }
+        }
+        Rule::Rep(inner) => {
+            if let Rule::Node(node) = &**inner {
+                let name = grammar[*node].name.clone();
+                let label = pluralize(&to_lower_snake_case(&name));
+                let field = Field::Node { name: label.clone(), src: FieldSrc::Many(name) };
+                acc.push(field);
+                return;
+            }
+            todo!("{:?}", rule)
+        }
+        Rule::Labeled { label, rule } => {
+            let node = match &**rule {
+                Rule::Rep(inner) | Rule::Opt(inner) => match &**inner {
+                    Rule::Node(node) => node,
+                    _ => todo!("{:?}", rule),
+                },
+                Rule::Node(node) => node,
+                _ => todo!("{:?}", rule),
+            };
+            let field = Field::Node {
+                name: label.clone(),
+                src: match &**rule {
+                    Rule::Rep(_) => FieldSrc::Many(grammar[*node].name.clone()),
+                    _ => FieldSrc::Optional(grammar[*node].name.clone()),
+                },
+            };
+            acc.push(field);
+        }
+        Rule::Seq(rules) | Rule::Alt(rules) => {
+            for rule in rules {
+                lower_rule(acc, grammar, rule)
+            }
+        }
+        Rule::Opt(rule) => lower_rule(acc, grammar, rule),
+    }
+}
+
+fn deduplicate_fields(ast: &mut AstSrc) {
+    eprintln!();
+    for node in &mut ast.nodes {
+        let mut i = 0;
+        'outer: while i < node.fields.len() {
+            for j in 0..i {
+                let f1 = &node.fields[i];
+                let f2 = &node.fields[j];
+                if f1 == f2 {
+                    node.fields.remove(i);
+                    continue 'outer;
+                }
+            }
+            i += 1;
+        }
+    }
+}
+
+fn extract_enums(ast: &mut AstSrc) {
+    for node in &mut ast.nodes {
+        for enm in &ast.enums {
+            let mut to_remove = Vec::new();
+            for (i, field) in node.fields.iter().enumerate() {
+                let ty = field.ty().to_string();
+                if enm.variants.iter().any(|it| it == &ty) {
+                    to_remove.push(i);
+                }
+            }
+            if to_remove.len() == enm.variants.len() {
+                node.remove_field(to_remove);
+                node.fields.push(Field::Node { name: enm.name.clone(), src: FieldSrc::Shorthand });
+            }
+        }
+    }
+}
+
+fn extract_struct_traits(ast: &mut AstSrc) {
+    let traits: &[(&str, &[&str])] = &[
+        ("AttrsOwner", &["attrs"]),
+        ("NameOwner", &["name"]),
+        ("VisibilityOwner", &["visibility"]),
+        ("TypeParamsOwner", &["type_param_list", "where_clause"]),
+        ("TypeBoundsOwner", &["type_bound_list", "colon_token"]),
+        ("ModuleItemOwner", &["items"]),
+        ("TypeAscriptionOwner", &["ascribed_type"]),
+        ("LoopBodyOwner", &["label", "loop_body"]),
+        ("ArgListOwner", &["arg_list"]),
+    ];
+
+    for node in &mut ast.nodes {
+        for (name, methods) in traits {
+            extract_struct_trait(node, name, methods);
+        }
+    }
+}
+
+fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
+    let mut to_remove = Vec::new();
+    for (i, field) in node.fields.iter().enumerate() {
+        let method_name = field.method_name().to_string();
+        if methods.iter().any(|&it| it == &method_name) {
+            to_remove.push(i);
+        }
+    }
+    if to_remove.len() == methods.len() {
+        node.traits.push(trait_name.to_string());
+        node.remove_field(to_remove);
+    }
+}
+
+fn extract_enum_traits(ast: &mut AstSrc) {
+    for enm in &mut ast.enums {
+        let nodes = &ast.nodes;
+        let mut variant_traits = enm
+            .variants
+            .iter()
+            .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
+            .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
+
+        let mut enum_traits = match variant_traits.next() {
+            Some(it) => it,
+            None => continue,
+        };
+        for traits in variant_traits {
+            enum_traits = enum_traits.intersection(&traits).cloned().collect();
+        }
+        enm.traits = enum_traits.into_iter().collect();
+    }
+}
+
+impl AstNodeSrc {
+    fn remove_field(&mut self, to_remove: Vec<usize>) {
+        to_remove.into_iter().rev().for_each(|idx| {
+            self.fields.remove(idx);
+        });
+    }
+}
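
Note: the new `lower` pass classifies each grammar rule either as an enum (a rule that is a plain alternation of node rules, handled by `lower_enum`) or as a struct node whose fields are collected by `lower_rule`. Below is a minimal, illustrative sketch of that classification step, using only the `ungrammar` API already visible in this diff (`str::parse::<Grammar>()`, `Grammar::iter()`, and indexing a node to reach `.name` and `.rule`). The three-rule grammar text is a made-up example for illustration, not the real rust.ungram.

// Sketch only: classify rules the way `lower`/`lower_enum` do.
// The grammar below is a hypothetical example, not rust.ungram.
use ungrammar::{Grammar, Rule};

fn main() {
    let grammar: Grammar = "
        Name = 'ident'
        Literal = 'int_number'
        Expr = Name | Literal
    "
    .parse()
    .unwrap();

    for node in grammar.iter() {
        let data = &grammar[node];
        match &data.rule {
            // A plain alternation of node rules lowers to an enum
            // (cf. `lower_enum` returning `Some(variants)`).
            Rule::Alt(_) => println!("{}: enum", data.name),
            // Everything else lowers to a struct node; its fields are
            // collected by `lower_rule`.
            _ => println!("{}: node with fields", data.name),
        }
    }
}

Run against this example grammar, the sketch prints one classification line per rule, mirroring how `lower` routes `Some(variants)` into `res.enums` and everything else into `res.nodes`.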