about summary refs log tree commit diff
path: root/xtask/src/codegen/gen_syntax.rs
diff options
context:
space:
mode:
Diffstat (limited to 'xtask/src/codegen/gen_syntax.rs')
-rw-r--r--  xtask/src/codegen/gen_syntax.rs  296
1 file changed, 270 insertions, 26 deletions
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs
index 745a25862..d6a72ccc0 100644
--- a/xtask/src/codegen/gen_syntax.rs
+++ b/xtask/src/codegen/gen_syntax.rs
@@ -3,34 +3,43 @@
3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype 3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
4//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. 4//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
5 5
6use std::{collections::HashSet, fmt::Write}; 6use std::{
7 collections::{BTreeSet, HashSet},
8 fmt::Write,
9};
7 10
8use proc_macro2::{Punct, Spacing}; 11use proc_macro2::{Punct, Spacing};
9use quote::{format_ident, quote}; 12use quote::{format_ident, quote};
13use ungrammar::{Grammar, Rule};
10 14
11use crate::{ 15use crate::{
12 ast_src::{AstSrc, Field, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC}, 16 ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC},
13 codegen::{self, update, Mode}, 17 codegen::{self, update, Mode},
14 project_root, Result, 18 project_root, Result,
15}; 19};
16 20
17pub fn generate_syntax(mode: Mode) -> Result<()> { 21pub fn generate_syntax(mode: Mode) -> Result<()> {
22 let grammar = include_str!("rust.ungram")
23 .parse::<Grammar>()
24 .unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err));
25 let ast = lower(&grammar);
26
18 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); 27 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
19 let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; 28 let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?;
20 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; 29 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
21 30
22 let ast_tokens_file = project_root().join(codegen::AST_TOKENS); 31 let ast_tokens_file = project_root().join(codegen::AST_TOKENS);
23 let contents = generate_tokens(AST_SRC)?; 32 let contents = generate_tokens(&ast)?;
24 update(ast_tokens_file.as_path(), &contents, mode)?; 33 update(ast_tokens_file.as_path(), &contents, mode)?;
25 34
26 let ast_nodes_file = project_root().join(codegen::AST_NODES); 35 let ast_nodes_file = project_root().join(codegen::AST_NODES);
27 let contents = generate_nodes(KINDS_SRC, AST_SRC)?; 36 let contents = generate_nodes(KINDS_SRC, &ast)?;
28 update(ast_nodes_file.as_path(), &contents, mode)?; 37 update(ast_nodes_file.as_path(), &contents, mode)?;
29 38
30 Ok(()) 39 Ok(())
31} 40}
32 41
33fn generate_tokens(grammar: AstSrc<'_>) -> Result<String> { 42fn generate_tokens(grammar: &AstSrc) -> Result<String> {
34 let tokens = grammar.tokens.iter().map(|token| { 43 let tokens = grammar.tokens.iter().map(|token| {
35 let name = format_ident!("{}", token); 44 let name = format_ident!("{}", token);
36 let kind = format_ident!("{}", to_upper_snake_case(token)); 45 let kind = format_ident!("{}", to_upper_snake_case(token));
@@ -62,13 +71,13 @@ fn generate_tokens(grammar: AstSrc<'_>) -> Result<String> {
62 Ok(pretty) 71 Ok(pretty)
63} 72}
64 73
65fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> { 74fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result<String> {
66 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar 75 let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar
67 .nodes 76 .nodes
68 .iter() 77 .iter()
69 .map(|node| { 78 .map(|node| {
70 let name = format_ident!("{}", node.name); 79 let name = format_ident!("{}", node.name);
71 let kind = format_ident!("{}", to_upper_snake_case(node.name)); 80 let kind = format_ident!("{}", to_upper_snake_case(&node.name));
72 let traits = node.traits.iter().map(|trait_name| { 81 let traits = node.traits.iter().map(|trait_name| {
73 let trait_name = format_ident!("{}", trait_name); 82 let trait_name = format_ident!("{}", trait_name);
74 quote!(impl ast::#trait_name for #name {}) 83 quote!(impl ast::#trait_name for #name {})
@@ -192,8 +201,8 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
192 }) 201 })
193 .unzip(); 202 .unzip();
194 203
195 let enum_names = grammar.enums.iter().map(|it| it.name); 204 let enum_names = grammar.enums.iter().map(|it| &it.name);
196 let node_names = grammar.nodes.iter().map(|it| it.name); 205 let node_names = grammar.nodes.iter().map(|it| &it.name);
197 206
198 let display_impls = 207 let display_impls =
199 enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| { 208 enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| {
@@ -212,9 +221,11 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
212 .nodes 221 .nodes
213 .iter() 222 .iter()
214 .map(|kind| to_pascal_case(kind)) 223 .map(|kind| to_pascal_case(kind))
215 .filter(|name| !defined_nodes.contains(name.as_str())) 224 .filter(|name| !defined_nodes.iter().any(|&it| it == name))
216 { 225 {
217 eprintln!("Warning: node {} not defined in ast source", node); 226 drop(node)
227 // TODO: restore this
228 // eprintln!("Warning: node {} not defined in ast source", node);
218 } 229 }
219 230
220 let ast = quote! { 231 let ast = quote! {
@@ -236,12 +247,12 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
236 let mut res = String::with_capacity(ast.len() * 2); 247 let mut res = String::with_capacity(ast.len() * 2);
237 248
238 let mut docs = 249 let mut docs =
239 grammar.nodes.iter().map(|it| it.doc).chain(grammar.enums.iter().map(|it| it.doc)); 250 grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc));
240 251
241 for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") { 252 for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") {
242 res.push_str(chunk); 253 res.push_str(chunk);
243 if let Some(doc) = docs.next() { 254 if let Some(doc) = docs.next() {
244 write_doc_comment(doc, &mut res); 255 write_doc_comment(&doc, &mut res);
245 } 256 }
246 } 257 }
247 258
@@ -249,7 +260,7 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> {
249 Ok(pretty) 260 Ok(pretty)
250} 261}
251 262
252fn write_doc_comment(contents: &[&str], dest: &mut String) { 263fn write_doc_comment(contents: &[String], dest: &mut String) {
253 for line in contents { 264 for line in contents {
254 writeln!(dest, "///{}", line).unwrap(); 265 writeln!(dest, "///{}", line).unwrap();
255 } 266 }
@@ -296,7 +307,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result<String> {
296 307
297 let ast = quote! { 308 let ast = quote! {
298 #![allow(bad_style, missing_docs, unreachable_pub)] 309 #![allow(bad_style, missing_docs, unreachable_pub)]
299 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. 310 /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`.
300 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] 311 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
301 #[repr(u16)] 312 #[repr(u16)]
302 pub enum SyntaxKind { 313 pub enum SyntaxKind {
@@ -363,6 +374,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result<String> {
363 #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)* 374 #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)*
364 [lifetime] => { $crate::SyntaxKind::LIFETIME }; 375 [lifetime] => { $crate::SyntaxKind::LIFETIME };
365 [ident] => { $crate::SyntaxKind::IDENT }; 376 [ident] => { $crate::SyntaxKind::IDENT };
377 [shebang] => { $crate::SyntaxKind::SHEBANG };
366 } 378 }
367 }; 379 };
368 380
@@ -413,9 +425,13 @@ fn to_pascal_case(s: &str) -> String {
413 buf 425 buf
414} 426}
415 427
416impl Field<'_> { 428fn pluralize(s: &str) -> String {
429 format!("{}s", s)
430}
431
432impl Field {
417 fn is_many(&self) -> bool { 433 fn is_many(&self) -> bool {
418 matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) 434 matches!(self, Field::Node { cardinality: Cardinality::Many, .. })
419 } 435 }
420 fn token_kind(&self) -> Option<proc_macro2::TokenStream> { 436 fn token_kind(&self) -> Option<proc_macro2::TokenStream> {
421 match self { 437 match self {
@@ -429,7 +445,7 @@ impl Field<'_> {
429 fn method_name(&self) -> proc_macro2::Ident { 445 fn method_name(&self) -> proc_macro2::Ident {
430 match self { 446 match self {
431 Field::Token(name) => { 447 Field::Token(name) => {
432 let name = match *name { 448 let name = match name.as_str() {
433 ";" => "semicolon", 449 ";" => "semicolon",
434 "->" => "thin_arrow", 450 "->" => "thin_arrow",
435 "'{'" => "l_curly", 451 "'{'" => "l_curly",
@@ -448,29 +464,257 @@ impl Field<'_> {
448 "." => "dot", 464 "." => "dot",
449 ".." => "dotdot", 465 ".." => "dotdot",
450 "..." => "dotdotdot", 466 "..." => "dotdotdot",
467 "..=" => "dotdoteq",
451 "=>" => "fat_arrow", 468 "=>" => "fat_arrow",
452 "@" => "at", 469 "@" => "at",
453 ":" => "colon", 470 ":" => "colon",
454 "::" => "coloncolon", 471 "::" => "coloncolon",
455 "#" => "pound", 472 "#" => "pound",
456 "?" => "question_mark", 473 "?" => "question_mark",
474 "," => "comma",
457 _ => name, 475 _ => name,
458 }; 476 };
459 format_ident!("{}_token", name) 477 format_ident!("{}_token", name)
460 } 478 }
461 Field::Node { name, src } => match src { 479 Field::Node { name, .. } => {
462 FieldSrc::Shorthand => format_ident!("{}", to_lower_snake_case(name)), 480 if name == "type" {
463 _ => format_ident!("{}", name), 481 format_ident!("ty")
464 }, 482 } else {
483 format_ident!("{}", name)
484 }
485 }
465 } 486 }
466 } 487 }
467 fn ty(&self) -> proc_macro2::Ident { 488 fn ty(&self) -> proc_macro2::Ident {
468 match self { 489 match self {
469 Field::Token(_) => format_ident!("SyntaxToken"), 490 Field::Token(_) => format_ident!("SyntaxToken"),
470 Field::Node { name, src } => match src { 491 Field::Node { ty, .. } => format_ident!("{}", ty),
471 FieldSrc::Optional(ty) | FieldSrc::Many(ty) => format_ident!("{}", ty), 492 }
472 FieldSrc::Shorthand => format_ident!("{}", name), 493 }
473 }, 494}
495
496fn lower(grammar: &Grammar) -> AstSrc {
497 let mut res = AstSrc::default();
498 res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()];
499
500 let nodes = grammar
501 .iter()
502 .filter(|&node| match grammar[node].rule {
503 Rule::Node(it) if it == node => false,
504 _ => true,
505 })
506 .collect::<Vec<_>>();
507
508 for &node in &nodes {
509 let name = grammar[node].name.clone();
510 let rule = &grammar[node].rule;
511 match lower_enum(grammar, rule) {
512 Some(variants) => {
513 let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
514 res.enums.push(enum_src);
515 }
516 None => {
517 let mut fields = Vec::new();
518 lower_rule(&mut fields, grammar, None, rule);
519 res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
520 }
521 }
522 }
523
524 deduplicate_fields(&mut res);
525 extract_enums(&mut res);
526 extract_struct_traits(&mut res);
527 extract_enum_traits(&mut res);
528 res
529}
530
531fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
532 let alternatives = match rule {
533 Rule::Alt(it) => it,
534 _ => return None,
535 };
536 let mut variants = Vec::new();
537 for alternative in alternatives {
538 match alternative {
539 Rule::Node(it) => variants.push(grammar[*it].name.clone()),
540 _ => return None,
541 }
542 }
543 Some(variants)
544}
545
546fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, label: Option<&String>, rule: &Rule) {
547 if lower_comma_list(acc, grammar, label, rule) {
548 return;
549 }
550
551 match rule {
552 Rule::Node(node) => {
553 let ty = grammar[*node].name.clone();
554 let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty));
555 let field = Field::Node { name, ty, cardinality: Cardinality::Optional };
556 acc.push(field);
557 }
558 Rule::Token(token) => {
559 assert!(label.is_none());
560 let mut name = grammar[*token].name.clone();
561 if name != "int_number" && name != "string" {
562 if "[]{}()".contains(&name) {
563 name = format!("'{}'", name);
564 }
565 let field = Field::Token(name);
566 acc.push(field);
567 }
568 }
569 Rule::Rep(inner) => {
570 if let Rule::Node(node) = &**inner {
571 let ty = grammar[*node].name.clone();
572 let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
573 let field = Field::Node { name, ty, cardinality: Cardinality::Many };
574 acc.push(field);
575 return;
576 }
577 todo!("{:?}", rule)
578 }
579 Rule::Labeled { label: l, rule } => {
580 assert!(label.is_none());
581 lower_rule(acc, grammar, Some(l), rule);
582 }
583 Rule::Seq(rules) | Rule::Alt(rules) => {
584 for rule in rules {
585 lower_rule(acc, grammar, label, rule)
586 }
474 } 587 }
588 Rule::Opt(rule) => lower_rule(acc, grammar, label, rule),
589 }
590}
591
592// (T (',' T)* ','?)
593fn lower_comma_list(
594 acc: &mut Vec<Field>,
595 grammar: &Grammar,
596 label: Option<&String>,
597 rule: &Rule,
598) -> bool {
599 let rule = match rule {
600 Rule::Seq(it) => it,
601 _ => return false,
602 };
603 let (node, repeat, trailing_comma) = match rule.as_slice() {
604 [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_comma)] => {
605 (node, repeat, trailing_comma)
606 }
607 _ => return false,
608 };
609 let repeat = match &**repeat {
610 Rule::Seq(it) => it,
611 _ => return false,
612 };
613 match repeat.as_slice() {
614 [comma, Rule::Node(n)] if comma == &**trailing_comma && n == node => (),
615 _ => return false,
616 }
617 let ty = grammar[*node].name.clone();
618 let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty)));
619 let field = Field::Node { name, ty, cardinality: Cardinality::Many };
620 acc.push(field);
621 true
622}
623
624fn deduplicate_fields(ast: &mut AstSrc) {
625 for node in &mut ast.nodes {
626 let mut i = 0;
627 'outer: while i < node.fields.len() {
628 for j in 0..i {
629 let f1 = &node.fields[i];
630 let f2 = &node.fields[j];
631 if f1 == f2 {
632 node.fields.remove(i);
633 continue 'outer;
634 }
635 }
636 i += 1;
637 }
638 }
639}
640
641fn extract_enums(ast: &mut AstSrc) {
642 for node in &mut ast.nodes {
643 for enm in &ast.enums {
644 let mut to_remove = Vec::new();
645 for (i, field) in node.fields.iter().enumerate() {
646 let ty = field.ty().to_string();
647 if enm.variants.iter().any(|it| it == &ty) {
648 to_remove.push(i);
649 }
650 }
651 if to_remove.len() == enm.variants.len() {
652 node.remove_field(to_remove);
653 let ty = enm.name.clone();
654 let name = to_lower_snake_case(&ty);
655 node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional });
656 }
657 }
658 }
659}
660
661fn extract_struct_traits(ast: &mut AstSrc) {
662 let traits: &[(&str, &[&str])] = &[
663 ("AttrsOwner", &["attrs"]),
664 ("NameOwner", &["name"]),
665 ("VisibilityOwner", &["visibility"]),
666 ("GenericParamsOwner", &["generic_param_list", "where_clause"]),
667 ("TypeBoundsOwner", &["type_bound_list", "colon_token"]),
668 ("ModuleItemOwner", &["items"]),
669 ("LoopBodyOwner", &["label", "loop_body"]),
670 ("ArgListOwner", &["arg_list"]),
671 ];
672
673 for node in &mut ast.nodes {
674 for (name, methods) in traits {
675 extract_struct_trait(node, name, methods);
676 }
677 }
678}
679
680fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
681 let mut to_remove = Vec::new();
682 for (i, field) in node.fields.iter().enumerate() {
683 let method_name = field.method_name().to_string();
684 if methods.iter().any(|&it| it == &method_name) {
685 to_remove.push(i);
686 }
687 }
688 if to_remove.len() == methods.len() {
689 node.traits.push(trait_name.to_string());
690 node.remove_field(to_remove);
691 }
692}
693
694fn extract_enum_traits(ast: &mut AstSrc) {
695 for enm in &mut ast.enums {
696 let nodes = &ast.nodes;
697 let mut variant_traits = enm
698 .variants
699 .iter()
700 .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
701 .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
702
703 let mut enum_traits = match variant_traits.next() {
704 Some(it) => it,
705 None => continue,
706 };
707 for traits in variant_traits {
708 enum_traits = enum_traits.intersection(&traits).cloned().collect();
709 }
710 enm.traits = enum_traits.into_iter().collect();
711 }
712}
713
714impl AstNodeSrc {
715 fn remove_field(&mut self, to_remove: Vec<usize>) {
716 to_remove.into_iter().rev().for_each(|idx| {
717 self.fields.remove(idx);
718 });
475 } 719 }
476} 720}