diff options
Diffstat (limited to 'xtask/src/codegen/gen_syntax.rs')
-rw-r--r-- | xtask/src/codegen/gen_syntax.rs | 249 |
1 files changed, 233 insertions, 16 deletions
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 745a25862..24e8be1fb 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs | |||
@@ -3,34 +3,43 @@ | |||
3 | //! Specifically, it generates the `SyntaxKind` enum and a number of newtype | 3 | //! Specifically, it generates the `SyntaxKind` enum and a number of newtype |
4 | //! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. | 4 | //! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. |
5 | 5 | ||
6 | use std::{collections::HashSet, fmt::Write}; | 6 | use std::{ |
7 | collections::{BTreeSet, HashSet}, | ||
8 | fmt::Write, | ||
9 | }; | ||
7 | 10 | ||
8 | use proc_macro2::{Punct, Spacing}; | 11 | use proc_macro2::{Punct, Spacing}; |
9 | use quote::{format_ident, quote}; | 12 | use quote::{format_ident, quote}; |
13 | use ungrammar::{Grammar, Rule}; | ||
10 | 14 | ||
11 | use crate::{ | 15 | use crate::{ |
12 | ast_src::{AstSrc, Field, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC}, | 16 | ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC}, |
13 | codegen::{self, update, Mode}, | 17 | codegen::{self, update, Mode}, |
14 | project_root, Result, | 18 | project_root, Result, |
15 | }; | 19 | }; |
16 | 20 | ||
17 | pub fn generate_syntax(mode: Mode) -> Result<()> { | 21 | pub fn generate_syntax(mode: Mode) -> Result<()> { |
22 | let grammar = include_str!("rust.ungram") | ||
23 | .parse::<Grammar>() | ||
24 | .unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err)); | ||
25 | let ast = lower(&grammar); | ||
26 | |||
18 | let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); | 27 | let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); |
19 | let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; | 28 | let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; |
20 | update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; | 29 | update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; |
21 | 30 | ||
22 | let ast_tokens_file = project_root().join(codegen::AST_TOKENS); | 31 | let ast_tokens_file = project_root().join(codegen::AST_TOKENS); |
23 | let contents = generate_tokens(AST_SRC)?; | 32 | let contents = generate_tokens(&ast)?; |
24 | update(ast_tokens_file.as_path(), &contents, mode)?; | 33 | update(ast_tokens_file.as_path(), &contents, mode)?; |
25 | 34 | ||
26 | let ast_nodes_file = project_root().join(codegen::AST_NODES); | 35 | let ast_nodes_file = project_root().join(codegen::AST_NODES); |
27 | let contents = generate_nodes(KINDS_SRC, AST_SRC)?; | 36 | let contents = generate_nodes(KINDS_SRC, &ast)?; |
28 | update(ast_nodes_file.as_path(), &contents, mode)?; | 37 | update(ast_nodes_file.as_path(), &contents, mode)?; |
29 | 38 | ||
30 | Ok(()) | 39 | Ok(()) |
31 | } | 40 | } |
32 | 41 | ||
33 | fn generate_tokens(grammar: AstSrc<'_>) -> Result<String> { | 42 | fn generate_tokens(grammar: &AstSrc) -> Result<String> { |
34 | let tokens = grammar.tokens.iter().map(|token| { | 43 | let tokens = grammar.tokens.iter().map(|token| { |
35 | let name = format_ident!("{}", token); | 44 | let name = format_ident!("{}", token); |
36 | let kind = format_ident!("{}", to_upper_snake_case(token)); | 45 | let kind = format_ident!("{}", to_upper_snake_case(token)); |
@@ -62,13 +71,13 @@ fn generate_tokens(grammar: AstSrc<'_>) -> Result<String> { | |||
62 | Ok(pretty) | 71 | Ok(pretty) |
63 | } | 72 | } |
64 | 73 | ||
65 | fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> { | 74 | fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result<String> { |
66 | let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar | 75 | let (node_defs, node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar |
67 | .nodes | 76 | .nodes |
68 | .iter() | 77 | .iter() |
69 | .map(|node| { | 78 | .map(|node| { |
70 | let name = format_ident!("{}", node.name); | 79 | let name = format_ident!("{}", node.name); |
71 | let kind = format_ident!("{}", to_upper_snake_case(node.name)); | 80 | let kind = format_ident!("{}", to_upper_snake_case(&node.name)); |
72 | let traits = node.traits.iter().map(|trait_name| { | 81 | let traits = node.traits.iter().map(|trait_name| { |
73 | let trait_name = format_ident!("{}", trait_name); | 82 | let trait_name = format_ident!("{}", trait_name); |
74 | quote!(impl ast::#trait_name for #name {}) | 83 | quote!(impl ast::#trait_name for #name {}) |
@@ -192,8 +201,8 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> { | |||
192 | }) | 201 | }) |
193 | .unzip(); | 202 | .unzip(); |
194 | 203 | ||
195 | let enum_names = grammar.enums.iter().map(|it| it.name); | 204 | let enum_names = grammar.enums.iter().map(|it| &it.name); |
196 | let node_names = grammar.nodes.iter().map(|it| it.name); | 205 | let node_names = grammar.nodes.iter().map(|it| &it.name); |
197 | 206 | ||
198 | let display_impls = | 207 | let display_impls = |
199 | enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| { | 208 | enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| { |
@@ -212,9 +221,11 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> { | |||
212 | .nodes | 221 | .nodes |
213 | .iter() | 222 | .iter() |
214 | .map(|kind| to_pascal_case(kind)) | 223 | .map(|kind| to_pascal_case(kind)) |
215 | .filter(|name| !defined_nodes.contains(name.as_str())) | 224 | .filter(|name| !defined_nodes.iter().any(|&it| it == name)) |
216 | { | 225 | { |
217 | eprintln!("Warning: node {} not defined in ast source", node); | 226 | drop(node) |
227 | // TODO: restore this | ||
228 | // eprintln!("Warning: node {} not defined in ast source", node); | ||
218 | } | 229 | } |
219 | 230 | ||
220 | let ast = quote! { | 231 | let ast = quote! { |
@@ -236,12 +247,12 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> { | |||
236 | let mut res = String::with_capacity(ast.len() * 2); | 247 | let mut res = String::with_capacity(ast.len() * 2); |
237 | 248 | ||
238 | let mut docs = | 249 | let mut docs = |
239 | grammar.nodes.iter().map(|it| it.doc).chain(grammar.enums.iter().map(|it| it.doc)); | 250 | grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc)); |
240 | 251 | ||
241 | for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") { | 252 | for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") { |
242 | res.push_str(chunk); | 253 | res.push_str(chunk); |
243 | if let Some(doc) = docs.next() { | 254 | if let Some(doc) = docs.next() { |
244 | write_doc_comment(doc, &mut res); | 255 | write_doc_comment(&doc, &mut res); |
245 | } | 256 | } |
246 | } | 257 | } |
247 | 258 | ||
@@ -249,7 +260,7 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result<String> { | |||
249 | Ok(pretty) | 260 | Ok(pretty) |
250 | } | 261 | } |
251 | 262 | ||
252 | fn write_doc_comment(contents: &[&str], dest: &mut String) { | 263 | fn write_doc_comment(contents: &[String], dest: &mut String) { |
253 | for line in contents { | 264 | for line in contents { |
254 | writeln!(dest, "///{}", line).unwrap(); | 265 | writeln!(dest, "///{}", line).unwrap(); |
255 | } | 266 | } |
@@ -413,7 +424,11 @@ fn to_pascal_case(s: &str) -> String { | |||
413 | buf | 424 | buf |
414 | } | 425 | } |
415 | 426 | ||
/// Returns `s` with a single `s` appended.
///
/// This is a purely mechanical plural: no irregular forms are handled.
fn pluralize(s: &str) -> String {
    let mut plural = String::with_capacity(s.len() + 1);
    plural.push_str(s);
    plural.push('s');
    plural
}
430 | |||
431 | impl Field { | ||
417 | fn is_many(&self) -> bool { | 432 | fn is_many(&self) -> bool { |
418 | matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) | 433 | matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) |
419 | } | 434 | } |
@@ -429,7 +444,7 @@ impl Field<'_> { | |||
429 | fn method_name(&self) -> proc_macro2::Ident { | 444 | fn method_name(&self) -> proc_macro2::Ident { |
430 | match self { | 445 | match self { |
431 | Field::Token(name) => { | 446 | Field::Token(name) => { |
432 | let name = match *name { | 447 | let name = match name.as_str() { |
433 | ";" => "semicolon", | 448 | ";" => "semicolon", |
434 | "->" => "thin_arrow", | 449 | "->" => "thin_arrow", |
435 | "'{'" => "l_curly", | 450 | "'{'" => "l_curly", |
@@ -448,6 +463,7 @@ impl Field<'_> { | |||
448 | "." => "dot", | 463 | "." => "dot", |
449 | ".." => "dotdot", | 464 | ".." => "dotdot", |
450 | "..." => "dotdotdot", | 465 | "..." => "dotdotdot", |
466 | "..=" => "dotdoteq", | ||
451 | "=>" => "fat_arrow", | 467 | "=>" => "fat_arrow", |
452 | "@" => "at", | 468 | "@" => "at", |
453 | ":" => "colon", | 469 | ":" => "colon", |
@@ -474,3 +490,204 @@ impl Field<'_> { | |||
474 | } | 490 | } |
475 | } | 491 | } |
476 | } | 492 | } |
493 | |||
494 | fn lower(grammar: &Grammar) -> AstSrc { | ||
495 | let mut res = AstSrc::default(); | ||
496 | res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()]; | ||
497 | |||
498 | let nodes = grammar | ||
499 | .iter() | ||
500 | .filter(|&node| match grammar[node].rule { | ||
501 | Rule::Node(it) if it == node => false, | ||
502 | _ => true, | ||
503 | }) | ||
504 | .collect::<Vec<_>>(); | ||
505 | |||
506 | for &node in &nodes { | ||
507 | let name = grammar[node].name.clone(); | ||
508 | let rule = &grammar[node].rule; | ||
509 | match lower_enum(grammar, rule) { | ||
510 | Some(variants) => { | ||
511 | let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants }; | ||
512 | res.enums.push(enum_src); | ||
513 | } | ||
514 | None => { | ||
515 | let mut fields = Vec::new(); | ||
516 | lower_rule(&mut fields, grammar, rule); | ||
517 | res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields }); | ||
518 | } | ||
519 | } | ||
520 | } | ||
521 | |||
522 | deduplicate_fields(&mut res); | ||
523 | extract_enums(&mut res); | ||
524 | extract_struct_traits(&mut res); | ||
525 | extract_enum_traits(&mut res); | ||
526 | res | ||
527 | } | ||
528 | |||
529 | fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> { | ||
530 | let alternatives = match rule { | ||
531 | Rule::Alt(it) => it, | ||
532 | _ => return None, | ||
533 | }; | ||
534 | let mut variants = Vec::new(); | ||
535 | for alternative in alternatives { | ||
536 | match alternative { | ||
537 | Rule::Node(it) => variants.push(grammar[*it].name.clone()), | ||
538 | _ => return None, | ||
539 | } | ||
540 | } | ||
541 | Some(variants) | ||
542 | } | ||
543 | |||
544 | fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, rule: &Rule) { | ||
545 | match rule { | ||
546 | Rule::Node(node) => { | ||
547 | let field = Field::Node { name: grammar[*node].name.clone(), src: FieldSrc::Shorthand }; | ||
548 | acc.push(field); | ||
549 | } | ||
550 | Rule::Token(token) => { | ||
551 | let mut name = grammar[*token].name.clone(); | ||
552 | if name != "int_number" && name != "string" { | ||
553 | if "[]{}()".contains(&name) { | ||
554 | name = format!("'{}'", name); | ||
555 | } | ||
556 | let field = Field::Token(name); | ||
557 | acc.push(field); | ||
558 | } | ||
559 | } | ||
560 | Rule::Rep(inner) => { | ||
561 | if let Rule::Node(node) = &**inner { | ||
562 | let name = grammar[*node].name.clone(); | ||
563 | let label = pluralize(&to_lower_snake_case(&name)); | ||
564 | let field = Field::Node { name: label.clone(), src: FieldSrc::Many(name) }; | ||
565 | acc.push(field); | ||
566 | return; | ||
567 | } | ||
568 | todo!("{:?}", rule) | ||
569 | } | ||
570 | Rule::Labeled { label, rule } => { | ||
571 | let node = match &**rule { | ||
572 | Rule::Rep(inner) | Rule::Opt(inner) => match &**inner { | ||
573 | Rule::Node(node) => node, | ||
574 | _ => todo!("{:?}", rule), | ||
575 | }, | ||
576 | Rule::Node(node) => node, | ||
577 | _ => todo!("{:?}", rule), | ||
578 | }; | ||
579 | let field = Field::Node { | ||
580 | name: label.clone(), | ||
581 | src: match &**rule { | ||
582 | Rule::Rep(_) => FieldSrc::Many(grammar[*node].name.clone()), | ||
583 | _ => FieldSrc::Optional(grammar[*node].name.clone()), | ||
584 | }, | ||
585 | }; | ||
586 | acc.push(field); | ||
587 | } | ||
588 | Rule::Seq(rules) | Rule::Alt(rules) => { | ||
589 | for rule in rules { | ||
590 | lower_rule(acc, grammar, rule) | ||
591 | } | ||
592 | } | ||
593 | Rule::Opt(rule) => lower_rule(acc, grammar, rule), | ||
594 | } | ||
595 | } | ||
596 | |||
597 | fn deduplicate_fields(ast: &mut AstSrc) { | ||
598 | eprintln!(); | ||
599 | for node in &mut ast.nodes { | ||
600 | let mut i = 0; | ||
601 | 'outer: while i < node.fields.len() { | ||
602 | for j in 0..i { | ||
603 | let f1 = &node.fields[i]; | ||
604 | let f2 = &node.fields[j]; | ||
605 | if f1 == f2 { | ||
606 | node.fields.remove(i); | ||
607 | continue 'outer; | ||
608 | } | ||
609 | } | ||
610 | i += 1; | ||
611 | } | ||
612 | } | ||
613 | } | ||
614 | |||
615 | fn extract_enums(ast: &mut AstSrc) { | ||
616 | for node in &mut ast.nodes { | ||
617 | for enm in &ast.enums { | ||
618 | let mut to_remove = Vec::new(); | ||
619 | for (i, field) in node.fields.iter().enumerate() { | ||
620 | let ty = field.ty().to_string(); | ||
621 | if enm.variants.iter().any(|it| it == &ty) { | ||
622 | to_remove.push(i); | ||
623 | } | ||
624 | } | ||
625 | if to_remove.len() == enm.variants.len() { | ||
626 | node.remove_field(to_remove); | ||
627 | node.fields.push(Field::Node { name: enm.name.clone(), src: FieldSrc::Shorthand }); | ||
628 | } | ||
629 | } | ||
630 | } | ||
631 | } | ||
632 | |||
633 | fn extract_struct_traits(ast: &mut AstSrc) { | ||
634 | let traits: &[(&str, &[&str])] = &[ | ||
635 | ("AttrsOwner", &["attrs"]), | ||
636 | ("NameOwner", &["name"]), | ||
637 | ("VisibilityOwner", &["visibility"]), | ||
638 | ("TypeParamsOwner", &["type_param_list", "where_clause"]), | ||
639 | ("TypeBoundsOwner", &["type_bound_list", "colon_token"]), | ||
640 | ("ModuleItemOwner", &["items"]), | ||
641 | ("TypeAscriptionOwner", &["ascribed_type"]), | ||
642 | ("LoopBodyOwner", &["label", "loop_body"]), | ||
643 | ("ArgListOwner", &["arg_list"]), | ||
644 | ]; | ||
645 | |||
646 | for node in &mut ast.nodes { | ||
647 | for (name, methods) in traits { | ||
648 | extract_struct_trait(node, name, methods); | ||
649 | } | ||
650 | } | ||
651 | } | ||
652 | |||
653 | fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) { | ||
654 | let mut to_remove = Vec::new(); | ||
655 | for (i, field) in node.fields.iter().enumerate() { | ||
656 | let method_name = field.method_name().to_string(); | ||
657 | if methods.iter().any(|&it| it == &method_name) { | ||
658 | to_remove.push(i); | ||
659 | } | ||
660 | } | ||
661 | if to_remove.len() == methods.len() { | ||
662 | node.traits.push(trait_name.to_string()); | ||
663 | node.remove_field(to_remove); | ||
664 | } | ||
665 | } | ||
666 | |||
667 | fn extract_enum_traits(ast: &mut AstSrc) { | ||
668 | for enm in &mut ast.enums { | ||
669 | let nodes = &ast.nodes; | ||
670 | let mut variant_traits = enm | ||
671 | .variants | ||
672 | .iter() | ||
673 | .map(|var| nodes.iter().find(|it| &it.name == var).unwrap()) | ||
674 | .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>()); | ||
675 | |||
676 | let mut enum_traits = match variant_traits.next() { | ||
677 | Some(it) => it, | ||
678 | None => continue, | ||
679 | }; | ||
680 | for traits in variant_traits { | ||
681 | enum_traits = enum_traits.intersection(&traits).cloned().collect(); | ||
682 | } | ||
683 | enm.traits = enum_traits.into_iter().collect(); | ||
684 | } | ||
685 | } | ||
686 | |||
687 | impl AstNodeSrc { | ||
688 | fn remove_field(&mut self, to_remove: Vec<usize>) { | ||
689 | to_remove.into_iter().rev().for_each(|idx| { | ||
690 | self.fields.remove(idx); | ||
691 | }); | ||
692 | } | ||
693 | } | ||