aboutsummaryrefslogtreecommitdiff
path: root/xtask/src/codegen/gen_syntax.rs
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2020-07-29 14:45:23 +0100
committerAleksey Kladov <[email protected]>2020-07-29 18:18:25 +0100
commit3d28292157e1b6c9675ef64eddf53786c3e7dc5f (patch)
tree1942eeab995b0bd191c6a998b47cb20a426e5dd3 /xtask/src/codegen/gen_syntax.rs
parent525ae706b3e4c0f5f8b80d197e5fede0a9974442 (diff)
Switch to ungrammar from ast_src
The primary advantage of ungrammar is that it (eventually) allows one to describe concrete syntax tree structure -- with alternatives and specific sequence of tokens & nodes. That should be re-usable for: * generate `make` calls * Rust reference * Hypothetical parser's evented API We loose doc comments for the time being unfortunately. I don't think we should add support for doc comments to ungrammar -- they'll make grammar file hard to read. We might supply docs as out-of band info, or maybe just via a reference, but we'll think about that once things are no longer in flux
Diffstat (limited to 'xtask/src/codegen/gen_syntax.rs')
-rw-r--r--xtask/src/codegen/gen_syntax.rs224
1 files changed, 220 insertions, 4 deletions
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs
index 5a18b3e2b..24e8be1fb 100644
--- a/xtask/src/codegen/gen_syntax.rs
+++ b/xtask/src/codegen/gen_syntax.rs
@@ -3,19 +3,27 @@
3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype 3//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
4//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. 4//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
5 5
6use std::{collections::HashSet, fmt::Write}; 6use std::{
7 collections::{BTreeSet, HashSet},
8 fmt::Write,
9};
7 10
8use proc_macro2::{Punct, Spacing}; 11use proc_macro2::{Punct, Spacing};
9use quote::{format_ident, quote}; 12use quote::{format_ident, quote};
13use ungrammar::{Grammar, Rule};
10 14
11use crate::{ 15use crate::{
12 ast_src::{rust_ast, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC}, 16 ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC},
13 codegen::{self, update, Mode}, 17 codegen::{self, update, Mode},
14 project_root, Result, 18 project_root, Result,
15}; 19};
16 20
17pub fn generate_syntax(mode: Mode) -> Result<()> { 21pub fn generate_syntax(mode: Mode) -> Result<()> {
18 let ast = rust_ast(); 22 let grammar = include_str!("rust.ungram")
23 .parse::<Grammar>()
24 .unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err));
25 let ast = lower(&grammar);
26
19 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); 27 let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
20 let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; 28 let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?;
21 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; 29 update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
@@ -215,7 +223,9 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result<String> {
215 .map(|kind| to_pascal_case(kind)) 223 .map(|kind| to_pascal_case(kind))
216 .filter(|name| !defined_nodes.iter().any(|&it| it == name)) 224 .filter(|name| !defined_nodes.iter().any(|&it| it == name))
217 { 225 {
218 eprintln!("Warning: node {} not defined in ast source", node); 226 drop(node)
227 // TODO: restore this
228 // eprintln!("Warning: node {} not defined in ast source", node);
219 } 229 }
220 230
221 let ast = quote! { 231 let ast = quote! {
@@ -414,6 +424,10 @@ fn to_pascal_case(s: &str) -> String {
414 buf 424 buf
415} 425}
416 426
427fn pluralize(s: &str) -> String {
428 format!("{}s", s)
429}
430
417impl Field { 431impl Field {
418 fn is_many(&self) -> bool { 432 fn is_many(&self) -> bool {
419 matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) 433 matches!(self, Field::Node { src: FieldSrc::Many(_), .. })
@@ -449,6 +463,7 @@ impl Field {
449 "." => "dot", 463 "." => "dot",
450 ".." => "dotdot", 464 ".." => "dotdot",
451 "..." => "dotdotdot", 465 "..." => "dotdotdot",
466 "..=" => "dotdoteq",
452 "=>" => "fat_arrow", 467 "=>" => "fat_arrow",
453 "@" => "at", 468 "@" => "at",
454 ":" => "colon", 469 ":" => "colon",
@@ -475,3 +490,204 @@ impl Field {
475 } 490 }
476 } 491 }
477} 492}
493
494fn lower(grammar: &Grammar) -> AstSrc {
495 let mut res = AstSrc::default();
496 res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()];
497
498 let nodes = grammar
499 .iter()
500 .filter(|&node| match grammar[node].rule {
501 Rule::Node(it) if it == node => false,
502 _ => true,
503 })
504 .collect::<Vec<_>>();
505
506 for &node in &nodes {
507 let name = grammar[node].name.clone();
508 let rule = &grammar[node].rule;
509 match lower_enum(grammar, rule) {
510 Some(variants) => {
511 let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants };
512 res.enums.push(enum_src);
513 }
514 None => {
515 let mut fields = Vec::new();
516 lower_rule(&mut fields, grammar, rule);
517 res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields });
518 }
519 }
520 }
521
522 deduplicate_fields(&mut res);
523 extract_enums(&mut res);
524 extract_struct_traits(&mut res);
525 extract_enum_traits(&mut res);
526 res
527}
528
529fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> {
530 let alternatives = match rule {
531 Rule::Alt(it) => it,
532 _ => return None,
533 };
534 let mut variants = Vec::new();
535 for alternative in alternatives {
536 match alternative {
537 Rule::Node(it) => variants.push(grammar[*it].name.clone()),
538 _ => return None,
539 }
540 }
541 Some(variants)
542}
543
544fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, rule: &Rule) {
545 match rule {
546 Rule::Node(node) => {
547 let field = Field::Node { name: grammar[*node].name.clone(), src: FieldSrc::Shorthand };
548 acc.push(field);
549 }
550 Rule::Token(token) => {
551 let mut name = grammar[*token].name.clone();
552 if name != "int_number" && name != "string" {
553 if "[]{}()".contains(&name) {
554 name = format!("'{}'", name);
555 }
556 let field = Field::Token(name);
557 acc.push(field);
558 }
559 }
560 Rule::Rep(inner) => {
561 if let Rule::Node(node) = &**inner {
562 let name = grammar[*node].name.clone();
563 let label = pluralize(&to_lower_snake_case(&name));
564 let field = Field::Node { name: label.clone(), src: FieldSrc::Many(name) };
565 acc.push(field);
566 return;
567 }
568 todo!("{:?}", rule)
569 }
570 Rule::Labeled { label, rule } => {
571 let node = match &**rule {
572 Rule::Rep(inner) | Rule::Opt(inner) => match &**inner {
573 Rule::Node(node) => node,
574 _ => todo!("{:?}", rule),
575 },
576 Rule::Node(node) => node,
577 _ => todo!("{:?}", rule),
578 };
579 let field = Field::Node {
580 name: label.clone(),
581 src: match &**rule {
582 Rule::Rep(_) => FieldSrc::Many(grammar[*node].name.clone()),
583 _ => FieldSrc::Optional(grammar[*node].name.clone()),
584 },
585 };
586 acc.push(field);
587 }
588 Rule::Seq(rules) | Rule::Alt(rules) => {
589 for rule in rules {
590 lower_rule(acc, grammar, rule)
591 }
592 }
593 Rule::Opt(rule) => lower_rule(acc, grammar, rule),
594 }
595}
596
597fn deduplicate_fields(ast: &mut AstSrc) {
598 eprintln!();
599 for node in &mut ast.nodes {
600 let mut i = 0;
601 'outer: while i < node.fields.len() {
602 for j in 0..i {
603 let f1 = &node.fields[i];
604 let f2 = &node.fields[j];
605 if f1 == f2 {
606 node.fields.remove(i);
607 continue 'outer;
608 }
609 }
610 i += 1;
611 }
612 }
613}
614
615fn extract_enums(ast: &mut AstSrc) {
616 for node in &mut ast.nodes {
617 for enm in &ast.enums {
618 let mut to_remove = Vec::new();
619 for (i, field) in node.fields.iter().enumerate() {
620 let ty = field.ty().to_string();
621 if enm.variants.iter().any(|it| it == &ty) {
622 to_remove.push(i);
623 }
624 }
625 if to_remove.len() == enm.variants.len() {
626 node.remove_field(to_remove);
627 node.fields.push(Field::Node { name: enm.name.clone(), src: FieldSrc::Shorthand });
628 }
629 }
630 }
631}
632
633fn extract_struct_traits(ast: &mut AstSrc) {
634 let traits: &[(&str, &[&str])] = &[
635 ("AttrsOwner", &["attrs"]),
636 ("NameOwner", &["name"]),
637 ("VisibilityOwner", &["visibility"]),
638 ("TypeParamsOwner", &["type_param_list", "where_clause"]),
639 ("TypeBoundsOwner", &["type_bound_list", "colon_token"]),
640 ("ModuleItemOwner", &["items"]),
641 ("TypeAscriptionOwner", &["ascribed_type"]),
642 ("LoopBodyOwner", &["label", "loop_body"]),
643 ("ArgListOwner", &["arg_list"]),
644 ];
645
646 for node in &mut ast.nodes {
647 for (name, methods) in traits {
648 extract_struct_trait(node, name, methods);
649 }
650 }
651}
652
653fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) {
654 let mut to_remove = Vec::new();
655 for (i, field) in node.fields.iter().enumerate() {
656 let method_name = field.method_name().to_string();
657 if methods.iter().any(|&it| it == &method_name) {
658 to_remove.push(i);
659 }
660 }
661 if to_remove.len() == methods.len() {
662 node.traits.push(trait_name.to_string());
663 node.remove_field(to_remove);
664 }
665}
666
667fn extract_enum_traits(ast: &mut AstSrc) {
668 for enm in &mut ast.enums {
669 let nodes = &ast.nodes;
670 let mut variant_traits = enm
671 .variants
672 .iter()
673 .map(|var| nodes.iter().find(|it| &it.name == var).unwrap())
674 .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>());
675
676 let mut enum_traits = match variant_traits.next() {
677 Some(it) => it,
678 None => continue,
679 };
680 for traits in variant_traits {
681 enum_traits = enum_traits.intersection(&traits).cloned().collect();
682 }
683 enm.traits = enum_traits.into_iter().collect();
684 }
685}
686
687impl AstNodeSrc {
688 fn remove_field(&mut self, to_remove: Vec<usize>) {
689 to_remove.into_iter().rev().for_each(|idx| {
690 self.fields.remove(idx);
691 });
692 }
693}