diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2020-07-29 18:18:53 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2020-07-29 18:18:53 +0100 |
commit | 2dfda0b984c45946b9a4148bd848350deac544f2 (patch) | |
tree | 1942eeab995b0bd191c6a998b47cb20a426e5dd3 /xtask/src/codegen/gen_syntax.rs | |
parent | 525ae706b3e4c0f5f8b80d197e5fede0a9974442 (diff) | |
parent | 3d28292157e1b6c9675ef64eddf53786c3e7dc5f (diff) |
Merge #5572
5572: Switch to ungrammar from ast_src r=matklad a=matklad
The primary advantage of ungrammar is that it (eventually) allows one
to describe concrete syntax tree structure -- with alternatives and
specific sequence of tokens & nodes.
That should be re-usable for:
* generate `make` calls
* Rust reference
* Hypothetical parser's evented API
We lose doc comments for the time being unfortunately. I don't think
we should add support for doc comments to ungrammar -- they'll make
grammar file hard to read. We might supply docs as out-of band info,
or maybe just via a reference, but we'll think about that once things
are no longer in flux
bors r+
🤖
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'xtask/src/codegen/gen_syntax.rs')
-rw-r--r-- | xtask/src/codegen/gen_syntax.rs | 224 |
1 files changed, 220 insertions, 4 deletions
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 5a18b3e2b..24e8be1fb 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs | |||
@@ -3,19 +3,27 @@ | |||
3 | //! Specifically, it generates the `SyntaxKind` enum and a number of newtype | 3 | //! Specifically, it generates the `SyntaxKind` enum and a number of newtype |
4 | //! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. | 4 | //! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. |
5 | 5 | ||
6 | use std::{collections::HashSet, fmt::Write}; | 6 | use std::{ |
7 | collections::{BTreeSet, HashSet}, | ||
8 | fmt::Write, | ||
9 | }; | ||
7 | 10 | ||
8 | use proc_macro2::{Punct, Spacing}; | 11 | use proc_macro2::{Punct, Spacing}; |
9 | use quote::{format_ident, quote}; | 12 | use quote::{format_ident, quote}; |
13 | use ungrammar::{Grammar, Rule}; | ||
10 | 14 | ||
11 | use crate::{ | 15 | use crate::{ |
12 | ast_src::{rust_ast, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC}, | 16 | ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC}, |
13 | codegen::{self, update, Mode}, | 17 | codegen::{self, update, Mode}, |
14 | project_root, Result, | 18 | project_root, Result, |
15 | }; | 19 | }; |
16 | 20 | ||
17 | pub fn generate_syntax(mode: Mode) -> Result<()> { | 21 | pub fn generate_syntax(mode: Mode) -> Result<()> { |
18 | let ast = rust_ast(); | 22 | let grammar = include_str!("rust.ungram") |
23 | .parse::<Grammar>() | ||
24 | .unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err)); | ||
25 | let ast = lower(&grammar); | ||
26 | |||
19 | let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); | 27 | let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); |
20 | let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; | 28 | let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; |
21 | update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; | 29 | update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; |
@@ -215,7 +223,9 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result<String> { | |||
215 | .map(|kind| to_pascal_case(kind)) | 223 | .map(|kind| to_pascal_case(kind)) |
216 | .filter(|name| !defined_nodes.iter().any(|&it| it == name)) | 224 | .filter(|name| !defined_nodes.iter().any(|&it| it == name)) |
217 | { | 225 | { |
218 | eprintln!("Warning: node {} not defined in ast source", node); | 226 | drop(node) |
227 | // TODO: restore this | ||
228 | // eprintln!("Warning: node {} not defined in ast source", node); | ||
219 | } | 229 | } |
220 | 230 | ||
221 | let ast = quote! { | 231 | let ast = quote! { |
@@ -414,6 +424,10 @@ fn to_pascal_case(s: &str) -> String { | |||
414 | buf | 424 | buf |
415 | } | 425 | } |
416 | 426 | ||
427 | fn pluralize(s: &str) -> String { | ||
428 | format!("{}s", s) | ||
429 | } | ||
430 | |||
417 | impl Field { | 431 | impl Field { |
418 | fn is_many(&self) -> bool { | 432 | fn is_many(&self) -> bool { |
419 | matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) | 433 | matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) |
@@ -449,6 +463,7 @@ impl Field { | |||
449 | "." => "dot", | 463 | "." => "dot", |
450 | ".." => "dotdot", | 464 | ".." => "dotdot", |
451 | "..." => "dotdotdot", | 465 | "..." => "dotdotdot", |
466 | "..=" => "dotdoteq", | ||
452 | "=>" => "fat_arrow", | 467 | "=>" => "fat_arrow", |
453 | "@" => "at", | 468 | "@" => "at", |
454 | ":" => "colon", | 469 | ":" => "colon", |
@@ -475,3 +490,204 @@ impl Field { | |||
475 | } | 490 | } |
476 | } | 491 | } |
477 | } | 492 | } |
493 | |||
494 | fn lower(grammar: &Grammar) -> AstSrc { | ||
495 | let mut res = AstSrc::default(); | ||
496 | res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()]; | ||
497 | |||
498 | let nodes = grammar | ||
499 | .iter() | ||
500 | .filter(|&node| match grammar[node].rule { | ||
501 | Rule::Node(it) if it == node => false, | ||
502 | _ => true, | ||
503 | }) | ||
504 | .collect::<Vec<_>>(); | ||
505 | |||
506 | for &node in &nodes { | ||
507 | let name = grammar[node].name.clone(); | ||
508 | let rule = &grammar[node].rule; | ||
509 | match lower_enum(grammar, rule) { | ||
510 | Some(variants) => { | ||
511 | let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants }; | ||
512 | res.enums.push(enum_src); | ||
513 | } | ||
514 | None => { | ||
515 | let mut fields = Vec::new(); | ||
516 | lower_rule(&mut fields, grammar, rule); | ||
517 | res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields }); | ||
518 | } | ||
519 | } | ||
520 | } | ||
521 | |||
522 | deduplicate_fields(&mut res); | ||
523 | extract_enums(&mut res); | ||
524 | extract_struct_traits(&mut res); | ||
525 | extract_enum_traits(&mut res); | ||
526 | res | ||
527 | } | ||
528 | |||
529 | fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option<Vec<String>> { | ||
530 | let alternatives = match rule { | ||
531 | Rule::Alt(it) => it, | ||
532 | _ => return None, | ||
533 | }; | ||
534 | let mut variants = Vec::new(); | ||
535 | for alternative in alternatives { | ||
536 | match alternative { | ||
537 | Rule::Node(it) => variants.push(grammar[*it].name.clone()), | ||
538 | _ => return None, | ||
539 | } | ||
540 | } | ||
541 | Some(variants) | ||
542 | } | ||
543 | |||
544 | fn lower_rule(acc: &mut Vec<Field>, grammar: &Grammar, rule: &Rule) { | ||
545 | match rule { | ||
546 | Rule::Node(node) => { | ||
547 | let field = Field::Node { name: grammar[*node].name.clone(), src: FieldSrc::Shorthand }; | ||
548 | acc.push(field); | ||
549 | } | ||
550 | Rule::Token(token) => { | ||
551 | let mut name = grammar[*token].name.clone(); | ||
552 | if name != "int_number" && name != "string" { | ||
553 | if "[]{}()".contains(&name) { | ||
554 | name = format!("'{}'", name); | ||
555 | } | ||
556 | let field = Field::Token(name); | ||
557 | acc.push(field); | ||
558 | } | ||
559 | } | ||
560 | Rule::Rep(inner) => { | ||
561 | if let Rule::Node(node) = &**inner { | ||
562 | let name = grammar[*node].name.clone(); | ||
563 | let label = pluralize(&to_lower_snake_case(&name)); | ||
564 | let field = Field::Node { name: label.clone(), src: FieldSrc::Many(name) }; | ||
565 | acc.push(field); | ||
566 | return; | ||
567 | } | ||
568 | todo!("{:?}", rule) | ||
569 | } | ||
570 | Rule::Labeled { label, rule } => { | ||
571 | let node = match &**rule { | ||
572 | Rule::Rep(inner) | Rule::Opt(inner) => match &**inner { | ||
573 | Rule::Node(node) => node, | ||
574 | _ => todo!("{:?}", rule), | ||
575 | }, | ||
576 | Rule::Node(node) => node, | ||
577 | _ => todo!("{:?}", rule), | ||
578 | }; | ||
579 | let field = Field::Node { | ||
580 | name: label.clone(), | ||
581 | src: match &**rule { | ||
582 | Rule::Rep(_) => FieldSrc::Many(grammar[*node].name.clone()), | ||
583 | _ => FieldSrc::Optional(grammar[*node].name.clone()), | ||
584 | }, | ||
585 | }; | ||
586 | acc.push(field); | ||
587 | } | ||
588 | Rule::Seq(rules) | Rule::Alt(rules) => { | ||
589 | for rule in rules { | ||
590 | lower_rule(acc, grammar, rule) | ||
591 | } | ||
592 | } | ||
593 | Rule::Opt(rule) => lower_rule(acc, grammar, rule), | ||
594 | } | ||
595 | } | ||
596 | |||
597 | fn deduplicate_fields(ast: &mut AstSrc) { | ||
598 | eprintln!(); | ||
599 | for node in &mut ast.nodes { | ||
600 | let mut i = 0; | ||
601 | 'outer: while i < node.fields.len() { | ||
602 | for j in 0..i { | ||
603 | let f1 = &node.fields[i]; | ||
604 | let f2 = &node.fields[j]; | ||
605 | if f1 == f2 { | ||
606 | node.fields.remove(i); | ||
607 | continue 'outer; | ||
608 | } | ||
609 | } | ||
610 | i += 1; | ||
611 | } | ||
612 | } | ||
613 | } | ||
614 | |||
615 | fn extract_enums(ast: &mut AstSrc) { | ||
616 | for node in &mut ast.nodes { | ||
617 | for enm in &ast.enums { | ||
618 | let mut to_remove = Vec::new(); | ||
619 | for (i, field) in node.fields.iter().enumerate() { | ||
620 | let ty = field.ty().to_string(); | ||
621 | if enm.variants.iter().any(|it| it == &ty) { | ||
622 | to_remove.push(i); | ||
623 | } | ||
624 | } | ||
625 | if to_remove.len() == enm.variants.len() { | ||
626 | node.remove_field(to_remove); | ||
627 | node.fields.push(Field::Node { name: enm.name.clone(), src: FieldSrc::Shorthand }); | ||
628 | } | ||
629 | } | ||
630 | } | ||
631 | } | ||
632 | |||
633 | fn extract_struct_traits(ast: &mut AstSrc) { | ||
634 | let traits: &[(&str, &[&str])] = &[ | ||
635 | ("AttrsOwner", &["attrs"]), | ||
636 | ("NameOwner", &["name"]), | ||
637 | ("VisibilityOwner", &["visibility"]), | ||
638 | ("TypeParamsOwner", &["type_param_list", "where_clause"]), | ||
639 | ("TypeBoundsOwner", &["type_bound_list", "colon_token"]), | ||
640 | ("ModuleItemOwner", &["items"]), | ||
641 | ("TypeAscriptionOwner", &["ascribed_type"]), | ||
642 | ("LoopBodyOwner", &["label", "loop_body"]), | ||
643 | ("ArgListOwner", &["arg_list"]), | ||
644 | ]; | ||
645 | |||
646 | for node in &mut ast.nodes { | ||
647 | for (name, methods) in traits { | ||
648 | extract_struct_trait(node, name, methods); | ||
649 | } | ||
650 | } | ||
651 | } | ||
652 | |||
653 | fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) { | ||
654 | let mut to_remove = Vec::new(); | ||
655 | for (i, field) in node.fields.iter().enumerate() { | ||
656 | let method_name = field.method_name().to_string(); | ||
657 | if methods.iter().any(|&it| it == &method_name) { | ||
658 | to_remove.push(i); | ||
659 | } | ||
660 | } | ||
661 | if to_remove.len() == methods.len() { | ||
662 | node.traits.push(trait_name.to_string()); | ||
663 | node.remove_field(to_remove); | ||
664 | } | ||
665 | } | ||
666 | |||
667 | fn extract_enum_traits(ast: &mut AstSrc) { | ||
668 | for enm in &mut ast.enums { | ||
669 | let nodes = &ast.nodes; | ||
670 | let mut variant_traits = enm | ||
671 | .variants | ||
672 | .iter() | ||
673 | .map(|var| nodes.iter().find(|it| &it.name == var).unwrap()) | ||
674 | .map(|node| node.traits.iter().cloned().collect::<BTreeSet<_>>()); | ||
675 | |||
676 | let mut enum_traits = match variant_traits.next() { | ||
677 | Some(it) => it, | ||
678 | None => continue, | ||
679 | }; | ||
680 | for traits in variant_traits { | ||
681 | enum_traits = enum_traits.intersection(&traits).cloned().collect(); | ||
682 | } | ||
683 | enm.traits = enum_traits.into_iter().collect(); | ||
684 | } | ||
685 | } | ||
686 | |||
687 | impl AstNodeSrc { | ||
688 | fn remove_field(&mut self, to_remove: Vec<usize>) { | ||
689 | to_remove.into_iter().rev().for_each(|idx| { | ||
690 | self.fields.remove(idx); | ||
691 | }); | ||
692 | } | ||
693 | } | ||