From 3d28292157e1b6c9675ef64eddf53786c3e7dc5f Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 29 Jul 2020 15:45:23 +0200 Subject: Switch to ungrammar from ast_src The primary advantage of ungrammar is that it (eventually) allows one to describe concrete syntax tree structure -- with alternatives and specific sequence of tokens & nodes. That should be re-usable for: * generate `make` calls * Rust reference * Hypothetical parser's evented API We lose doc comments for the time being unfortunately. I don't think we should add support for doc comments to ungrammar -- they'll make grammar file hard to read. We might supply docs as out-of-band info, or maybe just via a reference, but we'll think about that once things are no longer in flux --- xtask/src/codegen/gen_syntax.rs | 224 ++++++++++++++++- xtask/src/codegen/rust.ungram | 529 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 749 insertions(+), 4 deletions(-) create mode 100644 xtask/src/codegen/rust.ungram (limited to 'xtask/src/codegen') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 5a18b3e2b..24e8be1fb 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -3,19 +3,27 @@ //! Specifically, it generates the `SyntaxKind` enum and a number of newtype //! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. 
-use std::{collections::HashSet, fmt::Write}; +use std::{ + collections::{BTreeSet, HashSet}, + fmt::Write, +}; use proc_macro2::{Punct, Spacing}; use quote::{format_ident, quote}; +use ungrammar::{Grammar, Rule}; use crate::{ - ast_src::{rust_ast, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC}, + ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC}, codegen::{self, update, Mode}, project_root, Result, }; pub fn generate_syntax(mode: Mode) -> Result<()> { - let ast = rust_ast(); + let grammar = include_str!("rust.ungram") + .parse::() + .unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err)); + let ast = lower(&grammar); + let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; @@ -215,7 +223,9 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result { .map(|kind| to_pascal_case(kind)) .filter(|name| !defined_nodes.iter().any(|&it| it == name)) { - eprintln!("Warning: node {} not defined in ast source", node); + drop(node) + // TODO: restore this + // eprintln!("Warning: node {} not defined in ast source", node); } let ast = quote! { @@ -414,6 +424,10 @@ fn to_pascal_case(s: &str) -> String { buf } +fn pluralize(s: &str) -> String { + format!("{}s", s) +} + impl Field { fn is_many(&self) -> bool { matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) @@ -449,6 +463,7 @@ impl Field { "." => "dot", ".." => "dotdot", "..." 
=> "dotdotdot", + "..=" => "dotdoteq", "=>" => "fat_arrow", "@" => "at", ":" => "colon", @@ -475,3 +490,204 @@ impl Field { } } } + +fn lower(grammar: &Grammar) -> AstSrc { + let mut res = AstSrc::default(); + res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()]; + + let nodes = grammar + .iter() + .filter(|&node| match grammar[node].rule { + Rule::Node(it) if it == node => false, + _ => true, + }) + .collect::>(); + + for &node in &nodes { + let name = grammar[node].name.clone(); + let rule = &grammar[node].rule; + match lower_enum(grammar, rule) { + Some(variants) => { + let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants }; + res.enums.push(enum_src); + } + None => { + let mut fields = Vec::new(); + lower_rule(&mut fields, grammar, rule); + res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields }); + } + } + } + + deduplicate_fields(&mut res); + extract_enums(&mut res); + extract_struct_traits(&mut res); + extract_enum_traits(&mut res); + res +} + +fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option> { + let alternatives = match rule { + Rule::Alt(it) => it, + _ => return None, + }; + let mut variants = Vec::new(); + for alternative in alternatives { + match alternative { + Rule::Node(it) => variants.push(grammar[*it].name.clone()), + _ => return None, + } + } + Some(variants) +} + +fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { + match rule { + Rule::Node(node) => { + let field = Field::Node { name: grammar[*node].name.clone(), src: FieldSrc::Shorthand }; + acc.push(field); + } + Rule::Token(token) => { + let mut name = grammar[*token].name.clone(); + if name != "int_number" && name != "string" { + if "[]{}()".contains(&name) { + name = format!("'{}'", name); + } + let field = Field::Token(name); + acc.push(field); + } + } + Rule::Rep(inner) => { + if let Rule::Node(node) = &**inner { + let name = grammar[*node].name.clone(); + let label = 
pluralize(&to_lower_snake_case(&name)); + let field = Field::Node { name: label.clone(), src: FieldSrc::Many(name) }; + acc.push(field); + return; + } + todo!("{:?}", rule) + } + Rule::Labeled { label, rule } => { + let node = match &**rule { + Rule::Rep(inner) | Rule::Opt(inner) => match &**inner { + Rule::Node(node) => node, + _ => todo!("{:?}", rule), + }, + Rule::Node(node) => node, + _ => todo!("{:?}", rule), + }; + let field = Field::Node { + name: label.clone(), + src: match &**rule { + Rule::Rep(_) => FieldSrc::Many(grammar[*node].name.clone()), + _ => FieldSrc::Optional(grammar[*node].name.clone()), + }, + }; + acc.push(field); + } + Rule::Seq(rules) | Rule::Alt(rules) => { + for rule in rules { + lower_rule(acc, grammar, rule) + } + } + Rule::Opt(rule) => lower_rule(acc, grammar, rule), + } +} + +fn deduplicate_fields(ast: &mut AstSrc) { + eprintln!(); + for node in &mut ast.nodes { + let mut i = 0; + 'outer: while i < node.fields.len() { + for j in 0..i { + let f1 = &node.fields[i]; + let f2 = &node.fields[j]; + if f1 == f2 { + node.fields.remove(i); + continue 'outer; + } + } + i += 1; + } + } +} + +fn extract_enums(ast: &mut AstSrc) { + for node in &mut ast.nodes { + for enm in &ast.enums { + let mut to_remove = Vec::new(); + for (i, field) in node.fields.iter().enumerate() { + let ty = field.ty().to_string(); + if enm.variants.iter().any(|it| it == &ty) { + to_remove.push(i); + } + } + if to_remove.len() == enm.variants.len() { + node.remove_field(to_remove); + node.fields.push(Field::Node { name: enm.name.clone(), src: FieldSrc::Shorthand }); + } + } + } +} + +fn extract_struct_traits(ast: &mut AstSrc) { + let traits: &[(&str, &[&str])] = &[ + ("AttrsOwner", &["attrs"]), + ("NameOwner", &["name"]), + ("VisibilityOwner", &["visibility"]), + ("TypeParamsOwner", &["type_param_list", "where_clause"]), + ("TypeBoundsOwner", &["type_bound_list", "colon_token"]), + ("ModuleItemOwner", &["items"]), + ("TypeAscriptionOwner", &["ascribed_type"]), + 
("LoopBodyOwner", &["label", "loop_body"]), + ("ArgListOwner", &["arg_list"]), + ]; + + for node in &mut ast.nodes { + for (name, methods) in traits { + extract_struct_trait(node, name, methods); + } + } +} + +fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) { + let mut to_remove = Vec::new(); + for (i, field) in node.fields.iter().enumerate() { + let method_name = field.method_name().to_string(); + if methods.iter().any(|&it| it == &method_name) { + to_remove.push(i); + } + } + if to_remove.len() == methods.len() { + node.traits.push(trait_name.to_string()); + node.remove_field(to_remove); + } +} + +fn extract_enum_traits(ast: &mut AstSrc) { + for enm in &mut ast.enums { + let nodes = &ast.nodes; + let mut variant_traits = enm + .variants + .iter() + .map(|var| nodes.iter().find(|it| &it.name == var).unwrap()) + .map(|node| node.traits.iter().cloned().collect::>()); + + let mut enum_traits = match variant_traits.next() { + Some(it) => it, + None => continue, + }; + for traits in variant_traits { + enum_traits = enum_traits.intersection(&traits).cloned().collect(); + } + enm.traits = enum_traits.into_iter().collect(); + } +} + +impl AstNodeSrc { + fn remove_field(&mut self, to_remove: Vec) { + to_remove.into_iter().rev().for_each(|idx| { + self.fields.remove(idx); + }); + } +} diff --git a/xtask/src/codegen/rust.ungram b/xtask/src/codegen/rust.ungram new file mode 100644 index 000000000..8a3eb7b29 --- /dev/null +++ b/xtask/src/codegen/rust.ungram @@ -0,0 +1,529 @@ +SourceFile = + Attr* + items:ModuleItem* + +FnDef = + Attr* Visibility? Abi? 'const' 'default' 'async' 'unsafe' 'fn' Name TypeParamList? + ParamList RetType? + WhereClause? + (body:BlockExpr | ';') + +RetType = + '->' TypeRef + +StructDef = + Attr* Visibility? 'struct' Name TypeParamList? ( + WhereClause? (RecordFieldDefList | ';') + | TupleFieldDefList WhereClause? ';' + ) + +UnionDef = + Attr* Visibility? 'union' Name TypeParamList? WhereClause? 
+ RecordFieldDefList + +RecordFieldDefList = + '{' fields:RecordFieldDef* '}' + +RecordFieldDef = + Attr* Visibility? Name ':' ascribed_type:TypeRef + +TupleFieldDefList = + '(' fields:TupleFieldDef* ')' + +TupleFieldDef = + Attr* Visibility? Name TypeRef + +FieldDefList = + RecordFieldDefList +| TupleFieldDefList + +EnumDef = + Attr* Visibility? 'enum' Name TypeParamList? WhereClause? + variant_list:EnumVariantList + +EnumVariantList = + '{' variants:EnumVariant* '}' + +EnumVariant = + Attr* Visibility? Name FieldDefList ('=' Expr)? + +TraitDef = + Attr* Visibility? 'unsafe'? 'auto'? 'trait' Name TypeParamList + (':' TypeBoundList?)? WhereClause + ItemList + +Module = + Attr* Visibility? 'mod' Name + (ItemList | ';') + +ItemList = + '{' + AssocItem* + items:ModuleItem* + '}' + +ConstDef = + Attr* Visibility? 'default'? 'const' Name ':' ascribed_type:TypeRef + '=' body:Expr ';' + +StaticDef = + Attr* Visibility? 'static'? 'mut'? 'static' Name ':' ascribed_type:TypeRef + '=' body:Expr ';' + +TypeAliasDef = + Attr* Visibility? 'default'? 'type' Name TypeParamList? WhereClause? (':' TypeBoundList?)? + '=' TypeRef ';' + +ImplDef = + Attr* Visibility? 'const'? 'default'? 'unsafe'? 'impl' TypeParamList? '!'? 'for' + WhereClause? + ItemList + +ParenType = + '(' TypeRef ')' + +TupleType = + '(' fields:TypeRef* ')' + +NeverType = + '!' + +PathType = + Path + +PointerType = + '*' ('const' | 'mut') TypeRef + +ArrayType = + '[' TypeRef ';' Expr ']' + +SliceType = + '[' TypeRef ']' + +ReferenceType = + '&' 'lifetime'? 'mut'? TypeRef + +PlaceholderType = + '_' + +FnPointerType = + Abi 'unsafe'? 'fn' ParamList RetType? + +ForType = + 'for' TypeParamList TypeRef + +ImplTraitType = + 'impl' TypeBoundList + +DynTraitType = + 'dyn' TypeBoundList + +TupleExpr = + Attr* '(' Expr* ')' + +ArrayExpr = + Attr* '[' (Expr* | Expr ';' Expr) ']' + +ParenExpr = + Attr* '(' Expr ')' + +PathExpr = + Path + +LambdaExpr = + Attr* 'static'? 'async'? 'move'? ParamList RetType? 
+ body:Expr + +IfExpr = + Attr* 'if' Condition + +Condition = + 'let' Pat '=' Expr +| Expr + +EffectExpr = + Attr* Label? ('try' | 'unsafe' | 'async') BlockExpr + +LoopExpr = + Attr* Label? 'loop' + loop_body:BlockExpr? + +ForExpr = + Attr* Label? 'for' Pat 'in' iterable:Expr + loop_body:BlockExpr? + +WhileExpr = + Attr* Label? 'while' Condition + loop_body:BlockExpr? + +ContinueExpr = + Attr* 'continue' 'lifetime'? + +BreakExpr = + Attr* 'break' 'lifetime'? Expr? + +Label = + 'lifetime' + +BlockExpr = + Attr* Label + '{' + items:ModuleItem* + statements:Stmt* + Expr? + '}' + +ReturnExpr = + Attr* 'return' Expr + +CallExpr = + Attr* Expr ArgList + +MethodCallExpr = + Attr* Expr '.' NameRef TypeArgList? ArgList + +ArgList = + '(' args:Expr* ')' + +FieldExpr = + Attr* Expr '.' NameRef + +IndexExpr = + Attr* '[' ']' + +AwaitExpr = + Attr* Expr '.' 'await' + +TryExpr = + Attr* Expr '?' + +CastExpr = + Attr* Expr 'as' TypeRef + +RefExpr = + Attr* '&' ('raw' | 'mut' | 'const') Expr + +PrefixExpr = + Attr* Expr + +BoxExpr = + Attr* 'box' Expr + +RangeExpr = + Attr* + +BinExpr = + Attr* + +Literal = + 'int_number' + +MatchExpr = + Attr* 'match' Expr MatchArmList + +MatchArmList = + '{' arms:MatchArm* '}' + +MatchArm = + Attr* Pat guard:MatchGuard? '=>' Expr + +MatchGuard = + 'if' Expr + +RecordLit = + Path RecordFieldList + +RecordFieldList = + '{' + fields:RecordField* + ('..' spread:Expr)? + '}' + +RecordField = + Attr* NameRef (':' Expr)? + +OrPat = + Pat* + +ParenPat = + '(' Pat ')' + +RefPat = + '&' 'mut'? Pat + +BoxPat = + 'box' Path + +BindPat = + Attr* 'ref'? 'mut'? Name ('@' Pat)? + +PlaceholderPat = + '_' + +DotDotPat = + '..' + +PathPat = + Path + +SlicePat = + '[' args:Pat* ']' + +RangePat = + '..' | '..=' + +LiteralPat = + Literal + +MacroPat = + MacroCall + +RecordPat = + Path RecordFieldPatList + +RecordFieldPatList = + '{' + record_field_pats:RecordFieldPat* + BindPat* + '..'? 
+ '}' + +RecordFieldPat = + Attr* NameRef ':' Pat + +TupleStructPat = + Path '(' args:Pat* ')' + +TuplePat = + '(' args:Pat* ')' + +Visibility = + 'pub' ('(' 'super' | 'self' | 'crate' | 'in' Path ')')? + +Name = + 'ident' + +NameRef = + 'ident' | 'int_number' + +MacroCall = + Attr* Path '!' Name? TokenTree ';'? + +MacroDef = + Name TokenTree + +TokenTree = + '(' ')' | '{' '}' | '[' ']' + +MacroItems = + items:ModuleItem* + +MacroStmts = + statements:Stmt* + Expr? + +Attr = + '#' '!'? '[' Path ('=' input:AttrInput)? ']' + +TypeParamList = + '<' + TypeParam* + LifetimeParam* + ConstParam* + '>' + +TypeParam = + Attr* Name (':' TypeBoundList?)? + ('=' default_type:TypeRef)? + +ConstParam = + Attr* 'const' Name ':' ascribed_type:TypeRef + ('=' default_val:Expr)? + +LifetimeParam = + Attr* 'lifetime' + +TypeBound = + 'lifetime' | 'const'? TypeRef + +TypeBoundList = + bounds:TypeBound* + +WherePred = + ('for' TypeParamList)? ('lifetime' | TypeRef) ':' TypeBoundList + +WhereClause = + 'where' predicates:WherePred* + +Abi = + 'string' + +ExprStmt = + Attr* Expr ';' + +LetStmt = + Attr* 'let' Pat (':' ascribed_type:TypeRef) + '=' initializer:Expr ';' + +ParamList = + '(' SelfParam Param* ')' + +SelfParam = + Attr* ('&' 'lifetime'?)? 'mut'? 'self' (':' ascribed_type:TypeRef) + +Param = + Attr* Pat (':' ascribed_type:TypeRef) +| '...' + +UseItem = + Attr* Visibility? 'use' UseTree ';' + +UseTree = + Path ('::' ('*' | UseTreeList)) Alias? + +UseTreeList = + '{' UseTree* '}' + +Alias = + 'as' Name + +ExternCrateItem = + Attr* Visibility? 'extern' 'crate' (NameRef | 'self') Alias? ';' + +Path = + (qualifier:Path '::')? segment:PathSegment + +PathSegment = + '::' | 'crate' | 'self' | 'super' +| '<' NameRef TypeArgList ParamList RetType PathType '>' + +TypeArgList = + '::'? 
'<' + TypeArg* + LifetimeArg* + AssocTypeArg* + ConstArg* + '>' + +TypeArg = + TypeRef + +AssocTypeArg = + NameRef (':' TypeBoundList | '=' TypeRef) + +LifetimeArg = + 'lifetime' + +ConstArg = + Literal | BlockExpr BlockExpr + +ExternBlock = + Attr* Abi ExternItemList + +ExternItemList = + '{' extern_items:ExternItem* '}' + +MetaItem = + Path '=' AttrInput nested_meta_items:MetaItem* + +NominalDef = + StructDef +| EnumDef +| UnionDef + +TypeRef = + ParenType +| TupleType +| NeverType +| PathType +| PointerType +| ArrayType +| SliceType +| ReferenceType +| PlaceholderType +| FnPointerType +| ForType +| ImplTraitType +| DynTraitType + +AssocItem = + FnDef +| TypeAliasDef +| ConstDef + +ExternItem = + FnDef | StaticDef + +ModuleItem = + StructDef +| UnionDef +| EnumDef +| FnDef +| TraitDef +| TypeAliasDef +| ImplDef +| UseItem +| ExternCrateItem +| ConstDef +| StaticDef +| Module +| MacroCall +| ExternBlock + +AttrInput = + Literal +| TokenTree + +Stmt = + LetStmt +| ExprStmt + +Pat = + OrPat +| ParenPat +| RefPat +| BoxPat +| BindPat +| PlaceholderPat +| DotDotPat +| PathPat +| RecordPat +| TupleStructPat +| TuplePat +| SlicePat +| RangePat +| LiteralPat +| MacroPat + +Expr = + TupleExpr +| ArrayExpr +| ParenExpr +| PathExpr +| LambdaExpr +| IfExpr +| LoopExpr +| ForExpr +| WhileExpr +| ContinueExpr +| BreakExpr +| Label +| BlockExpr +| ReturnExpr +| MatchExpr +| RecordLit +| CallExpr +| IndexExpr +| MethodCallExpr +| FieldExpr +| AwaitExpr +| TryExpr +| EffectExpr +| CastExpr +| RefExpr +| PrefixExpr +| RangeExpr +| BinExpr +| Literal +| MacroCall +| BoxExpr -- cgit v1.2.3