From 627eddbc7e5eb13fc17c1c655ee1c3864c6dd4fe Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 29 Jul 2020 11:48:32 +0200 Subject: Owned AST IR --- xtask/src/codegen/gen_syntax.rs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 745a25862..5a18b3e2b 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -9,28 +9,29 @@ use proc_macro2::{Punct, Spacing}; use quote::{format_ident, quote}; use crate::{ - ast_src::{AstSrc, Field, FieldSrc, KindsSrc, AST_SRC, KINDS_SRC}, + ast_src::{rust_ast, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC}, codegen::{self, update, Mode}, project_root, Result, }; pub fn generate_syntax(mode: Mode) -> Result<()> { + let ast = rust_ast(); let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; let ast_tokens_file = project_root().join(codegen::AST_TOKENS); - let contents = generate_tokens(AST_SRC)?; + let contents = generate_tokens(&ast)?; update(ast_tokens_file.as_path(), &contents, mode)?; let ast_nodes_file = project_root().join(codegen::AST_NODES); - let contents = generate_nodes(KINDS_SRC, AST_SRC)?; + let contents = generate_nodes(KINDS_SRC, &ast)?; update(ast_nodes_file.as_path(), &contents, mode)?; Ok(()) } -fn generate_tokens(grammar: AstSrc<'_>) -> Result { +fn generate_tokens(grammar: &AstSrc) -> Result { let tokens = grammar.tokens.iter().map(|token| { let name = format_ident!("{}", token); let kind = format_ident!("{}", to_upper_snake_case(token)); @@ -62,13 +63,13 @@ fn generate_tokens(grammar: AstSrc<'_>) -> Result { Ok(pretty) } -fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result { +fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result { let (node_defs, 
node_boilerplate_impls): (Vec<_>, Vec<_>) = grammar .nodes .iter() .map(|node| { let name = format_ident!("{}", node.name); - let kind = format_ident!("{}", to_upper_snake_case(node.name)); + let kind = format_ident!("{}", to_upper_snake_case(&node.name)); let traits = node.traits.iter().map(|trait_name| { let trait_name = format_ident!("{}", trait_name); quote!(impl ast::#trait_name for #name {}) @@ -192,8 +193,8 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result { }) .unzip(); - let enum_names = grammar.enums.iter().map(|it| it.name); - let node_names = grammar.nodes.iter().map(|it| it.name); + let enum_names = grammar.enums.iter().map(|it| &it.name); + let node_names = grammar.nodes.iter().map(|it| &it.name); let display_impls = enum_names.chain(node_names.clone()).map(|it| format_ident!("{}", it)).map(|name| { @@ -212,7 +213,7 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result { .nodes .iter() .map(|kind| to_pascal_case(kind)) - .filter(|name| !defined_nodes.contains(name.as_str())) + .filter(|name| !defined_nodes.iter().any(|&it| it == name)) { eprintln!("Warning: node {} not defined in ast source", node); } @@ -236,12 +237,12 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result { let mut res = String::with_capacity(ast.len() * 2); let mut docs = - grammar.nodes.iter().map(|it| it.doc).chain(grammar.enums.iter().map(|it| it.doc)); + grammar.nodes.iter().map(|it| &it.doc).chain(grammar.enums.iter().map(|it| &it.doc)); for chunk in ast.split("# [ pretty_doc_comment_placeholder_workaround ]") { res.push_str(chunk); if let Some(doc) = docs.next() { - write_doc_comment(doc, &mut res); + write_doc_comment(&doc, &mut res); } } @@ -249,7 +250,7 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: AstSrc<'_>) -> Result { Ok(pretty) } -fn write_doc_comment(contents: &[&str], dest: &mut String) { +fn write_doc_comment(contents: &[String], dest: &mut String) { for line in contents { writeln!(dest, "///{}", 
line).unwrap(); } @@ -413,7 +414,7 @@ fn to_pascal_case(s: &str) -> String { buf } -impl Field<'_> { +impl Field { fn is_many(&self) -> bool { matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) } @@ -429,7 +430,7 @@ impl Field<'_> { fn method_name(&self) -> proc_macro2::Ident { match self { Field::Token(name) => { - let name = match *name { + let name = match name.as_str() { ";" => "semicolon", "->" => "thin_arrow", "'{'" => "l_curly", -- cgit v1.2.3 From 3d28292157e1b6c9675ef64eddf53786c3e7dc5f Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 29 Jul 2020 15:45:23 +0200 Subject: Switch to ungrammar from ast_src The primary advantage of ungrammar is that it (eventually) allows one to describe concrete syntax tree structure -- with alternatives and specific sequence of tokens & nodes. That should be re-usable for: * generate `make` calls * Rust reference * Hypothetical parser's evented API We lose doc comments for the time being unfortunately. I don't think we should add support for doc comments to ungrammar -- they'll make grammar file hard to read. We might supply docs as out-of-band info, or maybe just via a reference, but we'll think about that once things are no longer in flux --- xtask/src/codegen/gen_syntax.rs | 224 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 220 insertions(+), 4 deletions(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 5a18b3e2b..24e8be1fb 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -3,19 +3,27 @@ //! Specifically, it generates the `SyntaxKind` enum and a number of newtype //! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. 
-use std::{collections::HashSet, fmt::Write}; +use std::{ + collections::{BTreeSet, HashSet}, + fmt::Write, +}; use proc_macro2::{Punct, Spacing}; use quote::{format_ident, quote}; +use ungrammar::{Grammar, Rule}; use crate::{ - ast_src::{rust_ast, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC}, + ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC}, codegen::{self, update, Mode}, project_root, Result, }; pub fn generate_syntax(mode: Mode) -> Result<()> { - let ast = rust_ast(); + let grammar = include_str!("rust.ungram") + .parse::() + .unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err)); + let ast = lower(&grammar); + let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); let syntax_kinds = generate_syntax_kinds(KINDS_SRC)?; update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; @@ -215,7 +223,9 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result { .map(|kind| to_pascal_case(kind)) .filter(|name| !defined_nodes.iter().any(|&it| it == name)) { - eprintln!("Warning: node {} not defined in ast source", node); + drop(node) + // TODO: restore this + // eprintln!("Warning: node {} not defined in ast source", node); } let ast = quote! { @@ -414,6 +424,10 @@ fn to_pascal_case(s: &str) -> String { buf } +fn pluralize(s: &str) -> String { + format!("{}s", s) +} + impl Field { fn is_many(&self) -> bool { matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) @@ -449,6 +463,7 @@ impl Field { "." => "dot", ".." => "dotdot", "..." 
=> "dotdotdot", + "..=" => "dotdoteq", "=>" => "fat_arrow", "@" => "at", ":" => "colon", @@ -475,3 +490,204 @@ impl Field { } } } + +fn lower(grammar: &Grammar) -> AstSrc { + let mut res = AstSrc::default(); + res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()]; + + let nodes = grammar + .iter() + .filter(|&node| match grammar[node].rule { + Rule::Node(it) if it == node => false, + _ => true, + }) + .collect::>(); + + for &node in &nodes { + let name = grammar[node].name.clone(); + let rule = &grammar[node].rule; + match lower_enum(grammar, rule) { + Some(variants) => { + let enum_src = AstEnumSrc { doc: Vec::new(), name, traits: Vec::new(), variants }; + res.enums.push(enum_src); + } + None => { + let mut fields = Vec::new(); + lower_rule(&mut fields, grammar, rule); + res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields }); + } + } + } + + deduplicate_fields(&mut res); + extract_enums(&mut res); + extract_struct_traits(&mut res); + extract_enum_traits(&mut res); + res +} + +fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option> { + let alternatives = match rule { + Rule::Alt(it) => it, + _ => return None, + }; + let mut variants = Vec::new(); + for alternative in alternatives { + match alternative { + Rule::Node(it) => variants.push(grammar[*it].name.clone()), + _ => return None, + } + } + Some(variants) +} + +fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { + match rule { + Rule::Node(node) => { + let field = Field::Node { name: grammar[*node].name.clone(), src: FieldSrc::Shorthand }; + acc.push(field); + } + Rule::Token(token) => { + let mut name = grammar[*token].name.clone(); + if name != "int_number" && name != "string" { + if "[]{}()".contains(&name) { + name = format!("'{}'", name); + } + let field = Field::Token(name); + acc.push(field); + } + } + Rule::Rep(inner) => { + if let Rule::Node(node) = &**inner { + let name = grammar[*node].name.clone(); + let label = 
pluralize(&to_lower_snake_case(&name)); + let field = Field::Node { name: label.clone(), src: FieldSrc::Many(name) }; + acc.push(field); + return; + } + todo!("{:?}", rule) + } + Rule::Labeled { label, rule } => { + let node = match &**rule { + Rule::Rep(inner) | Rule::Opt(inner) => match &**inner { + Rule::Node(node) => node, + _ => todo!("{:?}", rule), + }, + Rule::Node(node) => node, + _ => todo!("{:?}", rule), + }; + let field = Field::Node { + name: label.clone(), + src: match &**rule { + Rule::Rep(_) => FieldSrc::Many(grammar[*node].name.clone()), + _ => FieldSrc::Optional(grammar[*node].name.clone()), + }, + }; + acc.push(field); + } + Rule::Seq(rules) | Rule::Alt(rules) => { + for rule in rules { + lower_rule(acc, grammar, rule) + } + } + Rule::Opt(rule) => lower_rule(acc, grammar, rule), + } +} + +fn deduplicate_fields(ast: &mut AstSrc) { + eprintln!(); + for node in &mut ast.nodes { + let mut i = 0; + 'outer: while i < node.fields.len() { + for j in 0..i { + let f1 = &node.fields[i]; + let f2 = &node.fields[j]; + if f1 == f2 { + node.fields.remove(i); + continue 'outer; + } + } + i += 1; + } + } +} + +fn extract_enums(ast: &mut AstSrc) { + for node in &mut ast.nodes { + for enm in &ast.enums { + let mut to_remove = Vec::new(); + for (i, field) in node.fields.iter().enumerate() { + let ty = field.ty().to_string(); + if enm.variants.iter().any(|it| it == &ty) { + to_remove.push(i); + } + } + if to_remove.len() == enm.variants.len() { + node.remove_field(to_remove); + node.fields.push(Field::Node { name: enm.name.clone(), src: FieldSrc::Shorthand }); + } + } + } +} + +fn extract_struct_traits(ast: &mut AstSrc) { + let traits: &[(&str, &[&str])] = &[ + ("AttrsOwner", &["attrs"]), + ("NameOwner", &["name"]), + ("VisibilityOwner", &["visibility"]), + ("TypeParamsOwner", &["type_param_list", "where_clause"]), + ("TypeBoundsOwner", &["type_bound_list", "colon_token"]), + ("ModuleItemOwner", &["items"]), + ("TypeAscriptionOwner", &["ascribed_type"]), + 
("LoopBodyOwner", &["label", "loop_body"]), + ("ArgListOwner", &["arg_list"]), + ]; + + for node in &mut ast.nodes { + for (name, methods) in traits { + extract_struct_trait(node, name, methods); + } + } +} + +fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str]) { + let mut to_remove = Vec::new(); + for (i, field) in node.fields.iter().enumerate() { + let method_name = field.method_name().to_string(); + if methods.iter().any(|&it| it == &method_name) { + to_remove.push(i); + } + } + if to_remove.len() == methods.len() { + node.traits.push(trait_name.to_string()); + node.remove_field(to_remove); + } +} + +fn extract_enum_traits(ast: &mut AstSrc) { + for enm in &mut ast.enums { + let nodes = &ast.nodes; + let mut variant_traits = enm + .variants + .iter() + .map(|var| nodes.iter().find(|it| &it.name == var).unwrap()) + .map(|node| node.traits.iter().cloned().collect::>()); + + let mut enum_traits = match variant_traits.next() { + Some(it) => it, + None => continue, + }; + for traits in variant_traits { + enum_traits = enum_traits.intersection(&traits).cloned().collect(); + } + enm.traits = enum_traits.into_iter().collect(); + } +} + +impl AstNodeSrc { + fn remove_field(&mut self, to_remove: Vec) { + to_remove.into_iter().rev().for_each(|idx| { + self.fields.remove(idx); + }); + } +} -- cgit v1.2.3 From ede5d17b0409f9d5a209aaf16508262dbd2a4489 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 30 Jul 2020 00:27:00 +0200 Subject: Finish SourceFile grammar --- xtask/src/codegen/gen_syntax.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 24e8be1fb..9b49712c1 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -374,6 +374,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result { #([#all_keywords_idents] => { $crate::SyntaxKind::#all_keywords };)* [lifetime] => 
{ $crate::SyntaxKind::LIFETIME }; [ident] => { $crate::SyntaxKind::IDENT }; + [shebang] => { $crate::SyntaxKind::SHEBANG }; } }; @@ -595,7 +596,6 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { } fn deduplicate_fields(ast: &mut AstSrc) { - eprintln!(); for node in &mut ast.nodes { let mut i = 0; 'outer: while i < node.fields.len() { -- cgit v1.2.3 From e381c02ef304fdeafde1c94afd1a10c2085ab716 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 30 Jul 2020 14:06:04 +0200 Subject: Add comma list to use tree --- xtask/src/codegen/gen_syntax.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 9b49712c1..c77fc8a8d 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -543,6 +543,10 @@ fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option> { } fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { + if lower_comma_list(acc, grammar, rule) { + return; + } + match rule { Rule::Node(node) => { let field = Field::Node { name: grammar[*node].name.clone(), src: FieldSrc::Shorthand }; @@ -595,6 +599,37 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { } } +// (T (',' T)* ','?)? 
+fn lower_comma_list(acc: &mut Vec, grammar: &Grammar, rule: &Rule) -> bool { + let rule = match rule { + Rule::Opt(it) => it, + _ => return false, + }; + let rule = match &**rule { + Rule::Seq(it) => it, + _ => return false, + }; + let (node, repeat, trailing_comma) = match rule.as_slice() { + [Rule::Node(node), Rule::Rep(repeat), Rule::Opt(trailing_comma)] => { + (node, repeat, trailing_comma) + } + _ => return false, + }; + let repeat = match &**repeat { + Rule::Seq(it) => it, + _ => return false, + }; + match repeat.as_slice() { + [comma, Rule::Node(n)] if comma == &**trailing_comma && n == node => (), + _ => return false, + } + let name = grammar[*node].name.clone(); + let label = pluralize(&to_lower_snake_case(&name)); + let field = Field::Node { name: label.clone(), src: FieldSrc::Many(name) }; + acc.push(field); + true +} + fn deduplicate_fields(ast: &mut AstSrc) { for node in &mut ast.nodes { let mut i = 0; -- cgit v1.2.3 From 1142112c70b705f59b7d559d9d72cdc831865158 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 30 Jul 2020 14:51:08 +0200 Subject: Rename FnDef -> Fn --- xtask/src/codegen/gen_syntax.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index c77fc8a8d..072527208 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -471,6 +471,7 @@ impl Field { "::" => "coloncolon", "#" => "pound", "?" => "question_mark", + "," => "comma", _ => name, }; format_ident!("{}_token", name) @@ -599,13 +600,9 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { } } -// (T (',' T)* ','?)? +// (T (',' T)* ','?) 
fn lower_comma_list(acc: &mut Vec, grammar: &Grammar, rule: &Rule) -> bool { let rule = match rule { - Rule::Opt(it) => it, - _ => return false, - }; - let rule = match &**rule { Rule::Seq(it) => it, _ => return false, }; -- cgit v1.2.3 From 28ef4c375a9f56d69daf885504aea3df7012bb81 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 30 Jul 2020 15:36:21 +0200 Subject: Rename TypeParamList -> GenericParamList --- xtask/src/codegen/gen_syntax.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 072527208..f79cd972e 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -667,7 +667,7 @@ fn extract_struct_traits(ast: &mut AstSrc) { ("AttrsOwner", &["attrs"]), ("NameOwner", &["name"]), ("VisibilityOwner", &["visibility"]), - ("TypeParamsOwner", &["type_param_list", "where_clause"]), + ("GenericParamsOwner", &["generic_param_list", "where_clause"]), ("TypeBoundsOwner", &["type_bound_list", "colon_token"]), ("ModuleItemOwner", &["items"]), ("TypeAscriptionOwner", &["ascribed_type"]), -- cgit v1.2.3 From d549f6164c89bed66432ab5e5ea6e38cc8b4da6b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 30 Jul 2020 17:10:44 +0200 Subject: Simplify codegen --- xtask/src/codegen/gen_syntax.rs | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index f79cd972e..b435d8a9c 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -13,7 +13,7 @@ use quote::{format_ident, quote}; use ungrammar::{Grammar, Rule}; use crate::{ - ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, FieldSrc, KindsSrc, KINDS_SRC}, + ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, KindsSrc, Valence, KINDS_SRC}, codegen::{self, update, Mode}, 
project_root, Result, }; @@ -431,7 +431,7 @@ fn pluralize(s: &str) -> String { impl Field { fn is_many(&self) -> bool { - matches!(self, Field::Node { src: FieldSrc::Many(_), .. }) + matches!(self, Field::Node { valence: Valence::Many, .. }) } fn token_kind(&self) -> Option { match self { @@ -476,19 +476,13 @@ impl Field { }; format_ident!("{}_token", name) } - Field::Node { name, src } => match src { - FieldSrc::Shorthand => format_ident!("{}", to_lower_snake_case(name)), - _ => format_ident!("{}", name), - }, + Field::Node { name, .. } => format_ident!("{}", name), } } fn ty(&self) -> proc_macro2::Ident { match self { Field::Token(_) => format_ident!("SyntaxToken"), - Field::Node { name, src } => match src { - FieldSrc::Optional(ty) | FieldSrc::Many(ty) => format_ident!("{}", ty), - FieldSrc::Shorthand => format_ident!("{}", name), - }, + Field::Node { ty, .. } => format_ident!("{}", ty), } } } @@ -550,7 +544,9 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { match rule { Rule::Node(node) => { - let field = Field::Node { name: grammar[*node].name.clone(), src: FieldSrc::Shorthand }; + let ty = grammar[*node].name.clone(); + let name = to_lower_snake_case(&ty); + let field = Field::Node { name, ty, valence: Valence::Optional }; acc.push(field); } Rule::Token(token) => { @@ -565,9 +561,9 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { } Rule::Rep(inner) => { if let Rule::Node(node) = &**inner { - let name = grammar[*node].name.clone(); - let label = pluralize(&to_lower_snake_case(&name)); - let field = Field::Node { name: label.clone(), src: FieldSrc::Many(name) }; + let ty = grammar[*node].name.clone(); + let name = pluralize(&to_lower_snake_case(&ty)); + let field = Field::Node { name, ty, valence: Valence::Many }; acc.push(field); return; } @@ -582,11 +578,13 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { Rule::Node(node) => node, _ => todo!("{:?}", rule), }; + let ty = grammar[*node].name.clone(); let field 
= Field::Node { name: label.clone(), - src: match &**rule { - Rule::Rep(_) => FieldSrc::Many(grammar[*node].name.clone()), - _ => FieldSrc::Optional(grammar[*node].name.clone()), + ty, + valence: match &**rule { + Rule::Rep(_) => Valence::Many, + _ => Valence::Optional, }, }; acc.push(field); @@ -620,9 +618,9 @@ fn lower_comma_list(acc: &mut Vec, grammar: &Grammar, rule: &Rule) -> boo [comma, Rule::Node(n)] if comma == &**trailing_comma && n == node => (), _ => return false, } - let name = grammar[*node].name.clone(); - let label = pluralize(&to_lower_snake_case(&name)); - let field = Field::Node { name: label.clone(), src: FieldSrc::Many(name) }; + let ty = grammar[*node].name.clone(); + let name = pluralize(&to_lower_snake_case(&ty)); + let field = Field::Node { name, ty, valence: Valence::Many }; acc.push(field); true } @@ -656,7 +654,9 @@ fn extract_enums(ast: &mut AstSrc) { } if to_remove.len() == enm.variants.len() { node.remove_field(to_remove); - node.fields.push(Field::Node { name: enm.name.clone(), src: FieldSrc::Shorthand }); + let ty = enm.name.clone(); + let name = to_lower_snake_case(&ty); + node.fields.push(Field::Node { name, ty, valence: Valence::Optional }); } } } -- cgit v1.2.3 From b043947301e9c386c9131d7008ee90a315f48545 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 30 Jul 2020 17:19:51 +0200 Subject: Simplify --- xtask/src/codegen/gen_syntax.rs | 58 +++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 34 deletions(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index b435d8a9c..84ddda5cb 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -13,7 +13,7 @@ use quote::{format_ident, quote}; use ungrammar::{Grammar, Rule}; use crate::{ - ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Field, KindsSrc, Valence, KINDS_SRC}, + ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC}, 
codegen::{self, update, Mode}, project_root, Result, }; @@ -431,7 +431,7 @@ fn pluralize(s: &str) -> String { impl Field { fn is_many(&self) -> bool { - matches!(self, Field::Node { valence: Valence::Many, .. }) + matches!(self, Field::Node { cardinality: Cardinality::Many, .. }) } fn token_kind(&self) -> Option { match self { @@ -509,7 +509,7 @@ fn lower(grammar: &Grammar) -> AstSrc { } None => { let mut fields = Vec::new(); - lower_rule(&mut fields, grammar, rule); + lower_rule(&mut fields, grammar, None, rule); res.nodes.push(AstNodeSrc { doc: Vec::new(), name, traits: Vec::new(), fields }); } } @@ -537,19 +537,20 @@ fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option> { Some(variants) } -fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { - if lower_comma_list(acc, grammar, rule) { +fn lower_rule(acc: &mut Vec, grammar: &Grammar, label: Option<&String>, rule: &Rule) { + if lower_comma_list(acc, grammar, label, rule) { return; } match rule { Rule::Node(node) => { let ty = grammar[*node].name.clone(); - let name = to_lower_snake_case(&ty); - let field = Field::Node { name, ty, valence: Valence::Optional }; + let name = label.cloned().unwrap_or_else(|| to_lower_snake_case(&ty)); + let field = Field::Node { name, ty, cardinality: Cardinality::Optional }; acc.push(field); } Rule::Token(token) => { + assert!(label.is_none()); let mut name = grammar[*token].name.clone(); if name != "int_number" && name != "string" { if "[]{}()".contains(&name) { @@ -562,44 +563,33 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, rule: &Rule) { Rule::Rep(inner) => { if let Rule::Node(node) = &**inner { let ty = grammar[*node].name.clone(); - let name = pluralize(&to_lower_snake_case(&ty)); - let field = Field::Node { name, ty, valence: Valence::Many }; + let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty))); + let field = Field::Node { name, ty, cardinality: Cardinality::Many }; acc.push(field); return; } todo!("{:?}", rule) } - 
Rule::Labeled { label, rule } => { - let node = match &**rule { - Rule::Rep(inner) | Rule::Opt(inner) => match &**inner { - Rule::Node(node) => node, - _ => todo!("{:?}", rule), - }, - Rule::Node(node) => node, - _ => todo!("{:?}", rule), - }; - let ty = grammar[*node].name.clone(); - let field = Field::Node { - name: label.clone(), - ty, - valence: match &**rule { - Rule::Rep(_) => Valence::Many, - _ => Valence::Optional, - }, - }; - acc.push(field); + Rule::Labeled { label: l, rule } => { + assert!(label.is_none()); + lower_rule(acc, grammar, Some(l), rule); } Rule::Seq(rules) | Rule::Alt(rules) => { for rule in rules { - lower_rule(acc, grammar, rule) + lower_rule(acc, grammar, label, rule) } } - Rule::Opt(rule) => lower_rule(acc, grammar, rule), + Rule::Opt(rule) => lower_rule(acc, grammar, label, rule), } } // (T (',' T)* ','?) -fn lower_comma_list(acc: &mut Vec, grammar: &Grammar, rule: &Rule) -> bool { +fn lower_comma_list( + acc: &mut Vec, + grammar: &Grammar, + label: Option<&String>, + rule: &Rule, +) -> bool { let rule = match rule { Rule::Seq(it) => it, _ => return false, @@ -619,8 +609,8 @@ fn lower_comma_list(acc: &mut Vec, grammar: &Grammar, rule: &Rule) -> boo _ => return false, } let ty = grammar[*node].name.clone(); - let name = pluralize(&to_lower_snake_case(&ty)); - let field = Field::Node { name, ty, valence: Valence::Many }; + let name = label.cloned().unwrap_or_else(|| pluralize(&to_lower_snake_case(&ty))); + let field = Field::Node { name, ty, cardinality: Cardinality::Many }; acc.push(field); true } @@ -656,7 +646,7 @@ fn extract_enums(ast: &mut AstSrc) { node.remove_field(to_remove); let ty = enm.name.clone(); let name = to_lower_snake_case(&ty); - node.fields.push(Field::Node { name, ty, valence: Valence::Optional }); + node.fields.push(Field::Node { name, ty, cardinality: Cardinality::Optional }); } } } -- cgit v1.2.3 From 216a5344c8ef3c3e430d2761dc8b1a7b60250a15 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 30 Jul 2020 
17:50:40 +0200 Subject: Rename StructDef -> Struct --- xtask/src/codegen/gen_syntax.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 84ddda5cb..e993a750c 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -307,7 +307,7 @@ fn generate_syntax_kinds(grammar: KindsSrc<'_>) -> Result { let ast = quote! { #![allow(bad_style, missing_docs, unreachable_pub)] - /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. + /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT`. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] #[repr(u16)] pub enum SyntaxKind { -- cgit v1.2.3 From 2e2642efccd5855e4158b01a006e7884a96982bb Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 30 Jul 2020 20:51:43 +0200 Subject: Remove TypeAscriptionOwner --- xtask/src/codegen/gen_syntax.rs | 1 - 1 file changed, 1 deletion(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index e993a750c..45b788bdb 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -660,7 +660,6 @@ fn extract_struct_traits(ast: &mut AstSrc) { ("GenericParamsOwner", &["generic_param_list", "where_clause"]), ("TypeBoundsOwner", &["type_bound_list", "colon_token"]), ("ModuleItemOwner", &["items"]), - ("TypeAscriptionOwner", &["ascribed_type"]), ("LoopBodyOwner", &["label", "loop_body"]), ("ArgListOwner", &["arg_list"]), ]; -- cgit v1.2.3 From 08ea2271e8050165d0aaf4c994ed3dd746aff3ba Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Jul 2020 12:06:38 +0200 Subject: Rename TypeRef -> Type The TypeRef name comes from IntelliJ days, where you often have both type *syntax* as well as *semantical* representation of types in scope. And naming both Type is confusing. 
In rust-analyzer however, we use ast types as `ast::Type`, and have many more semantic counterparts to ast types, so avoiding name clash here is just confusing. --- xtask/src/codegen/gen_syntax.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 45b788bdb..d6a72ccc0 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -476,7 +476,13 @@ impl Field { }; format_ident!("{}_token", name) } - Field::Node { name, .. } => format_ident!("{}", name), + Field::Node { name, .. } => { + if name == "type" { + format_ident!("ty") + } else { + format_ident!("{}", name) + } + } } } fn ty(&self) -> proc_macro2::Ident { -- cgit v1.2.3 From a7ca6583fbce6f1bddce7b31ad5bb1fc0665b616 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Jul 2020 15:40:48 +0200 Subject: Handwrite Stmt --- xtask/src/codegen/gen_syntax.rs | 51 +++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 25 deletions(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index d6a72ccc0..e3d4269f6 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -153,25 +153,10 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result { quote!(impl ast::#trait_name for #name {}) }); - ( - quote! { - #[pretty_doc_comment_placeholder_workaround] - #[derive(Debug, Clone, PartialEq, Eq, Hash)] - pub enum #name { - #(#variants(#variants),)* - } - - #(#traits)* - }, + let ast_node = if en.name == "Stmt" { + quote! {} + } else { quote! 
{ - #( - impl From<#variants> for #name { - fn from(node: #variants) -> #name { - #name::#variants(node) - } - } - )* - impl AstNode for #name { fn can_cast(kind: SyntaxKind) -> bool { match kind { @@ -196,6 +181,28 @@ fn generate_nodes(kinds: KindsSrc<'_>, grammar: &AstSrc) -> Result { } } } + } + }; + + ( + quote! { + #[pretty_doc_comment_placeholder_workaround] + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + pub enum #name { + #(#variants(#variants),)* + } + + #(#traits)* + }, + quote! { + #( + impl From<#variants> for #name { + fn from(node: #variants) -> #name { + #name::#variants(node) + } + } + )* + #ast_node }, ) }) @@ -497,13 +504,7 @@ fn lower(grammar: &Grammar) -> AstSrc { let mut res = AstSrc::default(); res.tokens = vec!["Whitespace".into(), "Comment".into(), "String".into(), "RawString".into()]; - let nodes = grammar - .iter() - .filter(|&node| match grammar[node].rule { - Rule::Node(it) if it == node => false, - _ => true, - }) - .collect::>(); + let nodes = grammar.iter().collect::>(); for &node in &nodes { let name = grammar[node].name.clone(); -- cgit v1.2.3 From d4d986c7f850e1f535bb4c22e3a7f7fba5483628 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Jul 2020 15:46:12 +0200 Subject: Item is a Stmt --- xtask/src/codegen/gen_syntax.rs | 3 +++ 1 file changed, 3 insertions(+) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index e3d4269f6..d9f358513 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -694,6 +694,9 @@ fn extract_struct_trait(node: &mut AstNodeSrc, trait_name: &str, methods: &[&str fn extract_enum_traits(ast: &mut AstSrc) { for enm in &mut ast.enums { + if enm.name == "Stmt" { + continue; + } let nodes = &ast.nodes; let mut variant_traits = enm .variants -- cgit v1.2.3 From bfcee63e75d6feb21cafbdf3887e0efd508b6b2e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Jul 2020 16:52:08 +0200 Subject: Work on 
expressions grammar --- xtask/src/codegen/gen_syntax.rs | 3 +++ 1 file changed, 3 insertions(+) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index d9f358513..90f746e96 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -579,6 +579,9 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, label: Option<&String>, r } Rule::Labeled { label: l, rule } => { assert!(label.is_none()); + if l == "op" { + return; + } lower_rule(acc, grammar, Some(l), rule); } Rule::Seq(rules) | Rule::Alt(rules) => { -- cgit v1.2.3 From 633aace41108b74fe6c93c5ab04272067db033f9 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Jul 2020 17:08:58 +0200 Subject: Rename LambdaExpr -> ClosureExpr --- xtask/src/codegen/gen_syntax.rs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 90f746e96..059538696 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -579,7 +579,19 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, label: Option<&String>, r } Rule::Labeled { label: l, rule } => { assert!(label.is_none()); - if l == "op" { + let manually_implemented = matches!( + l.as_str(), + "lhs" + | "rhs" + | "then_branch" + | "else_branch" + | "start" + | "end" + | "op" + | "index" + | "base" + ); + if manually_implemented { return; } lower_rule(acc, grammar, Some(l), rule); -- cgit v1.2.3 From 8d28289d0f83225672fc42abcf684364582e73c5 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Jul 2020 18:16:08 +0200 Subject: Specify literal tokens --- xtask/src/codegen/gen_syntax.rs | 1 + 1 file changed, 1 insertion(+) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 059538696..e6231ece2 100644 --- 
a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -590,6 +590,7 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, label: Option<&String>, r | "op" | "index" | "base" + | "value" ); if manually_implemented { return; -- cgit v1.2.3 From 81359af733f7b13e0bd2196191f2ab294e1b57aa Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 31 Jul 2020 20:22:20 +0200 Subject: Simplify trait grammar --- xtask/src/codegen/gen_syntax.rs | 2 ++ 1 file changed, 2 insertions(+) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index e6231ece2..4602ff1d7 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -591,6 +591,8 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, label: Option<&String>, r | "index" | "base" | "value" + | "target_type" + | "target_trait" ); if manually_implemented { return; -- cgit v1.2.3 From bff8dd094958f1abe2fcfe8fe9f15dc7a7e6b53e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 1 Aug 2020 13:47:19 +0200 Subject: Update grammar --- xtask/src/codegen/gen_syntax.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'xtask/src/codegen/gen_syntax.rs') diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs index 4602ff1d7..cafad8070 100644 --- a/xtask/src/codegen/gen_syntax.rs +++ b/xtask/src/codegen/gen_syntax.rs @@ -10,7 +10,7 @@ use std::{ use proc_macro2::{Punct, Spacing}; use quote::{format_ident, quote}; -use ungrammar::{Grammar, Rule}; +use ungrammar::{rust_grammar, Grammar, Rule}; use crate::{ ast_src::{AstEnumSrc, AstNodeSrc, AstSrc, Cardinality, Field, KindsSrc, KINDS_SRC}, @@ -19,9 +19,7 @@ use crate::{ }; pub fn generate_syntax(mode: Mode) -> Result<()> { - let grammar = include_str!("rust.ungram") - .parse::() - .unwrap_or_else(|err| panic!("\n \x1b[91merror\x1b[0m: {}\n", err)); + let grammar = rust_grammar(); let ast = lower(&grammar); let syntax_kinds_file
= project_root().join(codegen::SYNTAX_KINDS); @@ -538,6 +536,7 @@ fn lower_enum(grammar: &Grammar, rule: &Rule) -> Option> { for alternative in alternatives { match alternative { Rule::Node(it) => variants.push(grammar[*it].name.clone()), + Rule::Token(it) if grammar[*it].name == ";" => (), _ => return None, } } @@ -591,8 +590,8 @@ fn lower_rule(acc: &mut Vec, grammar: &Grammar, label: Option<&String>, r | "index" | "base" | "value" - | "target_type" - | "target_trait" + | "trait" + | "self_ty" ); if manually_implemented { return; -- cgit v1.2.3