From b5f13d8d51ef9107363a60b894a741ab596921ce Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 23 Oct 2019 18:13:40 +0300 Subject: xtask: move codegen to a module --- xtask/src/codegen/gen_parser_tests.rs | 150 ++++++++++++++ xtask/src/codegen/gen_syntax.rs | 354 ++++++++++++++++++++++++++++++++++ 2 files changed, 504 insertions(+) create mode 100644 xtask/src/codegen/gen_parser_tests.rs create mode 100644 xtask/src/codegen/gen_syntax.rs (limited to 'xtask/src/codegen') diff --git a/xtask/src/codegen/gen_parser_tests.rs b/xtask/src/codegen/gen_parser_tests.rs new file mode 100644 index 000000000..e09b6fcfe --- /dev/null +++ b/xtask/src/codegen/gen_parser_tests.rs @@ -0,0 +1,150 @@ +//! This module greps parser's code for specially formatted comments and turns +//! them into tests. + +use std::{ + collections::HashMap, + fs, + path::{Path, PathBuf}, +}; + +use itertools::Itertools; + +use crate::{ + codegen::{self, update, Mode}, + project_root, Result, +}; + +pub fn generate_parser_tests(mode: Mode) -> Result<()> { + let tests = tests_from_dir(&project_root().join(Path::new(codegen::GRAMMAR_DIR)))?; + fn install_tests(tests: &HashMap, into: &str, mode: Mode) -> Result<()> { + let tests_dir = project_root().join(into); + if !tests_dir.is_dir() { + fs::create_dir_all(&tests_dir)?; + } + // ok is never actually read, but it needs to be specified to create a Test in existing_tests + let existing = existing_tests(&tests_dir, true)?; + for t in existing.keys().filter(|&t| !tests.contains_key(t)) { + panic!("Test is deleted: {}", t); + } + + let mut new_idx = existing.len() + 1; + for (name, test) in tests { + let path = match existing.get(name) { + Some((path, _test)) => path.clone(), + None => { + let file_name = format!("{:04}_{}.rs", new_idx, name); + new_idx += 1; + tests_dir.join(file_name) + } + }; + update(&path, &test.text, mode)?; + } + Ok(()) + } + install_tests(&tests.ok, codegen::OK_INLINE_TESTS_DIR, mode)?; + install_tests(&tests.err,
codegen::ERR_INLINE_TESTS_DIR, mode) +} + +#[derive(Debug)] +struct Test { + pub name: String, + pub text: String, + pub ok: bool, +} + +#[derive(Default, Debug)] +struct Tests { + pub ok: HashMap, + pub err: HashMap, +} + +fn collect_tests(s: &str) -> Vec<(usize, Test)> { + let mut res = vec![]; + let prefix = "// "; + let comment_blocks = s + .lines() + .map(str::trim_start) + .enumerate() + .group_by(|(_idx, line)| line.starts_with(prefix)); + + 'outer: for (is_comment, block) in comment_blocks.into_iter() { + if !is_comment { + continue; + } + let mut block = block.map(|(idx, line)| (idx, &line[prefix.len()..])); + + let mut ok = true; + let (start_line, name) = loop { + match block.next() { + Some((idx, line)) if line.starts_with("test ") => { + break (idx, line["test ".len()..].to_string()); + } + Some((idx, line)) if line.starts_with("test_err ") => { + ok = false; + break (idx, line["test_err ".len()..].to_string()); + } + Some(_) => (), + None => continue 'outer, + } + }; + let text: String = + itertools::join(block.map(|(_, line)| line).chain(::std::iter::once("")), "\n"); + assert!(!text.trim().is_empty() && text.ends_with('\n')); + res.push((start_line, Test { name, text, ok })) + } + res +} + +fn tests_from_dir(dir: &Path) -> Result { + let mut res = Tests::default(); + for entry in ::walkdir::WalkDir::new(dir) { + let entry = entry.unwrap(); + if !entry.file_type().is_file() { + continue; + } + if entry.path().extension().unwrap_or_default() != "rs" { + continue; + } + process_file(&mut res, entry.path())?; + } + let grammar_rs = dir.parent().unwrap().join("grammar.rs"); + process_file(&mut res, &grammar_rs)?; + return Ok(res); + fn process_file(res: &mut Tests, path: &Path) -> Result<()> { + let text = fs::read_to_string(path)?; + + for (_, test) in collect_tests(&text) { + if test.ok { + if let Some(old_test) = res.ok.insert(test.name.clone(), test) { + Err(format!("Duplicate test: {}", old_test.name))? 
+ } + } else { + if let Some(old_test) = res.err.insert(test.name.clone(), test) { + Err(format!("Duplicate test: {}", old_test.name))? + } + } + } + Ok(()) + } +} + +fn existing_tests(dir: &Path, ok: bool) -> Result> { + let mut res = HashMap::new(); + for file in fs::read_dir(dir)? { + let file = file?; + let path = file.path(); + if path.extension().unwrap_or_default() != "rs" { + continue; + } + let name = { + let file_name = path.file_name().unwrap().to_str().unwrap(); + file_name[5..file_name.len() - 3].to_string() + }; + let text = fs::read_to_string(&path)?; + let test = Test { name: name.clone(), text, ok }; + if let Some(old) = res.insert(name, (path, test)) { + println!("Duplicate test: {:?}", old); + } + } + Ok(res) +} diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs new file mode 100644 index 000000000..6a81c0e4d --- /dev/null +++ b/xtask/src/codegen/gen_syntax.rs @@ -0,0 +1,354 @@ +//! This module generates AST datatypes used by rust-analyzer. +//! +//! Specifically, it generates the `SyntaxKind` enum and a number of newtype +//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`. + +use std::{ + collections::BTreeMap, + fs, + io::Write, + process::{Command, Stdio}, +}; + +use proc_macro2::{Punct, Spacing}; +use quote::{format_ident, quote}; +use ron; +use serde::Deserialize; + +use crate::{ + codegen::{self, update, Mode}, + project_root, Result, +}; + +pub fn generate_syntax(mode: Mode) -> Result<()> { + let grammar = project_root().join(codegen::GRAMMAR); + let grammar: Grammar = { + let text = fs::read_to_string(grammar)?; + ron::de::from_str(&text)?
+ }; + + let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); + let syntax_kinds = generate_syntax_kinds(&grammar)?; + update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; + + let ast_file = project_root().join(codegen::AST); + let ast = generate_ast(&grammar)?; + update(ast_file.as_path(), &ast, mode)?; + + Ok(()) +} + +fn generate_ast(grammar: &Grammar) -> Result { + let nodes = grammar.ast.iter().map(|(name, ast_node)| { + let variants = + ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::>(); + let name = format_ident!("{}", name); + + let adt = if variants.is_empty() { + let kind = format_ident!("{}", to_upper_snake_case(&name.to_string())); + quote! { + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + pub struct #name { + pub(crate) syntax: SyntaxNode, + } + + impl AstNode for #name { + fn can_cast(kind: SyntaxKind) -> bool { + match kind { + #kind => true, + _ => false, + } + } + fn cast(syntax: SyntaxNode) -> Option { + if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None } + } + fn syntax(&self) -> &SyntaxNode { &self.syntax } + } + } + } else { + let kinds = variants + .iter() + .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string()))) + .collect::>(); + + quote! 
{ + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + pub enum #name { + #(#variants(#variants),)* + } + + #( + impl From<#variants> for #name { + fn from(node: #variants) -> #name { + #name::#variants(node) + } + } + )* + + impl AstNode for #name { + fn can_cast(kind: SyntaxKind) -> bool { + match kind { + #(#kinds)|* => true, + _ => false, + } + } + fn cast(syntax: SyntaxNode) -> Option { + let res = match syntax.kind() { + #( + #kinds => #name::#variants(#variants { syntax }), + )* + _ => return None, + }; + Some(res) + } + fn syntax(&self) -> &SyntaxNode { + match self { + #( + #name::#variants(it) => &it.syntax, + )* + } + } + } + } + }; + + let traits = ast_node.traits.iter().map(|trait_name| { + let trait_name = format_ident!("{}", trait_name); + quote!(impl ast::#trait_name for #name {}) + }); + + let collections = ast_node.collections.iter().map(|(name, kind)| { + let method_name = format_ident!("{}", name); + let kind = format_ident!("{}", kind); + quote! { + pub fn #method_name(&self) -> AstChildren<#kind> { + AstChildren::new(&self.syntax) + } + } + }); + + let options = ast_node.options.iter().map(|attr| { + let method_name = match attr { + Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)), + Attr::NameType(n, _) => format_ident!("{}", n), + }; + let ty = match attr { + Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t), + }; + quote! { + pub fn #method_name(&self) -> Option<#ty> { + AstChildren::new(&self.syntax).next() + } + } + }); + + quote! { + #adt + + #(#traits)* + + impl #name { + #(#collections)* + #(#options)* + } + } + }); + + let ast = quote! 
{ + use crate::{ + SyntaxNode, SyntaxKind::{self, *}, + ast::{self, AstNode, AstChildren}, + }; + + #(#nodes)* + }; + + let pretty = reformat(ast)?; + Ok(pretty) +} + +fn generate_syntax_kinds(grammar: &Grammar) -> Result { + let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar + .punct + .iter() + .filter(|(token, _name)| token.len() == 1) + .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name))) + .unzip(); + + let punctuation_values = grammar.punct.iter().map(|(token, _name)| { + if "{}[]()".contains(token) { + let c = token.chars().next().unwrap(); + quote! { #c } + } else { + let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint)); + quote! { #(#cs)* } + } + }); + let punctuation = + grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::>(); + + let full_keywords_values = &grammar.keywords; + let full_keywords = + full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw))); + + let all_keywords_values = + grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::>(); + let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw)); + let all_keywords = all_keywords_values + .iter() + .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name))) + .collect::>(); + + let literals = + grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::>(); + + let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::>(); + + let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::>(); + + let ast = quote! { + #![allow(bad_style, missing_docs, unreachable_pub)] + /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. 
+ #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] + #[repr(u16)] + pub enum SyntaxKind { + // Technical SyntaxKinds: they appear temporarily during parsing, + // but never end up in the final tree + #[doc(hidden)] + TOMBSTONE, + #[doc(hidden)] + EOF, + #(#punctuation,)* + #(#all_keywords,)* + #(#literals,)* + #(#tokens,)* + #(#nodes,)* + + // Technical kind so that we can cast from u16 safely + #[doc(hidden)] + __LAST, + } + use self::SyntaxKind::*; + + impl SyntaxKind { + pub fn is_keyword(self) -> bool { + match self { + #(#all_keywords)|* => true, + _ => false, + } + } + + pub fn is_punct(self) -> bool { + match self { + #(#punctuation)|* => true, + _ => false, + } + } + + pub fn is_literal(self) -> bool { + match self { + #(#literals)|* => true, + _ => false, + } + } + + pub fn from_keyword(ident: &str) -> Option { + let kw = match ident { + #(#full_keywords_values => #full_keywords,)* + _ => return None, + }; + Some(kw) + } + + pub fn from_char(c: char) -> Option { + let tok = match c { + #(#single_byte_tokens_values => #single_byte_tokens,)* + _ => return None, + }; + Some(tok) + } + } + + #[macro_export] + macro_rules! T { + #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)* + #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)* + } + }; + + reformat(ast) +} + +fn reformat(text: impl std::fmt::Display) -> Result { + let mut rustfmt = Command::new("rustfmt") + .arg("--config-path") + .arg(project_root().join("rustfmt.toml")) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn()?; + write!(rustfmt.stdin.take().unwrap(), "{}", text)?; + let output = rustfmt.wait_with_output()?; + let stdout = String::from_utf8(output.stdout)?; + let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`"; + Ok(format!("//!
{}\n\n{}", preamble, stdout)) +} + +#[derive(Deserialize, Debug)] +struct Grammar { + punct: Vec<(String, String)>, + keywords: Vec, + contextual_keywords: Vec, + literals: Vec, + tokens: Vec, + nodes: Vec, + ast: BTreeMap, +} + +#[derive(Deserialize, Debug)] +struct AstNode { + #[serde(default)] + #[serde(rename = "enum")] + variants: Vec, + + #[serde(default)] + traits: Vec, + #[serde(default)] + collections: Vec<(String, String)>, + #[serde(default)] + options: Vec, +} + +#[derive(Deserialize, Debug)] +#[serde(untagged)] +enum Attr { + Type(String), + NameType(String, String), +} + +fn to_upper_snake_case(s: &str) -> String { + let mut buf = String::with_capacity(s.len()); + let mut prev_is_upper = None; + for c in s.chars() { + if c.is_ascii_uppercase() && prev_is_upper == Some(false) { + buf.push('_') + } + prev_is_upper = Some(c.is_ascii_uppercase()); + + buf.push(c.to_ascii_uppercase()); + } + buf +} + +fn to_lower_snake_case(s: &str) -> String { + let mut buf = String::with_capacity(s.len()); + let mut prev_is_upper = None; + for c in s.chars() { + if c.is_ascii_uppercase() && prev_is_upper == Some(false) { + buf.push('_') + } + prev_is_upper = Some(c.is_ascii_uppercase()); + + buf.push(c.to_ascii_lowercase()); + } + buf +} -- cgit v1.2.3 From 6048d294009f0f58593747e0870aa174e29a32af Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 23 Oct 2019 18:24:40 +0300 Subject: xtask: don't depend on itertools xtask should be fast to compile, as it's a gateway to rust-analyzer --- xtask/src/codegen/gen_parser_tests.rs | 39 ++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 17 deletions(-) (limited to 'xtask/src/codegen') diff --git a/xtask/src/codegen/gen_parser_tests.rs b/xtask/src/codegen/gen_parser_tests.rs index e09b6fcfe..0f550d948 100644 --- a/xtask/src/codegen/gen_parser_tests.rs +++ b/xtask/src/codegen/gen_parser_tests.rs @@ -7,8 +7,6 @@ use std::{ path::{Path, PathBuf}, }; -use itertools::Itertools; - use crate::{ codegen::{self,
update, Mode}, project_root, Result, @@ -61,38 +59,45 @@ struct Tests { fn collect_tests(s: &str) -> Vec<(usize, Test)> { let mut res = vec![]; let prefix = "// "; - let comment_blocks = s - .lines() - .map(str::trim_start) - .enumerate() - .group_by(|(_idx, line)| line.starts_with(prefix)); + let lines = s.lines().map(str::trim_start).enumerate(); - 'outer: for (is_comment, block) in comment_blocks.into_iter() { - if !is_comment { - continue; + let mut block = vec![]; + for (line_idx, line) in lines { + let is_comment = line.starts_with(prefix); + if is_comment { + block.push((line_idx, &line[prefix.len()..])); + } else { + process_block(&mut res, &block); + block.clear(); } - let mut block = block.map(|(idx, line)| (idx, &line[prefix.len()..])); + } + process_block(&mut res, &block); + return res; + fn process_block(acc: &mut Vec<(usize, Test)>, block: &[(usize, &str)]) { + if block.is_empty() { + return; + } let mut ok = true; + let mut block = block.iter(); let (start_line, name) = loop { match block.next() { - Some((idx, line)) if line.starts_with("test ") => { + Some(&(idx, line)) if line.starts_with("test ") => { break (idx, line["test ".len()..].to_string()); } - Some((idx, line)) if line.starts_with("test_err ") => { + Some(&(idx, line)) if line.starts_with("test_err ") => { ok = false; break (idx, line["test_err ".len()..].to_string()); } Some(_) => (), - None => continue 'outer, + None => return, } }; let text: String = - itertools::join(block.map(|(_, line)| line).chain(::std::iter::once("")), "\n"); + block.map(|(_, line)| *line).chain(std::iter::once("")).collect::>().join("\n"); assert!(!text.trim().is_empty() && text.ends_with('\n')); - res.push((start_line, Test { name, text, ok })) + acc.push((start_line, Test { name, text, ok })) } - res } fn tests_from_dir(dir: &Path) -> Result { -- cgit v1.2.3