diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2019-10-23 16:57:47 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2019-10-23 16:57:47 +0100 |
commit | 2197205885f43441f14861f34449426295397dd9 (patch) | |
tree | 3af21aaefe1efdabafeb5702959e1094504813e7 /xtask/src/codegen | |
parent | edf4d8e555c6847fb9e6e61d727c4def11789bfc (diff) | |
parent | 6048d294009f0f58593747e0870aa174e29a32af (diff) |
Merge #2050
2050: xtask: don't depend on itertools r=matklad a=matklad
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'xtask/src/codegen')
-rw-r--r-- | xtask/src/codegen/gen_parser_tests.rs | 155 | ||||
-rw-r--r-- | xtask/src/codegen/gen_syntax.rs | 354 |
2 files changed, 509 insertions, 0 deletions
diff --git a/xtask/src/codegen/gen_parser_tests.rs b/xtask/src/codegen/gen_parser_tests.rs new file mode 100644 index 000000000..0f550d948 --- /dev/null +++ b/xtask/src/codegen/gen_parser_tests.rs | |||
@@ -0,0 +1,155 @@ | |||
//! This module greps the parser's code for specially formatted comments and turns
//! them into tests.
3 | |||
4 | use std::{ | ||
5 | collections::HashMap, | ||
6 | fs, | ||
7 | path::{Path, PathBuf}, | ||
8 | }; | ||
9 | |||
10 | use crate::{ | ||
11 | codegen::{self, update, Mode}, | ||
12 | project_root, Result, | ||
13 | }; | ||
14 | |||
15 | pub fn generate_parser_tests(mode: Mode) -> Result<()> { | ||
16 | let tests = tests_from_dir(&project_root().join(Path::new(codegen::GRAMMAR_DIR)))?; | ||
17 | fn install_tests(tests: &HashMap<String, Test>, into: &str, mode: Mode) -> Result<()> { | ||
18 | let tests_dir = project_root().join(into); | ||
19 | if !tests_dir.is_dir() { | ||
20 | fs::create_dir_all(&tests_dir)?; | ||
21 | } | ||
22 | // ok is never actually read, but it needs to be specified to create a Test in existing_tests | ||
23 | let existing = existing_tests(&tests_dir, true)?; | ||
24 | for t in existing.keys().filter(|&t| !tests.contains_key(t)) { | ||
25 | panic!("Test is deleted: {}", t); | ||
26 | } | ||
27 | |||
28 | let mut new_idx = existing.len() + 1; | ||
29 | for (name, test) in tests { | ||
30 | let path = match existing.get(name) { | ||
31 | Some((path, _test)) => path.clone(), | ||
32 | None => { | ||
33 | let file_name = format!("{:04}_{}.rs", new_idx, name); | ||
34 | new_idx += 1; | ||
35 | tests_dir.join(file_name) | ||
36 | } | ||
37 | }; | ||
38 | update(&path, &test.text, mode)?; | ||
39 | } | ||
40 | Ok(()) | ||
41 | } | ||
42 | install_tests(&tests.ok, codegen::OK_INLINE_TESTS_DIR, mode)?; | ||
43 | install_tests(&tests.err, codegen::ERR_INLINE_TESTS_DIR, mode) | ||
44 | } | ||
45 | |||
/// A single inline parser test extracted from a `// test` comment block.
#[derive(Debug)]
struct Test {
    /// Test name: the word following `test ` / `test_err ` in the comment.
    pub name: String,
    /// The test's source text; always ends with a newline.
    pub text: String,
    /// `true` for `test` blocks, `false` for `test_err` blocks.
    pub ok: bool,
}
52 | |||
/// All collected inline tests, bucketed by expected parse outcome.
#[derive(Default, Debug)]
struct Tests {
    /// Tests expected to parse cleanly (`// test`), keyed by name.
    pub ok: HashMap<String, Test>,
    /// Tests expected to produce parse errors (`// test_err`), keyed by name.
    pub err: HashMap<String, Test>,
}
58 | |||
59 | fn collect_tests(s: &str) -> Vec<(usize, Test)> { | ||
60 | let mut res = vec![]; | ||
61 | let prefix = "// "; | ||
62 | let lines = s.lines().map(str::trim_start).enumerate(); | ||
63 | |||
64 | let mut block = vec![]; | ||
65 | for (line_idx, line) in lines { | ||
66 | let is_comment = line.starts_with(prefix); | ||
67 | if is_comment { | ||
68 | block.push((line_idx, &line[prefix.len()..])); | ||
69 | } else { | ||
70 | process_block(&mut res, &block); | ||
71 | block.clear(); | ||
72 | } | ||
73 | } | ||
74 | process_block(&mut res, &block); | ||
75 | return res; | ||
76 | |||
77 | fn process_block(acc: &mut Vec<(usize, Test)>, block: &[(usize, &str)]) { | ||
78 | if block.is_empty() { | ||
79 | return; | ||
80 | } | ||
81 | let mut ok = true; | ||
82 | let mut block = block.iter(); | ||
83 | let (start_line, name) = loop { | ||
84 | match block.next() { | ||
85 | Some(&(idx, line)) if line.starts_with("test ") => { | ||
86 | break (idx, line["test ".len()..].to_string()); | ||
87 | } | ||
88 | Some(&(idx, line)) if line.starts_with("test_err ") => { | ||
89 | ok = false; | ||
90 | break (idx, line["test_err ".len()..].to_string()); | ||
91 | } | ||
92 | Some(_) => (), | ||
93 | None => return, | ||
94 | } | ||
95 | }; | ||
96 | let text: String = | ||
97 | block.map(|(_, line)| *line).chain(std::iter::once("")).collect::<Vec<_>>().join("\n"); | ||
98 | assert!(!text.trim().is_empty() && text.ends_with('\n')); | ||
99 | acc.push((start_line, Test { name, text, ok })) | ||
100 | } | ||
101 | } | ||
102 | |||
103 | fn tests_from_dir(dir: &Path) -> Result<Tests> { | ||
104 | let mut res = Tests::default(); | ||
105 | for entry in ::walkdir::WalkDir::new(dir) { | ||
106 | let entry = entry.unwrap(); | ||
107 | if !entry.file_type().is_file() { | ||
108 | continue; | ||
109 | } | ||
110 | if entry.path().extension().unwrap_or_default() != "rs" { | ||
111 | continue; | ||
112 | } | ||
113 | process_file(&mut res, entry.path())?; | ||
114 | } | ||
115 | let grammar_rs = dir.parent().unwrap().join("grammar.rs"); | ||
116 | process_file(&mut res, &grammar_rs)?; | ||
117 | return Ok(res); | ||
118 | fn process_file(res: &mut Tests, path: &Path) -> Result<()> { | ||
119 | let text = fs::read_to_string(path)?; | ||
120 | |||
121 | for (_, test) in collect_tests(&text) { | ||
122 | if test.ok { | ||
123 | if let Some(old_test) = res.ok.insert(test.name.clone(), test) { | ||
124 | Err(format!("Duplicate test: {}", old_test.name))? | ||
125 | } | ||
126 | } else { | ||
127 | if let Some(old_test) = res.err.insert(test.name.clone(), test) { | ||
128 | Err(format!("Duplicate test: {}", old_test.name))? | ||
129 | } | ||
130 | } | ||
131 | } | ||
132 | Ok(()) | ||
133 | } | ||
134 | } | ||
135 | |||
136 | fn existing_tests(dir: &Path, ok: bool) -> Result<HashMap<String, (PathBuf, Test)>> { | ||
137 | let mut res = HashMap::new(); | ||
138 | for file in fs::read_dir(dir)? { | ||
139 | let file = file?; | ||
140 | let path = file.path(); | ||
141 | if path.extension().unwrap_or_default() != "rs" { | ||
142 | continue; | ||
143 | } | ||
144 | let name = { | ||
145 | let file_name = path.file_name().unwrap().to_str().unwrap(); | ||
146 | file_name[5..file_name.len() - 3].to_string() | ||
147 | }; | ||
148 | let text = fs::read_to_string(&path)?; | ||
149 | let test = Test { name: name.clone(), text, ok }; | ||
150 | if let Some(old) = res.insert(name, (path, test)) { | ||
151 | println!("Duplicate test: {:?}", old); | ||
152 | } | ||
153 | } | ||
154 | Ok(res) | ||
155 | } | ||
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs new file mode 100644 index 000000000..6a81c0e4d --- /dev/null +++ b/xtask/src/codegen/gen_syntax.rs | |||
@@ -0,0 +1,354 @@ | |||
//! This module generates the AST datatypes used by rust-analyzer.
//!
//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
5 | |||
6 | use std::{ | ||
7 | collections::BTreeMap, | ||
8 | fs, | ||
9 | io::Write, | ||
10 | process::{Command, Stdio}, | ||
11 | }; | ||
12 | |||
13 | use proc_macro2::{Punct, Spacing}; | ||
14 | use quote::{format_ident, quote}; | ||
15 | use ron; | ||
16 | use serde::Deserialize; | ||
17 | |||
18 | use crate::{ | ||
19 | codegen::{self, update, Mode}, | ||
20 | project_root, Result, | ||
21 | }; | ||
22 | |||
23 | pub fn generate_syntax(mode: Mode) -> Result<()> { | ||
24 | let grammar = project_root().join(codegen::GRAMMAR); | ||
25 | let grammar: Grammar = { | ||
26 | let text = fs::read_to_string(grammar)?; | ||
27 | ron::de::from_str(&text)? | ||
28 | }; | ||
29 | |||
30 | let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS); | ||
31 | let syntax_kinds = generate_syntax_kinds(&grammar)?; | ||
32 | update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?; | ||
33 | |||
34 | let ast_file = project_root().join(codegen::AST); | ||
35 | let ast = generate_ast(&grammar)?; | ||
36 | update(ast_file.as_path(), &ast, mode)?; | ||
37 | |||
38 | Ok(()) | ||
39 | } | ||
40 | |||
/// Renders the `ast.rs` source: for every node in the grammar, a newtype
/// wrapper (or an enum over variant wrappers) around `SyntaxNode` with an
/// `AstNode` impl, trait impls, and typed child-accessor methods. The result
/// is passed through `reformat` (rustfmt) before being returned.
fn generate_ast(grammar: &Grammar) -> Result<String> {
    let nodes = grammar.ast.iter().map(|(name, ast_node)| {
        let variants =
            ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::<Vec<_>>();
        let name = format_ident!("{}", name);

        // A node without variants becomes a plain struct; one with variants
        // becomes an enum that delegates to per-variant wrapper structs.
        let adt = if variants.is_empty() {
            let kind = format_ident!("{}", to_upper_snake_case(&name.to_string()));
            quote! {
                #[derive(Debug, Clone, PartialEq, Eq, Hash)]
                pub struct #name {
                    pub(crate) syntax: SyntaxNode,
                }

                impl AstNode for #name {
                    fn can_cast(kind: SyntaxKind) -> bool {
                        match kind {
                            #kind => true,
                            _ => false,
                        }
                    }
                    fn cast(syntax: SyntaxNode) -> Option<Self> {
                        if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
                    }
                    fn syntax(&self) -> &SyntaxNode { &self.syntax }
                }
            }
        } else {
            // One SyntaxKind constant per variant, matched in can_cast/cast.
            let kinds = variants
                .iter()
                .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
                .collect::<Vec<_>>();

            quote! {
                #[derive(Debug, Clone, PartialEq, Eq, Hash)]
                pub enum #name {
                    #(#variants(#variants),)*
                }

                #(
                impl From<#variants> for #name {
                    fn from(node: #variants) -> #name {
                        #name::#variants(node)
                    }
                }
                )*

                impl AstNode for #name {
                    fn can_cast(kind: SyntaxKind) -> bool {
                        match kind {
                            #(#kinds)|* => true,
                            _ => false,
                        }
                    }
                    fn cast(syntax: SyntaxNode) -> Option<Self> {
                        let res = match syntax.kind() {
                            #(
                            #kinds => #name::#variants(#variants { syntax }),
                            )*
                            _ => return None,
                        };
                        Some(res)
                    }
                    fn syntax(&self) -> &SyntaxNode {
                        match self {
                            #(
                            #name::#variants(it) => &it.syntax,
                            )*
                        }
                    }
                }
            }
        };

        // Marker-trait impls requested by the grammar (`impl ast::Trait for Node {}`).
        let traits = ast_node.traits.iter().map(|trait_name| {
            let trait_name = format_ident!("{}", trait_name);
            quote!(impl ast::#trait_name for #name {})
        });

        // Accessors returning all children of a given kind.
        let collections = ast_node.collections.iter().map(|(name, kind)| {
            let method_name = format_ident!("{}", name);
            let kind = format_ident!("{}", kind);
            quote! {
                pub fn #method_name(&self) -> AstChildren<#kind> {
                    AstChildren::new(&self.syntax)
                }
            }
        });

        // Accessors returning an optional single child; the method name is
        // either derived from the type or given explicitly (see `Attr`).
        let options = ast_node.options.iter().map(|attr| {
            let method_name = match attr {
                Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)),
                Attr::NameType(n, _) => format_ident!("{}", n),
            };
            let ty = match attr {
                Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t),
            };
            quote! {
                pub fn #method_name(&self) -> Option<#ty> {
                    AstChildren::new(&self.syntax).next()
                }
            }
        });

        quote! {
            #adt

            #(#traits)*

            impl #name {
                #(#collections)*
                #(#options)*
            }
        }
    });

    let ast = quote! {
        use crate::{
            SyntaxNode, SyntaxKind::{self, *},
            ast::{self, AstNode, AstChildren},
        };

        #(#nodes)*
    };

    let pretty = reformat(ast)?;
    Ok(pretty)
}
169 | |||
/// Renders the `SyntaxKind` enum, its classification/conversion helpers, and
/// the `T!` shorthand macro from the token, keyword, literal, and node lists
/// in the grammar. The result is rustfmt-formatted via `reformat`.
fn generate_syntax_kinds(grammar: &Grammar) -> Result<String> {
    // Single-character punctuation split into parallel (char, ident) vectors,
    // used to generate `SyntaxKind::from_char`.
    let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
        .punct
        .iter()
        .filter(|(token, _name)| token.len() == 1)
        .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
        .unzip();

    // Token spellings for the `T!` macro arms. Bracket characters are emitted
    // as single char tokens; multi-char operators as joint punct sequences so
    // e.g. `==` stays one matcher.
    let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
        if "{}[]()".contains(token) {
            let c = token.chars().next().unwrap();
            quote! { #c }
        } else {
            let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
            quote! { #(#cs)* }
        }
    });
    let punctuation =
        grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();

    // `from_keyword` covers only proper keywords, not contextual ones.
    let full_keywords_values = &grammar.keywords;
    let full_keywords =
        full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw)));

    // The enum and `is_keyword` include contextual keywords as well.
    let all_keywords_values =
        grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
    let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
    let all_keywords = all_keywords_values
        .iter()
        .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name)))
        .collect::<Vec<_>>();

    let literals =
        grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();

    let ast = quote! {
        #![allow(bad_style, missing_docs, unreachable_pub)]
        /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
        #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
        #[repr(u16)]
        pub enum SyntaxKind {
            // Technical SyntaxKinds: they appear temporally during parsing,
            // but never end up in the final tree
            #[doc(hidden)]
            TOMBSTONE,
            #[doc(hidden)]
            EOF,
            #(#punctuation,)*
            #(#all_keywords,)*
            #(#literals,)*
            #(#tokens,)*
            #(#nodes,)*

            // Technical kind so that we can cast from u16 safely
            #[doc(hidden)]
            __LAST,
        }
        use self::SyntaxKind::*;

        impl SyntaxKind {
            pub fn is_keyword(self) -> bool {
                match self {
                    #(#all_keywords)|* => true,
                    _ => false,
                }
            }

            pub fn is_punct(self) -> bool {
                match self {
                    #(#punctuation)|* => true,
                    _ => false,
                }
            }

            pub fn is_literal(self) -> bool {
                match self {
                    #(#literals)|* => true,
                    _ => false,
                }
            }

            pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
                let kw = match ident {
                    #(#full_keywords_values => #full_keywords,)*
                    _ => return None,
                };
                Some(kw)
            }

            pub fn from_char(c: char) -> Option<SyntaxKind> {
                let tok = match c {
                    #(#single_byte_tokens_values => #single_byte_tokens,)*
                    _ => return None,
                };
                Some(tok)
            }
        }

        #[macro_export]
        macro_rules! T {
            #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)*
            #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)*
        }
    };

    reformat(ast)
}
281 | |||
282 | fn reformat(text: impl std::fmt::Display) -> Result<String> { | ||
283 | let mut rustfmt = Command::new("rustfmt") | ||
284 | .arg("--config-path") | ||
285 | .arg(project_root().join("rustfmt.toml")) | ||
286 | .stdin(Stdio::piped()) | ||
287 | .stdout(Stdio::piped()) | ||
288 | .spawn()?; | ||
289 | write!(rustfmt.stdin.take().unwrap(), "{}", text)?; | ||
290 | let output = rustfmt.wait_with_output()?; | ||
291 | let stdout = String::from_utf8(output.stdout)?; | ||
292 | let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`"; | ||
293 | Ok(format!("//! {}\n\n{}", preamble, stdout)) | ||
294 | } | ||
295 | |||
/// Deserialized form of the RON grammar description file.
#[derive(Deserialize, Debug)]
struct Grammar {
    /// Punctuation as `(token text, SyntaxKind name)` pairs, e.g. `("==", "EQEQ")`.
    punct: Vec<(String, String)>,
    /// Proper keywords.
    keywords: Vec<String>,
    /// Identifiers that act as keywords only in certain positions.
    contextual_keywords: Vec<String>,
    /// Literal token kind names.
    literals: Vec<String>,
    /// Remaining token kind names.
    tokens: Vec<String>,
    /// Syntax node kind names.
    nodes: Vec<String>,
    /// AST node descriptions keyed by type name; `BTreeMap` gives a
    /// deterministic iteration order, so generated output is stable.
    ast: BTreeMap<String, AstNode>,
}
306 | |||
/// Description of a single AST node as written in the grammar file.
#[derive(Deserialize, Debug)]
struct AstNode {
    /// For enum-like nodes: the variant type names (serialized as `enum`).
    #[serde(default)]
    #[serde(rename = "enum")]
    variants: Vec<String>,

    /// `ast::` traits this node implements (as empty marker impls).
    #[serde(default)]
    traits: Vec<String>,
    /// `(method name, child type)` pairs; each generates an `AstChildren`
    /// accessor returning all children of that type.
    #[serde(default)]
    collections: Vec<(String, String)>,
    /// Optional single children; each generates an `Option<_>` accessor.
    #[serde(default)]
    options: Vec<Attr>,
}
320 | |||
/// Spec for an optional single child: either just a type name (the accessor
/// name is derived by `to_lower_snake_case`) or an explicit
/// `(accessor name, type name)` pair. `untagged` lets RON write either form.
#[derive(Deserialize, Debug)]
#[serde(untagged)]
enum Attr {
    Type(String),
    NameType(String, String),
}
327 | |||
/// Converts a camel-case identifier to `UPPER_SNAKE_CASE`,
/// e.g. `StructDef` -> `STRUCT_DEF`.
fn to_upper_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut prev_upper: Option<bool> = None;
    for ch in s.chars() {
        let upper = ch.is_ascii_uppercase();
        // A lowercase-to-uppercase transition marks a word boundary.
        if upper && prev_upper == Some(false) {
            out.push('_');
        }
        prev_upper = Some(upper);
        out.push(ch.to_ascii_uppercase());
    }
    out
}
341 | |||
/// Converts a camel-case identifier to `lower_snake_case`,
/// e.g. `TypeParam` -> `type_param`.
fn to_lower_snake_case(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut prev_upper: Option<bool> = None;
    for ch in s.chars() {
        let upper = ch.is_ascii_uppercase();
        // A lowercase-to-uppercase transition marks a word boundary.
        if upper && prev_upper == Some(false) {
            out.push('_');
        }
        prev_upper = Some(upper);
        out.push(ch.to_ascii_lowercase());
    }
    out
}