From b5f13d8d51ef9107363a60b894a741ab596921ce Mon Sep 17 00:00:00 2001
From: Aleksey Kladov <aleksey.kladov@gmail.com>
Date: Wed, 23 Oct 2019 18:13:40 +0300
Subject: xtask: move codegen to a module

---
 xtask/src/codegen/gen_parser_tests.rs | 150 ++++++++++++++
 xtask/src/codegen/gen_syntax.rs       | 354 ++++++++++++++++++++++++++++++++++
 2 files changed, 504 insertions(+)
 create mode 100644 xtask/src/codegen/gen_parser_tests.rs
 create mode 100644 xtask/src/codegen/gen_syntax.rs

(limited to 'xtask/src/codegen')
diff --git a/xtask/src/codegen/gen_parser_tests.rs b/xtask/src/codegen/gen_parser_tests.rs
new file mode 100644
index 000000000..e09b6fcfe
--- /dev/null
+++ b/xtask/src/codegen/gen_parser_tests.rs
@@ -0,0 +1,150 @@
+//! This module greps parser's code for specially formatted comments and turns
+//! them into tests.
+
+use std::{
+    collections::HashMap,
+    fs,
+    path::{Path, PathBuf},
+};
+
+use itertools::Itertools;
+
+use crate::{
+    codegen::{self, update, Mode},
+    project_root, Result,
+};
+
+pub fn generate_parser_tests(mode: Mode) -> Result<()> {
+    let tests = tests_from_dir(&project_root().join(Path::new(codegen::GRAMMAR_DIR)))?;
+    fn install_tests(tests: &HashMap<String, Test>, into: &str, mode: Mode) -> Result<()> {
+        let tests_dir = project_root().join(into);
+        if !tests_dir.is_dir() {
+            fs::create_dir_all(&tests_dir)?;
+        }
+        // ok is never actually read, but it needs to be specified to create a Test in existing_tests
+        let existing = existing_tests(&tests_dir, true)?;
+        for t in existing.keys().filter(|&t| !tests.contains_key(t)) {
+            panic!("Test is deleted: {}", t);
+        }
+
+        let mut new_idx = existing.len() + 1;
+        for (name, test) in tests {
+            let path = match existing.get(name) {
+                Some((path, _test)) => path.clone(),
+                None => {
+                    let file_name = format!("{:04}_{}.rs", new_idx, name);
+                    new_idx += 1;
+                    tests_dir.join(file_name)
+                }
+            };
+            update(&path, &test.text, mode)?;
+        }
+        Ok(())
+    }
+    install_tests(&tests.ok, codegen::OK_INLINE_TESTS_DIR, mode)?;
+    install_tests(&tests.err, codegen::ERR_INLINE_TESTS_DIR, mode)
+}
+
+#[derive(Debug)]
+struct Test {
+    pub name: String,
+    pub text: String,
+    pub ok: bool,
+}
+
+#[derive(Default, Debug)]
+struct Tests {
+    pub ok: HashMap<String, Test>,
+    pub err: HashMap<String, Test>,
+}
+
+fn collect_tests(s: &str) -> Vec<(usize, Test)> {
+    let mut res = vec![];
+    let prefix = "// ";
+    let comment_blocks = s
+        .lines()
+        .map(str::trim_start)
+        .enumerate()
+        .group_by(|(_idx, line)| line.starts_with(prefix));
+
+    'outer: for (is_comment, block) in comment_blocks.into_iter() {
+        if !is_comment {
+            continue;
+        }
+        let mut block = block.map(|(idx, line)| (idx, &line[prefix.len()..]));
+
+        let mut ok = true;
+        let (start_line, name) = loop {
+            match block.next() {
+                Some((idx, line)) if line.starts_with("test ") => {
+                    break (idx, line["test ".len()..].to_string());
+                }
+                Some((idx, line)) if line.starts_with("test_err ") => {
+                    ok = false;
+                    break (idx, line["test_err ".len()..].to_string());
+                }
+                Some(_) => (),
+                None => continue 'outer,
+            }
+        };
+        let text: String =
+            itertools::join(block.map(|(_, line)| line).chain(::std::iter::once("")), "\n");
+        assert!(!text.trim().is_empty() && text.ends_with('\n'));
+        res.push((start_line, Test { name, text, ok }))
+    }
+    res
+}
+
+fn tests_from_dir(dir: &Path) -> Result<Tests> {
+    let mut res = Tests::default();
+    for entry in ::walkdir::WalkDir::new(dir) {
+        let entry = entry.unwrap();
+        if !entry.file_type().is_file() {
+            continue;
+        }
+        if entry.path().extension().unwrap_or_default() != "rs" {
+            continue;
+        }
+        process_file(&mut res, entry.path())?;
+    }
+    let grammar_rs = dir.parent().unwrap().join("grammar.rs");
+    process_file(&mut res, &grammar_rs)?;
+    return Ok(res);
+    fn process_file(res: &mut Tests, path: &Path) -> Result<()> {
+        let text = fs::read_to_string(path)?;
+
+        for (_, test) in collect_tests(&text) {
+            if test.ok {
+                if let Some(old_test) = res.ok.insert(test.name.clone(), test) {
+                    Err(format!("Duplicate test: {}", old_test.name))?
+                }
+            } else {
+                if let Some(old_test) = res.err.insert(test.name.clone(), test) {
+                    Err(format!("Duplicate test: {}", old_test.name))?
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+fn existing_tests(dir: &Path, ok: bool) -> Result<HashMap<String, (PathBuf, Test)>> {
+    let mut res = HashMap::new();
+    for file in fs::read_dir(dir)? {
+        let file = file?;
+        let path = file.path();
+        if path.extension().unwrap_or_default() != "rs" {
+            continue;
+        }
+        let name = {
+            let file_name = path.file_name().unwrap().to_str().unwrap();
+            file_name[5..file_name.len() - 3].to_string()
+        };
+        let text = fs::read_to_string(&path)?;
+        let test = Test { name: name.clone(), text, ok };
+        if let Some(old) = res.insert(name, (path, test)) {
+            println!("Duplicate test: {:?}", old);
+        }
+    }
+    Ok(res)
+}
diff --git a/xtask/src/codegen/gen_syntax.rs b/xtask/src/codegen/gen_syntax.rs
new file mode 100644
index 000000000..6a81c0e4d
--- /dev/null
+++ b/xtask/src/codegen/gen_syntax.rs
@@ -0,0 +1,354 @@
+//! This module generates AST datatypes used by rust-analyzer.
+//!
+//! Specifically, it generates the `SyntaxKind` enum and a number of newtype
+//! wrappers around `SyntaxNode` which implement `ra_syntax::AstNode`.
+
+use std::{
+    collections::BTreeMap,
+    fs,
+    io::Write,
+    process::{Command, Stdio},
+};
+
+use proc_macro2::{Punct, Spacing};
+use quote::{format_ident, quote};
+use ron;
+use serde::Deserialize;
+
+use crate::{
+    codegen::{self, update, Mode},
+    project_root, Result,
+};
+
+pub fn generate_syntax(mode: Mode) -> Result<()> {
+    let grammar = project_root().join(codegen::GRAMMAR);
+    let grammar: Grammar = {
+        let text = fs::read_to_string(grammar)?;
+        ron::de::from_str(&text)?
+    };
+
+    let syntax_kinds_file = project_root().join(codegen::SYNTAX_KINDS);
+    let syntax_kinds = generate_syntax_kinds(&grammar)?;
+    update(syntax_kinds_file.as_path(), &syntax_kinds, mode)?;
+
+    let ast_file = project_root().join(codegen::AST);
+    let ast = generate_ast(&grammar)?;
+    update(ast_file.as_path(), &ast, mode)?;
+
+    Ok(())
+}
+
+fn generate_ast(grammar: &Grammar) -> Result<String> {
+    let nodes = grammar.ast.iter().map(|(name, ast_node)| {
+        let variants =
+            ast_node.variants.iter().map(|var| format_ident!("{}", var)).collect::<Vec<_>>();
+        let name = format_ident!("{}", name);
+
+        let adt = if variants.is_empty() {
+            let kind = format_ident!("{}", to_upper_snake_case(&name.to_string()));
+            quote! {
+                #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+                pub struct #name {
+                    pub(crate) syntax: SyntaxNode,
+                }
+
+                impl AstNode for #name {
+                    fn can_cast(kind: SyntaxKind) -> bool {
+                        match kind {
+                            #kind => true,
+                            _ => false,
+                        }
+                    }
+                    fn cast(syntax: SyntaxNode) -> Option<Self> {
+                        if Self::can_cast(syntax.kind()) { Some(Self { syntax }) } else { None }
+                    }
+                    fn syntax(&self) -> &SyntaxNode { &self.syntax }
+                }
+            }
+        } else {
+            let kinds = variants
+                .iter()
+                .map(|name| format_ident!("{}", to_upper_snake_case(&name.to_string())))
+                .collect::<Vec<_>>();
+
+            quote! {
+                #[derive(Debug, Clone, PartialEq, Eq, Hash)]
+                pub enum #name {
+                    #(#variants(#variants),)*
+                }
+
+                #(
+                impl From<#variants> for #name {
+                    fn from(node: #variants) -> #name {
+                        #name::#variants(node)
+                    }
+                }
+                )*
+
+                impl AstNode for #name {
+                    fn can_cast(kind: SyntaxKind) -> bool {
+                        match kind {
+                            #(#kinds)|* => true,
+                            _ => false,
+                        }
+                    }
+                    fn cast(syntax: SyntaxNode) -> Option<Self> {
+                        let res = match syntax.kind() {
+                            #(
+                            #kinds => #name::#variants(#variants { syntax }),
+                            )*
+                            _ => return None,
+                        };
+                        Some(res)
+                    }
+                    fn syntax(&self) -> &SyntaxNode {
+                        match self {
+                            #(
+                            #name::#variants(it) => &it.syntax,
+                            )*
+                        }
+                    }
+                }
+            }
+        };
+
+        let traits = ast_node.traits.iter().map(|trait_name| {
+            let trait_name = format_ident!("{}", trait_name);
+            quote!(impl ast::#trait_name for #name {})
+        });
+
+        let collections = ast_node.collections.iter().map(|(name, kind)| {
+            let method_name = format_ident!("{}", name);
+            let kind = format_ident!("{}", kind);
+            quote! {
+                pub fn #method_name(&self) -> AstChildren<#kind> {
+                    AstChildren::new(&self.syntax)
+                }
+            }
+        });
+
+        let options = ast_node.options.iter().map(|attr| {
+            let method_name = match attr {
+                Attr::Type(t) => format_ident!("{}", to_lower_snake_case(&t)),
+                Attr::NameType(n, _) => format_ident!("{}", n),
+            };
+            let ty = match attr {
+                Attr::Type(t) | Attr::NameType(_, t) => format_ident!("{}", t),
+            };
+            quote! {
+                pub fn #method_name(&self) -> Option<#ty> {
+                    AstChildren::new(&self.syntax).next()
+                }
+            }
+        });
+
+        quote! {
+            #adt
+
+            #(#traits)*
+
+            impl #name {
+                #(#collections)*
+                #(#options)*
+            }
+        }
+    });
+
+    let ast = quote! {
+        use crate::{
+            SyntaxNode, SyntaxKind::{self, *},
+            ast::{self, AstNode, AstChildren},
+        };
+
+        #(#nodes)*
+    };
+
+    let pretty = reformat(ast)?;
+    Ok(pretty)
+}
+
+fn generate_syntax_kinds(grammar: &Grammar) -> Result<String> {
+    let (single_byte_tokens_values, single_byte_tokens): (Vec<_>, Vec<_>) = grammar
+        .punct
+        .iter()
+        .filter(|(token, _name)| token.len() == 1)
+        .map(|(token, name)| (token.chars().next().unwrap(), format_ident!("{}", name)))
+        .unzip();
+
+    let punctuation_values = grammar.punct.iter().map(|(token, _name)| {
+        if "{}[]()".contains(token) {
+            let c = token.chars().next().unwrap();
+            quote! { #c }
+        } else {
+            let cs = token.chars().map(|c| Punct::new(c, Spacing::Joint));
+            quote! { #(#cs)* }
+        }
+    });
+    let punctuation =
+        grammar.punct.iter().map(|(_token, name)| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let full_keywords_values = &grammar.keywords;
+    let full_keywords =
+        full_keywords_values.iter().map(|kw| format_ident!("{}_KW", to_upper_snake_case(&kw)));
+
+    let all_keywords_values =
+        grammar.keywords.iter().chain(grammar.contextual_keywords.iter()).collect::<Vec<_>>();
+    let all_keywords_idents = all_keywords_values.iter().map(|kw| format_ident!("{}", kw));
+    let all_keywords = all_keywords_values
+        .iter()
+        .map(|name| format_ident!("{}_KW", to_upper_snake_case(&name)))
+        .collect::<Vec<_>>();
+
+    let literals =
+        grammar.literals.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let tokens = grammar.tokens.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let nodes = grammar.nodes.iter().map(|name| format_ident!("{}", name)).collect::<Vec<_>>();
+
+    let ast = quote! {
+        #![allow(bad_style, missing_docs, unreachable_pub)]
+        /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`.
+        #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
+        #[repr(u16)]
+        pub enum SyntaxKind {
+            // Technical SyntaxKinds: they appear temporarily during parsing,
+            // but never end up in the final tree
+            #[doc(hidden)]
+            TOMBSTONE,
+            #[doc(hidden)]
+            EOF,
+            #(#punctuation,)*
+            #(#all_keywords,)*
+            #(#literals,)*
+            #(#tokens,)*
+            #(#nodes,)*
+
+            // Technical kind so that we can cast from u16 safely
+            #[doc(hidden)]
+            __LAST,
+        }
+        use self::SyntaxKind::*;
+
+        impl SyntaxKind {
+            pub fn is_keyword(self) -> bool {
+                match self {
+                    #(#all_keywords)|* => true,
+                    _ => false,
+                }
+            }
+
+            pub fn is_punct(self) -> bool {
+                match self {
+                    #(#punctuation)|* => true,
+                    _ => false,
+                }
+            }
+
+            pub fn is_literal(self) -> bool {
+                match self {
+                    #(#literals)|* => true,
+                    _ => false,
+                }
+            }
+
+            pub fn from_keyword(ident: &str) -> Option<SyntaxKind> {
+                let kw = match ident {
+                    #(#full_keywords_values => #full_keywords,)*
+                    _ => return None,
+                };
+                Some(kw)
+            }
+
+            pub fn from_char(c: char) -> Option<SyntaxKind> {
+                let tok = match c {
+                    #(#single_byte_tokens_values => #single_byte_tokens,)*
+                    _ => return None,
+                };
+                Some(tok)
+            }
+        }
+
+        #[macro_export]
+        macro_rules! T {
+            #((#punctuation_values) => { $crate::SyntaxKind::#punctuation };)*
+            #((#all_keywords_idents) => { $crate::SyntaxKind::#all_keywords };)*
+        }
+    };
+
+    reformat(ast)
+}
+
+fn reformat(text: impl std::fmt::Display) -> Result<String> {
+    let mut rustfmt = Command::new("rustfmt")
+        .arg("--config-path")
+        .arg(project_root().join("rustfmt.toml"))
+        .stdin(Stdio::piped())
+        .stdout(Stdio::piped())
+        .spawn()?;
+    write!(rustfmt.stdin.take().unwrap(), "{}", text)?;
+    let output = rustfmt.wait_with_output()?;
+    let stdout = String::from_utf8(output.stdout)?;
+    let preamble = "Generated file, do not edit by hand, see `crate/ra_tools/src/codegen`";
+    Ok(format!("//! {}\n\n{}", preamble, stdout))
+}
+
+#[derive(Deserialize, Debug)]
+struct Grammar {
+    punct: Vec<(String, String)>,
+    keywords: Vec<String>,
+    contextual_keywords: Vec<String>,
+    literals: Vec<String>,
+    tokens: Vec<String>,
+    nodes: Vec<String>,
+    ast: BTreeMap<String, AstNode>,
+}
+
+#[derive(Deserialize, Debug)]
+struct AstNode {
+    #[serde(default)]
+    #[serde(rename = "enum")]
+    variants: Vec<String>,
+
+    #[serde(default)]
+    traits: Vec<String>,
+    #[serde(default)]
+    collections: Vec<(String, String)>,
+    #[serde(default)]
+    options: Vec<Attr>,
+}
+
+#[derive(Deserialize, Debug)]
+#[serde(untagged)]
+enum Attr {
+    Type(String),
+    NameType(String, String),
+}
+
+fn to_upper_snake_case(s: &str) -> String {
+    let mut buf = String::with_capacity(s.len());
+    let mut prev_is_upper = None;
+    for c in s.chars() {
+        if c.is_ascii_uppercase() && prev_is_upper == Some(false) {
+            buf.push('_')
+        }
+        prev_is_upper = Some(c.is_ascii_uppercase());
+
+        buf.push(c.to_ascii_uppercase());
+    }
+    buf
+}
+
+fn to_lower_snake_case(s: &str) -> String {
+    let mut buf = String::with_capacity(s.len());
+    let mut prev_is_upper = None;
+    for c in s.chars() {
+        if c.is_ascii_uppercase() && prev_is_upper == Some(false) {
+            buf.push('_')
+        }
+        prev_is_upper = Some(c.is_ascii_uppercase());
+
+        buf.push(c.to_ascii_lowercase());
+    }
+    buf
+}
-- 
cgit v1.2.3


From 6048d294009f0f58593747e0870aa174e29a32af Mon Sep 17 00:00:00 2001
From: Aleksey Kladov <aleksey.kladov@gmail.com>
Date: Wed, 23 Oct 2019 18:24:40 +0300
Subject: xtask: don't depend on itertools

xtask should be fast to compile, as it's a gateway to rust-analyzer
---
 xtask/src/codegen/gen_parser_tests.rs | 39 ++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 17 deletions(-)

(limited to 'xtask/src/codegen')

diff --git a/xtask/src/codegen/gen_parser_tests.rs b/xtask/src/codegen/gen_parser_tests.rs
index e09b6fcfe..0f550d948 100644
--- a/xtask/src/codegen/gen_parser_tests.rs
+++ b/xtask/src/codegen/gen_parser_tests.rs
@@ -7,8 +7,6 @@ use std::{
     path::{Path, PathBuf},
 };
 
-use itertools::Itertools;
-
 use crate::{
     codegen::{self, update, Mode},
     project_root, Result,
@@ -61,38 +59,45 @@ struct Tests {
 fn collect_tests(s: &str) -> Vec<(usize, Test)> {
     let mut res = vec![];
     let prefix = "// ";
-    let comment_blocks = s
-        .lines()
-        .map(str::trim_start)
-        .enumerate()
-        .group_by(|(_idx, line)| line.starts_with(prefix));
+    let lines = s.lines().map(str::trim_start).enumerate();
 
-    'outer: for (is_comment, block) in comment_blocks.into_iter() {
-        if !is_comment {
-            continue;
+    let mut block = vec![];
+    for (line_idx, line) in lines {
+        let is_comment = line.starts_with(prefix);
+        if is_comment {
+            block.push((line_idx, &line[prefix.len()..]));
+        } else {
+            process_block(&mut res, &block);
+            block.clear();
         }
-        let mut block = block.map(|(idx, line)| (idx, &line[prefix.len()..]));
+    }
+    process_block(&mut res, &block);
+    return res;
 
+    fn process_block(acc: &mut Vec<(usize, Test)>, block: &[(usize, &str)]) {
+        if block.is_empty() {
+            return;
+        }
         let mut ok = true;
+        let mut block = block.iter();
         let (start_line, name) = loop {
             match block.next() {
-                Some((idx, line)) if line.starts_with("test ") => {
+                Some(&(idx, line)) if line.starts_with("test ") => {
                     break (idx, line["test ".len()..].to_string());
                 }
-                Some((idx, line)) if line.starts_with("test_err ") => {
+                Some(&(idx, line)) if line.starts_with("test_err ") => {
                     ok = false;
                     break (idx, line["test_err ".len()..].to_string());
                 }
                 Some(_) => (),
-                None => continue 'outer,
+                None => return,
             }
         };
         let text: String =
-            itertools::join(block.map(|(_, line)| line).chain(::std::iter::once("")), "\n");
+            block.map(|(_, line)| *line).chain(std::iter::once("")).collect::<Vec<_>>().join("\n");
         assert!(!text.trim().is_empty() && text.ends_with('\n'));
-        res.push((start_line, Test { name, text, ok }))
+        acc.push((start_line, Test { name, text, ok }))
     }
-    res
 }
 
 fn tests_from_dir(dir: &Path) -> Result<Tests> {
-- 
cgit v1.2.3