From 550d7fbe3cbf2af4a47fca6c9bbefaf798cd7b7b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 12 Aug 2020 16:46:20 +0200 Subject: Rename ra_tt -> tt --- crates/tt/Cargo.toml | 16 ++++ crates/tt/src/buffer.rs | 185 ++++++++++++++++++++++++++++++++++++ crates/tt/src/lib.rs | 246 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 447 insertions(+) create mode 100644 crates/tt/Cargo.toml create mode 100644 crates/tt/src/buffer.rs create mode 100644 crates/tt/src/lib.rs (limited to 'crates/tt') diff --git a/crates/tt/Cargo.toml b/crates/tt/Cargo.toml new file mode 100644 index 000000000..dfcdcf03e --- /dev/null +++ b/crates/tt/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "tt" +version = "0.0.0" +license = "MIT OR Apache-2.0" +authors = ["rust-analyzer developers"] +edition = "2018" + +[lib] +doctest = false + +[dependencies] +# ideally, `serde` should be enabled by `rust-analyzer`, but we enable it here +# to reduce number of compilations +smol_str = { version = "0.1.15", features = ["serde"] } + +stdx = { path = "../stdx" } diff --git a/crates/tt/src/buffer.rs b/crates/tt/src/buffer.rs new file mode 100644 index 000000000..02c771f70 --- /dev/null +++ b/crates/tt/src/buffer.rs @@ -0,0 +1,185 @@ +//! FIXME: write short doc here + +use crate::{Subtree, TokenTree}; + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct EntryId(usize); + +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +struct EntryPtr(EntryId, usize); + +/// Internal type which is used instead of `TokenTree` to represent a token tree +/// within a `TokenBuffer`. +#[derive(Debug)] +enum Entry<'t> { + // Mimicking types from proc-macro. + Subtree(&'t TokenTree, EntryId), + Leaf(&'t TokenTree), + // End entries contain a pointer to the entry from the containing + // token tree, or None if this is the outermost level. + End(Option), +} + +/// A token tree buffer +/// The safe version of `syn` [`TokenBuffer`](https://github.com/dtolnay/syn/blob/6533607f91686545cb034d2838beea338d9d0742/src/buffer.rs#L41) +#[derive(Debug)] +pub struct TokenBuffer<'t> { + buffers: Vec]>>, +} + +impl<'t> TokenBuffer<'t> { + pub fn new(tokens: &'t [TokenTree]) -> TokenBuffer<'t> { + let mut buffers = vec![]; + + let idx = TokenBuffer::new_inner(tokens, &mut buffers, None); + assert_eq!(idx, 0); + + TokenBuffer { buffers } + } + + fn new_inner( + tokens: &'t [TokenTree], + buffers: &mut Vec]>>, + next: Option, + ) -> usize { + // Must contain everything in tokens and then the Entry::End + let start_capacity = tokens.len() + 1; + let mut entries = Vec::with_capacity(start_capacity); + let mut children = vec![]; + + for (idx, tt) in tokens.iter().enumerate() { + match tt { + TokenTree::Leaf(_) => { + entries.push(Entry::Leaf(tt)); + } + TokenTree::Subtree(subtree) => { + entries.push(Entry::End(None)); + children.push((idx, (subtree, tt))); + } + } + } + + entries.push(Entry::End(next)); + let res = buffers.len(); + buffers.push(entries.into_boxed_slice()); + + for (child_idx, (subtree, tt)) in children { + let idx = TokenBuffer::new_inner( + &subtree.token_trees, + buffers, + Some(EntryPtr(EntryId(res), child_idx + 1)), + ); + buffers[res].as_mut()[child_idx] = Entry::Subtree(tt, EntryId(idx)); + } + + res + } + + /// Creates a cursor referencing the first token in the buffer and able to + /// traverse until the end of the buffer. + pub fn begin(&self) -> Cursor { + Cursor::create(self, EntryPtr(EntryId(0), 0)) + } + + fn entry(&self, ptr: &EntryPtr) -> Option<&Entry> { + let id = ptr.0; + self.buffers[id.0].get(ptr.1) + } +} + +/// A safe version of `Cursor` from `syn` crate https://github.com/dtolnay/syn/blob/6533607f91686545cb034d2838beea338d9d0742/src/buffer.rs#L125 +#[derive(Copy, Clone, Debug)] +pub struct Cursor<'a> { + buffer: &'a TokenBuffer<'a>, + ptr: EntryPtr, +} + +impl<'a> PartialEq for Cursor<'a> { + fn eq(&self, other: &Cursor) -> bool { + self.ptr == other.ptr && std::ptr::eq(self.buffer, other.buffer) + } +} + +impl<'a> Eq for Cursor<'a> {} + +impl<'a> Cursor<'a> { + /// Check whether it is eof + pub fn eof(self) -> bool { + matches!(self.buffer.entry(&self.ptr), None | Some(Entry::End(None))) + } + + /// If the cursor is pointing at the end of a subtree, returns + /// the parent subtree + pub fn end(self) -> Option<&'a Subtree> { + match self.entry() { + Some(Entry::End(Some(ptr))) => { + let idx = ptr.1; + if let Some(Entry::Subtree(TokenTree::Subtree(subtree), _)) = + self.buffer.entry(&EntryPtr(ptr.0, idx - 1)) + { + return Some(subtree); + } + + None + } + _ => None, + } + } + + fn entry(self) -> Option<&'a Entry<'a>> { + self.buffer.entry(&self.ptr) + } + + /// If the cursor is pointing at a `Subtree`, returns + /// a cursor into that subtree + pub fn subtree(self) -> Option> { + match self.entry() { + Some(Entry::Subtree(_, entry_id)) => { + Some(Cursor::create(self.buffer, EntryPtr(*entry_id, 0))) + } + _ => None, + } + } + + /// If the cursor is pointing at a `TokenTree`, returns it + pub fn token_tree(self) -> Option<&'a TokenTree> { + match self.entry() { + Some(Entry::Leaf(tt)) => Some(tt), + Some(Entry::Subtree(tt, _)) => Some(tt), + Some(Entry::End(_)) => None, + None => None, + } + } + + fn create(buffer: &'a TokenBuffer, ptr: EntryPtr) -> Cursor<'a> { + Cursor { buffer, ptr } + } + + /// Bump the cursor + pub fn bump(self) -> Cursor<'a> { + if let Some(Entry::End(exit)) = self.buffer.entry(&self.ptr) { + if let Some(exit) = exit { + Cursor::create(self.buffer, *exit) + } else { + self + } + } else { + Cursor::create(self.buffer, EntryPtr(self.ptr.0, self.ptr.1 + 1)) + } + } + + /// Bump the cursor, if it is a subtree, returns + /// a cursor into that subtree + pub fn bump_subtree(self) -> Cursor<'a> { + match self.entry() { + Some(Entry::Subtree(_, _)) => self.subtree().unwrap(), + _ => self.bump(), + } + } + + /// Check whether it is a top level + pub fn is_root(&self) -> bool { + let entry_id = self.ptr.0; + entry_id.0 == 0 + } +} diff --git a/crates/tt/src/lib.rs b/crates/tt/src/lib.rs new file mode 100644 index 000000000..20c3f5eab --- /dev/null +++ b/crates/tt/src/lib.rs @@ -0,0 +1,246 @@ +//! `tt` crate defines a `TokenTree` data structure: this is the interface (both +//! input and output) of macros. It closely mirrors `proc_macro` crate's +//! `TokenTree`. +use std::{ + fmt::{self, Debug}, + panic::RefUnwindSafe, +}; + +use stdx::impl_from; + +pub use smol_str::SmolStr; + +/// Represents identity of the token. +/// +/// For hygiene purposes, we need to track which expanded tokens originated from +/// which source tokens. We do it by assigning an distinct identity to each +/// source token and making sure that identities are preserved during macro +/// expansion. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct TokenId(pub u32); + +impl TokenId { + pub const fn unspecified() -> TokenId { + TokenId(!0) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum TokenTree { + Leaf(Leaf), + Subtree(Subtree), +} +impl_from!(Leaf, Subtree for TokenTree); + +impl TokenTree { + pub fn empty() -> Self { + TokenTree::Subtree(Subtree::default()) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Leaf { + Literal(Literal), + Punct(Punct), + Ident(Ident), +} +impl_from!(Literal, Punct, Ident for Leaf); + +#[derive(Clone, PartialEq, Eq, Hash, Default)] +pub struct Subtree { + pub delimiter: Option, + pub token_trees: Vec, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct Delimiter { + pub id: TokenId, + pub kind: DelimiterKind, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum DelimiterKind { + Parenthesis, + Brace, + Bracket, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Literal { + pub text: SmolStr, + pub id: TokenId, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Punct { + pub char: char, + pub spacing: Spacing, + pub id: TokenId, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Spacing { + Alone, + Joint, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Ident { + pub text: SmolStr, + pub id: TokenId, +} + +fn print_debug_subtree(f: &mut fmt::Formatter<'_>, subtree: &Subtree, level: usize) -> fmt::Result { + let align = std::iter::repeat(" ").take(level).collect::(); + + let aux = match subtree.delimiter.map(|it| (it.kind, it.id.0)) { + None => "$".to_string(), + Some((DelimiterKind::Parenthesis, id)) => format!("() {}", id), + Some((DelimiterKind::Brace, id)) => format!("{{}} {}", id), + Some((DelimiterKind::Bracket, id)) => format!("[] {}", id), + }; + + if subtree.token_trees.is_empty() { + write!(f, "{}SUBTREE {}", align, aux)?; + } else { + writeln!(f, "{}SUBTREE {}", align, aux)?; + for (idx, child) in subtree.token_trees.iter().enumerate() { + print_debug_token(f, child, level + 1)?; + if idx != subtree.token_trees.len() - 1 { + writeln!(f)?; + } + } + } + + Ok(()) +} + +fn print_debug_token(f: &mut fmt::Formatter<'_>, tkn: &TokenTree, level: usize) -> fmt::Result { + let align = std::iter::repeat(" ").take(level).collect::(); + + match tkn { + TokenTree::Leaf(leaf) => match leaf { + Leaf::Literal(lit) => write!(f, "{}LITERAL {} {}", align, lit.text, lit.id.0)?, + Leaf::Punct(punct) => write!( + f, + "{}PUNCH {} [{}] {}", + align, + punct.char, + if punct.spacing == Spacing::Alone { "alone" } else { "joint" }, + punct.id.0 + )?, + Leaf::Ident(ident) => write!(f, "{}IDENT {} {}", align, ident.text, ident.id.0)?, + }, + TokenTree::Subtree(subtree) => { + print_debug_subtree(f, subtree, level)?; + } + } + + Ok(()) +} + +impl Debug for Subtree { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + print_debug_subtree(f, self, 0) + } +} + +impl fmt::Display for TokenTree { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + TokenTree::Leaf(it) => fmt::Display::fmt(it, f), + TokenTree::Subtree(it) => fmt::Display::fmt(it, f), + } + } +} + +impl fmt::Display for Subtree { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let (l, r) = match self.delimiter_kind() { + Some(DelimiterKind::Parenthesis) => ("(", ")"), + Some(DelimiterKind::Brace) => ("{", "}"), + Some(DelimiterKind::Bracket) => ("[", "]"), + None => ("", ""), + }; + f.write_str(l)?; + let mut needs_space = false; + for tt in self.token_trees.iter() { + if needs_space { + f.write_str(" ")?; + } + needs_space = true; + match tt { + TokenTree::Leaf(Leaf::Punct(p)) => { + needs_space = p.spacing == Spacing::Alone; + fmt::Display::fmt(p, f)? + } + tt => fmt::Display::fmt(tt, f)?, + } + } + f.write_str(r)?; + Ok(()) + } +} + +impl fmt::Display for Leaf { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Leaf::Ident(it) => fmt::Display::fmt(it, f), + Leaf::Literal(it) => fmt::Display::fmt(it, f), + Leaf::Punct(it) => fmt::Display::fmt(it, f), + } + } +} + +impl fmt::Display for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.text, f) + } +} + +impl fmt::Display for Literal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.text, f) + } +} + +impl fmt::Display for Punct { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.char, f) + } +} + +impl Subtree { + /// Count the number of tokens recursively + pub fn count(&self) -> usize { + let children_count = self + .token_trees + .iter() + .map(|c| match c { + TokenTree::Subtree(c) => c.count(), + _ => 0, + }) + .sum::(); + + self.token_trees.len() + children_count + } + + pub fn delimiter_kind(&self) -> Option { + self.delimiter.map(|it| it.kind) + } +} + +pub mod buffer; + +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum ExpansionError { + IOError(String), + JsonError(String), + Unknown(String), + ExpansionError(String), +} + +pub trait TokenExpander: Debug + Send + Sync + RefUnwindSafe { + fn expand(&self, subtree: &Subtree, attrs: Option<&Subtree>) + -> Result; +} -- cgit v1.2.3