Rename ra_mbe -> mbe

author: Aleksey Kladov <[email protected]> 2020-08-13 09:08:11 +0100
committer: Aleksey Kladov <[email protected]> 2020-08-13 09:36:04 +0100
commit: 2f45cfc415626cfae5cba89c88a25fb3225486f7 (patch)
tree: e04c21d108582a89efd2d3e5a680a0ed35156912 /crates/mbe/src/lib.rs
parent: d42ba6397668fe28bd9cd92db829755905469a69 (diff)
1 files changed, 278 insertions, 0 deletions
diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs
new file mode 100644
index 000000000..f854ca09a
--- /dev/null
+++ b/crates/mbe/src/lib.rs
@@ -0,0 +1,278 @@
+//! `mbe` (short for Macro By Example) crate contains code for handling
+//! `macro_rules` macros. It uses `TokenTree` (from `tt` package) as the
+//! interface, although it contains some code to bridge `SyntaxNode`s and
+//! `TokenTree`s as well!
+mod parser;
+mod mbe_expander;
+mod syntax_bridge;
+mod tt_iter;
+mod subtree_source;
+#[cfg(test)]
+mod tests;
+pub use tt::{Delimiter, Punct};
+use crate::{
+    parser::{parse_pattern, Op},
+    tt_iter::TtIter,
+};
+#[derive(Debug, PartialEq, Eq)]
+pub enum ParseError {
+    Expected(String),
+    RepetitionEmtpyTokenTree,
+}
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub enum ExpandError {
+    NoMatchingRule,
+    UnexpectedToken,
+    BindingError(String),
+    ConversionError,
+    InvalidRepeat,
+    ProcMacroError(tt::ExpansionError),
+}
+impl From<tt::ExpansionError> for ExpandError {
+    fn from(it: tt::ExpansionError) -> Self {
+        ExpandError::ProcMacroError(it)
+    }
+}
+pub use crate::syntax_bridge::{
+    ast_to_token_tree, parse_to_token_tree, syntax_node_to_token_tree, token_tree_to_syntax_node,
+    TokenMap,
+};
+/// This struct contains AST for a single `macro_rules` definition. What might
+/// be very confusing is that AST has almost exactly the same shape as
+/// `tt::TokenTree`, but there's a crucial difference: in macro rules, `$ident`
+/// and `$()*` have special meaning (see `Var` and `Repeat` data structures)
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct MacroRules {
+    rules: Vec<Rule>,
+    /// Highest id of the token we have in TokenMap
+    shift: Shift,
+}
+#[derive(Clone, Debug, PartialEq, Eq)]
+struct Rule {
+    lhs: tt::Subtree,
+    rhs: tt::Subtree,
+}
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+struct Shift(u32);
+impl Shift {
+    fn new(tt: &tt::Subtree) -> Shift {
+        // Note that TokenId is started from zero,
+        // We have to add 1 to prevent duplication.
+        let value = max_id(tt).map_or(0, |it| it + 1);
+        return Shift(value);
+        // Find the max token id inside a subtree
+        fn max_id(subtree: &tt::Subtree) -> Option<u32> {
+            subtree
+                .token_trees
+                .iter()
+                .filter_map(|tt| match tt {
+                    tt::TokenTree::Subtree(subtree) => {
+                        let tree_id = max_id(subtree);
+                        match subtree.delimiter {
+                            Some(it) if it.id != tt::TokenId::unspecified() => {
+                                Some(tree_id.map_or(it.id.0, |t| t.max(it.id.0)))
+                            }
+                            _ => tree_id,
+                        }
+                    }
+                    tt::TokenTree::Leaf(tt::Leaf::Ident(ident))
+                        if ident.id != tt::TokenId::unspecified() =>
+                    {
+                        Some(ident.id.0)
+                    }
+                    _ => None,
+                })
+                .max()
+        }
+    }
+    /// Shift given TokenTree token id
+    fn shift_all(self, tt: &mut tt::Subtree) {
+        for t in tt.token_trees.iter_mut() {
+            match t {
+                tt::TokenTree::Leaf(leaf) => match leaf {
+                    tt::Leaf::Ident(ident) => ident.id = self.shift(ident.id),
+                    tt::Leaf::Punct(punct) => punct.id = self.shift(punct.id),
+                    tt::Leaf::Literal(lit) => lit.id = self.shift(lit.id),
+                },
+                tt::TokenTree::Subtree(tt) => {
+                    if let Some(it) = tt.delimiter.as_mut() {
+                        it.id = self.shift(it.id);
+                    };
+                    self.shift_all(tt)
+                }
+            }
+        }
+    }
+    fn shift(self, id: tt::TokenId) -> tt::TokenId {
+        if id == tt::TokenId::unspecified() {
+            return id;
+        }
+        tt::TokenId(id.0 + self.0)
+    }
+    fn unshift(self, id: tt::TokenId) -> Option<tt::TokenId> {
+        id.0.checked_sub(self.0).map(tt::TokenId)
+    }
+}
+#[derive(Debug, Eq, PartialEq)]
+pub enum Origin {
+    Def,
+    Call,
+}
+impl MacroRules {
+    pub fn parse(tt: &tt::Subtree) -> Result<MacroRules, ParseError> {
+        // Note: this parsing can be implemented using mbe machinery itself, by
+        // matching against `$($lhs:tt => $rhs:tt);*` pattern, but implementing
+        // manually seems easier.
+        let mut src = TtIter::new(tt);
+        let mut rules = Vec::new();
+        while src.len() > 0 {
+            let rule = Rule::parse(&mut src)?;
+            rules.push(rule);
+            if let Err(()) = src.expect_char(';') {
+                if src.len() > 0 {
+                    return Err(ParseError::Expected("expected `:`".to_string()));
+                }
+                break;
+            }
+        }
+        for rule in rules.iter() {
+            validate(&rule.lhs)?;
+        }
+        Ok(MacroRules { rules, shift: Shift::new(tt) })
+    }
+    pub fn expand(&self, tt: &tt::Subtree) -> ExpandResult<tt::Subtree> {
+        // apply shift
+        let mut tt = tt.clone();
+        self.shift.shift_all(&mut tt);
+        mbe_expander::expand(self, &tt)
+    }
+    pub fn map_id_down(&self, id: tt::TokenId) -> tt::TokenId {
+        self.shift.shift(id)
+    }
+    pub fn map_id_up(&self, id: tt::TokenId) -> (tt::TokenId, Origin) {
+        match self.shift.unshift(id) {
+            Some(id) => (id, Origin::Call),
+            None => (id, Origin::Def),
+        }
+    }
+}
+impl Rule {
+    fn parse(src: &mut TtIter) -> Result<Rule, ParseError> {
+        let mut lhs = src
+            .expect_subtree()
+            .map_err(|()| ParseError::Expected("expected subtree".to_string()))?
+            .clone();
+        lhs.delimiter = None;
+        src.expect_char('=').map_err(|()| ParseError::Expected("expected `=`".to_string()))?;
+        src.expect_char('>').map_err(|()| ParseError::Expected("expected `>`".to_string()))?;
+        let mut rhs = src
+            .expect_subtree()
+            .map_err(|()| ParseError::Expected("expected subtree".to_string()))?
+            .clone();
+        rhs.delimiter = None;
+        Ok(crate::Rule { lhs, rhs })
+    }
+}
+fn to_parse_error(e: ExpandError) -> ParseError {
+    let msg = match e {
+        ExpandError::InvalidRepeat => "invalid repeat".to_string(),
+        _ => "invalid macro definition".to_string(),
+    };
+    ParseError::Expected(msg)
+}
+fn validate(pattern: &tt::Subtree) -> Result<(), ParseError> {
+    for op in parse_pattern(pattern) {
+        let op = op.map_err(to_parse_error)?;
+        match op {
+            Op::TokenTree(tt::TokenTree::Subtree(subtree)) => validate(subtree)?,
+            Op::Repeat { subtree, separator, .. } => {
+                // Checks that no repetition which could match an empty token
+                // https://github.com/rust-lang/rust/blob/a58b1ed44f5e06976de2bdc4d7dc81c36a96934f/src/librustc_expand/mbe/macro_rules.rs#L558
+                if separator.is_none() {
+                    if parse_pattern(subtree).all(|child_op| {
+                        match child_op.map_err(to_parse_error) {
+                            Ok(Op::Var { kind, .. }) => {
+                                // vis is optional
+                                if kind.map_or(false, |it| it == "vis") {
+                                    return true;
+                                }
+                            }
+                            Ok(Op::Repeat { kind, .. }) => {
+                                return matches!(
+                                    kind,
+                                    parser::RepeatKind::ZeroOrMore | parser::RepeatKind::ZeroOrOne
+                                )
+                            }
+                            _ => {}
+                        }
+                        false
+                    }) {
+                        return Err(ParseError::RepetitionEmtpyTokenTree);
+                    }
+                }
+                validate(subtree)?
+            }
+            _ => (),
+        }
+    }
+    Ok(())
+}
+#[derive(Debug)]
+pub struct ExpandResult<T>(pub T, pub Option<ExpandError>);
+impl<T> ExpandResult<T> {
+    pub fn ok(t: T) -> ExpandResult<T> {
+        ExpandResult(t, None)
+    }
+    pub fn only_err(err: ExpandError) -> ExpandResult<T>
+    where
+        T: Default,
+    {
+        ExpandResult(Default::default(), Some(err))
+    }
+    pub fn map<U>(self, f: impl FnOnce(T) -> U) -> ExpandResult<U> {
+        ExpandResult(f(self.0), self.1)
+    }
+    pub fn result(self) -> Result<T, ExpandError> {
+        self.1.map(Err).unwrap_or(Ok(self.0))
+    }
+}
+impl<T: Default> From<Result<T, ExpandError>> for ExpandResult<T> {
+    fn from(result: Result<T, ExpandError>) -> ExpandResult<T> {
+        result
+            .map_or_else(|e| ExpandResult(Default::default(), Some(e)), |it| ExpandResult(it, None))
+    }
+}
author	Aleksey Kladov <[email protected]>	2020-08-13 09:08:11 +0100
committer	Aleksey Kladov <[email protected]>	2020-08-13 09:36:04 +0100
commit	2f45cfc415626cfae5cba89c88a25fb3225486f7 (patch)
tree	e04c21d108582a89efd2d3e5a680a0ed35156912 /crates/mbe/src/lib.rs
parent	d42ba6397668fe28bd9cd92db829755905469a69 (diff)

diff --git a/crates/mbe/src/lib.rs b/crates/mbe/src/lib.rs new file mode 100644 index 000000000..f854ca09a --- /dev/null +++ b/crates/mbe/src/lib.rs
@@ -0,0 +1,278 @@
	1	//! `mbe` (short for Macro By Example) crate contains code for handling
	2	//! `macro_rules` macros. It uses `TokenTree` (from `tt` package) as the
	3	//! interface, although it contains some code to bridge `SyntaxNode`s and
	4	//! `TokenTree`s as well!
	5
	6	mod parser;
	7	mod mbe_expander;
	8	mod syntax_bridge;
	9	mod tt_iter;
	10	mod subtree_source;
	11
	12	#[cfg(test)]
	13	mod tests;
	14
	15	pub use tt::{Delimiter, Punct};
	16
	17	use crate::{
	18	parser::{parse_pattern, Op},
	19	tt_iter::TtIter,
	20	};
	21
	22	#[derive(Debug, PartialEq, Eq)]
	23	pub enum ParseError {
	24	Expected(String),
	25	RepetitionEmtpyTokenTree,
	26	}
	27
	28	#[derive(Debug, PartialEq, Eq, Clone)]
	29	pub enum ExpandError {
	30	NoMatchingRule,
	31	UnexpectedToken,
	32	BindingError(String),
	33	ConversionError,
	34	InvalidRepeat,
	35	ProcMacroError(tt::ExpansionError),
	36	}
	37
	38	impl From<tt::ExpansionError> for ExpandError {
	39	fn from(it: tt::ExpansionError) -> Self {
	40	ExpandError::ProcMacroError(it)
	41	}
	42	}
	43
	44	pub use crate::syntax_bridge::{
	45	ast_to_token_tree, parse_to_token_tree, syntax_node_to_token_tree, token_tree_to_syntax_node,
	46	TokenMap,
	47	};
	48
	49	/// This struct contains AST for a single `macro_rules` definition. What might
	50	/// be very confusing is that AST has almost exactly the same shape as
	51	/// `tt::TokenTree`, but there's a crucial difference: in macro rules, `$ident`
	52	/// and `$()*` have special meaning (see `Var` and `Repeat` data structures)
	53	#[derive(Clone, Debug, PartialEq, Eq)]
	54	pub struct MacroRules {
	55	rules: Vec<Rule>,
	56	/// Highest id of the token we have in TokenMap
	57	shift: Shift,
	58	}
	59
	60	#[derive(Clone, Debug, PartialEq, Eq)]
	61	struct Rule {
	62	lhs: tt::Subtree,
	63	rhs: tt::Subtree,
	64	}
	65
	66	#[derive(Clone, Copy, Debug, PartialEq, Eq)]
	67	struct Shift(u32);
	68
	69	impl Shift {
	70	fn new(tt: &tt::Subtree) -> Shift {
	71	// Note that TokenId is started from zero,
	72	// We have to add 1 to prevent duplication.
	73	let value = max_id(tt).map_or(0, \|it\| it + 1);
	74	return Shift(value);
	75
	76	// Find the max token id inside a subtree
	77	fn max_id(subtree: &tt::Subtree) -> Option<u32> {
	78	subtree
	79	.token_trees
	80	.iter()
	81	.filter_map(\|tt\| match tt {
	82	tt::TokenTree::Subtree(subtree) => {
	83	let tree_id = max_id(subtree);
	84	match subtree.delimiter {
	85	Some(it) if it.id != tt::TokenId::unspecified() => {
	86	Some(tree_id.map_or(it.id.0, \|t\| t.max(it.id.0)))
	87	}
	88	_ => tree_id,
	89	}
	90	}
	91	tt::TokenTree::Leaf(tt::Leaf::Ident(ident))
	92	if ident.id != tt::TokenId::unspecified() =>
	93	{
	94	Some(ident.id.0)
	95	}
	96	_ => None,
	97	})
	98	.max()
	99	}
	100	}
	101
	102	/// Shift given TokenTree token id
	103	fn shift_all(self, tt: &mut tt::Subtree) {
	104	for t in tt.token_trees.iter_mut() {
	105	match t {
	106	tt::TokenTree::Leaf(leaf) => match leaf {
	107	tt::Leaf::Ident(ident) => ident.id = self.shift(ident.id),
	108	tt::Leaf::Punct(punct) => punct.id = self.shift(punct.id),
	109	tt::Leaf::Literal(lit) => lit.id = self.shift(lit.id),
	110	},
	111	tt::TokenTree::Subtree(tt) => {
	112	if let Some(it) = tt.delimiter.as_mut() {
	113	it.id = self.shift(it.id);
	114	};
	115	self.shift_all(tt)
	116	}
	117	}
	118	}
	119	}
	120
	121	fn shift(self, id: tt::TokenId) -> tt::TokenId {
	122	if id == tt::TokenId::unspecified() {
	123	return id;
	124	}
	125	tt::TokenId(id.0 + self.0)
	126	}
	127
	128	fn unshift(self, id: tt::TokenId) -> Option<tt::TokenId> {
	129	id.0.checked_sub(self.0).map(tt::TokenId)
	130	}
	131	}
	132
	133	#[derive(Debug, Eq, PartialEq)]
	134	pub enum Origin {
	135	Def,
	136	Call,
	137	}
	138
	139	impl MacroRules {
	140	pub fn parse(tt: &tt::Subtree) -> Result<MacroRules, ParseError> {
	141	// Note: this parsing can be implemented using mbe machinery itself, by
	142	// matching against `$($lhs:tt => $rhs:tt);*` pattern, but implementing
	143	// manually seems easier.
	144	let mut src = TtIter::new(tt);
	145	let mut rules = Vec::new();
	146	while src.len() > 0 {
	147	let rule = Rule::parse(&mut src)?;
	148	rules.push(rule);
	149	if let Err(()) = src.expect_char(';') {
	150	if src.len() > 0 {
	151	return Err(ParseError::Expected("expected `:`".to_string()));
	152	}
	153	break;
	154	}
	155	}
	156
	157	for rule in rules.iter() {
	158	validate(&rule.lhs)?;
	159	}
	160
	161	Ok(MacroRules { rules, shift: Shift::new(tt) })
	162	}
	163
	164	pub fn expand(&self, tt: &tt::Subtree) -> ExpandResult<tt::Subtree> {
	165	// apply shift
	166	let mut tt = tt.clone();
	167	self.shift.shift_all(&mut tt);
	168	mbe_expander::expand(self, &tt)
	169	}
	170
	171	pub fn map_id_down(&self, id: tt::TokenId) -> tt::TokenId {
	172	self.shift.shift(id)
	173	}
	174
	175	pub fn map_id_up(&self, id: tt::TokenId) -> (tt::TokenId, Origin) {
	176	match self.shift.unshift(id) {
	177	Some(id) => (id, Origin::Call),
	178	None => (id, Origin::Def),
	179	}
	180	}
	181	}
	182
	183	impl Rule {
	184	fn parse(src: &mut TtIter) -> Result<Rule, ParseError> {
	185	let mut lhs = src
	186	.expect_subtree()
	187	.map_err(\|()\| ParseError::Expected("expected subtree".to_string()))?
	188	.clone();
	189	lhs.delimiter = None;
	190	src.expect_char('=').map_err(\|()\| ParseError::Expected("expected `=`".to_string()))?;
	191	src.expect_char('>').map_err(\|()\| ParseError::Expected("expected `>`".to_string()))?;
	192	let mut rhs = src
	193	.expect_subtree()
	194	.map_err(\|()\| ParseError::Expected("expected subtree".to_string()))?
	195	.clone();
	196	rhs.delimiter = None;
	197	Ok(crate::Rule { lhs, rhs })
	198	}
	199	}
	200
	201	fn to_parse_error(e: ExpandError) -> ParseError {
	202	let msg = match e {
	203	ExpandError::InvalidRepeat => "invalid repeat".to_string(),
	204	_ => "invalid macro definition".to_string(),
	205	};
	206	ParseError::Expected(msg)
	207	}
	208
	209	fn validate(pattern: &tt::Subtree) -> Result<(), ParseError> {
	210	for op in parse_pattern(pattern) {
	211	let op = op.map_err(to_parse_error)?;
	212
	213	match op {
	214	Op::TokenTree(tt::TokenTree::Subtree(subtree)) => validate(subtree)?,
	215	Op::Repeat { subtree, separator, .. } => {
	216	// Checks that no repetition which could match an empty token
	217	// https://github.com/rust-lang/rust/blob/a58b1ed44f5e06976de2bdc4d7dc81c36a96934f/src/librustc_expand/mbe/macro_rules.rs#L558
	218
	219	if separator.is_none() {
	220	if parse_pattern(subtree).all(\|child_op\| {
	221	match child_op.map_err(to_parse_error) {
	222	Ok(Op::Var { kind, .. }) => {
	223	// vis is optional
	224	if kind.map_or(false, \|it\| it == "vis") {
	225	return true;
	226	}
	227	}
	228	Ok(Op::Repeat { kind, .. }) => {
	229	return matches!(
	230	kind,
	231	parser::RepeatKind::ZeroOrMore \| parser::RepeatKind::ZeroOrOne
	232	)
	233	}
	234	_ => {}
	235	}
	236	false
	237	}) {
	238	return Err(ParseError::RepetitionEmtpyTokenTree);
	239	}
	240	}
	241	validate(subtree)?
	242	}
	243	_ => (),
	244	}
	245	}
	246	Ok(())
	247	}
	248
	249	#[derive(Debug)]
	250	pub struct ExpandResult<T>(pub T, pub Option<ExpandError>);
	251
	252	impl<T> ExpandResult<T> {
	253	pub fn ok(t: T) -> ExpandResult<T> {
	254	ExpandResult(t, None)
	255	}
	256
	257	pub fn only_err(err: ExpandError) -> ExpandResult<T>
	258	where
	259	T: Default,
	260	{
	261	ExpandResult(Default::default(), Some(err))
	262	}
	263
	264	pub fn map<U>(self, f: impl FnOnce(T) -> U) -> ExpandResult<U> {
	265	ExpandResult(f(self.0), self.1)
	266	}
	267
	268	pub fn result(self) -> Result<T, ExpandError> {
	269	self.1.map(Err).unwrap_or(Ok(self.0))
	270	}
	271	}
	272
	273	impl<T: Default> From<Result<T, ExpandError>> for ExpandResult<T> {
	274	fn from(result: Result<T, ExpandError>) -> ExpandResult<T> {
	275	result
	276	.map_or_else(\|e\| ExpandResult(Default::default(), Some(e)), \|it\| ExpandResult(it, None))
	277	}
	278	}