aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_mbe
diff options
context:
space:
mode:
authorbors[bot] <bors[bot]@users.noreply.github.com>2019-02-24 10:15:43 +0000
committerbors[bot] <bors[bot]@users.noreply.github.com>2019-02-24 10:15:43 +0000
commitf6f160391db945a0dcc2f73b38926d6919f7c566 (patch)
tree060450b70c09357615f261d8acd032a647615dd7 /crates/ra_mbe
parentc5e74cebdcbade069c0e1e81e298ab7d729e4cd5 (diff)
parent81bca78349afb9e15994f46401da0cfabfba04a1 (diff)
Merge #885
885: Parse token trees directly r=matklad a=matklad This takes advantage of the recent macro refactoring to directly parse a token stream into a syntax tree. Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_mbe')
-rw-r--r--crates/ra_mbe/Cargo.toml1
-rw-r--r--crates/ra_mbe/src/lib.rs63
-rw-r--r--crates/ra_mbe/src/syntax_bridge.rs147
3 files changed, 207 insertions, 4 deletions
diff --git a/crates/ra_mbe/Cargo.toml b/crates/ra_mbe/Cargo.toml
index e7b8660e7..6e785f570 100644
--- a/crates/ra_mbe/Cargo.toml
+++ b/crates/ra_mbe/Cargo.toml
@@ -6,6 +6,7 @@ authors = ["rust-analyzer developers"]
6 6
7[dependencies] 7[dependencies]
8ra_syntax = { path = "../ra_syntax" } 8ra_syntax = { path = "../ra_syntax" }
9ra_parser = { path = "../ra_parser" }
9tt = { path = "../ra_tt", package = "ra_tt" } 10tt = { path = "../ra_tt", package = "ra_tt" }
10 11
11rustc-hash = "1.0.0" 12rustc-hash = "1.0.0"
diff --git a/crates/ra_mbe/src/lib.rs b/crates/ra_mbe/src/lib.rs
index cdca3cafb..907402f5f 100644
--- a/crates/ra_mbe/src/lib.rs
+++ b/crates/ra_mbe/src/lib.rs
@@ -24,7 +24,7 @@ use ra_syntax::SmolStr;
24 24
25pub use tt::{Delimiter, Punct}; 25pub use tt::{Delimiter, Punct};
26 26
27pub use crate::syntax_bridge::ast_to_token_tree; 27pub use crate::syntax_bridge::{ast_to_token_tree, token_tree_to_ast_item_list};
28 28
29/// This struct contains AST for a single `macro_rules` definition. What might 29/// This struct contains AST for a single `macro_rules` definition. What might
30/// be very confusing is that AST has almost exactly the same shape as 30/// be very confusing is that AST has almost exactly the same shape as
@@ -164,14 +164,18 @@ impl_froms!(TokenTree: Leaf, Subtree);
164 crate::MacroRules::parse(&definition_tt).unwrap() 164 crate::MacroRules::parse(&definition_tt).unwrap()
165 } 165 }
166 166
167 fn assert_expansion(rules: &MacroRules, invocation: &str, expansion: &str) { 167 fn expand(rules: &MacroRules, invocation: &str) -> tt::Subtree {
168 let source_file = ast::SourceFile::parse(invocation); 168 let source_file = ast::SourceFile::parse(invocation);
169 let macro_invocation = 169 let macro_invocation =
170 source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap(); 170 source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap();
171 171
172 let (invocation_tt, _) = ast_to_token_tree(macro_invocation.token_tree().unwrap()).unwrap(); 172 let (invocation_tt, _) = ast_to_token_tree(macro_invocation.token_tree().unwrap()).unwrap();
173 173
174 let expanded = rules.expand(&invocation_tt).unwrap(); 174 rules.expand(&invocation_tt).unwrap()
175 }
176
177 fn assert_expansion(rules: &MacroRules, invocation: &str, expansion: &str) {
178 let expanded = expand(rules, invocation);
175 assert_eq!(expanded.to_string(), expansion); 179 assert_eq!(expanded.to_string(), expansion);
176 } 180 }
177 181
@@ -268,4 +272,57 @@ impl_froms!(TokenTree: Leaf, Subtree);
268 assert_expansion(&rules, "foo! { Foo,# Bar }", "struct Foo ; struct Bar ;"); 272 assert_expansion(&rules, "foo! { Foo,# Bar }", "struct Foo ; struct Bar ;");
269 } 273 }
270 274
275 #[test]
276 fn expand_to_item_list() {
277 let rules = create_rules(
278 "
279 macro_rules! structs {
280 ($($i:ident),*) => {
281 $(struct $i { field: u32 } )*
282 }
283 }
284 ",
285 );
286 let expansion = expand(&rules, "structs!(Foo, Bar)");
287 let tree = token_tree_to_ast_item_list(&expansion);
288 assert_eq!(
289 tree.syntax().debug_dump().trim(),
290 r#"
291SOURCE_FILE@[0; 40)
292 STRUCT_DEF@[0; 20)
293 STRUCT_KW@[0; 6)
294 NAME@[6; 9)
295 IDENT@[6; 9) "Foo"
296 NAMED_FIELD_DEF_LIST@[9; 20)
297 L_CURLY@[9; 10)
298 NAMED_FIELD_DEF@[10; 19)
299 NAME@[10; 15)
300 IDENT@[10; 15) "field"
301 COLON@[15; 16)
302 PATH_TYPE@[16; 19)
303 PATH@[16; 19)
304 PATH_SEGMENT@[16; 19)
305 NAME_REF@[16; 19)
306 IDENT@[16; 19) "u32"
307 R_CURLY@[19; 20)
308 STRUCT_DEF@[20; 40)
309 STRUCT_KW@[20; 26)
310 NAME@[26; 29)
311 IDENT@[26; 29) "Bar"
312 NAMED_FIELD_DEF_LIST@[29; 40)
313 L_CURLY@[29; 30)
314 NAMED_FIELD_DEF@[30; 39)
315 NAME@[30; 35)
316 IDENT@[30; 35) "field"
317 COLON@[35; 36)
318 PATH_TYPE@[36; 39)
319 PATH@[36; 39)
320 PATH_SEGMENT@[36; 39)
321 NAME_REF@[36; 39)
322 IDENT@[36; 39) "u32"
323 R_CURLY@[39; 40)"#
324 .trim()
325 );
326 }
327
271} 328}
diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs
index 848c785f8..c1472bbe5 100644
--- a/crates/ra_mbe/src/syntax_bridge.rs
+++ b/crates/ra_mbe/src/syntax_bridge.rs
@@ -1,5 +1,6 @@
1use ra_parser::{TokenSource, TreeSink, ParseError};
1use ra_syntax::{ 2use ra_syntax::{
2 AstNode, SyntaxNode, TextRange, 3 AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc,
3 ast, SyntaxKind::*, TextUnit 4 ast, SyntaxKind::*, TextUnit
4}; 5};
5 6
@@ -19,6 +20,15 @@ pub fn ast_to_token_tree(ast: &ast::TokenTree) -> Option<(tt::Subtree, TokenMap)
19 Some((tt, token_map)) 20 Some((tt, token_map))
20} 21}
21 22
23/// Parses the token tree (result of macro expansion) as a sequence of items
24pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc<ast::SourceFile> {
25 let token_source = TtTokenSource::new(tt);
26 let mut tree_sink = TtTreeSink::new(&token_source.tokens);
27 ra_parser::parse(&token_source, &mut tree_sink);
28 let syntax = tree_sink.inner.finish();
29 ast::SourceFile::cast(&syntax).unwrap().to_owned()
30}
31
22impl TokenMap { 32impl TokenMap {
23 pub fn relative_range_of(&self, tt: tt::TokenId) -> Option<TextRange> { 33 pub fn relative_range_of(&self, tt: tt::TokenId) -> Option<TextRange> {
24 let idx = tt.0 as usize; 34 let idx = tt.0 as usize;
@@ -84,3 +94,138 @@ fn convert_tt(
84 let res = tt::Subtree { delimiter, token_trees }; 94 let res = tt::Subtree { delimiter, token_trees };
85 Some(res) 95 Some(res)
86} 96}
97
98struct TtTokenSource {
99 tokens: Vec<TtToken>,
100}
101
102struct TtToken {
103 kind: SyntaxKind,
104 is_joint_to_next: bool,
105 text: SmolStr,
106}
107
108impl TtTokenSource {
109 fn new(tt: &tt::Subtree) -> TtTokenSource {
110 let mut res = TtTokenSource { tokens: Vec::new() };
111 res.convert_subtree(tt);
112 res
113 }
114 fn convert_subtree(&mut self, sub: &tt::Subtree) {
115 self.push_delim(sub.delimiter, false);
116 sub.token_trees.iter().for_each(|tt| self.convert_tt(tt));
117 self.push_delim(sub.delimiter, true)
118 }
119 fn convert_tt(&mut self, tt: &tt::TokenTree) {
120 match tt {
121 tt::TokenTree::Leaf(leaf) => self.convert_leaf(leaf),
122 tt::TokenTree::Subtree(sub) => self.convert_subtree(sub),
123 }
124 }
125 fn convert_leaf(&mut self, leaf: &tt::Leaf) {
126 let tok = match leaf {
127 tt::Leaf::Literal(l) => TtToken {
128 kind: SyntaxKind::INT_NUMBER, // FIXME
129 is_joint_to_next: false,
130 text: l.text.clone(),
131 },
132 tt::Leaf::Punct(p) => {
133 let kind = match p.char {
134                     // lexer may produce compound tokens for these ones
135 '.' => DOT,
136 ':' => COLON,
137 '=' => EQ,
138 '!' => EXCL,
139 '-' => MINUS,
140 c => SyntaxKind::from_char(c).unwrap(),
141 };
142 let text = {
143 let mut buf = [0u8; 4];
144 let s: &str = p.char.encode_utf8(&mut buf);
145 SmolStr::new(s)
146 };
147 TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text }
148 }
149 tt::Leaf::Ident(ident) => {
150 let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT);
151 TtToken { kind, is_joint_to_next: false, text: ident.text.clone() }
152 }
153 };
154 self.tokens.push(tok)
155 }
156 fn push_delim(&mut self, d: tt::Delimiter, closing: bool) {
157 let (kinds, texts) = match d {
158 tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"),
159 tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"),
160 tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"),
161 tt::Delimiter::None => return,
162 };
163 let idx = closing as usize;
164 let kind = kinds[idx];
165 let text = &texts[idx..texts.len() - (1 - idx)];
166 let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) };
167 self.tokens.push(tok)
168 }
169}
170
171impl TokenSource for TtTokenSource {
172 fn token_kind(&self, pos: usize) -> SyntaxKind {
173 if let Some(tok) = self.tokens.get(pos) {
174 tok.kind
175 } else {
176 SyntaxKind::EOF
177 }
178 }
179 fn is_token_joint_to_next(&self, pos: usize) -> bool {
180 self.tokens[pos].is_joint_to_next
181 }
182 fn is_keyword(&self, pos: usize, kw: &str) -> bool {
183 self.tokens[pos].text == *kw
184 }
185}
186
187#[derive(Default)]
188struct TtTreeSink<'a> {
189 buf: String,
190 tokens: &'a [TtToken],
191 text_pos: TextUnit,
192 token_pos: usize,
193 inner: SyntaxTreeBuilder,
194}
195
196impl<'a> TtTreeSink<'a> {
197 fn new(tokens: &'a [TtToken]) -> TtTreeSink {
198 TtTreeSink {
199 buf: String::new(),
200 tokens,
201 text_pos: 0.into(),
202 token_pos: 0,
203 inner: SyntaxTreeBuilder::default(),
204 }
205 }
206}
207
208impl<'a> TreeSink for TtTreeSink<'a> {
209 fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) {
210 for _ in 0..n_tokens {
211 self.buf += self.tokens[self.token_pos].text.as_str();
212 self.token_pos += 1;
213 }
214 self.text_pos += TextUnit::of_str(&self.buf);
215 let text = SmolStr::new(self.buf.as_str());
216 self.buf.clear();
217 self.inner.leaf(kind, text)
218 }
219
220 fn start_branch(&mut self, kind: SyntaxKind) {
221 self.inner.start_branch(kind);
222 }
223
224 fn finish_branch(&mut self) {
225 self.inner.finish_branch();
226 }
227
228 fn error(&mut self, error: ParseError) {
229 self.inner.error(error, self.text_pos)
230 }
231}