diff options
author | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-02-24 10:15:43 +0000 |
---|---|---|
committer | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-02-24 10:15:43 +0000 |
commit | f6f160391db945a0dcc2f73b38926d6919f7c566 (patch) | |
tree | 060450b70c09357615f261d8acd032a647615dd7 /crates/ra_mbe | |
parent | c5e74cebdcbade069c0e1e81e298ab7d729e4cd5 (diff) | |
parent | 81bca78349afb9e15994f46401da0cfabfba04a1 (diff) |
Merge #885
885: Parse token trees directly r=matklad a=matklad
This takes advantage of the recent macro refactoring to parse a token stream directly into a syntax tree.
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_mbe')
-rw-r--r-- | crates/ra_mbe/Cargo.toml | 1 | ||||
-rw-r--r-- | crates/ra_mbe/src/lib.rs | 63 | ||||
-rw-r--r-- | crates/ra_mbe/src/syntax_bridge.rs | 147 |
3 files changed, 207 insertions, 4 deletions
diff --git a/crates/ra_mbe/Cargo.toml b/crates/ra_mbe/Cargo.toml index e7b8660e7..6e785f570 100644 --- a/crates/ra_mbe/Cargo.toml +++ b/crates/ra_mbe/Cargo.toml | |||
@@ -6,6 +6,7 @@ authors = ["rust-analyzer developers"] | |||
6 | 6 | ||
7 | [dependencies] | 7 | [dependencies] |
8 | ra_syntax = { path = "../ra_syntax" } | 8 | ra_syntax = { path = "../ra_syntax" } |
9 | ra_parser = { path = "../ra_parser" } | ||
9 | tt = { path = "../ra_tt", package = "ra_tt" } | 10 | tt = { path = "../ra_tt", package = "ra_tt" } |
10 | 11 | ||
11 | rustc-hash = "1.0.0" | 12 | rustc-hash = "1.0.0" |
diff --git a/crates/ra_mbe/src/lib.rs b/crates/ra_mbe/src/lib.rs index cdca3cafb..907402f5f 100644 --- a/crates/ra_mbe/src/lib.rs +++ b/crates/ra_mbe/src/lib.rs | |||
@@ -24,7 +24,7 @@ use ra_syntax::SmolStr; | |||
24 | 24 | ||
25 | pub use tt::{Delimiter, Punct}; | 25 | pub use tt::{Delimiter, Punct}; |
26 | 26 | ||
27 | pub use crate::syntax_bridge::ast_to_token_tree; | 27 | pub use crate::syntax_bridge::{ast_to_token_tree, token_tree_to_ast_item_list}; |
28 | 28 | ||
29 | /// This struct contains AST for a single `macro_rules` definition. What might | 29 | /// This struct contains AST for a single `macro_rules` definition. What might |
30 | /// be very confusing is that AST has almost exactly the same shape as | 30 | /// be very confusing is that AST has almost exactly the same shape as |
@@ -164,14 +164,18 @@ impl_froms!(TokenTree: Leaf, Subtree); | |||
164 | crate::MacroRules::parse(&definition_tt).unwrap() | 164 | crate::MacroRules::parse(&definition_tt).unwrap() |
165 | } | 165 | } |
166 | 166 | ||
167 | fn assert_expansion(rules: &MacroRules, invocation: &str, expansion: &str) { | 167 | fn expand(rules: &MacroRules, invocation: &str) -> tt::Subtree { |
168 | let source_file = ast::SourceFile::parse(invocation); | 168 | let source_file = ast::SourceFile::parse(invocation); |
169 | let macro_invocation = | 169 | let macro_invocation = |
170 | source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap(); | 170 | source_file.syntax().descendants().find_map(ast::MacroCall::cast).unwrap(); |
171 | 171 | ||
172 | let (invocation_tt, _) = ast_to_token_tree(macro_invocation.token_tree().unwrap()).unwrap(); | 172 | let (invocation_tt, _) = ast_to_token_tree(macro_invocation.token_tree().unwrap()).unwrap(); |
173 | 173 | ||
174 | let expanded = rules.expand(&invocation_tt).unwrap(); | 174 | rules.expand(&invocation_tt).unwrap() |
175 | } | ||
176 | |||
177 | fn assert_expansion(rules: &MacroRules, invocation: &str, expansion: &str) { | ||
178 | let expanded = expand(rules, invocation); | ||
175 | assert_eq!(expanded.to_string(), expansion); | 179 | assert_eq!(expanded.to_string(), expansion); |
176 | } | 180 | } |
177 | 181 | ||
@@ -268,4 +272,57 @@ impl_froms!(TokenTree: Leaf, Subtree); | |||
268 | assert_expansion(&rules, "foo! { Foo,# Bar }", "struct Foo ; struct Bar ;"); | 272 | assert_expansion(&rules, "foo! { Foo,# Bar }", "struct Foo ; struct Bar ;"); |
269 | } | 273 | } |
270 | 274 | ||
275 | #[test] | ||
276 | fn expand_to_item_list() { | ||
277 | let rules = create_rules( | ||
278 | " | ||
279 | macro_rules! structs { | ||
280 | ($($i:ident),*) => { | ||
281 | $(struct $i { field: u32 } )* | ||
282 | } | ||
283 | } | ||
284 | ", | ||
285 | ); | ||
286 | let expansion = expand(&rules, "structs!(Foo, Bar)"); | ||
287 | let tree = token_tree_to_ast_item_list(&expansion); | ||
288 | assert_eq!( | ||
289 | tree.syntax().debug_dump().trim(), | ||
290 | r#" | ||
291 | SOURCE_FILE@[0; 40) | ||
292 | STRUCT_DEF@[0; 20) | ||
293 | STRUCT_KW@[0; 6) | ||
294 | NAME@[6; 9) | ||
295 | IDENT@[6; 9) "Foo" | ||
296 | NAMED_FIELD_DEF_LIST@[9; 20) | ||
297 | L_CURLY@[9; 10) | ||
298 | NAMED_FIELD_DEF@[10; 19) | ||
299 | NAME@[10; 15) | ||
300 | IDENT@[10; 15) "field" | ||
301 | COLON@[15; 16) | ||
302 | PATH_TYPE@[16; 19) | ||
303 | PATH@[16; 19) | ||
304 | PATH_SEGMENT@[16; 19) | ||
305 | NAME_REF@[16; 19) | ||
306 | IDENT@[16; 19) "u32" | ||
307 | R_CURLY@[19; 20) | ||
308 | STRUCT_DEF@[20; 40) | ||
309 | STRUCT_KW@[20; 26) | ||
310 | NAME@[26; 29) | ||
311 | IDENT@[26; 29) "Bar" | ||
312 | NAMED_FIELD_DEF_LIST@[29; 40) | ||
313 | L_CURLY@[29; 30) | ||
314 | NAMED_FIELD_DEF@[30; 39) | ||
315 | NAME@[30; 35) | ||
316 | IDENT@[30; 35) "field" | ||
317 | COLON@[35; 36) | ||
318 | PATH_TYPE@[36; 39) | ||
319 | PATH@[36; 39) | ||
320 | PATH_SEGMENT@[36; 39) | ||
321 | NAME_REF@[36; 39) | ||
322 | IDENT@[36; 39) "u32" | ||
323 | R_CURLY@[39; 40)"# | ||
324 | .trim() | ||
325 | ); | ||
326 | } | ||
327 | |||
271 | } | 328 | } |
diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs index 848c785f8..c1472bbe5 100644 --- a/crates/ra_mbe/src/syntax_bridge.rs +++ b/crates/ra_mbe/src/syntax_bridge.rs | |||
@@ -1,5 +1,6 @@ | |||
1 | use ra_parser::{TokenSource, TreeSink, ParseError}; | ||
1 | use ra_syntax::{ | 2 | use ra_syntax::{ |
2 | AstNode, SyntaxNode, TextRange, | 3 | AstNode, SyntaxNode, TextRange, SyntaxKind, SmolStr, SyntaxTreeBuilder, TreeArc, |
3 | ast, SyntaxKind::*, TextUnit | 4 | ast, SyntaxKind::*, TextUnit |
4 | }; | 5 | }; |
5 | 6 | ||
@@ -19,6 +20,15 @@ pub fn ast_to_token_tree(ast: &ast::TokenTree) -> Option<(tt::Subtree, TokenMap) | |||
19 | Some((tt, token_map)) | 20 | Some((tt, token_map)) |
20 | } | 21 | } |
21 | 22 | ||
23 | /// Parses the token tree (result of macro expansion) as a sequence of items | ||
24 | pub fn token_tree_to_ast_item_list(tt: &tt::Subtree) -> TreeArc<ast::SourceFile> { | ||
25 | let token_source = TtTokenSource::new(tt); | ||
26 | let mut tree_sink = TtTreeSink::new(&token_source.tokens); | ||
27 | ra_parser::parse(&token_source, &mut tree_sink); | ||
28 | let syntax = tree_sink.inner.finish(); | ||
29 | ast::SourceFile::cast(&syntax).unwrap().to_owned() | ||
30 | } | ||
31 | |||
22 | impl TokenMap { | 32 | impl TokenMap { |
23 | pub fn relative_range_of(&self, tt: tt::TokenId) -> Option<TextRange> { | 33 | pub fn relative_range_of(&self, tt: tt::TokenId) -> Option<TextRange> { |
24 | let idx = tt.0 as usize; | 34 | let idx = tt.0 as usize; |
@@ -84,3 +94,138 @@ fn convert_tt( | |||
84 | let res = tt::Subtree { delimiter, token_trees }; | 94 | let res = tt::Subtree { delimiter, token_trees }; |
85 | Some(res) | 95 | Some(res) |
86 | } | 96 | } |
97 | |||
98 | struct TtTokenSource { | ||
99 | tokens: Vec<TtToken>, | ||
100 | } | ||
101 | |||
102 | struct TtToken { | ||
103 | kind: SyntaxKind, | ||
104 | is_joint_to_next: bool, | ||
105 | text: SmolStr, | ||
106 | } | ||
107 | |||
108 | impl TtTokenSource { | ||
109 | fn new(tt: &tt::Subtree) -> TtTokenSource { | ||
110 | let mut res = TtTokenSource { tokens: Vec::new() }; | ||
111 | res.convert_subtree(tt); | ||
112 | res | ||
113 | } | ||
114 | fn convert_subtree(&mut self, sub: &tt::Subtree) { | ||
115 | self.push_delim(sub.delimiter, false); | ||
116 | sub.token_trees.iter().for_each(|tt| self.convert_tt(tt)); | ||
117 | self.push_delim(sub.delimiter, true) | ||
118 | } | ||
119 | fn convert_tt(&mut self, tt: &tt::TokenTree) { | ||
120 | match tt { | ||
121 | tt::TokenTree::Leaf(leaf) => self.convert_leaf(leaf), | ||
122 | tt::TokenTree::Subtree(sub) => self.convert_subtree(sub), | ||
123 | } | ||
124 | } | ||
125 | fn convert_leaf(&mut self, leaf: &tt::Leaf) { | ||
126 | let tok = match leaf { | ||
127 | tt::Leaf::Literal(l) => TtToken { | ||
128 | kind: SyntaxKind::INT_NUMBER, // FIXME | ||
129 | is_joint_to_next: false, | ||
130 | text: l.text.clone(), | ||
131 | }, | ||
132 | tt::Leaf::Punct(p) => { | ||
133 | let kind = match p.char { | ||
134 | // lexer may produce compound tokens for these ones | ||
135 | '.' => DOT, | ||
136 | ':' => COLON, | ||
137 | '=' => EQ, | ||
138 | '!' => EXCL, | ||
139 | '-' => MINUS, | ||
140 | c => SyntaxKind::from_char(c).unwrap(), | ||
141 | }; | ||
142 | let text = { | ||
143 | let mut buf = [0u8; 4]; | ||
144 | let s: &str = p.char.encode_utf8(&mut buf); | ||
145 | SmolStr::new(s) | ||
146 | }; | ||
147 | TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text } | ||
148 | } | ||
149 | tt::Leaf::Ident(ident) => { | ||
150 | let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT); | ||
151 | TtToken { kind, is_joint_to_next: false, text: ident.text.clone() } | ||
152 | } | ||
153 | }; | ||
154 | self.tokens.push(tok) | ||
155 | } | ||
156 | fn push_delim(&mut self, d: tt::Delimiter, closing: bool) { | ||
157 | let (kinds, texts) = match d { | ||
158 | tt::Delimiter::Parenthesis => ([L_PAREN, R_PAREN], "()"), | ||
159 | tt::Delimiter::Brace => ([L_CURLY, R_CURLY], "{}"), | ||
160 | tt::Delimiter::Bracket => ([L_BRACK, R_BRACK], "[]"), | ||
161 | tt::Delimiter::None => return, | ||
162 | }; | ||
163 | let idx = closing as usize; | ||
164 | let kind = kinds[idx]; | ||
165 | let text = &texts[idx..texts.len() - (1 - idx)]; | ||
166 | let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) }; | ||
167 | self.tokens.push(tok) | ||
168 | } | ||
169 | } | ||
170 | |||
171 | impl TokenSource for TtTokenSource { | ||
172 | fn token_kind(&self, pos: usize) -> SyntaxKind { | ||
173 | if let Some(tok) = self.tokens.get(pos) { | ||
174 | tok.kind | ||
175 | } else { | ||
176 | SyntaxKind::EOF | ||
177 | } | ||
178 | } | ||
179 | fn is_token_joint_to_next(&self, pos: usize) -> bool { | ||
180 | self.tokens[pos].is_joint_to_next | ||
181 | } | ||
182 | fn is_keyword(&self, pos: usize, kw: &str) -> bool { | ||
183 | self.tokens[pos].text == *kw | ||
184 | } | ||
185 | } | ||
186 | |||
187 | #[derive(Default)] | ||
188 | struct TtTreeSink<'a> { | ||
189 | buf: String, | ||
190 | tokens: &'a [TtToken], | ||
191 | text_pos: TextUnit, | ||
192 | token_pos: usize, | ||
193 | inner: SyntaxTreeBuilder, | ||
194 | } | ||
195 | |||
196 | impl<'a> TtTreeSink<'a> { | ||
197 | fn new(tokens: &'a [TtToken]) -> TtTreeSink { | ||
198 | TtTreeSink { | ||
199 | buf: String::new(), | ||
200 | tokens, | ||
201 | text_pos: 0.into(), | ||
202 | token_pos: 0, | ||
203 | inner: SyntaxTreeBuilder::default(), | ||
204 | } | ||
205 | } | ||
206 | } | ||
207 | |||
208 | impl<'a> TreeSink for TtTreeSink<'a> { | ||
209 | fn leaf(&mut self, kind: SyntaxKind, n_tokens: u8) { | ||
210 | for _ in 0..n_tokens { | ||
211 | self.buf += self.tokens[self.token_pos].text.as_str(); | ||
212 | self.token_pos += 1; | ||
213 | } | ||
214 | self.text_pos += TextUnit::of_str(&self.buf); | ||
215 | let text = SmolStr::new(self.buf.as_str()); | ||
216 | self.buf.clear(); | ||
217 | self.inner.leaf(kind, text) | ||
218 | } | ||
219 | |||
220 | fn start_branch(&mut self, kind: SyntaxKind) { | ||
221 | self.inner.start_branch(kind); | ||
222 | } | ||
223 | |||
224 | fn finish_branch(&mut self) { | ||
225 | self.inner.finish_branch(); | ||
226 | } | ||
227 | |||
228 | fn error(&mut self, error: ParseError) { | ||
229 | self.inner.error(error, self.text_pos) | ||
230 | } | ||
231 | } | ||