author     Edwin Cheng <[email protected]>   2019-04-06 13:14:28 +0100
committer  Edwin Cheng <[email protected]>   2019-04-06 13:14:28 +0100
commit     aac9dfa46418603940ab2333cfea2190d9464d9e (patch)
tree       83bc019e9961703ba1025c647a92ba4cd8e394af
parent     1d7735fbc6795c3ea5f02950b47413e0b35d6677 (diff)

Add TtCursorTokenSource and TtCursorTokenSink

-rw-r--r--  crates/ra_mbe/src/syntax_bridge.rs |  58
-rw-r--r--  crates/ra_mbe/src/tt_cursor.rs     | 170
-rw-r--r--  crates/ra_parser/src/grammar.rs    |   4
-rw-r--r--  crates/ra_parser/src/lib.rs        |   8
4 files changed, 216 insertions, 24 deletions
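The core of this change is the new `n_tokens` field on `TtToken`: one parser-level token can cover several `tt::TokenTree` leaves (for example `..=` is three `Punct` leaves), so any code that walks both representations needs to know how many leaves each token spans. The standalone sketch below illustrates why that count is needed to keep the two cursors in sync; `MiniToken` and `leaf_pos_after` are invented stand-ins for illustration, not rust-analyzer types.

```rust
// Invented stand-ins (MiniToken, leaf_pos_after); not part of rust-analyzer.
#[derive(Debug)]
struct MiniToken {
    text: &'static str,
    /// How many underlying token-tree leaves this parser-level token spans.
    n_tokens: usize,
}

/// Leaf-level position of the token-tree cursor after the parser has
/// consumed `parsed` parser-level tokens.
fn leaf_pos_after(tokens: &[MiniToken], parsed: usize) -> usize {
    tokens.iter().take(parsed).map(|t| t.n_tokens).sum()
}

fn main() {
    // `a ..= b` is five token-tree leaves (ident, `.`, `.`, `=`, ident)
    // but only three parser tokens (IDENT, DOTDOTEQ, IDENT).
    let tokens = [
        MiniToken { text: "a", n_tokens: 1 },
        MiniToken { text: "..=", n_tokens: 3 },
        MiniToken { text: "b", n_tokens: 1 },
    ];
    // After the parser has eaten `a` and `..=`, the token-tree cursor
    // must have skipped 1 + 3 = 4 leaves.
    assert_eq!(leaf_pos_after(&tokens, 2), 4);
    let eaten: Vec<_> = tokens[..2].iter().map(|t| t.text).collect();
    println!("parser consumed {:?}, leaf cursor at {}", eaten, leaf_pos_after(&tokens, 2));
}
```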
diff --git a/crates/ra_mbe/src/syntax_bridge.rs b/crates/ra_mbe/src/syntax_bridge.rs
index 139a0fd33..3a0702a30 100644
--- a/crates/ra_mbe/src/syntax_bridge.rs
+++ b/crates/ra_mbe/src/syntax_bridge.rs
@@ -104,15 +104,16 @@ fn convert_tt(
 }
 
 #[derive(Debug)]
-struct TtTokenSource {
-    tokens: Vec<TtToken>,
+pub(crate) struct TtTokenSource {
+    pub tokens: Vec<TtToken>,
 }
 
 #[derive(Debug)]
-struct TtToken {
-    kind: SyntaxKind,
-    is_joint_to_next: bool,
-    text: SmolStr,
+pub(crate) struct TtToken {
+    pub kind: SyntaxKind,
+    pub is_joint_to_next: bool,
+    pub text: SmolStr,
+    pub n_tokens: usize,
 }
 
 // Some helper functions
@@ -123,7 +124,7 @@ fn to_punct(tt: &tt::TokenTree) -> Option<&tt::Punct> {
     None
 }
 
-struct TokenPeek<'a, I>
+pub(crate) struct TokenPeek<'a, I>
 where
     I: Iterator<Item = &'a tt::TokenTree>,
 {
@@ -134,7 +135,11 @@ impl<'a, I> TokenPeek<'a, I>
 where
     I: Iterator<Item = &'a tt::TokenTree>,
 {
-    fn next(&mut self) -> Option<&tt::TokenTree> {
+    pub fn new(iter: I) -> Self {
+        TokenPeek { iter: itertools::multipeek(iter) }
+    }
+
+    pub fn next(&mut self) -> Option<&tt::TokenTree> {
         self.iter.next()
     }
 
@@ -161,14 +166,14 @@ where
 }
 
 impl TtTokenSource {
-    fn new(tt: &tt::Subtree) -> TtTokenSource {
+    pub fn new(tt: &tt::Subtree) -> TtTokenSource {
         let mut res = TtTokenSource { tokens: Vec::new() };
         res.convert_subtree(tt);
         res
     }
     fn convert_subtree(&mut self, sub: &tt::Subtree) {
         self.push_delim(sub.delimiter, false);
-        let mut peek = TokenPeek { iter: itertools::multipeek(sub.token_trees.iter()) };
+        let mut peek = TokenPeek::new(sub.token_trees.iter());
         while let Some(tt) = peek.iter.next() {
             self.convert_tt(tt, &mut peek);
         }
@@ -194,10 +199,17 @@ impl TtTokenSource {
                 kind: classify_literal(&l.text).unwrap().kind,
                 is_joint_to_next: false,
                 text: l.text.clone(),
+                n_tokens: 1,
             },
             tt::Leaf::Punct(p) => {
-                if let Some(tt) = Self::convert_multi_char_punct(p, iter) {
-                    tt
+                if let Some((kind, is_joint_to_next, text, size)) =
+                    Self::convert_multi_char_punct(p, iter)
+                {
+                    for _ in 0..size - 1 {
+                        iter.next();
+                    }
+
+                    TtToken { kind, is_joint_to_next, text: text.into(), n_tokens: size }
                 } else {
                     let kind = match p.char {
                         // lexer may produce combpund tokens for these ones
@@ -213,21 +225,26 @@ impl TtTokenSource {
                         let s: &str = p.char.encode_utf8(&mut buf);
                         SmolStr::new(s)
                     };
-                    TtToken { kind, is_joint_to_next: p.spacing == tt::Spacing::Joint, text }
+                    TtToken {
+                        kind,
+                        is_joint_to_next: p.spacing == tt::Spacing::Joint,
+                        text,
+                        n_tokens: 1,
+                    }
                 }
             }
             tt::Leaf::Ident(ident) => {
                 let kind = SyntaxKind::from_keyword(ident.text.as_str()).unwrap_or(IDENT);
-                TtToken { kind, is_joint_to_next: false, text: ident.text.clone() }
+                TtToken { kind, is_joint_to_next: false, text: ident.text.clone(), n_tokens: 1 }
             }
         };
         self.tokens.push(tok)
     }
 
-    fn convert_multi_char_punct<'a, I>(
+    pub(crate) fn convert_multi_char_punct<'a, I>(
         p: &tt::Punct,
         iter: &mut TokenPeek<'a, I>,
-    ) -> Option<TtToken>
+    ) -> Option<(SyntaxKind, bool, &'static str, usize)>
     where
         I: Iterator<Item = &'a tt::TokenTree>,
     {
@@ -239,9 +256,7 @@ impl TtTokenSource {
                 ('.', '.', '=') => Some((DOTDOTEQ, "..=")),
                 _ => None,
             } {
-                iter.next();
-                iter.next();
-                return Some(TtToken { kind, is_joint_to_next, text: text.into() });
+                return Some((kind, is_joint_to_next, text, 3));
             }
         }
 
@@ -273,8 +288,7 @@ impl TtTokenSource {
 
                 _ => None,
             } {
-                iter.next();
-                return Some(TtToken { kind, is_joint_to_next, text: text.into() });
+                return Some((kind, is_joint_to_next, text, 2));
             }
         }
 
@@ -291,7 +305,7 @@ impl TtTokenSource {
         let idx = closing as usize;
         let kind = kinds[idx];
         let text = &texts[idx..texts.len() - (1 - idx)];
-        let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text) };
+        let tok = TtToken { kind, is_joint_to_next: false, text: SmolStr::new(text), n_tokens: 1 };
         self.tokens.push(tok)
     }
 }
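With this patch, `convert_multi_char_punct` no longer builds a `TtToken` or advances the iterator itself; it only reports the combined `SyntaxKind`, the joint flag, the glued text, and how many punct leaves it covers, leaving it to the caller to skip `size - 1` peeked leaves. Below is a minimal std-only sketch of that division of labour, using plain `char`s in place of `tt::Punct` and ignoring spacing; `combine_puncts` is a made-up stand-in, not the real function.

```rust
// Hypothetical stand-in for convert_multi_char_punct: report the glued text and
// how many single-char puncts it covers, but do not advance the iterator here.
fn combine_puncts(curr: char, peek1: Option<char>, peek2: Option<char>) -> Option<(&'static str, usize)> {
    match (curr, peek1, peek2) {
        ('.', Some('.'), Some('=')) => Some(("..=", 3)),
        ('.', Some('.'), Some('.')) => Some(("...", 3)),
        ('.', Some('.'), _) => Some(("..", 2)),
        ('=', Some('='), _) => Some(("==", 2)),
        ('=', Some('>'), _) => Some(("=>", 2)),
        _ => None,
    }
}

fn main() {
    let puncts: Vec<char> = "..=".chars().collect();
    let mut tokens: Vec<(String, usize)> = Vec::new();
    let mut i = 0;
    while i < puncts.len() {
        let peek1 = puncts.get(i + 1).copied();
        let peek2 = puncts.get(i + 2).copied();
        if let Some((text, size)) = combine_puncts(puncts[i], peek1, peek2) {
            // The caller owns cursor movement: skip the extra leaves, just like
            // the new `for _ in 0..size - 1 { iter.next(); }` loop in the patch.
            tokens.push((text.to_string(), size));
            i += size;
        } else {
            tokens.push((puncts[i].to_string(), 1));
            i += 1;
        }
    }
    assert_eq!(tokens, vec![("..=".to_string(), 3)]);
}
```

Keeping iterator advancement at the call site is what lets `tt_cursor.rs` reuse the same helper purely to measure a token's width (see `skip_sibling_leaf` in the next file).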
diff --git a/crates/ra_mbe/src/tt_cursor.rs b/crates/ra_mbe/src/tt_cursor.rs
index 6f619621a..6ac3ac187 100644
--- a/crates/ra_mbe/src/tt_cursor.rs
+++ b/crates/ra_mbe/src/tt_cursor.rs
@@ -1,4 +1,124 @@
 use crate::ParseError;
+use crate::syntax_bridge::{TtTokenSource, TtToken, TokenPeek};
+use ra_parser::{TokenSource, TreeSink};
+
+use ra_syntax::{
+    SyntaxKind
+};
+
+struct TtCursorTokenSource {
+    tt_pos: usize,
+    inner: TtTokenSource,
+}
+
+impl TtCursorTokenSource {
+    fn new(subtree: &tt::Subtree, curr: usize) -> TtCursorTokenSource {
+        let mut res = TtCursorTokenSource { inner: TtTokenSource::new(subtree), tt_pos: 1 };
+
+        // Match the `TtToken` cursor to the `tt::TokenTree` cursor,
+        // because `TtToken` is not a one-to-one mapping to `tt::TokenTree`.
+        // There are 3 cases (`TtToken` <=> `tt::TokenTree`):
+        // * One to One  => ident, single-char punct
+        // * Many to One => `tt::TokenTree::Subtree`
+        // * One to Many => multi-char punct
+        //
+        // So we cannot simply advance the cursor;
+        // we have to bump it one by one.
+        let mut pos = 0;
+        while pos < curr {
+            pos += res.bump(&subtree.token_trees[pos]);
+        }
+
+        res
+    }
+
+    fn skip_sibling_leaf(&self, leaf: &tt::Leaf, iter: &mut std::slice::Iter<tt::TokenTree>) {
+        if let tt::Leaf::Punct(p) = leaf {
+            let mut peek = TokenPeek::new(iter);
+            if let Some((_, _, _, size)) = TtTokenSource::convert_multi_char_punct(p, &mut peek) {
+                for _ in 0..size - 1 {
+                    peek.next();
+                }
+            }
+        }
+    }
+
+    fn count_tt_tokens(
+        &self,
+        tt: &tt::TokenTree,
+        iter: Option<&mut std::slice::Iter<tt::TokenTree>>,
+    ) -> usize {
+        assert!(!self.inner.tokens.is_empty());
+
+        match tt {
+            tt::TokenTree::Subtree(sub_tree) => {
+                let mut iter = sub_tree.token_trees.iter();
+                let mut count = match sub_tree.delimiter {
+                    tt::Delimiter::None => 0,
+                    _ => 2,
+                };
+
+                while let Some(tt) = iter.next() {
+                    count += self.count_tt_tokens(&tt, Some(&mut iter));
+                }
+                count
+            }
+
+            tt::TokenTree::Leaf(leaf) => {
+                iter.map(|iter| {
+                    self.skip_sibling_leaf(leaf, iter);
+                });
+
+                1
+            }
+        }
+    }
+
+    fn count(&self, tt: &tt::TokenTree) -> usize {
+        self.count_tt_tokens(tt, None)
+    }
+
+    fn bump(&mut self, tt: &tt::TokenTree) -> usize {
+        let cur = self.current().unwrap();
+        let n_tokens = cur.n_tokens;
+        self.tt_pos += self.count(tt);
+        n_tokens
+    }
+
+    fn current(&self) -> Option<&TtToken> {
+        self.inner.tokens.get(self.tt_pos)
+    }
+}
+
+impl TokenSource for TtCursorTokenSource {
+    fn token_kind(&self, pos: usize) -> SyntaxKind {
+        if let Some(tok) = self.inner.tokens.get(self.tt_pos + pos) {
+            tok.kind
+        } else {
+            SyntaxKind::EOF
+        }
+    }
+    fn is_token_joint_to_next(&self, pos: usize) -> bool {
+        self.inner.tokens[self.tt_pos + pos].is_joint_to_next
+    }
+    fn is_keyword(&self, pos: usize, kw: &str) -> bool {
+        self.inner.tokens[self.tt_pos + pos].text == *kw
+    }
+}
+
+struct TtCursorTokenSink {
+    token_pos: usize,
+}
+
+impl TreeSink for TtCursorTokenSink {
+    fn token(&mut self, _kind: SyntaxKind, n_tokens: u8) {
+        self.token_pos += n_tokens as usize;
+    }
+
+    fn start_node(&mut self, _kind: SyntaxKind) {}
+    fn finish_node(&mut self) {}
+    fn error(&mut self, _error: ra_parser::ParseError) {}
+}
 
 #[derive(Clone)]
 pub(crate) struct TtCursor<'a> {
@@ -78,8 +198,54 @@ impl<'a> TtCursor<'a> {
         })
     }
 
-    pub(crate) fn eat_path(&mut self) -> Option<tt::Subtree> {
-        None
+    fn eat_parse_result(
+        &mut self,
+        parsed_token: usize,
+        src: &mut TtCursorTokenSource,
+    ) -> Option<tt::TokenTree> {
+        let mut res = vec![];
+
+        // Match the `TtToken` cursor to the `tt::TokenTree` cursor,
+        // because `TtToken` is not a one-to-one mapping to `tt::TokenTree`.
+        // There are 3 cases (`TtToken` <=> `tt::TokenTree`):
+        // * One to One  => ident, single-char punct
+        // * Many to One => `tt::TokenTree::Subtree`
+        // * One to Many => multi-char punct
+        //
+        // So we cannot simply advance the cursor;
+        // we have to bump it one by one.
+        let next_pos = src.tt_pos + parsed_token;
+        while src.tt_pos < next_pos {
+            let n = src.bump(self.current().unwrap());
+            res.extend((0..n).map(|_| self.eat().unwrap()));
+        }
+
+        let res: Vec<_> = res.into_iter().cloned().collect();
+
+        match res.len() {
+            0 => None,
+            1 => Some(res[0].clone()),
+            _ => Some(tt::TokenTree::Subtree(tt::Subtree {
+                delimiter: tt::Delimiter::None,
+                token_trees: res,
+            })),
+        }
+    }
+
+    fn eat_parse<F>(&mut self, f: F) -> Option<tt::TokenTree>
+    where
+        F: FnOnce(&dyn TokenSource, &mut dyn TreeSink),
+    {
+        let mut src = TtCursorTokenSource::new(self.subtree, self.pos);
+        let mut sink = TtCursorTokenSink { token_pos: 0 };
+
+        f(&src, &mut sink);
+
+        self.eat_parse_result(sink.token_pos, &mut src)
+    }
+
+    pub(crate) fn eat_path(&mut self) -> Option<tt::TokenTree> {
+        self.eat_parse(ra_parser::parse_path)
     }
 
     pub(crate) fn expect_char(&mut self, char: char) -> Result<(), ParseError> {
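`eat_parse` is the glue here: it runs a parser entry point speculatively over a `TtCursorTokenSource` starting at the cursor position, lets a throwaway `TtCursorTokenSink` count how many tokens the parser consumed, and then eats exactly that many items off the cursor. The self-contained sketch below models that flow over a flat list; `MiniSink`, `CountingSink`, `MiniCursor`, and the toy `parse_path` are all invented stand-ins, not the ra_parser/ra_mbe types.

```rust
// All names here are invented stand-ins for illustration only.
trait MiniSink {
    // Analogue of TreeSink::token: the parser reports each token it consumes.
    fn token(&mut self, n_tokens: u8);
}

// Analogue of TtCursorTokenSink: discard the tree, keep only the token count.
struct CountingSink {
    token_pos: usize,
}

impl MiniSink for CountingSink {
    fn token(&mut self, n_tokens: u8) {
        self.token_pos += n_tokens as usize;
    }
}

// Analogue of TtCursor, over a flat list of items.
struct MiniCursor<'a> {
    items: &'a [&'static str],
    pos: usize,
}

impl<'a> MiniCursor<'a> {
    // Analogue of eat_parse: run `f` speculatively, then advance by what it consumed.
    fn eat_parse<F>(&mut self, f: F) -> Vec<&'static str>
    where
        F: FnOnce(&[&'static str], &mut dyn MiniSink),
    {
        let mut sink = CountingSink { token_pos: 0 };
        f(&self.items[self.pos..], &mut sink);
        let eaten = self.items[self.pos..self.pos + sink.token_pos].to_vec();
        self.pos += sink.token_pos;
        eaten
    }
}

// A toy "path parser": consumes ident (:: ident)*, reporting each token it uses.
fn parse_path(tokens: &[&'static str], sink: &mut dyn MiniSink) {
    let mut i = 0;
    while i < tokens.len() {
        sink.token(1); // ident
        i += 1;
        if tokens.get(i) == Some(&"::") {
            sink.token(1); // `::`
            i += 1;
        } else {
            break;
        }
    }
}

fn main() {
    let items = ["foo", "::", "bar", "=>", "baz"];
    let mut cursor = MiniCursor { items: &items, pos: 0 };
    let path = cursor.eat_parse(parse_path);
    assert_eq!(path, vec!["foo", "::", "bar"]);
    assert_eq!(cursor.pos, 3); // `=>` and `baz` are still ahead of the cursor
}
```

In the real code the bookkeeping is more involved because one parser token may span several token-tree leaves and a subtree counts as many tokens (the `n_tokens` / `bump` / `count_tt_tokens` machinery above); the flat list here deliberately ignores that.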
diff --git a/crates/ra_parser/src/grammar.rs b/crates/ra_parser/src/grammar.rs
index b2ffeff8c..c5f510e6b 100644
--- a/crates/ra_parser/src/grammar.rs
+++ b/crates/ra_parser/src/grammar.rs
@@ -49,6 +49,10 @@ pub(crate) fn root(p: &mut Parser) {
     m.complete(p, SOURCE_FILE);
 }
 
+pub(crate) fn path(p: &mut Parser) {
+    paths::type_path(p);
+}
+
 pub(crate) fn reparser(
     node: SyntaxKind,
     first_child: Option<SyntaxKind>,
diff --git a/crates/ra_parser/src/lib.rs b/crates/ra_parser/src/lib.rs
index 30ba06aac..3ceeeebd7 100644
--- a/crates/ra_parser/src/lib.rs
+++ b/crates/ra_parser/src/lib.rs
@@ -61,6 +61,14 @@ pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
     event::process(tree_sink, events);
 }
 
+/// Parse given tokens into the given sink as a path
+pub fn parse_path(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) {
+    let mut p = parser::Parser::new(token_source);
+    grammar::path(&mut p);
+    let events = p.finish();
+    event::process(tree_sink, events);
+}
+
 /// A parsing function for a specific braced-block.
 pub struct Reparser(fn(&mut parser::Parser));
 