diff options
Diffstat (limited to 'crates/ra_parser/src')
-rw-r--r-- | crates/ra_parser/src/lib.rs | 54 | ||||
-rw-r--r-- | crates/ra_parser/src/parser.rs | 57 |
2 files changed, 69 insertions, 42 deletions
diff --git a/crates/ra_parser/src/lib.rs b/crates/ra_parser/src/lib.rs index 697d1b794..3d88be642 100644 --- a/crates/ra_parser/src/lib.rs +++ b/crates/ra_parser/src/lib.rs | |||
@@ -31,12 +31,26 @@ pub struct ParseError(pub String); | |||
31 | /// | 31 | /// |
32 | /// Hopefully this will allow us to treat text and token trees in the same way! | 32 | /// Hopefully this will allow us to treat text and token trees in the same way! |
33 | pub trait TokenSource { | 33 | pub trait TokenSource { |
34 | fn current(&self) -> Token; | ||
35 | |||
36 | /// Look ahead `n` tokens | ||
37 | fn lookahead_nth(&self, n: usize) -> Token; | ||
38 | |||
39 | /// bump cursor to next token | ||
40 | fn bump(&mut self); | ||
41 | |||
42 | /// Is the current token a specified keyword? | ||
43 | fn is_keyword(&self, kw: &str) -> bool; | ||
44 | } | ||
45 | |||
46 | /// `Token` describes the token that the cursor of a `TokenSource` operates on. | ||
47 | #[derive(Debug, Copy, Clone, Eq, PartialEq)] | ||
48 | pub struct Token { | ||
34 | /// What is the current token? | 49 | /// What is the current token? |
35 | fn token_kind(&self, pos: usize) -> SyntaxKind; | 50 | pub kind: SyntaxKind, |
51 | |||
36 | /// Is the current token joined to the next one (`> >` vs `>>`). | 52 | /// Is the current token joined to the next one (`> >` vs `>>`). |
37 | fn is_token_joint_to_next(&self, pos: usize) -> bool; | 53 | pub is_jointed_to_next: bool, |
38 | /// Is the current token a specified keyword? | ||
39 | fn is_keyword(&self, pos: usize, kw: &str) -> bool; | ||
40 | } | 54 | } |
41 | 55 | ||
42 | /// `TreeSink` abstracts details of a particular syntax tree implementation. | 56 | /// `TreeSink` abstracts details of a particular syntax tree implementation. |
@@ -54,7 +68,7 @@ pub trait TreeSink { | |||
54 | fn error(&mut self, error: ParseError); | 68 | fn error(&mut self, error: ParseError); |
55 | } | 69 | } |
56 | 70 | ||
57 | fn parse_from_tokens<F>(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink, f: F) | 71 | fn parse_from_tokens<F>(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink, f: F) |
58 | where | 72 | where |
59 | F: FnOnce(&mut parser::Parser), | 73 | F: FnOnce(&mut parser::Parser), |
60 | { | 74 | { |
@@ -65,61 +79,65 @@ where | |||
65 | } | 79 | } |
66 | 80 | ||
67 | /// Parse given tokens into the given sink as a rust file. | 81 | /// Parse given tokens into the given sink as a rust file. |
68 | pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 82 | pub fn parse(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
69 | parse_from_tokens(token_source, tree_sink, grammar::root); | 83 | parse_from_tokens(token_source, tree_sink, grammar::root); |
70 | } | 84 | } |
71 | 85 | ||
72 | /// Parse given tokens into the given sink as a path | 86 | /// Parse given tokens into the given sink as a path |
73 | pub fn parse_path(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 87 | pub fn parse_path(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
74 | parse_from_tokens(token_source, tree_sink, grammar::path); | 88 | parse_from_tokens(token_source, tree_sink, grammar::path); |
75 | } | 89 | } |
76 | 90 | ||
77 | /// Parse given tokens into the given sink as an expression | 91 | /// Parse given tokens into the given sink as an expression |
78 | pub fn parse_expr(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 92 | pub fn parse_expr(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
79 | parse_from_tokens(token_source, tree_sink, grammar::expr); | 93 | parse_from_tokens(token_source, tree_sink, grammar::expr); |
80 | } | 94 | } |
81 | 95 | ||
82 | /// Parse given tokens into the given sink as a ty | 96 | /// Parse given tokens into the given sink as a ty |
83 | pub fn parse_ty(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 97 | pub fn parse_ty(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
84 | parse_from_tokens(token_source, tree_sink, grammar::type_); | 98 | parse_from_tokens(token_source, tree_sink, grammar::type_); |
85 | } | 99 | } |
86 | 100 | ||
87 | /// Parse given tokens into the given sink as a pattern | 101 | /// Parse given tokens into the given sink as a pattern |
88 | pub fn parse_pat(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 102 | pub fn parse_pat(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
89 | parse_from_tokens(token_source, tree_sink, grammar::pattern); | 103 | parse_from_tokens(token_source, tree_sink, grammar::pattern); |
90 | } | 104 | } |
91 | 105 | ||
92 | /// Parse given tokens into the given sink as a statement | 106 | /// Parse given tokens into the given sink as a statement |
93 | pub fn parse_stmt(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink, with_semi: bool) { | 107 | pub fn parse_stmt( |
108 | token_source: &mut dyn TokenSource, | ||
109 | tree_sink: &mut dyn TreeSink, | ||
110 | with_semi: bool, | ||
111 | ) { | ||
94 | parse_from_tokens(token_source, tree_sink, |p| grammar::stmt(p, with_semi)); | 112 | parse_from_tokens(token_source, tree_sink, |p| grammar::stmt(p, with_semi)); |
95 | } | 113 | } |
96 | 114 | ||
97 | /// Parse given tokens into the given sink as a block | 115 | /// Parse given tokens into the given sink as a block |
98 | pub fn parse_block(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 116 | pub fn parse_block(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
99 | parse_from_tokens(token_source, tree_sink, grammar::block); | 117 | parse_from_tokens(token_source, tree_sink, grammar::block); |
100 | } | 118 | } |
101 | 119 | ||
102 | pub fn parse_meta(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 120 | pub fn parse_meta(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
103 | parse_from_tokens(token_source, tree_sink, grammar::meta_item); | 121 | parse_from_tokens(token_source, tree_sink, grammar::meta_item); |
104 | } | 122 | } |
105 | 123 | ||
106 | /// Parse given tokens into the given sink as an item | 124 | /// Parse given tokens into the given sink as an item |
107 | pub fn parse_item(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 125 | pub fn parse_item(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
108 | parse_from_tokens(token_source, tree_sink, grammar::item); | 126 | parse_from_tokens(token_source, tree_sink, grammar::item); |
109 | } | 127 | } |
110 | 128 | ||
111 | /// Parse given tokens into the given sink as a visibility qualifier | 129 | /// Parse given tokens into the given sink as a visibility qualifier |
112 | pub fn parse_vis(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 130 | pub fn parse_vis(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
113 | parse_from_tokens(token_source, tree_sink, |p| { | 131 | parse_from_tokens(token_source, tree_sink, |p| { |
114 | grammar::opt_visibility(p); | 132 | grammar::opt_visibility(p); |
115 | }); | 133 | }); |
116 | } | 134 | } |
117 | 135 | ||
118 | pub fn parse_macro_items(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 136 | pub fn parse_macro_items(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
119 | parse_from_tokens(token_source, tree_sink, grammar::macro_items); | 137 | parse_from_tokens(token_source, tree_sink, grammar::macro_items); |
120 | } | 138 | } |
121 | 139 | ||
122 | pub fn parse_macro_stmts(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 140 | pub fn parse_macro_stmts(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
123 | parse_from_tokens(token_source, tree_sink, grammar::macro_stmts); | 141 | parse_from_tokens(token_source, tree_sink, grammar::macro_stmts); |
124 | } | 142 | } |
125 | 143 | ||
@@ -140,7 +158,7 @@ impl Reparser { | |||
140 | /// | 158 | /// |
141 | /// Tokens must start with `{`, end with `}` and form a valid brace | 159 | /// Tokens must start with `{`, end with `}` and form a valid brace |
142 | /// sequence. | 160 | /// sequence. |
143 | pub fn parse(self, token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { | 161 | pub fn parse(self, token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) { |
144 | let Reparser(r) = self; | 162 | let Reparser(r) = self; |
145 | let mut p = parser::Parser::new(token_source); | 163 | let mut p = parser::Parser::new(token_source); |
146 | r(&mut p); | 164 | r(&mut p); |
diff --git a/crates/ra_parser/src/parser.rs b/crates/ra_parser/src/parser.rs index 4434dfb09..8f654f04c 100644 --- a/crates/ra_parser/src/parser.rs +++ b/crates/ra_parser/src/parser.rs | |||
@@ -19,15 +19,14 @@ use crate::{ | |||
19 | /// "start expression, consume number literal, | 19 | /// "start expression, consume number literal, |
20 | /// finish expression". See `Event` docs for more. | 20 | /// finish expression". See `Event` docs for more. |
21 | pub(crate) struct Parser<'t> { | 21 | pub(crate) struct Parser<'t> { |
22 | token_source: &'t dyn TokenSource, | 22 | token_source: &'t mut dyn TokenSource, |
23 | token_pos: usize, | ||
24 | events: Vec<Event>, | 23 | events: Vec<Event>, |
25 | steps: Cell<u32>, | 24 | steps: Cell<u32>, |
26 | } | 25 | } |
27 | 26 | ||
28 | impl<'t> Parser<'t> { | 27 | impl<'t> Parser<'t> { |
29 | pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { | 28 | pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> { |
30 | Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) } | 29 | Parser { token_source, events: Vec::new(), steps: Cell::new(0) } |
31 | } | 30 | } |
32 | 31 | ||
33 | pub(crate) fn finish(self) -> Vec<Event> { | 32 | pub(crate) fn finish(self) -> Vec<Event> { |
@@ -49,7 +48,7 @@ impl<'t> Parser<'t> { | |||
49 | let c1 = self.nth(0); | 48 | let c1 = self.nth(0); |
50 | let c2 = self.nth(1); | 49 | let c2 = self.nth(1); |
51 | 50 | ||
52 | if self.token_source.is_token_joint_to_next(self.token_pos) { | 51 | if self.token_source.current().is_jointed_to_next { |
53 | Some((c1, c2)) | 52 | Some((c1, c2)) |
54 | } else { | 53 | } else { |
55 | None | 54 | None |
@@ -64,8 +63,8 @@ impl<'t> Parser<'t> { | |||
64 | let c1 = self.nth(0); | 63 | let c1 = self.nth(0); |
65 | let c2 = self.nth(1); | 64 | let c2 = self.nth(1); |
66 | let c3 = self.nth(2); | 65 | let c3 = self.nth(2); |
67 | if self.token_source.is_token_joint_to_next(self.token_pos) | 66 | if self.token_source.current().is_jointed_to_next |
68 | && self.token_source.is_token_joint_to_next(self.token_pos + 1) | 67 | && self.token_source.lookahead_nth(1).is_jointed_to_next |
69 | { | 68 | { |
70 | Some((c1, c2, c3)) | 69 | Some((c1, c2, c3)) |
71 | } else { | 70 | } else { |
@@ -76,6 +75,8 @@ impl<'t> Parser<'t> { | |||
76 | /// Lookahead operation: returns the kind of the next nth | 75 | /// Lookahead operation: returns the kind of the next nth |
77 | /// token. | 76 | /// token. |
78 | pub(crate) fn nth(&self, n: usize) -> SyntaxKind { | 77 | pub(crate) fn nth(&self, n: usize) -> SyntaxKind { |
78 | assert!(n <= 3); | ||
79 | |||
79 | let steps = self.steps.get(); | 80 | let steps = self.steps.get(); |
80 | assert!(steps <= 10_000_000, "the parser seems stuck"); | 81 | assert!(steps <= 10_000_000, "the parser seems stuck"); |
81 | self.steps.set(steps + 1); | 82 | self.steps.set(steps + 1); |
@@ -86,7 +87,7 @@ impl<'t> Parser<'t> { | |||
86 | let mut i = 0; | 87 | let mut i = 0; |
87 | 88 | ||
88 | loop { | 89 | loop { |
89 | let mut kind = self.token_source.token_kind(self.token_pos + i); | 90 | let mut kind = self.token_source.lookahead_nth(i).kind; |
90 | if let Some((composited, step)) = self.is_composite(kind, i) { | 91 | if let Some((composited, step)) = self.is_composite(kind, i) { |
91 | kind = composited; | 92 | kind = composited; |
92 | i += step; | 93 | i += step; |
@@ -115,7 +116,7 @@ impl<'t> Parser<'t> { | |||
115 | 116 | ||
116 | /// Checks if the current token is contextual keyword with text `t`. | 117 | /// Checks if the current token is contextual keyword with text `t`. |
117 | pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { | 118 | pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { |
118 | self.token_source.is_keyword(self.token_pos, kw) | 119 | self.token_source.is_keyword(kw) |
119 | } | 120 | } |
120 | 121 | ||
121 | /// Starts a new node in the syntax tree. All nodes and tokens | 122 | /// Starts a new node in the syntax tree. All nodes and tokens |
@@ -130,12 +131,12 @@ impl<'t> Parser<'t> { | |||
130 | /// Advances the parser by one token unconditionally | 131 | /// Advances the parser by one token unconditionally |
131 | /// Mainly used in `token_tree` parsing | 132 | /// Mainly used in `token_tree` parsing |
132 | pub(crate) fn bump_raw(&mut self) { | 133 | pub(crate) fn bump_raw(&mut self) { |
133 | let mut kind = self.token_source.token_kind(self.token_pos); | 134 | let mut kind = self.token_source.current().kind; |
134 | 135 | ||
135 | // Skip dollars, do_bump will eat these later | 136 | // Skip dollars, do_bump will eat these later |
136 | let mut i = 0; | 137 | let mut i = 0; |
137 | while kind == SyntaxKind::L_DOLLAR || kind == SyntaxKind::R_DOLLAR { | 138 | while kind == SyntaxKind::L_DOLLAR || kind == SyntaxKind::R_DOLLAR { |
138 | kind = self.token_source.token_kind(self.token_pos + i); | 139 | kind = self.token_source.lookahead_nth(i).kind; |
139 | i += 1; | 140 | i += 1; |
140 | } | 141 | } |
141 | 142 | ||
@@ -236,7 +237,11 @@ impl<'t> Parser<'t> { | |||
236 | 237 | ||
237 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { | 238 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { |
238 | self.eat_dollars(); | 239 | self.eat_dollars(); |
239 | self.token_pos += usize::from(n_raw_tokens); | 240 | |
241 | for _ in 0..n_raw_tokens { | ||
242 | self.token_source.bump(); | ||
243 | } | ||
244 | |||
240 | self.push_event(Event::Token { kind, n_raw_tokens }); | 245 | self.push_event(Event::Token { kind, n_raw_tokens }); |
241 | } | 246 | } |
242 | 247 | ||
@@ -249,10 +254,14 @@ impl<'t> Parser<'t> { | |||
249 | // We assume the dollars will not occur between | 254 | // We assume the dollars will not occur between |
250 | // multi-byte tokens | 255 | // multi-byte tokens |
251 | 256 | ||
252 | let jn1 = self.token_source.is_token_joint_to_next(self.token_pos + n); | 257 | let first = self.token_source.lookahead_nth(n); |
253 | let la2 = self.token_source.token_kind(self.token_pos + n + 1); | 258 | let second = self.token_source.lookahead_nth(n + 1); |
254 | let jn2 = self.token_source.is_token_joint_to_next(self.token_pos + n + 1); | 259 | let third = self.token_source.lookahead_nth(n + 2); |
255 | let la3 = self.token_source.token_kind(self.token_pos + n + 2); | 260 | |
261 | let jn1 = first.is_jointed_to_next; | ||
262 | let la2 = second.kind; | ||
263 | let jn2 = second.is_jointed_to_next; | ||
264 | let la3 = third.kind; | ||
256 | 265 | ||
257 | match kind { | 266 | match kind { |
258 | T![.] if jn1 && la2 == T![.] && jn2 && la3 == T![.] => Some((T![...], 3)), | 267 | T![.] if jn1 && la2 == T![.] && jn2 && la3 == T![.] => Some((T![...], 3)), |
@@ -271,9 +280,9 @@ impl<'t> Parser<'t> { | |||
271 | 280 | ||
272 | fn eat_dollars(&mut self) { | 281 | fn eat_dollars(&mut self) { |
273 | loop { | 282 | loop { |
274 | match self.token_source.token_kind(self.token_pos) { | 283 | match self.token_source.current().kind { |
275 | k @ SyntaxKind::L_DOLLAR | k @ SyntaxKind::R_DOLLAR => { | 284 | k @ SyntaxKind::L_DOLLAR | k @ SyntaxKind::R_DOLLAR => { |
276 | self.token_pos += 1; | 285 | self.token_source.bump(); |
277 | self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); | 286 | self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); |
278 | } | 287 | } |
279 | _ => { | 288 | _ => { |
@@ -286,9 +295,9 @@ impl<'t> Parser<'t> { | |||
286 | pub(crate) fn eat_l_dollars(&mut self) -> usize { | 295 | pub(crate) fn eat_l_dollars(&mut self) -> usize { |
287 | let mut ate_count = 0; | 296 | let mut ate_count = 0; |
288 | loop { | 297 | loop { |
289 | match self.token_source.token_kind(self.token_pos) { | 298 | match self.token_source.current().kind { |
290 | k @ SyntaxKind::L_DOLLAR => { | 299 | k @ SyntaxKind::L_DOLLAR => { |
291 | self.token_pos += 1; | 300 | self.token_source.bump(); |
292 | self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); | 301 | self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); |
293 | ate_count += 1; | 302 | ate_count += 1; |
294 | } | 303 | } |
@@ -302,9 +311,9 @@ impl<'t> Parser<'t> { | |||
302 | pub(crate) fn eat_r_dollars(&mut self, max_count: usize) -> usize { | 311 | pub(crate) fn eat_r_dollars(&mut self, max_count: usize) -> usize { |
303 | let mut ate_count = 0; | 312 | let mut ate_count = 0; |
304 | loop { | 313 | loop { |
305 | match self.token_source.token_kind(self.token_pos) { | 314 | match self.token_source.current().kind { |
306 | k @ SyntaxKind::R_DOLLAR => { | 315 | k @ SyntaxKind::R_DOLLAR => { |
307 | self.token_pos += 1; | 316 | self.token_source.bump(); |
308 | self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); | 317 | self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); |
309 | ate_count += 1; | 318 | ate_count += 1; |
310 | 319 | ||
@@ -320,12 +329,12 @@ impl<'t> Parser<'t> { | |||
320 | } | 329 | } |
321 | 330 | ||
322 | pub(crate) fn at_l_dollar(&self) -> bool { | 331 | pub(crate) fn at_l_dollar(&self) -> bool { |
323 | let kind = self.token_source.token_kind(self.token_pos); | 332 | let kind = self.token_source.current().kind; |
324 | (kind == SyntaxKind::L_DOLLAR) | 333 | (kind == SyntaxKind::L_DOLLAR) |
325 | } | 334 | } |
326 | 335 | ||
327 | pub(crate) fn at_r_dollar(&self) -> bool { | 336 | pub(crate) fn at_r_dollar(&self) -> bool { |
328 | let kind = self.token_source.token_kind(self.token_pos); | 337 | let kind = self.token_source.current().kind; |
329 | (kind == SyntaxKind::R_DOLLAR) | 338 | (kind == SyntaxKind::R_DOLLAR) |
330 | } | 339 | } |
331 | } | 340 | } |