aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_parser
diff options
context:
space:
mode:
authorEdwin Cheng <[email protected]>2019-05-25 13:31:53 +0100
committerEdwin Cheng <[email protected]>2019-05-25 13:41:03 +0100
commitfcb1eef3232c3fc673bf5f98595708e108c3950c (patch)
tree326a3961c5ca99cb50a12fb94f3398116af4a2f1 /crates/ra_parser
parentef00b5af1c7a7a7cac685eff661a10252825d84a (diff)
Change TokenSource to iteration based
Diffstat (limited to 'crates/ra_parser')
-rw-r--r--crates/ra_parser/src/lib.rs54
-rw-r--r--crates/ra_parser/src/parser.rs57
2 files changed, 69 insertions, 42 deletions
diff --git a/crates/ra_parser/src/lib.rs b/crates/ra_parser/src/lib.rs
index 697d1b794..3d88be642 100644
--- a/crates/ra_parser/src/lib.rs
+++ b/crates/ra_parser/src/lib.rs
@@ -31,12 +31,26 @@ pub struct ParseError(pub String);
31/// 31///
32/// Hopefully this will allow us to treat text and token trees in the same way! 32/// Hopefully this will allow us to treat text and token trees in the same way!
33pub trait TokenSource { 33pub trait TokenSource {
34 fn current(&self) -> Token;
35
36 /// Look ahead by `n` tokens
37 fn lookahead_nth(&self, n: usize) -> Token;
38
39 /// bump cursor to next token
40 fn bump(&mut self);
41
42 /// Is the current token a specified keyword?
43 fn is_keyword(&self, kw: &str) -> bool;
44}
45
46/// `Token` abstracts the cursor that `TokenSource` operates on.
47#[derive(Debug, Copy, Clone, Eq, PartialEq)]
48pub struct Token {
34 /// What is the current token? 49 /// What is the current token?
35 fn token_kind(&self, pos: usize) -> SyntaxKind; 50 pub kind: SyntaxKind,
51
36 /// Is the current token joined to the next one (`> >` vs `>>`). 52 /// Is the current token joined to the next one (`> >` vs `>>`).
37 fn is_token_joint_to_next(&self, pos: usize) -> bool; 53 pub is_jointed_to_next: bool,
38 /// Is the current token a specified keyword?
39 fn is_keyword(&self, pos: usize, kw: &str) -> bool;
40} 54}
41 55
42/// `TreeSink` abstracts details of a particular syntax tree implementation. 56/// `TreeSink` abstracts details of a particular syntax tree implementation.
@@ -54,7 +68,7 @@ pub trait TreeSink {
54 fn error(&mut self, error: ParseError); 68 fn error(&mut self, error: ParseError);
55} 69}
56 70
57fn parse_from_tokens<F>(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink, f: F) 71fn parse_from_tokens<F>(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink, f: F)
58where 72where
59 F: FnOnce(&mut parser::Parser), 73 F: FnOnce(&mut parser::Parser),
60{ 74{
@@ -65,61 +79,65 @@ where
65} 79}
66 80
67/// Parse given tokens into the given sink as a rust file. 81/// Parse given tokens into the given sink as a rust file.
68pub fn parse(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 82pub fn parse(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
69 parse_from_tokens(token_source, tree_sink, grammar::root); 83 parse_from_tokens(token_source, tree_sink, grammar::root);
70} 84}
71 85
72/// Parse given tokens into the given sink as a path 86/// Parse given tokens into the given sink as a path
73pub fn parse_path(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 87pub fn parse_path(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
74 parse_from_tokens(token_source, tree_sink, grammar::path); 88 parse_from_tokens(token_source, tree_sink, grammar::path);
75} 89}
76 90
77/// Parse given tokens into the given sink as an expression 91/// Parse given tokens into the given sink as an expression
78pub fn parse_expr(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 92pub fn parse_expr(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
79 parse_from_tokens(token_source, tree_sink, grammar::expr); 93 parse_from_tokens(token_source, tree_sink, grammar::expr);
80} 94}
81 95
82/// Parse given tokens into the given sink as a ty 96/// Parse given tokens into the given sink as a ty
83pub fn parse_ty(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 97pub fn parse_ty(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
84 parse_from_tokens(token_source, tree_sink, grammar::type_); 98 parse_from_tokens(token_source, tree_sink, grammar::type_);
85} 99}
86 100
87/// Parse given tokens into the given sink as a pattern 101/// Parse given tokens into the given sink as a pattern
88pub fn parse_pat(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 102pub fn parse_pat(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
89 parse_from_tokens(token_source, tree_sink, grammar::pattern); 103 parse_from_tokens(token_source, tree_sink, grammar::pattern);
90} 104}
91 105
92/// Parse given tokens into the given sink as a statement 106/// Parse given tokens into the given sink as a statement
93pub fn parse_stmt(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink, with_semi: bool) { 107pub fn parse_stmt(
108 token_source: &mut dyn TokenSource,
109 tree_sink: &mut dyn TreeSink,
110 with_semi: bool,
111) {
94 parse_from_tokens(token_source, tree_sink, |p| grammar::stmt(p, with_semi)); 112 parse_from_tokens(token_source, tree_sink, |p| grammar::stmt(p, with_semi));
95} 113}
96 114
97/// Parse given tokens into the given sink as a block 115/// Parse given tokens into the given sink as a block
98pub fn parse_block(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 116pub fn parse_block(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
99 parse_from_tokens(token_source, tree_sink, grammar::block); 117 parse_from_tokens(token_source, tree_sink, grammar::block);
100} 118}
101 119
102pub fn parse_meta(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 120pub fn parse_meta(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
103 parse_from_tokens(token_source, tree_sink, grammar::meta_item); 121 parse_from_tokens(token_source, tree_sink, grammar::meta_item);
104} 122}
105 123
106/// Parse given tokens into the given sink as an item 124/// Parse given tokens into the given sink as an item
107pub fn parse_item(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 125pub fn parse_item(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
108 parse_from_tokens(token_source, tree_sink, grammar::item); 126 parse_from_tokens(token_source, tree_sink, grammar::item);
109} 127}
110 128
111/// Parse given tokens into the given sink as a visibility qualifier 129/// Parse given tokens into the given sink as a visibility qualifier
112pub fn parse_vis(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 130pub fn parse_vis(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
113 parse_from_tokens(token_source, tree_sink, |p| { 131 parse_from_tokens(token_source, tree_sink, |p| {
114 grammar::opt_visibility(p); 132 grammar::opt_visibility(p);
115 }); 133 });
116} 134}
117 135
118pub fn parse_macro_items(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 136pub fn parse_macro_items(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
119 parse_from_tokens(token_source, tree_sink, grammar::macro_items); 137 parse_from_tokens(token_source, tree_sink, grammar::macro_items);
120} 138}
121 139
122pub fn parse_macro_stmts(token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 140pub fn parse_macro_stmts(token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
123 parse_from_tokens(token_source, tree_sink, grammar::macro_stmts); 141 parse_from_tokens(token_source, tree_sink, grammar::macro_stmts);
124} 142}
125 143
@@ -140,7 +158,7 @@ impl Reparser {
140 /// 158 ///
141 /// Tokens must start with `{`, end with `}` and form a valid brace 159 /// Tokens must start with `{`, end with `}` and form a valid brace
142 /// sequence. 160 /// sequence.
143 pub fn parse(self, token_source: &dyn TokenSource, tree_sink: &mut dyn TreeSink) { 161 pub fn parse(self, token_source: &mut dyn TokenSource, tree_sink: &mut dyn TreeSink) {
144 let Reparser(r) = self; 162 let Reparser(r) = self;
145 let mut p = parser::Parser::new(token_source); 163 let mut p = parser::Parser::new(token_source);
146 r(&mut p); 164 r(&mut p);
diff --git a/crates/ra_parser/src/parser.rs b/crates/ra_parser/src/parser.rs
index 4434dfb09..8f654f04c 100644
--- a/crates/ra_parser/src/parser.rs
+++ b/crates/ra_parser/src/parser.rs
@@ -19,15 +19,14 @@ use crate::{
19/// "start expression, consume number literal, 19/// "start expression, consume number literal,
20/// finish expression". See `Event` docs for more. 20/// finish expression". See `Event` docs for more.
21pub(crate) struct Parser<'t> { 21pub(crate) struct Parser<'t> {
22 token_source: &'t dyn TokenSource, 22 token_source: &'t mut dyn TokenSource,
23 token_pos: usize,
24 events: Vec<Event>, 23 events: Vec<Event>,
25 steps: Cell<u32>, 24 steps: Cell<u32>,
26} 25}
27 26
28impl<'t> Parser<'t> { 27impl<'t> Parser<'t> {
29 pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { 28 pub(super) fn new(token_source: &'t mut dyn TokenSource) -> Parser<'t> {
30 Parser { token_source, token_pos: 0, events: Vec::new(), steps: Cell::new(0) } 29 Parser { token_source, events: Vec::new(), steps: Cell::new(0) }
31 } 30 }
32 31
33 pub(crate) fn finish(self) -> Vec<Event> { 32 pub(crate) fn finish(self) -> Vec<Event> {
@@ -49,7 +48,7 @@ impl<'t> Parser<'t> {
49 let c1 = self.nth(0); 48 let c1 = self.nth(0);
50 let c2 = self.nth(1); 49 let c2 = self.nth(1);
51 50
52 if self.token_source.is_token_joint_to_next(self.token_pos) { 51 if self.token_source.current().is_jointed_to_next {
53 Some((c1, c2)) 52 Some((c1, c2))
54 } else { 53 } else {
55 None 54 None
@@ -64,8 +63,8 @@ impl<'t> Parser<'t> {
64 let c1 = self.nth(0); 63 let c1 = self.nth(0);
65 let c2 = self.nth(1); 64 let c2 = self.nth(1);
66 let c3 = self.nth(2); 65 let c3 = self.nth(2);
67 if self.token_source.is_token_joint_to_next(self.token_pos) 66 if self.token_source.current().is_jointed_to_next
68 && self.token_source.is_token_joint_to_next(self.token_pos + 1) 67 && self.token_source.lookahead_nth(1).is_jointed_to_next
69 { 68 {
70 Some((c1, c2, c3)) 69 Some((c1, c2, c3))
71 } else { 70 } else {
@@ -76,6 +75,8 @@ impl<'t> Parser<'t> {
76 /// Lookahead operation: returns the kind of the next nth 75 /// Lookahead operation: returns the kind of the next nth
77 /// token. 76 /// token.
78 pub(crate) fn nth(&self, n: usize) -> SyntaxKind { 77 pub(crate) fn nth(&self, n: usize) -> SyntaxKind {
78 assert!(n <= 3);
79
79 let steps = self.steps.get(); 80 let steps = self.steps.get();
80 assert!(steps <= 10_000_000, "the parser seems stuck"); 81 assert!(steps <= 10_000_000, "the parser seems stuck");
81 self.steps.set(steps + 1); 82 self.steps.set(steps + 1);
@@ -86,7 +87,7 @@ impl<'t> Parser<'t> {
86 let mut i = 0; 87 let mut i = 0;
87 88
88 loop { 89 loop {
89 let mut kind = self.token_source.token_kind(self.token_pos + i); 90 let mut kind = self.token_source.lookahead_nth(i).kind;
90 if let Some((composited, step)) = self.is_composite(kind, i) { 91 if let Some((composited, step)) = self.is_composite(kind, i) {
91 kind = composited; 92 kind = composited;
92 i += step; 93 i += step;
@@ -115,7 +116,7 @@ impl<'t> Parser<'t> {
115 116
116 /// Checks if the current token is contextual keyword with text `t`. 117 /// Checks if the current token is contextual keyword with text `t`.
117 pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { 118 pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool {
118 self.token_source.is_keyword(self.token_pos, kw) 119 self.token_source.is_keyword(kw)
119 } 120 }
120 121
121 /// Starts a new node in the syntax tree. All nodes and tokens 122 /// Starts a new node in the syntax tree. All nodes and tokens
@@ -130,12 +131,12 @@ impl<'t> Parser<'t> {
130 /// Advances the parser by one token unconditionally 131 /// Advances the parser by one token unconditionally
131 /// Mainly used in `token_tree` parsing 132 /// Mainly used in `token_tree` parsing
132 pub(crate) fn bump_raw(&mut self) { 133 pub(crate) fn bump_raw(&mut self) {
133 let mut kind = self.token_source.token_kind(self.token_pos); 134 let mut kind = self.token_source.current().kind;
134 135
135 // Skip dollars, do_bump will eat these later 136 // Skip dollars, do_bump will eat these later
136 let mut i = 0; 137 let mut i = 0;
137 while kind == SyntaxKind::L_DOLLAR || kind == SyntaxKind::R_DOLLAR { 138 while kind == SyntaxKind::L_DOLLAR || kind == SyntaxKind::R_DOLLAR {
138 kind = self.token_source.token_kind(self.token_pos + i); 139 kind = self.token_source.lookahead_nth(i).kind;
139 i += 1; 140 i += 1;
140 } 141 }
141 142
@@ -236,7 +237,11 @@ impl<'t> Parser<'t> {
236 237
237 fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { 238 fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
238 self.eat_dollars(); 239 self.eat_dollars();
239 self.token_pos += usize::from(n_raw_tokens); 240
241 for _ in 0..n_raw_tokens {
242 self.token_source.bump();
243 }
244
240 self.push_event(Event::Token { kind, n_raw_tokens }); 245 self.push_event(Event::Token { kind, n_raw_tokens });
241 } 246 }
242 247
@@ -249,10 +254,14 @@ impl<'t> Parser<'t> {
249 // We assume the dollars will not occur between 254 // We assume the dollars will not occur between
250 // multi-byte tokens 255 // multi-byte tokens
251 256
252 let jn1 = self.token_source.is_token_joint_to_next(self.token_pos + n); 257 let first = self.token_source.lookahead_nth(n);
253 let la2 = self.token_source.token_kind(self.token_pos + n + 1); 258 let second = self.token_source.lookahead_nth(n + 1);
254 let jn2 = self.token_source.is_token_joint_to_next(self.token_pos + n + 1); 259 let third = self.token_source.lookahead_nth(n + 2);
255 let la3 = self.token_source.token_kind(self.token_pos + n + 2); 260
261 let jn1 = first.is_jointed_to_next;
262 let la2 = second.kind;
263 let jn2 = second.is_jointed_to_next;
264 let la3 = third.kind;
256 265
257 match kind { 266 match kind {
258 T![.] if jn1 && la2 == T![.] && jn2 && la3 == T![.] => Some((T![...], 3)), 267 T![.] if jn1 && la2 == T![.] && jn2 && la3 == T![.] => Some((T![...], 3)),
@@ -271,9 +280,9 @@ impl<'t> Parser<'t> {
271 280
272 fn eat_dollars(&mut self) { 281 fn eat_dollars(&mut self) {
273 loop { 282 loop {
274 match self.token_source.token_kind(self.token_pos) { 283 match self.token_source.current().kind {
275 k @ SyntaxKind::L_DOLLAR | k @ SyntaxKind::R_DOLLAR => { 284 k @ SyntaxKind::L_DOLLAR | k @ SyntaxKind::R_DOLLAR => {
276 self.token_pos += 1; 285 self.token_source.bump();
277 self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); 286 self.push_event(Event::Token { kind: k, n_raw_tokens: 1 });
278 } 287 }
279 _ => { 288 _ => {
@@ -286,9 +295,9 @@ impl<'t> Parser<'t> {
286 pub(crate) fn eat_l_dollars(&mut self) -> usize { 295 pub(crate) fn eat_l_dollars(&mut self) -> usize {
287 let mut ate_count = 0; 296 let mut ate_count = 0;
288 loop { 297 loop {
289 match self.token_source.token_kind(self.token_pos) { 298 match self.token_source.current().kind {
290 k @ SyntaxKind::L_DOLLAR => { 299 k @ SyntaxKind::L_DOLLAR => {
291 self.token_pos += 1; 300 self.token_source.bump();
292 self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); 301 self.push_event(Event::Token { kind: k, n_raw_tokens: 1 });
293 ate_count += 1; 302 ate_count += 1;
294 } 303 }
@@ -302,9 +311,9 @@ impl<'t> Parser<'t> {
302 pub(crate) fn eat_r_dollars(&mut self, max_count: usize) -> usize { 311 pub(crate) fn eat_r_dollars(&mut self, max_count: usize) -> usize {
303 let mut ate_count = 0; 312 let mut ate_count = 0;
304 loop { 313 loop {
305 match self.token_source.token_kind(self.token_pos) { 314 match self.token_source.current().kind {
306 k @ SyntaxKind::R_DOLLAR => { 315 k @ SyntaxKind::R_DOLLAR => {
307 self.token_pos += 1; 316 self.token_source.bump();
308 self.push_event(Event::Token { kind: k, n_raw_tokens: 1 }); 317 self.push_event(Event::Token { kind: k, n_raw_tokens: 1 });
309 ate_count += 1; 318 ate_count += 1;
310 319
@@ -320,12 +329,12 @@ impl<'t> Parser<'t> {
320 } 329 }
321 330
322 pub(crate) fn at_l_dollar(&self) -> bool { 331 pub(crate) fn at_l_dollar(&self) -> bool {
323 let kind = self.token_source.token_kind(self.token_pos); 332 let kind = self.token_source.current().kind;
324 (kind == SyntaxKind::L_DOLLAR) 333 (kind == SyntaxKind::L_DOLLAR)
325 } 334 }
326 335
327 pub(crate) fn at_r_dollar(&self) -> bool { 336 pub(crate) fn at_r_dollar(&self) -> bool {
328 let kind = self.token_source.token_kind(self.token_pos); 337 let kind = self.token_source.current().kind;
329 (kind == SyntaxKind::R_DOLLAR) 338 (kind == SyntaxKind::R_DOLLAR)
330 } 339 }
331} 340}