diff options
-rw-r--r-- | crates/ra_syntax/src/parsing/parser_api.rs | 108 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/parser_impl.rs | 165 |
2 files changed, 102 insertions, 171 deletions
diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs index aed23a6a4..92d7895d3 100644 --- a/crates/ra_syntax/src/parsing/parser_api.rs +++ b/crates/ra_syntax/src/parsing/parser_api.rs | |||
@@ -1,10 +1,14 @@ | |||
1 | use std::cell::Cell; | ||
2 | |||
1 | use drop_bomb::DropBomb; | 3 | use drop_bomb::DropBomb; |
2 | 4 | ||
3 | use crate::{ | 5 | use crate::{ |
4 | SyntaxKind::{self, ERROR}, | 6 | syntax_error::ParseError, |
7 | SyntaxKind::{self, ERROR, EOF, TOMBSTONE}, | ||
5 | parsing::{ | 8 | parsing::{ |
9 | TokenSource, TokenPos, | ||
6 | token_set::TokenSet, | 10 | token_set::TokenSet, |
7 | parser_impl::ParserImpl, | 11 | parser_impl::event::Event, |
8 | }, | 12 | }, |
9 | }; | 13 | }; |
10 | 14 | ||
@@ -17,9 +21,22 @@ use crate::{ | |||
17 | /// tree, but rather a flat stream of events of the form | 21 | /// tree, but rather a flat stream of events of the form |
18 | /// "start expression, consume number literal, | 22 | /// "start expression, consume number literal, |
19 | /// finish expression". See `Event` docs for more. | 23 | /// finish expression". See `Event` docs for more. |
20 | pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); | 24 | pub(crate) struct Parser<'t> { |
25 | token_source: &'t dyn TokenSource, | ||
26 | pos: TokenPos, | ||
27 | events: Vec<Event>, | ||
28 | steps: Cell<u32>, | ||
29 | } | ||
21 | 30 | ||
22 | impl<'t> Parser<'t> { | 31 | impl<'t> Parser<'t> { |
32 | pub(super) fn new(token_source: &'t dyn TokenSource) -> Parser<'t> { | ||
33 | Parser { token_source, pos: TokenPos::default(), events: Vec::new(), steps: Cell::new(0) } | ||
34 | } | ||
35 | |||
36 | pub(crate) fn finish(self) -> Vec<Event> { | ||
37 | self.events | ||
38 | } | ||
39 | |||
23 | /// Returns the kind of the current token. | 40 | /// Returns the kind of the current token. |
24 | /// If parser has already reached the end of input, | 41 | /// If parser has already reached the end of input, |
25 | /// the special `EOF` kind is returned. | 42 | /// the special `EOF` kind is returned. |
@@ -32,7 +49,13 @@ impl<'t> Parser<'t> { | |||
32 | /// | 49 | /// |
33 | /// Useful for parsing things like `>>`. | 50 | /// Useful for parsing things like `>>`. |
34 | pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { | 51 | pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { |
35 | self.0.current2() | 52 | let c1 = self.token_source.token_kind(self.pos); |
53 | let c2 = self.token_source.token_kind(self.pos + 1); | ||
54 | if self.token_source.is_token_joint_to_next(self.pos) { | ||
55 | Some((c1, c2)) | ||
56 | } else { | ||
57 | None | ||
58 | } | ||
36 | } | 59 | } |
37 | 60 | ||
38 | /// Returns the kinds of the current three tokens, if they are not separated | 61 | /// Returns the kinds of the current three tokens, if they are not separated |
@@ -40,13 +63,25 @@ impl<'t> Parser<'t> { | |||
40 | /// | 63 | /// |
41 | /// Useful for parsing things like `=>>`. | 64 | /// Useful for parsing things like `=>>`. |
42 | pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { | 65 | pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { |
43 | self.0.current3() | 66 | let c1 = self.token_source.token_kind(self.pos); |
67 | let c2 = self.token_source.token_kind(self.pos + 1); | ||
68 | let c3 = self.token_source.token_kind(self.pos + 2); | ||
69 | if self.token_source.is_token_joint_to_next(self.pos) | ||
70 | && self.token_source.is_token_joint_to_next(self.pos + 1) | ||
71 | { | ||
72 | Some((c1, c2, c3)) | ||
73 | } else { | ||
74 | None | ||
75 | } | ||
44 | } | 76 | } |
45 | 77 | ||
46 | /// Lookahead operation: returns the kind of the next nth | 78 | /// Lookahead operation: returns the kind of the next nth |
47 | /// token. | 79 | /// token. |
48 | pub(crate) fn nth(&self, n: u32) -> SyntaxKind { | 80 | pub(crate) fn nth(&self, n: u32) -> SyntaxKind { |
49 | self.0.nth(n) | 81 | let steps = self.steps.get(); |
82 | assert!(steps <= 10_000_000, "the parser seems stuck"); | ||
83 | self.steps.set(steps + 1); | ||
84 | self.token_source.token_kind(self.pos + n) | ||
50 | } | 85 | } |
51 | 86 | ||
52 | /// Checks if the current token is `kind`. | 87 | /// Checks if the current token is `kind`. |
@@ -60,20 +95,26 @@ impl<'t> Parser<'t> { | |||
60 | } | 95 | } |
61 | 96 | ||
62 | /// Checks if the current token is contextual keyword with text `t`. | 97 | /// Checks if the current token is contextual keyword with text `t`. |
63 | pub(crate) fn at_contextual_kw(&self, t: &str) -> bool { | 98 | pub(crate) fn at_contextual_kw(&self, kw: &str) -> bool { |
64 | self.0.at_kw(t) | 99 | self.token_source.is_keyword(self.pos, kw) |
65 | } | 100 | } |
66 | 101 | ||
67 | /// Starts a new node in the syntax tree. All nodes and tokens | 102 | /// Starts a new node in the syntax tree. All nodes and tokens |
68 | /// consumed between the `start` and the corresponding `Marker::complete` | 103 | /// consumed between the `start` and the corresponding `Marker::complete` |
69 | /// belong to the same node. | 104 | /// belong to the same node. |
70 | pub(crate) fn start(&mut self) -> Marker { | 105 | pub(crate) fn start(&mut self) -> Marker { |
71 | Marker::new(self.0.start()) | 106 | let pos = self.events.len() as u32; |
107 | self.push_event(Event::tombstone()); | ||
108 | Marker::new(pos) | ||
72 | } | 109 | } |
73 | 110 | ||
74 | /// Advances the parser by one token unconditionally. | 111 | /// Advances the parser by one token unconditionally. |
75 | pub(crate) fn bump(&mut self) { | 112 | pub(crate) fn bump(&mut self) { |
76 | self.0.bump(); | 113 | let kind = self.nth(0); |
114 | if kind == EOF { | ||
115 | return; | ||
116 | } | ||
117 | self.do_bump(kind, 1); | ||
77 | } | 118 | } |
78 | 119 | ||
79 | /// Advances the parser by one token, remapping its kind. | 120 | /// Advances the parser by one token, remapping its kind. |
@@ -83,14 +124,18 @@ impl<'t> Parser<'t> { | |||
83 | /// `union` keyword, and keyword is what ends up in the | 124 | /// `union` keyword, and keyword is what ends up in the |
84 | /// final tree. | 125 | /// final tree. |
85 | pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { | 126 | pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { |
86 | self.0.bump_remap(kind); | 127 | if self.nth(0) == EOF { |
128 | // TODO: panic!? | ||
129 | return; | ||
130 | } | ||
131 | self.do_bump(kind, 1); | ||
87 | } | 132 | } |
88 | 133 | ||
89 | /// Advances the parser by `n` tokens, remapping its kind. | 134 | /// Advances the parser by `n` tokens, remapping its kind. |
90 | /// This is useful to create compound tokens from parts. For | 135 | /// This is useful to create compound tokens from parts. For |
91 | /// example, an `<<` token is two consecutive remapped `<` tokens | 136 | /// example, an `<<` token is two consecutive remapped `<` tokens |
92 | pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { | 137 | pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { |
93 | self.0.bump_compound(kind, n); | 138 | self.do_bump(kind, n); |
94 | } | 139 | } |
95 | 140 | ||
96 | /// Emit error with the `message` | 141 | /// Emit error with the `message` |
@@ -98,7 +143,8 @@ impl<'t> Parser<'t> { | |||
98 | /// structured errors with spans and notes, like rustc | 143 | /// structured errors with spans and notes, like rustc |
99 | /// does. | 144 | /// does. |
100 | pub(crate) fn error<T: Into<String>>(&mut self, message: T) { | 145 | pub(crate) fn error<T: Into<String>>(&mut self, message: T) { |
101 | self.0.error(message.into()) | 146 | let msg = ParseError(message.into()); |
147 | self.push_event(Event::Error { msg }) | ||
102 | } | 148 | } |
103 | 149 | ||
104 | /// Consume the next token if `kind` matches. | 150 | /// Consume the next token if `kind` matches. |
@@ -136,6 +182,15 @@ impl<'t> Parser<'t> { | |||
136 | m.complete(self, ERROR); | 182 | m.complete(self, ERROR); |
137 | }; | 183 | }; |
138 | } | 184 | } |
185 | |||
186 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { | ||
187 | self.pos += u32::from(n_raw_tokens); | ||
188 | self.push_event(Event::Token { kind, n_raw_tokens }); | ||
189 | } | ||
190 | |||
191 | fn push_event(&mut self, event: Event) { | ||
192 | self.events.push(event) | ||
193 | } | ||
139 | } | 194 | } |
140 | 195 | ||
141 | /// See `Parser::start`. | 196 | /// See `Parser::start`. |
@@ -154,7 +209,14 @@ impl Marker { | |||
154 | /// operation like `.precede()` to deal with forward_parent. | 209 | /// operation like `.precede()` to deal with forward_parent. |
155 | pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { | 210 | pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { |
156 | self.bomb.defuse(); | 211 | self.bomb.defuse(); |
157 | p.0.complete(self.pos, kind); | 212 | let idx = self.pos as usize; |
213 | match p.events[idx] { | ||
214 | Event::Start { kind: ref mut slot, .. } => { | ||
215 | *slot = kind; | ||
216 | } | ||
217 | _ => unreachable!(), | ||
218 | } | ||
219 | p.push_event(Event::Finish); | ||
158 | CompletedMarker::new(self.pos, kind) | 220 | CompletedMarker::new(self.pos, kind) |
159 | } | 221 | } |
160 | 222 | ||
@@ -162,7 +224,13 @@ impl Marker { | |||
162 | /// are attached to its parent instead. | 224 | /// are attached to its parent instead. |
163 | pub(crate) fn abandon(mut self, p: &mut Parser) { | 225 | pub(crate) fn abandon(mut self, p: &mut Parser) { |
164 | self.bomb.defuse(); | 226 | self.bomb.defuse(); |
165 | p.0.abandon(self.pos); | 227 | let idx = self.pos as usize; |
228 | if idx == p.events.len() - 1 { | ||
229 | match p.events.pop() { | ||
230 | Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), | ||
231 | _ => unreachable!(), | ||
232 | } | ||
233 | } | ||
166 | } | 234 | } |
167 | } | 235 | } |
168 | 236 | ||
@@ -186,7 +254,15 @@ impl CompletedMarker { | |||
186 | /// then mark `NEWSTART` as `START`'s parent with saving its relative | 254 | /// then mark `NEWSTART` as `START`'s parent with saving its relative |
187 | /// distance to `NEWSTART` into forward_parent(=2 in this case); | 255 | /// distance to `NEWSTART` into forward_parent(=2 in this case); |
188 | pub(crate) fn precede(self, p: &mut Parser) -> Marker { | 256 | pub(crate) fn precede(self, p: &mut Parser) -> Marker { |
189 | Marker::new(p.0.precede(self.0)) | 257 | let new_pos = p.start(); |
258 | let idx = self.0 as usize; | ||
259 | match p.events[idx] { | ||
260 | Event::Start { ref mut forward_parent, .. } => { | ||
261 | *forward_parent = Some(new_pos.pos - self.0); | ||
262 | } | ||
263 | _ => unreachable!(), | ||
264 | } | ||
265 | new_pos | ||
190 | } | 266 | } |
191 | 267 | ||
192 | pub(crate) fn kind(&self) -> SyntaxKind { | 268 | pub(crate) fn kind(&self) -> SyntaxKind { |
diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs index 89439e074..6eed0e656 100644 --- a/crates/ra_syntax/src/parsing/parser_impl.rs +++ b/crates/ra_syntax/src/parsing/parser_impl.rs | |||
@@ -1,20 +1,13 @@ | |||
1 | mod event; | 1 | pub(super) mod event; |
2 | pub(crate) mod input; | 2 | pub(super) mod input; |
3 | 3 | ||
4 | use std::cell::Cell; | 4 | use crate::parsing::{ |
5 | 5 | TreeSink, TokenSource, | |
6 | use crate::{ | 6 | lexer::Token, |
7 | syntax_error::ParseError, | 7 | parser_api::Parser, |
8 | parsing::{ | 8 | parser_impl::event::EventProcessor, |
9 | TreeSink, TokenSource, TokenPos, | ||
10 | lexer::Token, | ||
11 | parser_api::Parser, | ||
12 | parser_impl::event::{Event, EventProcessor}, | ||
13 | }, | ||
14 | }; | 9 | }; |
15 | 10 | ||
16 | use crate::SyntaxKind::{self, EOF, TOMBSTONE}; | ||
17 | |||
18 | /// Parse a sequence of tokens into the representative node tree | 11 | /// Parse a sequence of tokens into the representative node tree |
19 | pub(super) fn parse_with<S: TreeSink>( | 12 | pub(super) fn parse_with<S: TreeSink>( |
20 | sink: S, | 13 | sink: S, |
@@ -24,147 +17,9 @@ pub(super) fn parse_with<S: TreeSink>( | |||
24 | ) -> S::Tree { | 17 | ) -> S::Tree { |
25 | let mut events = { | 18 | let mut events = { |
26 | let input = input::ParserInput::new(text, tokens); | 19 | let input = input::ParserInput::new(text, tokens); |
27 | let parser_impl = ParserImpl::new(&input); | 20 | let mut parser_api = Parser::new(&input); |
28 | let mut parser_api = Parser(parser_impl); | ||
29 | parser(&mut parser_api); | 21 | parser(&mut parser_api); |
30 | parser_api.0.into_events() | 22 | parser_api.finish() |
31 | }; | 23 | }; |
32 | EventProcessor::new(sink, text, tokens, &mut events).process().finish() | 24 | EventProcessor::new(sink, text, tokens, &mut events).process().finish() |
33 | } | 25 | } |
34 | |||
35 | /// Implementation details of `Parser`, extracted | ||
36 | /// to a separate struct in order not to pollute | ||
37 | /// the public API of the `Parser`. | ||
38 | pub(super) struct ParserImpl<'a> { | ||
39 | token_source: &'a dyn TokenSource, | ||
40 | pos: TokenPos, | ||
41 | events: Vec<Event>, | ||
42 | steps: Cell<u32>, | ||
43 | } | ||
44 | |||
45 | impl<'a> ParserImpl<'a> { | ||
46 | fn new(token_source: &'a dyn TokenSource) -> ParserImpl<'a> { | ||
47 | ParserImpl { | ||
48 | token_source, | ||
49 | pos: TokenPos::default(), | ||
50 | events: Vec::new(), | ||
51 | steps: Cell::new(0), | ||
52 | } | ||
53 | } | ||
54 | |||
55 | fn into_events(self) -> Vec<Event> { | ||
56 | assert_eq!(self.nth(0), EOF); | ||
57 | self.events | ||
58 | } | ||
59 | |||
60 | pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { | ||
61 | let c1 = self.token_source.token_kind(self.pos); | ||
62 | let c2 = self.token_source.token_kind(self.pos + 1); | ||
63 | if self.token_source.is_token_joint_to_next(self.pos) { | ||
64 | Some((c1, c2)) | ||
65 | } else { | ||
66 | None | ||
67 | } | ||
68 | } | ||
69 | |||
70 | pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { | ||
71 | let c1 = self.token_source.token_kind(self.pos); | ||
72 | let c2 = self.token_source.token_kind(self.pos + 1); | ||
73 | let c3 = self.token_source.token_kind(self.pos + 2); | ||
74 | if self.token_source.is_token_joint_to_next(self.pos) | ||
75 | && self.token_source.is_token_joint_to_next(self.pos + 1) | ||
76 | { | ||
77 | Some((c1, c2, c3)) | ||
78 | } else { | ||
79 | None | ||
80 | } | ||
81 | } | ||
82 | |||
83 | /// Get the syntax kind of the nth token. | ||
84 | pub(super) fn nth(&self, n: u32) -> SyntaxKind { | ||
85 | let steps = self.steps.get(); | ||
86 | assert!(steps <= 10_000_000, "the parser seems stuck"); | ||
87 | self.steps.set(steps + 1); | ||
88 | self.token_source.token_kind(self.pos + n) | ||
89 | } | ||
90 | |||
91 | pub(super) fn at_kw(&self, kw: &str) -> bool { | ||
92 | self.token_source.is_keyword(self.pos, kw) | ||
93 | } | ||
94 | |||
95 | /// Start parsing right behind the last event. | ||
96 | pub(super) fn start(&mut self) -> u32 { | ||
97 | let pos = self.events.len() as u32; | ||
98 | self.push_event(Event::tombstone()); | ||
99 | pos | ||
100 | } | ||
101 | |||
102 | /// Advances the parser by one token unconditionally. | ||
103 | pub(super) fn bump(&mut self) { | ||
104 | let kind = self.nth(0); | ||
105 | if kind == EOF { | ||
106 | return; | ||
107 | } | ||
108 | self.do_bump(kind, 1); | ||
109 | } | ||
110 | |||
111 | pub(super) fn bump_remap(&mut self, kind: SyntaxKind) { | ||
112 | if self.nth(0) == EOF { | ||
113 | // TODO: panic!? | ||
114 | return; | ||
115 | } | ||
116 | self.do_bump(kind, 1); | ||
117 | } | ||
118 | |||
119 | pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { | ||
120 | self.do_bump(kind, n); | ||
121 | } | ||
122 | |||
123 | fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { | ||
124 | self.pos += u32::from(n_raw_tokens); | ||
125 | self.push_event(Event::Token { kind, n_raw_tokens }); | ||
126 | } | ||
127 | |||
128 | /// Append one Error event to the back of events. | ||
129 | pub(super) fn error(&mut self, msg: String) { | ||
130 | self.push_event(Event::Error { msg: ParseError(msg) }) | ||
131 | } | ||
132 | |||
133 | /// Complete an event with appending a `Finish` event. | ||
134 | pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) { | ||
135 | match self.events[pos as usize] { | ||
136 | Event::Start { kind: ref mut slot, .. } => { | ||
137 | *slot = kind; | ||
138 | } | ||
139 | _ => unreachable!(), | ||
140 | } | ||
141 | self.push_event(Event::Finish); | ||
142 | } | ||
143 | |||
144 | /// Ignore the dummy `Start` event. | ||
145 | pub(super) fn abandon(&mut self, pos: u32) { | ||
146 | let idx = pos as usize; | ||
147 | if idx == self.events.len() - 1 { | ||
148 | match self.events.pop() { | ||
149 | Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (), | ||
150 | _ => unreachable!(), | ||
151 | } | ||
152 | } | ||
153 | } | ||
154 | |||
155 | /// Save the relative distance of a completed event to its forward_parent. | ||
156 | pub(super) fn precede(&mut self, pos: u32) -> u32 { | ||
157 | let new_pos = self.start(); | ||
158 | match self.events[pos as usize] { | ||
159 | Event::Start { ref mut forward_parent, .. } => { | ||
160 | *forward_parent = Some(new_pos - pos); | ||
161 | } | ||
162 | _ => unreachable!(), | ||
163 | } | ||
164 | new_pos | ||
165 | } | ||
166 | |||
167 | fn push_event(&mut self, event: Event) { | ||
168 | self.events.push(event) | ||
169 | } | ||
170 | } | ||