diff options
author | bors[bot] <bors[bot]@users.noreply.github.com> | 2018-02-11 14:59:58 +0000 |
---|---|---|
committer | bors[bot] <bors[bot]@users.noreply.github.com> | 2018-02-11 14:59:58 +0000 |
commit | a6f9b0414cf5bf49ad7f714b9d3fe5af91a16404 (patch) | |
tree | 2fc1e8ccc43bbee85a06026270d7c8de5959e323 | |
parent | 7a0ada860b57acd44b1d53e944ae621e438652da (diff) | |
parent | f356628ad8392c6e3ffd72a9ac50a7be87d3d183 (diff) |
Merge #50
50: Shiny new parser r=matklad a=matklad
bors r+
-rw-r--r-- | docs/ARCHITECTURE.md | 21 | ||||
-rw-r--r-- | src/parser/event.rs | 8 | ||||
-rw-r--r-- | src/parser/grammar/items/mod.rs | 4 | ||||
-rw-r--r-- | src/parser/grammar/mod.rs | 26 | ||||
-rw-r--r-- | src/parser/mod.rs | 37 | ||||
-rw-r--r-- | src/parser/parser.rs | 193 | ||||
-rw-r--r-- | src/parser/parser/imp.rs | 119 | ||||
-rw-r--r-- | src/parser/parser/mod.rs | 142 | ||||
-rw-r--r-- | src/parser/token_set.rs | 24 | ||||
-rw-r--r-- | src/tree/file_builder.rs | 4 |
10 files changed, 330 insertions, 248 deletions
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index a1fa246c2..6b4434396 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md | |||
@@ -33,19 +33,22 @@ The centerpiece of this whole endeavor is the syntax tree, in the | |||
33 | 33 | ||
34 | The syntax tree is produced using a three-staged process. | 34 | The syntax tree is produced using a three-staged process. |
35 | 35 | ||
36 | First, a raw text is split into tokens with a lexer. Lexer has a | 36 | First, a raw text is split into tokens with a lexer (the `lexer` module). |
37 | peculiar signature: it is an `Fn(&str) -> Token`, where token is a | 37 | Lexer has a peculiar signature: it is an `Fn(&str) -> Token`, where token |
38 | pair of `SyntaxKind` (you should have read the `tree` module and RFC | 38 | is a pair of `SyntaxKind` (you should have read the `tree` module and RFC |
39 | by this time! :)) and a len. That is, lexer chomps only the first | 39 | by this time! :)) and a len. That is, lexer chomps only the first |
40 | token of the input. This forces the lexer to be stateless, and makes | 40 | token of the input. This forces the lexer to be stateless, and makes |
41 | it possible to implement incremental relexing easily. | 41 | it possible to implement incremental relexing easily. |
42 | 42 | ||
43 | Then, the bulk of work, the parser turns a stream of tokens into | 43 | Then, the bulk of work, the parser turns a stream of tokens into |
44 | stream of events. Not that parser **does not** construct a tree right | 44 | stream of events (the `parser` module; of particular interest are |
45 | away. This is done for several reasons: | 45 | the `parser/event` and `parser/parser` modules, which contain parsing |
46 | API, and the `parser/grammar` module, which contains actual parsing code | ||
47 | for various Rust syntactic constructs). Note that the parser **does not** | ||
48 | construct a tree right away. This is done for several reasons: | ||
46 | 49 | ||
47 | * to decouple the actual tree data structure from the parser: you can | 50 | * to decouple the actual tree data structure from the parser: you can |
48 | build any datastructre you want from the stream of events | 51 | build any data structure you want from the stream of events |
49 | 52 | ||
50 | * to make parsing fast: you can produce a list of events without | 53 | * to make parsing fast: you can produce a list of events without |
51 | allocations | 54 | allocations |
@@ -77,12 +80,6 @@ And at last, the TreeBuilder converts a flat stream of events into a | |||
77 | tree structure. It also *should* be responsible for attaching comments | 80 | tree structure. It also *should* be responsible for attaching comments |
78 | and rebalancing the tree, but it does not do this yet :) | 81 | and rebalancing the tree, but it does not do this yet :) |
79 | 82 | ||
80 | |||
81 | ## Error reporing | ||
82 | |||
83 | TODO: describe how stuff like `skip_to_first` works | ||
84 | |||
85 | |||
86 | ## Validator | 83 | ## Validator |
87 | 84 | ||
88 | Parser and lexer accept a lot of *invalid* code intentionally. The | 85 | Parser and lexer accept a lot of *invalid* code intentionally. The |
diff --git a/src/parser/event.rs b/src/parser/event.rs index 90348398e..1c0905a38 100644 --- a/src/parser/event.rs +++ b/src/parser/event.rs | |||
@@ -42,7 +42,7 @@ pub(crate) enum Event { | |||
42 | /// | | 42 | /// | |
43 | /// 'foo' | 43 | /// 'foo' |
44 | /// | 44 | /// |
45 | /// See also `CompleteMarker::precede`. | 45 | /// See also `CompletedMarker::precede`. |
46 | Start { | 46 | Start { |
47 | kind: SyntaxKind, | 47 | kind: SyntaxKind, |
48 | forward_parent: Option<u32>, | 48 | forward_parent: Option<u32>, |
@@ -61,7 +61,7 @@ pub(crate) enum Event { | |||
61 | }, | 61 | }, |
62 | 62 | ||
63 | Error { | 63 | Error { |
64 | message: String, | 64 | msg: String, |
65 | }, | 65 | }, |
66 | } | 66 | } |
67 | 67 | ||
@@ -140,9 +140,7 @@ pub(super) fn to_file(text: String, tokens: &[Token], events: Vec<Event>) -> Fil | |||
140 | } | 140 | } |
141 | builder.leaf(kind, len); | 141 | builder.leaf(kind, len); |
142 | } | 142 | } |
143 | &Event::Error { ref message } => builder.error(ErrorMsg { | 143 | &Event::Error { ref msg } => builder.error(ErrorMsg { msg: msg.clone() }), |
144 | message: message.clone(), | ||
145 | }), | ||
146 | } | 144 | } |
147 | } | 145 | } |
148 | builder.finish() | 146 | builder.finish() |
diff --git a/src/parser/grammar/items/mod.rs b/src/parser/grammar/items/mod.rs index 18ee8af86..3af6d13a1 100644 --- a/src/parser/grammar/items/mod.rs +++ b/src/parser/grammar/items/mod.rs | |||
@@ -94,7 +94,7 @@ fn item(p: &mut Parser) { | |||
94 | 94 | ||
95 | // test unsafe_auto_trait | 95 | // test unsafe_auto_trait |
96 | // unsafe auto trait T {} | 96 | // unsafe auto trait T {} |
97 | IDENT if p.at_kw("auto") && la == TRAIT_KW => { | 97 | IDENT if p.at_contextual_kw("auto") && la == TRAIT_KW => { |
98 | p.bump_remap(AUTO_KW); | 98 | p.bump_remap(AUTO_KW); |
99 | traits::trait_item(p); | 99 | traits::trait_item(p); |
100 | TRAIT_ITEM | 100 | TRAIT_ITEM |
@@ -109,7 +109,7 @@ fn item(p: &mut Parser) { | |||
109 | 109 | ||
110 | // test unsafe_default_impl | 110 | // test unsafe_default_impl |
111 | // unsafe default impl Foo {} | 111 | // unsafe default impl Foo {} |
112 | IDENT if p.at_kw("default") && la == IMPL_KW => { | 112 | IDENT if p.at_contextual_kw("default") && la == IMPL_KW => { |
113 | p.bump_remap(DEFAULT_KW); | 113 | p.bump_remap(DEFAULT_KW); |
114 | traits::impl_item(p); | 114 | traits::impl_item(p); |
115 | IMPL_ITEM | 115 | IMPL_ITEM |
diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs index f5b63aaab..ee0263203 100644 --- a/src/parser/grammar/mod.rs +++ b/src/parser/grammar/mod.rs | |||
@@ -1,4 +1,28 @@ | |||
1 | use super::parser::{Parser, TokenSet}; | 1 | //! This is the actual "grammar" of the Rust language. |
2 | //! | ||
3 | //! Each function in this module and its children corresponds | ||
4 | //! to a production of the formal grammar. Submodules roughly | ||
5 | //! correspond to different *areas* of the grammar. By convention, | ||
6 | //! each submodule starts with `use super::*` import and exports | ||
7 | //! "public" productions via `pub(super)`. | ||
8 | //! | ||
9 | //! See docs for `Parser` to learn about the API available to the grammar, | ||
10 | //! and see docs for `Event` to learn how this actually manages to | ||
11 | //! produce parse trees. | ||
12 | //! | ||
13 | //! Code in this module also contains inline tests, which start with | ||
14 | //! `// test name-of-the-test` comment and look like this: | ||
15 | //! | ||
16 | //! ``` | ||
17 | //! // test fn_item_with_zero_parameters | ||
18 | //! // fn foo() {} | ||
19 | //! ``` | ||
20 | //! | ||
21 | //! After adding a new inline-test, run `cargo collect-tests` to extract | ||
22 | //! it as a standalone text-fixture into `tests/data/parser/inline`, and | ||
23 | //! run `cargo test` once to create the "gold" value. | ||
24 | use parser::parser::Parser; | ||
25 | use parser::token_set::TokenSet; | ||
2 | use SyntaxKind; | 26 | use SyntaxKind; |
3 | use syntax_kinds::*; | 27 | use syntax_kinds::*; |
4 | 28 | ||
diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c23ed3349..3814837e1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs | |||
@@ -3,19 +3,20 @@ use {File, SyntaxKind, Token}; | |||
3 | use syntax_kinds::*; | 3 | use syntax_kinds::*; |
4 | 4 | ||
5 | #[macro_use] | 5 | #[macro_use] |
6 | mod token_set; | ||
6 | mod parser; | 7 | mod parser; |
7 | mod input; | 8 | mod input; |
8 | mod event; | 9 | mod event; |
9 | mod grammar; | 10 | mod grammar; |
10 | use self::event::Event; | ||
11 | 11 | ||
12 | /// Parse a sequence of tokens into the representative node tree | 12 | /// Parse a sequence of tokens into the representative node tree |
13 | pub fn parse(text: String, tokens: &[Token]) -> File { | 13 | pub fn parse(text: String, tokens: &[Token]) -> File { |
14 | let events = { | 14 | let events = { |
15 | let input = input::ParserInput::new(&text, tokens); | 15 | let input = input::ParserInput::new(&text, tokens); |
16 | let mut parser = parser::Parser::new(&input); | 16 | let parser_impl = parser::imp::ParserImpl::new(&input); |
17 | let mut parser = parser::Parser(parser_impl); | ||
17 | grammar::file(&mut parser); | 18 | grammar::file(&mut parser); |
18 | parser.into_events() | 19 | parser.0.into_events() |
19 | }; | 20 | }; |
20 | event::to_file(text, tokens, events) | 21 | event::to_file(text, tokens, events) |
21 | } | 22 | } |
@@ -26,33 +27,3 @@ fn is_insignificant(kind: SyntaxKind) -> bool { | |||
26 | _ => false, | 27 | _ => false, |
27 | } | 28 | } |
28 | } | 29 | } |
29 | |||
30 | impl<'p> parser::Parser<'p> { | ||
31 | fn at(&self, kind: SyntaxKind) -> bool { | ||
32 | self.current() == kind | ||
33 | } | ||
34 | |||
35 | fn err_and_bump(&mut self, message: &str) { | ||
36 | let err = self.start(); | ||
37 | self.error(message); | ||
38 | self.bump(); | ||
39 | err.complete(self, ERROR); | ||
40 | } | ||
41 | |||
42 | fn expect(&mut self, kind: SyntaxKind) -> bool { | ||
43 | if self.at(kind) { | ||
44 | self.bump(); | ||
45 | true | ||
46 | } else { | ||
47 | self.error(format!("expected {:?}", kind)); | ||
48 | false | ||
49 | } | ||
50 | } | ||
51 | |||
52 | fn eat(&mut self, kind: SyntaxKind) -> bool { | ||
53 | self.at(kind) && { | ||
54 | self.bump(); | ||
55 | true | ||
56 | } | ||
57 | } | ||
58 | } | ||
diff --git a/src/parser/parser.rs b/src/parser/parser.rs deleted file mode 100644 index 7c8e47cb6..000000000 --- a/src/parser/parser.rs +++ /dev/null | |||
@@ -1,193 +0,0 @@ | |||
1 | use super::Event; | ||
2 | use super::input::{InputPosition, ParserInput}; | ||
3 | use SyntaxKind::{self, EOF, TOMBSTONE}; | ||
4 | |||
5 | pub(crate) struct Marker { | ||
6 | pos: u32, | ||
7 | } | ||
8 | |||
9 | impl Marker { | ||
10 | pub fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompleteMarker { | ||
11 | match self.event(p) { | ||
12 | &mut Event::Start { | ||
13 | kind: ref mut slot, .. | ||
14 | } => { | ||
15 | *slot = kind; | ||
16 | } | ||
17 | _ => unreachable!(), | ||
18 | } | ||
19 | p.event(Event::Finish); | ||
20 | let result = CompleteMarker { pos: self.pos }; | ||
21 | ::std::mem::forget(self); | ||
22 | result | ||
23 | } | ||
24 | |||
25 | pub fn abandon(self, p: &mut Parser) { | ||
26 | let idx = self.pos as usize; | ||
27 | if idx == p.events.len() - 1 { | ||
28 | match p.events.pop() { | ||
29 | Some(Event::Start { | ||
30 | kind: TOMBSTONE, | ||
31 | forward_parent: None, | ||
32 | }) => (), | ||
33 | _ => unreachable!(), | ||
34 | } | ||
35 | } | ||
36 | ::std::mem::forget(self); | ||
37 | } | ||
38 | |||
39 | fn event<'p>(&self, p: &'p mut Parser) -> &'p mut Event { | ||
40 | &mut p.events[self.idx()] | ||
41 | } | ||
42 | |||
43 | fn idx(&self) -> usize { | ||
44 | self.pos as usize | ||
45 | } | ||
46 | } | ||
47 | |||
48 | impl Drop for Marker { | ||
49 | fn drop(&mut self) { | ||
50 | if !::std::thread::panicking() { | ||
51 | panic!("Each marker should be eithe completed or abandoned"); | ||
52 | } | ||
53 | } | ||
54 | } | ||
55 | |||
56 | pub(crate) struct CompleteMarker { | ||
57 | pos: u32, | ||
58 | } | ||
59 | |||
60 | impl CompleteMarker { | ||
61 | pub(crate) fn precede(self, p: &mut Parser) -> Marker { | ||
62 | let m = p.start(); | ||
63 | match p.events[self.pos as usize] { | ||
64 | Event::Start { | ||
65 | ref mut forward_parent, | ||
66 | .. | ||
67 | } => { | ||
68 | *forward_parent = Some(m.pos - self.pos); | ||
69 | } | ||
70 | _ => unreachable!(), | ||
71 | } | ||
72 | m | ||
73 | } | ||
74 | } | ||
75 | |||
76 | pub(crate) struct TokenSet { | ||
77 | pub tokens: &'static [SyntaxKind], | ||
78 | } | ||
79 | |||
80 | impl TokenSet { | ||
81 | pub fn contains(&self, kind: SyntaxKind) -> bool { | ||
82 | self.tokens.contains(&kind) | ||
83 | } | ||
84 | } | ||
85 | |||
86 | #[macro_export] | ||
87 | macro_rules! token_set { | ||
88 | ($($t:ident),*) => { | ||
89 | TokenSet { | ||
90 | tokens: &[$($t),*], | ||
91 | } | ||
92 | }; | ||
93 | |||
94 | ($($t:ident),* ,) => { | ||
95 | token_set!($($t),*) | ||
96 | }; | ||
97 | } | ||
98 | |||
99 | pub(crate) struct Parser<'t> { | ||
100 | inp: &'t ParserInput<'t>, | ||
101 | |||
102 | pos: InputPosition, | ||
103 | events: Vec<Event>, | ||
104 | } | ||
105 | |||
106 | impl<'t> Parser<'t> { | ||
107 | pub(crate) fn new(inp: &'t ParserInput<'t>) -> Parser<'t> { | ||
108 | Parser { | ||
109 | inp, | ||
110 | |||
111 | pos: InputPosition::new(), | ||
112 | events: Vec::new(), | ||
113 | } | ||
114 | } | ||
115 | |||
116 | pub(crate) fn into_events(self) -> Vec<Event> { | ||
117 | assert_eq!(self.current(), EOF); | ||
118 | self.events | ||
119 | } | ||
120 | |||
121 | pub(crate) fn start(&mut self) -> Marker { | ||
122 | let m = Marker { | ||
123 | pos: self.events.len() as u32, | ||
124 | }; | ||
125 | self.event(Event::Start { | ||
126 | kind: TOMBSTONE, | ||
127 | forward_parent: None, | ||
128 | }); | ||
129 | m | ||
130 | } | ||
131 | |||
132 | pub(crate) fn error<'p, T: Into<String>>(&'p mut self, msg: T) -> ErrorBuilder<'p, 't> { | ||
133 | ErrorBuilder::new(self, msg.into()) | ||
134 | } | ||
135 | |||
136 | pub(crate) fn bump(&mut self) { | ||
137 | let kind = self.current(); | ||
138 | if kind == EOF { | ||
139 | return; | ||
140 | } | ||
141 | self.pos += 1; | ||
142 | self.event(Event::Token { | ||
143 | kind, | ||
144 | n_raw_tokens: 1, | ||
145 | }); | ||
146 | } | ||
147 | |||
148 | pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { | ||
149 | if self.current() == EOF { | ||
150 | // TODO: panic!? | ||
151 | return; | ||
152 | } | ||
153 | self.pos += 1; | ||
154 | self.event(Event::Token { | ||
155 | kind, | ||
156 | n_raw_tokens: 1, | ||
157 | }); | ||
158 | } | ||
159 | |||
160 | pub(crate) fn nth(&self, n: u32) -> SyntaxKind { | ||
161 | self.inp.kind(self.pos + n) | ||
162 | } | ||
163 | |||
164 | pub(crate) fn at_kw(&self, t: &str) -> bool { | ||
165 | self.inp.text(self.pos) == t | ||
166 | } | ||
167 | |||
168 | pub(crate) fn current(&self) -> SyntaxKind { | ||
169 | self.nth(0) | ||
170 | } | ||
171 | |||
172 | fn event(&mut self, event: Event) { | ||
173 | self.events.push(event) | ||
174 | } | ||
175 | } | ||
176 | |||
177 | pub(crate) struct ErrorBuilder<'p, 't: 'p> { | ||
178 | message: String, | ||
179 | parser: &'p mut Parser<'t>, | ||
180 | } | ||
181 | |||
182 | impl<'p, 't: 'p> Drop for ErrorBuilder<'p, 't> { | ||
183 | fn drop(&mut self) { | ||
184 | let message = ::std::mem::replace(&mut self.message, String::new()); | ||
185 | self.parser.event(Event::Error { message }); | ||
186 | } | ||
187 | } | ||
188 | |||
189 | impl<'t, 'p> ErrorBuilder<'p, 't> { | ||
190 | fn new(parser: &'p mut Parser<'t>, message: String) -> Self { | ||
191 | ErrorBuilder { message, parser } | ||
192 | } | ||
193 | } | ||
diff --git a/src/parser/parser/imp.rs b/src/parser/parser/imp.rs new file mode 100644 index 000000000..f2641c388 --- /dev/null +++ b/src/parser/parser/imp.rs | |||
@@ -0,0 +1,119 @@ | |||
1 | use parser::input::{InputPosition, ParserInput}; | ||
2 | use parser::event::Event; | ||
3 | |||
4 | use SyntaxKind; | ||
5 | use syntax_kinds::{EOF, TOMBSTONE}; | ||
6 | |||
7 | /// Implementation details of `Parser`, extracted | ||
8 | /// to a separate struct in order not to pollute | ||
9 | /// the public API of the `Parser`. | ||
10 | pub(crate) struct ParserImpl<'t> { | ||
11 | inp: &'t ParserInput<'t>, | ||
12 | |||
13 | pos: InputPosition, | ||
14 | events: Vec<Event>, | ||
15 | } | ||
16 | |||
17 | impl<'t> ParserImpl<'t> { | ||
18 | pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { | ||
19 | ParserImpl { | ||
20 | inp, | ||
21 | |||
22 | pos: InputPosition::new(), | ||
23 | events: Vec::new(), | ||
24 | } | ||
25 | } | ||
26 | |||
27 | pub(crate) fn into_events(self) -> Vec<Event> { | ||
28 | assert_eq!(self.nth(0), EOF); | ||
29 | self.events | ||
30 | } | ||
31 | |||
32 | pub(super) fn nth(&self, n: u32) -> SyntaxKind { | ||
33 | self.inp.kind(self.pos + n) | ||
34 | } | ||
35 | |||
36 | pub(super) fn at_kw(&self, t: &str) -> bool { | ||
37 | self.inp.text(self.pos) == t | ||
38 | } | ||
39 | |||
40 | pub(super) fn start(&mut self) -> u32 { | ||
41 | let pos = self.events.len() as u32; | ||
42 | self.event(Event::Start { | ||
43 | kind: TOMBSTONE, | ||
44 | forward_parent: None, | ||
45 | }); | ||
46 | pos | ||
47 | } | ||
48 | |||
49 | pub(super) fn bump(&mut self) { | ||
50 | let kind = self.nth(0); | ||
51 | if kind == EOF { | ||
52 | return; | ||
53 | } | ||
54 | self.do_bump(kind); | ||
55 | } | ||
56 | |||
57 | pub(super) fn bump_remap(&mut self, kind: SyntaxKind) { | ||
58 | if self.nth(0) == EOF { | ||
59 | // TODO: panic!? | ||
60 | return; | ||
61 | } | ||
62 | self.do_bump(kind); | ||
63 | } | ||
64 | |||
65 | fn do_bump(&mut self, kind: SyntaxKind) { | ||
66 | self.pos += 1; | ||
67 | self.event(Event::Token { | ||
68 | kind, | ||
69 | n_raw_tokens: 1, | ||
70 | }); | ||
71 | } | ||
72 | |||
73 | pub(super) fn error(&mut self, msg: String) { | ||
74 | self.event(Event::Error { msg }) | ||
75 | } | ||
76 | |||
77 | pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) { | ||
78 | match self.events[pos as usize] { | ||
79 | Event::Start { | ||
80 | kind: ref mut slot, .. | ||
81 | } => { | ||
82 | *slot = kind; | ||
83 | } | ||
84 | _ => unreachable!(), | ||
85 | } | ||
86 | self.event(Event::Finish); | ||
87 | } | ||
88 | |||
89 | pub(super) fn abandon(&mut self, pos: u32) { | ||
90 | let idx = pos as usize; | ||
91 | if idx == self.events.len() - 1 { | ||
92 | match self.events.pop() { | ||
93 | Some(Event::Start { | ||
94 | kind: TOMBSTONE, | ||
95 | forward_parent: None, | ||
96 | }) => (), | ||
97 | _ => unreachable!(), | ||
98 | } | ||
99 | } | ||
100 | } | ||
101 | |||
102 | pub(super) fn precede(&mut self, pos: u32) -> u32 { | ||
103 | let new_pos = self.start(); | ||
104 | match self.events[pos as usize] { | ||
105 | Event::Start { | ||
106 | ref mut forward_parent, | ||
107 | .. | ||
108 | } => { | ||
109 | *forward_parent = Some(new_pos - pos); | ||
110 | } | ||
111 | _ => unreachable!(), | ||
112 | } | ||
113 | new_pos | ||
114 | } | ||
115 | |||
116 | fn event(&mut self, event: Event) { | ||
117 | self.events.push(event) | ||
118 | } | ||
119 | } | ||
diff --git a/src/parser/parser/mod.rs b/src/parser/parser/mod.rs new file mode 100644 index 000000000..58f820738 --- /dev/null +++ b/src/parser/parser/mod.rs | |||
@@ -0,0 +1,142 @@ | |||
1 | use SyntaxKind; | ||
2 | use syntax_kinds::ERROR; | ||
3 | |||
4 | pub(super) mod imp; | ||
5 | use self::imp::ParserImpl; | ||
6 | |||
7 | /// `Parser` struct provides the low-level API for | ||
8 | /// navigating through the stream of tokens and | ||
9 | /// constructing the parse tree. The actual parsing | ||
10 | /// happens in the `grammar` module. | ||
11 | /// | ||
12 | /// However, the result of this `Parser` is not a real | ||
13 | /// tree, but rather a flat stream of events of the form | ||
14 | /// "start expression, consume number literal, | ||
15 | /// finish expression". See `Event` docs for more. | ||
16 | pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); | ||
17 | |||
18 | impl<'t> Parser<'t> { | ||
19 | /// Returns the kind of the current token. | ||
20 | /// If parser has already reached the end of input, | ||
21 | /// the special `EOF` kind is returned. | ||
22 | pub(crate) fn current(&self) -> SyntaxKind { | ||
23 | self.nth(0) | ||
24 | } | ||
25 | |||
26 | /// Lookahead operation: returns the kind of the next nth | ||
27 | /// token. | ||
28 | pub(crate) fn nth(&self, n: u32) -> SyntaxKind { | ||
29 | self.0.nth(n) | ||
30 | } | ||
31 | |||
32 | /// Checks if the current token is `kind`. | ||
33 | pub(crate) fn at(&self, kind: SyntaxKind) -> bool { | ||
34 | self.current() == kind | ||
35 | } | ||
36 | |||
37 | /// Checks if the current token is a contextual keyword with text `t`. | ||
38 | pub(crate) fn at_contextual_kw(&self, t: &str) -> bool { | ||
39 | self.0.at_kw(t) | ||
40 | } | ||
41 | |||
42 | /// Starts a new node in the syntax tree. All nodes and tokens | ||
43 | /// consumed between the `start` and the corresponding `Marker::complete` | ||
44 | /// belong to the same node. | ||
45 | pub(crate) fn start(&mut self) -> Marker { | ||
46 | Marker(self.0.start()) | ||
47 | } | ||
48 | |||
49 | /// Advances the parser by one token. | ||
50 | pub(crate) fn bump(&mut self) { | ||
51 | self.0.bump(); | ||
52 | } | ||
53 | |||
54 | /// Advances the parser by one token, remapping its kind. | ||
55 | /// This is useful to create contextual keywords from | ||
56 | /// identifiers. For example, the lexer creates a `union` | ||
57 | /// *identifier* token, but the parser remaps it to the | ||
58 | /// `union` keyword, and keyword is what ends up in the | ||
59 | /// final tree. | ||
60 | pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { | ||
61 | self.0.bump_remap(kind); | ||
62 | } | ||
63 | |||
64 | /// Emits an error with the given `message`. | ||
65 | /// TODO: this should be much more fancy and support | ||
66 | /// structured errors with spans and notes, like rustc | ||
67 | /// does. | ||
68 | pub(crate) fn error<T: Into<String>>(&mut self, message: T) { | ||
69 | self.0.error(message.into()) | ||
70 | } | ||
71 | |||
72 | /// Consume the next token if it is `kind`. | ||
73 | pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { | ||
74 | if !self.at(kind) { | ||
75 | return false; | ||
76 | } | ||
77 | self.bump(); | ||
78 | true | ||
79 | } | ||
80 | |||
81 | /// Consume the next token if it is `kind` or emit an error | ||
82 | /// otherwise. | ||
83 | pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { | ||
84 | if self.eat(kind) { | ||
85 | return true; | ||
86 | } | ||
87 | self.error(format!("expected {:?}", kind)); | ||
88 | false | ||
89 | } | ||
90 | |||
91 | /// Create an error node and consume the next token. | ||
92 | pub(crate) fn err_and_bump(&mut self, message: &str) { | ||
93 | let m = self.start(); | ||
94 | self.error(message); | ||
95 | self.bump(); | ||
96 | m.complete(self, ERROR); | ||
97 | } | ||
98 | } | ||
99 | |||
100 | /// See `Parser::start`. | ||
101 | pub(crate) struct Marker(u32); | ||
102 | |||
103 | impl Marker { | ||
104 | /// Finishes the syntax tree node and assigns `kind` to it. | ||
105 | pub(crate) fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { | ||
106 | let pos = self.0; | ||
107 | ::std::mem::forget(self); | ||
108 | p.0.complete(pos, kind); | ||
109 | CompletedMarker(pos) | ||
110 | } | ||
111 | |||
112 | /// Abandons the syntax tree node. All its children | ||
113 | /// are attached to its parent instead. | ||
114 | pub(crate) fn abandon(self, p: &mut Parser) { | ||
115 | let pos = self.0; | ||
116 | ::std::mem::forget(self); | ||
117 | p.0.abandon(pos); | ||
118 | } | ||
119 | } | ||
120 | |||
121 | impl Drop for Marker { | ||
122 | fn drop(&mut self) { | ||
123 | if !::std::thread::panicking() { | ||
124 | panic!("Marker must be either completed or abandoned"); | ||
125 | } | ||
126 | } | ||
127 | } | ||
128 | |||
129 | pub(crate) struct CompletedMarker(u32); | ||
130 | |||
131 | impl CompletedMarker { | ||
132 | /// This one is tricky :-) | ||
133 | /// This method allows creating a new node which starts | ||
134 | /// *before* the current one. That is, parser could start | ||
135 | /// node `A`, then complete it, and then after parsing the | ||
136 | /// whole `A`, decide that it should have started some node | ||
137 | /// `B` before starting `A`. `precede` allows doing exactly | ||
138 | /// that. See also docs about `forward_parent` in `Event::Start`. | ||
139 | pub(crate) fn precede(self, p: &mut Parser) -> Marker { | ||
140 | Marker(p.0.precede(self.0)) | ||
141 | } | ||
142 | } | ||
diff --git a/src/parser/token_set.rs b/src/parser/token_set.rs new file mode 100644 index 000000000..a800f200d --- /dev/null +++ b/src/parser/token_set.rs | |||
@@ -0,0 +1,24 @@ | |||
1 | use SyntaxKind; | ||
2 | |||
3 | pub(crate) struct TokenSet { | ||
4 | pub tokens: &'static [SyntaxKind], | ||
5 | } | ||
6 | |||
7 | impl TokenSet { | ||
8 | pub fn contains(&self, kind: SyntaxKind) -> bool { | ||
9 | self.tokens.contains(&kind) | ||
10 | } | ||
11 | } | ||
12 | |||
13 | #[macro_export] | ||
14 | macro_rules! token_set { | ||
15 | ($($t:ident),*) => { | ||
16 | TokenSet { | ||
17 | tokens: &[$($t),*], | ||
18 | } | ||
19 | }; | ||
20 | |||
21 | ($($t:ident),* ,) => { | ||
22 | token_set!($($t),*) | ||
23 | }; | ||
24 | } | ||
diff --git a/src/tree/file_builder.rs b/src/tree/file_builder.rs index 3c7e2d7cf..f831676c7 100644 --- a/src/tree/file_builder.rs +++ b/src/tree/file_builder.rs | |||
@@ -70,7 +70,7 @@ impl Sink for FileBuilder { | |||
70 | let &(node, after_child) = self.in_progress.last().unwrap(); | 70 | let &(node, after_child) = self.in_progress.last().unwrap(); |
71 | self.errors.push(SyntaxErrorData { | 71 | self.errors.push(SyntaxErrorData { |
72 | node, | 72 | node, |
73 | message: err.message, | 73 | message: err.msg, |
74 | after_child, | 74 | after_child, |
75 | }) | 75 | }) |
76 | } | 76 | } |
@@ -157,5 +157,5 @@ fn grow(left: &mut TextRange, right: TextRange) { | |||
157 | 157 | ||
158 | #[derive(Default)] | 158 | #[derive(Default)] |
159 | pub(crate) struct ErrorMsg { | 159 | pub(crate) struct ErrorMsg { |
160 | pub(crate) message: String, | 160 | pub(crate) msg: String, |
161 | } | 161 | } |