about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author bors[bot] <bors[bot]@users.noreply.github.com> 2018-02-11 14:59:58 +0000
committer bors[bot] <bors[bot]@users.noreply.github.com> 2018-02-11 14:59:58 +0000
commit a6f9b0414cf5bf49ad7f714b9d3fe5af91a16404 (patch)
tree 2fc1e8ccc43bbee85a06026270d7c8de5959e323 /src
parent 7a0ada860b57acd44b1d53e944ae621e438652da (diff)
parent f356628ad8392c6e3ffd72a9ac50a7be87d3d183 (diff)
Merge #50
50: Shiny new parser r=matklad a=matklad bors r+
Diffstat (limited to 'src')
-rw-r--r-- src/parser/event.rs 8
-rw-r--r-- src/parser/grammar/items/mod.rs 4
-rw-r--r-- src/parser/grammar/mod.rs 26
-rw-r--r-- src/parser/mod.rs 37
-rw-r--r-- src/parser/parser.rs 193
-rw-r--r-- src/parser/parser/imp.rs 119
-rw-r--r-- src/parser/parser/mod.rs 142
-rw-r--r-- src/parser/token_set.rs 24
-rw-r--r-- src/tree/file_builder.rs 4
9 files changed, 321 insertions, 236 deletions
diff --git a/src/parser/event.rs b/src/parser/event.rs
index 90348398e..1c0905a38 100644
--- a/src/parser/event.rs
+++ b/src/parser/event.rs
@@ -42,7 +42,7 @@ pub(crate) enum Event {
42 /// | 42 /// |
43 /// 'foo' 43 /// 'foo'
44 /// 44 ///
45 /// See also `CompleteMarker::precede`. 45 /// See also `CompletedMarker::precede`.
46 Start { 46 Start {
47 kind: SyntaxKind, 47 kind: SyntaxKind,
48 forward_parent: Option<u32>, 48 forward_parent: Option<u32>,
@@ -61,7 +61,7 @@ pub(crate) enum Event {
61 }, 61 },
62 62
63 Error { 63 Error {
64 message: String, 64 msg: String,
65 }, 65 },
66} 66}
67 67
@@ -140,9 +140,7 @@ pub(super) fn to_file(text: String, tokens: &[Token], events: Vec<Event>) -> Fil
140 } 140 }
141 builder.leaf(kind, len); 141 builder.leaf(kind, len);
142 } 142 }
143 &Event::Error { ref message } => builder.error(ErrorMsg { 143 &Event::Error { ref msg } => builder.error(ErrorMsg { msg: msg.clone() }),
144 message: message.clone(),
145 }),
146 } 144 }
147 } 145 }
148 builder.finish() 146 builder.finish()
diff --git a/src/parser/grammar/items/mod.rs b/src/parser/grammar/items/mod.rs
index 18ee8af86..3af6d13a1 100644
--- a/src/parser/grammar/items/mod.rs
+++ b/src/parser/grammar/items/mod.rs
@@ -94,7 +94,7 @@ fn item(p: &mut Parser) {
94 94
95 // test unsafe_auto_trait 95 // test unsafe_auto_trait
96 // unsafe auto trait T {} 96 // unsafe auto trait T {}
97 IDENT if p.at_kw("auto") && la == TRAIT_KW => { 97 IDENT if p.at_contextual_kw("auto") && la == TRAIT_KW => {
98 p.bump_remap(AUTO_KW); 98 p.bump_remap(AUTO_KW);
99 traits::trait_item(p); 99 traits::trait_item(p);
100 TRAIT_ITEM 100 TRAIT_ITEM
@@ -109,7 +109,7 @@ fn item(p: &mut Parser) {
109 109
110 // test unsafe_default_impl 110 // test unsafe_default_impl
111 // unsafe default impl Foo {} 111 // unsafe default impl Foo {}
112 IDENT if p.at_kw("default") && la == IMPL_KW => { 112 IDENT if p.at_contextual_kw("default") && la == IMPL_KW => {
113 p.bump_remap(DEFAULT_KW); 113 p.bump_remap(DEFAULT_KW);
114 traits::impl_item(p); 114 traits::impl_item(p);
115 IMPL_ITEM 115 IMPL_ITEM
diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs
index f5b63aaab..ee0263203 100644
--- a/src/parser/grammar/mod.rs
+++ b/src/parser/grammar/mod.rs
@@ -1,4 +1,28 @@
1use super::parser::{Parser, TokenSet}; 1//! This is the actual "grammar" of the Rust language.
2//!
3//! Each function in this module and its children corresponds
4//! to a production of the format grammar. Submodules roughly
5//! correspond to different *areas* of the grammar. By convention,
6//! each submodule starts with `use super::*` import and exports
7//! "public" productions via `pub(super)`.
8//!
9//! See docs for `Parser` to learn about API, available to the grammar,
10//! and see docs for `Event` to learn how this actually manages to
11//! produce parse trees.
12//!
13//! Code in this module also contains inline tests, which start with
14//! `// test name-of-the-test` comment and look like this:
15//!
16//! ```
17//! // test fn_item_with_zero_parameters
18//! // fn foo() {}
19//! ```
20//!
21//! After adding a new inline-test, run `cargo collect-tests` to extract
22//! it as a standalone text-fixture into `tests/data/parser/inline`, and
23//! run `cargo test` once to create the "gold" value.
24use parser::parser::Parser;
25use parser::token_set::TokenSet;
2use SyntaxKind; 26use SyntaxKind;
3use syntax_kinds::*; 27use syntax_kinds::*;
4 28
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index c23ed3349..3814837e1 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -3,19 +3,20 @@ use {File, SyntaxKind, Token};
3use syntax_kinds::*; 3use syntax_kinds::*;
4 4
5#[macro_use] 5#[macro_use]
6mod token_set;
6mod parser; 7mod parser;
7mod input; 8mod input;
8mod event; 9mod event;
9mod grammar; 10mod grammar;
10use self::event::Event;
11 11
12/// Parse a sequence of tokens into the representative node tree 12/// Parse a sequence of tokens into the representative node tree
13pub fn parse(text: String, tokens: &[Token]) -> File { 13pub fn parse(text: String, tokens: &[Token]) -> File {
14 let events = { 14 let events = {
15 let input = input::ParserInput::new(&text, tokens); 15 let input = input::ParserInput::new(&text, tokens);
16 let mut parser = parser::Parser::new(&input); 16 let parser_impl = parser::imp::ParserImpl::new(&input);
17 let mut parser = parser::Parser(parser_impl);
17 grammar::file(&mut parser); 18 grammar::file(&mut parser);
18 parser.into_events() 19 parser.0.into_events()
19 }; 20 };
20 event::to_file(text, tokens, events) 21 event::to_file(text, tokens, events)
21} 22}
@@ -26,33 +27,3 @@ fn is_insignificant(kind: SyntaxKind) -> bool {
26 _ => false, 27 _ => false,
27 } 28 }
28} 29}
29
30impl<'p> parser::Parser<'p> {
31 fn at(&self, kind: SyntaxKind) -> bool {
32 self.current() == kind
33 }
34
35 fn err_and_bump(&mut self, message: &str) {
36 let err = self.start();
37 self.error(message);
38 self.bump();
39 err.complete(self, ERROR);
40 }
41
42 fn expect(&mut self, kind: SyntaxKind) -> bool {
43 if self.at(kind) {
44 self.bump();
45 true
46 } else {
47 self.error(format!("expected {:?}", kind));
48 false
49 }
50 }
51
52 fn eat(&mut self, kind: SyntaxKind) -> bool {
53 self.at(kind) && {
54 self.bump();
55 true
56 }
57 }
58}
diff --git a/src/parser/parser.rs b/src/parser/parser.rs
deleted file mode 100644
index 7c8e47cb6..000000000
--- a/src/parser/parser.rs
+++ /dev/null
@@ -1,193 +0,0 @@
1use super::Event;
2use super::input::{InputPosition, ParserInput};
3use SyntaxKind::{self, EOF, TOMBSTONE};
4
5pub(crate) struct Marker {
6 pos: u32,
7}
8
9impl Marker {
10 pub fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompleteMarker {
11 match self.event(p) {
12 &mut Event::Start {
13 kind: ref mut slot, ..
14 } => {
15 *slot = kind;
16 }
17 _ => unreachable!(),
18 }
19 p.event(Event::Finish);
20 let result = CompleteMarker { pos: self.pos };
21 ::std::mem::forget(self);
22 result
23 }
24
25 pub fn abandon(self, p: &mut Parser) {
26 let idx = self.pos as usize;
27 if idx == p.events.len() - 1 {
28 match p.events.pop() {
29 Some(Event::Start {
30 kind: TOMBSTONE,
31 forward_parent: None,
32 }) => (),
33 _ => unreachable!(),
34 }
35 }
36 ::std::mem::forget(self);
37 }
38
39 fn event<'p>(&self, p: &'p mut Parser) -> &'p mut Event {
40 &mut p.events[self.idx()]
41 }
42
43 fn idx(&self) -> usize {
44 self.pos as usize
45 }
46}
47
48impl Drop for Marker {
49 fn drop(&mut self) {
50 if !::std::thread::panicking() {
51 panic!("Each marker should be eithe completed or abandoned");
52 }
53 }
54}
55
56pub(crate) struct CompleteMarker {
57 pos: u32,
58}
59
60impl CompleteMarker {
61 pub(crate) fn precede(self, p: &mut Parser) -> Marker {
62 let m = p.start();
63 match p.events[self.pos as usize] {
64 Event::Start {
65 ref mut forward_parent,
66 ..
67 } => {
68 *forward_parent = Some(m.pos - self.pos);
69 }
70 _ => unreachable!(),
71 }
72 m
73 }
74}
75
76pub(crate) struct TokenSet {
77 pub tokens: &'static [SyntaxKind],
78}
79
80impl TokenSet {
81 pub fn contains(&self, kind: SyntaxKind) -> bool {
82 self.tokens.contains(&kind)
83 }
84}
85
86#[macro_export]
87macro_rules! token_set {
88 ($($t:ident),*) => {
89 TokenSet {
90 tokens: &[$($t),*],
91 }
92 };
93
94 ($($t:ident),* ,) => {
95 token_set!($($t),*)
96 };
97}
98
99pub(crate) struct Parser<'t> {
100 inp: &'t ParserInput<'t>,
101
102 pos: InputPosition,
103 events: Vec<Event>,
104}
105
106impl<'t> Parser<'t> {
107 pub(crate) fn new(inp: &'t ParserInput<'t>) -> Parser<'t> {
108 Parser {
109 inp,
110
111 pos: InputPosition::new(),
112 events: Vec::new(),
113 }
114 }
115
116 pub(crate) fn into_events(self) -> Vec<Event> {
117 assert_eq!(self.current(), EOF);
118 self.events
119 }
120
121 pub(crate) fn start(&mut self) -> Marker {
122 let m = Marker {
123 pos: self.events.len() as u32,
124 };
125 self.event(Event::Start {
126 kind: TOMBSTONE,
127 forward_parent: None,
128 });
129 m
130 }
131
132 pub(crate) fn error<'p, T: Into<String>>(&'p mut self, msg: T) -> ErrorBuilder<'p, 't> {
133 ErrorBuilder::new(self, msg.into())
134 }
135
136 pub(crate) fn bump(&mut self) {
137 let kind = self.current();
138 if kind == EOF {
139 return;
140 }
141 self.pos += 1;
142 self.event(Event::Token {
143 kind,
144 n_raw_tokens: 1,
145 });
146 }
147
148 pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
149 if self.current() == EOF {
150 // TODO: panic!?
151 return;
152 }
153 self.pos += 1;
154 self.event(Event::Token {
155 kind,
156 n_raw_tokens: 1,
157 });
158 }
159
160 pub(crate) fn nth(&self, n: u32) -> SyntaxKind {
161 self.inp.kind(self.pos + n)
162 }
163
164 pub(crate) fn at_kw(&self, t: &str) -> bool {
165 self.inp.text(self.pos) == t
166 }
167
168 pub(crate) fn current(&self) -> SyntaxKind {
169 self.nth(0)
170 }
171
172 fn event(&mut self, event: Event) {
173 self.events.push(event)
174 }
175}
176
177pub(crate) struct ErrorBuilder<'p, 't: 'p> {
178 message: String,
179 parser: &'p mut Parser<'t>,
180}
181
182impl<'p, 't: 'p> Drop for ErrorBuilder<'p, 't> {
183 fn drop(&mut self) {
184 let message = ::std::mem::replace(&mut self.message, String::new());
185 self.parser.event(Event::Error { message });
186 }
187}
188
189impl<'t, 'p> ErrorBuilder<'p, 't> {
190 fn new(parser: &'p mut Parser<'t>, message: String) -> Self {
191 ErrorBuilder { message, parser }
192 }
193}
diff --git a/src/parser/parser/imp.rs b/src/parser/parser/imp.rs
new file mode 100644
index 000000000..f2641c388
--- /dev/null
+++ b/src/parser/parser/imp.rs
@@ -0,0 +1,119 @@
1use parser::input::{InputPosition, ParserInput};
2use parser::event::Event;
3
4use SyntaxKind;
5use syntax_kinds::{EOF, TOMBSTONE};
6
7/// Implementation details of `Parser`, extracted
8/// to a separate struct in order not to pollute
9/// the public API of the `Parser`.
10pub(crate) struct ParserImpl<'t> {
11 inp: &'t ParserInput<'t>,
12
13 pos: InputPosition,
14 events: Vec<Event>,
15}
16
17impl<'t> ParserImpl<'t> {
18 pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> {
19 ParserImpl {
20 inp,
21
22 pos: InputPosition::new(),
23 events: Vec::new(),
24 }
25 }
26
27 pub(crate) fn into_events(self) -> Vec<Event> {
28 assert_eq!(self.nth(0), EOF);
29 self.events
30 }
31
32 pub(super) fn nth(&self, n: u32) -> SyntaxKind {
33 self.inp.kind(self.pos + n)
34 }
35
36 pub(super) fn at_kw(&self, t: &str) -> bool {
37 self.inp.text(self.pos) == t
38 }
39
40 pub(super) fn start(&mut self) -> u32 {
41 let pos = self.events.len() as u32;
42 self.event(Event::Start {
43 kind: TOMBSTONE,
44 forward_parent: None,
45 });
46 pos
47 }
48
49 pub(super) fn bump(&mut self) {
50 let kind = self.nth(0);
51 if kind == EOF {
52 return;
53 }
54 self.do_bump(kind);
55 }
56
57 pub(super) fn bump_remap(&mut self, kind: SyntaxKind) {
58 if self.nth(0) == EOF {
59 // TODO: panic!?
60 return;
61 }
62 self.do_bump(kind);
63 }
64
65 fn do_bump(&mut self, kind: SyntaxKind) {
66 self.pos += 1;
67 self.event(Event::Token {
68 kind,
69 n_raw_tokens: 1,
70 });
71 }
72
73 pub(super) fn error(&mut self, msg: String) {
74 self.event(Event::Error { msg })
75 }
76
77 pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) {
78 match self.events[pos as usize] {
79 Event::Start {
80 kind: ref mut slot, ..
81 } => {
82 *slot = kind;
83 }
84 _ => unreachable!(),
85 }
86 self.event(Event::Finish);
87 }
88
89 pub(super) fn abandon(&mut self, pos: u32) {
90 let idx = pos as usize;
91 if idx == self.events.len() - 1 {
92 match self.events.pop() {
93 Some(Event::Start {
94 kind: TOMBSTONE,
95 forward_parent: None,
96 }) => (),
97 _ => unreachable!(),
98 }
99 }
100 }
101
102 pub(super) fn precede(&mut self, pos: u32) -> u32 {
103 let new_pos = self.start();
104 match self.events[pos as usize] {
105 Event::Start {
106 ref mut forward_parent,
107 ..
108 } => {
109 *forward_parent = Some(new_pos - pos);
110 }
111 _ => unreachable!(),
112 }
113 new_pos
114 }
115
116 fn event(&mut self, event: Event) {
117 self.events.push(event)
118 }
119}
diff --git a/src/parser/parser/mod.rs b/src/parser/parser/mod.rs
new file mode 100644
index 000000000..58f820738
--- /dev/null
+++ b/src/parser/parser/mod.rs
@@ -0,0 +1,142 @@
1use SyntaxKind;
2use syntax_kinds::ERROR;
3
4pub(super) mod imp;
5use self::imp::ParserImpl;
6
7/// `Parser` struct provides the low-level API for
8/// navigating through the stream of tokens and
9/// constructing the parse tree. The actual parsing
10/// happens in the `grammar` module.
11///
12/// However, the result of this `Parser` is not a real
13/// tree, but rather a flat stream of events of the form
14/// "start expression, consume number literal,
15/// finish expression". See `Event` docs for more.
16pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>);
17
18impl<'t> Parser<'t> {
19 /// Returns the kind of the current token.
20 /// If parser has already reached the end of input,
21 /// the special `EOF` kind is returned.
22 pub(crate) fn current(&self) -> SyntaxKind {
23 self.nth(0)
24 }
25
26 /// Lookahead operation: returns the kind of the next nth
27 /// token.
28 pub(crate) fn nth(&self, n: u32) -> SyntaxKind {
29 self.0.nth(n)
30 }
31
32 /// Checks if the current token is `kind`.
33 pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
34 self.current() == kind
35 }
36
37 /// Checks if the current token is contextual keyword with text `t`.
38 pub(crate) fn at_contextual_kw(&self, t: &str) -> bool {
39 self.0.at_kw(t)
40 }
41
42 /// Starts a new node in the syntax tree. All nodes and tokens
43 /// consumed between the `start` and the corresponding `Marker::complete`
44 /// belong to the same node.
45 pub(crate) fn start(&mut self) -> Marker {
46 Marker(self.0.start())
47 }
48
49 /// Advances the parser by one token.
50 pub(crate) fn bump(&mut self) {
51 self.0.bump();
52 }
53
54 /// Advances the parser by one token, remapping its kind.
55 /// This is useful to create contextual keywords from
56 /// identifiers. For example, the lexer creates an `union`
57 /// *identifier* token, but the parser remaps it to the
58 /// `union` keyword, and keyword is what ends up in the
59 /// final tree.
60 pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
61 self.0.bump_remap(kind);
62 }
63
64 /// Emit error with the `message`
65 /// TODO: this should be much more fancy and support
66 /// structured errors with spans and notes, like rustc
67 /// does.
68 pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
69 self.0.error(message.into())
70 }
71
72 /// Consume the next token if it is `kind`.
73 pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
74 if !self.at(kind) {
75 return false;
76 }
77 self.bump();
78 true
79 }
80
81 /// Consume the next token if it is `kind` or emit an error
82 /// otherwise.
83 pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
84 if self.eat(kind) {
85 return true;
86 }
87 self.error(format!("expected {:?}", kind));
88 false
89 }
90
91 /// Create an error node and consume the next token.
92 pub(crate) fn err_and_bump(&mut self, message: &str) {
93 let m = self.start();
94 self.error(message);
95 self.bump();
96 m.complete(self, ERROR);
97 }
98}
99
100/// See `Parser::start`.
101pub(crate) struct Marker(u32);
102
103impl Marker {
104 /// Finishes the syntax tree node and assigns `kind` to it.
105 pub(crate) fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker {
106 let pos = self.0;
107 ::std::mem::forget(self);
108 p.0.complete(pos, kind);
109 CompletedMarker(pos)
110 }
111
112 /// Abandons the syntax tree node. All its children
113 /// are attached to its parent instead.
114 pub(crate) fn abandon(self, p: &mut Parser) {
115 let pos = self.0;
116 ::std::mem::forget(self);
117 p.0.abandon(pos);
118 }
119}
120
121impl Drop for Marker {
122 fn drop(&mut self) {
123 if !::std::thread::panicking() {
124 panic!("Marker must be either completed or abandoned");
125 }
126 }
127}
128
129pub(crate) struct CompletedMarker(u32);
130
131impl CompletedMarker {
132 /// This one is tricky :-)
133 /// This method allows to create a new node which starts
134 /// *before* the current one. That is, parser could start
135 /// node `A`, then complete it, and then after parsing the
136 /// whole `A`, decide that it should have started some node
137 /// `B` before starting `A`. `precede` allows to do exactly
138 /// that. See also docs about `forward_parent` in `Event::Start`.
139 pub(crate) fn precede(self, p: &mut Parser) -> Marker {
140 Marker(p.0.precede(self.0))
141 }
142}
diff --git a/src/parser/token_set.rs b/src/parser/token_set.rs
new file mode 100644
index 000000000..a800f200d
--- /dev/null
+++ b/src/parser/token_set.rs
@@ -0,0 +1,24 @@
1use SyntaxKind;
2
3pub(crate) struct TokenSet {
4 pub tokens: &'static [SyntaxKind],
5}
6
7impl TokenSet {
8 pub fn contains(&self, kind: SyntaxKind) -> bool {
9 self.tokens.contains(&kind)
10 }
11}
12
13#[macro_export]
14macro_rules! token_set {
15 ($($t:ident),*) => {
16 TokenSet {
17 tokens: &[$($t),*],
18 }
19 };
20
21 ($($t:ident),* ,) => {
22 token_set!($($t),*)
23 };
24}
diff --git a/src/tree/file_builder.rs b/src/tree/file_builder.rs
index 3c7e2d7cf..f831676c7 100644
--- a/src/tree/file_builder.rs
+++ b/src/tree/file_builder.rs
@@ -70,7 +70,7 @@ impl Sink for FileBuilder {
70 let &(node, after_child) = self.in_progress.last().unwrap(); 70 let &(node, after_child) = self.in_progress.last().unwrap();
71 self.errors.push(SyntaxErrorData { 71 self.errors.push(SyntaxErrorData {
72 node, 72 node,
73 message: err.message, 73 message: err.msg,
74 after_child, 74 after_child,
75 }) 75 })
76 } 76 }
@@ -157,5 +157,5 @@ fn grow(left: &mut TextRange, right: TextRange) {
157 157
158#[derive(Default)] 158#[derive(Default)]
159pub(crate) struct ErrorMsg { 159pub(crate) struct ErrorMsg {
160 pub(crate) message: String, 160 pub(crate) msg: String,
161} 161}