aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2018-02-11 14:58:22 +0000
committerAleksey Kladov <[email protected]>2018-02-11 14:58:22 +0000
commit59087840f515c809498f09ec535e59054a893525 (patch)
tree282e1f9606dbeeba6c19a2dcd6ff94da420d155a /src
parent9e2c0564783aa91f6440e7cadcc1a4dfda785de0 (diff)
Document how the parsing works
Diffstat (limited to 'src')
-rw-r--r--src/parser/event.rs2
-rw-r--r--src/parser/grammar/mod.rs25
-rw-r--r--src/parser/parser/imp.rs3
-rw-r--r--src/parser/parser/mod.rs65
4 files changed, 82 insertions, 13 deletions
diff --git a/src/parser/event.rs b/src/parser/event.rs
index 30fe5c6d7..4af16d783 100644
--- a/src/parser/event.rs
+++ b/src/parser/event.rs
@@ -42,7 +42,7 @@ pub(crate) enum Event {
42 /// | 42 /// |
43 /// 'foo' 43 /// 'foo'
44 /// 44 ///
45 /// See also `CompleteMarker::precede`. 45 /// See also `CompletedMarker::precede`.
46 Start { 46 Start {
47 kind: SyntaxKind, 47 kind: SyntaxKind,
48 forward_parent: Option<u32>, 48 forward_parent: Option<u32>,
diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs
index e29cf9b02..ee0263203 100644
--- a/src/parser/grammar/mod.rs
+++ b/src/parser/grammar/mod.rs
@@ -1,4 +1,27 @@
1use parser::parser::{Parser}; 1//! This is the actual "grammar" of the Rust language.
2//!
3//! Each function in this module and its children corresponds
4//! to a production of the format grammar. Submodules roughly
5//! correspond to different *areas* of the grammar. By convention,
6//! each submodule starts with `use super::*` import and exports
7//! "public" productions via `pub(super)`.
8//!
9//! See docs for `Parser` to learn about API, available to the grammar,
10//! and see docs for `Event` to learn how this actually manages to
11//! produce parse trees.
12//!
13//! Code in this module also contains inline tests, which start with
14//! `// test name-of-the-test` comment and look like this:
15//!
16//! ```
17//! // test fn_item_with_zero_parameters
18//! // fn foo() {}
19//! ```
20//!
21//! After adding a new inline-test, run `cargo collect-tests` to extract
22//! it as a standalone text-fixture into `tests/data/parser/inline`, and
23//! run `cargo test` once to create the "gold" value.
24use parser::parser::Parser;
2use parser::token_set::TokenSet; 25use parser::token_set::TokenSet;
3use SyntaxKind; 26use SyntaxKind;
4use syntax_kinds::*; 27use syntax_kinds::*;
diff --git a/src/parser/parser/imp.rs b/src/parser/parser/imp.rs
index 2b16e11b9..03c044091 100644
--- a/src/parser/parser/imp.rs
+++ b/src/parser/parser/imp.rs
@@ -4,6 +4,9 @@ use parser::event::Event;
4use SyntaxKind; 4use SyntaxKind;
5use syntax_kinds::{TOMBSTONE, EOF}; 5use syntax_kinds::{TOMBSTONE, EOF};
6 6
7/// Implementation details of `Parser`, extracted
8/// to a separate struct in order not to pollute
9/// the public API of the `Parser`.
7pub(crate) struct ParserImpl<'t> { 10pub(crate) struct ParserImpl<'t> {
8 inp: &'t ParserInput<'t>, 11 inp: &'t ParserInput<'t>,
9 12
diff --git a/src/parser/parser/mod.rs b/src/parser/parser/mod.rs
index c8db20918..618b439be 100644
--- a/src/parser/parser/mod.rs
+++ b/src/parser/parser/mod.rs
@@ -4,51 +4,72 @@ use syntax_kinds::ERROR;
4pub(super) mod imp; 4pub(super) mod imp;
5use self::imp::ParserImpl; 5use self::imp::ParserImpl;
6 6
7/// `Parser` struct provides the low-level API for
8/// navigating through the stream of tokens and
9/// constructing the parse tree. The actual parsing
10/// happens in the `grammar` module.
11///
12/// However, the result of this `Parser` is not a real
13/// tree, but rather a flat stream of events of the form
14/// "start expression, consume number literal,
15/// finish expression". See `Event` docs for more.
7pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); 16pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>);
8 17
9
10impl<'t> Parser<'t> { 18impl<'t> Parser<'t> {
19 /// Returns the kind of the current token.
20 /// If parser has already reached the end of input,
21 /// the special `EOF` kind is returned.
11 pub(crate) fn current(&self) -> SyntaxKind { 22 pub(crate) fn current(&self) -> SyntaxKind {
12 self.nth(0) 23 self.nth(0)
13 } 24 }
14 25
26 /// Lookahead operation: returns the kind of the next nth
27 /// token.
15 pub(crate) fn nth(&self, n: u32) -> SyntaxKind { 28 pub(crate) fn nth(&self, n: u32) -> SyntaxKind {
16 self.0.nth(n) 29 self.0.nth(n)
17 } 30 }
18 31
32 /// Checks if the current token is `kind`.
19 pub(crate) fn at(&self, kind: SyntaxKind) -> bool { 33 pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
20 self.current() == kind 34 self.current() == kind
21 } 35 }
22 36
23 pub(crate) fn at_kw(&self, t: &str) -> bool { 37 /// Checks if the current token is contextual keyword with text `t`.
38 pub(crate) fn at_contextual_kw(&self, t: &str) -> bool {
24 self.0.at_kw(t) 39 self.0.at_kw(t)
25 } 40 }
26 41
42 /// Starts a new node in the syntax tree. All nodes and tokens
43 /// consumed between the `start` and the corresponding `Marker::complete`
44 /// belong to the same node.
27 pub(crate) fn start(&mut self) -> Marker { 45 pub(crate) fn start(&mut self) -> Marker {
28 Marker(self.0.start()) 46 Marker(self.0.start())
29 } 47 }
30 48
49 /// Advances the parser by one token.
31 pub(crate) fn bump(&mut self) { 50 pub(crate) fn bump(&mut self) {
32 self.0.bump(); 51 self.0.bump();
33 } 52 }
34 53
54 /// Advances the parser by one token, remapping its kind.
55 /// This is useful to create contextual keywords from
56 /// identifiers. For example, the lexer creates an `union`
57 /// *identifier* token, but the parser remaps it to the
58 /// `union` keyword, and keyword is what ends up in the
59 /// final tree.
35 pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { 60 pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
36 self.0.bump_remap(kind); 61 self.0.bump_remap(kind);
37 } 62 }
38 63
64 /// Emit error with the `message`
65 /// TODO: this should be much more fancy and support
66 /// structured errors with spans and notes, like rustc
67 /// does.
39 pub(crate) fn error<T: Into<String>>(&mut self, message: T) { 68 pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
40 self.0.error(message.into()) 69 self.0.error(message.into())
41 } 70 }
42 71
43 pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { 72 /// Consume the next token if it is `kind`.
44 if self.at(kind) {
45 self.bump();
46 return true;
47 }
48 self.error(format!("expected {:?}", kind));
49 false
50 }
51
52 pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { 73 pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
53 if !self.at(kind) { 74 if !self.at(kind) {
54 return false; 75 return false;
@@ -57,6 +78,17 @@ impl<'t> Parser<'t> {
57 true 78 true
58 } 79 }
59 80
81 /// Consume the next token if it is `kind` or emit an error
82 /// otherwise.
83 pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
84 if self.eat(kind) {
85 return true;
86 }
87 self.error(format!("expected {:?}", kind));
88 false
89 }
90
91 /// Create an error node and consume the next token.
60 pub(crate) fn err_and_bump(&mut self, message: &str) { 92 pub(crate) fn err_and_bump(&mut self, message: &str) {
61 let m = self.start(); 93 let m = self.start();
62 self.error(message); 94 self.error(message);
@@ -65,9 +97,11 @@ impl<'t> Parser<'t> {
65 } 97 }
66} 98}
67 99
100/// See `Parser::start`.
68pub(crate) struct Marker(u32); 101pub(crate) struct Marker(u32);
69 102
70impl Marker { 103impl Marker {
104 /// Finishes the syntax tree node and assigns `kind` to it.
71 pub(crate) fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { 105 pub(crate) fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker {
72 let pos = self.0; 106 let pos = self.0;
73 ::std::mem::forget(self); 107 ::std::mem::forget(self);
@@ -75,6 +109,8 @@ impl Marker {
75 CompletedMarker(pos) 109 CompletedMarker(pos)
76 } 110 }
77 111
112 /// Abandons the syntax tree node. All its children
113 /// are attached to its parent instead.
78 pub(crate) fn abandon(self, p: &mut Parser) { 114 pub(crate) fn abandon(self, p: &mut Parser) {
79 let pos = self.0; 115 let pos = self.0;
80 ::std::mem::forget(self); 116 ::std::mem::forget(self);
@@ -94,6 +130,13 @@ impl Drop for Marker {
94pub(crate) struct CompletedMarker(u32); 130pub(crate) struct CompletedMarker(u32);
95 131
96impl CompletedMarker { 132impl CompletedMarker {
133 /// This one is tricky :-)
134 /// This method allows to create a new node which starts
135 /// *before* the current one. That is, parser could start
136 /// node `A`, then complete it, and then after parsing the
137 /// whole `A`, decide that it should have started some node
138 /// `B` before starting `A`. `precede` allows to do exactly
139 /// that. See also docs about `forward_parent` in `Event::Start`.
97 pub(crate) fn precede(self, p: &mut Parser) -> Marker { 140 pub(crate) fn precede(self, p: &mut Parser) -> Marker {
98 Marker(p.0.precede(self.0)) 141 Marker(p.0.precede(self.0))
99 } 142 }