From 59087840f515c809498f09ec535e59054a893525 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 11 Feb 2018 17:58:22 +0300 Subject: Document how the parsing works --- src/parser/event.rs | 2 +- src/parser/grammar/mod.rs | 25 +++++++++++++++++- src/parser/parser/imp.rs | 3 +++ src/parser/parser/mod.rs | 65 +++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 82 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/parser/event.rs b/src/parser/event.rs index 30fe5c6d7..4af16d783 100644 --- a/src/parser/event.rs +++ b/src/parser/event.rs @@ -42,7 +42,7 @@ pub(crate) enum Event { /// | /// 'foo' /// - /// See also `CompleteMarker::precede`. + /// See also `CompletedMarker::precede`. Start { kind: SyntaxKind, forward_parent: Option, diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs index e29cf9b02..ee0263203 100644 --- a/src/parser/grammar/mod.rs +++ b/src/parser/grammar/mod.rs @@ -1,4 +1,27 @@ -use parser::parser::{Parser}; +//! This is the actual "grammar" of the Rust language. +//! +//! Each function in this module and its children corresponds +//! to a production of the formal grammar. Submodules roughly +//! correspond to different *areas* of the grammar. By convention, +//! each submodule starts with `use super::*` import and exports +//! "public" productions via `pub(super)`. +//! +//! See docs for `Parser` to learn about the API available to the grammar, +//! and see docs for `Event` to learn how this actually manages to +//! produce parse trees. +//! +//! Code in this module also contains inline tests, which start with +//! `// test name-of-the-test` comment and look like this: +//! +//! ``` +//! // test fn_item_with_zero_parameters +//! // fn foo() {} +//! ``` +//! +//! After adding a new inline-test, run `cargo collect-tests` to extract +//! it as a standalone text-fixture into `tests/data/parser/inline`, and +//! run `cargo test` once to create the "gold" value. 
+use parser::parser::Parser; use parser::token_set::TokenSet; use SyntaxKind; use syntax_kinds::*; diff --git a/src/parser/parser/imp.rs b/src/parser/parser/imp.rs index 2b16e11b9..03c044091 100644 --- a/src/parser/parser/imp.rs +++ b/src/parser/parser/imp.rs @@ -4,6 +4,9 @@ use parser::event::Event; use SyntaxKind; use syntax_kinds::{TOMBSTONE, EOF}; +/// Implementation details of `Parser`, extracted +/// to a separate struct in order not to pollute +/// the public API of the `Parser`. pub(crate) struct ParserImpl<'t> { inp: &'t ParserInput<'t>, diff --git a/src/parser/parser/mod.rs b/src/parser/parser/mod.rs index c8db20918..618b439be 100644 --- a/src/parser/parser/mod.rs +++ b/src/parser/parser/mod.rs @@ -4,51 +4,72 @@ use syntax_kinds::ERROR; pub(super) mod imp; use self::imp::ParserImpl; +/// `Parser` struct provides the low-level API for +/// navigating through the stream of tokens and +/// constructing the parse tree. The actual parsing +/// happens in the `grammar` module. +/// +/// However, the result of this `Parser` is not a real +/// tree, but rather a flat stream of events of the form +/// "start expression, consume number literal, +/// finish expression". See `Event` docs for more. pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); - impl<'t> Parser<'t> { + /// Returns the kind of the current token. + /// If the parser has already reached the end of input, + /// the special `EOF` kind is returned. pub(crate) fn current(&self) -> SyntaxKind { self.nth(0) } + /// Lookahead operation: returns the kind of the next nth + /// token. pub(crate) fn nth(&self, n: u32) -> SyntaxKind { self.0.nth(n) } + /// Checks if the current token is `kind`. pub(crate) fn at(&self, kind: SyntaxKind) -> bool { self.current() == kind } - pub(crate) fn at_kw(&self, t: &str) -> bool { + /// Checks if the current token is a contextual keyword with text `t`. + pub(crate) fn at_contextual_kw(&self, t: &str) -> bool { self.0.at_kw(t) } + /// Starts a new node in the syntax tree. 
All nodes and tokens + /// consumed between the `start` and the corresponding `Marker::complete` + /// belong to the same node. pub(crate) fn start(&mut self) -> Marker { Marker(self.0.start()) } + /// Advances the parser by one token. pub(crate) fn bump(&mut self) { self.0.bump(); } + /// Advances the parser by one token, remapping its kind. + /// This is useful to create contextual keywords from + /// identifiers. For example, the lexer creates a `union` + /// *identifier* token, but the parser remaps it to the + /// `union` keyword, and the keyword is what ends up in the + /// final tree. pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { self.0.bump_remap(kind); } + /// Emit error with the `message`. + /// TODO: this should be much more fancy and support + /// structured errors with spans and notes, like rustc + /// does. pub(crate) fn error>(&mut self, message: T) { self.0.error(message.into()) } - pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { - if self.at(kind) { - self.bump(); - return true; - } - self.error(format!("expected {:?}", kind)); - false - } - + /// Consume the next token if it is `kind`. pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { if !self.at(kind) { return false; @@ -57,6 +78,17 @@ impl<'t> Parser<'t> { true } + /// Consume the next token if it is `kind` or emit an error + /// otherwise. + pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { + if self.eat(kind) { + return true; + } + self.error(format!("expected {:?}", kind)); + false + } + + /// Create an error node and consume the next token. pub(crate) fn err_and_bump(&mut self, message: &str) { let m = self.start(); self.error(message); @@ -65,9 +97,11 @@ impl<'t> Parser<'t> { } } +/// See `Parser::start`. pub(crate) struct Marker(u32); impl Marker { + /// Finishes the syntax tree node and assigns `kind` to it. 
pub(crate) fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { let pos = self.0; ::std::mem::forget(self); @@ -75,6 +109,8 @@ impl Marker { CompletedMarker(pos) } + /// Abandons the syntax tree node. All its children + /// are attached to its parent instead. pub(crate) fn abandon(self, p: &mut Parser) { let pos = self.0; ::std::mem::forget(self); @@ -94,6 +130,13 @@ impl Drop for Marker { pub(crate) struct CompletedMarker(u32); impl CompletedMarker { + /// This one is tricky :-) + /// This method allows creating a new node which starts + /// *before* the current one. That is, the parser could start + /// node `A`, then complete it, and then after parsing the + /// whole `A`, decide that it should have started some node + /// `B` before starting `A`. `precede` allows doing exactly + /// that. See also docs about `forward_parent` in `Event::Start`. pub(crate) fn precede(self, p: &mut Parser) -> Marker { Marker(p.0.precede(self.0)) } -- cgit v1.2.3