Merge #50

50: Shiny new parser r=matklad a=matklad bors r+
author: bors[bot] <bors[bot]@users.noreply.github.com> 2018-02-11 14:59:58 +0000
committer: bors[bot] <bors[bot]@users.noreply.github.com> 2018-02-11 14:59:58 +0000
commit: a6f9b0414cf5bf49ad7f714b9d3fe5af91a16404 (patch)
tree: 2fc1e8ccc43bbee85a06026270d7c8de5959e323
parent: 7a0ada860b57acd44b1d53e944ae621e438652da (diff)
parent: f356628ad8392c6e3ffd72a9ac50a7be87d3d183 (diff)
10 files changed, 330 insertions, 248 deletions
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index a1fa246c2..6b4434396 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -33,19 +33,22 @@ The centerpiece of this whole endeavor is the syntax tree, in the
 The syntax tree is produced using a three-staged process. 
-First, a raw text is split into tokens with a lexer. Lexer has a
+First, a raw text is split into tokens with a lexer (the `lexer` module).
-peculiar signature: it is an `Fn(&str) -> Token`, where token is a
+Lexer has a peculiar signature: it is an `Fn(&str) -> Token`, where token 
-pair of `SyntaxKind` (you should have read the `tree` module and RFC
+is a pair of `SyntaxKind` (you should have read the `tree` module and RFC
 by this time! :)) and a len. That is, lexer chomps only the first
 token of the input. This forces the lexer to be stateless, and makes
 it possible to implement incremental relexing easily.
 Then, the bulk of work, the parser turns a stream of tokens into
-stream of events. Not that parser **does not** construct a tree right
+stream of events (the `parser` module; of particular interest are 
-away. This is done for several reasons:
+the `parser/event` and `parser/parser` modules, which contain parsing 
+API, and the `parser/grammar` module, which contains actual parsing code
+for various Rust syntactic constructs). Note that the parser **does not** 
+construct a tree right away. This is done for several reasons:
 * to decouple the actual tree data structure from the parser: you can
-  build any datastructre you want from the stream of events
+  build any data structure you want from the stream of events
  
 * to make parsing fast: you can produce a list of events without
  allocations
@@ -77,12 +80,6 @@ And at last, the TreeBuilder converts a flat stream of events into a
 tree structure. It also *should* be responsible for attaching comments
 and rebalancing the tree, but it does not do this yet :) 
-## Error reporing
-TODO: describe how stuff like `skip_to_first` works
 ## Validator
 Parser and lexer accept a lot of *invalid* code intentionally. The
diff --git a/src/parser/event.rs b/src/parser/event.rs
index 90348398e..1c0905a38 100644
--- a/src/parser/event.rs
+++ b/src/parser/event.rs
@@ -42,7 +42,7 @@ pub(crate) enum Event {
    ///    |
    ///   'foo'
    ///
-    /// See also `CompleteMarker::precede`.
+    /// See also `CompletedMarker::precede`.
    Start {
        kind: SyntaxKind,
        forward_parent: Option<u32>,
@@ -61,7 +61,7 @@ pub(crate) enum Event {
    },
    Error {
-        message: String,
+        msg: String,
    },
 }
@@ -140,9 +140,7 @@ pub(super) fn to_file(text: String, tokens: &[Token], events: Vec<Event>) -> Fil
                }
                builder.leaf(kind, len);
            }
-            &Event::Error { ref message } => builder.error(ErrorMsg {
+            &Event::Error { ref msg } => builder.error(ErrorMsg { msg: msg.clone() }),
-                message: message.clone(),
-            }),
        }
    }
    builder.finish()
diff --git a/src/parser/grammar/items/mod.rs b/src/parser/grammar/items/mod.rs
index 18ee8af86..3af6d13a1 100644
--- a/src/parser/grammar/items/mod.rs
+++ b/src/parser/grammar/items/mod.rs
@@ -94,7 +94,7 @@ fn item(p: &mut Parser) {
                // test unsafe_auto_trait
                // unsafe auto trait T {}
-                IDENT if p.at_kw("auto") && la == TRAIT_KW => {
+                IDENT if p.at_contextual_kw("auto") && la == TRAIT_KW => {
                    p.bump_remap(AUTO_KW);
                    traits::trait_item(p);
                    TRAIT_ITEM
@@ -109,7 +109,7 @@ fn item(p: &mut Parser) {
                // test unsafe_default_impl
                // unsafe default impl Foo {}
-                IDENT if p.at_kw("default") && la == IMPL_KW => {
+                IDENT if p.at_contextual_kw("default") && la == IMPL_KW => {
                    p.bump_remap(DEFAULT_KW);
                    traits::impl_item(p);
                    IMPL_ITEM
diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs
index f5b63aaab..ee0263203 100644
--- a/src/parser/grammar/mod.rs
+++ b/src/parser/grammar/mod.rs
@@ -1,4 +1,28 @@
-use super::parser::{Parser, TokenSet};
+//! This is the actual "grammar" of the Rust language.
+//!
+//! Each function in this module and its children corresponds
+//! to a production of the formal grammar. Submodules roughly
+//! correspond to different *areas* of the grammar. By convention,
+//! each submodule starts with `use super::*` import and exports
+//! "public" productions via `pub(super)`.
+//!
+//! See docs for `Parser` to learn about the API available to the grammar,
+//! and see docs for `Event` to learn how this actually manages to
+//! produce parse trees.
+//!
+//! Code in this module also contains inline tests, which start with
+//! `// test name-of-the-test` comment and look like this:
+//!
+//! ```
+//! // test fn_item_with_zero_parameters
+//! // fn foo() {}
+//! ```
+//!
+//! After adding a new inline-test, run `cargo collect-tests` to extract
+//! it as a standalone text-fixture into `tests/data/parser/inline`, and
+//! run `cargo test` once to create the "gold" value.
+use parser::parser::Parser;
+use parser::token_set::TokenSet;
 use SyntaxKind;
 use syntax_kinds::*;
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index c23ed3349..3814837e1 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -3,19 +3,20 @@ use {File, SyntaxKind, Token};
 use syntax_kinds::*;
 #[macro_use]
+mod token_set;
 mod parser;
 mod input;
 mod event;
 mod grammar;
-use self::event::Event;
 /// Parse a sequence of tokens into the representative node tree
 pub fn parse(text: String, tokens: &[Token]) -> File {
    let events = {
        let input = input::ParserInput::new(&text, tokens);
-        let mut parser = parser::Parser::new(&input);
+        let parser_impl = parser::imp::ParserImpl::new(&input);
+        let mut parser = parser::Parser(parser_impl);
        grammar::file(&mut parser);
-        parser.into_events()
+        parser.0.into_events()
    };
    event::to_file(text, tokens, events)
 }
@@ -26,33 +27,3 @@ fn is_insignificant(kind: SyntaxKind) -> bool {
        _ => false,
    }
 }
-impl<'p> parser::Parser<'p> {
-    fn at(&self, kind: SyntaxKind) -> bool {
-        self.current() == kind
-    }
-    fn err_and_bump(&mut self, message: &str) {
-        let err = self.start();
-        self.error(message);
-        self.bump();
-        err.complete(self, ERROR);
-    }
-    fn expect(&mut self, kind: SyntaxKind) -> bool {
-        if self.at(kind) {
-            self.bump();
-            true
-        } else {
-            self.error(format!("expected {:?}", kind));
-            false
-        }
-    }
-    fn eat(&mut self, kind: SyntaxKind) -> bool {
-        self.at(kind) && {
-            self.bump();
-            true
-        }
-    }
-}
diff --git a/src/parser/parser.rs b/src/parser/parser.rs
deleted file mode 100644
index 7c8e47cb6..000000000
--- a/src/parser/parser.rs
+++ /dev/null
@@ -1,193 +0,0 @@
-use super::Event;
-use super::input::{InputPosition, ParserInput};
-use SyntaxKind::{self, EOF, TOMBSTONE};
-pub(crate) struct Marker {
-    pos: u32,
-}
-impl Marker {
-    pub fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompleteMarker {
-        match self.event(p) {
-            &mut Event::Start {
-                kind: ref mut slot, ..
-            } => {
-                *slot = kind;
-            }
-            _ => unreachable!(),
-        }
-        p.event(Event::Finish);
-        let result = CompleteMarker { pos: self.pos };
-        ::std::mem::forget(self);
-        result
-    }
-    pub fn abandon(self, p: &mut Parser) {
-        let idx = self.pos as usize;
-        if idx == p.events.len() - 1 {
-            match p.events.pop() {
-                Some(Event::Start {
-                    kind: TOMBSTONE,
-                    forward_parent: None,
-                }) => (),
-                _ => unreachable!(),
-            }
-        }
-        ::std::mem::forget(self);
-    }
-    fn event<'p>(&self, p: &'p mut Parser) -> &'p mut Event {
-        &mut p.events[self.idx()]
-    }
-    fn idx(&self) -> usize {
-        self.pos as usize
-    }
-}
-impl Drop for Marker {
-    fn drop(&mut self) {
-        if !::std::thread::panicking() {
-            panic!("Each marker should be eithe completed or abandoned");
-        }
-    }
-}
-pub(crate) struct CompleteMarker {
-    pos: u32,
-}
-impl CompleteMarker {
-    pub(crate) fn precede(self, p: &mut Parser) -> Marker {
-        let m = p.start();
-        match p.events[self.pos as usize] {
-            Event::Start {
-                ref mut forward_parent,
-                ..
-            } => {
-                *forward_parent = Some(m.pos - self.pos);
-            }
-            _ => unreachable!(),
-        }
-        m
-    }
-}
-pub(crate) struct TokenSet {
-    pub tokens: &'static [SyntaxKind],
-}
-impl TokenSet {
-    pub fn contains(&self, kind: SyntaxKind) -> bool {
-        self.tokens.contains(&kind)
-    }
-}
-#[macro_export]
-macro_rules! token_set {
-    ($($t:ident),*) => {
-        TokenSet {
-            tokens: &[$($t),*],
-        }
-    };
-    ($($t:ident),* ,) => {
-        token_set!($($t),*)
-    };
-}
-pub(crate) struct Parser<'t> {
-    inp: &'t ParserInput<'t>,
-    pos: InputPosition,
-    events: Vec<Event>,
-}
-impl<'t> Parser<'t> {
-    pub(crate) fn new(inp: &'t ParserInput<'t>) -> Parser<'t> {
-        Parser {
-            inp,
-            pos: InputPosition::new(),
-            events: Vec::new(),
-        }
-    }
-    pub(crate) fn into_events(self) -> Vec<Event> {
-        assert_eq!(self.current(), EOF);
-        self.events
-    }
-    pub(crate) fn start(&mut self) -> Marker {
-        let m = Marker {
-            pos: self.events.len() as u32,
-        };
-        self.event(Event::Start {
-            kind: TOMBSTONE,
-            forward_parent: None,
-        });
-        m
-    }
-    pub(crate) fn error<'p, T: Into<String>>(&'p mut self, msg: T) -> ErrorBuilder<'p, 't> {
-        ErrorBuilder::new(self, msg.into())
-    }
-    pub(crate) fn bump(&mut self) {
-        let kind = self.current();
-        if kind == EOF {
-            return;
-        }
-        self.pos += 1;
-        self.event(Event::Token {
-            kind,
-            n_raw_tokens: 1,
-        });
-    }
-    pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
-        if self.current() == EOF {
-            // TODO: panic!?
-            return;
-        }
-        self.pos += 1;
-        self.event(Event::Token {
-            kind,
-            n_raw_tokens: 1,
-        });
-    }
-    pub(crate) fn nth(&self, n: u32) -> SyntaxKind {
-        self.inp.kind(self.pos + n)
-    }
-    pub(crate) fn at_kw(&self, t: &str) -> bool {
-        self.inp.text(self.pos) == t
-    }
-    pub(crate) fn current(&self) -> SyntaxKind {
-        self.nth(0)
-    }
-    fn event(&mut self, event: Event) {
-        self.events.push(event)
-    }
-}
-pub(crate) struct ErrorBuilder<'p, 't: 'p> {
-    message: String,
-    parser: &'p mut Parser<'t>,
-}
-impl<'p, 't: 'p> Drop for ErrorBuilder<'p, 't> {
-    fn drop(&mut self) {
-        let message = ::std::mem::replace(&mut self.message, String::new());
-        self.parser.event(Event::Error { message });
-    }
-}
-impl<'t, 'p> ErrorBuilder<'p, 't> {
-    fn new(parser: &'p mut Parser<'t>, message: String) -> Self {
-        ErrorBuilder { message, parser }
-    }
-}
diff --git a/src/parser/parser/imp.rs b/src/parser/parser/imp.rs
new file mode 100644
index 000000000..f2641c388
--- /dev/null
+++ b/src/parser/parser/imp.rs
@@ -0,0 +1,119 @@
+use parser::input::{InputPosition, ParserInput};
+use parser::event::Event;
+use SyntaxKind;
+use syntax_kinds::{EOF, TOMBSTONE};
+/// Implementation details of `Parser`, extracted
+/// to a separate struct in order not to pollute
+/// the public API of the `Parser`.
+pub(crate) struct ParserImpl<'t> {
+    inp: &'t ParserInput<'t>,
+    pos: InputPosition,
+    events: Vec<Event>,
+}
+impl<'t> ParserImpl<'t> {
+    pub(crate) fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> {
+        ParserImpl {
+            inp,
+            pos: InputPosition::new(),
+            events: Vec::new(),
+        }
+    }
+    pub(crate) fn into_events(self) -> Vec<Event> {
+        assert_eq!(self.nth(0), EOF);
+        self.events
+    }
+    pub(super) fn nth(&self, n: u32) -> SyntaxKind {
+        self.inp.kind(self.pos + n)
+    }
+    pub(super) fn at_kw(&self, t: &str) -> bool {
+        self.inp.text(self.pos) == t
+    }
+    pub(super) fn start(&mut self) -> u32 {
+        let pos = self.events.len() as u32;
+        self.event(Event::Start {
+            kind: TOMBSTONE,
+            forward_parent: None,
+        });
+        pos
+    }
+    pub(super) fn bump(&mut self) {
+        let kind = self.nth(0);
+        if kind == EOF {
+            return;
+        }
+        self.do_bump(kind);
+    }
+    pub(super) fn bump_remap(&mut self, kind: SyntaxKind) {
+        if self.nth(0) == EOF {
+            // TODO: panic!?
+            return;
+        }
+        self.do_bump(kind);
+    }
+    fn do_bump(&mut self, kind: SyntaxKind) {
+        self.pos += 1;
+        self.event(Event::Token {
+            kind,
+            n_raw_tokens: 1,
+        });
+    }
+    pub(super) fn error(&mut self, msg: String) {
+        self.event(Event::Error { msg })
+    }
+    pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) {
+        match self.events[pos as usize] {
+            Event::Start {
+                kind: ref mut slot, ..
+            } => {
+                *slot = kind;
+            }
+            _ => unreachable!(),
+        }
+        self.event(Event::Finish);
+    }
+    pub(super) fn abandon(&mut self, pos: u32) {
+        let idx = pos as usize;
+        if idx == self.events.len() - 1 {
+            match self.events.pop() {
+                Some(Event::Start {
+                    kind: TOMBSTONE,
+                    forward_parent: None,
+                }) => (),
+                _ => unreachable!(),
+            }
+        }
+    }
+    pub(super) fn precede(&mut self, pos: u32) -> u32 {
+        let new_pos = self.start();
+        match self.events[pos as usize] {
+            Event::Start {
+                ref mut forward_parent,
+                ..
+            } => {
+                *forward_parent = Some(new_pos - pos);
+            }
+            _ => unreachable!(),
+        }
+        new_pos
+    }
+    fn event(&mut self, event: Event) {
+        self.events.push(event)
+    }
+}
diff --git a/src/parser/parser/mod.rs b/src/parser/parser/mod.rs
new file mode 100644
index 000000000..58f820738
--- /dev/null
+++ b/src/parser/parser/mod.rs
@@ -0,0 +1,142 @@
+use SyntaxKind;
+use syntax_kinds::ERROR;
+pub(super) mod imp;
+use self::imp::ParserImpl;
+/// `Parser` struct provides the low-level API for
+/// navigating through the stream of tokens and
+/// constructing the parse tree. The actual parsing
+/// happens in the `grammar` module.
+///
+/// However, the result of this `Parser` is not a real
+/// tree, but rather a flat stream of events of the form
+/// "start expression, consume number literal,
+/// finish expression". See `Event` docs for more.
+pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>);
+impl<'t> Parser<'t> {
+    /// Returns the kind of the current token.
+    /// If parser has already reached the end of input,
+    /// the special `EOF` kind is returned.
+    pub(crate) fn current(&self) -> SyntaxKind {
+        self.nth(0)
+    }
+    /// Lookahead operation: returns the kind of the next nth
+    /// token.
+    pub(crate) fn nth(&self, n: u32) -> SyntaxKind {
+        self.0.nth(n)
+    }
+    /// Checks if the current token is `kind`.
+    pub(crate) fn at(&self, kind: SyntaxKind) -> bool {
+        self.current() == kind
+    }
+    /// Checks if the current token is a contextual keyword with text `t`.
+    pub(crate) fn at_contextual_kw(&self, t: &str) -> bool {
+        self.0.at_kw(t)
+    }
+    /// Starts a new node in the syntax tree. All nodes and tokens
+    /// consumed between the `start` and the corresponding `Marker::complete`
+    /// belong to the same node.
+    pub(crate) fn start(&mut self) -> Marker {
+        Marker(self.0.start())
+    }
+    /// Advances the parser by one token.
+    pub(crate) fn bump(&mut self) {
+        self.0.bump();
+    }
+    /// Advances the parser by one token, remapping its kind.
+    /// This is useful to create contextual keywords from
+    /// identifiers. For example, the lexer creates a `union`
+    /// *identifier* token, but the parser remaps it to the
+    /// `union` keyword, and keyword is what ends up in the
+    /// final tree.
+    pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) {
+        self.0.bump_remap(kind);
+    }
+    /// Emit error with the `message`
+    /// TODO: this should be much more fancy and support
+    /// structured errors with spans and notes, like rustc
+    /// does.
+    pub(crate) fn error<T: Into<String>>(&mut self, message: T) {
+        self.0.error(message.into())
+    }
+    /// Consume the next token if it is `kind`.
+    pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool {
+        if !self.at(kind) {
+            return false;
+        }
+        self.bump();
+        true
+    }
+    /// Consume the next token if it is `kind` or emit an error
+    /// otherwise.
+    pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool {
+        if self.eat(kind) {
+            return true;
+        }
+        self.error(format!("expected {:?}", kind));
+        false
+    }
+    /// Create an error node and consume the next token.
+    pub(crate) fn err_and_bump(&mut self, message: &str) {
+        let m = self.start();
+        self.error(message);
+        self.bump();
+        m.complete(self, ERROR);
+    }
+}
+/// See `Parser::start`.
+pub(crate) struct Marker(u32);
+impl Marker {
+    /// Finishes the syntax tree node and assigns `kind` to it.
+    pub(crate) fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker {
+        let pos = self.0;
+        ::std::mem::forget(self);
+        p.0.complete(pos, kind);
+        CompletedMarker(pos)
+    }
+    /// Abandons the syntax tree node. All its children
+    /// are attached to its parent instead.
+    pub(crate) fn abandon(self, p: &mut Parser) {
+        let pos = self.0;
+        ::std::mem::forget(self);
+        p.0.abandon(pos);
+    }
+}
+impl Drop for Marker {
+    fn drop(&mut self) {
+        if !::std::thread::panicking() {
+            panic!("Marker must be either completed or abandoned");
+        }
+    }
+}
+pub(crate) struct CompletedMarker(u32);
+impl CompletedMarker {
+    /// This one is tricky :-)
+    /// This method allows creating a new node which starts
+    /// *before* the current one. That is, parser could start
+    /// node `A`, then complete it, and then after parsing the
+    /// whole `A`, decide that it should have started some node
+    /// `B` before starting `A`. `precede` makes it possible to do exactly
+    /// that. See also docs about `forward_parent` in `Event::Start`.
+    pub(crate) fn precede(self, p: &mut Parser) -> Marker {
+        Marker(p.0.precede(self.0))
+    }
+}
diff --git a/src/parser/token_set.rs b/src/parser/token_set.rs
new file mode 100644
index 000000000..a800f200d
--- /dev/null
+++ b/src/parser/token_set.rs
@@ -0,0 +1,24 @@
+use SyntaxKind;
+pub(crate) struct TokenSet {
+    pub tokens: &'static [SyntaxKind],
+}
+impl TokenSet {
+    pub fn contains(&self, kind: SyntaxKind) -> bool {
+        self.tokens.contains(&kind)
+    }
+}
+#[macro_export]
+macro_rules! token_set {
+    ($($t:ident),*) => {
+        TokenSet {
+            tokens: &[$($t),*],
+        }
+    };
+    ($($t:ident),* ,) => {
+        token_set!($($t),*)
+    };
+}
diff --git a/src/tree/file_builder.rs b/src/tree/file_builder.rs
index 3c7e2d7cf..f831676c7 100644
--- a/src/tree/file_builder.rs
+++ b/src/tree/file_builder.rs
@@ -70,7 +70,7 @@ impl Sink for FileBuilder {
        let &(node, after_child) = self.in_progress.last().unwrap();
        self.errors.push(SyntaxErrorData {
            node,
-            message: err.message,
+            message: err.msg,
            after_child,
        })
    }
@@ -157,5 +157,5 @@ fn grow(left: &mut TextRange, right: TextRange) {
 #[derive(Default)]
 pub(crate) struct ErrorMsg {
-    pub(crate) message: String,
+    pub(crate) msg: String,
 }
author	bors[bot] <bors[bot]@users.noreply.github.com>	2018-02-11 14:59:58 +0000
committer	bors[bot] <bors[bot]@users.noreply.github.com>	2018-02-11 14:59:58 +0000
commit	a6f9b0414cf5bf49ad7f714b9d3fe5af91a16404 (patch)
tree	2fc1e8ccc43bbee85a06026270d7c8de5959e323
parent	7a0ada860b57acd44b1d53e944ae621e438652da (diff)
parent	f356628ad8392c6e3ffd72a9ac50a7be87d3d183 (diff)