From d94124e3b4a4380ad23c1a317fd3943664b7182a Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 4 Feb 2018 13:53:47 +0300 Subject: Somewhat more san directory structure --- src/parser/event.rs | 140 +++++++++++++++ src/parser/event_parser/grammar/attributes.rs | 79 --------- src/parser/event_parser/grammar/expressions.rs | 20 --- src/parser/event_parser/grammar/items/consts.rs | 21 --- src/parser/event_parser/grammar/items/mod.rs | 196 --------------------- src/parser/event_parser/grammar/items/structs.rs | 117 ------------- src/parser/event_parser/grammar/items/traits.rs | 17 -- src/parser/event_parser/grammar/items/use_item.rs | 66 ------- src/parser/event_parser/grammar/mod.rs | 130 -------------- src/parser/event_parser/grammar/paths.rs | 49 ------ src/parser/event_parser/grammar/type_params.rs | 75 -------- src/parser/event_parser/grammar/types.rs | 5 - src/parser/event_parser/mod.rs | 74 -------- src/parser/event_parser/parser.rs | 201 ---------------------- src/parser/grammar/attributes.rs | 79 +++++++++ src/parser/grammar/expressions.rs | 20 +++ src/parser/grammar/items/consts.rs | 21 +++ src/parser/grammar/items/mod.rs | 196 +++++++++++++++++++++ src/parser/grammar/items/structs.rs | 117 +++++++++++++ src/parser/grammar/items/traits.rs | 17 ++ src/parser/grammar/items/use_item.rs | 66 +++++++ src/parser/grammar/mod.rs | 130 ++++++++++++++ src/parser/grammar/paths.rs | 49 ++++++ src/parser/grammar/type_params.rs | 75 ++++++++ src/parser/grammar/types.rs | 5 + src/parser/mod.rs | 89 ++-------- src/parser/parser.rs | 201 ++++++++++++++++++++++ 27 files changed, 1128 insertions(+), 1127 deletions(-) create mode 100644 src/parser/event.rs delete mode 100644 src/parser/event_parser/grammar/attributes.rs delete mode 100644 src/parser/event_parser/grammar/expressions.rs delete mode 100644 src/parser/event_parser/grammar/items/consts.rs delete mode 100644 src/parser/event_parser/grammar/items/mod.rs delete mode 100644 src/parser/event_parser/grammar/items/structs.rs delete mode 100644 src/parser/event_parser/grammar/items/traits.rs delete mode 100644 src/parser/event_parser/grammar/items/use_item.rs delete mode 100644 src/parser/event_parser/grammar/mod.rs delete mode 100644 src/parser/event_parser/grammar/paths.rs delete mode 100644 src/parser/event_parser/grammar/type_params.rs delete mode 100644 src/parser/event_parser/grammar/types.rs delete mode 100644 src/parser/event_parser/mod.rs delete mode 100644 src/parser/event_parser/parser.rs create mode 100644 src/parser/grammar/attributes.rs create mode 100644 src/parser/grammar/expressions.rs create mode 100644 src/parser/grammar/items/consts.rs create mode 100644 src/parser/grammar/items/mod.rs create mode 100644 src/parser/grammar/items/structs.rs create mode 100644 src/parser/grammar/items/traits.rs create mode 100644 src/parser/grammar/items/use_item.rs create mode 100644 src/parser/grammar/mod.rs create mode 100644 src/parser/grammar/paths.rs create mode 100644 src/parser/grammar/type_params.rs create mode 100644 src/parser/grammar/types.rs create mode 100644 src/parser/parser.rs (limited to 'src') diff --git a/src/parser/event.rs b/src/parser/event.rs new file mode 100644 index 000000000..fd6bdc086 --- /dev/null +++ b/src/parser/event.rs @@ -0,0 +1,140 @@ +use {File, FileBuilder, Sink, SyntaxKind, Token}; +use syntax_kinds::TOMBSTONE; +use super::is_insignificant; + +/// `Parser` produces a flat list of `Event`s. +/// They are converted to a tree-structure in +/// a separate pass, via `TreeBuilder`. +#[derive(Debug)] +pub(crate) enum Event { + /// This event signifies the start of the node. + /// It should be either abandoned (in which case the + /// `kind` is `TOMBSTONE`, and the event is ignored), + /// or completed via a `Finish` event. + /// + /// All tokens between a `Start` and a `Finish` would + /// become the children of the respective node. + /// + /// For left-recursive syntactic constructs, the parser produces + /// a child node before it sees a parent. `forward_parent` + /// exists to allow to tweak parent-child relationships. + /// + /// Consider this path + /// + /// foo::bar + /// + /// The events for it would look like this: + /// + /// + /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH + /// | /\ + /// | | + /// +------forward-parent------+ + /// + /// And the tree would look like this + /// + /// +--PATH---------+ + /// | | | + /// | | | + /// | '::' 'bar' + /// | + /// PATH + /// | + /// 'foo' + /// + /// See also `CompleteMarker::precede`. + Start { + kind: SyntaxKind, + forward_parent: Option, + }, + + /// Complete the previous `Start` event + Finish, + + /// Produce a single leaf-element. + /// `n_raw_tokens` is used to glue complex contextual tokens. + /// For example, lexer tokenizes `>>` as `>`, `>`, and + /// `n_raw_tokens = 2` is used to produced a single `>>`. + Token { + kind: SyntaxKind, + n_raw_tokens: u8, + }, + + Error { + message: String, + }, +} + +pub(super) fn to_file(text: String, tokens: &[Token], events: Vec) -> File { + let mut builder = FileBuilder::new(text); + let mut idx = 0; + + let mut holes = Vec::new(); + let mut forward_parents = Vec::new(); + + for (i, event) in events.iter().enumerate() { + if holes.last() == Some(&i) { + holes.pop(); + continue; + } + + match event { + &Event::Start { + kind: TOMBSTONE, .. + } => (), + + &Event::Start { .. } => { + forward_parents.clear(); + let mut idx = i; + loop { + let (kind, fwd) = match events[idx] { + Event::Start { + kind, + forward_parent, + } => (kind, forward_parent), + _ => unreachable!(), + }; + forward_parents.push((idx, kind)); + if let Some(fwd) = fwd { + idx += fwd as usize; + } else { + break; + } + } + for &(idx, kind) in forward_parents.iter().into_iter().rev() { + builder.start_internal(kind); + holes.push(idx); + } + holes.pop(); + } + &Event::Finish => { + while idx < tokens.len() { + let token = tokens[idx]; + if is_insignificant(token.kind) { + idx += 1; + builder.leaf(token.kind, token.len); + } else { + break; + } + } + builder.finish_internal() + } + &Event::Token { + kind: _, + mut n_raw_tokens, + } => loop { + let token = tokens[idx]; + if !is_insignificant(token.kind) { + n_raw_tokens -= 1; + } + idx += 1; + builder.leaf(token.kind, token.len); + if n_raw_tokens == 0 { + break; + } + }, + &Event::Error { ref message } => builder.error().message(message.clone()).emit(), + } + } + builder.finish() +} diff --git a/src/parser/event_parser/grammar/attributes.rs b/src/parser/event_parser/grammar/attributes.rs deleted file mode 100644 index 8bf04afce..000000000 --- a/src/parser/event_parser/grammar/attributes.rs +++ /dev/null @@ -1,79 +0,0 @@ -use super::*; - -pub(super) fn inner_attributes(p: &mut Parser) { - while p.at([POUND, EXCL]) { - attribute(p, true) - } -} - -pub(super) fn outer_attributes(p: &mut Parser) { - while p.at(POUND) { - attribute(p, false) - } -} - -fn attribute(p: &mut Parser, inner: bool) { - let attr = p.start(); - assert!(p.at(POUND)); - p.bump(); - - if inner { - assert!(p.at(EXCL)); - p.bump(); - } - - if p.expect(L_BRACK) { - meta_item(p); - p.expect(R_BRACK); - } - attr.complete(p, ATTR); -} - -fn meta_item(p: &mut Parser) { - if p.at(IDENT) { - let meta_item = p.start(); - p.bump(); - match p.current() { - EQ => { - p.bump(); - if !expressions::literal(p) { - p.error().message("expected literal").emit(); - } - } - L_PAREN => meta_item_arg_list(p), - _ => (), - } - meta_item.complete(p, META_ITEM); - } else { - p.error().message("expected attribute value").emit() - } -} - -fn meta_item_arg_list(p: &mut Parser) { - assert!(p.at(L_PAREN)); - p.bump(); - loop { - match p.current() { - EOF | R_PAREN => break, - IDENT => meta_item(p), - c => if !expressions::literal(p) { - let message = "expected attribute"; - - if items::ITEM_FIRST.contains(c) { - p.error().message(message).emit(); - return; - } - - let err = p.start(); - p.error().message(message).emit(); - p.bump(); - err.complete(p, ERROR); - continue; - }, - } - if !p.at(R_PAREN) { - p.expect(COMMA); - } - } - p.expect(R_PAREN); -} diff --git a/src/parser/event_parser/grammar/expressions.rs b/src/parser/event_parser/grammar/expressions.rs deleted file mode 100644 index 8caaf3553..000000000 --- a/src/parser/event_parser/grammar/expressions.rs +++ /dev/null @@ -1,20 +0,0 @@ -use super::*; - -pub(super) fn literal(p: &mut Parser) -> bool { - match p.current() { - TRUE_KW | FALSE_KW | INT_NUMBER | FLOAT_NUMBER | BYTE | CHAR | STRING | RAW_STRING - | BYTE_STRING | RAW_BYTE_STRING => { - let lit = p.start(); - p.bump(); - lit.complete(p, LITERAL); - true - } - _ => false, - } -} - -pub(super) fn expr(p: &mut Parser) { - if !literal(p) { - p.error().message("expected expression").emit(); - } -} diff --git a/src/parser/event_parser/grammar/items/consts.rs b/src/parser/event_parser/grammar/items/consts.rs deleted file mode 100644 index c9881d681..000000000 --- a/src/parser/event_parser/grammar/items/consts.rs +++ /dev/null @@ -1,21 +0,0 @@ -use super::*; - -pub(super) fn static_item(p: &mut Parser) { - const_or_static(p, STATIC_KW) -} - -pub(super) fn const_item(p: &mut Parser) { - const_or_static(p, CONST_KW) -} - -fn const_or_static(p: &mut Parser, kw: SyntaxKind) { - assert!(p.at(kw)); - p.bump(); - p.eat(MUT_KW); // TODO: validator to forbid const mut - p.expect(IDENT); - p.expect(COLON); - types::type_ref(p); - p.expect(EQ); - expressions::expr(p); - p.expect(SEMI); -} diff --git a/src/parser/event_parser/grammar/items/mod.rs b/src/parser/event_parser/grammar/items/mod.rs deleted file mode 100644 index 3612802e1..000000000 --- a/src/parser/event_parser/grammar/items/mod.rs +++ /dev/null @@ -1,196 +0,0 @@ -use super::*; - -mod structs; -mod use_item; -mod consts; -mod traits; - -pub(super) fn mod_contents(p: &mut Parser, stop_on_r_curly: bool) { - attributes::inner_attributes(p); - while !p.at(EOF) && !(stop_on_r_curly && p.at(R_CURLY)) { - item(p); - } -} - -pub(super) const ITEM_FIRST: TokenSet = token_set![ - EXTERN_KW, MOD_KW, USE_KW, STRUCT_KW, ENUM_KW, FN_KW, PUB_KW, POUND -]; - -fn item(p: &mut Parser) { - let item = p.start(); - attributes::outer_attributes(p); - visibility(p); - let la = p.nth(1); - let item_kind = match p.current() { - USE_KW => { - use_item::use_item(p); - USE_ITEM - } - // test extern_crate - // extern crate foo; - EXTERN_KW if la == CRATE_KW => { - extern_crate_item(p); - EXTERN_CRATE_ITEM - } - EXTERN_KW => { - abi(p); - match p.current() { - // test extern_fn - // extern fn foo() {} - FN_KW => { - fn_item(p); - FN_ITEM - } - // test extern_block - // extern {} - L_CURLY => { - extern_block(p); - EXTERN_BLOCK - } - // test extern_struct - // extern struct Foo; - _ => { - item.abandon(p); - p.error().message("expected `fn` or `{`").emit(); - return; - } - } - } - STATIC_KW => { - consts::static_item(p); - STATIC_ITEM - } - CONST_KW => match p.nth(1) { - // test const_fn - // const fn foo() {} - FN_KW => { - p.bump(); - fn_item(p); - FN_ITEM - } - // test const_unsafe_fn - // const unsafe fn foo() {} - UNSAFE_KW if p.nth(2) == FN_KW => { - p.bump(); - p.bump(); - fn_item(p); - FN_ITEM - } - _ => { - consts::const_item(p); - CONST_ITEM - } - }, - // TODO: auto trait - // test unsafe_trait - // unsafe trait T {} - UNSAFE_KW if la == TRAIT_KW => { - p.bump(); - traits::trait_item(p); - TRAIT_ITEM - } - // TODO: default impl - // test unsafe_impl - // unsafe impl Foo {} - UNSAFE_KW if la == IMPL_KW => { - p.bump(); - traits::impl_item(p); - IMPL_ITEM - } - MOD_KW => { - mod_item(p); - MOD_ITEM - } - STRUCT_KW => { - structs::struct_item(p); - STRUCT_ITEM - } - ENUM_KW => { - structs::enum_item(p); - ENUM_ITEM - } - FN_KW => { - fn_item(p); - FN_ITEM - } - L_CURLY => { - item.abandon(p); - error_block(p, "expected item"); - return; - } - err_token => { - item.abandon(p); - let message = if err_token == SEMI { - //TODO: if the item is incomplete, this message is misleading - "expected item, found `;`\n\ - consider removing this semicolon" - } else { - "expected item" - }; - p.err_and_bump(message); - return; - } - }; - item.complete(p, item_kind); -} - -fn extern_crate_item(p: &mut Parser) { - assert!(p.at(EXTERN_KW)); - p.bump(); - assert!(p.at(CRATE_KW)); - p.bump(); - - p.expect(IDENT) && alias(p) && p.expect(SEMI); -} - -fn extern_block(p: &mut Parser) { - assert!(p.at(L_CURLY)); - p.bump(); - p.expect(R_CURLY); -} - -fn mod_item(p: &mut Parser) { - assert!(p.at(MOD_KW)); - p.bump(); - - if p.expect(IDENT) && !p.eat(SEMI) { - if p.expect(L_CURLY) { - mod_contents(p, true); - p.expect(R_CURLY); - } - } -} - -fn abi(p: &mut Parser) { - assert!(p.at(EXTERN_KW)); - let abi = p.start(); - p.bump(); - match p.current() { - STRING | RAW_STRING => p.bump(), - _ => (), - } - abi.complete(p, ABI); -} - -fn fn_item(p: &mut Parser) { - assert!(p.at(FN_KW)); - p.bump(); - - p.expect(IDENT); - if p.at(L_PAREN) { - fn_value_parameters(p); - } else { - p.error().message("expected function arguments").emit(); - } - - if p.at(L_CURLY) { - p.expect(L_CURLY); - p.expect(R_CURLY); - } - - fn fn_value_parameters(p: &mut Parser) { - assert!(p.at(L_PAREN)); - p.bump(); - p.expect(R_PAREN); - } -} diff --git a/src/parser/event_parser/grammar/items/structs.rs b/src/parser/event_parser/grammar/items/structs.rs deleted file mode 100644 index 69d95c698..000000000 --- a/src/parser/event_parser/grammar/items/structs.rs +++ /dev/null @@ -1,117 +0,0 @@ -use super::*; - -pub(super) fn struct_item(p: &mut Parser) { - assert!(p.at(STRUCT_KW)); - p.bump(); - - if !p.expect(IDENT) { - return; - } - type_params::list(p); - match p.current() { - WHERE_KW => { - type_params::where_clause(p); - match p.current() { - SEMI => { - p.bump(); - return; - } - L_CURLY => named_fields(p), - _ => { - //TODO: special case `(` error message - p.error().message("expected `;` or `{`").emit(); - return; - } - } - } - SEMI => { - p.bump(); - return; - } - L_CURLY => named_fields(p), - L_PAREN => { - pos_fields(p); - p.expect(SEMI); - } - _ => { - p.error().message("expected `;`, `{`, or `(`").emit(); - return; - } - } -} - -pub(super) fn enum_item(p: &mut Parser) { - assert!(p.at(ENUM_KW)); - p.bump(); - p.expect(IDENT); - type_params::list(p); - type_params::where_clause(p); - if p.expect(L_CURLY) { - while !p.at(EOF) && !p.at(R_CURLY) { - let var = p.start(); - attributes::outer_attributes(p); - if p.at(IDENT) { - p.bump(); - match p.current() { - L_CURLY => named_fields(p), - L_PAREN => pos_fields(p), - EQ => { - p.bump(); - expressions::expr(p); - } - _ => (), - } - var.complete(p, ENUM_VARIANT); - } else { - var.abandon(p); - p.err_and_bump("expected enum variant"); - } - if !p.at(R_CURLY) { - p.expect(COMMA); - } - } - p.expect(R_CURLY); - } -} - -fn named_fields(p: &mut Parser) { - assert!(p.at(L_CURLY)); - p.bump(); - while !p.at(R_CURLY) && !p.at(EOF) { - named_field(p); - if !p.at(R_CURLY) { - p.expect(COMMA); - } - } - p.expect(R_CURLY); - - fn named_field(p: &mut Parser) { - let field = p.start(); - visibility(p); - if p.expect(IDENT) { - p.expect(COLON); - types::type_ref(p); - field.complete(p, NAMED_FIELD); - } else { - field.abandon(p); - p.err_and_bump("expected field declaration"); - } - } -} - -fn pos_fields(p: &mut Parser) { - if !p.expect(L_PAREN) { - return; - } - while !p.at(R_PAREN) && !p.at(EOF) { - let pos_field = p.start(); - visibility(p); - types::type_ref(p); - pos_field.complete(p, POS_FIELD); - - if !p.at(R_PAREN) { - p.expect(COMMA); - } - } - p.expect(R_PAREN); -} diff --git a/src/parser/event_parser/grammar/items/traits.rs b/src/parser/event_parser/grammar/items/traits.rs deleted file mode 100644 index 3bef9639f..000000000 --- a/src/parser/event_parser/grammar/items/traits.rs +++ /dev/null @@ -1,17 +0,0 @@ -use super::*; - -pub(super) fn trait_item(p: &mut Parser) { - assert!(p.at(TRAIT_KW)); - p.bump(); - p.expect(IDENT); - p.expect(L_CURLY); - p.expect(R_CURLY); -} - -pub(super) fn impl_item(p: &mut Parser) { - assert!(p.at(IMPL_KW)); - p.bump(); - p.expect(IDENT); - p.expect(L_CURLY); - p.expect(R_CURLY); -} diff --git a/src/parser/event_parser/grammar/items/use_item.rs b/src/parser/event_parser/grammar/items/use_item.rs deleted file mode 100644 index 38e7b3f8a..000000000 --- a/src/parser/event_parser/grammar/items/use_item.rs +++ /dev/null @@ -1,66 +0,0 @@ -use super::*; - -pub(super) fn use_item(p: &mut Parser) { - assert!(p.at(USE_KW)); - p.bump(); - use_tree(p); - p.expect(SEMI); -} - -fn use_tree(p: &mut Parser) { - let la = p.nth(1); - let m = p.start(); - match (p.current(), la) { - (STAR, _) => p.bump(), - (COLONCOLON, STAR) => { - p.bump(); - p.bump(); - } - (L_CURLY, _) | (COLONCOLON, L_CURLY) => { - if p.at(COLONCOLON) { - p.bump(); - } - nested_trees(p); - } - _ if paths::is_path_start(p) => { - paths::use_path(p); - match p.current() { - AS_KW => { - alias(p); - } - COLONCOLON => { - p.bump(); - match p.current() { - STAR => { - p.bump(); - } - L_CURLY => nested_trees(p), - _ => { - // is this unreachable? - p.error().message("expected `{` or `*`").emit(); - } - } - } - _ => (), - } - } - _ => { - m.abandon(p); - p.err_and_bump("expected one of `*`, `::`, `{`, `self`, `super`, `indent`"); - return; - } - } - m.complete(p, USE_TREE); -} - -fn nested_trees(p: &mut Parser) { - assert!(p.at(L_CURLY)); - p.bump(); - while !p.at(EOF) && !p.at(R_CURLY) { - use_tree(p); - if !p.at(R_CURLY) { - p.expect(COMMA); - } - } - p.expect(R_CURLY); -} diff --git a/src/parser/event_parser/grammar/mod.rs b/src/parser/event_parser/grammar/mod.rs deleted file mode 100644 index afce308d0..000000000 --- a/src/parser/event_parser/grammar/mod.rs +++ /dev/null @@ -1,130 +0,0 @@ -use super::parser::{Parser, TokenSet}; -use SyntaxKind; -use syntax_kinds::*; - -mod items; -mod attributes; -mod expressions; -mod types; -mod paths; -mod type_params; - -pub(crate) fn file(p: &mut Parser) { - let file = p.start(); - p.eat(SHEBANG); - items::mod_contents(p, false); - file.complete(p, FILE); -} - -fn visibility(p: &mut Parser) { - if p.at(PUB_KW) { - let vis = p.start(); - p.bump(); - if p.at(L_PAREN) { - match p.nth(1) { - CRATE_KW | SELF_KW | SUPER_KW => { - p.bump(); - p.bump(); - p.expect(R_PAREN); - } - IN_KW => { - p.bump(); - p.bump(); - paths::use_path(p); - p.expect(R_PAREN); - } - _ => (), - } - } - vis.complete(p, VISIBILITY); - } -} - -fn alias(p: &mut Parser) -> bool { - if p.at(AS_KW) { - let alias = p.start(); - p.bump(); - p.expect(IDENT); - alias.complete(p, ALIAS); - } - true //FIXME: return false if three are errors -} - -fn error_block(p: &mut Parser, message: &str) { - assert!(p.at(L_CURLY)); - let err = p.start(); - p.error().message(message).emit(); - p.bump(); - let mut level: u32 = 1; - while level > 0 && !p.at(EOF) { - match p.current() { - L_CURLY => level += 1, - R_CURLY => level -= 1, - _ => (), - } - p.bump(); - } - err.complete(p, ERROR); -} - -impl<'p> Parser<'p> { - fn at(&self, l: L) -> bool { - l.is_ahead(self) - } - - fn err_and_bump(&mut self, message: &str) { - let err = self.start(); - self.error().message(message).emit(); - self.bump(); - err.complete(self, ERROR); - } - - fn expect(&mut self, kind: SyntaxKind) -> bool { - if self.at(kind) { - self.bump(); - true - } else { - self.error().message(format!("expected {:?}", kind)).emit(); - false - } - } - - fn eat(&mut self, kind: SyntaxKind) -> bool { - self.current() == kind && { - self.bump(); - true - } - } -} - -trait Lookahead: Copy { - fn is_ahead(self, p: &Parser) -> bool; -} - -impl Lookahead for SyntaxKind { - fn is_ahead(self, p: &Parser) -> bool { - p.current() == self - } -} - -impl Lookahead for [SyntaxKind; 2] { - fn is_ahead(self, p: &Parser) -> bool { - p.current() == self[0] && p.nth(1) == self[1] - } -} - -impl Lookahead for [SyntaxKind; 3] { - fn is_ahead(self, p: &Parser) -> bool { - p.current() == self[0] && p.nth(1) == self[1] && p.nth(2) == self[2] - } -} - -#[derive(Clone, Copy)] -struct AnyOf<'a>(&'a [SyntaxKind]); - -impl<'a> Lookahead for AnyOf<'a> { - fn is_ahead(self, p: &Parser) -> bool { - let curr = p.current(); - self.0.iter().any(|&k| k == curr) - } -} diff --git a/src/parser/event_parser/grammar/paths.rs b/src/parser/event_parser/grammar/paths.rs deleted file mode 100644 index 6efac2610..000000000 --- a/src/parser/event_parser/grammar/paths.rs +++ /dev/null @@ -1,49 +0,0 @@ -use super::*; - -pub(super) fn is_path_start(p: &Parser) -> bool { - AnyOf(&[IDENT, SELF_KW, SUPER_KW, COLONCOLON]).is_ahead(p) -} - -pub(super) fn use_path(p: &mut Parser) { - path(p) -} - -pub(super) fn type_path(p: &mut Parser) { - path(p) -} - -fn path(p: &mut Parser) { - if !is_path_start(p) { - return; - } - let path = p.start(); - path_segment(p, true); - let mut qual = path.complete(p, PATH); - loop { - let use_tree = match p.nth(1) { - STAR | L_CURLY => true, - _ => false, - }; - if p.at(COLONCOLON) && !use_tree { - let path = qual.precede(p); - p.bump(); - path_segment(p, false); - let path = path.complete(p, PATH); - qual = path; - } else { - break; - } - } -} - -fn path_segment(p: &mut Parser, first: bool) { - let segment = p.start(); - if first { - p.eat(COLONCOLON); - } - match p.current() { - IDENT | SELF_KW | SUPER_KW => p.bump(), - _ => p.error().message("expected identifier").emit(), - }; - segment.complete(p, PATH_SEGMENT); -} diff --git a/src/parser/event_parser/grammar/type_params.rs b/src/parser/event_parser/grammar/type_params.rs deleted file mode 100644 index 12c9a5362..000000000 --- a/src/parser/event_parser/grammar/type_params.rs +++ /dev/null @@ -1,75 +0,0 @@ -use super::*; - -pub(super) fn list(p: &mut Parser) { - if !p.at(L_ANGLE) { - return; - } - let m = p.start(); - p.bump(); - - while !p.at(EOF) && !p.at(R_ANGLE) { - match p.current() { - LIFETIME => lifetime_param(p), - IDENT => type_param(p), - _ => p.err_and_bump("expected type parameter"), - } - if !p.at(R_ANGLE) && !p.expect(COMMA) { - break; - } - } - p.expect(R_ANGLE); - m.complete(p, TYPE_PARAM_LIST); - - fn lifetime_param(p: &mut Parser) { - assert!(p.at(LIFETIME)); - let m = p.start(); - p.bump(); - if p.eat(COLON) { - while p.at(LIFETIME) { - p.bump(); - if !p.eat(PLUS) { - break; - } - } - } - m.complete(p, LIFETIME_PARAM); - } - - fn type_param(p: &mut Parser) { - assert!(p.at(IDENT)); - let m = p.start(); - p.bump(); - if p.eat(COLON) { - loop { - let has_paren = p.eat(L_PAREN); - p.eat(QUESTION); - if p.at(FOR_KW) { - //TODO - } - if p.at(LIFETIME) { - p.bump(); - } else if paths::is_path_start(p) { - paths::type_path(p); - } else { - break; - } - if has_paren { - p.expect(R_PAREN); - } - if !p.eat(PLUS) { - break; - } - } - } - if p.at(EQ) { - types::type_ref(p) - } - m.complete(p, TYPE_PARAM); - } -} - -pub(super) fn where_clause(p: &mut Parser) { - if p.at(WHERE_KW) { - p.bump(); - } -} diff --git a/src/parser/event_parser/grammar/types.rs b/src/parser/event_parser/grammar/types.rs deleted file mode 100644 index 1a3d44a0a..000000000 --- a/src/parser/event_parser/grammar/types.rs +++ /dev/null @@ -1,5 +0,0 @@ -use super::*; - -pub(super) fn type_ref(p: &mut Parser) { - p.expect(IDENT); -} diff --git a/src/parser/event_parser/mod.rs b/src/parser/event_parser/mod.rs deleted file mode 100644 index 7823c476c..000000000 --- a/src/parser/event_parser/mod.rs +++ /dev/null @@ -1,74 +0,0 @@ -use {SyntaxKind, Token}; - -#[macro_use] -mod parser; -mod grammar; - -/// `Parser` produces a flat list of `Event`s. -/// They are converted to a tree-structure in -/// a separate pass, via `TreeBuilder`. -#[derive(Debug)] -pub(crate) enum Event { - /// This event signifies the start of the node. - /// It should be either abandoned (in which case the - /// `kind` is `TOMBSTONE`, and the event is ignored), - /// or completed via a `Finish` event. - /// - /// All tokens between a `Start` and a `Finish` would - /// become the children of the respective node. - /// - /// For left-recursive syntactic constructs, the parser produces - /// a child node before it sees a parent. `forward_parent` - /// exists to allow to tweak parent-child relationships. - /// - /// Consider this path - /// - /// foo::bar - /// - /// The events for it would look like this: - /// - /// - /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH - /// | /\ - /// | | - /// +------forward-parent------+ - /// - /// And the tree would look like this - /// - /// +--PATH---------+ - /// | | | - /// | | | - /// | '::' 'bar' - /// | - /// PATH - /// | - /// 'foo' - /// - /// See also `CompleteMarker::precede`. - Start { - kind: SyntaxKind, - forward_parent: Option, - }, - - /// Complete the previous `Start` event - Finish, - - /// Produce a single leaf-element. - /// `n_raw_tokens` is used to glue complex contextual tokens. - /// For example, lexer tokenizes `>>` as `>`, `>`, and - /// `n_raw_tokens = 2` is used to produced a single `>>`. - Token { - kind: SyntaxKind, - n_raw_tokens: u8, - }, - - Error { - message: String, - }, -} - -pub(crate) fn parse<'t>(text: &'t str, raw_tokens: &'t [Token]) -> Vec { - let mut parser = parser::Parser::new(text, raw_tokens); - grammar::file(&mut parser); - parser.into_events() -} diff --git a/src/parser/event_parser/parser.rs b/src/parser/event_parser/parser.rs deleted file mode 100644 index 6cf6ac9b9..000000000 --- a/src/parser/event_parser/parser.rs +++ /dev/null @@ -1,201 +0,0 @@ -use {SyntaxKind, TextUnit, Token}; -use super::Event; -use super::super::is_insignificant; -use SyntaxKind::{EOF, TOMBSTONE}; - -pub(crate) struct Marker { - pos: u32, -} - -impl Marker { - pub fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompleteMarker { - match self.event(p) { - &mut Event::Start { - kind: ref mut slot, .. - } => { - *slot = kind; - } - _ => unreachable!(), - } - p.event(Event::Finish); - let result = CompleteMarker { pos: self.pos }; - ::std::mem::forget(self); - result - } - - pub fn abandon(self, p: &mut Parser) { - let idx = self.pos as usize; - if idx == p.events.len() - 1 { - match p.events.pop() { - Some(Event::Start { - kind: TOMBSTONE, - forward_parent: None, - }) => (), - _ => unreachable!(), - } - } - ::std::mem::forget(self); - } - - fn event<'p>(&self, p: &'p mut Parser) -> &'p mut Event { - &mut p.events[self.idx()] - } - - fn idx(&self) -> usize { - self.pos as usize - } -} - -impl Drop for Marker { - fn drop(&mut self) { - if !::std::thread::panicking() { - panic!("Each marker should be eithe completed or abandoned"); - } - } -} - -pub(crate) struct CompleteMarker { - pos: u32, -} - -impl CompleteMarker { - pub(crate) fn precede(self, p: &mut Parser) -> Marker { - let m = p.start(); - match p.events[self.pos as usize] { - Event::Start { - ref mut forward_parent, - .. - } => { - *forward_parent = Some(m.pos - self.pos); - } - _ => unreachable!(), - } - m - } -} - -pub(crate) struct TokenSet { - pub tokens: &'static [SyntaxKind], -} - -impl TokenSet { - pub fn contains(&self, kind: SyntaxKind) -> bool { - self.tokens.contains(&kind) - } -} - -#[macro_export] -macro_rules! token_set { - ($($t:ident),*) => { - TokenSet { - tokens: &[$($t),*], - } - }; - - ($($t:ident),* ,) => { - token_set!($($t),*) - }; -} - -pub(crate) struct Parser<'t> { - #[allow(unused)] - text: &'t str, - #[allow(unused)] - start_offsets: Vec, - tokens: Vec, // non-whitespace tokens - - pos: usize, - events: Vec, -} - -impl<'t> Parser<'t> { - pub(crate) fn new(text: &'t str, raw_tokens: &'t [Token]) -> Parser<'t> { - let mut tokens = Vec::new(); - let mut start_offsets = Vec::new(); - let mut len = TextUnit::new(0); - for &token in raw_tokens.iter() { - if !is_insignificant(token.kind) { - tokens.push(token); - start_offsets.push(len); - } - len += token.len; - } - - Parser { - text, - start_offsets, - tokens, - - pos: 0, - events: Vec::new(), - } - } - - pub(crate) fn into_events(self) -> Vec { - assert_eq!(self.current(), EOF); - self.events - } - - pub(crate) fn start(&mut self) -> Marker { - let m = Marker { - pos: self.events.len() as u32, - }; - self.event(Event::Start { - kind: TOMBSTONE, - forward_parent: None, - }); - m - } - - pub(crate) fn error<'p>(&'p mut self) -> ErrorBuilder<'p, 't> { - ErrorBuilder::new(self) - } - - pub(crate) fn bump(&mut self) { - let kind = self.current(); - if kind == EOF { - return; - } - self.pos += 1; - self.event(Event::Token { - kind, - n_raw_tokens: 1, - }); - } - - pub(crate) fn nth(&self, n: usize) -> SyntaxKind { - self.tokens.get(self.pos + n).map(|t| t.kind).unwrap_or(EOF) - } - - pub(crate) fn current(&self) -> SyntaxKind { - self.nth(0) - } - - fn event(&mut self, event: Event) { - self.events.push(event) - } -} - -pub(crate) struct ErrorBuilder<'p, 't: 'p> { - message: Option, - parser: &'p mut Parser<'t>, -} - -impl<'t, 'p> ErrorBuilder<'p, 't> { - fn new(parser: &'p mut Parser<'t>) -> Self { - ErrorBuilder { - message: None, - parser, - } - } - - pub fn message>(mut self, m: M) -> Self { - self.message = Some(m.into()); - self - } - - pub fn emit(self) { - let message = self.message.expect("Error message not set"); - self.parser.event(Event::Error { message }); - } -} diff --git a/src/parser/grammar/attributes.rs b/src/parser/grammar/attributes.rs new file mode 100644 index 000000000..8bf04afce --- /dev/null +++ b/src/parser/grammar/attributes.rs @@ -0,0 +1,79 @@ +use super::*; + +pub(super) fn inner_attributes(p: &mut Parser) { + while p.at([POUND, EXCL]) { + attribute(p, true) + } +} + +pub(super) fn outer_attributes(p: &mut Parser) { + while p.at(POUND) { + attribute(p, false) + } +} + +fn attribute(p: &mut Parser, inner: bool) { + let attr = p.start(); + assert!(p.at(POUND)); + p.bump(); + + if inner { + assert!(p.at(EXCL)); + p.bump(); + } + + if p.expect(L_BRACK) { + meta_item(p); + p.expect(R_BRACK); + } + attr.complete(p, ATTR); +} + +fn meta_item(p: &mut Parser) { + if p.at(IDENT) { + let meta_item = p.start(); + p.bump(); + match p.current() { + EQ => { + p.bump(); + if !expressions::literal(p) { + p.error().message("expected literal").emit(); + } + } + L_PAREN => meta_item_arg_list(p), + _ => (), + } + meta_item.complete(p, META_ITEM); + } else { + p.error().message("expected attribute value").emit() + } +} + +fn meta_item_arg_list(p: &mut Parser) { + assert!(p.at(L_PAREN)); + p.bump(); + loop { + match p.current() { + EOF | R_PAREN => break, + IDENT => meta_item(p), + c => if !expressions::literal(p) { + let message = "expected attribute"; + + if items::ITEM_FIRST.contains(c) { + p.error().message(message).emit(); + return; + } + + let err = p.start(); + p.error().message(message).emit(); + p.bump(); + err.complete(p, ERROR); + continue; + }, + } + if !p.at(R_PAREN) { + p.expect(COMMA); + } + } + p.expect(R_PAREN); +} diff --git a/src/parser/grammar/expressions.rs b/src/parser/grammar/expressions.rs new file mode 100644 index 000000000..8caaf3553 --- /dev/null +++ b/src/parser/grammar/expressions.rs @@ -0,0 +1,20 @@ +use super::*; + +pub(super) fn literal(p: &mut Parser) -> bool { + match p.current() { + TRUE_KW | FALSE_KW | INT_NUMBER | FLOAT_NUMBER | BYTE | CHAR | STRING | RAW_STRING + | BYTE_STRING | RAW_BYTE_STRING => { + let lit = p.start(); + p.bump(); + lit.complete(p, LITERAL); + true + } + _ => false, + } +} + +pub(super) fn expr(p: &mut Parser) { + if !literal(p) { + p.error().message("expected expression").emit(); + } +} diff --git a/src/parser/grammar/items/consts.rs b/src/parser/grammar/items/consts.rs new file mode 100644 index 000000000..c9881d681 --- /dev/null +++ b/src/parser/grammar/items/consts.rs @@ -0,0 +1,21 @@ +use super::*; + +pub(super) fn static_item(p: &mut Parser) { + const_or_static(p, STATIC_KW) +} + +pub(super) fn const_item(p: &mut Parser) { + const_or_static(p, CONST_KW) +} + +fn const_or_static(p: &mut Parser, kw: SyntaxKind) { + assert!(p.at(kw)); + p.bump(); + p.eat(MUT_KW); // TODO: validator to forbid const mut + p.expect(IDENT); + p.expect(COLON); + types::type_ref(p); + p.expect(EQ); + expressions::expr(p); + p.expect(SEMI); +} diff --git a/src/parser/grammar/items/mod.rs b/src/parser/grammar/items/mod.rs new file mode 100644 index 000000000..3612802e1 --- /dev/null +++ b/src/parser/grammar/items/mod.rs @@ -0,0 +1,196 @@ +use super::*; + +mod structs; +mod use_item; +mod consts; +mod traits; + +pub(super) fn mod_contents(p: &mut Parser, stop_on_r_curly: bool) { + attributes::inner_attributes(p); + while !p.at(EOF) && !(stop_on_r_curly && p.at(R_CURLY)) { + item(p); + } +} + +pub(super) const ITEM_FIRST: TokenSet = token_set![ + EXTERN_KW, MOD_KW, USE_KW, STRUCT_KW, ENUM_KW, FN_KW, PUB_KW, POUND +]; + +fn item(p: &mut Parser) { + let item = p.start(); + attributes::outer_attributes(p); + visibility(p); + let la = p.nth(1); + let item_kind = match p.current() { + USE_KW => { + use_item::use_item(p); + USE_ITEM + } + // test extern_crate + // extern crate foo; + EXTERN_KW if la == CRATE_KW => { + extern_crate_item(p); + EXTERN_CRATE_ITEM + } + EXTERN_KW => { + abi(p); + match p.current() { + // test extern_fn + // extern fn foo() {} + FN_KW => { + fn_item(p); + FN_ITEM + } + // test extern_block + // extern {} + L_CURLY => { + extern_block(p); + EXTERN_BLOCK + } + // test extern_struct + // extern struct Foo; + _ => { + item.abandon(p); + p.error().message("expected `fn` or `{`").emit(); + return; + } + } + } + STATIC_KW => { + consts::static_item(p); + STATIC_ITEM + } + CONST_KW => match p.nth(1) { + // test const_fn + // const fn foo() {} + FN_KW => { + p.bump(); + fn_item(p); + FN_ITEM + } + // test const_unsafe_fn + // const unsafe fn foo() {} + UNSAFE_KW if p.nth(2) == FN_KW => { + p.bump(); + p.bump(); + fn_item(p); + FN_ITEM + } + _ => { + consts::const_item(p); + CONST_ITEM + } + }, + // TODO: auto trait + // test unsafe_trait + // unsafe trait T {} + UNSAFE_KW if la == TRAIT_KW => { + p.bump(); + traits::trait_item(p); + TRAIT_ITEM + } + // TODO: default impl + // test unsafe_impl + // unsafe impl Foo {} + UNSAFE_KW if la == IMPL_KW => { + p.bump(); + traits::impl_item(p); + IMPL_ITEM + } + MOD_KW => { + mod_item(p); + MOD_ITEM + } + STRUCT_KW => { + structs::struct_item(p); + STRUCT_ITEM + } + ENUM_KW => { + structs::enum_item(p); + ENUM_ITEM + } + FN_KW => { + fn_item(p); + FN_ITEM + } + L_CURLY => { + item.abandon(p); + error_block(p, "expected item"); + return; + } + err_token => { + item.abandon(p); + let message = if err_token == SEMI { + //TODO: if the item is incomplete, this message is misleading + "expected item, found `;`\n\ + consider removing this semicolon" + } else { + "expected item" + }; + p.err_and_bump(message); + return; + } + }; + item.complete(p, item_kind); +} + +fn extern_crate_item(p: &mut Parser) { + assert!(p.at(EXTERN_KW)); + p.bump(); + assert!(p.at(CRATE_KW)); + p.bump(); + + p.expect(IDENT) && alias(p) && p.expect(SEMI); +} + +fn extern_block(p: &mut Parser) { + assert!(p.at(L_CURLY)); + p.bump(); + p.expect(R_CURLY); +} + +fn mod_item(p: &mut Parser) { + assert!(p.at(MOD_KW)); + p.bump(); + + if p.expect(IDENT) && !p.eat(SEMI) { + if p.expect(L_CURLY) { + mod_contents(p, true); + p.expect(R_CURLY); + } + } +} + +fn abi(p: &mut Parser) { + assert!(p.at(EXTERN_KW)); + let abi = p.start(); + p.bump(); + match p.current() { + STRING | RAW_STRING => p.bump(), + _ => (), + } + abi.complete(p, ABI); +} + +fn fn_item(p: &mut Parser) { + assert!(p.at(FN_KW)); + p.bump(); + + p.expect(IDENT); + if p.at(L_PAREN) { + fn_value_parameters(p); + } else { + p.error().message("expected function arguments").emit(); + } + + if p.at(L_CURLY) { + p.expect(L_CURLY); + p.expect(R_CURLY); + } + + fn fn_value_parameters(p: &mut Parser) { + assert!(p.at(L_PAREN)); + p.bump(); + p.expect(R_PAREN); + } +} diff --git a/src/parser/grammar/items/structs.rs b/src/parser/grammar/items/structs.rs new file mode 100644 index 000000000..69d95c698 --- /dev/null +++ b/src/parser/grammar/items/structs.rs @@ -0,0 +1,117 @@ +use super::*; + +pub(super) fn struct_item(p: &mut Parser) { + assert!(p.at(STRUCT_KW)); + p.bump(); + + if !p.expect(IDENT) { + return; + } + type_params::list(p); + match p.current() { + WHERE_KW => { + type_params::where_clause(p); + match p.current() { + SEMI => { + p.bump(); + return; + } + L_CURLY => named_fields(p), + _ => { + //TODO: special case `(` error message + p.error().message("expected `;` or `{`").emit(); + return; + } + } + } + SEMI => { + p.bump(); + return; + } + L_CURLY => named_fields(p), + L_PAREN => { + pos_fields(p); + p.expect(SEMI); + } + _ => { + p.error().message("expected `;`, `{`, or `(`").emit(); + return; + } + } +} + +pub(super) fn enum_item(p: &mut Parser) { + assert!(p.at(ENUM_KW)); + p.bump(); + p.expect(IDENT); + type_params::list(p); + type_params::where_clause(p); + if p.expect(L_CURLY) { + while !p.at(EOF) && !p.at(R_CURLY) { + let var = p.start(); + attributes::outer_attributes(p); + if p.at(IDENT) { + p.bump(); + match p.current() { + L_CURLY => named_fields(p), + L_PAREN => pos_fields(p), + EQ => { + p.bump(); + expressions::expr(p); + } + _ => (), + } + var.complete(p, ENUM_VARIANT); + } else { + var.abandon(p); + p.err_and_bump("expected enum variant"); + } + if !p.at(R_CURLY) { + p.expect(COMMA); + } + } + p.expect(R_CURLY); + } +} + +fn named_fields(p: &mut Parser) { + assert!(p.at(L_CURLY)); + p.bump(); + while !p.at(R_CURLY) && !p.at(EOF) { + named_field(p); + if !p.at(R_CURLY) { + p.expect(COMMA); + } + } + p.expect(R_CURLY); + + fn named_field(p: &mut Parser) { + let field = p.start(); + visibility(p); + if p.expect(IDENT) { + p.expect(COLON); + types::type_ref(p); + field.complete(p, NAMED_FIELD); + } else { + field.abandon(p); + p.err_and_bump("expected field declaration"); + } + } +} + +fn pos_fields(p: &mut Parser) { + if !p.expect(L_PAREN) { + return; + } + while !p.at(R_PAREN) && !p.at(EOF) { + let pos_field = p.start(); + visibility(p); + types::type_ref(p); + pos_field.complete(p, POS_FIELD); + + if !p.at(R_PAREN) { + p.expect(COMMA); + } + } + p.expect(R_PAREN); +} diff --git a/src/parser/grammar/items/traits.rs b/src/parser/grammar/items/traits.rs new file mode 100644 index 000000000..3bef9639f --- /dev/null +++ b/src/parser/grammar/items/traits.rs @@ -0,0 +1,17 @@ +use super::*; + +pub(super) fn trait_item(p: &mut Parser) { + assert!(p.at(TRAIT_KW)); + p.bump(); + p.expect(IDENT); + p.expect(L_CURLY); + p.expect(R_CURLY); +} + +pub(super) fn impl_item(p: &mut Parser) { + assert!(p.at(IMPL_KW)); + p.bump(); + p.expect(IDENT); + p.expect(L_CURLY); + p.expect(R_CURLY); +} diff --git a/src/parser/grammar/items/use_item.rs b/src/parser/grammar/items/use_item.rs new file mode 100644 index 000000000..38e7b3f8a --- /dev/null +++ b/src/parser/grammar/items/use_item.rs @@ -0,0 +1,66 @@ +use super::*; + +pub(super) fn use_item(p: &mut Parser) { + assert!(p.at(USE_KW)); + p.bump(); + use_tree(p); + p.expect(SEMI); +} + +fn use_tree(p: &mut Parser) { + let la = p.nth(1); + let m = p.start(); + match (p.current(), la) { + (STAR, _) => p.bump(), + (COLONCOLON, STAR) => { + p.bump(); + p.bump(); + } + (L_CURLY, _) | (COLONCOLON, L_CURLY) => { + if p.at(COLONCOLON) { + p.bump(); + } + nested_trees(p); + } + _ if paths::is_path_start(p) => { + paths::use_path(p); + match p.current() { + AS_KW => { + alias(p); + } + COLONCOLON => { + p.bump(); + match p.current() { + STAR => { + p.bump(); + } + L_CURLY => nested_trees(p), + _ => { + // is this unreachable? + p.error().message("expected `{` or `*`").emit(); + } + } + } + _ => (), + } + } + _ => { + m.abandon(p); + p.err_and_bump("expected one of `*`, `::`, `{`, `self`, `super`, `indent`"); + return; + } + } + m.complete(p, USE_TREE); +} + +fn nested_trees(p: &mut Parser) { + assert!(p.at(L_CURLY)); + p.bump(); + while !p.at(EOF) && !p.at(R_CURLY) { + use_tree(p); + if !p.at(R_CURLY) { + p.expect(COMMA); + } + } + p.expect(R_CURLY); +} diff --git a/src/parser/grammar/mod.rs b/src/parser/grammar/mod.rs new file mode 100644 index 000000000..afce308d0 --- /dev/null +++ b/src/parser/grammar/mod.rs @@ -0,0 +1,130 @@ +use super::parser::{Parser, TokenSet}; +use SyntaxKind; +use syntax_kinds::*; + +mod items; +mod attributes; +mod expressions; +mod types; +mod paths; +mod type_params; + +pub(crate) fn file(p: &mut Parser) { + let file = p.start(); + p.eat(SHEBANG); + items::mod_contents(p, false); + file.complete(p, FILE); +} + +fn visibility(p: &mut Parser) { + if p.at(PUB_KW) { + let vis = p.start(); + p.bump(); + if p.at(L_PAREN) { + match p.nth(1) { + CRATE_KW | SELF_KW | SUPER_KW => { + p.bump(); + p.bump(); + p.expect(R_PAREN); + } + IN_KW => { + p.bump(); + p.bump(); + paths::use_path(p); + p.expect(R_PAREN); + } + _ => (), + } + } + vis.complete(p, VISIBILITY); + } +} + +fn alias(p: &mut Parser) -> bool { + if p.at(AS_KW) { + let alias = p.start(); + p.bump(); + p.expect(IDENT); + alias.complete(p, ALIAS); + } + true //FIXME: return false if three are errors +} + +fn error_block(p: &mut Parser, message: &str) { + assert!(p.at(L_CURLY)); + let err = p.start(); + p.error().message(message).emit(); + p.bump(); + let mut level: u32 = 1; + while level > 0 && !p.at(EOF) { + match p.current() { + L_CURLY => level += 1, + R_CURLY => level -= 1, + _ => (), + } + p.bump(); + } + err.complete(p, ERROR); +} + +impl<'p> Parser<'p> { + fn at(&self, l: L) -> bool { + l.is_ahead(self) + } + + fn err_and_bump(&mut self, message: &str) { + let err = self.start(); + self.error().message(message).emit(); + self.bump(); + err.complete(self, ERROR); + } + + fn expect(&mut self, kind: SyntaxKind) -> bool { + if self.at(kind) { + self.bump(); + true + } else { + self.error().message(format!("expected {:?}", kind)).emit(); + false + } + } + + fn eat(&mut self, kind: SyntaxKind) -> bool { + self.current() == kind && { + self.bump(); + true + } + } +} + +trait Lookahead: Copy { + fn is_ahead(self, p: &Parser) -> bool; +} + +impl Lookahead for SyntaxKind { + fn is_ahead(self, p: &Parser) -> bool { + p.current() == self + } +} + +impl Lookahead for [SyntaxKind; 2] { + fn is_ahead(self, p: &Parser) -> bool { + p.current() == self[0] && p.nth(1) == self[1] + } +} + +impl Lookahead for [SyntaxKind; 3] { + fn is_ahead(self, p: &Parser) -> bool { + p.current() == self[0] && p.nth(1) == self[1] && p.nth(2) == self[2] + } +} + +#[derive(Clone, Copy)] +struct AnyOf<'a>(&'a [SyntaxKind]); + +impl<'a> Lookahead for AnyOf<'a> { + fn is_ahead(self, p: &Parser) -> bool { + let curr = p.current(); + self.0.iter().any(|&k| k == curr) + } +} diff --git a/src/parser/grammar/paths.rs b/src/parser/grammar/paths.rs new file mode 100644 index 000000000..6efac2610 --- /dev/null +++ b/src/parser/grammar/paths.rs @@ -0,0 +1,49 @@ +use super::*; + +pub(super) fn is_path_start(p: &Parser) -> bool { + AnyOf(&[IDENT, SELF_KW, SUPER_KW, COLONCOLON]).is_ahead(p) +} + +pub(super) fn use_path(p: &mut Parser) { + path(p) +} + +pub(super) fn type_path(p: &mut Parser) { + path(p) +} + +fn path(p: &mut Parser) { + if !is_path_start(p) { + return; + } + let path = p.start(); + path_segment(p, true); + let mut qual = path.complete(p, PATH); + loop { + let use_tree = match p.nth(1) { + STAR | L_CURLY => true, + _ => false, + }; + if p.at(COLONCOLON) && !use_tree { + let path = qual.precede(p); + p.bump(); + path_segment(p, false); + let path = path.complete(p, PATH); + qual = path; + } else { + break; + } + } +} + +fn path_segment(p: &mut Parser, first: bool) { + let segment = p.start(); + if first { + p.eat(COLONCOLON); + } + match p.current() { + IDENT | SELF_KW | SUPER_KW => p.bump(), + _ => p.error().message("expected identifier").emit(), + }; + segment.complete(p, PATH_SEGMENT); +} diff --git a/src/parser/grammar/type_params.rs b/src/parser/grammar/type_params.rs new file mode 100644 index 000000000..12c9a5362 --- /dev/null +++ b/src/parser/grammar/type_params.rs @@ -0,0 +1,75 @@ +use super::*; + +pub(super) fn list(p: &mut Parser) { + if !p.at(L_ANGLE) { + return; + } + let m = p.start(); + p.bump(); + + while !p.at(EOF) && !p.at(R_ANGLE) { + match p.current() { + LIFETIME => lifetime_param(p), + IDENT => type_param(p), + _ => p.err_and_bump("expected type parameter"), + } + if !p.at(R_ANGLE) && !p.expect(COMMA) { + break; + } + } + p.expect(R_ANGLE); + m.complete(p, TYPE_PARAM_LIST); + + fn lifetime_param(p: &mut Parser) { + assert!(p.at(LIFETIME)); + let m = p.start(); + p.bump(); + if p.eat(COLON) { + while p.at(LIFETIME) { + p.bump(); + if !p.eat(PLUS) { + break; + } + } + } + m.complete(p, LIFETIME_PARAM); + } + + fn type_param(p: &mut Parser) { + assert!(p.at(IDENT)); + let m = p.start(); + p.bump(); + if p.eat(COLON) { + loop { + let has_paren = p.eat(L_PAREN); + p.eat(QUESTION); + if p.at(FOR_KW) { + //TODO + } + if p.at(LIFETIME) { + p.bump(); + } else if paths::is_path_start(p) { + paths::type_path(p); + } else { + break; + } + if has_paren { + p.expect(R_PAREN); + } + if !p.eat(PLUS) { + break; + } + } + } + if p.at(EQ) { + types::type_ref(p) + } + m.complete(p, TYPE_PARAM); + } +} + +pub(super) fn where_clause(p: &mut Parser) { + if p.at(WHERE_KW) { + p.bump(); + } +} diff --git a/src/parser/grammar/types.rs b/src/parser/grammar/types.rs new file mode 100644 index 000000000..1a3d44a0a --- /dev/null +++ b/src/parser/grammar/types.rs @@ -0,0 +1,5 @@ +use super::*; + +pub(super) fn type_ref(p: &mut Parser) { + p.expect(IDENT); +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c5525ff9c..22ccb4921 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1,88 +1,23 @@ -use {File, FileBuilder, Sink, SyntaxKind, Token}; +use {File, SyntaxKind, Token}; use syntax_kinds::*; -mod event_parser; -use self::event_parser::Event; +#[macro_use] +mod parser; +mod event; +mod grammar; +use self::event::Event; /// Parse a sequence of tokens into the representative node tree pub fn parse(text: String, tokens: &[Token]) -> File { - let events = event_parser::parse(&text, tokens); - from_events_to_file(text, tokens, events) + let events = parse_into_events(&text, tokens); + event::to_file(text, tokens, events) } -fn from_events_to_file(text: String, tokens: &[Token], events: Vec) -> File { - let mut builder = FileBuilder::new(text); - let mut idx = 0; - - let mut holes = Vec::new(); - let mut forward_parents = Vec::new(); - - for (i, event) in events.iter().enumerate() { - if holes.last() == Some(&i) { - holes.pop(); - continue; - } - - match event { - &Event::Start { - kind: TOMBSTONE, .. - } => (), - - &Event::Start { .. } => { - forward_parents.clear(); - let mut idx = i; - loop { - let (kind, fwd) = match events[idx] { - Event::Start { - kind, - forward_parent, - } => (kind, forward_parent), - _ => unreachable!(), - }; - forward_parents.push((idx, kind)); - if let Some(fwd) = fwd { - idx += fwd as usize; - } else { - break; - } - } - for &(idx, kind) in forward_parents.iter().into_iter().rev() { - builder.start_internal(kind); - holes.push(idx); - } - holes.pop(); - } - &Event::Finish => { - while idx < tokens.len() { - let token = tokens[idx]; - if is_insignificant(token.kind) { - idx += 1; - builder.leaf(token.kind, token.len); - } else { - break; - } - } - builder.finish_internal() - } - &Event::Token { - kind: _, - mut n_raw_tokens, - } => loop { - let token = tokens[idx]; - if !is_insignificant(token.kind) { - n_raw_tokens -= 1; - } - idx += 1; - builder.leaf(token.kind, token.len); - if n_raw_tokens == 0 { - break; - } - }, - &Event::Error { ref message } => builder.error().message(message.clone()).emit(), - } - } - builder.finish() +pub(crate) fn parse_into_events<'t>(text: &'t str, raw_tokens: &'t [Token]) -> Vec { + let mut parser = parser::Parser::new(text, raw_tokens); + grammar::file(&mut parser); + parser.into_events() } fn is_insignificant(kind: SyntaxKind) -> bool { diff --git a/src/parser/parser.rs b/src/parser/parser.rs new file mode 100644 index 000000000..3f4c8a07d --- /dev/null +++ b/src/parser/parser.rs @@ -0,0 +1,201 @@ +use {SyntaxKind, TextUnit, Token}; +use super::Event; +use super::is_insignificant; +use SyntaxKind::{EOF, TOMBSTONE}; + +pub(crate) struct Marker { + pos: u32, +} + +impl Marker { + pub fn complete(self, p: &mut Parser, kind: SyntaxKind) -> CompleteMarker { + match self.event(p) { + &mut Event::Start { + kind: ref mut slot, .. + } => { + *slot = kind; + } + _ => unreachable!(), + } + p.event(Event::Finish); + let result = CompleteMarker { pos: self.pos }; + ::std::mem::forget(self); + result + } + + pub fn abandon(self, p: &mut Parser) { + let idx = self.pos as usize; + if idx == p.events.len() - 1 { + match p.events.pop() { + Some(Event::Start { + kind: TOMBSTONE, + forward_parent: None, + }) => (), + _ => unreachable!(), + } + } + ::std::mem::forget(self); + } + + fn event<'p>(&self, p: &'p mut Parser) -> &'p mut Event { + &mut p.events[self.idx()] + } + + fn idx(&self) -> usize { + self.pos as usize + } +} + +impl Drop for Marker { + fn drop(&mut self) { + if !::std::thread::panicking() { + panic!("Each marker should be eithe completed or abandoned"); + } + } +} + +pub(crate) struct CompleteMarker { + pos: u32, +} + +impl CompleteMarker { + pub(crate) fn precede(self, p: &mut Parser) -> Marker { + let m = p.start(); + match p.events[self.pos as usize] { + Event::Start { + ref mut forward_parent, + .. + } => { + *forward_parent = Some(m.pos - self.pos); + } + _ => unreachable!(), + } + m + } +} + +pub(crate) struct TokenSet { + pub tokens: &'static [SyntaxKind], +} + +impl TokenSet { + pub fn contains(&self, kind: SyntaxKind) -> bool { + self.tokens.contains(&kind) + } +} + +#[macro_export] +macro_rules! token_set { + ($($t:ident),*) => { + TokenSet { + tokens: &[$($t),*], + } + }; + + ($($t:ident),* ,) => { + token_set!($($t),*) + }; +} + +pub(crate) struct Parser<'t> { + #[allow(unused)] + text: &'t str, + #[allow(unused)] + start_offsets: Vec, + tokens: Vec, // non-whitespace tokens + + pos: usize, + events: Vec, +} + +impl<'t> Parser<'t> { + pub(crate) fn new(text: &'t str, raw_tokens: &'t [Token]) -> Parser<'t> { + let mut tokens = Vec::new(); + let mut start_offsets = Vec::new(); + let mut len = TextUnit::new(0); + for &token in raw_tokens.iter() { + if !is_insignificant(token.kind) { + tokens.push(token); + start_offsets.push(len); + } + len += token.len; + } + + Parser { + text, + start_offsets, + tokens, + + pos: 0, + events: Vec::new(), + } + } + + pub(crate) fn into_events(self) -> Vec { + assert_eq!(self.current(), EOF); + self.events + } + + pub(crate) fn start(&mut self) -> Marker { + let m = Marker { + pos: self.events.len() as u32, + }; + self.event(Event::Start { + kind: TOMBSTONE, + forward_parent: None, + }); + m + } + + pub(crate) fn error<'p>(&'p mut self) -> ErrorBuilder<'p, 't> { + ErrorBuilder::new(self) + } + + pub(crate) fn bump(&mut self) { + let kind = self.current(); + if kind == EOF { + return; + } + self.pos += 1; + self.event(Event::Token { + kind, + n_raw_tokens: 1, + }); + } + + pub(crate) fn nth(&self, n: usize) -> SyntaxKind { + self.tokens.get(self.pos + n).map(|t| t.kind).unwrap_or(EOF) + } + + pub(crate) fn current(&self) -> SyntaxKind { + self.nth(0) + } + + fn event(&mut self, event: Event) { + self.events.push(event) + } +} + +pub(crate) struct ErrorBuilder<'p, 't: 'p> { + message: Option, + parser: &'p mut Parser<'t>, +} + +impl<'t, 'p> ErrorBuilder<'p, 't> { + fn new(parser: &'p mut Parser<'t>) -> Self { + ErrorBuilder { + message: None, + parser, + } + } + + pub fn message>(mut self, m: M) -> Self { + self.message = Some(m.into()); + self + } + + pub fn emit(self) { + let message = self.message.expect("Error message not set"); + self.parser.event(Event::Error { message }); + } +} -- cgit v1.2.3