From 5222b8aba3b1c2c68706aacf6869423a8e4fe6d5 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 15:47:32 +0300 Subject: move all parsing related bits to a separate module --- crates/ra_syntax/src/parsing/builder.rs | 41 ++ crates/ra_syntax/src/parsing/grammar.rs | 186 ++++++++ crates/ra_syntax/src/parsing/grammar/attributes.rs | 31 ++ .../ra_syntax/src/parsing/grammar/expressions.rs | 473 ++++++++++++++++++++ .../src/parsing/grammar/expressions/atom.rs | 475 +++++++++++++++++++++ crates/ra_syntax/src/parsing/grammar/items.rs | 392 +++++++++++++++++ .../ra_syntax/src/parsing/grammar/items/consts.rs | 21 + .../ra_syntax/src/parsing/grammar/items/nominal.rs | 168 ++++++++ .../ra_syntax/src/parsing/grammar/items/traits.rs | 137 ++++++ .../src/parsing/grammar/items/use_item.rs | 121 ++++++ crates/ra_syntax/src/parsing/grammar/params.rs | 139 ++++++ crates/ra_syntax/src/parsing/grammar/paths.rs | 103 +++++ crates/ra_syntax/src/parsing/grammar/patterns.rs | 248 +++++++++++ crates/ra_syntax/src/parsing/grammar/type_args.rs | 48 +++ .../ra_syntax/src/parsing/grammar/type_params.rs | 175 ++++++++ crates/ra_syntax/src/parsing/grammar/types.rs | 278 ++++++++++++ crates/ra_syntax/src/parsing/lexer.rs | 215 ++++++++++ crates/ra_syntax/src/parsing/lexer/classes.rs | 26 ++ crates/ra_syntax/src/parsing/lexer/comments.rs | 57 +++ crates/ra_syntax/src/parsing/lexer/numbers.rs | 69 +++ crates/ra_syntax/src/parsing/lexer/ptr.rs | 162 +++++++ crates/ra_syntax/src/parsing/lexer/strings.rs | 112 +++++ crates/ra_syntax/src/parsing/parser_api.rs | 195 +++++++++ crates/ra_syntax/src/parsing/parser_impl.rs | 199 +++++++++ crates/ra_syntax/src/parsing/parser_impl/event.rs | 254 +++++++++++ crates/ra_syntax/src/parsing/parser_impl/input.rs | 104 +++++ crates/ra_syntax/src/parsing/reparsing.rs | 370 ++++++++++++++++ crates/ra_syntax/src/parsing/token_set.rs | 41 ++ 28 files changed, 4840 insertions(+) create mode 100644 crates/ra_syntax/src/parsing/builder.rs create mode 
100644 crates/ra_syntax/src/parsing/grammar.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/attributes.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/expressions.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/expressions/atom.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/items.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/items/consts.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/items/nominal.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/items/traits.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/items/use_item.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/params.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/paths.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/patterns.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/type_args.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/type_params.rs create mode 100644 crates/ra_syntax/src/parsing/grammar/types.rs create mode 100644 crates/ra_syntax/src/parsing/lexer.rs create mode 100644 crates/ra_syntax/src/parsing/lexer/classes.rs create mode 100644 crates/ra_syntax/src/parsing/lexer/comments.rs create mode 100644 crates/ra_syntax/src/parsing/lexer/numbers.rs create mode 100644 crates/ra_syntax/src/parsing/lexer/ptr.rs create mode 100644 crates/ra_syntax/src/parsing/lexer/strings.rs create mode 100644 crates/ra_syntax/src/parsing/parser_api.rs create mode 100644 crates/ra_syntax/src/parsing/parser_impl.rs create mode 100644 crates/ra_syntax/src/parsing/parser_impl/event.rs create mode 100644 crates/ra_syntax/src/parsing/parser_impl/input.rs create mode 100644 crates/ra_syntax/src/parsing/reparsing.rs create mode 100644 crates/ra_syntax/src/parsing/token_set.rs (limited to 'crates/ra_syntax/src/parsing') diff --git a/crates/ra_syntax/src/parsing/builder.rs b/crates/ra_syntax/src/parsing/builder.rs new file mode 100644 index 000000000..9d7ad06fe --- /dev/null +++ 
b/crates/ra_syntax/src/parsing/builder.rs @@ -0,0 +1,41 @@ +use crate::{ + parsing::parser_impl::Sink, + syntax_node::{GreenNode, RaTypes, SyntaxError}, + SmolStr, SyntaxKind, +}; +use rowan::GreenNodeBuilder; + +pub(crate) struct GreenBuilder { + errors: Vec, + inner: GreenNodeBuilder, +} + +impl GreenBuilder { + pub(crate) fn new() -> GreenBuilder { + GreenBuilder { errors: Vec::new(), inner: GreenNodeBuilder::new() } + } +} + +impl Sink for GreenBuilder { + type Tree = (GreenNode, Vec); + + fn leaf(&mut self, kind: SyntaxKind, text: SmolStr) { + self.inner.leaf(kind, text); + } + + fn start_branch(&mut self, kind: SyntaxKind) { + self.inner.start_internal(kind) + } + + fn finish_branch(&mut self) { + self.inner.finish_internal(); + } + + fn error(&mut self, error: SyntaxError) { + self.errors.push(error) + } + + fn finish(self) -> (GreenNode, Vec) { + (self.inner.finish(), self.errors) + } +} diff --git a/crates/ra_syntax/src/parsing/grammar.rs b/crates/ra_syntax/src/parsing/grammar.rs new file mode 100644 index 000000000..bf86443de --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar.rs @@ -0,0 +1,186 @@ +//! This is the actual "grammar" of the Rust language. +//! +//! Each function in this module and its children corresponds +//! to a production of the formal grammar. Submodules roughly +//! correspond to different *areas* of the grammar. By convention, +//! each submodule starts with `use super::*` import and exports +//! "public" productions via `pub(super)`. +//! +//! See docs for `Parser` to learn about API, available to the grammar, +//! and see docs for `Event` to learn how this actually manages to +//! produce parse trees. +//! +//! Code in this module also contains inline tests, which start with +//! `// test name-of-the-test` comment and look like this: +//! +//! ``` +//! // test function_with_zero_parameters +//! // fn foo() {} +//! ``` +//! +//! After adding a new inline-test, run `cargo collect-tests` to extract +//! 
it as a standalone text-fixture into `tests/data/parser/inline`, and +//! run `cargo test` once to create the "gold" value. +//! +//! Coding convention: rules like `where_clause` always produce either a +//! node or an error, rules like `opt_where_clause` may produce nothing. +//! Non-opt rules typically start with `assert!(p.at(FIRST_TOKEN))`, the +//! caller is responsible for branching on the first token. +mod attributes; +mod expressions; +mod items; +mod params; +mod paths; +mod patterns; +mod type_args; +mod type_params; +mod types; + +pub(crate) use self::{ + expressions::block, + items::{ + enum_variant_list, extern_item_list, impl_item_list, match_arm_list, mod_item_list, + named_field_def_list, named_field_list, token_tree, trait_item_list, use_tree_list, + }, +}; +use crate::{ + SyntaxKind::{self, *}, + parsing::{ + token_set::TokenSet, + parser_api::{CompletedMarker, Marker, Parser} + }, +}; + +pub(crate) fn root(p: &mut Parser) { + let m = p.start(); + p.eat(SHEBANG); + items::mod_contents(p, false); + m.complete(p, SOURCE_FILE); +} + +#[derive(Clone, Copy, PartialEq, Eq)] +enum BlockLike { + Block, + NotBlock, +} + +impl BlockLike { + fn is_block(self) -> bool { + self == BlockLike::Block + } +} + +fn opt_visibility(p: &mut Parser) { + match p.current() { + PUB_KW => { + let m = p.start(); + p.bump(); + if p.at(L_PAREN) { + match p.nth(1) { + // test crate_visibility + // pub(crate) struct S; + // pub(self) struct S; + // pub(self) struct S; + // pub(self) struct S; + CRATE_KW | SELF_KW | SUPER_KW => { + p.bump(); + p.bump(); + p.expect(R_PAREN); + } + IN_KW => { + p.bump(); + p.bump(); + paths::use_path(p); + p.expect(R_PAREN); + } + _ => (), + } + } + m.complete(p, VISIBILITY); + } + // test crate_keyword_vis + // crate fn main() { } + CRATE_KW => { + let m = p.start(); + p.bump(); + m.complete(p, VISIBILITY); + } + _ => (), + } +} + +fn opt_alias(p: &mut Parser) { + if p.at(AS_KW) { + let m = p.start(); + p.bump(); + name(p); + m.complete(p, 
ALIAS); + } +} + +fn abi(p: &mut Parser) { + assert!(p.at(EXTERN_KW)); + let abi = p.start(); + p.bump(); + match p.current() { + STRING | RAW_STRING => p.bump(), + _ => (), + } + abi.complete(p, ABI); +} + +fn opt_fn_ret_type(p: &mut Parser) -> bool { + if p.at(THIN_ARROW) { + let m = p.start(); + p.bump(); + types::type_(p); + m.complete(p, RET_TYPE); + true + } else { + false + } +} + +fn name_r(p: &mut Parser, recovery: TokenSet) { + if p.at(IDENT) { + let m = p.start(); + p.bump(); + m.complete(p, NAME); + } else { + p.err_recover("expected a name", recovery); + } +} + +fn name(p: &mut Parser) { + name_r(p, TokenSet::empty()) +} + +fn name_ref(p: &mut Parser) { + if p.at(IDENT) { + let m = p.start(); + p.bump(); + m.complete(p, NAME_REF); + } else { + p.err_and_bump("expected identifier"); + } +} + +fn error_block(p: &mut Parser, message: &str) { + go(p, Some(message)); + fn go(p: &mut Parser, message: Option<&str>) { + assert!(p.at(L_CURLY)); + let m = p.start(); + if let Some(message) = message { + p.error(message); + } + p.bump(); + while !p.at(EOF) && !p.at(R_CURLY) { + match p.current() { + L_CURLY => go(p, None), + _ => p.bump(), + } + } + p.eat(R_CURLY); + m.complete(p, ERROR); + } +} diff --git a/crates/ra_syntax/src/parsing/grammar/attributes.rs b/crates/ra_syntax/src/parsing/grammar/attributes.rs new file mode 100644 index 000000000..cd30e8a45 --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/attributes.rs @@ -0,0 +1,31 @@ +use super::*; + +pub(super) fn inner_attributes(p: &mut Parser) { + while p.current() == POUND && p.nth(1) == EXCL { + attribute(p, true) + } +} + +pub(super) fn outer_attributes(p: &mut Parser) { + while p.at(POUND) { + attribute(p, false) + } +} + +fn attribute(p: &mut Parser, inner: bool) { + let attr = p.start(); + assert!(p.at(POUND)); + p.bump(); + + if inner { + assert!(p.at(EXCL)); + p.bump(); + } + + if p.at(L_BRACK) { + items::token_tree(p); + } else { + p.error("expected `[`"); + } + attr.complete(p, ATTR); +} 
diff --git a/crates/ra_syntax/src/parsing/grammar/expressions.rs b/crates/ra_syntax/src/parsing/grammar/expressions.rs new file mode 100644 index 000000000..d5a4f4d7b --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/expressions.rs @@ -0,0 +1,473 @@ +mod atom; + +pub(crate) use self::atom::match_arm_list; +pub(super) use self::atom::{literal, LITERAL_FIRST}; +use super::*; + +const EXPR_FIRST: TokenSet = LHS_FIRST; + +pub(super) fn expr(p: &mut Parser) -> BlockLike { + let r = Restrictions { forbid_structs: false, prefer_stmt: false }; + expr_bp(p, r, 1) +} + +pub(super) fn expr_stmt(p: &mut Parser) -> BlockLike { + let r = Restrictions { forbid_structs: false, prefer_stmt: true }; + expr_bp(p, r, 1) +} + +fn expr_no_struct(p: &mut Parser) { + let r = Restrictions { forbid_structs: true, prefer_stmt: false }; + expr_bp(p, r, 1); +} + +// test block +// fn a() {} +// fn b() { let _ = 1; } +// fn c() { 1; 2; } +// fn d() { 1; 2 } +pub(crate) fn block(p: &mut Parser) { + if !p.at(L_CURLY) { + p.error("expected a block"); + return; + } + let m = p.start(); + p.bump(); + // This is checked by a validator + attributes::inner_attributes(p); + + while !p.at(EOF) && !p.at(R_CURLY) { + match p.current() { + // test nocontentexpr + // fn foo(){ + // ;;;some_expr();;;;{;;;};;;;Ok(()) + // } + SEMI => p.bump(), + _ => { + // test block_items + // fn a() { fn b() {} } + let m = p.start(); + let has_attrs = p.at(POUND); + attributes::outer_attributes(p); + if p.at(LET_KW) { + let_stmt(p, m); + } else { + match items::maybe_item(p, items::ItemFlavor::Mod) { + items::MaybeItem::Item(kind) => { + m.complete(p, kind); + } + items::MaybeItem::Modifiers => { + m.abandon(p); + p.error("expected an item"); + } + // test pub_expr + // fn foo() { pub 92; } //FIXME + items::MaybeItem::None => { + if has_attrs { + m.abandon(p); + p.error( + "expected a let statement or an item after attributes in block", + ); + } else { + let is_blocklike = expressions::expr_stmt(p) == 
BlockLike::Block; + if p.at(R_CURLY) { + m.abandon(p); + } else { + // test no_semi_after_block + // fn foo() { + // if true {} + // loop {} + // match () {} + // while true {} + // for _ in () {} + // {} + // {} + // macro_rules! test { + // () => {} + // } + // test!{} + // } + if is_blocklike { + p.eat(SEMI); + } else { + p.expect(SEMI); + } + m.complete(p, EXPR_STMT); + } + } + } + } + } + } + } + } + p.expect(R_CURLY); + m.complete(p, BLOCK); + + // test let_stmt; + // fn foo() { + // let a; + // let b: i32; + // let c = 92; + // let d: i32 = 92; + // } + fn let_stmt(p: &mut Parser, m: Marker) { + assert!(p.at(LET_KW)); + p.bump(); + patterns::pattern(p); + if p.at(COLON) { + types::ascription(p); + } + if p.eat(EQ) { + expressions::expr(p); + } + p.expect(SEMI); + m.complete(p, LET_STMT); + } +} + +#[derive(Clone, Copy)] +struct Restrictions { + forbid_structs: bool, + prefer_stmt: bool, +} + +enum Op { + Simple, + Composite(SyntaxKind, u8), +} + +fn current_op(p: &Parser) -> (u8, Op) { + if let Some(t) = p.current3() { + match t { + (L_ANGLE, L_ANGLE, EQ) => return (1, Op::Composite(SHLEQ, 3)), + (R_ANGLE, R_ANGLE, EQ) => return (1, Op::Composite(SHREQ, 3)), + _ => (), + } + } + + if let Some(t) = p.current2() { + match t { + (PLUS, EQ) => return (1, Op::Composite(PLUSEQ, 2)), + (MINUS, EQ) => return (1, Op::Composite(MINUSEQ, 2)), + (STAR, EQ) => return (1, Op::Composite(STAREQ, 2)), + (SLASH, EQ) => return (1, Op::Composite(SLASHEQ, 2)), + (PIPE, EQ) => return (1, Op::Composite(PIPEEQ, 2)), + (AMP, EQ) => return (1, Op::Composite(AMPEQ, 2)), + (CARET, EQ) => return (1, Op::Composite(CARETEQ, 2)), + (PIPE, PIPE) => return (3, Op::Composite(PIPEPIPE, 2)), + (AMP, AMP) => return (4, Op::Composite(AMPAMP, 2)), + (L_ANGLE, EQ) => return (5, Op::Composite(LTEQ, 2)), + (R_ANGLE, EQ) => return (5, Op::Composite(GTEQ, 2)), + (L_ANGLE, L_ANGLE) => return (9, Op::Composite(SHL, 2)), + (R_ANGLE, R_ANGLE) => return (9, Op::Composite(SHR, 2)), + _ => (), + } + } + + let 
bp = match p.current() { + EQ => 1, + DOTDOT | DOTDOTEQ => 2, + EQEQ | NEQ | L_ANGLE | R_ANGLE => 5, + PIPE => 6, + CARET => 7, + AMP => 8, + MINUS | PLUS => 10, + STAR | SLASH | PERCENT => 11, + _ => 0, + }; + (bp, Op::Simple) +} + +// Parses expression with binding power of at least bp. +fn expr_bp(p: &mut Parser, r: Restrictions, bp: u8) -> BlockLike { + let mut lhs = match lhs(p, r) { + Some((lhs, blocklike)) => { + // test stmt_bin_expr_ambiguity + // fn foo() { + // let _ = {1} & 2; + // {1} &2; + // } + if r.prefer_stmt && blocklike.is_block() { + return BlockLike::Block; + } + lhs + } + None => return BlockLike::NotBlock, + }; + + loop { + let is_range = p.current() == DOTDOT || p.current() == DOTDOTEQ; + let (op_bp, op) = current_op(p); + if op_bp < bp { + break; + } + let m = lhs.precede(p); + match op { + Op::Simple => p.bump(), + Op::Composite(kind, n) => { + p.bump_compound(kind, n); + } + } + expr_bp(p, r, op_bp + 1); + lhs = m.complete(p, if is_range { RANGE_EXPR } else { BIN_EXPR }); + } + BlockLike::NotBlock +} + +const LHS_FIRST: TokenSet = + atom::ATOM_EXPR_FIRST.union(token_set![AMP, STAR, EXCL, DOTDOT, DOTDOTEQ, MINUS]); + +fn lhs(p: &mut Parser, r: Restrictions) -> Option<(CompletedMarker, BlockLike)> { + let m; + let kind = match p.current() { + // test ref_expr + // fn foo() { + // let _ = &1; + // let _ = &mut &f(); + // } + AMP => { + m = p.start(); + p.bump(); + p.eat(MUT_KW); + REF_EXPR + } + // test unary_expr + // fn foo() { + // **&1; + // !!true; + // --1; + // } + STAR | EXCL | MINUS => { + m = p.start(); + p.bump(); + PREFIX_EXPR + } + // test full_range_expr + // fn foo() { xs[..]; } + DOTDOT | DOTDOTEQ => { + m = p.start(); + p.bump(); + if p.at_ts(EXPR_FIRST) { + expr_bp(p, r, 2); + } + return Some((m.complete(p, RANGE_EXPR), BlockLike::NotBlock)); + } + _ => { + let (lhs, blocklike) = atom::atom_expr(p, r)?; + return Some(( + postfix_expr(p, lhs, !(r.prefer_stmt && blocklike.is_block())), + blocklike, + )); + } + }; + 
expr_bp(p, r, 255); + Some((m.complete(p, kind), BlockLike::NotBlock)) +} + +fn postfix_expr( + p: &mut Parser, + mut lhs: CompletedMarker, + // Calls are disallowed if the type is a block and we prefer statements because the call cannot be disambiguated from a tuple + // E.g. `while true {break}();` is parsed as + // `while true {break}; ();` + mut allow_calls: bool, +) -> CompletedMarker { + loop { + lhs = match p.current() { + // test stmt_postfix_expr_ambiguity + // fn foo() { + // match () { + // _ => {} + // () => {} + // [] => {} + // } + // } + L_PAREN if allow_calls => call_expr(p, lhs), + L_BRACK if allow_calls => index_expr(p, lhs), + DOT if p.nth(1) == IDENT && (p.nth(2) == L_PAREN || p.nth(2) == COLONCOLON) => { + method_call_expr(p, lhs) + } + DOT => field_expr(p, lhs), + // test postfix_range + // fn foo() { let x = 1..; } + DOTDOT | DOTDOTEQ if !EXPR_FIRST.contains(p.nth(1)) => { + let m = lhs.precede(p); + p.bump(); + m.complete(p, RANGE_EXPR) + } + QUESTION => try_expr(p, lhs), + AS_KW => cast_expr(p, lhs), + _ => break, + }; + allow_calls = true + } + lhs +} + +// test call_expr +// fn foo() { +// let _ = f(); +// let _ = f()(1)(1, 2,); +// let _ = f(::func()); +// f(::func()); +// } +fn call_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(L_PAREN)); + let m = lhs.precede(p); + arg_list(p); + m.complete(p, CALL_EXPR) +} + +// test index_expr +// fn foo() { +// x[1][2]; +// } +fn index_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(L_BRACK)); + let m = lhs.precede(p); + p.bump(); + expr(p); + p.expect(R_BRACK); + m.complete(p, INDEX_EXPR) +} + +// test method_call_expr +// fn foo() { +// x.foo(); +// y.bar::(1, 2,); +// } +fn method_call_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(DOT) && p.nth(1) == IDENT && (p.nth(2) == L_PAREN || p.nth(2) == COLONCOLON)); + let m = lhs.precede(p); + p.bump(); + name_ref(p); + type_args::opt_type_arg_list(p, true); 
+ if p.at(L_PAREN) { + arg_list(p); + } + m.complete(p, METHOD_CALL_EXPR) +} + +// test field_expr +// fn foo() { +// x.foo; +// x.0.bar; +// } +fn field_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(DOT)); + let m = lhs.precede(p); + p.bump(); + if p.at(IDENT) { + name_ref(p) + } else if p.at(INT_NUMBER) { + p.bump() + } else { + p.error("expected field name or number") + } + m.complete(p, FIELD_EXPR) +} + +// test try_expr +// fn foo() { +// x?; +// } +fn try_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(QUESTION)); + let m = lhs.precede(p); + p.bump(); + m.complete(p, TRY_EXPR) +} + +// test cast_expr +// fn foo() { +// 82 as i32; +// 81 as i8 + 1; +// 79 as i16 - 1; +// } +fn cast_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { + assert!(p.at(AS_KW)); + let m = lhs.precede(p); + p.bump(); + // Use type_no_bounds(), because cast expressions are not + // allowed to have bounds. + types::type_no_bounds(p); + m.complete(p, CAST_EXPR) +} + +fn arg_list(p: &mut Parser) { + assert!(p.at(L_PAREN)); + let m = p.start(); + p.bump(); + while !p.at(R_PAREN) && !p.at(EOF) { + if !p.at_ts(EXPR_FIRST) { + p.error("expected expression"); + break; + } + expr(p); + if !p.at(R_PAREN) && !p.expect(COMMA) { + break; + } + } + p.eat(R_PAREN); + m.complete(p, ARG_LIST); +} + +// test path_expr +// fn foo() { +// let _ = a; +// let _ = a::b; +// let _ = ::a::; +// let _ = format!(); +// } +fn path_expr(p: &mut Parser, r: Restrictions) -> (CompletedMarker, BlockLike) { + assert!(paths::is_path_start(p) || p.at(L_ANGLE)); + let m = p.start(); + paths::expr_path(p); + match p.current() { + L_CURLY if !r.forbid_structs => { + named_field_list(p); + (m.complete(p, STRUCT_LIT), BlockLike::NotBlock) + } + EXCL => { + let block_like = items::macro_call_after_excl(p); + return (m.complete(p, MACRO_CALL), block_like); + } + _ => (m.complete(p, PATH_EXPR), BlockLike::NotBlock), + } +} + +// test struct_lit +// fn 
foo() { +// S {}; +// S { x, y: 32, }; +// S { x, y: 32, ..Default::default() }; +// } +pub(crate) fn named_field_list(p: &mut Parser) { + assert!(p.at(L_CURLY)); + let m = p.start(); + p.bump(); + while !p.at(EOF) && !p.at(R_CURLY) { + match p.current() { + IDENT => { + let m = p.start(); + name_ref(p); + if p.eat(COLON) { + expr(p); + } + m.complete(p, NAMED_FIELD); + } + DOTDOT => { + p.bump(); + expr(p); + } + L_CURLY => error_block(p, "expected a field"), + _ => p.err_and_bump("expected identifier"), + } + if !p.at(R_CURLY) { + p.expect(COMMA); + } + } + p.expect(R_CURLY); + m.complete(p, NAMED_FIELD_LIST); +} diff --git a/crates/ra_syntax/src/parsing/grammar/expressions/atom.rs b/crates/ra_syntax/src/parsing/grammar/expressions/atom.rs new file mode 100644 index 000000000..e74305b6a --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/expressions/atom.rs @@ -0,0 +1,475 @@ +use super::*; + +// test expr_literals +// fn foo() { +// let _ = true; +// let _ = false; +// let _ = 1; +// let _ = 2.0; +// let _ = b'a'; +// let _ = 'b'; +// let _ = "c"; +// let _ = r"d"; +// let _ = b"e"; +// let _ = br"f"; +// } +pub(crate) const LITERAL_FIRST: TokenSet = token_set![ + TRUE_KW, + FALSE_KW, + INT_NUMBER, + FLOAT_NUMBER, + BYTE, + CHAR, + STRING, + RAW_STRING, + BYTE_STRING, + RAW_BYTE_STRING +]; + +pub(crate) fn literal(p: &mut Parser) -> Option { + if !p.at_ts(LITERAL_FIRST) { + return None; + } + let m = p.start(); + p.bump(); + Some(m.complete(p, LITERAL)) +} + +// E.g. 
for after the break in `if break {}`, this should not match +pub(super) const ATOM_EXPR_FIRST: TokenSet = + LITERAL_FIRST.union(paths::PATH_FIRST).union(token_set![ + L_PAREN, + L_CURLY, + L_BRACK, + PIPE, + MOVE_KW, + IF_KW, + WHILE_KW, + MATCH_KW, + UNSAFE_KW, + RETURN_KW, + BREAK_KW, + CONTINUE_KW, + LIFETIME, + ]); + +const EXPR_RECOVERY_SET: TokenSet = token_set![LET_KW]; + +pub(super) fn atom_expr(p: &mut Parser, r: Restrictions) -> Option<(CompletedMarker, BlockLike)> { + if let Some(m) = literal(p) { + return Some((m, BlockLike::NotBlock)); + } + if paths::is_path_start(p) || p.at(L_ANGLE) { + return Some(path_expr(p, r)); + } + let la = p.nth(1); + let done = match p.current() { + L_PAREN => tuple_expr(p), + L_BRACK => array_expr(p), + PIPE => lambda_expr(p), + MOVE_KW if la == PIPE => lambda_expr(p), + IF_KW => if_expr(p), + + LOOP_KW => loop_expr(p, None), + FOR_KW => for_expr(p, None), + WHILE_KW => while_expr(p, None), + LIFETIME if la == COLON => { + let m = p.start(); + label(p); + match p.current() { + LOOP_KW => loop_expr(p, Some(m)), + FOR_KW => for_expr(p, Some(m)), + WHILE_KW => while_expr(p, Some(m)), + L_CURLY => block_expr(p, Some(m)), + _ => { + // test_err misplaced_label_err + // fn main() { + // 'loop: impl + // } + p.error("expected a loop"); + m.complete(p, ERROR); + return None; + } + } + } + + MATCH_KW => match_expr(p), + UNSAFE_KW if la == L_CURLY => { + let m = p.start(); + p.bump(); + block_expr(p, Some(m)) + } + L_CURLY => block_expr(p, None), + RETURN_KW => return_expr(p), + CONTINUE_KW => continue_expr(p), + BREAK_KW => break_expr(p, r), + _ => { + p.err_recover("expected expression", EXPR_RECOVERY_SET); + return None; + } + }; + let blocklike = match done.kind() { + IF_EXPR | WHILE_EXPR | FOR_EXPR | LOOP_EXPR | MATCH_EXPR | BLOCK_EXPR => BlockLike::Block, + _ => BlockLike::NotBlock, + }; + Some((done, blocklike)) +} + +// test tuple_expr +// fn foo() { +// (); +// (1); +// (1,); +// } +fn tuple_expr(p: &mut Parser) -> 
CompletedMarker { + assert!(p.at(L_PAREN)); + let m = p.start(); + p.expect(L_PAREN); + + let mut saw_comma = false; + let mut saw_expr = false; + while !p.at(EOF) && !p.at(R_PAREN) { + saw_expr = true; + if !p.at_ts(EXPR_FIRST) { + p.error("expected expression"); + break; + } + expr(p); + if !p.at(R_PAREN) { + saw_comma = true; + p.expect(COMMA); + } + } + p.expect(R_PAREN); + m.complete(p, if saw_expr && !saw_comma { PAREN_EXPR } else { TUPLE_EXPR }) +} + +// test array_expr +// fn foo() { +// []; +// [1]; +// [1, 2,]; +// [1; 2]; +// } +fn array_expr(p: &mut Parser) -> CompletedMarker { + assert!(p.at(L_BRACK)); + let m = p.start(); + p.bump(); + if p.eat(R_BRACK) { + return m.complete(p, ARRAY_EXPR); + } + expr(p); + if p.eat(SEMI) { + expr(p); + p.expect(R_BRACK); + return m.complete(p, ARRAY_EXPR); + } + while !p.at(EOF) && !p.at(R_BRACK) { + p.expect(COMMA); + if p.at(R_BRACK) { + break; + } + if !p.at_ts(EXPR_FIRST) { + p.error("expected expression"); + break; + } + expr(p); + } + p.expect(R_BRACK); + m.complete(p, ARRAY_EXPR) +} + +// test lambda_expr +// fn foo() { +// || (); +// || -> i32 { 92 }; +// |x| x; +// move |x: i32,| x; +// } +fn lambda_expr(p: &mut Parser) -> CompletedMarker { + assert!(p.at(PIPE) || (p.at(MOVE_KW) && p.nth(1) == PIPE)); + let m = p.start(); + p.eat(MOVE_KW); + params::param_list_opt_types(p); + if opt_fn_ret_type(p) { + if !p.at(L_CURLY) { + p.error("expected `{`"); + } + } + expr(p); + m.complete(p, LAMBDA_EXPR) +} + +// test if_expr +// fn foo() { +// if true {}; +// if true {} else {}; +// if true {} else if false {} else {}; +// if S {}; +// } +fn if_expr(p: &mut Parser) -> CompletedMarker { + assert!(p.at(IF_KW)); + let m = p.start(); + p.bump(); + cond(p); + block(p); + if p.at(ELSE_KW) { + p.bump(); + if p.at(IF_KW) { + if_expr(p); + } else { + block(p); + } + } + m.complete(p, IF_EXPR) +} + +// test label +// fn foo() { +// 'a: loop {} +// 'b: while true {} +// 'c: for x in () {} +// } +fn label(p: &mut Parser) { + 
assert!(p.at(LIFETIME) && p.nth(1) == COLON); + let m = p.start(); + p.bump(); + p.bump(); + m.complete(p, LABEL); +} + +// test loop_expr +// fn foo() { +// loop {}; +// } +fn loop_expr(p: &mut Parser, m: Option) -> CompletedMarker { + assert!(p.at(LOOP_KW)); + let m = m.unwrap_or_else(|| p.start()); + p.bump(); + block(p); + m.complete(p, LOOP_EXPR) +} + +// test while_expr +// fn foo() { +// while true {}; +// while let Some(x) = it.next() {}; +// } +fn while_expr(p: &mut Parser, m: Option) -> CompletedMarker { + assert!(p.at(WHILE_KW)); + let m = m.unwrap_or_else(|| p.start()); + p.bump(); + cond(p); + block(p); + m.complete(p, WHILE_EXPR) +} + +// test for_expr +// fn foo() { +// for x in [] {}; +// } +fn for_expr(p: &mut Parser, m: Option) -> CompletedMarker { + assert!(p.at(FOR_KW)); + let m = m.unwrap_or_else(|| p.start()); + p.bump(); + patterns::pattern(p); + p.expect(IN_KW); + expr_no_struct(p); + block(p); + m.complete(p, FOR_EXPR) +} + +// test cond +// fn foo() { if let Some(_) = None {} } +fn cond(p: &mut Parser) { + let m = p.start(); + if p.eat(LET_KW) { + patterns::pattern(p); + p.expect(EQ); + } + expr_no_struct(p); + m.complete(p, CONDITION); +} + +// test match_expr +// fn foo() { +// match () { }; +// match S {}; +// } +fn match_expr(p: &mut Parser) -> CompletedMarker { + assert!(p.at(MATCH_KW)); + let m = p.start(); + p.bump(); + expr_no_struct(p); + if p.at(L_CURLY) { + match_arm_list(p); + } else { + p.error("expected `{`") + } + m.complete(p, MATCH_EXPR) +} + +pub(crate) fn match_arm_list(p: &mut Parser) { + assert!(p.at(L_CURLY)); + let m = p.start(); + p.eat(L_CURLY); + + // test match_arms_inner_attribute + // fn foo() { + // match () { + // #![doc("Inner attribute")] + // #![doc("Can be")] + // #![doc("Stacked")] + // _ => (), + // } + // } + attributes::inner_attributes(p); + + while !p.at(EOF) && !p.at(R_CURLY) { + if p.at(L_CURLY) { + error_block(p, "expected match arm"); + continue; + } + + // test match_arms_outer_attributes + // 
fn foo() { + // match () { + // #[cfg(feature = "some")] + // _ => (), + // #[cfg(feature = "other")] + // _ => (), + // #[cfg(feature = "many")] + // #[cfg(feature = "attributes")] + // #[cfg(feature = "before")] + // _ => (), + // } + // } + attributes::outer_attributes(p); + + // test match_arms_commas + // fn foo() { + // match () { + // _ => (), + // _ => {} + // _ => () + // } + // } + if match_arm(p).is_block() { + p.eat(COMMA); + } else if !p.at(R_CURLY) { + p.expect(COMMA); + } + } + p.expect(R_CURLY); + m.complete(p, MATCH_ARM_LIST); +} + +// test match_arm +// fn foo() { +// match () { +// _ => (), +// _ if Test > Test{field: 0} => (), +// X | Y if Z => (), +// | X | Y if Z => (), +// | X => (), +// }; +// } +fn match_arm(p: &mut Parser) -> BlockLike { + let m = p.start(); + p.eat(PIPE); + patterns::pattern_r(p, TokenSet::empty()); + while p.eat(PIPE) { + patterns::pattern(p); + } + if p.at(IF_KW) { + match_guard(p); + } + p.expect(FAT_ARROW); + let ret = expr_stmt(p); + m.complete(p, MATCH_ARM); + ret +} + +// test match_guard +// fn foo() { +// match () { +// _ if foo => (), +// } +// } +fn match_guard(p: &mut Parser) -> CompletedMarker { + assert!(p.at(IF_KW)); + let m = p.start(); + p.bump(); + expr(p); + m.complete(p, MATCH_GUARD) +} + +// test block_expr +// fn foo() { +// {}; +// unsafe {}; +// 'label: {}; +// } +fn block_expr(p: &mut Parser, m: Option) -> CompletedMarker { + assert!(p.at(L_CURLY)); + let m = m.unwrap_or_else(|| p.start()); + block(p); + m.complete(p, BLOCK_EXPR) +} + +// test return_expr +// fn foo() { +// return; +// return 92; +// } +fn return_expr(p: &mut Parser) -> CompletedMarker { + assert!(p.at(RETURN_KW)); + let m = p.start(); + p.bump(); + if p.at_ts(EXPR_FIRST) { + expr(p); + } + m.complete(p, RETURN_EXPR) +} + +// test continue_expr +// fn foo() { +// loop { +// continue; +// continue 'l; +// } +// } +fn continue_expr(p: &mut Parser) -> CompletedMarker { + assert!(p.at(CONTINUE_KW)); + let m = p.start(); + p.bump(); + 
p.eat(LIFETIME); + m.complete(p, CONTINUE_EXPR) +} + +// test break_expr +// fn foo() { +// loop { +// break; +// break 'l; +// break 92; +// break 'l 92; +// } +// } +fn break_expr(p: &mut Parser, r: Restrictions) -> CompletedMarker { + assert!(p.at(BREAK_KW)); + let m = p.start(); + p.bump(); + p.eat(LIFETIME); + // test break_ambiguity + // fn foo(){ + // if break {} + // while break {} + // for i in break {} + // match break {} + // } + if p.at_ts(EXPR_FIRST) && !(r.forbid_structs && p.at(L_CURLY)) { + expr(p); + } + m.complete(p, BREAK_EXPR) +} diff --git a/crates/ra_syntax/src/parsing/grammar/items.rs b/crates/ra_syntax/src/parsing/grammar/items.rs new file mode 100644 index 000000000..4b962c1f3 --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/items.rs @@ -0,0 +1,392 @@ +mod consts; +mod nominal; +mod traits; +mod use_item; + +pub(crate) use self::{ + expressions::{match_arm_list, named_field_list}, + nominal::{enum_variant_list, named_field_def_list}, + traits::{impl_item_list, trait_item_list}, + use_item::use_tree_list, +}; +use super::*; + +// test mod_contents +// fn foo() {} +// macro_rules! foo {} +// foo::bar!(); +// super::baz! 
{} +// struct S; +pub(super) fn mod_contents(p: &mut Parser, stop_on_r_curly: bool) { + attributes::inner_attributes(p); + while !p.at(EOF) && !(stop_on_r_curly && p.at(R_CURLY)) { + item_or_macro(p, stop_on_r_curly, ItemFlavor::Mod) + } +} + +pub(super) enum ItemFlavor { + Mod, + Trait, +} + +pub(super) const ITEM_RECOVERY_SET: TokenSet = token_set![ + FN_KW, STRUCT_KW, ENUM_KW, IMPL_KW, TRAIT_KW, CONST_KW, STATIC_KW, LET_KW, MOD_KW, PUB_KW, + CRATE_KW +]; + +pub(super) fn item_or_macro(p: &mut Parser, stop_on_r_curly: bool, flavor: ItemFlavor) { + let m = p.start(); + attributes::outer_attributes(p); + match maybe_item(p, flavor) { + MaybeItem::Item(kind) => { + m.complete(p, kind); + } + MaybeItem::None => { + if paths::is_path_start(p) { + match macro_call(p) { + BlockLike::Block => (), + BlockLike::NotBlock => { + p.expect(SEMI); + } + } + m.complete(p, MACRO_CALL); + } else { + m.abandon(p); + if p.at(L_CURLY) { + error_block(p, "expected an item"); + } else if p.at(R_CURLY) && !stop_on_r_curly { + let e = p.start(); + p.error("unmatched `}`"); + p.bump(); + e.complete(p, ERROR); + } else if !p.at(EOF) && !p.at(R_CURLY) { + p.err_and_bump("expected an item"); + } else { + p.error("expected an item"); + } + } + } + MaybeItem::Modifiers => { + p.error("expected fn, trait or impl"); + m.complete(p, ERROR); + } + } +} + +pub(super) enum MaybeItem { + None, + Item(SyntaxKind), + Modifiers, +} + +pub(super) fn maybe_item(p: &mut Parser, flavor: ItemFlavor) -> MaybeItem { + opt_visibility(p); + if let Some(kind) = items_without_modifiers(p) { + return MaybeItem::Item(kind); + } + + let mut has_mods = false; + // modifiers + has_mods |= p.eat(CONST_KW); + + // test_err unsafe_block_in_mod + // fn foo(){} unsafe { } fn bar(){} + if p.at(UNSAFE_KW) && p.nth(1) != L_CURLY { + p.eat(UNSAFE_KW); + has_mods = true; + } + if p.at(EXTERN_KW) { + has_mods = true; + abi(p); + } + if p.at(IDENT) && p.at_contextual_kw("auto") && p.nth(1) == TRAIT_KW { + p.bump_remap(AUTO_KW); + 
has_mods = true; + } + if p.at(IDENT) && p.at_contextual_kw("default") && p.nth(1) == IMPL_KW { + p.bump_remap(DEFAULT_KW); + has_mods = true; + } + + // items + let kind = match p.current() { + // test extern_fn + // extern fn foo() {} + + // test const_fn + // const fn foo() {} + + // test const_unsafe_fn + // const unsafe fn foo() {} + + // test unsafe_extern_fn + // unsafe extern "C" fn foo() {} + + // test unsafe_fn + // unsafe fn foo() {} + FN_KW => { + fn_def(p, flavor); + FN_DEF + } + + // test unsafe_trait + // unsafe trait T {} + + // test auto_trait + // auto trait T {} + + // test unsafe_auto_trait + // unsafe auto trait T {} + TRAIT_KW => { + traits::trait_def(p); + TRAIT_DEF + } + + // test unsafe_impl + // unsafe impl Foo {} + + // test default_impl + // default impl Foo {} + + // test unsafe_default_impl + // unsafe default impl Foo {} + IMPL_KW => { + traits::impl_block(p); + IMPL_BLOCK + } + _ => { + return if has_mods { MaybeItem::Modifiers } else { MaybeItem::None }; + } + }; + + MaybeItem::Item(kind) +} + +fn items_without_modifiers(p: &mut Parser) -> Option { + let la = p.nth(1); + let kind = match p.current() { + // test extern_crate + // extern crate foo; + EXTERN_KW if la == CRATE_KW => { + extern_crate_item(p); + EXTERN_CRATE_ITEM + } + TYPE_KW => { + type_def(p); + TYPE_DEF + } + MOD_KW => { + mod_item(p); + MODULE + } + STRUCT_KW => { + // test struct_items + // struct Foo; + // struct Foo {} + // struct Foo(); + // struct Foo(String, usize); + // struct Foo { + // a: i32, + // b: f32, + // } + nominal::struct_def(p, STRUCT_KW); + if p.at(SEMI) { + p.err_and_bump( + "expected item, found `;`\n\ + consider removing this semicolon", + ); + } + STRUCT_DEF + } + IDENT if p.at_contextual_kw("union") && p.nth(1) == IDENT => { + // test union_items + // union Foo {} + // union Foo { + // a: i32, + // b: f32, + // } + nominal::struct_def(p, UNION_KW); + STRUCT_DEF + } + ENUM_KW => { + nominal::enum_def(p); + ENUM_DEF + } + USE_KW => { + 
use_item::use_item(p); + USE_ITEM + } + CONST_KW if (la == IDENT || la == MUT_KW) => { + consts::const_def(p); + CONST_DEF + } + STATIC_KW => { + consts::static_def(p); + STATIC_DEF + } + // test extern_block + // extern {} + EXTERN_KW + if la == L_CURLY || ((la == STRING || la == RAW_STRING) && p.nth(2) == L_CURLY) => + { + abi(p); + extern_item_list(p); + EXTERN_BLOCK + } + _ => return None, + }; + Some(kind) +} + +fn extern_crate_item(p: &mut Parser) { + assert!(p.at(EXTERN_KW)); + p.bump(); + assert!(p.at(CRATE_KW)); + p.bump(); + name_ref(p); + opt_alias(p); + p.expect(SEMI); +} + +pub(crate) fn extern_item_list(p: &mut Parser) { + assert!(p.at(L_CURLY)); + let m = p.start(); + p.bump(); + mod_contents(p, true); + p.expect(R_CURLY); + m.complete(p, EXTERN_ITEM_LIST); +} + +fn fn_def(p: &mut Parser, flavor: ItemFlavor) { + assert!(p.at(FN_KW)); + p.bump(); + + name_r(p, ITEM_RECOVERY_SET); + // test function_type_params + // fn foo(){} + type_params::opt_type_param_list(p); + + if p.at(L_PAREN) { + match flavor { + ItemFlavor::Mod => params::param_list(p), + ItemFlavor::Trait => params::param_list_opt_patterns(p), + } + } else { + p.error("expected function arguments"); + } + // test function_ret_type + // fn foo() {} + // fn bar() -> () {} + opt_fn_ret_type(p); + + // test function_where_clause + // fn foo() where T: Copy {} + type_params::opt_where_clause(p); + + // test fn_decl + // trait T { fn foo(); } + if p.at(SEMI) { + p.bump(); + } else { + expressions::block(p) + } +} + +// test type_item +// type Foo = Bar; +fn type_def(p: &mut Parser) { + assert!(p.at(TYPE_KW)); + p.bump(); + + name(p); + + // test type_item_type_params + // type Result = (); + type_params::opt_type_param_list(p); + + if p.at(COLON) { + type_params::bounds(p); + } + + // test type_item_where_clause + // type Foo where Foo: Copy = (); + type_params::opt_where_clause(p); + + if p.eat(EQ) { + types::type_(p); + } + p.expect(SEMI); +} + +pub(crate) fn mod_item(p: &mut Parser) { + 
assert!(p.at(MOD_KW)); + p.bump(); + + name(p); + if p.at(L_CURLY) { + mod_item_list(p); + } else if !p.eat(SEMI) { + p.error("expected `;` or `{`"); + } +} + +pub(crate) fn mod_item_list(p: &mut Parser) { + assert!(p.at(L_CURLY)); + let m = p.start(); + p.bump(); + mod_contents(p, true); + p.expect(R_CURLY); + m.complete(p, ITEM_LIST); +} + +fn macro_call(p: &mut Parser) -> BlockLike { + assert!(paths::is_path_start(p)); + paths::use_path(p); + macro_call_after_excl(p) +} + +pub(super) fn macro_call_after_excl(p: &mut Parser) -> BlockLike { + p.expect(EXCL); + if p.at(IDENT) { + name(p); + } + match p.current() { + L_CURLY => { + token_tree(p); + BlockLike::Block + } + L_PAREN | L_BRACK => { + token_tree(p); + BlockLike::NotBlock + } + _ => { + p.error("expected `{`, `[`, `(`"); + BlockLike::NotBlock + } + } +} + +pub(crate) fn token_tree(p: &mut Parser) { + let closing_paren_kind = match p.current() { + L_CURLY => R_CURLY, + L_PAREN => R_PAREN, + L_BRACK => R_BRACK, + _ => unreachable!(), + }; + let m = p.start(); + p.bump(); + while !p.at(EOF) && !p.at(closing_paren_kind) { + match p.current() { + L_CURLY | L_PAREN | L_BRACK => token_tree(p), + R_CURLY => { + p.error("unmatched `}`"); + m.complete(p, TOKEN_TREE); + return; + } + R_PAREN | R_BRACK => p.err_and_bump("unmatched brace"), + _ => p.bump(), + } + } + p.expect(closing_paren_kind); + m.complete(p, TOKEN_TREE); +} diff --git a/crates/ra_syntax/src/parsing/grammar/items/consts.rs b/crates/ra_syntax/src/parsing/grammar/items/consts.rs new file mode 100644 index 000000000..5a5852f83 --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/items/consts.rs @@ -0,0 +1,21 @@ +use super::*; + +pub(super) fn static_def(p: &mut Parser) { + const_or_static(p, STATIC_KW) +} + +pub(super) fn const_def(p: &mut Parser) { + const_or_static(p, CONST_KW) +} + +fn const_or_static(p: &mut Parser, kw: SyntaxKind) { + assert!(p.at(kw)); + p.bump(); + p.eat(MUT_KW); // TODO: validator to forbid const mut + name(p); + 
types::ascription(p); + if p.eat(EQ) { + expressions::expr(p); + } + p.expect(SEMI); +} diff --git a/crates/ra_syntax/src/parsing/grammar/items/nominal.rs b/crates/ra_syntax/src/parsing/grammar/items/nominal.rs new file mode 100644 index 000000000..ff9b38f9c --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/items/nominal.rs @@ -0,0 +1,168 @@ +use super::*; + +pub(super) fn struct_def(p: &mut Parser, kind: SyntaxKind) { + assert!(p.at(STRUCT_KW) || p.at_contextual_kw("union")); + p.bump_remap(kind); + + name_r(p, ITEM_RECOVERY_SET); + type_params::opt_type_param_list(p); + match p.current() { + WHERE_KW => { + type_params::opt_where_clause(p); + match p.current() { + SEMI => { + p.bump(); + return; + } + L_CURLY => named_field_def_list(p), + _ => { + //TODO: special case `(` error message + p.error("expected `;` or `{`"); + return; + } + } + } + SEMI if kind == STRUCT_KW => { + p.bump(); + return; + } + L_CURLY => named_field_def_list(p), + L_PAREN if kind == STRUCT_KW => { + pos_field_def_list(p); + // test tuple_struct_where + // struct Test(T) where T: Clone; + // struct Test(T); + type_params::opt_where_clause(p); + p.expect(SEMI); + } + _ if kind == STRUCT_KW => { + p.error("expected `;`, `{`, or `(`"); + return; + } + _ => { + p.error("expected `{`"); + return; + } + } +} + +pub(super) fn enum_def(p: &mut Parser) { + assert!(p.at(ENUM_KW)); + p.bump(); + name_r(p, ITEM_RECOVERY_SET); + type_params::opt_type_param_list(p); + type_params::opt_where_clause(p); + if p.at(L_CURLY) { + enum_variant_list(p); + } else { + p.error("expected `{`") + } +} + +pub(crate) fn enum_variant_list(p: &mut Parser) { + assert!(p.at(L_CURLY)); + let m = p.start(); + p.bump(); + while !p.at(EOF) && !p.at(R_CURLY) { + if p.at(L_CURLY) { + error_block(p, "expected enum variant"); + continue; + } + let var = p.start(); + attributes::outer_attributes(p); + if p.at(IDENT) { + name(p); + match p.current() { + L_CURLY => named_field_def_list(p), + L_PAREN => pos_field_def_list(p), + 
EQ => { + p.bump(); + expressions::expr(p); + } + _ => (), + } + var.complete(p, ENUM_VARIANT); + } else { + var.abandon(p); + p.err_and_bump("expected enum variant"); + } + if !p.at(R_CURLY) { + p.expect(COMMA); + } + } + p.expect(R_CURLY); + m.complete(p, ENUM_VARIANT_LIST); +} + +pub(crate) fn named_field_def_list(p: &mut Parser) { + assert!(p.at(L_CURLY)); + let m = p.start(); + p.bump(); + while !p.at(R_CURLY) && !p.at(EOF) { + if p.at(L_CURLY) { + error_block(p, "expected field"); + continue; + } + named_field_def(p); + if !p.at(R_CURLY) { + p.expect(COMMA); + } + } + p.expect(R_CURLY); + m.complete(p, NAMED_FIELD_DEF_LIST); + + fn named_field_def(p: &mut Parser) { + let m = p.start(); + // test field_attrs + // struct S { + // #[serde(with = "url_serde")] + // pub uri: Uri, + // } + attributes::outer_attributes(p); + opt_visibility(p); + if p.at(IDENT) { + name(p); + p.expect(COLON); + types::type_(p); + m.complete(p, NAMED_FIELD_DEF); + } else { + m.abandon(p); + p.err_and_bump("expected field declaration"); + } + } +} + +fn pos_field_def_list(p: &mut Parser) { + assert!(p.at(L_PAREN)); + let m = p.start(); + if !p.expect(L_PAREN) { + return; + } + while !p.at(R_PAREN) && !p.at(EOF) { + let m = p.start(); + // test pos_field_attrs + // struct S ( + // #[serde(with = "url_serde")] + // pub Uri, + // ); + // + // enum S { + // Uri(#[serde(with = "url_serde")] Uri), + // } + attributes::outer_attributes(p); + opt_visibility(p); + if !p.at_ts(types::TYPE_FIRST) { + p.error("expected a type"); + m.complete(p, ERROR); + break; + } + types::type_(p); + m.complete(p, POS_FIELD_DEF); + + if !p.at(R_PAREN) { + p.expect(COMMA); + } + } + p.expect(R_PAREN); + m.complete(p, POS_FIELD_DEF_LIST); +} diff --git a/crates/ra_syntax/src/parsing/grammar/items/traits.rs b/crates/ra_syntax/src/parsing/grammar/items/traits.rs new file mode 100644 index 000000000..d5a8ccd98 --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/items/traits.rs @@ -0,0 +1,137 @@ +use super::*; + 
+// test trait_item +// trait T: Hash + Clone where U: Copy {} +pub(super) fn trait_def(p: &mut Parser) { + assert!(p.at(TRAIT_KW)); + p.bump(); + name_r(p, ITEM_RECOVERY_SET); + type_params::opt_type_param_list(p); + if p.at(COLON) { + type_params::bounds(p); + } + type_params::opt_where_clause(p); + if p.at(L_CURLY) { + trait_item_list(p); + } else { + p.error("expected `{`"); + } +} + +// test trait_item_list +// impl F { +// type A: Clone; +// const B: i32; +// fn foo() {} +// fn bar(&self); +// } +pub(crate) fn trait_item_list(p: &mut Parser) { + assert!(p.at(L_CURLY)); + let m = p.start(); + p.bump(); + while !p.at(EOF) && !p.at(R_CURLY) { + if p.at(L_CURLY) { + error_block(p, "expected an item"); + continue; + } + item_or_macro(p, true, ItemFlavor::Trait); + } + p.expect(R_CURLY); + m.complete(p, ITEM_LIST); +} + +// test impl_block +// impl Foo {} +pub(super) fn impl_block(p: &mut Parser) { + assert!(p.at(IMPL_KW)); + p.bump(); + if choose_type_params_over_qpath(p) { + type_params::opt_type_param_list(p); + } + + // TODO: never type + // impl ! {} + + // test impl_block_neg + // impl !Send for X {} + p.eat(EXCL); + impl_type(p); + if p.eat(FOR_KW) { + impl_type(p); + } + type_params::opt_where_clause(p); + if p.at(L_CURLY) { + impl_item_list(p); + } else { + p.error("expected `{`"); + } +} + +// test impl_item_list +// impl F { +// type A = i32; +// const B: i32 = 92; +// fn foo() {} +// fn bar(&self) {} +// } +pub(crate) fn impl_item_list(p: &mut Parser) { + assert!(p.at(L_CURLY)); + let m = p.start(); + p.bump(); + // test impl_inner_attributes + // enum F{} + // impl F { + // //! 
This is a doc comment + // #![doc("This is also a doc comment")] + // } + attributes::inner_attributes(p); + + while !p.at(EOF) && !p.at(R_CURLY) { + if p.at(L_CURLY) { + error_block(p, "expected an item"); + continue; + } + item_or_macro(p, true, ItemFlavor::Mod); + } + p.expect(R_CURLY); + m.complete(p, ITEM_LIST); +} + +fn choose_type_params_over_qpath(p: &Parser) -> bool { + // There's an ambiguity between generic parameters and qualified paths in impls. + // If we see `<` it may start both, so we have to inspect some following tokens. + // The following combinations can only start generics, + // but not qualified paths (with one exception): + // `<` `>` - empty generic parameters + // `<` `#` - generic parameters with attributes + // `<` (LIFETIME|IDENT) `>` - single generic parameter + // `<` (LIFETIME|IDENT) `,` - first generic parameter in a list + // `<` (LIFETIME|IDENT) `:` - generic parameter with bounds + // `<` (LIFETIME|IDENT) `=` - generic parameter with a default + // The only truly ambiguous case is + // `<` IDENT `>` `::` IDENT ... + // we disambiguate it in favor of generics (`impl ::absolute::Path { ... }`) + // because this is what almost always expected in practice, qualified paths in impls + // (`impl ::AssocTy { ... }`) aren't even allowed by type checker at the moment. 
+ if !p.at(L_ANGLE) { + return false; + } + if p.nth(1) == POUND || p.nth(1) == R_ANGLE { + return true; + } + (p.nth(1) == LIFETIME || p.nth(1) == IDENT) + && (p.nth(2) == R_ANGLE || p.nth(2) == COMMA || p.nth(2) == COLON || p.nth(2) == EQ) +} + +// test_err impl_type +// impl Type {} +// impl Trait1 for T {} +// impl impl NotType {} +// impl Trait2 for impl NotType {} +pub(crate) fn impl_type(p: &mut Parser) { + if p.at(IMPL_KW) { + p.error("expected trait or type"); + return; + } + types::type_(p); +} diff --git a/crates/ra_syntax/src/parsing/grammar/items/use_item.rs b/crates/ra_syntax/src/parsing/grammar/items/use_item.rs new file mode 100644 index 000000000..5111d37eb --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/items/use_item.rs @@ -0,0 +1,121 @@ +use super::*; + +pub(super) fn use_item(p: &mut Parser) { + assert!(p.at(USE_KW)); + p.bump(); + use_tree(p); + p.expect(SEMI); +} + +/// Parse a use 'tree', such as `some::path` in `use some::path;` +/// Note that this is called both by `use_item` and `use_tree_list`, +/// so handles both `some::path::{inner::path}` and `inner::path` in +/// `use some::path::{inner::path};` +fn use_tree(p: &mut Parser) { + let la = p.nth(1); + let m = p.start(); + match (p.current(), la) { + // Finish the use_tree for cases of e.g. + // `use some::path::{self, *};` or `use *;` + // This does not handle cases such as `use some::path::*` + // N.B. in Rust 2015 `use *;` imports all from crate root + // however in Rust 2018 `use *;` errors: ('cannot glob-import all possible crates') + // TODO: Add this error (if not out of scope) + + // test use_star + // use *; + // use ::*; + // use some::path::{*}; + // use some::path::{::*}; + (STAR, _) => p.bump(), + (COLONCOLON, STAR) => { + // Parse `use ::*;`, which imports all from the crate root in Rust 2015 + // This is invalid inside a use_tree_list, (e.g. 
`use some::path::{::*}`) + // but still parses and errors later: ('crate root in paths can only be used in start position') + // TODO: Add this error (if not out of scope) + // In Rust 2018, it is always invalid (see above) + p.bump(); + p.bump(); + } + // Open a use tree list + // Handles cases such as `use {some::path};` or `{inner::path}` in + // `use some::path::{{inner::path}, other::path}` + + // test use_tree_list + // use {crate::path::from::root, or::path::from::crate_name}; // Rust 2018 (with a crate named `or`) + // use {path::from::root}; // Rust 2015 + // use ::{some::arbritrary::path}; // Rust 2015 + // use ::{{{crate::export}}}; // Nonsensical but perfectly legal nestnig + (L_CURLY, _) | (COLONCOLON, L_CURLY) => { + if p.at(COLONCOLON) { + p.bump(); + } + use_tree_list(p); + } + // Parse a 'standard' path. + // Also handles aliases (e.g. `use something as something_else`) + + // test use_path + // use ::crate_name; // Rust 2018 - All flavours + // use crate_name; // Rust 2018 - Anchored paths + // use item_in_scope_or_crate_name; // Rust 2018 - Uniform Paths + // + // use self::module::Item; + // use crate::Item; + // use self::some::Struct; + // use crate_name::some_item; + _ if paths::is_path_start(p) => { + paths::use_path(p); + match p.current() { + AS_KW => { + // test use_alias + // use some::path as some_name; + // use some::{ + // other::path as some_other_name, + // different::path as different_name, + // yet::another::path, + // running::out::of::synonyms::for_::different::* + // }; + opt_alias(p); + } + COLONCOLON => { + p.bump(); + match p.current() { + STAR => { + p.bump(); + } + // test use_tree_list_after_path + // use crate::{Item}; + // use self::{Item}; + L_CURLY => use_tree_list(p), + _ => { + // is this unreachable? 
/// Which kind of parameter list `list_` is parsing.
///
/// * `Normal` is used by `param_list`.
/// * `OptionalPattern` is used by `param_list_opt_patterns`; a parameter may
///   be just a type.
/// * `OptionalType` is used by `param_list_opt_types`; the list is delimited
///   by `|` rather than parentheses and a parameter may omit its type.
#[derive(Clone, Copy, Eq, PartialEq)]
enum Flavor {
    OptionalType,
    OptionalPattern,
    Normal,
}

impl Flavor {
    /// Returns `true` when every parameter of this flavor must carry a type,
    /// i.e. for everything except `OptionalType`.
    fn type_required(self) -> bool {
        // Listed exhaustively on purpose: adding a variant should be a
        // compile error here, not a silent "type required".
        match self {
            Flavor::OptionalType => false,
            Flavor::OptionalPattern | Flavor::Normal => true,
        }
    }
}
value_parameter(p: &mut Parser, flavor: Flavor) { + let m = p.start(); + match flavor { + Flavor::OptionalType | Flavor::Normal => { + patterns::pattern(p); + if p.at(COLON) || flavor.type_required() { + types::ascription(p) + } + } + // test value_parameters_no_patterns + // type F = Box; + Flavor::OptionalPattern => { + let la0 = p.current(); + let la1 = p.nth(1); + let la2 = p.nth(2); + let la3 = p.nth(3); + + // test trait_fn_placeholder_parameter + // trait Foo { + // fn bar(_: u64); + // } + if (la0 == IDENT || la0 == UNDERSCORE) && la1 == COLON + || la0 == AMP && la1 == IDENT && la2 == COLON + || la0 == AMP && la1 == MUT_KW && la2 == IDENT && la3 == COLON + { + patterns::pattern(p); + types::ascription(p); + } else { + types::type_(p); + } + } + } + m.complete(p, PARAM); +} + +// test self_param +// impl S { +// fn a(self) {} +// fn b(&self,) {} +// fn c(&'a self,) {} +// fn d(&'a mut self, x: i32) {} +// fn e(mut self) {} +// } +fn opt_self_param(p: &mut Parser) { + let m; + if p.at(SELF_KW) || p.at(MUT_KW) && p.nth(1) == SELF_KW { + m = p.start(); + p.eat(MUT_KW); + p.eat(SELF_KW); + // test arb_self_types + // impl S { + // fn a(self: &Self) {} + // fn b(mut self: Box) {} + // } + if p.at(COLON) { + types::ascription(p); + } + } else { + let la1 = p.nth(1); + let la2 = p.nth(2); + let la3 = p.nth(3); + let n_toks = match (p.current(), la1, la2, la3) { + (AMP, SELF_KW, _, _) => 2, + (AMP, MUT_KW, SELF_KW, _) => 3, + (AMP, LIFETIME, SELF_KW, _) => 3, + (AMP, LIFETIME, MUT_KW, SELF_KW) => 4, + _ => return, + }; + m = p.start(); + for _ in 0..n_toks { + p.bump(); + } + } + m.complete(p, SELF_PARAM); + if !p.at(R_PAREN) { + p.expect(COMMA); + } +} diff --git a/crates/ra_syntax/src/parsing/grammar/paths.rs b/crates/ra_syntax/src/parsing/grammar/paths.rs new file mode 100644 index 000000000..33a11886c --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/paths.rs @@ -0,0 +1,103 @@ +use super::*; + +pub(super) const PATH_FIRST: TokenSet = + token_set![IDENT, 
SELF_KW, SUPER_KW, CRATE_KW, COLONCOLON, L_ANGLE]; + +pub(super) fn is_path_start(p: &Parser) -> bool { + match p.current() { + IDENT | SELF_KW | SUPER_KW | CRATE_KW | COLONCOLON => true, + _ => false, + } +} + +pub(super) fn use_path(p: &mut Parser) { + path(p, Mode::Use) +} + +pub(super) fn type_path(p: &mut Parser) { + path(p, Mode::Type) +} + +pub(super) fn expr_path(p: &mut Parser) { + path(p, Mode::Expr) +} + +#[derive(Clone, Copy, Eq, PartialEq)] +enum Mode { + Use, + Type, + Expr, +} + +fn path(p: &mut Parser, mode: Mode) { + let path = p.start(); + path_segment(p, mode, true); + let mut qual = path.complete(p, PATH); + loop { + let use_tree = match p.nth(1) { + STAR | L_CURLY => true, + _ => false, + }; + if p.at(COLONCOLON) && !use_tree { + let path = qual.precede(p); + p.bump(); + path_segment(p, mode, false); + let path = path.complete(p, PATH); + qual = path; + } else { + break; + } + } +} + +fn path_segment(p: &mut Parser, mode: Mode, first: bool) { + let m = p.start(); + // test qual_paths + // type X = ::Output; + // fn foo() { ::default(); } + if first && p.eat(L_ANGLE) { + types::type_(p); + if p.eat(AS_KW) { + if is_path_start(p) { + types::path_type(p); + } else { + p.error("expected a trait"); + } + } + p.expect(R_ANGLE); + } else { + if first { + p.eat(COLONCOLON); + } + match p.current() { + IDENT => { + name_ref(p); + opt_path_type_args(p, mode); + } + // test crate_path + // use crate::foo; + SELF_KW | SUPER_KW | CRATE_KW => p.bump(), + _ => { + p.err_recover("expected identifier", items::ITEM_RECOVERY_SET); + } + }; + } + m.complete(p, PATH_SEGMENT); +} + +fn opt_path_type_args(p: &mut Parser, mode: Mode) { + match mode { + Mode::Use => return, + Mode::Type => { + // test path_fn_trait_args + // type F = Box ()>; + if p.at(L_PAREN) { + params::param_list_opt_patterns(p); + opt_fn_ret_type(p); + } else { + type_args::opt_type_arg_list(p, false) + } + } + Mode::Expr => type_args::opt_type_arg_list(p, true), + } +} diff --git 
a/crates/ra_syntax/src/parsing/grammar/patterns.rs b/crates/ra_syntax/src/parsing/grammar/patterns.rs new file mode 100644 index 000000000..9d7da639d --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/patterns.rs @@ -0,0 +1,248 @@ +use super::*; + +pub(super) const PATTERN_FIRST: TokenSet = expressions::LITERAL_FIRST + .union(paths::PATH_FIRST) + .union(token_set![REF_KW, MUT_KW, L_PAREN, L_BRACK, AMP, UNDERSCORE]); + +pub(super) fn pattern(p: &mut Parser) { + pattern_r(p, PAT_RECOVERY_SET) +} + +pub(super) fn pattern_r(p: &mut Parser, recovery_set: TokenSet) { + if let Some(lhs) = atom_pat(p, recovery_set) { + // test range_pat + // fn main() { + // match 92 { + // 0 ... 100 => (), + // 101 ..= 200 => (), + // 200 .. 301=> (), + // } + // } + if p.at(DOTDOTDOT) || p.at(DOTDOTEQ) || p.at(DOTDOT) { + let m = lhs.precede(p); + p.bump(); + atom_pat(p, recovery_set); + m.complete(p, RANGE_PAT); + } + } +} + +const PAT_RECOVERY_SET: TokenSet = + token_set![LET_KW, IF_KW, WHILE_KW, LOOP_KW, MATCH_KW, R_PAREN, COMMA]; + +fn atom_pat(p: &mut Parser, recovery_set: TokenSet) -> Option { + let la0 = p.nth(0); + let la1 = p.nth(1); + if la0 == REF_KW + || la0 == MUT_KW + || (la0 == IDENT && !(la1 == COLONCOLON || la1 == L_PAREN || la1 == L_CURLY)) + { + return Some(bind_pat(p, true)); + } + if paths::is_path_start(p) { + return Some(path_pat(p)); + } + + if is_literal_pat_start(p) { + return Some(literal_pat(p)); + } + + let m = match la0 { + UNDERSCORE => placeholder_pat(p), + AMP => ref_pat(p), + L_PAREN => tuple_pat(p), + L_BRACK => slice_pat(p), + _ => { + p.err_recover("expected pattern", recovery_set); + return None; + } + }; + Some(m) +} + +fn is_literal_pat_start(p: &mut Parser) -> bool { + p.at(MINUS) && (p.nth(1) == INT_NUMBER || p.nth(1) == FLOAT_NUMBER) + || p.at_ts(expressions::LITERAL_FIRST) +} + +// test literal_pattern +// fn main() { +// match () { +// -1 => (), +// 92 => (), +// 'c' => (), +// "hello" => (), +// } +// } +fn literal_pat(p: &mut Parser) 
-> CompletedMarker { + assert!(is_literal_pat_start(p)); + let m = p.start(); + if p.at(MINUS) { + p.bump(); + } + expressions::literal(p); + m.complete(p, LITERAL_PAT) +} + +// test path_part +// fn foo() { +// let foo::Bar = (); +// let ::Bar = (); +// let Bar { .. } = (); +// let Bar(..) = (); +// } +fn path_pat(p: &mut Parser) -> CompletedMarker { + assert!(paths::is_path_start(p)); + let m = p.start(); + paths::expr_path(p); + let kind = match p.current() { + L_PAREN => { + tuple_pat_fields(p); + TUPLE_STRUCT_PAT + } + L_CURLY => { + field_pat_list(p); + STRUCT_PAT + } + _ => PATH_PAT, + }; + m.complete(p, kind) +} + +// test tuple_pat_fields +// fn foo() { +// let S() = (); +// let S(_) = (); +// let S(_,) = (); +// let S(_, .. , x) = (); +// } +fn tuple_pat_fields(p: &mut Parser) { + assert!(p.at(L_PAREN)); + p.bump(); + pat_list(p, R_PAREN); + p.expect(R_PAREN); +} + +// test field_pat_list +// fn foo() { +// let S {} = (); +// let S { f, ref mut g } = (); +// let S { h: _, ..} = (); +// let S { h: _, } = (); +// } +fn field_pat_list(p: &mut Parser) { + assert!(p.at(L_CURLY)); + let m = p.start(); + p.bump(); + while !p.at(EOF) && !p.at(R_CURLY) { + match p.current() { + DOTDOT => p.bump(), + IDENT if p.nth(1) == COLON => field_pat(p), + L_CURLY => error_block(p, "expected ident"), + _ => { + bind_pat(p, false); + } + } + if !p.at(R_CURLY) { + p.expect(COMMA); + } + } + p.expect(R_CURLY); + m.complete(p, FIELD_PAT_LIST); +} + +fn field_pat(p: &mut Parser) { + assert!(p.at(IDENT)); + assert!(p.nth(1) == COLON); + + let m = p.start(); + name(p); + p.bump(); + pattern(p); + m.complete(p, FIELD_PAT); +} + +// test placeholder_pat +// fn main() { let _ = (); } +fn placeholder_pat(p: &mut Parser) -> CompletedMarker { + assert!(p.at(UNDERSCORE)); + let m = p.start(); + p.bump(); + m.complete(p, PLACEHOLDER_PAT) +} + +// test ref_pat +// fn main() { +// let &a = (); +// let &mut b = (); +// } +fn ref_pat(p: &mut Parser) -> CompletedMarker { + assert!(p.at(AMP)); + 
let m = p.start(); + p.bump(); + p.eat(MUT_KW); + pattern(p); + m.complete(p, REF_PAT) +} + +// test tuple_pat +// fn main() { +// let (a, b, ..) = (); +// } +fn tuple_pat(p: &mut Parser) -> CompletedMarker { + assert!(p.at(L_PAREN)); + let m = p.start(); + tuple_pat_fields(p); + m.complete(p, TUPLE_PAT) +} + +// test slice_pat +// fn main() { +// let [a, b, ..] = []; +// } +fn slice_pat(p: &mut Parser) -> CompletedMarker { + assert!(p.at(L_BRACK)); + let m = p.start(); + p.bump(); + pat_list(p, R_BRACK); + p.expect(R_BRACK); + m.complete(p, SLICE_PAT) +} + +fn pat_list(p: &mut Parser, ket: SyntaxKind) { + while !p.at(EOF) && !p.at(ket) { + match p.current() { + DOTDOT => p.bump(), + _ => { + if !p.at_ts(PATTERN_FIRST) { + p.error("expected a pattern"); + break; + } + pattern(p) + } + } + if !p.at(ket) { + p.expect(COMMA); + } + } +} + +// test bind_pat +// fn main() { +// let a = (); +// let mut b = (); +// let ref c = (); +// let ref mut d = (); +// let e @ _ = (); +// let ref mut f @ g @ _ = (); +// } +fn bind_pat(p: &mut Parser, with_at: bool) -> CompletedMarker { + let m = p.start(); + p.eat(REF_KW); + p.eat(MUT_KW); + name(p); + if with_at && p.eat(AT) { + pattern(p); + } + m.complete(p, BIND_PAT) +} diff --git a/crates/ra_syntax/src/parsing/grammar/type_args.rs b/crates/ra_syntax/src/parsing/grammar/type_args.rs new file mode 100644 index 000000000..f889419c5 --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/type_args.rs @@ -0,0 +1,48 @@ +use super::*; + +pub(super) fn opt_type_arg_list(p: &mut Parser, colon_colon_required: bool) { + let m; + match (colon_colon_required, p.nth(0), p.nth(1)) { + (_, COLONCOLON, L_ANGLE) => { + m = p.start(); + p.bump(); + p.bump(); + } + (false, L_ANGLE, _) => { + m = p.start(); + p.bump(); + } + _ => return, + }; + + while !p.at(EOF) && !p.at(R_ANGLE) { + type_arg(p); + if !p.at(R_ANGLE) && !p.expect(COMMA) { + break; + } + } + p.expect(R_ANGLE); + m.complete(p, TYPE_ARG_LIST); +} + +// test type_arg +// type A = 
B<'static, i32, Item=u64>; +fn type_arg(p: &mut Parser) { + let m = p.start(); + match p.current() { + LIFETIME => { + p.bump(); + m.complete(p, LIFETIME_ARG); + } + IDENT if p.nth(1) == EQ => { + name_ref(p); + p.bump(); + types::type_(p); + m.complete(p, ASSOC_TYPE_ARG); + } + _ => { + types::type_(p); + m.complete(p, TYPE_ARG); + } + } +} diff --git a/crates/ra_syntax/src/parsing/grammar/type_params.rs b/crates/ra_syntax/src/parsing/grammar/type_params.rs new file mode 100644 index 000000000..40f998682 --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/type_params.rs @@ -0,0 +1,175 @@ +use super::*; + +pub(super) fn opt_type_param_list(p: &mut Parser) { + if !p.at(L_ANGLE) { + return; + } + type_param_list(p); +} + +fn type_param_list(p: &mut Parser) { + assert!(p.at(L_ANGLE)); + let m = p.start(); + p.bump(); + + while !p.at(EOF) && !p.at(R_ANGLE) { + let m = p.start(); + + // test generic_lifetime_type_attribute + // fn foo<#[derive(Lifetime)] 'a, #[derive(Type)] T>(_: &'a T) { + // } + attributes::outer_attributes(p); + + match p.current() { + LIFETIME => lifetime_param(p, m), + IDENT => type_param(p, m), + _ => { + m.abandon(p); + p.err_and_bump("expected type parameter") + } + } + if !p.at(R_ANGLE) && !p.expect(COMMA) { + break; + } + } + p.expect(R_ANGLE); + m.complete(p, TYPE_PARAM_LIST); +} + +fn lifetime_param(p: &mut Parser, m: Marker) { + assert!(p.at(LIFETIME)); + p.bump(); + if p.at(COLON) { + lifetime_bounds(p); + } + m.complete(p, LIFETIME_PARAM); +} + +fn type_param(p: &mut Parser, m: Marker) { + assert!(p.at(IDENT)); + name(p); + if p.at(COLON) { + bounds(p); + } + // test type_param_default + // struct S; + if p.at(EQ) { + p.bump(); + types::type_(p) + } + m.complete(p, TYPE_PARAM); +} + +// test type_param_bounds +// struct S; +pub(super) fn bounds(p: &mut Parser) { + assert!(p.at(COLON)); + p.bump(); + bounds_without_colon(p); +} + +fn lifetime_bounds(p: &mut Parser) { + assert!(p.at(COLON)); + p.bump(); + while p.at(LIFETIME) { + 
p.bump(); + if !p.eat(PLUS) { + break; + } + } +} + +pub(super) fn bounds_without_colon(p: &mut Parser) { + loop { + let has_paren = p.eat(L_PAREN); + p.eat(QUESTION); + match p.current() { + LIFETIME => p.bump(), + FOR_KW => types::for_type(p), + _ if paths::is_path_start(p) => types::path_type(p), + _ => break, + } + if has_paren { + p.expect(R_PAREN); + } + if !p.eat(PLUS) { + break; + } + } +} + +// test where_clause +// fn foo() +// where +// 'a: 'b + 'c, +// T: Clone + Copy + 'static, +// Iterator::Item: 'a, +// ::Item: 'a +// {} +pub(super) fn opt_where_clause(p: &mut Parser) { + if !p.at(WHERE_KW) { + return; + } + let m = p.start(); + p.bump(); + + while is_where_predicate(p) { + where_predicate(p); + + let comma = p.eat(COMMA); + + if is_where_clause_end(p) { + break; + } + + if !comma { + p.error("expected comma"); + } + } + + m.complete(p, WHERE_CLAUSE); +} + +fn is_where_predicate(p: &mut Parser) -> bool { + match p.current() { + LIFETIME => true, + IMPL_KW => false, + token => types::TYPE_FIRST.contains(token), + } +} + +fn is_where_clause_end(p: &mut Parser) -> bool { + p.current() == L_CURLY || p.current() == SEMI || p.current() == EQ +} + +fn where_predicate(p: &mut Parser) { + let m = p.start(); + match p.current() { + LIFETIME => { + p.bump(); + if p.at(COLON) { + lifetime_bounds(p); + } else { + p.error("expected colon"); + } + } + IMPL_KW => { + p.error("expected lifetime or type"); + } + _ => { + // test where_pred_for + // fn test() + // where + // for<'a> F: Fn(&'a str) + // { } + types::type_(p); + + if p.at(COLON) { + bounds(p); + } else { + p.error("expected colon"); + } + } + } + m.complete(p, WHERE_PRED); +} diff --git a/crates/ra_syntax/src/parsing/grammar/types.rs b/crates/ra_syntax/src/parsing/grammar/types.rs new file mode 100644 index 000000000..adc189a29 --- /dev/null +++ b/crates/ra_syntax/src/parsing/grammar/types.rs @@ -0,0 +1,278 @@ +use super::*; + +pub(super) const TYPE_FIRST: TokenSet = paths::PATH_FIRST.union(token_set![ + 
L_PAREN, EXCL, STAR, L_BRACK, AMP, UNDERSCORE, FN_KW, UNSAFE_KW, EXTERN_KW, FOR_KW, IMPL_KW, + DYN_KW, L_ANGLE, +]); + +const TYPE_RECOVERY_SET: TokenSet = token_set![R_PAREN, COMMA]; + +pub(super) fn type_(p: &mut Parser) { + type_with_bounds_cond(p, true); +} + +pub(super) fn type_no_bounds(p: &mut Parser) { + type_with_bounds_cond(p, false); +} + +fn type_with_bounds_cond(p: &mut Parser, allow_bounds: bool) { + match p.current() { + L_PAREN => paren_or_tuple_type(p), + EXCL => never_type(p), + STAR => pointer_type(p), + L_BRACK => array_or_slice_type(p), + AMP => reference_type(p), + UNDERSCORE => placeholder_type(p), + FN_KW | UNSAFE_KW | EXTERN_KW => fn_pointer_type(p), + FOR_KW => for_type(p), + IMPL_KW => impl_trait_type(p), + DYN_KW => dyn_trait_type(p), + // Some path types are not allowed to have bounds (no plus) + L_ANGLE => path_type_(p, allow_bounds), + _ if paths::is_path_start(p) => path_or_macro_type_(p, allow_bounds), + _ => { + p.err_recover("expected type", TYPE_RECOVERY_SET); + } + } +} + +pub(super) fn ascription(p: &mut Parser) { + p.expect(COLON); + type_(p) +} + +fn paren_or_tuple_type(p: &mut Parser) { + assert!(p.at(L_PAREN)); + let m = p.start(); + p.bump(); + let mut n_types: u32 = 0; + let mut trailing_comma: bool = false; + while !p.at(EOF) && !p.at(R_PAREN) { + n_types += 1; + type_(p); + if p.eat(COMMA) { + trailing_comma = true; + } else { + trailing_comma = false; + break; + } + } + p.expect(R_PAREN); + + let kind = if n_types == 1 && !trailing_comma { + // test paren_type + // type T = (i32); + PAREN_TYPE + } else { + // test unit_type + // type T = (); + + // test singleton_tuple_type + // type T = (i32,); + TUPLE_TYPE + }; + m.complete(p, kind); +} + +// test never_type +// type Never = !; +fn never_type(p: &mut Parser) { + assert!(p.at(EXCL)); + let m = p.start(); + p.bump(); + m.complete(p, NEVER_TYPE); +} + +fn pointer_type(p: &mut Parser) { + assert!(p.at(STAR)); + let m = p.start(); + p.bump(); + + match p.current() { + // 
test pointer_type_mut + // type M = *mut (); + // type C = *mut (); + MUT_KW | CONST_KW => p.bump(), + _ => { + // test_err pointer_type_no_mutability + // type T = *(); + p.error( + "expected mut or const in raw pointer type \ + (use `*mut T` or `*const T` as appropriate)", + ); + } + }; + + type_no_bounds(p); + m.complete(p, POINTER_TYPE); +} + +fn array_or_slice_type(p: &mut Parser) { + assert!(p.at(L_BRACK)); + let m = p.start(); + p.bump(); + + type_(p); + let kind = match p.current() { + // test slice_type + // type T = [()]; + R_BRACK => { + p.bump(); + SLICE_TYPE + } + + // test array_type + // type T = [(); 92]; + SEMI => { + p.bump(); + expressions::expr(p); + p.expect(R_BRACK); + ARRAY_TYPE + } + // test_err array_type_missing_semi + // type T = [() 92]; + _ => { + p.error("expected `;` or `]`"); + SLICE_TYPE + } + }; + m.complete(p, kind); +} + +// test reference_type; +// type A = &(); +// type B = &'static (); +// type C = &mut (); +fn reference_type(p: &mut Parser) { + assert!(p.at(AMP)); + let m = p.start(); + p.bump(); + p.eat(LIFETIME); + p.eat(MUT_KW); + type_no_bounds(p); + m.complete(p, REFERENCE_TYPE); +} + +// test placeholder_type +// type Placeholder = _; +fn placeholder_type(p: &mut Parser) { + assert!(p.at(UNDERSCORE)); + let m = p.start(); + p.bump(); + m.complete(p, PLACEHOLDER_TYPE); +} + +// test fn_pointer_type +// type A = fn(); +// type B = unsafe fn(); +// type C = unsafe extern "C" fn(); +fn fn_pointer_type(p: &mut Parser) { + let m = p.start(); + p.eat(UNSAFE_KW); + if p.at(EXTERN_KW) { + abi(p); + } + // test_err fn_pointer_type_missing_fn + // type F = unsafe (); + if !p.eat(FN_KW) { + m.abandon(p); + p.error("expected `fn`"); + return; + } + if p.at(L_PAREN) { + params::param_list_opt_patterns(p); + } else { + p.error("expected parameters") + } + // test fn_pointer_type_with_ret + // type F = fn() -> (); + opt_fn_ret_type(p); + m.complete(p, FN_POINTER_TYPE); +} + +pub(super) fn for_binder(p: &mut Parser) { + 
assert!(p.at(FOR_KW)); + p.bump(); + if p.at(L_ANGLE) { + type_params::opt_type_param_list(p); + } else { + p.error("expected `<`"); + } +} + +// test for_type +// type A = for<'a> fn() -> (); +pub(super) fn for_type(p: &mut Parser) { + assert!(p.at(FOR_KW)); + let m = p.start(); + for_binder(p); + match p.current() { + FN_KW | UNSAFE_KW | EXTERN_KW => fn_pointer_type(p), + _ if paths::is_path_start(p) => path_type_(p, false), + _ => p.error("expected a path"), + } + m.complete(p, FOR_TYPE); +} + +// test impl_trait_type +// type A = impl Iterator> + 'a; +fn impl_trait_type(p: &mut Parser) { + assert!(p.at(IMPL_KW)); + let m = p.start(); + p.bump(); + type_params::bounds_without_colon(p); + m.complete(p, IMPL_TRAIT_TYPE); +} + +// test dyn_trait_type +// type A = dyn Iterator> + 'a; +fn dyn_trait_type(p: &mut Parser) { + assert!(p.at(DYN_KW)); + let m = p.start(); + p.bump(); + type_params::bounds_without_colon(p); + m.complete(p, DYN_TRAIT_TYPE); +} + +// test path_type +// type A = Foo; +// type B = ::Foo; +// type C = self::Foo; +// type D = super::Foo; +pub(super) fn path_type(p: &mut Parser) { + path_type_(p, true) +} + +// test macro_call_type +// type A = foo!(); +// type B = crate::foo!(); +fn path_or_macro_type_(p: &mut Parser, allow_bounds: bool) { + assert!(paths::is_path_start(p) || p.at(L_ANGLE)); + let m = p.start(); + paths::type_path(p); + + let kind = if p.at(EXCL) { + items::macro_call_after_excl(p); + MACRO_CALL + } else { + PATH_TYPE + }; + + if allow_bounds && p.eat(PLUS) { + type_params::bounds_without_colon(p); + } + + m.complete(p, kind); +} + +pub(super) fn path_type_(p: &mut Parser, allow_bounds: bool) { + assert!(paths::is_path_start(p) || p.at(L_ANGLE)); + let m = p.start(); + paths::type_path(p); + // test path_type_with_bounds + // fn foo() -> Box {} + if allow_bounds && p.eat(PLUS) { + type_params::bounds_without_colon(p); + } + m.complete(p, PATH_TYPE); +} diff --git a/crates/ra_syntax/src/parsing/lexer.rs 
b/crates/ra_syntax/src/parsing/lexer.rs new file mode 100644 index 000000000..f9362120e --- /dev/null +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -0,0 +1,215 @@ +mod classes; +mod comments; +mod numbers; +mod ptr; +mod strings; + +use crate::{ + SyntaxKind::{self, *}, + TextUnit, +}; + +use self::{ + classes::*, + comments::{scan_comment, scan_shebang}, + numbers::scan_number, + ptr::Ptr, + strings::{ + is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, scan_string, + }, +}; + +/// A token of Rust source. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Token { + /// The kind of token. + pub kind: SyntaxKind, + /// The length of the token. + pub len: TextUnit, +} + +/// Break a string up into its component tokens +pub fn tokenize(text: &str) -> Vec { + let mut text = text; + let mut acc = Vec::new(); + while !text.is_empty() { + let token = next_token(text); + acc.push(token); + let len: u32 = token.len.into(); + text = &text[len as usize..]; + } + acc +} + +/// Get the next token from a string +pub fn next_token(text: &str) -> Token { + assert!(!text.is_empty()); + let mut ptr = Ptr::new(text); + let c = ptr.bump().unwrap(); + let kind = next_token_inner(c, &mut ptr); + let len = ptr.into_len(); + Token { kind, len } +} + +fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { + if is_whitespace(c) { + ptr.bump_while(is_whitespace); + return WHITESPACE; + } + + match c { + '#' => { + if scan_shebang(ptr) { + return SHEBANG; + } + } + '/' => { + if let Some(kind) = scan_comment(ptr) { + return kind; + } + } + _ => (), + } + + let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.current(), ptr.nth(1)); + if ident_start { + return scan_ident(c, ptr); + } + + if is_dec_digit(c) { + let kind = scan_number(c, ptr); + scan_literal_suffix(ptr); + return kind; + } + + // One-byte tokens. 
+ if let Some(kind) = SyntaxKind::from_char(c) { + return kind; + } + + match c { + // Multi-byte tokens. + '.' => { + return match (ptr.current(), ptr.nth(1)) { + (Some('.'), Some('.')) => { + ptr.bump(); + ptr.bump(); + DOTDOTDOT + } + (Some('.'), Some('=')) => { + ptr.bump(); + ptr.bump(); + DOTDOTEQ + } + (Some('.'), _) => { + ptr.bump(); + DOTDOT + } + _ => DOT, + }; + } + ':' => { + return match ptr.current() { + Some(':') => { + ptr.bump(); + COLONCOLON + } + _ => COLON, + }; + } + '=' => { + return match ptr.current() { + Some('=') => { + ptr.bump(); + EQEQ + } + Some('>') => { + ptr.bump(); + FAT_ARROW + } + _ => EQ, + }; + } + '!' => { + return match ptr.current() { + Some('=') => { + ptr.bump(); + NEQ + } + _ => EXCL, + }; + } + '-' => { + return if ptr.at('>') { + ptr.bump(); + THIN_ARROW + } else { + MINUS + }; + } + + // If the character is an ident start not followed by another single + // quote, then this is a lifetime name: + '\'' => { + return if ptr.at_p(is_ident_start) && !ptr.at_str("''") { + ptr.bump(); + while ptr.at_p(is_ident_continue) { + ptr.bump(); + } + // lifetimes shouldn't end with a single quote + // if we find one, then this is an invalid character literal + if ptr.at('\'') { + ptr.bump(); + return CHAR; + } + LIFETIME + } else { + scan_char(ptr); + scan_literal_suffix(ptr); + CHAR + }; + } + 'b' => { + let kind = scan_byte_char_or_string(ptr); + scan_literal_suffix(ptr); + return kind; + } + '"' => { + scan_string(ptr); + scan_literal_suffix(ptr); + return STRING; + } + 'r' => { + scan_raw_string(ptr); + scan_literal_suffix(ptr); + return RAW_STRING; + } + _ => (), + } + ERROR +} + +fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { + let is_raw = match (c, ptr.current()) { + ('r', Some('#')) => { + ptr.bump(); + true + } + ('_', Some(c)) if !is_ident_continue(c) => return UNDERSCORE, + _ => false, + }; + ptr.bump_while(is_ident_continue); + if !is_raw { + if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) { 
+ return kind; + } + } + IDENT +} + +fn scan_literal_suffix(ptr: &mut Ptr) { + if ptr.at_p(is_ident_start) { + ptr.bump(); + } + ptr.bump_while(is_ident_continue); +} diff --git a/crates/ra_syntax/src/parsing/lexer/classes.rs b/crates/ra_syntax/src/parsing/lexer/classes.rs new file mode 100644 index 000000000..4235d2648 --- /dev/null +++ b/crates/ra_syntax/src/parsing/lexer/classes.rs @@ -0,0 +1,26 @@ +use unicode_xid::UnicodeXID; + +pub fn is_ident_start(c: char) -> bool { + (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || c == '_' + || (c > '\x7f' && UnicodeXID::is_xid_start(c)) +} + +pub fn is_ident_continue(c: char) -> bool { + (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') + || c == '_' + || (c > '\x7f' && UnicodeXID::is_xid_continue(c)) +} + +pub fn is_whitespace(c: char) -> bool { + //FIXME: use is_pattern_whitespace + //https://github.com/behnam/rust-unic/issues/192 + c.is_whitespace() +} + +pub fn is_dec_digit(c: char) -> bool { + '0' <= c && c <= '9' +} diff --git a/crates/ra_syntax/src/parsing/lexer/comments.rs b/crates/ra_syntax/src/parsing/lexer/comments.rs new file mode 100644 index 000000000..8bbbe659b --- /dev/null +++ b/crates/ra_syntax/src/parsing/lexer/comments.rs @@ -0,0 +1,57 @@ +use crate::parsing::lexer::ptr::Ptr; + +use crate::SyntaxKind::{self, *}; + +pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { + if ptr.at_str("!/") { + ptr.bump(); + ptr.bump(); + bump_until_eol(ptr); + true + } else { + false + } +} + +fn scan_block_comment(ptr: &mut Ptr) -> Option { + if ptr.at('*') { + ptr.bump(); + let mut depth: u32 = 1; + while depth > 0 { + if ptr.at_str("*/") { + depth -= 1; + ptr.bump(); + ptr.bump(); + } else if ptr.at_str("/*") { + depth += 1; + ptr.bump(); + ptr.bump(); + } else if ptr.bump().is_none() { + break; + } + } + Some(COMMENT) + } else { + None + } +} + +pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option { + if ptr.at('/') { + bump_until_eol(ptr); + Some(COMMENT) + } else { + 
scan_block_comment(ptr) + } +} + +fn bump_until_eol(ptr: &mut Ptr) { + loop { + if ptr.at('\n') || ptr.at_str("\r\n") { + return; + } + if ptr.bump().is_none() { + break; + } + } +} diff --git a/crates/ra_syntax/src/parsing/lexer/numbers.rs b/crates/ra_syntax/src/parsing/lexer/numbers.rs new file mode 100644 index 000000000..7f6abe1d5 --- /dev/null +++ b/crates/ra_syntax/src/parsing/lexer/numbers.rs @@ -0,0 +1,69 @@ +use crate::parsing::lexer::{ + ptr::Ptr, + classes::*, +}; + +use crate::SyntaxKind::{self, *}; + +pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { + if c == '0' { + match ptr.current().unwrap_or('\0') { + 'b' | 'o' => { + ptr.bump(); + scan_digits(ptr, false); + } + 'x' => { + ptr.bump(); + scan_digits(ptr, true); + } + '0'...'9' | '_' | '.' | 'e' | 'E' => { + scan_digits(ptr, true); + } + _ => return INT_NUMBER, + } + } else { + scan_digits(ptr, false); + } + + // might be a float, but don't be greedy if this is actually an + // integer literal followed by field/method access or a range pattern + // (`0..2` and `12.foo()`) + if ptr.at('.') && !(ptr.at_str("..") || ptr.nth_is_p(1, is_ident_start)) { + // might have stuff after the ., and if it does, it needs to start + // with a number + ptr.bump(); + scan_digits(ptr, false); + scan_float_exponent(ptr); + return FLOAT_NUMBER; + } + // it might be a float if it has an exponent + if ptr.at('e') || ptr.at('E') { + scan_float_exponent(ptr); + return FLOAT_NUMBER; + } + INT_NUMBER +} + +fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { + while let Some(c) = ptr.current() { + match c { + '_' | '0'...'9' => { + ptr.bump(); + } + 'a'...'f' | 'A'...'F' if allow_hex => { + ptr.bump(); + } + _ => return, + } + } +} + +fn scan_float_exponent(ptr: &mut Ptr) { + if ptr.at('e') || ptr.at('E') { + ptr.bump(); + if ptr.at('-') || ptr.at('+') { + ptr.bump(); + } + scan_digits(ptr, false); + } +} diff --git a/crates/ra_syntax/src/parsing/lexer/ptr.rs b/crates/ra_syntax/src/parsing/lexer/ptr.rs new file 
mode 100644 index 000000000..c341c4176 --- /dev/null +++ b/crates/ra_syntax/src/parsing/lexer/ptr.rs @@ -0,0 +1,162 @@ +use crate::TextUnit; + +use std::str::Chars; + +/// A simple view into the characters of a string. +pub(crate) struct Ptr<'s> { + text: &'s str, + len: TextUnit, +} + +impl<'s> Ptr<'s> { + /// Creates a new `Ptr` from a string. + pub fn new(text: &'s str) -> Ptr<'s> { + Ptr { text, len: 0.into() } + } + + /// Consumes the `Ptr` and returns the length of the text consumed so far. + pub fn into_len(self) -> TextUnit { + self.len + } + + /// Gets the current character, if one exists. + pub fn current(&self) -> Option { + self.chars().next() + } + + /// Gets the nth character from the current. + /// For example, 0 will return the current character, 1 will return the next, etc. + pub fn nth(&self, n: u32) -> Option { + self.chars().nth(n as usize) + } + + /// Checks whether the current character is `c`. + pub fn at(&self, c: char) -> bool { + self.current() == Some(c) + } + + /// Checks whether the next characters match `s`. + pub fn at_str(&self, s: &str) -> bool { + let chars = self.chars(); + chars.as_str().starts_with(s) + } + + /// Checks whether the current character satisfies the predicate `p`. + pub fn at_p bool>(&self, p: P) -> bool { + self.current().map(p) == Some(true) + } + + /// Checks whether the nth character satisfies the predicate `p`. + pub fn nth_is_p bool>(&self, n: u32, p: P) -> bool { + self.nth(n).map(p) == Some(true) + } + + /// Moves to the next character. + pub fn bump(&mut self) -> Option { + let ch = self.chars().next()?; + self.len += TextUnit::of_char(ch); + Some(ch) + } + + /// Moves to the next character as long as `pred` is satisfied. + pub fn bump_while bool>(&mut self, pred: F) { + loop { + match self.current() { + Some(c) if pred(c) => { + self.bump(); + } + _ => return, + } + } + } + + /// Returns the text up to the current point. 
+ pub fn current_token_text(&self) -> &str { + let len: u32 = self.len.into(); + &self.text[..len as usize] + } + + /// Returns an iterator over the remaining characters. + fn chars(&self) -> Chars { + let len: u32 = self.len.into(); + self.text[len as usize..].chars() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_current() { + let ptr = Ptr::new("test"); + assert_eq!(ptr.current(), Some('t')); + } + + #[test] + fn test_nth() { + let ptr = Ptr::new("test"); + assert_eq!(ptr.nth(0), Some('t')); + assert_eq!(ptr.nth(1), Some('e')); + assert_eq!(ptr.nth(2), Some('s')); + assert_eq!(ptr.nth(3), Some('t')); + assert_eq!(ptr.nth(4), None); + } + + #[test] + fn test_at() { + let ptr = Ptr::new("test"); + assert!(ptr.at('t')); + assert!(!ptr.at('a')); + } + + #[test] + fn test_at_str() { + let ptr = Ptr::new("test"); + assert!(ptr.at_str("t")); + assert!(ptr.at_str("te")); + assert!(ptr.at_str("test")); + assert!(!ptr.at_str("tests")); + assert!(!ptr.at_str("rust")); + } + + #[test] + fn test_at_p() { + let ptr = Ptr::new("test"); + assert!(ptr.at_p(|c| c == 't')); + assert!(!ptr.at_p(|c| c == 'e')); + } + + #[test] + fn test_nth_is_p() { + let ptr = Ptr::new("test"); + assert!(ptr.nth_is_p(0, |c| c == 't')); + assert!(!ptr.nth_is_p(1, |c| c == 't')); + assert!(ptr.nth_is_p(3, |c| c == 't')); + assert!(!ptr.nth_is_p(150, |c| c == 't')); + } + + #[test] + fn test_bump() { + let mut ptr = Ptr::new("test"); + assert_eq!(ptr.current(), Some('t')); + ptr.bump(); + assert_eq!(ptr.current(), Some('e')); + ptr.bump(); + assert_eq!(ptr.current(), Some('s')); + ptr.bump(); + assert_eq!(ptr.current(), Some('t')); + ptr.bump(); + assert_eq!(ptr.current(), None); + ptr.bump(); + assert_eq!(ptr.current(), None); + } + + #[test] + fn test_bump_while() { + let mut ptr = Ptr::new("test"); + assert_eq!(ptr.current(), Some('t')); + ptr.bump_while(|c| c != 's'); + assert_eq!(ptr.current(), Some('s')); + } +} diff --git 
a/crates/ra_syntax/src/parsing/lexer/strings.rs b/crates/ra_syntax/src/parsing/lexer/strings.rs new file mode 100644 index 000000000..f74acff9e --- /dev/null +++ b/crates/ra_syntax/src/parsing/lexer/strings.rs @@ -0,0 +1,112 @@ +use crate::{ + parsing::lexer::ptr::Ptr, + SyntaxKind::{self, *}, +}; + +pub(crate) fn is_string_literal_start(c: char, c1: Option, c2: Option) -> bool { + match (c, c1, c2) { + ('r', Some('"'), _) + | ('r', Some('#'), Some('"')) + | ('r', Some('#'), Some('#')) + | ('b', Some('"'), _) + | ('b', Some('\''), _) + | ('b', Some('r'), Some('"')) + | ('b', Some('r'), Some('#')) => true, + _ => false, + } +} + +pub(crate) fn scan_char(ptr: &mut Ptr) { + while let Some(c) = ptr.current() { + match c { + '\\' => { + ptr.bump(); + if ptr.at('\\') || ptr.at('\'') { + ptr.bump(); + } + } + '\'' => { + ptr.bump(); + return; + } + '\n' => return, + _ => { + ptr.bump(); + } + } + } +} + +pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind { + // unwrapping and not-exhaustive match are ok + // because of string_literal_start + let c = ptr.bump().unwrap(); + match c { + '\'' => { + scan_byte(ptr); + BYTE + } + '"' => { + scan_byte_string(ptr); + BYTE_STRING + } + 'r' => { + scan_raw_string(ptr); + RAW_BYTE_STRING + } + _ => unreachable!(), + } +} + +pub(crate) fn scan_string(ptr: &mut Ptr) { + while let Some(c) = ptr.current() { + match c { + '\\' => { + ptr.bump(); + if ptr.at('\\') || ptr.at('"') { + ptr.bump(); + } + } + '"' => { + ptr.bump(); + return; + } + _ => { + ptr.bump(); + } + } + } +} + +pub(crate) fn scan_raw_string(ptr: &mut Ptr) { + let mut hashes = 0; + while ptr.at('#') { + hashes += 1; + ptr.bump(); + } + if !ptr.at('"') { + return; + } + ptr.bump(); + + while let Some(c) = ptr.bump() { + if c == '"' { + let mut hashes_left = hashes; + while ptr.at('#') && hashes_left > 0 { + hashes_left -= 1; + ptr.bump(); + } + if hashes_left == 0 { + return; + } + } + } +} + +fn scan_byte(ptr: &mut Ptr) { + scan_char(ptr) +} + +fn 
scan_byte_string(ptr: &mut Ptr) { + scan_string(ptr) +} diff --git a/crates/ra_syntax/src/parsing/parser_api.rs b/crates/ra_syntax/src/parsing/parser_api.rs new file mode 100644 index 000000000..781c407de --- /dev/null +++ b/crates/ra_syntax/src/parsing/parser_api.rs @@ -0,0 +1,195 @@ +use drop_bomb::DropBomb; + +use crate::{ + SyntaxKind::{self, ERROR}, + parsing::{ + token_set::TokenSet, + parser_impl::ParserImpl + }, +}; + +/// `Parser` struct provides the low-level API for +/// navigating through the stream of tokens and +/// constructing the parse tree. The actual parsing +/// happens in the `grammar` module. +/// +/// However, the result of this `Parser` is not a real +/// tree, but rather a flat stream of events of the form +/// "start expression, consume number literal, +/// finish expression". See `Event` docs for more. +pub(crate) struct Parser<'t>(pub(super) ParserImpl<'t>); + +impl<'t> Parser<'t> { + /// Returns the kind of the current token. + /// If parser has already reached the end of input, + /// the special `EOF` kind is returned. + pub(crate) fn current(&self) -> SyntaxKind { + self.nth(0) + } + + /// Returns the kinds of the current two tokens, if they are not separated + /// by trivia. + /// + /// Useful for parsing things like `>>`. + pub(crate) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { + self.0.current2() + } + + /// Returns the kinds of the current three tokens, if they are not separated + /// by trivia. + /// + /// Useful for parsing things like `=>>`. + pub(crate) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { + self.0.current3() + } + + /// Lookahead operation: returns the kind of the next nth + /// token. + pub(crate) fn nth(&self, n: u32) -> SyntaxKind { + self.0.nth(n) + } + + /// Checks if the current token is `kind`. + pub(crate) fn at(&self, kind: SyntaxKind) -> bool { + self.current() == kind + } + + /// Checks if the current token is in `kinds`. 
+ pub(crate) fn at_ts(&self, kinds: TokenSet) -> bool { + kinds.contains(self.current()) + } + + /// Checks if the current token is contextual keyword with text `t`. + pub(crate) fn at_contextual_kw(&self, t: &str) -> bool { + self.0.at_kw(t) + } + + /// Starts a new node in the syntax tree. All nodes and tokens + /// consumed between the `start` and the corresponding `Marker::complete` + /// belong to the same node. + pub(crate) fn start(&mut self) -> Marker { + Marker::new(self.0.start()) + } + + /// Advances the parser by one token unconditionally. + pub(crate) fn bump(&mut self) { + self.0.bump(); + } + + /// Advances the parser by one token, remapping its kind. + /// This is useful to create contextual keywords from + /// identifiers. For example, the lexer creates an `union` + /// *identifier* token, but the parser remaps it to the + /// `union` keyword, and keyword is what ends up in the + /// final tree. + pub(crate) fn bump_remap(&mut self, kind: SyntaxKind) { + self.0.bump_remap(kind); + } + + /// Advances the parser by `n` tokens, remapping its kind. + /// This is useful to create compound tokens from parts. For + /// example, an `<<` token is two consecutive remapped `<` tokens + pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { + self.0.bump_compound(kind, n); + } + + /// Emit error with the `message` + /// TODO: this should be much more fancy and support + /// structured errors with spans and notes, like rustc + /// does. + pub(crate) fn error>(&mut self, message: T) { + self.0.error(message.into()) + } + + /// Consume the next token if `kind` matches. + pub(crate) fn eat(&mut self, kind: SyntaxKind) -> bool { + if !self.at(kind) { + return false; + } + self.bump(); + true + } + + /// Consume the next token if it is `kind` or emit an error + /// otherwise. 
+ pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { + if self.eat(kind) { + return true; + } + self.error(format!("expected {:?}", kind)); + false + } + + /// Create an error node and consume the next token. + pub(crate) fn err_and_bump(&mut self, message: &str) { + self.err_recover(message, TokenSet::empty()); + } + + /// Emit an error; unless at `{`, `}` or a `recovery` token, also wrap the next token in an `ERROR` node. + pub(crate) fn err_recover(&mut self, message: &str, recovery: TokenSet) { + if self.at(SyntaxKind::L_CURLY) || self.at(SyntaxKind::R_CURLY) || self.at_ts(recovery) { + self.error(message); + } else { + let m = self.start(); + self.error(message); + self.bump(); + m.complete(self, ERROR); + }; + } +} + +/// See `Parser::start`. +pub(crate) struct Marker { + pos: u32, + bomb: DropBomb, +} + +impl Marker { + fn new(pos: u32) -> Marker { + Marker { pos, bomb: DropBomb::new("Marker must be either completed or abandoned") } + } + + /// Finishes the syntax tree node and assigns `kind` to it, + /// and creates a `CompletedMarker` for possible future + /// operation like `.precede()` to deal with forward_parent. + pub(crate) fn complete(mut self, p: &mut Parser, kind: SyntaxKind) -> CompletedMarker { + self.bomb.defuse(); + p.0.complete(self.pos, kind); + CompletedMarker::new(self.pos, kind) + } + + /// Abandons the syntax tree node. All its children + /// are attached to its parent instead. + pub(crate) fn abandon(mut self, p: &mut Parser) { + self.bomb.defuse(); + p.0.abandon(self.pos); + } +} + +pub(crate) struct CompletedMarker(u32, SyntaxKind); + +impl CompletedMarker { + fn new(pos: u32, kind: SyntaxKind) -> Self { + CompletedMarker(pos, kind) + } + + /// This method allows to create a new node which starts + /// *before* the current one. That is, parser could start + /// node `A`, then complete it, and then after parsing the + /// whole `A`, decide that it should have started some node + /// `B` before starting `A`. `precede` allows to do exactly + /// that. 
See also docs about `forward_parent` in `Event::Start`. + /// + /// Given completed events `[START, FINISH]` and their corresponding + /// `CompletedMarker(pos: 0, _)`, + /// append a new `START` event as `[START, FINISH, NEWSTART]`, + /// then mark `NEWSTART` as `START`'s parent by saving its relative + /// distance to `NEWSTART` into `forward_parent` (=2 in this case). + pub(crate) fn precede(self, p: &mut Parser) -> Marker { + Marker::new(p.0.precede(self.0)) + } + + pub(crate) fn kind(&self) -> SyntaxKind { + self.1 + } +} diff --git a/crates/ra_syntax/src/parsing/parser_impl.rs b/crates/ra_syntax/src/parsing/parser_impl.rs new file mode 100644 index 000000000..c639d83e8 --- /dev/null +++ b/crates/ra_syntax/src/parsing/parser_impl.rs @@ -0,0 +1,199 @@ +mod event; +mod input; + +use std::cell::Cell; + +use crate::{ + SmolStr, + syntax_node::syntax_error::{ParseError, SyntaxError}, + parsing::{ + lexer::Token, + parser_api::Parser, + parser_impl::{ + event::{Event, EventProcessor}, + input::{InputPosition, ParserInput}, + }, +}}; + +use crate::SyntaxKind::{self, EOF, TOMBSTONE}; + +pub(crate) trait Sink { + type Tree; + + /// Adds new leaf to the current branch. + fn leaf(&mut self, kind: SyntaxKind, text: SmolStr); + + /// Start new branch and make it current. + fn start_branch(&mut self, kind: SyntaxKind); + + /// Finish current branch and restore previous + /// branch as current. + fn finish_branch(&mut self); + + fn error(&mut self, error: SyntaxError); + + /// Complete tree building. Make sure that + /// `start_branch` and `finish_branch` calls + /// are paired! 
+ fn finish(self) -> Self::Tree; +} + +/// Parse a sequence of tokens into the representative node tree +pub(crate) fn parse_with( + sink: S, + text: &str, + tokens: &[Token], + parser: fn(&mut Parser), +) -> S::Tree { + let mut events = { + let input = input::ParserInput::new(text, tokens); + let parser_impl = ParserImpl::new(&input); + let mut parser_api = Parser(parser_impl); + parser(&mut parser_api); + parser_api.0.into_events() + }; + EventProcessor::new(sink, text, tokens, &mut events).process().finish() +} + +/// Implementation details of `Parser`, extracted +/// to a separate struct in order not to pollute +/// the public API of the `Parser`. +pub(crate) struct ParserImpl<'t> { + parser_input: &'t ParserInput<'t>, + pos: InputPosition, + events: Vec, + steps: Cell, +} + +impl<'t> ParserImpl<'t> { + fn new(inp: &'t ParserInput<'t>) -> ParserImpl<'t> { + ParserImpl { + parser_input: inp, + pos: InputPosition::new(), + events: Vec::new(), + steps: Cell::new(0), + } + } + + fn into_events(self) -> Vec { + assert_eq!(self.nth(0), EOF); + self.events + } + + pub(super) fn current2(&self) -> Option<(SyntaxKind, SyntaxKind)> { + let c1 = self.parser_input.kind(self.pos); + let c2 = self.parser_input.kind(self.pos + 1); + if self.parser_input.token_start_at(self.pos + 1) + == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) + { + Some((c1, c2)) + } else { + None + } + } + + pub(super) fn current3(&self) -> Option<(SyntaxKind, SyntaxKind, SyntaxKind)> { + let c1 = self.parser_input.kind(self.pos); + let c2 = self.parser_input.kind(self.pos + 1); + let c3 = self.parser_input.kind(self.pos + 2); + if self.parser_input.token_start_at(self.pos + 1) + == self.parser_input.token_start_at(self.pos) + self.parser_input.token_len(self.pos) + && self.parser_input.token_start_at(self.pos + 2) + == self.parser_input.token_start_at(self.pos + 1) + + self.parser_input.token_len(self.pos + 1) + { + Some((c1, c2, c3)) + } else { + None + } + } + + 
/// Get the syntax kind of the nth token.
///
/// Every call bumps an internal step counter.
///
/// # Panics
///
/// Panics with "the parser seems stuck" once more than 10_000_000 look-ups
/// have been made.
pub(super) fn nth(&self, n: u32) -> SyntaxKind {
    let steps = self.steps.get();
    assert!(steps <= 10_000_000, "the parser seems stuck");
    self.steps.set(steps + 1);

    self.parser_input.kind(self.pos + n)
}

/// Check whether the text of the current token is exactly `t`.
pub(super) fn at_kw(&self, t: &str) -> bool {
    self.parser_input.token_text(self.pos) == t
}

/// Start parsing right behind the last event.
///
/// Pushes a tombstone `Start` event and returns its index in the event list.
pub(super) fn start(&mut self) -> u32 {
    let pos = self.events.len() as u32;
    self.push_event(Event::tombstone());
    pos
}

/// Advances the parser by one token unconditionally.
///
/// Does nothing when the current token is `EOF`.
pub(super) fn bump(&mut self) {
    let kind = self.nth(0);
    if kind == EOF {
        return;
    }
    self.do_bump(kind, 1);
}

/// Advance by one token, recording it with the given `kind` instead of the
/// kind reported by the input. Does nothing when the current token is `EOF`.
pub(super) fn bump_remap(&mut self, kind: SyntaxKind) {
    if self.nth(0) == EOF {
        // TODO: panic!?
        return;
    }
    self.do_bump(kind, 1);
}

/// Advance by `n` raw tokens, recording them as a single token of `kind`.
///
/// Unlike `bump`, this does not check for `EOF`.
pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) {
    self.do_bump(kind, n);
}

/// Move the position forward by `n_raw_tokens` and push the matching
/// `Token` event.
fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) {
    self.pos += u32::from(n_raw_tokens);
    self.push_event(Event::Token { kind, n_raw_tokens });
}

/// Append one Error event to the back of events.
pub(super) fn error(&mut self, msg: String) {
    self.push_event(Event::Error { msg: ParseError(msg) })
}

/// Complete an event with appending a `Finish` event.
///
/// `pos` must be the index of a `Start` event; its kind is overwritten with
/// `kind`. Any other event at `pos` is a bug and hits `unreachable!()`.
pub(super) fn complete(&mut self, pos: u32, kind: SyntaxKind) {
    match self.events[pos as usize] {
        Event::Start { kind: ref mut slot, .. } => {
            *slot = kind;
        }
        _ => unreachable!(),
    }
    self.push_event(Event::Finish);
}

/// Ignore the dummy `Start` event.
///
/// If the event at `pos` is the last one it is popped, and must be an
/// untouched tombstone. Otherwise it is left in place.
pub(super) fn abandon(&mut self, pos: u32) {
    let idx = pos as usize;
    // `idx + 1 == len` instead of `idx == len - 1`: same result for a
    // non-empty list, but no underflow if the list is empty.
    if idx + 1 == self.events.len() {
        match self.events.pop() {
            Some(Event::Start { kind: TOMBSTONE, forward_parent: None }) => (),
            _ => unreachable!(),
        }
    }
}

/// Save the relative distance of a completed event to its forward_parent.
pub(super) fn precede(&mut self, pos: u32) -> u32 {
    // The new `Start` event becomes the parent of the event at `pos`.
    let new_pos = self.start();
    match self.events[pos as usize] {
        Event::Start { ref mut forward_parent, .. } => {
            // Stored as an offset relative to `pos`, not an absolute index.
            *forward_parent = Some(new_pos - pos);
        }
        _ => unreachable!(),
    }
    new_pos
}

/// Append `event` to the event list.
fn push_event(&mut self, event: Event) {
    self.events.push(event)
}
}
diff --git a/crates/ra_syntax/src/parsing/parser_impl/event.rs b/crates/ra_syntax/src/parsing/parser_impl/event.rs
new file mode 100644
index 000000000..fb43e19cc
--- /dev/null
+++ b/crates/ra_syntax/src/parsing/parser_impl/event.rs
@@ -0,0 +1,254 @@
//! This module provides a way to construct a `File`.
//! It is intended to be completely decoupled from the
//! parser, so as to allow to evolve the tree representation
//! and the parser algorithm independently.
//!
//! The `Sink` trait is the bridge between the parser and the
//! tree builder: the parser produces a stream of events like
//! `start node`, `finish node`, and `FileBuilder` converts
//! this stream to a real tree.
use std::mem;

use crate::{
    SmolStr,
    SyntaxKind::{self, *},
    TextRange, TextUnit,
    syntax_node::syntax_error::{
        ParseError,
        SyntaxError,
        SyntaxErrorKind,
    },
    parsing::{
        lexer::Token,
        parser_impl::Sink,
    },
};

/// `Parser` produces a flat list of `Event`s.
/// They are converted to a tree-structure in
/// a separate pass, via `TreeBuilder`.
#[derive(Debug)]
pub(crate) enum Event {
    /// This event signifies the start of the node.
    /// It should be either abandoned (in which case the
    /// `kind` is `TOMBSTONE`, and the event is ignored),
    /// or completed via a `Finish` event.
    ///
    /// All tokens between a `Start` and a `Finish` would
    /// become the children of the respective node.
    ///
    /// For left-recursive syntactic constructs, the parser produces
    /// a child node before it sees a parent. `forward_parent`
    /// saves the position of current event's parent.
    ///
    /// Consider this path
    ///
    /// foo::bar
    ///
    /// The events for it would look like this:
    ///
    ///
    /// START(PATH) IDENT('foo') FINISH START(PATH) COLONCOLON IDENT('bar') FINISH
    ///       |                          /\
    ///       |                          |
    ///       +------forward-parent------+
    ///
    /// And the tree would look like this
    ///
    ///    +--PATH---------+
    ///    |   |           |
    ///    |   |           |
    ///    |  '::'       'bar'
    ///    |
    ///   PATH
    ///    |
    ///   'foo'
    ///
    /// See also `CompletedMarker::precede`.
    Start {
        kind: SyntaxKind,
        /// Distance, in events, from this event forward to its parent's
        /// `Start` event (see `precede`).
        forward_parent: Option<u32>,
    },

    /// Complete the previous `Start` event
    Finish,

    /// Produce a single leaf-element.
    /// `n_raw_tokens` is used to glue complex contextual tokens.
    /// For example, lexer tokenizes `>>` as `>`, `>`, and
    /// `n_raw_tokens = 2` is used to produce a single `>>`.
    Token {
        kind: SyntaxKind,
        n_raw_tokens: u8,
    },

    /// Report a parse error at the current position.
    Error {
        msg: ParseError,
    },
}

impl Event {
    /// A placeholder `Start` event: kind `TOMBSTONE`, no forward parent.
    pub(crate) fn tombstone() -> Self {
        Event::Start { kind: TOMBSTONE, forward_parent: None }
    }
}

/// Replays a list of `Event`s into a `Sink`, re-inserting the trivia tokens
/// that the parser itself never sees.
pub(super) struct EventProcessor<'a, S: Sink> {
    sink: S,
    /// Offset into `text` of the next raw token to be emitted.
    text_pos: TextUnit,
    text: &'a str,
    /// Index into `tokens` of the next raw token to be emitted.
    token_pos: usize,
    /// Raw tokens, trivia included.
    tokens: &'a [Token],
    events: &'a mut [Event],
}

impl<'a, S: Sink> EventProcessor<'a, S> {
    /// Create a processor positioned at the start of `text` / `tokens`.
    pub(super) fn new(
        sink: S,
        text: &'a str,
        tokens: &'a [Token],
        events: &'a mut [Event],
    ) -> EventProcessor<'a, S> {
        EventProcessor { sink, text_pos: 0.into(), text, token_pos: 0, tokens, events }
    }

    /// Generate the syntax tree with the control of events.
    ///
    /// Each event is taken out of the list (replaced by a tombstone) as it is
    /// processed. Returns the sink once all events have been replayed.
    pub(super) fn process(mut self) -> S {
        // Scratch buffer, reused across iterations.
        let mut forward_parents = Vec::new();

        for i in 0..self.events.len() {
            match mem::replace(&mut self.events[i], Event::tombstone()) {
                // Abandoned, or already consumed as somebody's forward parent.
                Event::Start { kind: TOMBSTONE, .. } => (),

                Event::Start { kind, forward_parent } => {
                    // For events[A, B, C], B is A's forward_parent, C is B's forward_parent,
                    // in the normal control flow, the parent-child relation: `A -> B -> C`,
                    // while with the magic forward_parent, it writes: `C <- B <- A`.

                    // append `A` into parents.
                    forward_parents.push(kind);
                    let mut idx = i;
                    let mut fp = forward_parent;
                    while let Some(fwd) = fp {
                        idx += fwd as usize;
                        // append `A`'s forward_parent `B`
                        fp = match mem::replace(&mut self.events[idx], Event::tombstone()) {
                            Event::Start { kind, forward_parent } => {
                                forward_parents.push(kind);
                                forward_parent
                            }
                            _ => unreachable!(),
                        };
                        // append `B`'s forward_parent `C` in the next stage.
                    }

                    // Outermost parent was pushed last, so start nodes in reverse.
                    for kind in forward_parents.drain(..).rev() {
                        self.start(kind);
                    }
                }
                Event::Finish => {
                    let is_last = i == self.events.len() - 1;
                    self.finish(is_last);
                }
                Event::Token { kind, n_raw_tokens } => {
                    self.eat_trivias();
                    let n_raw_tokens = n_raw_tokens as usize;
                    // A compound token covers the text of all its raw tokens.
                    let len = self.tokens[self.token_pos..self.token_pos + n_raw_tokens]
                        .iter()
                        .map(|it| it.len)
                        .sum::<TextUnit>();
                    self.leaf(kind, len, n_raw_tokens);
                }
                Event::Error { msg } => self
                    .sink
                    .error(SyntaxError::new(SyntaxErrorKind::ParseError(msg), self.text_pos)),
            }
        }
        self.sink
    }

    /// Add the node into syntax tree but discard the comments/whitespaces.
fn start(&mut self, kind: SyntaxKind) {
    // The root node is started before any trivia is emitted.
    if kind == SOURCE_FILE {
        self.sink.start_branch(kind);
        return;
    }
    let n_trivias =
        self.tokens[self.token_pos..].iter().take_while(|it| it.kind.is_trivia()).count();
    let leading_trivias = &self.tokens[self.token_pos..self.token_pos + n_trivias];
    let mut trivia_end =
        self.text_pos + leading_trivias.iter().map(|it| it.len).sum::<TextUnit>();

    let n_attached_trivias = {
        // Walk the trivia backwards (closest to the node first), pairing each
        // token kind with its text.
        let leading_trivias = leading_trivias.iter().rev().map(|it| {
            let next_end = trivia_end - it.len;
            let range = TextRange::from_to(next_end, trivia_end);
            trivia_end = next_end;
            (it.kind, &self.text[range])
        });
        n_attached_trivias(kind, leading_trivias)
    };
    // Unattached trivia goes before the node, attached trivia inside it.
    self.eat_n_trivias(n_trivias - n_attached_trivias);
    self.sink.start_branch(kind);
    self.eat_n_trivias(n_attached_trivias);
}

/// Finish the current node. For the very last event, the remaining trivia
/// is emitted first so that it ends up inside the node being closed.
fn finish(&mut self, is_last: bool) {
    if is_last {
        self.eat_trivias()
    }
    self.sink.finish_branch();
}

/// Emit every trivia token at the current position as a leaf.
fn eat_trivias(&mut self) {
    while let Some(&token) = self.tokens.get(self.token_pos) {
        if !token.kind.is_trivia() {
            break;
        }
        self.leaf(token.kind, token.len, 1);
    }
}

/// Emit exactly `n` trivia tokens as leaves.
///
/// # Panics
///
/// Panics if one of the next `n` tokens is missing or is not trivia.
fn eat_n_trivias(&mut self, n: usize) {
    for _ in 0..n {
        let token = self.tokens[self.token_pos];
        assert!(token.kind.is_trivia());
        self.leaf(token.kind, token.len, 1);
    }
}

/// Emit a leaf of `kind` covering the next `len` units of text, then advance
/// the text position by `len` and the token position by `n_tokens`.
fn leaf(&mut self, kind: SyntaxKind, len: TextUnit, n_tokens: usize) {
    let range = TextRange::offset_len(self.text_pos, len);
    let text: SmolStr = self.text[range].into();
    self.text_pos += len;
    self.token_pos += n_tokens;
    self.sink.leaf(kind, text);
}
}

/// Count how many of `trivias` should be attached to a node of `kind`.
///
/// `trivias` yields `(kind, text)` pairs; the caller passes them nearest to
/// the node first. For the definition kinds listed below, the result reaches
/// up to and including the farthest comment that is not separated from the
/// node by whitespace containing a blank line (`"\n\n"`). For every other
/// kind the result is 0.
fn n_attached_trivias<'a>(
    kind: SyntaxKind,
    trivias: impl Iterator<Item = (SyntaxKind, &'a str)>,
) -> usize {
    match kind {
        CONST_DEF | TYPE_DEF | STRUCT_DEF | ENUM_DEF | ENUM_VARIANT | FN_DEF | TRAIT_DEF
        | MODULE | NAMED_FIELD_DEF => {
            let mut res = 0;
            for (i, (kind, text)) in trivias.enumerate() {
                match kind {
                    WHITESPACE => {
                        // A blank line ends the run of attached trivia.
                        if text.contains("\n\n") {
                            break;
                        }
                    }
                    COMMENT => {
                        res = i + 1;
                    }
                    _ => (),
                }
            }
            res
        }
        _ => 0,
    }
}
diff --git a/crates/ra_syntax/src/parsing/parser_impl/input.rs b/crates/ra_syntax/src/parsing/parser_impl/input.rs
new file mode 100644
index 000000000..275d94918
--- /dev/null
+++ b/crates/ra_syntax/src/parsing/parser_impl/input.rs
@@ -0,0 +1,104 @@
use crate::{
    SyntaxKind, SyntaxKind::EOF, TextRange, TextUnit,
    parsing::lexer::Token,
};

use std::ops::{Add, AddAssign};

pub(crate) struct ParserInput<'t> {
    text: &'t str,
    /// start position of each token(except whitespace and comment)
    /// ```non-rust
    ///  struct Foo;
    /// ^------^---
    /// |      |  ^-
    /// 0      7  10
    /// ```
    /// (token, start_offset): `[(struct, 0), (Foo, 7), (;, 10)]`
    start_offsets: Vec<TextUnit>,
    /// non-whitespace/comment tokens
    /// ```non-rust
    /// struct Foo {}
    /// ^^^^^^ ^^^ ^^
    /// ```
    /// tokens: `[struct, Foo, {, }]`
    tokens: Vec<Token>,
}

impl<'t> ParserInput<'t> {
    /// Generate input from tokens(except comment and whitespace).
    pub fn new(text: &'t str, raw_tokens: &'t [Token]) -> ParserInput<'t> {
        let mut tokens = Vec::new();
        let mut start_offsets = Vec::new();
        // Running offset into `text`; trivia still advances it.
        let mut len = 0.into();
        for &token in raw_tokens.iter() {
            if !token.kind.is_trivia() {
                tokens.push(token);
                start_offsets.push(len);
            }
            len += token.len;
        }

        ParserInput { text, start_offsets, tokens }
    }

    /// Get the syntax kind of token at given input position.
    /// Returns `EOF` for positions past the last token.
    pub fn kind(&self, pos: InputPosition) -> SyntaxKind {
        let idx = pos.0 as usize;
        if !(idx < self.tokens.len()) {
            return EOF;
        }
        self.tokens[idx].kind
    }

    /// Get the length of a token at given input position.
    /// Returns 0 for positions past the last token.
    pub fn token_len(&self, pos: InputPosition) -> TextUnit {
        let idx = pos.0 as usize;
        if !(idx < self.tokens.len()) {
            return 0.into();
        }
        self.tokens[idx].len
    }

    /// Get the start position of a token at given input position.
pub fn token_start_at(&self, pos: InputPosition) -> TextUnit {
    let idx = pos.0 as usize;
    // Positions past the last token report offset 0.
    if !(idx < self.tokens.len()) {
        return 0.into();
    }
    self.start_offsets[idx]
}

/// Get the raw text of a token at given input position.
/// Returns `""` for positions past the last token.
pub fn token_text(&self, pos: InputPosition) -> &'t str {
    let idx = pos.0 as usize;
    if !(idx < self.tokens.len()) {
        return "";
    }
    let range = TextRange::offset_len(self.start_offsets[idx], self.tokens[idx].len);
    &self.text[range]
}
}

/// Index of a token in `ParserInput`.
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub(crate) struct InputPosition(u32);

impl InputPosition {
    /// Position of the first token.
    pub fn new() -> Self {
        InputPosition(0)
    }
}

impl Add<u32> for InputPosition {
    type Output = InputPosition;

    fn add(self, rhs: u32) -> InputPosition {
        InputPosition(self.0 + rhs)
    }
}

impl AddAssign<u32> for InputPosition {
    fn add_assign(&mut self, rhs: u32) {
        self.0 += rhs
    }
}
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
new file mode 100644
index 000000000..994e7e212
--- /dev/null
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -0,0 +1,370 @@
use crate::{
    SyntaxKind::*, TextRange, TextUnit,
    algo,
    syntax_node::{GreenNode, SyntaxError, SyntaxNode},
    parsing::{
        grammar,
        parser_impl,
        builder::GreenBuilder,
        parser_api::Parser,
        lexer::{tokenize, Token},
    }
};

use ra_text_edit::AtomTextEdit;

/// Try to apply `edit` by reparsing only a small part of the tree under `node`.
///
/// A single-leaf reparse is attempted first, then a block reparse. Returns the
/// new green root together with the merged error list, or `None` when neither
/// strategy applies.
pub(crate) fn incremental_reparse(
    node: &SyntaxNode,
    edit: &AtomTextEdit,
    errors: Vec<SyntaxError>,
) -> Option<(GreenNode, Vec<SyntaxError>)> {
    let (node, green, new_errors) =
        reparse_leaf(node, &edit).or_else(|| reparse_block(node, &edit))?;
    let green_root = node.replace_with(green);
    let errors = merge_errors(errors, new_errors, node, edit);
    Some((green_root, errors))
}

/// Reparse a single leaf if the edit stays inside one whitespace, comment,
/// identifier or string token and the edited text is still exactly one token
/// of the same kind.
fn reparse_leaf<'node>(
    node: &'node SyntaxNode,
    edit: &AtomTextEdit,
) -> Option<(&'node SyntaxNode, GreenNode, Vec<SyntaxError>)> {
    let node = algo::find_covering_node(node, edit.delete);
    match node.kind() {
        WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
            let text = get_text_after_edit(node, &edit);
            let tokens = tokenize(&text);
            let token = match tokens[..] {
                [token] if token.kind == node.kind() => token,
                _ => return None,
            };

            // An identifier that became a contextual keyword is not handled
            // as a plain leaf replacement.
            if token.kind == IDENT && is_contextual_kw(&text) {
                return None;
            }

            let green = GreenNode::new_leaf(node.kind(), text.into());
            let new_errors = vec![];
            Some((node, green, new_errors))
        }
        _ => None,
    }
}

/// Reparse the closest reparsable ancestor of the edited range, provided the
/// edited text is still a curly-balanced block.
fn reparse_block<'node>(
    node: &'node SyntaxNode,
    edit: &AtomTextEdit,
) -> Option<(&'node SyntaxNode, GreenNode, Vec<SyntaxError>)> {
    let (node, reparser) = find_reparsable_node(node, edit.delete)?;
    let text = get_text_after_edit(node, &edit);
    let tokens = tokenize(&text);
    if !is_balanced(&tokens) {
        return None;
    }
    let (green, new_errors) =
        parser_impl::parse_with(GreenBuilder::new(), &text, &tokens, reparser);
    Some((node, green, new_errors))
}

/// Text of `node` with `edit` applied; the edit range is first made relative
/// to the start of `node`.
fn get_text_after_edit(node: &SyntaxNode, edit: &AtomTextEdit) -> String {
    let edit = AtomTextEdit::replace(edit.delete - node.range().start(), edit.insert.clone());
    edit.apply(node.text().to_string())
}

/// Whether `text` is one of the contextual keywords `auto`, `default`, `union`.
fn is_contextual_kw(text: &str) -> bool {
    match text {
        "auto" | "default" | "union" => true,
        _ => false,
    }
}

type ParseFn = fn(&mut Parser);
/// Find the innermost ancestor of the node covering `range` for which a
/// dedicated grammar entry point exists, and return it with that entry point.
fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(&SyntaxNode, ParseFn)> {
    let node = algo::find_covering_node(node, range);
    return node.ancestors().filter_map(|node| reparser(node).map(|r| (node, r))).next();

    fn reparser(node: &SyntaxNode) -> Option<ParseFn> {
        let res = match node.kind() {
            BLOCK => grammar::block,
            NAMED_FIELD_DEF_LIST => grammar::named_field_def_list,
            NAMED_FIELD_LIST => grammar::named_field_list,
            ENUM_VARIANT_LIST => grammar::enum_variant_list,
            MATCH_ARM_LIST => grammar::match_arm_list,
            USE_TREE_LIST => grammar::use_tree_list,
            EXTERN_ITEM_LIST => grammar::extern_item_list,
            TOKEN_TREE if node.first_child().unwrap().kind() == L_CURLY => grammar::token_tree,
            // The right item-list grammar depends on the parent node.
            ITEM_LIST => {
                let parent = node.parent().unwrap();
                match parent.kind() {
                    IMPL_BLOCK => grammar::impl_item_list,
                    TRAIT_DEF => grammar::trait_item_list,
                    MODULE => grammar::mod_item_list,
                    _ => return None,
                }
            }
            _ => return None,
        };
        Some(res)
    }
}

/// Whether `tokens` starts with `{`, ends with `}`, and has balanced curly
/// braces throughout. An empty slice is not balanced.
fn is_balanced(tokens: &[Token]) -> bool {
    if tokens.is_empty()
        || tokens.first().unwrap().kind != L_CURLY
        || tokens.last().unwrap().kind != R_CURLY
    {
        return false;
    }
    let mut balance = 0usize;
    for t in tokens.iter() {
        match t.kind {
            L_CURLY => balance += 1,
            R_CURLY => {
                // More closing than opening braces so far.
                balance = match balance.checked_sub(1) {
                    Some(b) => b,
                    None => return false,
                }
            }
            _ => (),
        }
    }
    balance == 0
}

/// Combine the errors of the old tree with the errors of the reparsed node.
///
/// Old errors at or before the start of `old_node` are kept as they are, old
/// errors at or after its end are shifted by the size change of the edit, and
/// old errors strictly inside the node are dropped. New errors are shifted by
/// the start offset of `old_node`.
fn merge_errors(
    old_errors: Vec<SyntaxError>,
    new_errors: Vec<SyntaxError>,
    old_node: &SyntaxNode,
    edit: &AtomTextEdit,
) -> Vec<SyntaxError> {
    let mut res = Vec::new();
    for e in old_errors {
        if e.offset() <= old_node.range().start() {
            res.push(e)
        } else if e.offset() >= old_node.range().end() {
            // NOTE(review): if `TextUnit` subtraction is unsigned, this looks
            // like it underflows when the inserted text is shorter than the
            // deleted range; confirm against `TextUnit` / `add_offset`.
            res.push(e.add_offset(TextUnit::of_str(&edit.insert) - edit.delete.len()));
        }
    }
    for e in new_errors {
        res.push(e.add_offset(old_node.range().start()));
    }
    res
}

#[cfg(test)]
mod tests {
    use test_utils::{extract_range, assert_eq_text};

    use crate::{SourceFile, AstNode, utils::dump_tree};
    use super::*;

    /// Apply the edit marked by `<|>…<|>` in `before` both through a full
    /// reparse and through `reparser`, and check the resulting trees match.
    fn do_check<F>(before: &str, replace_with: &str, reparser: F)
    where
        for<'a> F: Fn(
            &'a SyntaxNode,
            &AtomTextEdit,
        ) -> Option<(&'a SyntaxNode, GreenNode, Vec<SyntaxError>)>,
    {
        let (range, before) = extract_range(before);
        let edit = AtomTextEdit::replace(range, replace_with.to_owned());
        let after = edit.apply(before.clone());

        let fully_reparsed = SourceFile::parse(&after);
        let incrementally_reparsed = {
            let f = SourceFile::parse(&before);
            let edit = AtomTextEdit { delete: range, insert: replace_with.to_string() };
            let (node, green, new_errors) =
                reparser(f.syntax(), &edit).expect("cannot incrementally reparse");
            let green_root = node.replace_with(green);
            let errors = super::merge_errors(f.errors(), new_errors, node, &edit);
            SourceFile::new(green_root, errors)
        };

        assert_eq_text!(
            &dump_tree(fully_reparsed.syntax()),
            &dump_tree(incrementally_reparsed.syntax()),
        )
    }

    #[test]
    fn reparse_block_tests() {
        let do_check = |before, replace_to| do_check(before, replace_to, reparse_block);

        do_check(
            r"
fn foo() {
    let x = foo + <|>bar<|>
}
",
            "baz",
        );
        do_check(
            r"
fn foo() {
    let x = foo<|> + bar<|>
}
",
            "baz",
        );
        do_check(
            r"
struct Foo {
    f: foo<|><|>
}
",
            ",\n g: (),",
        );
        do_check(
            r"
fn foo {
    let;
    1 + 1;
    <|>92<|>;
}
",
            "62",
        );
        do_check(
            r"
mod foo {
    fn <|><|>
}
",
            "bar",
        );
        do_check(
            r"
trait Foo {
    type <|>Foo<|>;
}
",
            "Output",
        );
        // NOTE(review): this fixture may have lost an `<Item=…>` argument
        // after `IntoIterator` during extraction; verify against upstream.
        do_check(
            r"
impl IntoIterator for Foo {
    f<|><|>
}
",
            "n next(",
        );
        do_check(
            r"
use a::b::{foo,<|>,bar<|>};
    ",
            "baz",
        );
        do_check(
            r"
pub enum A {
    Foo<|><|>
}
",
            "\nBar;\n",
        );
        do_check(
            r"
foo!{a, b<|><|> d}
",
            ", c[3]",
        );
        do_check(
            r"
fn foo() {
    vec![<|><|>]
}
",
            "123",
        );
        do_check(
            r"
extern {
    fn<|>;<|>
}
",
            " exit(code: c_int)",
        );
    }

    #[test]
    fn reparse_leaf_tests() {
        let do_check = |before, replace_to| do_check(before, replace_to, reparse_leaf);

        do_check(
            r"<|><|>
fn foo() -> i32 { 1 }
",
            "\n\n\n \n",
        );
        do_check(
            r"
fn foo() -> <|><|> {}
",
            " \n",
        );
        do_check(
            r"
fn <|>foo<|>() -> i32 { 1 }
",
            "bar",
        );
        do_check(
            r"
fn foo<|><|>foo() { }
",
            "bar",
        );
        do_check(
            r"
fn foo /* <|><|> */ () {}
",
            "some comment",
        );
        do_check(
            r"
fn baz <|><|> () {}
",
            " \t\t\n\n",
        );
        do_check(
            r"
fn baz <|><|> () {}
",
            " \t\t\n\n",
        );
        do_check(
            r"
/// foo <|><|>omment
mod { }
",
            "c",
        );
        do_check(
            r#"
fn -> &str { "Hello<|><|>" }
"#,
            ", world",
        );
        do_check(
            r#"
fn -> &str { // "Hello<|><|>"
"#,
            ", world",
        );
        do_check(
            r##"
fn -> &str { r#"Hello<|><|>"#
"##,
            ", world",
        );
        do_check(
            r"
#[derive(<|>Copy<|>)]
enum Foo {

}
",
            "Clone",
        );
    }
}
diff --git a/crates/ra_syntax/src/parsing/token_set.rs b/crates/ra_syntax/src/parsing/token_set.rs
new file mode 100644
index 000000000..5719fe5a2
--- /dev/null
+++ b/crates/ra_syntax/src/parsing/token_set.rs
@@ -0,0 +1,41 @@
use crate::SyntaxKind;

/// A set of `SyntaxKind`s stored as a 128-bit mask, one bit per kind.
#[derive(Clone, Copy)]
pub(crate) struct TokenSet(u128);

impl TokenSet {
    /// The set containing no kinds.
    pub(crate) const fn empty() -> TokenSet {
        TokenSet(0)
    }

    /// The set containing only `kind`.
    pub(crate) const fn singleton(kind: SyntaxKind) -> TokenSet {
        TokenSet(mask(kind))
    }

    /// The set of kinds present in `self` or in `other`.
    pub(crate) const fn union(self, other: TokenSet) -> TokenSet {
        TokenSet(self.0 | other.0)
    }

    /// Whether `kind` is a member of the set.
    pub(crate) fn contains(&self, kind: SyntaxKind) -> bool {
        self.0 & mask(kind) != 0
    }
}

/// Bit for `kind`: bit number `kind as usize` of a `u128`.
const fn mask(kind: SyntaxKind) -> u128 {
    1u128 << (kind as usize)
}

/// Build a `TokenSet` from a comma-separated list of kinds; a trailing comma
/// is accepted.
#[macro_export]
macro_rules! token_set {
    ($($t:ident),*) => { TokenSet::empty()$(.union(TokenSet::singleton($t)))* };
    ($($t:ident),* ,) => { token_set!($($t),*) };
}

#[test]
fn token_set_works_for_tokens() {
    use crate::SyntaxKind::*;
    let ts = token_set! { EOF, SHEBANG };
    assert!(ts.contains(EOF));
    assert!(ts.contains(SHEBANG));
    assert!(!ts.contains(PLUS));
}
-- cgit v1.2.3