From 40170885e799ebdefb24ed00865cd1c7800af491 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 9 Sep 2019 14:52:31 +0300 Subject: WIP: switch to fully decomposed tokens internally --- crates/ra_parser/src/grammar/expressions.rs | 198 +++++++++++++------------ crates/ra_parser/src/grammar/items.rs | 2 +- crates/ra_parser/src/grammar/items/use_item.rs | 24 +-- crates/ra_parser/src/grammar/params.rs | 9 +- crates/ra_parser/src/grammar/paths.rs | 9 +- crates/ra_parser/src/grammar/patterns.rs | 33 +++-- crates/ra_parser/src/grammar/type_args.rs | 25 ++-- 7 files changed, 160 insertions(+), 140 deletions(-) (limited to 'crates/ra_parser/src/grammar') diff --git a/crates/ra_parser/src/grammar/expressions.rs b/crates/ra_parser/src/grammar/expressions.rs index 30036eb46..ea04b9458 100644 --- a/crates/ra_parser/src/grammar/expressions.rs +++ b/crates/ra_parser/src/grammar/expressions.rs @@ -212,52 +212,48 @@ struct Restrictions { prefer_stmt: bool, } -enum Op { - Simple, - Composite(SyntaxKind, u8), -} - -fn current_op(p: &Parser) -> (u8, Op) { - if let Some(t) = p.current3() { - match t { - (T![<], T![<], T![=]) => return (1, Op::Composite(T![<<=], 3)), - (T![>], T![>], T![=]) => return (1, Op::Composite(T![>>=], 3)), - _ => (), - } - } - - if let Some(t) = p.current2() { - match t { - (T![+], T![=]) => return (1, Op::Composite(T![+=], 2)), - (T![-], T![=]) => return (1, Op::Composite(T![-=], 2)), - (T![*], T![=]) => return (1, Op::Composite(T![*=], 2)), - (T![%], T![=]) => return (1, Op::Composite(T![%=], 2)), - (T![/], T![=]) => return (1, Op::Composite(T![/=], 2)), - (T![|], T![=]) => return (1, Op::Composite(T![|=], 2)), - (T![&], T![=]) => return (1, Op::Composite(T![&=], 2)), - (T![^], T![=]) => return (1, Op::Composite(T![^=], 2)), - (T![|], T![|]) => return (3, Op::Composite(T![||], 2)), - (T![&], T![&]) => return (4, Op::Composite(T![&&], 2)), - (T![<], T![=]) => return (5, Op::Composite(T![<=], 2)), - (T![>], T![=]) => return (5, Op::Composite(T![>=], 2)), - (T![<], T![<]) => return (9, Op::Composite(T![<<], 2)), - (T![>], T![>]) => return (9, Op::Composite(T![>>], 2)), - _ => (), - } +/// Binding powers of operators for a Pratt parser. +/// +/// See https://www.oilshell.org/blog/2016/11/03.html +#[rustfmt::skip] +fn current_op(p: &Parser) -> (u8, SyntaxKind) { + const NOT_AN_OP: (u8, SyntaxKind) = (0, T![@]); + match p.current() { + T![|] if p.at(T![||]) => (3, T![||]), + T![|] if p.at(T![|=]) => (1, T![|=]), + T![|] => (6, T![|]), + T![>] if p.at(T![>>=]) => (1, T![>>=]), + T![>] if p.at(T![>>]) => (9, T![>>]), + T![>] if p.at(T![>=]) => (5, T![>=]), + T![>] => (5, T![>]), + T![=] if p.at(T![=>]) => NOT_AN_OP, + T![=] if p.at(T![==]) => (5, T![==]), + T![=] => (1, T![=]), + T![<] if p.at(T![<=]) => (5, T![<=]), + T![<] if p.at(T![<<=]) => (1, T![<<=]), + T![<] if p.at(T![<<]) => (9, T![<<]), + T![<] => (5, T![<]), + T![+] if p.at(T![+=]) => (1, T![+=]), + T![+] => (10, T![+]), + T![^] if p.at(T![^=]) => (1, T![^=]), + T![^] => (7, T![^]), + T![%] if p.at(T![%=]) => (1, T![%=]), + T![%] => (11, T![%]), + T![&] if p.at(T![&=]) => (1, T![&=]), + T![&] if p.at(T![&&]) => (4, T![&&]), + T![&] => (8, T![&]), + T![/] if p.at(T![/=]) => (1, T![/=]), + T![/] => (11, T![/]), + T![*] if p.at(T![*=]) => (1, T![*=]), + T![*] => (11, T![*]), + T![.] if p.at(T![..=]) => (2, T![..=]), + T![.] if p.at(T![..]) => (2, T![..]), + T![!] if p.at(T![!=]) => (5, T![!=]), + T![-] if p.at(T![-=]) => (1, T![-=]), + T![-] => (10, T![-]), + + _ => NOT_AN_OP } - - let bp = match p.current() { - T![=] => 1, - T![..] | T![..=] => 2, - T![==] | T![!=] | T![<] | T![>] => 5, - T![|] => 6, - T![^] => 7, - T![&] => 8, - T![-] | T![+] => 10, - T![*] | T![/] | T![%] => 11, - _ => 0, - }; - (bp, Op::Simple) } // Parses expression with binding power of at least bp. @@ -308,12 +304,7 @@ fn expr_bp( break; } let m = lhs.precede(p); - match op { - Op::Simple => p.bump_any(), - Op::Composite(kind, n) => { - p.bump_compound(kind, n); - } - } + p.bump(op); expr_bp(p, r, op_bp + 1, dollar_lvl); lhs = m.complete(p, if is_range { RANGE_EXPR } else { BIN_EXPR }); @@ -321,8 +312,7 @@ fn expr_bp( (Some(lhs), BlockLike::NotBlock) } -const LHS_FIRST: TokenSet = - atom::ATOM_EXPR_FIRST.union(token_set![AMP, STAR, EXCL, DOTDOT, DOTDOTEQ, MINUS]); +const LHS_FIRST: TokenSet = atom::ATOM_EXPR_FIRST.union(token_set![AMP, STAR, EXCL, DOT, MINUS]); fn lhs( p: &mut Parser, @@ -353,17 +343,20 @@ fn lhs( p.bump_any(); PREFIX_EXPR } - // test full_range_expr - // fn foo() { xs[..]; } - T![..] | T![..=] => { - m = p.start(); - p.bump_any(); - if p.at_ts(EXPR_FIRST) { - expr_bp(p, r, 2, dollar_lvl); - } - return Some((m.complete(p, RANGE_EXPR), BlockLike::NotBlock)); - } _ => { + // test full_range_expr + // fn foo() { xs[..]; } + for &op in [T![..=], T![..]].iter() { + if p.at(op) { + m = p.start(); + p.bump(op); + if p.at_ts(EXPR_FIRST) { + expr_bp(p, r, 2, dollar_lvl); + } + return Some((m.complete(p, RANGE_EXPR), BlockLike::NotBlock)); + } + } + // test expression_after_block // fn foo() { // let mut p = F{x: 5}; @@ -399,29 +392,13 @@ fn postfix_expr( // } T!['('] if allow_calls => call_expr(p, lhs), T!['['] if allow_calls => index_expr(p, lhs), - T![.] if p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth(2) == T![::]) => { - method_call_expr(p, lhs) - } - T![.] if p.nth(1) == AWAIT_KW => { - // test await_expr - // fn foo() { - // x.await; - // x.0.await; - // x.0().await?.hello(); - // } - let m = lhs.precede(p); - p.bump_any(); - p.bump_any(); - m.complete(p, AWAIT_EXPR) - } - T![.] => field_expr(p, lhs), - // test postfix_range - // fn foo() { let x = 1..; } - T![..] | T![..=] if !EXPR_FIRST.contains(p.nth(1)) => { - let m = lhs.precede(p); - p.bump_any(); - m.complete(p, RANGE_EXPR) - } + T![.] => match postfix_dot_expr(p, lhs) { + Ok(it) => it, + Err(it) => { + lhs = it; + break; + } + }, T![?] => try_expr(p, lhs), T![as] => cast_expr(p, lhs), _ => break, @@ -429,7 +406,46 @@ fn postfix_expr( allow_calls = true; block_like = BlockLike::NotBlock; } - (lhs, block_like) + return (lhs, block_like); + + fn postfix_dot_expr( + p: &mut Parser, + lhs: CompletedMarker, + ) -> Result { + assert!(p.at(T![.])); + if p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::])) { + return Ok(method_call_expr(p, lhs)); + } + + // test await_expr + // fn foo() { + // x.await; + // x.0.await; + // x.0().await?.hello(); + // } + if p.nth(1) == T![await] { + let m = lhs.precede(p); + p.bump(T![.]); + p.bump(T![await]); + return Ok(m.complete(p, AWAIT_EXPR)); + } + + // test postfix_range + // fn foo() { let x = 1..; } + for &(op, la) in [(T![..=], 3), (T![..], 2)].iter() { + if p.at(op) { + return if EXPR_FIRST.contains(p.nth(la)) { + Err(lhs) + } else { + let m = lhs.precede(p); + p.bump(op); + Ok(m.complete(p, RANGE_EXPR)) + }; + } + } + + Ok(field_expr(p, lhs)) + } } // test call_expr @@ -465,7 +481,7 @@ fn index_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { // y.bar::(1, 2,); // } fn method_call_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { - assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth(2) == T![::])); + assert!(p.at(T![.]) && p.nth(1) == IDENT && (p.nth(2) == T!['('] || p.nth_at(2, T![::]))); let m = lhs.precede(p); p.bump_any(); name_ref(p); @@ -567,7 +583,7 @@ fn path_expr(p: &mut Parser, r: Restrictions) -> (CompletedMarker, BlockLike) { record_field_list(p); (m.complete(p, RECORD_LIT), BlockLike::NotBlock) } - T![!] => { + T![!] if !p.at(T![!=]) => { let block_like = items::macro_call_after_excl(p); (m.complete(p, MACRO_CALL), block_like) } @@ -601,8 +617,8 @@ pub(crate) fn record_field_list(p: &mut Parser) { } m.complete(p, RECORD_FIELD); } - T![..] => { - p.bump_any(); + T![.] if p.at(T![..]) => { + p.bump(T![..]); expr(p); } T!['{'] => error_block(p, "expected a field"), diff --git a/crates/ra_parser/src/grammar/items.rs b/crates/ra_parser/src/grammar/items.rs index f27cc85ff..eff9d67e4 100644 --- a/crates/ra_parser/src/grammar/items.rs +++ b/crates/ra_parser/src/grammar/items.rs @@ -422,7 +422,7 @@ pub(crate) fn token_tree(p: &mut Parser) { return; } T![')'] | T![']'] => p.err_and_bump("unmatched brace"), - _ => p.bump_raw(), + _ => p.bump_any(), } } p.expect(closing_paren_kind); diff --git a/crates/ra_parser/src/grammar/items/use_item.rs b/crates/ra_parser/src/grammar/items/use_item.rs index 7a1693a34..f28f522b8 100644 --- a/crates/ra_parser/src/grammar/items/use_item.rs +++ b/crates/ra_parser/src/grammar/items/use_item.rs @@ -13,9 +13,8 @@ pub(super) fn use_item(p: &mut Parser, m: Marker) { /// so handles both `some::path::{inner::path}` and `inner::path` in /// `use some::path::{inner::path};` fn use_tree(p: &mut Parser) { - let la = p.nth(1); let m = p.start(); - match (p.current(), la) { + match p.current() { // Finish the use_tree for cases of e.g. // `use some::path::{self, *};` or `use *;` // This does not handle cases such as `use some::path::*` @@ -28,15 +27,15 @@ fn use_tree(p: &mut Parser) { // use ::*; // use some::path::{*}; // use some::path::{::*}; - (T![*], _) => p.bump_any(), - (T![::], T![*]) => { + T![*] => p.bump(T![*]), + T![:] if p.at(T![::]) && p.nth(2) == T![*] => { // Parse `use ::*;`, which imports all from the crate root in Rust 2015 // This is invalid inside a use_tree_list, (e.g. `use some::path::{::*}`) // but still parses and errors later: ('crate root in paths can only be used in start position') // FIXME: Add this error (if not out of scope) // In Rust 2018, it is always invalid (see above) - p.bump_any(); - p.bump_any(); + p.bump(T![::]); + p.bump(T![*]); } // Open a use tree list // Handles cases such as `use {some::path};` or `{inner::path}` in @@ -47,10 +46,11 @@ fn use_tree(p: &mut Parser) { // use {path::from::root}; // Rust 2015 // use ::{some::arbritrary::path}; // Rust 2015 // use ::{{{crate::export}}}; // Nonsensical but perfectly legal nestnig - (T!['{'], _) | (T![::], T!['{']) => { - if p.at(T![::]) { - p.bump_any(); - } + T!['{'] => { + use_tree_list(p); + } + T![:] if p.at(T![::]) && p.nth(2) == T!['{'] => { + p.bump(T![::]); use_tree_list(p); } // Parse a 'standard' path. @@ -80,8 +80,8 @@ fn use_tree(p: &mut Parser) { // use Trait as _; opt_alias(p); } - T![::] => { - p.bump_any(); + T![:] if p.at(T![::]) => { + p.bump(T![::]); match p.current() { T![*] => { p.bump_any(); diff --git a/crates/ra_parser/src/grammar/params.rs b/crates/ra_parser/src/grammar/params.rs index 56e457325..5893b22fd 100644 --- a/crates/ra_parser/src/grammar/params.rs +++ b/crates/ra_parser/src/grammar/params.rs @@ -80,7 +80,7 @@ fn value_parameter(p: &mut Parser, flavor: Flavor) { match flavor { Flavor::OptionalType | Flavor::Normal => { patterns::pattern(p); - if p.at(T![:]) || flavor.type_required() { + if p.at(T![:]) && !p.at(T![::]) || flavor.type_required() { types::ascription(p) } } @@ -96,10 +96,11 @@ fn value_parameter(p: &mut Parser, flavor: Flavor) { // trait Foo { // fn bar(_: u64, mut x: i32); // } - if (la0 == IDENT || la0 == T![_]) && la1 == T![:] + if (la0 == IDENT || la0 == T![_]) && la1 == T![:] && !p.nth_at(1, T![::]) || la0 == T![mut] && la1 == IDENT && la2 == T![:] - || la0 == T![&] && la1 == IDENT && la2 == T![:] - || la0 == T![&] && la1 == T![mut] && la2 == IDENT && la3 == T![:] + || la0 == T![&] + && (la1 == IDENT && la2 == T![:] && !p.nth_at(2, T![::]) + || la1 == T![mut] && la2 == IDENT && la3 == T![:] && !p.nth_at(3, T![::])) { patterns::pattern(p); types::ascription(p); diff --git a/crates/ra_parser/src/grammar/paths.rs b/crates/ra_parser/src/grammar/paths.rs index 345c93f55..24b65128e 100644 --- a/crates/ra_parser/src/grammar/paths.rs +++ b/crates/ra_parser/src/grammar/paths.rs @@ -1,7 +1,7 @@ use super::*; pub(super) const PATH_FIRST: TokenSet = - token_set![IDENT, SELF_KW, SUPER_KW, CRATE_KW, COLONCOLON, L_ANGLE]; + token_set![IDENT, SELF_KW, SUPER_KW, CRATE_KW, COLON, L_ANGLE]; pub(super) fn is_path_start(p: &Parser) -> bool { is_use_path_start(p) || p.at(T![<]) @@ -9,7 +9,8 @@ pub(super) fn is_path_start(p: &Parser) -> bool { pub(super) fn is_use_path_start(p: &Parser) -> bool { match p.current() { - IDENT | T![self] | T![super] | T![crate] | T![::] => true, + IDENT | T![self] | T![super] | T![crate] => true, + T![:] if p.at(T![::]) => true, _ => false, } } @@ -38,13 +39,13 @@ fn path(p: &mut Parser, mode: Mode) { path_segment(p, mode, true); let mut qual = path.complete(p, PATH); loop { - let use_tree = match p.nth(1) { + let use_tree = match p.nth(2) { T![*] | T!['{'] => true, _ => false, }; if p.at(T![::]) && !use_tree { let path = qual.precede(p); - p.bump_any(); + p.bump(T![::]); path_segment(p, mode, false); let path = path.complete(p, PATH); qual = path; diff --git a/crates/ra_parser/src/grammar/patterns.rs b/crates/ra_parser/src/grammar/patterns.rs index d2f4296f8..dd1d25b07 100644 --- a/crates/ra_parser/src/grammar/patterns.rs +++ b/crates/ra_parser/src/grammar/patterns.rs @@ -34,17 +34,20 @@ pub(super) fn pattern_r(p: &mut Parser, recovery_set: TokenSet) { // 200 .. 301=> (), // } // } - if p.at(T![...]) || p.at(T![..=]) || p.at(T![..]) { - let m = lhs.precede(p); - p.bump_any(); - atom_pat(p, recovery_set); - m.complete(p, RANGE_PAT); + for &range_op in [T![...], T![..=], T![..]].iter() { + if p.at(range_op) { + let m = lhs.precede(p); + p.bump(range_op); + atom_pat(p, recovery_set); + m.complete(p, RANGE_PAT); + return; + } } // test marco_pat // fn main() { // let m!(x) = 0; // } - else if lhs.kind() == PATH_PAT && p.at(T![!]) { + if lhs.kind() == PATH_PAT && p.at(T![!]) { let m = lhs.precede(p); items::macro_call_after_excl(p); m.complete(p, MACRO_CALL); @@ -56,14 +59,16 @@ const PAT_RECOVERY_SET: TokenSet = token_set![LET_KW, IF_KW, WHILE_KW, LOOP_KW, MATCH_KW, R_PAREN, COMMA]; fn atom_pat(p: &mut Parser, recovery_set: TokenSet) -> Option { - // Checks the token after an IDENT to see if a pattern is a path (Struct { .. }) or macro - // (T![x]). - let is_path_or_macro_pat = - |la1| la1 == T![::] || la1 == T!['('] || la1 == T!['{'] || la1 == T![!]; - let m = match p.nth(0) { T![box] => box_pat(p), - T![ref] | T![mut] | IDENT if !is_path_or_macro_pat(p.nth(1)) => bind_pat(p, true), + T![ref] | T![mut] => bind_pat(p, true), + IDENT => match p.nth(1) { + // Checks the token after an IDENT to see if a pattern is a path (Struct { .. }) or macro + // (T![x]). + T!['('] | T!['{'] | T![!] => path_pat(p), + T![:] if p.nth_at(1, T![::]) => path_pat(p), + _ => bind_pat(p, true), + }, _ if paths::is_use_path_start(p) => path_pat(p), _ if is_literal_pat_start(p) => literal_pat(p), @@ -158,7 +163,7 @@ fn record_field_pat_list(p: &mut Parser) { p.bump_any(); while !p.at(EOF) && !p.at(T!['}']) { match p.current() { - T![..] => p.bump_any(), + T![.] if p.at(T![..]) => p.bump(T![..]), IDENT if p.nth(1) == T![:] => record_field_pat(p), T!['{'] => error_block(p, "expected ident"), T![box] => { @@ -237,7 +242,7 @@ fn slice_pat(p: &mut Parser) -> CompletedMarker { fn pat_list(p: &mut Parser, ket: SyntaxKind) { while !p.at(EOF) && !p.at(ket) { match p.current() { - T![..] => p.bump_any(), + T![.] if p.at(T![..]) => p.bump(T![..]), _ => { if !p.at_ts(PATTERN_FIRST) { p.error("expected a pattern"); diff --git a/crates/ra_parser/src/grammar/type_args.rs b/crates/ra_parser/src/grammar/type_args.rs index e100af531..edc7d4ff2 100644 --- a/crates/ra_parser/src/grammar/type_args.rs +++ b/crates/ra_parser/src/grammar/type_args.rs @@ -2,19 +2,16 @@ use super::*; pub(super) fn opt_type_arg_list(p: &mut Parser, colon_colon_required: bool) { let m; - match (colon_colon_required, p.nth(0), p.nth(1)) { - (_, T![::], T![<]) => { - m = p.start(); - p.bump_any(); - p.bump_any(); - } - (false, T![<], T![=]) => return, - (false, T![<], _) => { - m = p.start(); - p.bump_any(); - } - _ => return, - }; + if p.at(T![::]) && p.nth(2) == T![<] { + m = p.start(); + p.bump(T![::]); + p.bump(T![<]); + } else if !colon_colon_required && p.at(T![<]) && p.nth(1) != T![=] { + m = p.start(); + p.bump(T![<]); + } else { + return; + } while !p.at(EOF) && !p.at(T![>]) { type_arg(p); @@ -37,7 +34,7 @@ fn type_arg(p: &mut Parser) { } // test associated_type_bounds // fn print_all>(printables: T) {} - IDENT if p.nth(1) == T![:] => { + IDENT if p.nth(1) == T![:] && p.nth(2) != T![:] => { name_ref(p); type_params::bounds(p); m.complete(p, ASSOC_TYPE_ARG); -- cgit v1.2.3