From 1e1e2e83c462b7efacaa0e33812beed72a88ab5f Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 5 Aug 2018 16:09:25 +0300 Subject: compound ops --- src/grammar.ron | 4 ++ src/grammar/expressions/mod.rs | 47 +++++++++++++---- src/parser_api.rs | 11 ++++ src/parser_impl/input.rs | 17 +++++- src/parser_impl/mod.rs | 19 +++++-- src/syntax_kinds/generated.rs | 12 +++++ tests/data/parser/inline/0079_compound_ops.rs | 5 ++ tests/data/parser/inline/0079_compound_ops.txt | 72 ++++++++++++++++++++++++++ 8 files changed, 171 insertions(+), 16 deletions(-) create mode 100644 tests/data/parser/inline/0079_compound_ops.rs create mode 100644 tests/data/parser/inline/0079_compound_ops.txt diff --git a/src/grammar.ron b/src/grammar.ron index 0a79eeb95..67022b296 100644 --- a/src/grammar.ron +++ b/src/grammar.ron @@ -37,6 +37,10 @@ Grammar( ["!=", "NEQ"], ["-", "MINUS"], ["->", "THIN_ARROW"], + ["<=", "LTEQ"], + [">=", "GTEQ"], + ["+=", "PLUSEQ"], + ["-=", "MINUSEQ"], ], keywords: [ "use", diff --git a/src/grammar/expressions/mod.rs b/src/grammar/expressions/mod.rs index 15669f99d..baff0da52 100644 --- a/src/grammar/expressions/mod.rs +++ b/src/grammar/expressions/mod.rs @@ -33,19 +33,45 @@ struct Restrictions { forbid_structs: bool } +enum Op { + Simple, + Composite(SyntaxKind, u8) +} + // test expr_binding_power // fn foo() { // 1 + 2 * 3 == 1 * 2 + 3; // *x = 1 + 1; // } -fn bp_of(op: SyntaxKind) -> u8 { - match op { + +// test compound_ops +// fn foo() { +// x += 1; +// 1 + 1 <= 2 * 3; +// z -= 3 >= 0; +// } +fn current_op(p: &Parser) -> (u8, Op) { + if p.at_compound2(L_ANGLE, EQ) { + return (2, Op::Composite(LTEQ, 2)) + } + if p.at_compound2(R_ANGLE, EQ) { + return (2, Op::Composite(GTEQ, 2)) + } + if p.at_compound2(PLUS, EQ) { + return (1, Op::Composite(PLUSEQ, 2)) + } + if p.at_compound2(MINUS, EQ) { + return (1, Op::Composite(MINUSEQ, 2)) + } + + let bp = match p.current() { EQ => 1, EQEQ | NEQ => 2, MINUS | PLUS => 3, STAR | SLASH => 4, - _ => 0 - } + _ => 0, + }; + (bp, Op::Simple) } // Parses expression with binding power of at least bp. @@ -56,10 +82,16 @@ fn expr_bp(p: &mut Parser, r: Restrictions, bp: u8) { }; loop { - let op_bp = bp_of(p.current()); + let (op_bp, op) = current_op(p); if op_bp < bp { break; } + match op { + Op::Simple => p.bump(), + Op::Composite(kind, n) => { + p.bump_compound(kind, n); + }, + } lhs = bin_expr(p, r, lhs, op_bp); } } @@ -254,12 +286,7 @@ fn struct_lit(p: &mut Parser) { } fn bin_expr(p: &mut Parser, r: Restrictions, lhs: CompletedMarker, bp: u8) -> CompletedMarker { - assert!(match p.current() { - MINUS | PLUS | STAR | SLASH | EQEQ | NEQ | EQ => true, - _ => false, - }); let m = lhs.precede(p); - p.bump(); expr_bp(p, r, bp); m.complete(p, BIN_EXPR) } diff --git a/src/parser_api.rs b/src/parser_api.rs index 95394e39d..fef21c5fd 100644 --- a/src/parser_api.rs +++ b/src/parser_api.rs @@ -58,6 +58,10 @@ impl<'t> Parser<'t> { self.current() == kind } + pub(crate) fn at_compound2(&self, c1: SyntaxKind, c2: SyntaxKind) -> bool { + self.0.at_compound2(c1, c2) + } + /// Checks if the current token is contextual keyword with text `t`. pub(crate) fn at_contextual_kw(&self, t: &str) -> bool { self.0.at_kw(t) @@ -85,6 +89,13 @@ impl<'t> Parser<'t> { self.0.bump_remap(kind); } + /// Advances the parser by `n` tokens, remapping its kind. + /// This is useful to create compound tokens from parts. For + /// example, an `<<` token is two consecutive remapped `<` tokens + pub(crate) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { + self.0.bump_compound(kind, n); + } + /// Emit error with the `message` /// TODO: this should be much more fancy and support /// structured errors with spans and notes, like rustc diff --git a/src/parser_impl/input.rs b/src/parser_impl/input.rs index db76364b2..c0fe4d488 100644 --- a/src/parser_impl/input.rs +++ b/src/parser_impl/input.rs @@ -36,7 +36,22 @@ impl<'t> ParserInput<'t> { self.tokens[idx].kind } - #[allow(unused)] + pub fn len(&self, pos: InputPosition) -> TextUnit { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return 0.into(); + } + self.tokens[idx].len + } + + pub fn start(&self, pos: InputPosition) -> TextUnit { + let idx = pos.0 as usize; + if !(idx < self.tokens.len()) { + return 0.into(); + } + self.start_offsets[idx] + } + pub fn text(&self, pos: InputPosition) -> &'t str { let idx = pos.0 as usize; if !(idx < self.tokens.len()) { diff --git a/src/parser_impl/mod.rs b/src/parser_impl/mod.rs index 2791c8da5..d640a7784 100644 --- a/src/parser_impl/mod.rs +++ b/src/parser_impl/mod.rs @@ -65,6 +65,11 @@ impl<'t> ParserImpl<'t> { self.events } + pub(super) fn at_compound2(&self, c1: SyntaxKind, c2: SyntaxKind) -> bool { + self.inp.kind(self.pos) == c1 && self.inp.kind(self.pos + 1) == c2 + && self.inp.start(self.pos + 1) == self.inp.start(self.pos) + self.inp.len(self.pos) + } + pub(super) fn nth(&self, n: u32) -> SyntaxKind { self.inp.kind(self.pos + n) } @@ -87,7 +92,7 @@ impl<'t> ParserImpl<'t> { if kind == EOF { return; } - self.do_bump(kind); + self.do_bump(kind, 1); } pub(super) fn bump_remap(&mut self, kind: SyntaxKind) { @@ -95,14 +100,18 @@ impl<'t> ParserImpl<'t> { // TODO: panic!? return; } - self.do_bump(kind); + self.do_bump(kind, 1); + } + + pub(super) fn bump_compound(&mut self, kind: SyntaxKind, n: u8) { + self.do_bump(kind, n); } - fn do_bump(&mut self, kind: SyntaxKind) { - self.pos += 1; + fn do_bump(&mut self, kind: SyntaxKind, n_raw_tokens: u8) { + self.pos += u32::from(n_raw_tokens); self.event(Event::Token { kind, - n_raw_tokens: 1, + n_raw_tokens, }); } diff --git a/src/syntax_kinds/generated.rs b/src/syntax_kinds/generated.rs index 8ac13fd63..8dfeb2b82 100644 --- a/src/syntax_kinds/generated.rs +++ b/src/syntax_kinds/generated.rs @@ -40,6 +40,10 @@ pub enum SyntaxKind { NEQ, MINUS, THIN_ARROW, + LTEQ, + GTEQ, + PLUSEQ, + MINUSEQ, USE_KW, FN_KW, STRUCT_KW, @@ -261,6 +265,10 @@ impl SyntaxKind { NEQ => &SyntaxInfo { name: "NEQ" }, MINUS => &SyntaxInfo { name: "MINUS" }, THIN_ARROW => &SyntaxInfo { name: "THIN_ARROW" }, + LTEQ => &SyntaxInfo { name: "LTEQ" }, + GTEQ => &SyntaxInfo { name: "GTEQ" }, + PLUSEQ => &SyntaxInfo { name: "PLUSEQ" }, + MINUSEQ => &SyntaxInfo { name: "MINUSEQ" }, USE_KW => &SyntaxInfo { name: "USE_KW" }, FN_KW => &SyntaxInfo { name: "FN_KW" }, STRUCT_KW => &SyntaxInfo { name: "STRUCT_KW" }, @@ -502,6 +510,10 @@ impl SyntaxKind { NEQ => "!=", MINUS => "-", THIN_ARROW => "->", + LTEQ => "<=", + GTEQ => ">=", + PLUSEQ => "+=", + MINUSEQ => "-=", USE_KW => "use", FN_KW => "fn", diff --git a/tests/data/parser/inline/0079_compound_ops.rs b/tests/data/parser/inline/0079_compound_ops.rs new file mode 100644 index 000000000..48be5aebe --- /dev/null +++ b/tests/data/parser/inline/0079_compound_ops.rs @@ -0,0 +1,5 @@ +fn foo() { + x += 1; + 1 + 1 <= 2 * 3; + z -= 3 >= 0; +} diff --git a/tests/data/parser/inline/0079_compound_ops.txt b/tests/data/parser/inline/0079_compound_ops.txt new file mode 100644 index 000000000..6eaed8c75 --- /dev/null +++ b/tests/data/parser/inline/0079_compound_ops.txt @@ -0,0 +1,72 @@ +FILE@[0; 62) + FN_ITEM@[0; 62) + FN_KW@[0; 2) + NAME@[2; 6) + WHITESPACE@[2; 3) + IDENT@[3; 6) "foo" + PARAM_LIST@[6; 9) + L_PAREN@[6; 7) + R_PAREN@[7; 8) + WHITESPACE@[8; 9) + BLOCK_EXPR@[9; 62) + L_CURLY@[9; 10) + EXPR_STMT@[10; 27) + BIN_EXPR@[10; 21) + PATH_EXPR@[10; 17) + PATH@[10; 17) + PATH_SEGMENT@[10; 17) + NAME_REF@[10; 17) + WHITESPACE@[10; 15) + IDENT@[15; 16) "x" + WHITESPACE@[16; 17) + PLUSEQ@[17; 19) + LITERAL@[19; 21) + WHITESPACE@[19; 20) + INT_NUMBER@[20; 21) "1" + SEMI@[21; 22) + WHITESPACE@[22; 27) + EXPR_STMT@[27; 47) + BIN_EXPR@[27; 41) + BIN_EXPR@[27; 33) + LITERAL@[27; 29) + INT_NUMBER@[27; 28) "1" + WHITESPACE@[28; 29) + PLUS@[29; 30) + LITERAL@[30; 33) + WHITESPACE@[30; 31) + INT_NUMBER@[31; 32) "1" + WHITESPACE@[32; 33) + LTEQ@[33; 35) + BIN_EXPR@[35; 41) + LITERAL@[35; 38) + WHITESPACE@[35; 36) + INT_NUMBER@[36; 37) "2" + WHITESPACE@[37; 38) + STAR@[38; 39) + LITERAL@[39; 41) + WHITESPACE@[39; 40) + INT_NUMBER@[40; 41) "3" + SEMI@[41; 42) + WHITESPACE@[42; 47) + EXPR_STMT@[47; 60) + BIN_EXPR@[47; 58) + PATH_EXPR@[47; 49) + PATH@[47; 49) + PATH_SEGMENT@[47; 49) + NAME_REF@[47; 49) + IDENT@[47; 48) "z" + WHITESPACE@[48; 49) + MINUSEQ@[49; 51) + BIN_EXPR@[51; 58) + LITERAL@[51; 54) + WHITESPACE@[51; 52) + INT_NUMBER@[52; 53) "3" + WHITESPACE@[53; 54) + GTEQ@[54; 56) + LITERAL@[56; 58) + WHITESPACE@[56; 57) + INT_NUMBER@[57; 58) "0" + SEMI@[58; 59) + WHITESPACE@[59; 60) + R_CURLY@[60; 61) + WHITESPACE@[61; 62) -- cgit v1.2.3