From 1cd6d6539a9d85bc44db364bb9165e6d9253790d Mon Sep 17 00:00:00 2001 From: Josh Robson Chase Date: Wed, 23 Jan 2019 12:15:47 -0500 Subject: Add raw idents to lexer and parser --- crates/ra_syntax/src/ast.rs | 4 ++-- crates/ra_syntax/src/grammar.ron | 5 +++++ crates/ra_syntax/src/grammar.rs | 4 ++-- crates/ra_syntax/src/grammar/expressions.rs | 8 ++++---- crates/ra_syntax/src/grammar/expressions/atom.rs | 1 + crates/ra_syntax/src/grammar/items.rs | 10 +++++----- crates/ra_syntax/src/grammar/items/nominal.rs | 4 ++-- crates/ra_syntax/src/grammar/items/traits.rs | 2 +- crates/ra_syntax/src/grammar/params.rs | 6 +++--- crates/ra_syntax/src/grammar/paths.rs | 6 +++--- crates/ra_syntax/src/grammar/patterns.rs | 6 +++--- crates/ra_syntax/src/grammar/type_args.rs | 2 +- crates/ra_syntax/src/grammar/type_params.rs | 4 ++-- crates/ra_syntax/src/lexer.rs | 21 +++++++++++++-------- crates/ra_syntax/src/lexer/strings.rs | 3 ++- crates/ra_syntax/src/parser_api.rs | 8 ++++++++ crates/ra_syntax/src/reparsing.rs | 4 ++-- crates/ra_syntax/src/syntax_kinds/generated.rs | 10 ++++++++++ crates/ra_syntax/src/syntax_kinds/generated.rs.tera | 9 +++++++++ crates/ra_syntax/src/yellow.rs | 2 +- 20 files changed, 79 insertions(+), 40 deletions(-) (limited to 'crates/ra_syntax/src') diff --git a/crates/ra_syntax/src/ast.rs b/crates/ra_syntax/src/ast.rs index bcbd4c60c..4d8412d46 100644 --- a/crates/ra_syntax/src/ast.rs +++ b/crates/ra_syntax/src/ast.rs @@ -142,7 +142,7 @@ impl Attr { pub fn as_atom(&self) -> Option { let tt = self.value()?; let (_bra, attr, _ket) = tt.syntax().children().collect_tuple()?; - if attr.kind() == IDENT { + if attr.kind().is_ident() { Some(attr.leaf_text().unwrap().clone()) } else { None @@ -153,7 +153,7 @@ impl Attr { let tt = self.value()?; let (_bra, attr, args, _ket) = tt.syntax().children().collect_tuple()?; let args = TokenTree::cast(args)?; - if attr.kind() == IDENT { + if attr.kind().is_ident() { Some((attr.leaf_text().unwrap().clone(), args)) } else { None diff --git a/crates/ra_syntax/src/grammar.ron b/crates/ra_syntax/src/grammar.ron index 0385183fd..64beb8252 100644 --- a/crates/ra_syntax/src/grammar.ron +++ b/crates/ra_syntax/src/grammar.ron @@ -102,6 +102,7 @@ Grammar( tokens: [ "ERROR", "IDENT", + "RAW_IDENT", "UNDERSCORE", "WHITESPACE", "INT_NUMBER", @@ -116,6 +117,10 @@ Grammar( "COMMENT", "SHEBANG", ], + ident_tokens: [ + "IDENT", + "RAW_IDENT", + ], nodes: [ "SOURCE_FILE", diff --git a/crates/ra_syntax/src/grammar.rs b/crates/ra_syntax/src/grammar.rs index 060c0ccdf..531d0458f 100644 --- a/crates/ra_syntax/src/grammar.rs +++ b/crates/ra_syntax/src/grammar.rs @@ -140,7 +140,7 @@ fn opt_fn_ret_type(p: &mut Parser) -> bool { } fn name_r(p: &mut Parser, recovery: TokenSet) { - if p.at(IDENT) { + if p.current().is_ident() { let m = p.start(); p.bump(); m.complete(p, NAME); @@ -154,7 +154,7 @@ fn name(p: &mut Parser) { } fn name_ref(p: &mut Parser) { - if p.at(IDENT) { + if p.current().is_ident() { let m = p.start(); p.bump(); m.complete(p, NAME_REF); diff --git a/crates/ra_syntax/src/grammar/expressions.rs b/crates/ra_syntax/src/grammar/expressions.rs index 2236555e0..107b7cda4 100644 --- a/crates/ra_syntax/src/grammar/expressions.rs +++ b/crates/ra_syntax/src/grammar/expressions.rs @@ -281,7 +281,7 @@ fn postfix_expr( // } L_PAREN if allow_calls => call_expr(p, lhs), L_BRACK if allow_calls => index_expr(p, lhs), - DOT if p.nth(1) == IDENT && (p.nth(2) == L_PAREN || p.nth(2) == COLONCOLON) => { + DOT if p.nth(1).is_ident() && (p.nth(2) == L_PAREN || p.nth(2) == COLONCOLON) => { method_call_expr(p, lhs) } DOT => field_expr(p, lhs), @@ -332,7 +332,7 @@ fn index_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { // y.bar::(1, 2,); // } fn method_call_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { - assert!(p.at(DOT) && p.nth(1) == IDENT && (p.nth(2) == L_PAREN || p.nth(2) == COLONCOLON)); + assert!(p.at(DOT) && p.nth(1).is_ident() && (p.nth(2) == L_PAREN || p.nth(2) == COLONCOLON)); let m = lhs.precede(p); p.bump(); name_ref(p); @@ -352,7 +352,7 @@ fn field_expr(p: &mut Parser, lhs: CompletedMarker) -> CompletedMarker { assert!(p.at(DOT)); let m = lhs.precede(p); p.bump(); - if p.at(IDENT) { + if p.current().is_ident() { name_ref(p) } else if p.at(INT_NUMBER) { p.bump() @@ -443,7 +443,7 @@ pub(crate) fn named_field_list(p: &mut Parser) { p.bump(); while !p.at(EOF) && !p.at(R_CURLY) { match p.current() { - IDENT => { + IDENT | RAW_IDENT => { let m = p.start(); name_ref(p); if p.eat(COLON) { diff --git a/crates/ra_syntax/src/grammar/expressions/atom.rs b/crates/ra_syntax/src/grammar/expressions/atom.rs index 167a76551..3fbe22856 100644 --- a/crates/ra_syntax/src/grammar/expressions/atom.rs +++ b/crates/ra_syntax/src/grammar/expressions/atom.rs @@ -48,6 +48,7 @@ pub(super) const ATOM_EXPR_FIRST: TokenSet = LITERAL_FIRST.union(token_set![ UNSAFE_KW, RETURN_KW, IDENT, + RAW_IDENT, SELF_KW, SUPER_KW, CRATE_KW, diff --git a/crates/ra_syntax/src/grammar/items.rs b/crates/ra_syntax/src/grammar/items.rs index 265e84570..c49798444 100644 --- a/crates/ra_syntax/src/grammar/items.rs +++ b/crates/ra_syntax/src/grammar/items.rs @@ -99,11 +99,11 @@ pub(super) fn maybe_item(p: &mut Parser, flavor: ItemFlavor) -> MaybeItem { has_mods = true; abi(p); } - if p.at(IDENT) && p.at_contextual_kw("auto") && p.nth(1) == TRAIT_KW { + if p.current().is_ident() && p.at_contextual_kw("auto") && p.nth(1) == TRAIT_KW { p.bump_remap(AUTO_KW); has_mods = true; } - if p.at(IDENT) && p.at_contextual_kw("default") && p.nth(1) == IMPL_KW { + if p.current().is_ident() && p.at_contextual_kw("default") && p.nth(1) == IMPL_KW { p.bump_remap(DEFAULT_KW); has_mods = true; } @@ -202,7 +202,7 @@ fn items_without_modifiers(p: &mut Parser) -> Option { } STRUCT_DEF } - IDENT if p.at_contextual_kw("union") && p.nth(1) == IDENT => { + IDENT | RAW_IDENT if p.at_contextual_kw("union") && p.nth(1).is_ident() => { // test union_items // union Foo {} // union Foo { @@ -220,7 +220,7 @@ fn items_without_modifiers(p: &mut Parser) -> Option { use_item::use_item(p); USE_ITEM } - CONST_KW if (la == IDENT || la == MUT_KW) => { + CONST_KW if (la.is_ident() || la == MUT_KW) => { consts::const_def(p); CONST_DEF } @@ -351,7 +351,7 @@ fn macro_call(p: &mut Parser) -> BlockLike { pub(super) fn macro_call_after_excl(p: &mut Parser) -> BlockLike { p.expect(EXCL); - p.eat(IDENT); + p.eat_one(&[IDENT, RAW_IDENT]); match p.current() { L_CURLY => { token_tree(p); diff --git a/crates/ra_syntax/src/grammar/items/nominal.rs b/crates/ra_syntax/src/grammar/items/nominal.rs index 0784fb7b1..897306883 100644 --- a/crates/ra_syntax/src/grammar/items/nominal.rs +++ b/crates/ra_syntax/src/grammar/items/nominal.rs @@ -70,7 +70,7 @@ pub(crate) fn enum_variant_list(p: &mut Parser) { } let var = p.start(); attributes::outer_attributes(p); - if p.at(IDENT) { + if p.current().is_ident() { name(p); match p.current() { L_CURLY => named_field_def_list(p), @@ -120,7 +120,7 @@ pub(crate) fn named_field_def_list(p: &mut Parser) { // } attributes::outer_attributes(p); opt_visibility(p); - if p.at(IDENT) { + if p.current().is_ident() { name(p); p.expect(COLON); types::type_(p); diff --git a/crates/ra_syntax/src/grammar/items/traits.rs b/crates/ra_syntax/src/grammar/items/traits.rs index 0a0621753..a78bbba2b 100644 --- a/crates/ra_syntax/src/grammar/items/traits.rs +++ b/crates/ra_syntax/src/grammar/items/traits.rs @@ -112,7 +112,7 @@ fn choose_type_params_over_qpath(p: &Parser) -> bool { if p.nth(1) == POUND || p.nth(1) == R_ANGLE { return true; } - (p.nth(1) == LIFETIME || p.nth(1) == IDENT) + (p.nth(1) == LIFETIME || p.nth(1).is_ident()) && (p.nth(2) == R_ANGLE || p.nth(2) == COMMA || p.nth(2) == COLON || p.nth(2) == EQ) } diff --git a/crates/ra_syntax/src/grammar/params.rs b/crates/ra_syntax/src/grammar/params.rs index 13158429a..ada07c17e 100644 --- a/crates/ra_syntax/src/grammar/params.rs +++ b/crates/ra_syntax/src/grammar/params.rs @@ -84,9 +84,9 @@ fn value_parameter(p: &mut Parser, flavor: Flavor) { // trait Foo { // fn bar(_: u64); // } - if (la0 == IDENT || la0 == UNDERSCORE) && la1 == COLON - || la0 == AMP && la1 == IDENT && la2 == COLON - || la0 == AMP && la1 == MUT_KW && la2 == IDENT && la3 == COLON + if (la0.is_ident() || la0 == UNDERSCORE) && la1 == COLON + || la0 == AMP && la1.is_ident() && la2 == COLON + || la0 == AMP && la1 == MUT_KW && la2.is_ident() && la3 == COLON { patterns::pattern(p); types::ascription(p); diff --git a/crates/ra_syntax/src/grammar/paths.rs b/crates/ra_syntax/src/grammar/paths.rs index 33a11886c..0e1c1d334 100644 --- a/crates/ra_syntax/src/grammar/paths.rs +++ b/crates/ra_syntax/src/grammar/paths.rs @@ -1,11 +1,11 @@ use super::*; pub(super) const PATH_FIRST: TokenSet = - token_set![IDENT, SELF_KW, SUPER_KW, CRATE_KW, COLONCOLON, L_ANGLE]; + token_set![IDENT, RAW_IDENT, SELF_KW, SUPER_KW, CRATE_KW, COLONCOLON, L_ANGLE]; pub(super) fn is_path_start(p: &Parser) -> bool { match p.current() { - IDENT | SELF_KW | SUPER_KW | CRATE_KW | COLONCOLON => true, + IDENT | RAW_IDENT | SELF_KW | SUPER_KW | CRATE_KW | COLONCOLON => true, _ => false, } } @@ -70,7 +70,7 @@ fn path_segment(p: &mut Parser, mode: Mode, first: bool) { p.eat(COLONCOLON); } match p.current() { - IDENT => { + IDENT | RAW_IDENT => { name_ref(p); opt_path_type_args(p, mode); } diff --git a/crates/ra_syntax/src/grammar/patterns.rs b/crates/ra_syntax/src/grammar/patterns.rs index 1ac5efdf6..925eabe1b 100644 --- a/crates/ra_syntax/src/grammar/patterns.rs +++ b/crates/ra_syntax/src/grammar/patterns.rs @@ -37,7 +37,7 @@ fn atom_pat(p: &mut Parser, recovery_set: TokenSet) -> Option { let la1 = p.nth(1); if la0 == REF_KW || la0 == MUT_KW - || (la0 == IDENT && !(la1 == COLONCOLON || la1 == L_PAREN || la1 == L_CURLY)) + || (la0.is_ident() && !(la1 == COLONCOLON || la1 == L_PAREN || la1 == L_CURLY)) { return Some(bind_pat(p, true)); } @@ -128,7 +128,7 @@ fn field_pat_list(p: &mut Parser) { while !p.at(EOF) && !p.at(R_CURLY) { match p.current() { DOTDOT => p.bump(), - IDENT if p.nth(1) == COLON => field_pat(p), + IDENT | RAW_IDENT if p.nth(1) == COLON => field_pat(p), L_CURLY => error_block(p, "expected ident"), _ => { bind_pat(p, false); @@ -143,7 +143,7 @@ fn field_pat_list(p: &mut Parser) { } fn field_pat(p: &mut Parser) { - assert!(p.at(IDENT)); + assert!(p.current().is_ident()); assert!(p.nth(1) == COLON); let m = p.start(); diff --git a/crates/ra_syntax/src/grammar/type_args.rs b/crates/ra_syntax/src/grammar/type_args.rs index f889419c5..469595d71 100644 --- a/crates/ra_syntax/src/grammar/type_args.rs +++ b/crates/ra_syntax/src/grammar/type_args.rs @@ -34,7 +34,7 @@ fn type_arg(p: &mut Parser) { p.bump(); m.complete(p, LIFETIME_ARG); } - IDENT if p.nth(1) == EQ => { + IDENT | RAW_IDENT if p.nth(1) == EQ => { name_ref(p); p.bump(); types::type_(p); diff --git a/crates/ra_syntax/src/grammar/type_params.rs b/crates/ra_syntax/src/grammar/type_params.rs index 1ec813b3e..3cebd0675 100644 --- a/crates/ra_syntax/src/grammar/type_params.rs +++ b/crates/ra_syntax/src/grammar/type_params.rs @@ -15,7 +15,7 @@ fn type_param_list(p: &mut Parser) { while !p.at(EOF) && !p.at(R_ANGLE) { match p.current() { LIFETIME => lifetime_param(p), - IDENT => type_param(p), + IDENT | RAW_IDENT => type_param(p), _ => p.err_and_bump("expected type parameter"), } if !p.at(R_ANGLE) && !p.expect(COMMA) { @@ -37,7 +37,7 @@ fn lifetime_param(p: &mut Parser) { } fn type_param(p: &mut Parser) { - assert!(p.at(IDENT)); + assert!(p.current().is_ident()); let m = p.start(); name(p); if p.at(COLON) { diff --git a/crates/ra_syntax/src/lexer.rs b/crates/ra_syntax/src/lexer.rs index c6acd095e..fab184a2d 100644 --- a/crates/ra_syntax/src/lexer.rs +++ b/crates/ra_syntax/src/lexer.rs @@ -190,19 +190,24 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { } fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { - let is_single_letter = match ptr.current() { - None => true, - Some(c) if !is_ident_continue(c) => true, + let is_raw = match (c, ptr.current()) { + ('r', Some('#')) => { + ptr.bump(); + true + } + ('_', Some(c)) if !is_ident_continue(c) => return UNDERSCORE, _ => false, }; - if is_single_letter { - return if c == '_' { UNDERSCORE } else { IDENT }; - } + ptr.bump_while(is_ident_continue); - if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) { + + if is_raw { + RAW_IDENT + } else if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) { return kind; + } else { + IDENT } - IDENT } fn scan_literal_suffix(ptr: &mut Ptr) { diff --git a/crates/ra_syntax/src/lexer/strings.rs b/crates/ra_syntax/src/lexer/strings.rs index 0865b7f3b..5c1cf3e9c 100644 --- a/crates/ra_syntax/src/lexer/strings.rs +++ b/crates/ra_syntax/src/lexer/strings.rs @@ -5,7 +5,8 @@ use crate::lexer::ptr::Ptr; pub(crate) fn is_string_literal_start(c: char, c1: Option, c2: Option) -> bool { match (c, c1, c2) { ('r', Some('"'), _) - | ('r', Some('#'), _) + | ('r', Some('#'), Some('"')) + | ('r', Some('#'), Some('#')) | ('b', Some('"'), _) | ('b', Some('\''), _) | ('b', Some('r'), Some('"')) diff --git a/crates/ra_syntax/src/parser_api.rs b/crates/ra_syntax/src/parser_api.rs index 3148371c5..d795cbaf1 100644 --- a/crates/ra_syntax/src/parser_api.rs +++ b/crates/ra_syntax/src/parser_api.rs @@ -100,6 +100,14 @@ impl<'t> Parser<'t> { true } + /// Consume the next token matching one of the `kinds` + pub(crate) fn eat_one<'k, K>(&mut self, kinds: K) -> bool + where + K: IntoIterator + 'k, + { + kinds.into_iter().map(|k| self.eat(*k)).any(|eaten| eaten) + } + /// Consume the next token if it is `kind` or emit an error /// otherwise. pub(crate) fn expect(&mut self, kind: SyntaxKind) -> bool { diff --git a/crates/ra_syntax/src/reparsing.rs b/crates/ra_syntax/src/reparsing.rs index 2f1de6b02..b38985bc8 100644 --- a/crates/ra_syntax/src/reparsing.rs +++ b/crates/ra_syntax/src/reparsing.rs @@ -25,7 +25,7 @@ fn reparse_leaf<'node>( ) -> Option<(&'node SyntaxNode, GreenNode, Vec)> { let node = algo::find_covering_node(node, edit.delete); match node.kind() { - WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => { + WHITESPACE | COMMENT | IDENT | RAW_IDENT | STRING | RAW_STRING => { let text = get_text_after_edit(node, &edit); let tokens = tokenize(&text); let token = match tokens[..] { @@ -33,7 +33,7 @@ fn reparse_leaf<'node>( _ => return None, }; - if token.kind == IDENT && is_contextual_kw(&text) { + if token.kind.is_ident() && is_contextual_kw(&text) { return None; } diff --git a/crates/ra_syntax/src/syntax_kinds/generated.rs b/crates/ra_syntax/src/syntax_kinds/generated.rs index 06faf7557..aa1ab3326 100644 --- a/crates/ra_syntax/src/syntax_kinds/generated.rs +++ b/crates/ra_syntax/src/syntax_kinds/generated.rs @@ -105,6 +105,7 @@ pub enum SyntaxKind { UNION_KW, ERROR, IDENT, + RAW_IDENT, UNDERSCORE, WHITESPACE, INT_NUMBER, @@ -368,6 +369,7 @@ impl SyntaxKind { UNION_KW => &SyntaxInfo { name: "UNION_KW" }, ERROR => &SyntaxInfo { name: "ERROR" }, IDENT => &SyntaxInfo { name: "IDENT" }, + RAW_IDENT => &SyntaxInfo { name: "RAW_IDENT" }, UNDERSCORE => &SyntaxInfo { name: "UNDERSCORE" }, WHITESPACE => &SyntaxInfo { name: "WHITESPACE" }, INT_NUMBER => &SyntaxInfo { name: "INT_NUMBER" }, @@ -563,4 +565,12 @@ impl SyntaxKind { }; Some(tok) } + + pub(crate) fn is_ident(&self) -> bool { + match self { + | IDENT + | RAW_IDENT => true, + _ => false, + } + } } diff --git a/crates/ra_syntax/src/syntax_kinds/generated.rs.tera b/crates/ra_syntax/src/syntax_kinds/generated.rs.tera index 21f9444b1..83787f820 100644 --- a/crates/ra_syntax/src/syntax_kinds/generated.rs.tera +++ b/crates/ra_syntax/src/syntax_kinds/generated.rs.tera @@ -74,4 +74,13 @@ impl SyntaxKind { }; Some(tok) } + + pub(crate) fn is_ident(&self) -> bool { + match self { +{%- for kind in ident_tokens %} + | {{kind}} +{%- endfor %} => true, + _ => false, + } + } } diff --git a/crates/ra_syntax/src/yellow.rs b/crates/ra_syntax/src/yellow.rs index a7bfb80e2..48f01128b 100644 --- a/crates/ra_syntax/src/yellow.rs +++ b/crates/ra_syntax/src/yellow.rs @@ -207,7 +207,7 @@ impl<'a> Iterator for SyntaxNodeChildren<'a> { fn has_short_text(kind: SyntaxKind) -> bool { use crate::SyntaxKind::*; match kind { - IDENT | LIFETIME | INT_NUMBER | FLOAT_NUMBER => true, + IDENT | RAW_IDENT | LIFETIME | INT_NUMBER | FLOAT_NUMBER => true, _ => false, } } -- cgit v1.2.3