From 7fb7cfc2d1cd2781a20e4034b12384566d13c728 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Mon, 30 Jul 2018 17:46:50 +0300 Subject: generate single byte tokens --- src/grammar.ron | 74 ++++++++++++------------ src/lexer/mod.rs | 27 ++------- src/syntax_kinds/generated.rs | 113 +++++++++++++++++++++++-------------- src/syntax_kinds/generated.rs.tera | 26 ++++++++- 4 files changed, 138 insertions(+), 102 deletions(-) (limited to 'src') diff --git a/src/grammar.ron b/src/grammar.ron index 8232ba1dc..38453b45c 100644 --- a/src/grammar.ron +++ b/src/grammar.ron @@ -1,4 +1,43 @@ Grammar( + single_byte_tokens: [ + [";", "SEMI"], + [",", "COMMA"], + ["(", "L_PAREN"], + [")", "R_PAREN"], + ["{", "L_CURLY"], + ["}", "R_CURLY"], + ["[", "L_BRACK"], + ["]", "R_BRACK"], + ["<", "L_ANGLE"], + [">", "R_ANGLE"], + ["@", "AT"], + ["#", "POUND"], + ["~", "TILDE"], + ["?", "QUESTION"], + ["$", "DOLLAR"], + ["&", "AMPERSAND"], + ["|", "PIPE"], + ["+", "PLUS"], + ["*", "STAR"], + ["/", "SLASH"], + ["^", "CARET"], + ["%", "PERCENT"], + ], + multi_byte_tokens: [ + [".", "DOT"], + ["..", "DOTDOT"], + ["...", "DOTDOTDOT"], + ["..=", "DOTDOTEQ"], + [":", "COLON"], + ["::", "COLONCOLON"], + ["=", "EQ"], + ["==", "EQEQ"], + ["=>", "FAT_ARROW"], + ["!", "EXCL"], + ["!=", "NEQ"], + ["-", "MINUS"], + ["->", "THIN_ARROW"], + ], keywords: [ "use", "fn", @@ -42,32 +81,6 @@ Grammar( "WHITESPACE", "INT_NUMBER", "FLOAT_NUMBER", - "SEMI", - "COMMA", - "DOT", - "DOTDOT", - "DOTDOTDOT", - "DOTDOTEQ", - "L_PAREN", - "R_PAREN", - "L_CURLY", - "R_CURLY", - "L_BRACK", - "R_BRACK", - "L_ANGLE", - "R_ANGLE", - "AT", - "POUND", - "TILDE", - "QUESTION", - "COLON", - "COLONCOLON", - "DOLLAR", - "EQ", - "EQEQ", - "FAT_ARROW", - "NEQ", - "EXCL", "LIFETIME", "CHAR", "BYTE", @@ -75,15 +88,6 @@ Grammar( "RAW_STRING", "BYTE_STRING", "RAW_BYTE_STRING", - "PLUS", - "MINUS", - "STAR", - "SLASH", - "CARET", - "PERCENT", - "AMPERSAND", - "PIPE", - "THIN_ARROW", "COMMENT", "DOC_COMMENT", "SHEBANG", diff --git 
a/src/lexer/mod.rs b/src/lexer/mod.rs index f647838ea..f8fdc41ac 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -79,30 +79,11 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { } // One-byte tokens. - match c { - ';' => return SEMI, - ',' => return COMMA, - '(' => return L_PAREN, - ')' => return R_PAREN, - '{' => return L_CURLY, - '}' => return R_CURLY, - '[' => return L_BRACK, - ']' => return R_BRACK, - '<' => return L_ANGLE, - '>' => return R_ANGLE, - '@' => return AT, - '#' => return POUND, - '~' => return TILDE, - '?' => return QUESTION, - '$' => return DOLLAR, - '&' => return AMPERSAND, - '|' => return PIPE, - '+' => return PLUS, - '*' => return STAR, - '/' => return SLASH, - '^' => return CARET, - '%' => return PERCENT, + if let Some(kind) = SyntaxKind::from_char(c) { + return kind; + } + match c { // Multi-byte tokens. '.' => { return match (ptr.next(), ptr.nnext()) { diff --git a/src/syntax_kinds/generated.rs b/src/syntax_kinds/generated.rs index bc2a995f6..de0d1c51d 100644 --- a/src/syntax_kinds/generated.rs +++ b/src/syntax_kinds/generated.rs @@ -5,18 +5,8 @@ use super::SyntaxInfo; /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. 
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum SyntaxKind { - ERROR, - IDENT, - UNDERSCORE, - WHITESPACE, - INT_NUMBER, - FLOAT_NUMBER, SEMI, COMMA, - DOT, - DOTDOT, - DOTDOTDOT, - DOTDOTEQ, L_PAREN, R_PAREN, L_CURLY, @@ -29,14 +19,33 @@ pub enum SyntaxKind { POUND, TILDE, QUESTION, + DOLLAR, + AMPERSAND, + PIPE, + PLUS, + STAR, + SLASH, + CARET, + PERCENT, + DOT, + DOTDOT, + DOTDOTDOT, + DOTDOTEQ, COLON, COLONCOLON, - DOLLAR, EQ, EQEQ, FAT_ARROW, - NEQ, EXCL, + NEQ, + MINUS, + THIN_ARROW, + ERROR, + IDENT, + UNDERSCORE, + WHITESPACE, + INT_NUMBER, + FLOAT_NUMBER, LIFETIME, CHAR, BYTE, @@ -44,15 +53,6 @@ pub enum SyntaxKind { RAW_STRING, BYTE_STRING, RAW_BYTE_STRING, - PLUS, - MINUS, - STAR, - SLASH, - CARET, - PERCENT, - AMPERSAND, - PIPE, - THIN_ARROW, COMMENT, DOC_COMMENT, SHEBANG, @@ -151,18 +151,8 @@ use self::SyntaxKind::*; impl SyntaxKind { pub(crate) fn info(self) -> &'static SyntaxInfo { match self { - ERROR => &SyntaxInfo { name: "ERROR" }, - IDENT => &SyntaxInfo { name: "IDENT" }, - UNDERSCORE => &SyntaxInfo { name: "UNDERSCORE" }, - WHITESPACE => &SyntaxInfo { name: "WHITESPACE" }, - INT_NUMBER => &SyntaxInfo { name: "INT_NUMBER" }, - FLOAT_NUMBER => &SyntaxInfo { name: "FLOAT_NUMBER" }, SEMI => &SyntaxInfo { name: "SEMI" }, COMMA => &SyntaxInfo { name: "COMMA" }, - DOT => &SyntaxInfo { name: "DOT" }, - DOTDOT => &SyntaxInfo { name: "DOTDOT" }, - DOTDOTDOT => &SyntaxInfo { name: "DOTDOTDOT" }, - DOTDOTEQ => &SyntaxInfo { name: "DOTDOTEQ" }, L_PAREN => &SyntaxInfo { name: "L_PAREN" }, R_PAREN => &SyntaxInfo { name: "R_PAREN" }, L_CURLY => &SyntaxInfo { name: "L_CURLY" }, @@ -175,14 +165,33 @@ impl SyntaxKind { POUND => &SyntaxInfo { name: "POUND" }, TILDE => &SyntaxInfo { name: "TILDE" }, QUESTION => &SyntaxInfo { name: "QUESTION" }, + DOLLAR => &SyntaxInfo { name: "DOLLAR" }, + AMPERSAND => &SyntaxInfo { name: "AMPERSAND" }, + PIPE => &SyntaxInfo { name: "PIPE" }, + PLUS => &SyntaxInfo { name: "PLUS" }, + STAR => &SyntaxInfo { 
name: "STAR" }, + SLASH => &SyntaxInfo { name: "SLASH" }, + CARET => &SyntaxInfo { name: "CARET" }, + PERCENT => &SyntaxInfo { name: "PERCENT" }, + DOT => &SyntaxInfo { name: "DOT" }, + DOTDOT => &SyntaxInfo { name: "DOTDOT" }, + DOTDOTDOT => &SyntaxInfo { name: "DOTDOTDOT" }, + DOTDOTEQ => &SyntaxInfo { name: "DOTDOTEQ" }, COLON => &SyntaxInfo { name: "COLON" }, COLONCOLON => &SyntaxInfo { name: "COLONCOLON" }, - DOLLAR => &SyntaxInfo { name: "DOLLAR" }, EQ => &SyntaxInfo { name: "EQ" }, EQEQ => &SyntaxInfo { name: "EQEQ" }, FAT_ARROW => &SyntaxInfo { name: "FAT_ARROW" }, - NEQ => &SyntaxInfo { name: "NEQ" }, EXCL => &SyntaxInfo { name: "EXCL" }, + NEQ => &SyntaxInfo { name: "NEQ" }, + MINUS => &SyntaxInfo { name: "MINUS" }, + THIN_ARROW => &SyntaxInfo { name: "THIN_ARROW" }, + ERROR => &SyntaxInfo { name: "ERROR" }, + IDENT => &SyntaxInfo { name: "IDENT" }, + UNDERSCORE => &SyntaxInfo { name: "UNDERSCORE" }, + WHITESPACE => &SyntaxInfo { name: "WHITESPACE" }, + INT_NUMBER => &SyntaxInfo { name: "INT_NUMBER" }, + FLOAT_NUMBER => &SyntaxInfo { name: "FLOAT_NUMBER" }, LIFETIME => &SyntaxInfo { name: "LIFETIME" }, CHAR => &SyntaxInfo { name: "CHAR" }, BYTE => &SyntaxInfo { name: "BYTE" }, @@ -190,15 +199,6 @@ impl SyntaxKind { RAW_STRING => &SyntaxInfo { name: "RAW_STRING" }, BYTE_STRING => &SyntaxInfo { name: "BYTE_STRING" }, RAW_BYTE_STRING => &SyntaxInfo { name: "RAW_BYTE_STRING" }, - PLUS => &SyntaxInfo { name: "PLUS" }, - MINUS => &SyntaxInfo { name: "MINUS" }, - STAR => &SyntaxInfo { name: "STAR" }, - SLASH => &SyntaxInfo { name: "SLASH" }, - CARET => &SyntaxInfo { name: "CARET" }, - PERCENT => &SyntaxInfo { name: "PERCENT" }, - AMPERSAND => &SyntaxInfo { name: "AMPERSAND" }, - PIPE => &SyntaxInfo { name: "PIPE" }, - THIN_ARROW => &SyntaxInfo { name: "THIN_ARROW" }, COMMENT => &SyntaxInfo { name: "COMMENT" }, DOC_COMMENT => &SyntaxInfo { name: "DOC_COMMENT" }, SHEBANG => &SyntaxInfo { name: "SHEBANG" }, @@ -325,5 +325,34 @@ impl SyntaxKind { }; Some(kw) } + + 
pub(crate) fn from_char(c: char) -> Option<SyntaxKind> { + let tok = match c { + ';' => SEMI, + ',' => COMMA, + '(' => L_PAREN, + ')' => R_PAREN, + '{' => L_CURLY, + '}' => R_CURLY, + '[' => L_BRACK, + ']' => R_BRACK, + '<' => L_ANGLE, + '>' => R_ANGLE, + '@' => AT, + '#' => POUND, + '~' => TILDE, + '?' => QUESTION, + '$' => DOLLAR, + '&' => AMPERSAND, + '|' => PIPE, + '+' => PLUS, + '*' => STAR, + '/' => SLASH, + '^' => CARET, + '%' => PERCENT, + _ => return None, + }; + Some(tok) + } } diff --git a/src/syntax_kinds/generated.rs.tera b/src/syntax_kinds/generated.rs.tera index aa672d89a..21e471b71 100644 --- a/src/syntax_kinds/generated.rs.tera +++ b/src/syntax_kinds/generated.rs.tera @@ -5,7 +5,13 @@ use super::SyntaxInfo; /// The kind of syntax node, e.g. `IDENT`, `USE_KW`, or `STRUCT_DEF`. #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum SyntaxKind { -{%- for t in tokens %} +{%- for t in single_byte_tokens %} + {{t.1}}, +{%- endfor -%} +{% for t in multi_byte_tokens %} + {{t.1}}, +{%- endfor -%} +{% for t in tokens %} {{t}}, {%- endfor -%} {% for kw in keywords %} @@ -29,7 +35,13 @@ use self::SyntaxKind::*; impl SyntaxKind { pub(crate) fn info(self) -> &'static SyntaxInfo { match self { -{%- for t in tokens %} +{%- for t in single_byte_tokens %} + {{t.1}} => &SyntaxInfo { name: "{{t.1}}" }, +{%- endfor -%} +{% for t in multi_byte_tokens %} + {{t.1}} => &SyntaxInfo { name: "{{t.1}}" }, +{%- endfor -%} +{% for t in tokens %} {{t}} => &SyntaxInfo { name: "{{t}}" }, {%- endfor -%} {% for kw in keywords %} @@ -55,5 +67,15 @@ impl SyntaxKind { }; Some(kw) } + + pub(crate) fn from_char(c: char) -> Option<SyntaxKind> { + let tok = match c { +{%- for t in single_byte_tokens %} + '{{t.0}}' => {{t.1}}, +{%- endfor %} + _ => return None, + }; + Some(tok) + } } -- cgit v1.2.3