From fad3e50987311a3c42a45bd3d9dbcf7c7a77e544 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 30 Dec 2017 16:30:37 +0300 Subject: Lexer: symbols --- grammar.ron | 19 ++++++++++++++++++ src/lexer/mod.rs | 41 +++++++++++++++++++++++++++++++++++++++ src/syntax_kinds.rs | 40 +++++++++++++++++++++++++++++++++++++- tests/data/lexer/0004_number.txt | 11 +++++------ tests/data/lexer/0005_symbols.rs | 3 +++ tests/data/lexer/0005_symbols.txt | 37 +++++++++++++++++++++++++++++++++++ 6 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 tests/data/lexer/0005_symbols.rs create mode 100644 tests/data/lexer/0005_symbols.txt diff --git a/grammar.ron b/grammar.ron index a86fe693f..56617f757 100644 --- a/grammar.ron +++ b/grammar.ron @@ -6,5 +6,24 @@ Grammar( "WHITESPACE", "INT_NUMBER", "FLOAT_NUMBER", + "SEMI", + "COMMA", + "DOT", + "DOTDOT", + "DOTDOTDOT", + "DOTDOTEQ", + "L_PAREN", + "R_PAREN", + "L_CURLY", + "R_CURLY", + "L_BRACK", + "R_BRACK", + "AT", + "POUND", + "TILDE", + "QUESTION", + "COLON", + "COLONCOLON", + "DOLLAR", ] ) \ No newline at end of file diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index e60dbbe8e..d2d4aaa22 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -37,6 +37,47 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { return scan_number(c, ptr); } + // One-byte tokens. + match c { + ';' => return SEMI, + ',' => return COMMA, + '(' => return L_PAREN, + ')' => return R_PAREN, + '{' => return L_CURLY, + '}' => return R_CURLY, + '[' => return L_BRACK, + ']' => return R_BRACK, + '@' => return AT, + '#' => return POUND, + '~' => return TILDE, + '?' => return QUESTION, + '$' => return DOLLAR, + '.' => return match (ptr.next(), ptr.nnext()) { + (Some('.'), Some('.')) => { + ptr.bump(); + ptr.bump(); + DOTDOTDOT + }, + (Some('.'), Some('=')) => { + ptr.bump(); + ptr.bump(); + DOTDOTEQ + }, + (Some('.'), _) => { + ptr.bump(); + DOTDOT + }, + _ => DOT + }, + ':' => return match ptr.next() { + Some(':') => { + ptr.bump(); + COLONCOLON + } + _ => COLON + }, + _ => (), + } ERROR } diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs index bd1265bde..6982cba95 100644 --- a/src/syntax_kinds.rs +++ b/src/syntax_kinds.rs @@ -7,14 +7,52 @@ pub const UNDERSCORE: SyntaxKind = SyntaxKind(2); pub const WHITESPACE: SyntaxKind = SyntaxKind(3); pub const INT_NUMBER: SyntaxKind = SyntaxKind(4); pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5); +pub const SEMI: SyntaxKind = SyntaxKind(6); +pub const COMMA: SyntaxKind = SyntaxKind(7); +pub const DOT: SyntaxKind = SyntaxKind(8); +pub const DOTDOT: SyntaxKind = SyntaxKind(9); +pub const DOTDOTDOT: SyntaxKind = SyntaxKind(10); +pub const DOTDOTEQ: SyntaxKind = SyntaxKind(11); +pub const L_PAREN: SyntaxKind = SyntaxKind(12); +pub const R_PAREN: SyntaxKind = SyntaxKind(13); +pub const L_CURLY: SyntaxKind = SyntaxKind(14); +pub const R_CURLY: SyntaxKind = SyntaxKind(15); +pub const L_BRACK: SyntaxKind = SyntaxKind(16); +pub const R_BRACK: SyntaxKind = SyntaxKind(17); +pub const AT: SyntaxKind = SyntaxKind(18); +pub const POUND: SyntaxKind = SyntaxKind(19); +pub const TILDE: SyntaxKind = SyntaxKind(20); +pub const QUESTION: SyntaxKind = SyntaxKind(21); +pub const COLON: SyntaxKind = SyntaxKind(22); +pub const COLONCOLON: SyntaxKind = SyntaxKind(23); +pub const DOLLAR: SyntaxKind = SyntaxKind(24); -static INFOS: [SyntaxInfo; 6] = [ +static INFOS: [SyntaxInfo; 25] = [ SyntaxInfo { name: "ERROR" }, SyntaxInfo { name: "IDENT" }, SyntaxInfo { name: "UNDERSCORE" }, SyntaxInfo { name: "WHITESPACE" }, SyntaxInfo { name: "INT_NUMBER" }, SyntaxInfo { name: "FLOAT_NUMBER" }, + SyntaxInfo { name: "SEMI" }, + SyntaxInfo { name: "COMMA" }, + SyntaxInfo { name: "DOT" }, + SyntaxInfo { name: "DOTDOT" }, + SyntaxInfo { name: "DOTDOTDOT" }, + SyntaxInfo { name: "DOTDOTEQ" }, + SyntaxInfo { name: "L_PAREN" }, + SyntaxInfo { name: "R_PAREN" }, + SyntaxInfo { name: "L_CURLY" }, + SyntaxInfo { name: "R_CURLY" }, + SyntaxInfo { name: "L_BRACK" }, + SyntaxInfo { name: "R_BRACK" }, + SyntaxInfo { name: "AT" }, + SyntaxInfo { name: "POUND" }, + SyntaxInfo { name: "TILDE" }, + SyntaxInfo { name: "QUESTION" }, + SyntaxInfo { name: "COLON" }, + SyntaxInfo { name: "COLONCOLON" }, + SyntaxInfo { name: "DOLLAR" }, ]; pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { diff --git a/tests/data/lexer/0004_number.txt b/tests/data/lexer/0004_number.txt index 94fe0302d..ede39aa99 100644 --- a/tests/data/lexer/0004_number.txt +++ b/tests/data/lexer/0004_number.txt @@ -38,22 +38,21 @@ WHITESPACE 1 " " INT_NUMBER 6 "0E1279" WHITESPACE 1 "\n" INT_NUMBER 1 "0" -ERROR 1 "." -ERROR 1 "." +DOTDOT 2 ".." INT_NUMBER 1 "2" WHITESPACE 1 "\n" INT_NUMBER 1 "0" -ERROR 1 "." +DOT 1 "." IDENT 3 "foo" -ERROR 1 "(" -ERROR 1 ")" +L_PAREN 1 "(" +R_PAREN 1 ")" WHITESPACE 1 "\n" INT_NUMBER 2 "0e" ERROR 1 "+" INT_NUMBER 1 "1" WHITESPACE 1 "\n" INT_NUMBER 1 "0" -ERROR 1 "." +DOT 1 "." IDENT 1 "e" ERROR 1 "+" INT_NUMBER 1 "1" diff --git a/tests/data/lexer/0005_symbols.rs b/tests/data/lexer/0005_symbols.rs new file mode 100644 index 000000000..79747d5e1 --- /dev/null +++ b/tests/data/lexer/0005_symbols.rs @@ -0,0 +1,3 @@ +; , ( ) { } [ ] @ # ~ ? $ +. .. ... ..= +: :: \ No newline at end of file diff --git a/tests/data/lexer/0005_symbols.txt b/tests/data/lexer/0005_symbols.txt new file mode 100644 index 000000000..c845d2dff --- /dev/null +++ b/tests/data/lexer/0005_symbols.txt @@ -0,0 +1,37 @@ +SEMI 1 ";" +WHITESPACE 1 " " +COMMA 1 "," +WHITESPACE 1 " " +L_PAREN 1 "(" +WHITESPACE 1 " " +R_PAREN 1 ")" +WHITESPACE 1 " " +L_CURLY 1 "{" +WHITESPACE 1 " " +R_CURLY 1 "}" +WHITESPACE 1 " " +L_BRACK 1 "[" +WHITESPACE 1 " " +R_BRACK 1 "]" +WHITESPACE 1 " " +AT 1 "@" +WHITESPACE 1 " " +POUND 1 "#" +WHITESPACE 1 " " +TILDE 1 "~" +WHITESPACE 1 " " +QUESTION 1 "?" +WHITESPACE 1 " " +DOLLAR 1 "$" +WHITESPACE 1 "\n" +DOT 1 "." +WHITESPACE 1 " " +DOTDOT 2 ".." +WHITESPACE 1 " " +DOTDOTDOT 3 "..." +WHITESPACE 1 " " +DOTDOTEQ 3 "..=" +WHITESPACE 1 "\n" +COLON 1 ":" +WHITESPACE 1 " " +COLONCOLON 2 "::" -- cgit v1.2.3