diff options
author | Aleksey Kladov <[email protected]> | 2017-12-30 13:30:37 +0000 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2017-12-30 13:30:44 +0000 |
commit | fad3e50987311a3c42a45bd3d9dbcf7c7a77e544 (patch) | |
tree | b5f1d262e5ecb1226dc73780db50ee3eebe66ff8 | |
parent | 3e91e8b77db443775eea2ccd40b0cf1e27dc77d8 (diff) |
Lexer: symbols
-rw-r--r-- | grammar.ron | 19 | ||||
-rw-r--r-- | src/lexer/mod.rs | 41 | ||||
-rw-r--r-- | src/syntax_kinds.rs | 40 | ||||
-rw-r--r-- | tests/data/lexer/0004_number.txt | 11 | ||||
-rw-r--r-- | tests/data/lexer/0005_symbols.rs | 3 | ||||
-rw-r--r-- | tests/data/lexer/0005_symbols.txt | 37 |
6 files changed, 144 insertions, 7 deletions
diff --git a/grammar.ron b/grammar.ron index a86fe693f..56617f757 100644 --- a/grammar.ron +++ b/grammar.ron | |||
@@ -6,5 +6,24 @@ Grammar( | |||
6 | "WHITESPACE", | 6 | "WHITESPACE", |
7 | "INT_NUMBER", | 7 | "INT_NUMBER", |
8 | "FLOAT_NUMBER", | 8 | "FLOAT_NUMBER", |
9 | "SEMI", | ||
10 | "COMMA", | ||
11 | "DOT", | ||
12 | "DOTDOT", | ||
13 | "DOTDOTDOT", | ||
14 | "DOTDOTEQ", | ||
15 | "L_PAREN", | ||
16 | "R_PAREN", | ||
17 | "L_CURLY", | ||
18 | "R_CURLY", | ||
19 | "L_BRACK", | ||
20 | "R_BRACK", | ||
21 | "AT", | ||
22 | "POUND", | ||
23 | "TILDE", | ||
24 | "QUESTION", | ||
25 | "COLON", | ||
26 | "COLONCOLON", | ||
27 | "DOLLAR", | ||
9 | ] | 28 | ] |
10 | ) \ No newline at end of file | 29 | ) \ No newline at end of file |
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index e60dbbe8e..d2d4aaa22 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs | |||
@@ -37,6 +37,47 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
37 | return scan_number(c, ptr); | 37 | return scan_number(c, ptr); |
38 | } | 38 | } |
39 | 39 | ||
40 | // One-byte tokens. | ||
41 | match c { | ||
42 | ';' => return SEMI, | ||
43 | ',' => return COMMA, | ||
44 | '(' => return L_PAREN, | ||
45 | ')' => return R_PAREN, | ||
46 | '{' => return L_CURLY, | ||
47 | '}' => return R_CURLY, | ||
48 | '[' => return L_BRACK, | ||
49 | ']' => return R_BRACK, | ||
50 | '@' => return AT, | ||
51 | '#' => return POUND, | ||
52 | '~' => return TILDE, | ||
53 | '?' => return QUESTION, | ||
54 | '$' => return DOLLAR, | ||
55 | '.' => return match (ptr.next(), ptr.nnext()) { | ||
56 | (Some('.'), Some('.')) => { | ||
57 | ptr.bump(); | ||
58 | ptr.bump(); | ||
59 | DOTDOTDOT | ||
60 | }, | ||
61 | (Some('.'), Some('=')) => { | ||
62 | ptr.bump(); | ||
63 | ptr.bump(); | ||
64 | DOTDOTEQ | ||
65 | }, | ||
66 | (Some('.'), _) => { | ||
67 | ptr.bump(); | ||
68 | DOTDOT | ||
69 | }, | ||
70 | _ => DOT | ||
71 | }, | ||
72 | ':' => return match ptr.next() { | ||
73 | Some(':') => { | ||
74 | ptr.bump(); | ||
75 | COLONCOLON | ||
76 | } | ||
77 | _ => COLON | ||
78 | }, | ||
79 | _ => (), | ||
80 | } | ||
40 | ERROR | 81 | ERROR |
41 | } | 82 | } |
42 | 83 | ||
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs index bd1265bde..6982cba95 100644 --- a/src/syntax_kinds.rs +++ b/src/syntax_kinds.rs | |||
@@ -7,14 +7,52 @@ pub const UNDERSCORE: SyntaxKind = SyntaxKind(2); | |||
7 | pub const WHITESPACE: SyntaxKind = SyntaxKind(3); | 7 | pub const WHITESPACE: SyntaxKind = SyntaxKind(3); |
8 | pub const INT_NUMBER: SyntaxKind = SyntaxKind(4); | 8 | pub const INT_NUMBER: SyntaxKind = SyntaxKind(4); |
9 | pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5); | 9 | pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5); |
10 | pub const SEMI: SyntaxKind = SyntaxKind(6); | ||
11 | pub const COMMA: SyntaxKind = SyntaxKind(7); | ||
12 | pub const DOT: SyntaxKind = SyntaxKind(8); | ||
13 | pub const DOTDOT: SyntaxKind = SyntaxKind(9); | ||
14 | pub const DOTDOTDOT: SyntaxKind = SyntaxKind(10); | ||
15 | pub const DOTDOTEQ: SyntaxKind = SyntaxKind(11); | ||
16 | pub const L_PAREN: SyntaxKind = SyntaxKind(12); | ||
17 | pub const R_PAREN: SyntaxKind = SyntaxKind(13); | ||
18 | pub const L_CURLY: SyntaxKind = SyntaxKind(14); | ||
19 | pub const R_CURLY: SyntaxKind = SyntaxKind(15); | ||
20 | pub const L_BRACK: SyntaxKind = SyntaxKind(16); | ||
21 | pub const R_BRACK: SyntaxKind = SyntaxKind(17); | ||
22 | pub const AT: SyntaxKind = SyntaxKind(18); | ||
23 | pub const POUND: SyntaxKind = SyntaxKind(19); | ||
24 | pub const TILDE: SyntaxKind = SyntaxKind(20); | ||
25 | pub const QUESTION: SyntaxKind = SyntaxKind(21); | ||
26 | pub const COLON: SyntaxKind = SyntaxKind(22); | ||
27 | pub const COLONCOLON: SyntaxKind = SyntaxKind(23); | ||
28 | pub const DOLLAR: SyntaxKind = SyntaxKind(24); | ||
10 | 29 | ||
11 | static INFOS: [SyntaxInfo; 6] = [ | 30 | static INFOS: [SyntaxInfo; 25] = [ |
12 | SyntaxInfo { name: "ERROR" }, | 31 | SyntaxInfo { name: "ERROR" }, |
13 | SyntaxInfo { name: "IDENT" }, | 32 | SyntaxInfo { name: "IDENT" }, |
14 | SyntaxInfo { name: "UNDERSCORE" }, | 33 | SyntaxInfo { name: "UNDERSCORE" }, |
15 | SyntaxInfo { name: "WHITESPACE" }, | 34 | SyntaxInfo { name: "WHITESPACE" }, |
16 | SyntaxInfo { name: "INT_NUMBER" }, | 35 | SyntaxInfo { name: "INT_NUMBER" }, |
17 | SyntaxInfo { name: "FLOAT_NUMBER" }, | 36 | SyntaxInfo { name: "FLOAT_NUMBER" }, |
37 | SyntaxInfo { name: "SEMI" }, | ||
38 | SyntaxInfo { name: "COMMA" }, | ||
39 | SyntaxInfo { name: "DOT" }, | ||
40 | SyntaxInfo { name: "DOTDOT" }, | ||
41 | SyntaxInfo { name: "DOTDOTDOT" }, | ||
42 | SyntaxInfo { name: "DOTDOTEQ" }, | ||
43 | SyntaxInfo { name: "L_PAREN" }, | ||
44 | SyntaxInfo { name: "R_PAREN" }, | ||
45 | SyntaxInfo { name: "L_CURLY" }, | ||
46 | SyntaxInfo { name: "R_CURLY" }, | ||
47 | SyntaxInfo { name: "L_BRACK" }, | ||
48 | SyntaxInfo { name: "R_BRACK" }, | ||
49 | SyntaxInfo { name: "AT" }, | ||
50 | SyntaxInfo { name: "POUND" }, | ||
51 | SyntaxInfo { name: "TILDE" }, | ||
52 | SyntaxInfo { name: "QUESTION" }, | ||
53 | SyntaxInfo { name: "COLON" }, | ||
54 | SyntaxInfo { name: "COLONCOLON" }, | ||
55 | SyntaxInfo { name: "DOLLAR" }, | ||
18 | ]; | 56 | ]; |
19 | 57 | ||
20 | pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { | 58 | pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { |
diff --git a/tests/data/lexer/0004_number.txt b/tests/data/lexer/0004_number.txt index 94fe0302d..ede39aa99 100644 --- a/tests/data/lexer/0004_number.txt +++ b/tests/data/lexer/0004_number.txt | |||
@@ -38,22 +38,21 @@ WHITESPACE 1 " " | |||
38 | INT_NUMBER 6 "0E1279" | 38 | INT_NUMBER 6 "0E1279" |
39 | WHITESPACE 1 "\n" | 39 | WHITESPACE 1 "\n" |
40 | INT_NUMBER 1 "0" | 40 | INT_NUMBER 1 "0" |
41 | ERROR 1 "." | 41 | DOTDOT 2 ".." |
42 | ERROR 1 "." | ||
43 | INT_NUMBER 1 "2" | 42 | INT_NUMBER 1 "2" |
44 | WHITESPACE 1 "\n" | 43 | WHITESPACE 1 "\n" |
45 | INT_NUMBER 1 "0" | 44 | INT_NUMBER 1 "0" |
46 | ERROR 1 "." | 45 | DOT 1 "." |
47 | IDENT 3 "foo" | 46 | IDENT 3 "foo" |
48 | ERROR 1 "(" | 47 | L_PAREN 1 "(" |
49 | ERROR 1 ")" | 48 | R_PAREN 1 ")" |
50 | WHITESPACE 1 "\n" | 49 | WHITESPACE 1 "\n" |
51 | INT_NUMBER 2 "0e" | 50 | INT_NUMBER 2 "0e" |
52 | ERROR 1 "+" | 51 | ERROR 1 "+" |
53 | INT_NUMBER 1 "1" | 52 | INT_NUMBER 1 "1" |
54 | WHITESPACE 1 "\n" | 53 | WHITESPACE 1 "\n" |
55 | INT_NUMBER 1 "0" | 54 | INT_NUMBER 1 "0" |
56 | ERROR 1 "." | 55 | DOT 1 "." |
57 | IDENT 1 "e" | 56 | IDENT 1 "e" |
58 | ERROR 1 "+" | 57 | ERROR 1 "+" |
59 | INT_NUMBER 1 "1" | 58 | INT_NUMBER 1 "1" |
diff --git a/tests/data/lexer/0005_symbols.rs b/tests/data/lexer/0005_symbols.rs new file mode 100644 index 000000000..79747d5e1 --- /dev/null +++ b/tests/data/lexer/0005_symbols.rs | |||
@@ -0,0 +1,3 @@ | |||
1 | ; , ( ) { } [ ] @ # ~ ? $ | ||
2 | . .. ... ..= | ||
3 | : :: \ No newline at end of file | ||
diff --git a/tests/data/lexer/0005_symbols.txt b/tests/data/lexer/0005_symbols.txt new file mode 100644 index 000000000..c845d2dff --- /dev/null +++ b/tests/data/lexer/0005_symbols.txt | |||
@@ -0,0 +1,37 @@ | |||
1 | SEMI 1 ";" | ||
2 | WHITESPACE 1 " " | ||
3 | COMMA 1 "," | ||
4 | WHITESPACE 1 " " | ||
5 | L_PAREN 1 "(" | ||
6 | WHITESPACE 1 " " | ||
7 | R_PAREN 1 ")" | ||
8 | WHITESPACE 1 " " | ||
9 | L_CURLY 1 "{" | ||
10 | WHITESPACE 1 " " | ||
11 | R_CURLY 1 "}" | ||
12 | WHITESPACE 1 " " | ||
13 | L_BRACK 1 "[" | ||
14 | WHITESPACE 1 " " | ||
15 | R_BRACK 1 "]" | ||
16 | WHITESPACE 1 " " | ||
17 | AT 1 "@" | ||
18 | WHITESPACE 1 " " | ||
19 | POUND 1 "#" | ||
20 | WHITESPACE 1 " " | ||
21 | TILDE 1 "~" | ||
22 | WHITESPACE 1 " " | ||
23 | QUESTION 1 "?" | ||
24 | WHITESPACE 1 " " | ||
25 | DOLLAR 1 "$" | ||
26 | WHITESPACE 1 "\n" | ||
27 | DOT 1 "." | ||
28 | WHITESPACE 1 " " | ||
29 | DOTDOT 2 ".." | ||
30 | WHITESPACE 1 " " | ||
31 | DOTDOTDOT 3 "..." | ||
32 | WHITESPACE 1 " " | ||
33 | DOTDOTEQ 3 "..=" | ||
34 | WHITESPACE 1 "\n" | ||
35 | COLON 1 ":" | ||
36 | WHITESPACE 1 " " | ||
37 | COLONCOLON 2 "::" | ||