aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Aleksey Kladov <[email protected]> 2017-12-30 13:30:37 +0000
committer Aleksey Kladov <[email protected]> 2017-12-30 13:30:44 +0000
commit fad3e50987311a3c42a45bd3d9dbcf7c7a77e544 (patch)
tree b5f1d262e5ecb1226dc73780db50ee3eebe66ff8
parent 3e91e8b77db443775eea2ccd40b0cf1e27dc77d8 (diff)
Lexer: symbols
-rw-r--r-- grammar.ron 19
-rw-r--r-- src/lexer/mod.rs 41
-rw-r--r-- src/syntax_kinds.rs 40
-rw-r--r-- tests/data/lexer/0004_number.txt 11
-rw-r--r-- tests/data/lexer/0005_symbols.rs 3
-rw-r--r-- tests/data/lexer/0005_symbols.txt 37
6 files changed, 144 insertions, 7 deletions
diff --git a/grammar.ron b/grammar.ron
index a86fe693f..56617f757 100644
--- a/grammar.ron
+++ b/grammar.ron
@@ -6,5 +6,24 @@ Grammar(
6 "WHITESPACE", 6 "WHITESPACE",
7 "INT_NUMBER", 7 "INT_NUMBER",
8 "FLOAT_NUMBER", 8 "FLOAT_NUMBER",
9 "SEMI",
10 "COMMA",
11 "DOT",
12 "DOTDOT",
13 "DOTDOTDOT",
14 "DOTDOTEQ",
15 "L_PAREN",
16 "R_PAREN",
17 "L_CURLY",
18 "R_CURLY",
19 "L_BRACK",
20 "R_BRACK",
21 "AT",
22 "POUND",
23 "TILDE",
24 "QUESTION",
25 "COLON",
26 "COLONCOLON",
27 "DOLLAR",
9 ] 28 ]
10) \ No newline at end of file 29) \ No newline at end of file
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index e60dbbe8e..d2d4aaa22 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -37,6 +37,47 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
37 return scan_number(c, ptr); 37 return scan_number(c, ptr);
38 } 38 }
39 39
40 // One-byte tokens.
41 match c {
42 ';' => return SEMI,
43 ',' => return COMMA,
44 '(' => return L_PAREN,
45 ')' => return R_PAREN,
46 '{' => return L_CURLY,
47 '}' => return R_CURLY,
48 '[' => return L_BRACK,
49 ']' => return R_BRACK,
50 '@' => return AT,
51 '#' => return POUND,
52 '~' => return TILDE,
53 '?' => return QUESTION,
54 '$' => return DOLLAR,
55 '.' => return match (ptr.next(), ptr.nnext()) {
56 (Some('.'), Some('.')) => {
57 ptr.bump();
58 ptr.bump();
59 DOTDOTDOT
60 },
61 (Some('.'), Some('=')) => {
62 ptr.bump();
63 ptr.bump();
64 DOTDOTEQ
65 },
66 (Some('.'), _) => {
67 ptr.bump();
68 DOTDOT
69 },
70 _ => DOT
71 },
72 ':' => return match ptr.next() {
73 Some(':') => {
74 ptr.bump();
75 COLONCOLON
76 }
77 _ => COLON
78 },
79 _ => (),
80 }
40 ERROR 81 ERROR
41} 82}
42 83
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs
index bd1265bde..6982cba95 100644
--- a/src/syntax_kinds.rs
+++ b/src/syntax_kinds.rs
@@ -7,14 +7,52 @@ pub const UNDERSCORE: SyntaxKind = SyntaxKind(2);
7pub const WHITESPACE: SyntaxKind = SyntaxKind(3); 7pub const WHITESPACE: SyntaxKind = SyntaxKind(3);
8pub const INT_NUMBER: SyntaxKind = SyntaxKind(4); 8pub const INT_NUMBER: SyntaxKind = SyntaxKind(4);
9pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5); 9pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5);
10pub const SEMI: SyntaxKind = SyntaxKind(6);
11pub const COMMA: SyntaxKind = SyntaxKind(7);
12pub const DOT: SyntaxKind = SyntaxKind(8);
13pub const DOTDOT: SyntaxKind = SyntaxKind(9);
14pub const DOTDOTDOT: SyntaxKind = SyntaxKind(10);
15pub const DOTDOTEQ: SyntaxKind = SyntaxKind(11);
16pub const L_PAREN: SyntaxKind = SyntaxKind(12);
17pub const R_PAREN: SyntaxKind = SyntaxKind(13);
18pub const L_CURLY: SyntaxKind = SyntaxKind(14);
19pub const R_CURLY: SyntaxKind = SyntaxKind(15);
20pub const L_BRACK: SyntaxKind = SyntaxKind(16);
21pub const R_BRACK: SyntaxKind = SyntaxKind(17);
22pub const AT: SyntaxKind = SyntaxKind(18);
23pub const POUND: SyntaxKind = SyntaxKind(19);
24pub const TILDE: SyntaxKind = SyntaxKind(20);
25pub const QUESTION: SyntaxKind = SyntaxKind(21);
26pub const COLON: SyntaxKind = SyntaxKind(22);
27pub const COLONCOLON: SyntaxKind = SyntaxKind(23);
28pub const DOLLAR: SyntaxKind = SyntaxKind(24);
10 29
11static INFOS: [SyntaxInfo; 6] = [ 30static INFOS: [SyntaxInfo; 25] = [
12 SyntaxInfo { name: "ERROR" }, 31 SyntaxInfo { name: "ERROR" },
13 SyntaxInfo { name: "IDENT" }, 32 SyntaxInfo { name: "IDENT" },
14 SyntaxInfo { name: "UNDERSCORE" }, 33 SyntaxInfo { name: "UNDERSCORE" },
15 SyntaxInfo { name: "WHITESPACE" }, 34 SyntaxInfo { name: "WHITESPACE" },
16 SyntaxInfo { name: "INT_NUMBER" }, 35 SyntaxInfo { name: "INT_NUMBER" },
17 SyntaxInfo { name: "FLOAT_NUMBER" }, 36 SyntaxInfo { name: "FLOAT_NUMBER" },
37 SyntaxInfo { name: "SEMI" },
38 SyntaxInfo { name: "COMMA" },
39 SyntaxInfo { name: "DOT" },
40 SyntaxInfo { name: "DOTDOT" },
41 SyntaxInfo { name: "DOTDOTDOT" },
42 SyntaxInfo { name: "DOTDOTEQ" },
43 SyntaxInfo { name: "L_PAREN" },
44 SyntaxInfo { name: "R_PAREN" },
45 SyntaxInfo { name: "L_CURLY" },
46 SyntaxInfo { name: "R_CURLY" },
47 SyntaxInfo { name: "L_BRACK" },
48 SyntaxInfo { name: "R_BRACK" },
49 SyntaxInfo { name: "AT" },
50 SyntaxInfo { name: "POUND" },
51 SyntaxInfo { name: "TILDE" },
52 SyntaxInfo { name: "QUESTION" },
53 SyntaxInfo { name: "COLON" },
54 SyntaxInfo { name: "COLONCOLON" },
55 SyntaxInfo { name: "DOLLAR" },
18]; 56];
19 57
20pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { 58pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
diff --git a/tests/data/lexer/0004_number.txt b/tests/data/lexer/0004_number.txt
index 94fe0302d..ede39aa99 100644
--- a/tests/data/lexer/0004_number.txt
+++ b/tests/data/lexer/0004_number.txt
@@ -38,22 +38,21 @@ WHITESPACE 1 " "
38INT_NUMBER 6 "0E1279" 38INT_NUMBER 6 "0E1279"
39WHITESPACE 1 "\n" 39WHITESPACE 1 "\n"
40INT_NUMBER 1 "0" 40INT_NUMBER 1 "0"
41ERROR 1 "." 41DOTDOT 2 ".."
42ERROR 1 "."
43INT_NUMBER 1 "2" 42INT_NUMBER 1 "2"
44WHITESPACE 1 "\n" 43WHITESPACE 1 "\n"
45INT_NUMBER 1 "0" 44INT_NUMBER 1 "0"
46ERROR 1 "." 45DOT 1 "."
47IDENT 3 "foo" 46IDENT 3 "foo"
48ERROR 1 "(" 47L_PAREN 1 "("
49ERROR 1 ")" 48R_PAREN 1 ")"
50WHITESPACE 1 "\n" 49WHITESPACE 1 "\n"
51INT_NUMBER 2 "0e" 50INT_NUMBER 2 "0e"
52ERROR 1 "+" 51ERROR 1 "+"
53INT_NUMBER 1 "1" 52INT_NUMBER 1 "1"
54WHITESPACE 1 "\n" 53WHITESPACE 1 "\n"
55INT_NUMBER 1 "0" 54INT_NUMBER 1 "0"
56ERROR 1 "." 55DOT 1 "."
57IDENT 1 "e" 56IDENT 1 "e"
58ERROR 1 "+" 57ERROR 1 "+"
59INT_NUMBER 1 "1" 58INT_NUMBER 1 "1"
diff --git a/tests/data/lexer/0005_symbols.rs b/tests/data/lexer/0005_symbols.rs
new file mode 100644
index 000000000..79747d5e1
--- /dev/null
+++ b/tests/data/lexer/0005_symbols.rs
@@ -0,0 +1,3 @@
1; , ( ) { } [ ] @ # ~ ? $
2. .. ... ..=
3: :: \ No newline at end of file
diff --git a/tests/data/lexer/0005_symbols.txt b/tests/data/lexer/0005_symbols.txt
new file mode 100644
index 000000000..c845d2dff
--- /dev/null
+++ b/tests/data/lexer/0005_symbols.txt
@@ -0,0 +1,37 @@
1SEMI 1 ";"
2WHITESPACE 1 " "
3COMMA 1 ","
4WHITESPACE 1 " "
5L_PAREN 1 "("
6WHITESPACE 1 " "
7R_PAREN 1 ")"
8WHITESPACE 1 " "
9L_CURLY 1 "{"
10WHITESPACE 1 " "
11R_CURLY 1 "}"
12WHITESPACE 1 " "
13L_BRACK 1 "["
14WHITESPACE 1 " "
15R_BRACK 1 "]"
16WHITESPACE 1 " "
17AT 1 "@"
18WHITESPACE 1 " "
19POUND 1 "#"
20WHITESPACE 1 " "
21TILDE 1 "~"
22WHITESPACE 1 " "
23QUESTION 1 "?"
24WHITESPACE 1 " "
25DOLLAR 1 "$"
26WHITESPACE 1 "\n"
27DOT 1 "."
28WHITESPACE 1 " "
29DOTDOT 2 ".."
30WHITESPACE 1 " "
31DOTDOTDOT 3 "..."
32WHITESPACE 1 " "
33DOTDOTEQ 3 "..="
34WHITESPACE 1 "\n"
35COLON 1 ":"
36WHITESPACE 1 " "
37COLONCOLON 2 "::"