aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2017-12-31 07:41:42 +0000
committerAleksey Kladov <[email protected]>2017-12-31 07:41:42 +0000
commitd6a922459ed3ebc77ba5d79cd65144078f43e321 (patch)
tree2e8082efef7cd018e06c38740828fa6a3e1c0a8e
parent492f6e6b1c8d062c6732f9aa3360c14708bb1452 (diff)
Lexer: basic chars & lifetimes
-rw-r--r--grammar.ron2
-rw-r--r--src/lexer/mod.rs45
-rw-r--r--src/lexer/ptr.rs4
-rw-r--r--src/syntax_kinds.rs6
-rw-r--r--validation.md1
5 files changed, 56 insertions, 2 deletions
diff --git a/grammar.ron b/grammar.ron
index b707248f3..995d71f81 100644
--- a/grammar.ron
+++ b/grammar.ron
@@ -32,5 +32,7 @@ Grammar(
32 "FAT_ARROW", 32 "FAT_ARROW",
33 "NEQ", 33 "NEQ",
34 "NOT", 34 "NOT",
35 "CHAR",
36 "LIFETIME",
35 ] 37 ]
36) \ No newline at end of file 38) \ No newline at end of file
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 24c14add0..3f277bd2b 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -34,7 +34,9 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
34 } 34 }
35 35
36 if is_dec_digit(c) { 36 if is_dec_digit(c) {
37 return scan_number(c, ptr); 37 let kind = scan_number(c, ptr);
38 scan_literal_suffix(ptr);
39 return kind;
38 } 40 }
39 41
40 // One-byte tokens. 42 // One-byte tokens.
@@ -98,6 +100,8 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
98 } 100 }
99 _ => NOT, 101 _ => NOT,
100 }, 102 },
103
104 // '\'' => scan_char_or_lifetime(ptr),
101 _ => (), 105 _ => (),
102 } 106 }
103 ERROR 107 ERROR
@@ -116,6 +120,45 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
116 IDENT 120 IDENT
117} 121}
118 122
123fn scan_char_or_lifetime(ptr: &mut Ptr) -> SyntaxKind {
124 // Either a character constant 'a' OR a lifetime name 'abc
125 let c = match ptr.bump() {
126 Some(c) => c,
127 None => return CHAR, // TODO: error reporting is upper in the stack
128 };
129
130 // If the character is an ident start not followed by another single
131 // quote, then this is a lifetime name:
132 if is_ident_start(c) && !ptr.next_is('\'') {
133 while ptr.next_is_p(is_ident_continue) {
134 ptr.bump();
135 }
136
137 // lifetimes shouldn't end with a single quote
138 // if we find one, then this is an invalid character literal
139 if ptr.next_is('\'') {
140 ptr.bump();
141 return CHAR;
142 }
143 return LIFETIME;
144 }
145 scan_char_or_byte(ptr);
146 if !ptr.next_is('\'') {
147 return CHAR; // TODO: error reporting
148 }
149 ptr.bump();
150 scan_literal_suffix(ptr);
151 CHAR
152}
153
154fn scan_literal_suffix(ptr: &mut Ptr) {
155
156}
157
158fn scan_char_or_byte(ptr: &mut Ptr) {
159 ptr.bump();
160}
161
119fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { 162fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
120 match (c, c1, c2) { 163 match (c, c1, c2) {
121 ('r', Some('"'), _) | 164 ('r', Some('"'), _) |
diff --git a/src/lexer/ptr.rs b/src/lexer/ptr.rs
index b380117e6..2f759119a 100644
--- a/src/lexer/ptr.rs
+++ b/src/lexer/ptr.rs
@@ -34,6 +34,10 @@ impl<'s> Ptr<'s> {
34 self.nnext() == Some(c) 34 self.nnext() == Some(c)
35 } 35 }
36 36
37 pub fn next_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
38 self.next().map(p) == Some(true)
39 }
40
37 pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { 41 pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
38 self.nnext().map(p) == Some(true) 42 self.nnext().map(p) == Some(true)
39 } 43 }
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs
index 040ac1257..4c023757b 100644
--- a/src/syntax_kinds.rs
+++ b/src/syntax_kinds.rs
@@ -33,8 +33,10 @@ pub const EQEQ: SyntaxKind = SyntaxKind(28);
33pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); 33pub const FAT_ARROW: SyntaxKind = SyntaxKind(29);
34pub const NEQ: SyntaxKind = SyntaxKind(30); 34pub const NEQ: SyntaxKind = SyntaxKind(30);
35pub const NOT: SyntaxKind = SyntaxKind(31); 35pub const NOT: SyntaxKind = SyntaxKind(31);
36pub const CHAR: SyntaxKind = SyntaxKind(32);
37pub const LIFETIME: SyntaxKind = SyntaxKind(33);
36 38
37static INFOS: [SyntaxInfo; 32] = [ 39static INFOS: [SyntaxInfo; 34] = [
38 SyntaxInfo { name: "ERROR" }, 40 SyntaxInfo { name: "ERROR" },
39 SyntaxInfo { name: "IDENT" }, 41 SyntaxInfo { name: "IDENT" },
40 SyntaxInfo { name: "UNDERSCORE" }, 42 SyntaxInfo { name: "UNDERSCORE" },
@@ -67,6 +69,8 @@ static INFOS: [SyntaxInfo; 32] = [
67 SyntaxInfo { name: "FAT_ARROW" }, 69 SyntaxInfo { name: "FAT_ARROW" },
68 SyntaxInfo { name: "NEQ" }, 70 SyntaxInfo { name: "NEQ" },
69 SyntaxInfo { name: "NOT" }, 71 SyntaxInfo { name: "NOT" },
72 SyntaxInfo { name: "CHAR" },
73 SyntaxInfo { name: "LIFETIME" },
70]; 74];
71 75
72pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { 76pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
diff --git a/validation.md b/validation.md
index b21ffebd5..a38b4a96e 100644
--- a/validation.md
+++ b/validation.md
@@ -4,4 +4,5 @@ Fixmes:
4* Add more thorough tests for idents for XID_Start & XID_Continue 4* Add more thorough tests for idents for XID_Start & XID_Continue
5* Validate that float and integer literals use digits only of the appropriate 5* Validate that float and integer literals use digits only of the appropriate
6 base, and are in range 6 base, and are in range
7* Validation for unclosed char literal
7 8