diff options
author | Aleksey Kladov <[email protected]> | 2017-12-31 07:41:42 +0000 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2017-12-31 07:41:42 +0000 |
commit | d6a922459ed3ebc77ba5d79cd65144078f43e321 (patch) | |
tree | 2e8082efef7cd018e06c38740828fa6a3e1c0a8e | |
parent | 492f6e6b1c8d062c6732f9aa3360c14708bb1452 (diff) |
Lexer: basic chars & lifetimes
-rw-r--r-- | grammar.ron | 2 | ||||
-rw-r--r-- | src/lexer/mod.rs | 45 | ||||
-rw-r--r-- | src/lexer/ptr.rs | 4 | ||||
-rw-r--r-- | src/syntax_kinds.rs | 6 | ||||
-rw-r--r-- | validation.md | 1 |
5 files changed, 56 insertions, 2 deletions
diff --git a/grammar.ron b/grammar.ron index b707248f3..995d71f81 100644 --- a/grammar.ron +++ b/grammar.ron | |||
@@ -32,5 +32,7 @@ Grammar( | |||
32 | "FAT_ARROW", | 32 | "FAT_ARROW", |
33 | "NEQ", | 33 | "NEQ", |
34 | "NOT", | 34 | "NOT", |
35 | "CHAR", | ||
36 | "LIFETIME", | ||
35 | ] | 37 | ] |
36 | ) \ No newline at end of file | 38 | ) \ No newline at end of file |
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 24c14add0..3f277bd2b 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs | |||
@@ -34,7 +34,9 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
34 | } | 34 | } |
35 | 35 | ||
36 | if is_dec_digit(c) { | 36 | if is_dec_digit(c) { |
37 | return scan_number(c, ptr); | 37 | let kind = scan_number(c, ptr); |
38 | scan_literal_suffix(ptr); | ||
39 | return kind; | ||
38 | } | 40 | } |
39 | 41 | ||
40 | // One-byte tokens. | 42 | // One-byte tokens. |
@@ -98,6 +100,8 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
98 | } | 100 | } |
99 | _ => NOT, | 101 | _ => NOT, |
100 | }, | 102 | }, |
103 | |||
104 | // '\'' => scan_char_or_lifetime(ptr), | ||
101 | _ => (), | 105 | _ => (), |
102 | } | 106 | } |
103 | ERROR | 107 | ERROR |
@@ -116,6 +120,45 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
116 | IDENT | 120 | IDENT |
117 | } | 121 | } |
118 | 122 | ||
123 | fn scan_char_or_lifetime(ptr: &mut Ptr) -> SyntaxKind { | ||
124 | // Either a character constant 'a' OR a lifetime name 'abc | ||
125 | let c = match ptr.bump() { | ||
126 | Some(c) => c, | ||
127 | None => return CHAR, // TODO: error reporting is upper in the stack | ||
128 | }; | ||
129 | |||
130 | // If the character is an ident start not followed by another single | ||
131 | // quote, then this is a lifetime name: | ||
132 | if is_ident_start(c) && !ptr.next_is('\'') { | ||
133 | while ptr.next_is_p(is_ident_continue) { | ||
134 | ptr.bump(); | ||
135 | } | ||
136 | |||
137 | // lifetimes shouldn't end with a single quote | ||
138 | // if we find one, then this is an invalid character literal | ||
139 | if ptr.next_is('\'') { | ||
140 | ptr.bump(); | ||
141 | return CHAR; | ||
142 | } | ||
143 | return LIFETIME; | ||
144 | } | ||
145 | scan_char_or_byte(ptr); | ||
146 | if !ptr.next_is('\'') { | ||
147 | return CHAR; // TODO: error reporting | ||
148 | } | ||
149 | ptr.bump(); | ||
150 | scan_literal_suffix(ptr); | ||
151 | CHAR | ||
152 | } | ||
153 | |||
154 | fn scan_literal_suffix(ptr: &mut Ptr) { | ||
155 | |||
156 | } | ||
157 | |||
158 | fn scan_char_or_byte(ptr: &mut Ptr) { | ||
159 | ptr.bump(); | ||
160 | } | ||
161 | |||
119 | fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { | 162 | fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { |
120 | match (c, c1, c2) { | 163 | match (c, c1, c2) { |
121 | ('r', Some('"'), _) | | 164 | ('r', Some('"'), _) | |
diff --git a/src/lexer/ptr.rs b/src/lexer/ptr.rs index b380117e6..2f759119a 100644 --- a/src/lexer/ptr.rs +++ b/src/lexer/ptr.rs | |||
@@ -34,6 +34,10 @@ impl<'s> Ptr<'s> { | |||
34 | self.nnext() == Some(c) | 34 | self.nnext() == Some(c) |
35 | } | 35 | } |
36 | 36 | ||
37 | pub fn next_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { | ||
38 | self.next().map(p) == Some(true) | ||
39 | } | ||
40 | |||
37 | pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { | 41 | pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { |
38 | self.nnext().map(p) == Some(true) | 42 | self.nnext().map(p) == Some(true) |
39 | } | 43 | } |
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs index 040ac1257..4c023757b 100644 --- a/src/syntax_kinds.rs +++ b/src/syntax_kinds.rs | |||
@@ -33,8 +33,10 @@ pub const EQEQ: SyntaxKind = SyntaxKind(28); | |||
33 | pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); | 33 | pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); |
34 | pub const NEQ: SyntaxKind = SyntaxKind(30); | 34 | pub const NEQ: SyntaxKind = SyntaxKind(30); |
35 | pub const NOT: SyntaxKind = SyntaxKind(31); | 35 | pub const NOT: SyntaxKind = SyntaxKind(31); |
36 | pub const CHAR: SyntaxKind = SyntaxKind(32); | ||
37 | pub const LIFETIME: SyntaxKind = SyntaxKind(33); | ||
36 | 38 | ||
37 | static INFOS: [SyntaxInfo; 32] = [ | 39 | static INFOS: [SyntaxInfo; 34] = [ |
38 | SyntaxInfo { name: "ERROR" }, | 40 | SyntaxInfo { name: "ERROR" }, |
39 | SyntaxInfo { name: "IDENT" }, | 41 | SyntaxInfo { name: "IDENT" }, |
40 | SyntaxInfo { name: "UNDERSCORE" }, | 42 | SyntaxInfo { name: "UNDERSCORE" }, |
@@ -67,6 +69,8 @@ static INFOS: [SyntaxInfo; 32] = [ | |||
67 | SyntaxInfo { name: "FAT_ARROW" }, | 69 | SyntaxInfo { name: "FAT_ARROW" }, |
68 | SyntaxInfo { name: "NEQ" }, | 70 | SyntaxInfo { name: "NEQ" }, |
69 | SyntaxInfo { name: "NOT" }, | 71 | SyntaxInfo { name: "NOT" }, |
72 | SyntaxInfo { name: "CHAR" }, | ||
73 | SyntaxInfo { name: "LIFETIME" }, | ||
70 | ]; | 74 | ]; |
71 | 75 | ||
72 | pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { | 76 | pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { |
diff --git a/validation.md b/validation.md index b21ffebd5..a38b4a96e 100644 --- a/validation.md +++ b/validation.md | |||
@@ -4,4 +4,5 @@ Fixmes: | |||
4 | * Add more thorough tests for idents for XID_Start & XID_Continue | 4 | * Add more thorough tests for idents for XID_Start & XID_Continue |
5 | * Validate that float and integer literals use digits only of the appropriate | 5 | * Validate that float and integer literals use digits only of the appropriate |
6 | base, and are in range | 6 | base, and are in range |
7 | * Validation for unclosed char literal | ||
7 | 8 | ||