diff options
-rw-r--r-- | src/lexer/mod.rs | 72 | ||||
-rw-r--r-- | src/lexer/strings.rs | 65 |
2 files changed, 88 insertions, 49 deletions
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 320d59de5..ccc25b8c9 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs | |||
@@ -10,6 +10,9 @@ use self::classes::*; | |||
10 | mod numbers; | 10 | mod numbers; |
11 | use self::numbers::scan_number; | 11 | use self::numbers::scan_number; |
12 | 12 | ||
13 | mod strings; | ||
14 | use self::strings::{string_literal_start, scan_char, scan_byte_char_or_string}; | ||
15 | |||
13 | pub fn next_token(text: &str) -> Token { | 16 | pub fn next_token(text: &str) -> Token { |
14 | assert!(!text.is_empty()); | 17 | assert!(!text.is_empty()); |
15 | let mut ptr = Ptr::new(text); | 18 | let mut ptr = Ptr::new(text); |
@@ -101,7 +104,26 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
101 | _ => NOT, | 104 | _ => NOT, |
102 | }, | 105 | }, |
103 | 106 | ||
104 | '\'' => return scan_char_or_lifetime(ptr), | 107 | // If the character is an ident start not followed by another single |
108 | // quote, then this is a lifetime name: | ||
109 | '\'' => return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') { | ||
110 | ptr.bump(); | ||
111 | while ptr.next_is_p(is_ident_continue) { | ||
112 | ptr.bump(); | ||
113 | } | ||
114 | // lifetimes shouldn't end with a single quote | ||
115 | // if we find one, then this is an invalid character literal | ||
116 | if ptr.next_is('\'') { | ||
117 | ptr.bump(); | ||
118 | return CHAR; // TODO: error reporting | ||
119 | } | ||
120 | LIFETIME | ||
121 | } else { | ||
122 | scan_char(ptr); | ||
123 | scan_literal_suffix(ptr); | ||
124 | CHAR | ||
125 | }, | ||
126 | 'b' => return scan_byte_char_or_string(ptr), | ||
105 | _ => (), | 127 | _ => (), |
106 | } | 128 | } |
107 | ERROR | 129 | ERROR |
@@ -120,57 +142,9 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
120 | IDENT | 142 | IDENT |
121 | } | 143 | } |
122 | 144 | ||
123 | fn scan_char_or_lifetime(ptr: &mut Ptr) -> SyntaxKind { | ||
124 | // Either a character constant 'a' OR a lifetime name 'abc | ||
125 | let c = match ptr.bump() { | ||
126 | Some(c) => c, | ||
127 | None => return CHAR, // TODO: error reporting happens higher up the stack | |
128 | }; | ||
129 | |||
130 | // If the character is an ident start not followed by another single | ||
131 | // quote, then this is a lifetime name: | ||
132 | if is_ident_start(c) && !ptr.next_is('\'') { | ||
133 | while ptr.next_is_p(is_ident_continue) { | ||
134 | ptr.bump(); | ||
135 | } | ||
136 | |||
137 | // lifetimes shouldn't end with a single quote | ||
138 | // if we find one, then this is an invalid character literal | ||
139 | if ptr.next_is('\'') { | ||
140 | ptr.bump(); | ||
141 | return CHAR; | ||
142 | } | ||
143 | return LIFETIME; | ||
144 | } | ||
145 | scan_char_or_byte(ptr); | ||
146 | if !ptr.next_is('\'') { | ||
147 | return CHAR; // TODO: error reporting | ||
148 | } | ||
149 | ptr.bump(); | ||
150 | scan_literal_suffix(ptr); | ||
151 | CHAR | ||
152 | } | ||
153 | |||
154 | fn scan_literal_suffix(ptr: &mut Ptr) { | 145 | fn scan_literal_suffix(ptr: &mut Ptr) { |
155 | if ptr.next_is_p(is_ident_start) { | 146 | if ptr.next_is_p(is_ident_start) { |
156 | ptr.bump(); | 147 | ptr.bump(); |
157 | } | 148 | } |
158 | ptr.bump_while(is_ident_continue); | 149 | ptr.bump_while(is_ident_continue); |
159 | } | 150 | } |
160 | |||
161 | fn scan_char_or_byte(ptr: &mut Ptr) { | ||
162 | //FIXME: deal with escape sequences | |
163 | ptr.bump(); | ||
164 | } | ||
165 | |||
166 | fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { | ||
167 | match (c, c1, c2) { | ||
168 | ('r', Some('"'), _) | | ||
169 | ('r', Some('#'), _) | | ||
170 | ('b', Some('"'), _) | | ||
171 | ('b', Some('\''), _) | | ||
172 | ('b', Some('r'), Some('"')) | | ||
173 | ('b', Some('r'), Some('#')) => true, | ||
174 | _ => false | ||
175 | } | ||
176 | } | ||
diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs new file mode 100644 index 000000000..40e5e4528 --- /dev/null +++ b/src/lexer/strings.rs | |||
@@ -0,0 +1,65 @@ | |||
1 | use {SyntaxKind}; | ||
2 | use syntax_kinds::*; | ||
3 | |||
4 | use lexer::ptr::Ptr; | ||
5 | |||
6 | pub(crate) fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { | ||
7 | match (c, c1, c2) { | ||
8 | ('r', Some('"'), _) | | ||
9 | ('r', Some('#'), _) | | ||
10 | ('b', Some('"'), _) | | ||
11 | ('b', Some('\''), _) | | ||
12 | ('b', Some('r'), Some('"')) | | ||
13 | ('b', Some('r'), Some('#')) => true, | ||
14 | _ => false | ||
15 | } | ||
16 | } | ||
17 | |||
18 | pub(crate) fn scan_char(ptr: &mut Ptr) { | ||
19 | if ptr.bump().is_none() { | ||
20 | return; // TODO: error reporting happens higher up the stack | |
21 | } | ||
22 | scan_char_or_byte(ptr); | ||
23 | if !ptr.next_is('\'') { | ||
24 | return; // TODO: error reporting | ||
25 | } | ||
26 | ptr.bump(); | ||
27 | } | ||
28 | |||
29 | pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind { | ||
30 | // unwrapping and a non-exhaustive match are ok | |
31 | // because of string_literal_start | ||
32 | let c = ptr.bump().unwrap(); | ||
33 | match c { | ||
34 | '\'' => { | ||
35 | scan_byte(ptr); | ||
36 | CHAR | ||
37 | } | ||
38 | '"' => { | ||
39 | scan_byte_string(ptr); | ||
40 | CHAR | ||
41 | } | ||
42 | 'r' => { | ||
43 | scan_raw_byte_string(ptr); | ||
44 | CHAR | ||
45 | } | ||
46 | _ => unreachable!(), | ||
47 | } | ||
48 | } | ||
49 | |||
50 | fn scan_byte(ptr: &mut Ptr) { | ||
51 | |||
52 | } | ||
53 | |||
54 | fn scan_byte_string(ptr: &mut Ptr) { | ||
55 | |||
56 | } | ||
57 | |||
58 | fn scan_raw_byte_string(ptr: &mut Ptr) { | ||
59 | |||
60 | } | ||
61 | |||
62 | fn scan_char_or_byte(ptr: &mut Ptr) { | ||
63 | //FIXME: deal with escape sequences | |
64 | ptr.bump(); | ||
65 | } \ No newline at end of file | ||