diff options
Diffstat (limited to 'src/lexer/mod.rs')
-rw-r--r-- | src/lexer/mod.rs | 143 |
1 file changed, 78 insertions, 65 deletions
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 842059a42..f62dfc507 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs | |||
@@ -1,4 +1,4 @@ | |||
1 | use {Token, SyntaxKind}; | 1 | use {SyntaxKind, Token}; |
2 | use syntax_kinds::*; | 2 | use syntax_kinds::*; |
3 | 3 | ||
4 | mod ptr; | 4 | mod ptr; |
@@ -11,10 +11,11 @@ mod numbers; | |||
11 | use self::numbers::scan_number; | 11 | use self::numbers::scan_number; |
12 | 12 | ||
13 | mod strings; | 13 | mod strings; |
14 | use self::strings::{is_string_literal_start, scan_char, scan_byte_char_or_string, scan_string, scan_raw_string}; | 14 | use self::strings::{is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, |
15 | scan_string}; | ||
15 | 16 | ||
16 | mod comments; | 17 | mod comments; |
17 | use self::comments::{scan_shebang, scan_comment}; | 18 | use self::comments::{scan_comment, scan_shebang}; |
18 | 19 | ||
19 | pub fn tokenize(text: &str) -> Vec<Token> { | 20 | pub fn tokenize(text: &str) -> Vec<Token> { |
20 | let mut text = text; | 21 | let mut text = text; |
@@ -45,10 +46,10 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
45 | match c { | 46 | match c { |
46 | '#' => if scan_shebang(ptr) { | 47 | '#' => if scan_shebang(ptr) { |
47 | return SHEBANG; | 48 | return SHEBANG; |
48 | } | 49 | }, |
49 | '/' => if let Some(kind) = scan_comment(ptr) { | 50 | '/' => if let Some(kind) = scan_comment(ptr) { |
50 | return kind; | 51 | return kind; |
51 | } | 52 | }, |
52 | _ => (), | 53 | _ => (), |
53 | } | 54 | } |
54 | 55 | ||
@@ -89,79 +90,91 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
89 | '%' => return PERCENT, | 90 | '%' => return PERCENT, |
90 | 91 | ||
91 | // Multi-byte tokens. | 92 | // Multi-byte tokens. |
92 | '.' => return match (ptr.next(), ptr.nnext()) { | 93 | '.' => { |
93 | (Some('.'), Some('.')) => { | 94 | return match (ptr.next(), ptr.nnext()) { |
94 | ptr.bump(); | 95 | (Some('.'), Some('.')) => { |
95 | ptr.bump(); | 96 | ptr.bump(); |
96 | DOTDOTDOT | 97 | ptr.bump(); |
97 | }, | 98 | DOTDOTDOT |
98 | (Some('.'), Some('=')) => { | 99 | } |
99 | ptr.bump(); | 100 | (Some('.'), Some('=')) => { |
100 | ptr.bump(); | 101 | ptr.bump(); |
101 | DOTDOTEQ | 102 | ptr.bump(); |
102 | }, | 103 | DOTDOTEQ |
103 | (Some('.'), _) => { | 104 | } |
104 | ptr.bump(); | 105 | (Some('.'), _) => { |
105 | DOTDOT | 106 | ptr.bump(); |
106 | }, | 107 | DOTDOT |
107 | _ => DOT | 108 | } |
108 | }, | 109 | _ => DOT, |
109 | ':' => return match ptr.next() { | ||
110 | Some(':') => { | ||
111 | ptr.bump(); | ||
112 | COLONCOLON | ||
113 | } | 110 | } |
114 | _ => COLON | 111 | } |
115 | }, | 112 | ':' => { |
116 | '=' => return match ptr.next() { | 113 | return match ptr.next() { |
117 | Some('=') => { | 114 | Some(':') => { |
118 | ptr.bump(); | 115 | ptr.bump(); |
119 | EQEQ | 116 | COLONCOLON |
117 | } | ||
118 | _ => COLON, | ||
120 | } | 119 | } |
121 | Some('>') => { | 120 | } |
122 | ptr.bump(); | 121 | '=' => { |
123 | FAT_ARROW | 122 | return match ptr.next() { |
123 | Some('=') => { | ||
124 | ptr.bump(); | ||
125 | EQEQ | ||
126 | } | ||
127 | Some('>') => { | ||
128 | ptr.bump(); | ||
129 | FAT_ARROW | ||
130 | } | ||
131 | _ => EQ, | ||
124 | } | 132 | } |
125 | _ => EQ, | 133 | } |
126 | }, | 134 | '!' => { |
127 | '!' => return match ptr.next() { | 135 | return match ptr.next() { |
128 | Some('=') => { | 136 | Some('=') => { |
137 | ptr.bump(); | ||
138 | NEQ | ||
139 | } | ||
140 | _ => EXCL, | ||
141 | } | ||
142 | } | ||
143 | '-' => { | ||
144 | return if ptr.next_is('>') { | ||
129 | ptr.bump(); | 145 | ptr.bump(); |
130 | NEQ | 146 | THIN_ARROW |
147 | } else { | ||
148 | MINUS | ||
131 | } | 149 | } |
132 | _ => EXCL, | 150 | } |
133 | }, | ||
134 | '-' => return if ptr.next_is('>') { | ||
135 | ptr.bump(); | ||
136 | THIN_ARROW | ||
137 | } else { | ||
138 | MINUS | ||
139 | }, | ||
140 | 151 | ||
141 | // If the character is an ident start not followed by another single | 152 | // If the character is an ident start not followed by another single |
142 | // quote, then this is a lifetime name: | 153 | // quote, then this is a lifetime name: |
143 | '\'' => return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') { | 154 | '\'' => { |
144 | ptr.bump(); | 155 | return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') { |
145 | while ptr.next_is_p(is_ident_continue) { | ||
146 | ptr.bump(); | 156 | ptr.bump(); |
157 | while ptr.next_is_p(is_ident_continue) { | ||
158 | ptr.bump(); | ||
159 | } | ||
160 | // lifetimes shouldn't end with a single quote | ||
161 | // if we find one, then this is an invalid character literal | ||
162 | if ptr.next_is('\'') { | ||
163 | ptr.bump(); | ||
164 | return CHAR; // TODO: error reporting | ||
165 | } | ||
166 | LIFETIME | ||
167 | } else { | ||
168 | scan_char(ptr); | ||
169 | scan_literal_suffix(ptr); | ||
170 | CHAR | ||
147 | } | 171 | } |
148 | // lifetimes shouldn't end with a single quote | 172 | } |
149 | // if we find one, then this is an invalid character literal | ||
150 | if ptr.next_is('\'') { | ||
151 | ptr.bump(); | ||
152 | return CHAR; // TODO: error reporting | ||
153 | } | ||
154 | LIFETIME | ||
155 | } else { | ||
156 | scan_char(ptr); | ||
157 | scan_literal_suffix(ptr); | ||
158 | CHAR | ||
159 | }, | ||
160 | 'b' => { | 173 | 'b' => { |
161 | let kind = scan_byte_char_or_string(ptr); | 174 | let kind = scan_byte_char_or_string(ptr); |
162 | scan_literal_suffix(ptr); | 175 | scan_literal_suffix(ptr); |
163 | return kind | 176 | return kind; |
164 | }, | 177 | } |
165 | '"' => { | 178 | '"' => { |
166 | scan_string(ptr); | 179 | scan_string(ptr); |
167 | scan_literal_suffix(ptr); | 180 | scan_literal_suffix(ptr); |