diff options
Diffstat (limited to 'crates/ra_syntax/src/parsing/lexer.rs')
-rw-r--r-- | crates/ra_syntax/src/parsing/lexer.rs | 165 |
1 file changed, 17 insertions, 148 deletions
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 1c818fdf4..2a4343b0a 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs | |||
@@ -1,22 +1,6 @@ | |||
1 | mod classes; | ||
2 | mod comments; | ||
3 | mod numbers; | ||
4 | mod ptr; | ||
5 | mod strings; | ||
6 | |||
7 | use crate::{ | 1 | use crate::{ |
8 | SyntaxKind::{self, *}, | 2 | SyntaxKind::{self, *}, |
9 | TextUnit, T, | 3 | TextUnit, |
10 | }; | ||
11 | |||
12 | use self::{ | ||
13 | classes::*, | ||
14 | comments::{scan_comment, scan_shebang}, | ||
15 | numbers::scan_number, | ||
16 | ptr::Ptr, | ||
17 | strings::{ | ||
18 | is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, scan_string, | ||
19 | }, | ||
20 | }; | 4 | }; |
21 | 5 | ||
22 | /// A token of Rust source. | 6 | /// A token of Rust source. |
@@ -141,138 +125,23 @@ pub fn tokenize(text: &str) -> Vec<Token> { | |||
141 | acc | 125 | acc |
142 | } | 126 | } |
143 | 127 | ||
144 | /// Get the next token from a string | ||
145 | fn next_token(text: &str) -> Token { | ||
146 | assert!(!text.is_empty()); | ||
147 | let mut ptr = Ptr::new(text); | ||
148 | let c = ptr.bump().unwrap(); | ||
149 | let kind = next_token_inner(c, &mut ptr); | ||
150 | let len = ptr.into_len(); | ||
151 | Token { kind, len } | ||
152 | } | ||
153 | |||
154 | fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | ||
155 | if is_whitespace(c) { | ||
156 | ptr.bump_while(is_whitespace); | ||
157 | return WHITESPACE; | ||
158 | } | ||
159 | |||
160 | match c { | ||
161 | '#' => { | ||
162 | if scan_shebang(ptr) { | ||
163 | return SHEBANG; | ||
164 | } | ||
165 | } | ||
166 | '/' => { | ||
167 | if let Some(kind) = scan_comment(ptr) { | ||
168 | return kind; | ||
169 | } | ||
170 | } | ||
171 | _ => (), | ||
172 | } | ||
173 | |||
174 | let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.current(), ptr.nth(1)); | ||
175 | if ident_start { | ||
176 | return scan_ident(c, ptr); | ||
177 | } | ||
178 | |||
179 | if is_dec_digit(c) { | ||
180 | let kind = scan_number(c, ptr); | ||
181 | scan_literal_suffix(ptr); | ||
182 | return kind; | ||
183 | } | ||
184 | |||
185 | // One-byte tokens. | ||
186 | if let Some(kind) = SyntaxKind::from_char(c) { | ||
187 | return kind; | ||
188 | } | ||
189 | |||
190 | match c { | ||
191 | // Possibly multi-byte tokens, | |
192 | // but we only produce single-byte tokens now | |
193 | // T![...], T![..], T![..=], T![.] | ||
194 | '.' => return T![.], | ||
195 | // T![::] T![:] | ||
196 | ':' => return T![:], | ||
197 | // T![==] FATARROW T![=] | ||
198 | '=' => return T![=], | ||
199 | // T![!=] T![!] | ||
200 | '!' => return T![!], | ||
201 | // T![->] T![-] | ||
202 | '-' => return T![-], | ||
203 | |||
204 | // If the character is an ident start not followed by another single | ||
205 | // quote, then this is a lifetime name: | ||
206 | '\'' => { | ||
207 | return if ptr.at_p(is_ident_start) && !ptr.at_str("''") { | ||
208 | ptr.bump(); | ||
209 | while ptr.at_p(is_ident_continue) { | ||
210 | ptr.bump(); | ||
211 | } | ||
212 | // lifetimes shouldn't end with a single quote | ||
213 | // if we find one, then this is an invalid character literal | ||
214 | if ptr.at('\'') { | ||
215 | ptr.bump(); | ||
216 | return CHAR; | ||
217 | } | ||
218 | LIFETIME | ||
219 | } else { | ||
220 | scan_char(ptr); | ||
221 | scan_literal_suffix(ptr); | ||
222 | CHAR | ||
223 | }; | ||
224 | } | ||
225 | 'b' => { | ||
226 | let kind = scan_byte_char_or_string(ptr); | ||
227 | scan_literal_suffix(ptr); | ||
228 | return kind; | ||
229 | } | ||
230 | '"' => { | ||
231 | scan_string(ptr); | ||
232 | scan_literal_suffix(ptr); | ||
233 | return STRING; | ||
234 | } | ||
235 | 'r' => { | ||
236 | scan_raw_string(ptr); | ||
237 | scan_literal_suffix(ptr); | ||
238 | return RAW_STRING; | ||
239 | } | ||
240 | _ => (), | ||
241 | } | ||
242 | ERROR | ||
243 | } | ||
244 | |||
245 | fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | ||
246 | let is_raw = match (c, ptr.current()) { | ||
247 | ('r', Some('#')) => { | ||
248 | ptr.bump(); | ||
249 | true | ||
250 | } | ||
251 | ('_', None) => return T![_], | ||
252 | ('_', Some(c)) if !is_ident_continue(c) => return T![_], | ||
253 | _ => false, | ||
254 | }; | ||
255 | ptr.bump_while(is_ident_continue); | ||
256 | if !is_raw { | ||
257 | if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) { | ||
258 | return kind; | ||
259 | } | ||
260 | } | ||
261 | IDENT | ||
262 | } | ||
263 | |||
264 | fn scan_literal_suffix(ptr: &mut Ptr) { | ||
265 | if ptr.at_p(is_ident_start) { | ||
266 | ptr.bump(); | ||
267 | } | ||
268 | ptr.bump_while(is_ident_continue); | ||
269 | } | ||
270 | |||
271 | pub fn classify_literal(text: &str) -> Option<Token> { | 128 | pub fn classify_literal(text: &str) -> Option<Token> { |
272 | let tkn = next_token(text); | 129 | let t = ra_rustc_lexer::first_token(text); |
273 | if !tkn.kind.is_literal() || tkn.len.to_usize() != text.len() { | 130 | if t.len != text.len() { |
274 | return None; | 131 | return None; |
275 | } | 132 | } |
276 | 133 | let kind = match t.kind { | |
277 | Some(tkn) | 134 | ra_rustc_lexer::TokenKind::Literal { kind, .. } => match kind { |
135 | ra_rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER, | ||
136 | ra_rustc_lexer::LiteralKind::Float { .. } => FLOAT_NUMBER, | ||
137 | ra_rustc_lexer::LiteralKind::Char { .. } => CHAR, | ||
138 | ra_rustc_lexer::LiteralKind::Byte { .. } => BYTE, | ||
139 | ra_rustc_lexer::LiteralKind::Str { .. } => STRING, | ||
140 | ra_rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING, | ||
141 | ra_rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING, | ||
142 | ra_rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING, | ||
143 | }, | ||
144 | _ => return None, | ||
145 | }; | ||
146 | Some(Token { kind, len: TextUnit::from_usize(t.len) }) | ||
278 | } | 147 | } |