aboutsummaryrefslogtreecommitdiff
path: root/src/lexer/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer/mod.rs')
-rw-r--r--src/lexer/mod.rs64
1 files changed, 64 insertions, 0 deletions
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
new file mode 100644
index 000000000..136afb7b8
--- /dev/null
+++ b/src/lexer/mod.rs
@@ -0,0 +1,64 @@
1use unicode_xid::UnicodeXID;
2
3use {Token, SyntaxKind};
4use syntax_kinds::*;
5
6mod ptr;
7use self::ptr::Ptr;
8
9pub fn next_token(text: &str) -> Token {
10 assert!(!text.is_empty());
11 let mut ptr = Ptr::new(text);
12 let c = ptr.bump().unwrap();
13 let kind = next_token_inner(c, &mut ptr);
14 let len = ptr.into_len();
15 Token { kind, len }
16}
17
18fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
19 // Note: r as in r" or r#" is part of a raw string literal,
20 // b as in b' is part of a byte literal.
21 // They are not identifiers, and are handled further down.
22 let ident_start = ident_start(c) && !string_literal_start(c, ptr.next(), ptr.nnext());
23 if ident_start {
24 loop {
25 match ptr.next() {
26 Some(c) if ident_continue(c) => {
27 ptr.bump();
28 },
29 _ => break,
30 }
31 }
32 IDENT
33 } else {
34 WHITESPACE
35 }
36}
37
38fn ident_start(c: char) -> bool {
39 (c >= 'a' && c <= 'z')
40 || (c >= 'A' && c <= 'Z')
41 || c == '_'
42 || (c > '\x7f' && UnicodeXID::is_xid_start(c))
43}
44
45fn ident_continue(c: char) -> bool {
46 (c >= 'a' && c <= 'z')
47 || (c >= 'A' && c <= 'Z')
48 || (c >= '0' && c <= '9')
49 || c == '_'
50 || (c > '\x7f' && UnicodeXID::is_xid_continue(c))
51}
52
53
/// Returns `true` if `c`, followed by the lookahead characters `c1` and `c2`,
/// begins a prefixed string-like literal rather than an identifier.
///
/// Recognised prefixes:
///
/// * `r"` and `r#`
/// * `b"` and `b'`
/// * `br"` and `br#`
///
/// `c1` and `c2` are the characters immediately after `c` (`None` when the
/// input ends first). `c2` is only inspected for the `br` forms.
fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
    match (c, c1, c2) {
        ('r', Some('"'), _)
        | ('r', Some('#'), _)
        | ('b', Some('"'), _)
        | ('b', Some('\''), _)
        | ('b', Some('r'), Some('"'))
        | ('b', Some('r'), Some('#')) => true,
        _ => false,
    }
}