about | summary | refs | log | tree | commit | diff
path: root/src/lexer
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer')
-rw-r--r-- src/lexer/classes.rs 22
-rw-r--r-- src/lexer/mod.rs 39
-rw-r--r-- src/lexer/ptr.rs 11
3 files changed, 44 insertions, 28 deletions
diff --git a/src/lexer/classes.rs b/src/lexer/classes.rs
new file mode 100644
index 000000000..7cc050bde
--- /dev/null
+++ b/src/lexer/classes.rs
@@ -0,0 +1,22 @@
1use unicode_xid::UnicodeXID;
2
3pub fn is_ident_start(c: char) -> bool {
4 (c >= 'a' && c <= 'z')
5 || (c >= 'A' && c <= 'Z')
6 || c == '_'
7 || (c > '\x7f' && UnicodeXID::is_xid_start(c))
8}
9
10pub fn is_ident_continue(c: char) -> bool {
11 (c >= 'a' && c <= 'z')
12 || (c >= 'A' && c <= 'Z')
13 || (c >= '0' && c <= '9')
14 || c == '_'
15 || (c > '\x7f' && UnicodeXID::is_xid_continue(c))
16}
17
/// Returns `true` if `c` has the Unicode `Pattern_White_Space` property.
///
/// This is deliberately narrower than `char::is_whitespace`, which tests the
/// `White_Space` property: that one also accepts characters such as U+00A0
/// (no-break space) and U+3000 (ideographic space), and rejects the
/// left-to-right / right-to-left marks. The set is small and stable, so it is
/// spelled out here instead of pulling in a table
/// (see https://github.com/behnam/rust-unic/issues/192).
pub fn is_whitespace(c: char) -> bool {
    match c {
        // ASCII control whitespace and the plain space.
        '\u{0009}' // horizontal tab
        | '\u{000A}' // line feed
        | '\u{000B}' // vertical tab
        | '\u{000C}' // form feed
        | '\u{000D}' // carriage return
        | '\u{0020}' // space
        // Non-ASCII members of Pattern_White_Space.
        | '\u{0085}' // next line
        | '\u{200E}' // left-to-right mark
        | '\u{200F}' // right-to-left mark
        | '\u{2028}' // line separator
        | '\u{2029}' // paragraph separator
        => true,
        _ => false,
    }
}
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 136afb7b8..dd3e2896d 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -1,11 +1,12 @@
1use unicode_xid::UnicodeXID;
2
3use {Token, SyntaxKind}; 1use {Token, SyntaxKind};
4use syntax_kinds::*; 2use syntax_kinds::*;
5 3
6mod ptr; 4mod ptr;
7use self::ptr::Ptr; 5use self::ptr::Ptr;
8 6
7mod classes;
8use self::classes::*;
9
9pub fn next_token(text: &str) -> Token { 10pub fn next_token(text: &str) -> Token {
10 assert!(!text.is_empty()); 11 assert!(!text.is_empty());
11 let mut ptr = Ptr::new(text); 12 let mut ptr = Ptr::new(text);
@@ -19,38 +20,20 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
19 // Note: r as in r" or r#" is part of a raw string literal, 20 // Note: r as in r" or r#" is part of a raw string literal,
20 // b as in b' is part of a byte literal. 21 // b as in b' is part of a byte literal.
21 // They are not identifiers, and are handled further down. 22 // They are not identifiers, and are handled further down.
22 let ident_start = ident_start(c) && !string_literal_start(c, ptr.next(), ptr.nnext()); 23 let ident_start = is_ident_start(c) && !string_literal_start(c, ptr.next(), ptr.nnext());
23 if ident_start { 24 if ident_start {
24 loop { 25 ptr.bump_while(is_ident_continue);
25 match ptr.next() { 26 return IDENT;
26 Some(c) if ident_continue(c) => {
27 ptr.bump();
28 },
29 _ => break,
30 }
31 }
32 IDENT
33 } else {
34 WHITESPACE
35 } 27 }
36}
37 28
38fn ident_start(c: char) -> bool { 29 if is_whitespace(c) {
39 (c >= 'a' && c <= 'z') 30 ptr.bump_while(is_whitespace);
40 || (c >= 'A' && c <= 'Z') 31 return WHITESPACE;
41 || c == '_' 32 }
42 || (c > '\x7f' && UnicodeXID::is_xid_start(c))
43}
44 33
45fn ident_continue(c: char) -> bool { 34 return ERROR
46 (c >= 'a' && c <= 'z')
47 || (c >= 'A' && c <= 'Z')
48 || (c >= '0' && c <= '9')
49 || c == '_'
50 || (c > '\x7f' && UnicodeXID::is_xid_continue(c))
51} 35}
52 36
53
54fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { 37fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
55 match (c, c1, c2) { 38 match (c, c1, c2) {
56 ('r', Some('"'), _) | 39 ('r', Some('"'), _) |
diff --git a/src/lexer/ptr.rs b/src/lexer/ptr.rs
index 4638dac21..e8aa6f37b 100644
--- a/src/lexer/ptr.rs
+++ b/src/lexer/ptr.rs
@@ -32,6 +32,17 @@ impl<'s> Ptr<'s> {
32 Some(ch) 32 Some(ch)
33 } 33 }
34 34
35 pub fn bump_while<F: Fn(char) -> bool>(&mut self, pred: F) {
36 loop {
37 match self.next() {
38 Some(c) if pred(c) => {
39 self.bump();
40 },
41 _ => return,
42 }
43 }
44 }
45
35 fn chars(&self) -> Chars { 46 fn chars(&self) -> Chars {
36 self.text[self.len.0 as usize ..].chars() 47 self.text[self.len.0 as usize ..].chars()
37 } 48 }