aboutsummaryrefslogtreecommitdiff
path: root/src/lexer
diff options
context:
space:
mode:
author: Aleksey Kladov <[email protected]> 2017-12-30 12:22:40 +0000
committer: Aleksey Kladov <[email protected]> 2017-12-30 12:23:38 +0000
commit: ddc637c16120fb352183698f635fc93a68580f7b (patch)
tree: 288f1497551f2667af693157f2451be40c25d697 /src/lexer
parent: 8103772a10f00378c4dcdd09f9af310c23146933 (diff)
Lexer: start numbers
Diffstat (limited to 'src/lexer')
-rw-r--r-- src/lexer/classes.rs 4
-rw-r--r-- src/lexer/mod.rs 91
-rw-r--r-- src/lexer/ptr.rs 12
3 files changed, 97 insertions, 10 deletions
diff --git a/src/lexer/classes.rs b/src/lexer/classes.rs
index 7cc050bde..4235d2648 100644
--- a/src/lexer/classes.rs
+++ b/src/lexer/classes.rs
@@ -20,3 +20,7 @@ pub fn is_whitespace(c: char) -> bool {
20 //https://github.com/behnam/rust-unic/issues/192 20 //https://github.com/behnam/rust-unic/issues/192
21 c.is_whitespace() 21 c.is_whitespace()
22} 22}
23
/// Returns `true` when `c` is one of the ASCII decimal digits `'0'` through `'9'`.
///
/// Only the ASCII range is accepted; digits from other scripts are rejected.
pub fn is_dec_digit(c: char) -> bool {
    c.is_ascii_digit()
}
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 83a411cdd..afbbee4d0 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -22,16 +22,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
22 // They are not identifiers, and are handled further down. 22 // They are not identifiers, and are handled further down.
23 let ident_start = is_ident_start(c) && !string_literal_start(c, ptr.next(), ptr.nnext()); 23 let ident_start = is_ident_start(c) && !string_literal_start(c, ptr.next(), ptr.nnext());
24 if ident_start { 24 if ident_start {
25 let is_single_letter = match ptr.next() { 25 return scan_ident(c, ptr);
26 None => true,
27 Some(c) if !is_ident_continue(c) => true,
28 _ => false,
29 };
30 if is_single_letter {
31 return if c == '_' { UNDERSCORE } else { IDENT };
32 }
33 ptr.bump_while(is_ident_continue);
34 return IDENT;
35 } 26 }
36 27
37 if is_whitespace(c) { 28 if is_whitespace(c) {
@@ -39,9 +30,89 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
39 return WHITESPACE; 30 return WHITESPACE;
40 } 31 }
41 32
33 if is_dec_digit(c) {
34 return scan_number(c, ptr);
35 }
36
42 ERROR 37 ERROR
43} 38}
44 39
40fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
41 let is_single_letter = match ptr.next() {
42 None => true,
43 Some(c) if !is_ident_continue(c) => true,
44 _ => false,
45 };
46 if is_single_letter {
47 return if c == '_' { UNDERSCORE } else { IDENT };
48 }
49 ptr.bump_while(is_ident_continue);
50 IDENT
51}
52
53fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
54 if c == '0' {
55 match ptr.next().unwrap_or('\0') {
56 'b' | 'o' => {
57 ptr.bump();
58 scan_digits(ptr, false);
59 }
60 'x' => {
61 ptr.bump();
62 scan_digits(ptr, true);
63 }
64 '0'...'9' | '_' | '.' | 'e' | 'E' => {
65 scan_digits(ptr, true);
66 }
67 _ => return INT_NUMBER,
68 }
69 } else {
70 scan_digits(ptr, false);
71 }
72
73 // might be a float, but don't be greedy if this is actually an
74 // integer literal followed by field/method access or a range pattern
75 // (`0..2` and `12.foo()`)
76 if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) {
77 // might have stuff after the ., and if it does, it needs to start
78 // with a number
79 ptr.bump();
80 scan_digits(ptr, false);
81 scan_float_exponent(ptr);
82 return FLOAT_NUMBER;
83 }
84 // it might be a float if it has an exponent
85 if ptr.next_is('e') || ptr.next_is('E') {
86 scan_float_exponent(ptr);
87 return FLOAT_NUMBER;
88 }
89 INT_NUMBER
90}
91
92fn scan_digits(ptr: &mut Ptr, allow_hex: bool) {
93 while let Some(c) = ptr.next() {
94 match c {
95 '_' | '0'...'9' => {
96 ptr.bump();
97 }
98 'a'...'f' | 'A' ... 'F' if allow_hex => {
99 ptr.bump();
100 }
101 _ => return
102 }
103 }
104}
105
106fn scan_float_exponent(ptr: &mut Ptr) {
107 if ptr.next_is('e') || ptr.next_is('E') {
108 ptr.bump();
109 if ptr.next_is('-') || ptr.next_is('+') {
110 ptr.bump();
111 }
112 scan_digits(ptr, false);
113 }
114}
115
45fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { 116fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
46 match (c, c1, c2) { 117 match (c, c1, c2) {
47 ('r', Some('"'), _) | 118 ('r', Some('"'), _) |
diff --git a/src/lexer/ptr.rs b/src/lexer/ptr.rs
index e8aa6f37b..d441b826b 100644
--- a/src/lexer/ptr.rs
+++ b/src/lexer/ptr.rs
@@ -26,6 +26,18 @@ impl<'s> Ptr<'s> {
26 chars.next() 26 chars.next()
27 } 27 }
28 28
29 pub fn next_is(&self, c: char) -> bool {
30 self.next() == Some(c)
31 }
32
33 pub fn nnext_is(&self, c: char) -> bool {
34 self.nnext() == Some(c)
35 }
36
37 pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
38 self.nnext().map(p) == Some(true)
39 }
40
29 pub fn bump(&mut self) -> Option<char> { 41 pub fn bump(&mut self) -> Option<char> {
30 let ch = self.chars().next()?; 42 let ch = self.chars().next()?;
31 self.len += TextUnit::len_of_char(ch); 43 self.len += TextUnit::len_of_char(ch);