diff options
-rw-r--r-- | src/lexer/mod.rs | 66 | ||||
-rw-r--r-- | src/lexer/numbers.rs | 68 | ||||
-rw-r--r-- | tests/data/lexer/0004_number.rs | 1 | ||||
-rw-r--r-- | tests/data/lexer/0004_number.txt | 1 |
4 files changed, 73 insertions, 63 deletions
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index afbbee4d0..e60dbbe8e 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs | |||
@@ -7,6 +7,9 @@ use self::ptr::Ptr; | |||
7 | mod classes; | 7 | mod classes; |
8 | use self::classes::*; | 8 | use self::classes::*; |
9 | 9 | ||
10 | mod numbers; | ||
11 | use self::numbers::scan_number; | ||
12 | |||
10 | pub fn next_token(text: &str) -> Token { | 13 | pub fn next_token(text: &str) -> Token { |
11 | assert!(!text.is_empty()); | 14 | assert!(!text.is_empty()); |
12 | let mut ptr = Ptr::new(text); | 15 | let mut ptr = Ptr::new(text); |
@@ -50,69 +53,6 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
50 | IDENT | 53 | IDENT |
51 | } | 54 | } |
52 | 55 | ||
53 | fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { | ||
54 | if c == '0' { | ||
55 | match ptr.next().unwrap_or('\0') { | ||
56 | 'b' | 'o' => { | ||
57 | ptr.bump(); | ||
58 | scan_digits(ptr, false); | ||
59 | } | ||
60 | 'x' => { | ||
61 | ptr.bump(); | ||
62 | scan_digits(ptr, true); | ||
63 | } | ||
64 | '0'...'9' | '_' | '.' | 'e' | 'E' => { | ||
65 | scan_digits(ptr, true); | ||
66 | } | ||
67 | _ => return INT_NUMBER, | ||
68 | } | ||
69 | } else { | ||
70 | scan_digits(ptr, false); | ||
71 | } | ||
72 | |||
73 | // might be a float, but don't be greedy if this is actually an | ||
74 | // integer literal followed by field/method access or a range pattern | ||
75 | // (`0..2` and `12.foo()`) | ||
76 | if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) { | ||
77 | // might have stuff after the ., and if it does, it needs to start | ||
78 | // with a number | ||
79 | ptr.bump(); | ||
80 | scan_digits(ptr, false); | ||
81 | scan_float_exponent(ptr); | ||
82 | return FLOAT_NUMBER; | ||
83 | } | ||
84 | // it might be a float if it has an exponent | ||
85 | if ptr.next_is('e') || ptr.next_is('E') { | ||
86 | scan_float_exponent(ptr); | ||
87 | return FLOAT_NUMBER; | ||
88 | } | ||
89 | INT_NUMBER | ||
90 | } | ||
91 | |||
92 | fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { | ||
93 | while let Some(c) = ptr.next() { | ||
94 | match c { | ||
95 | '_' | '0'...'9' => { | ||
96 | ptr.bump(); | ||
97 | } | ||
98 | 'a'...'f' | 'A' ... 'F' if allow_hex => { | ||
99 | ptr.bump(); | ||
100 | } | ||
101 | _ => return | ||
102 | } | ||
103 | } | ||
104 | } | ||
105 | |||
106 | fn scan_float_exponent(ptr: &mut Ptr) { | ||
107 | if ptr.next_is('e') || ptr.next_is('E') { | ||
108 | ptr.bump(); | ||
109 | if ptr.next_is('-') || ptr.next_is('+') { | ||
110 | ptr.bump(); | ||
111 | } | ||
112 | scan_digits(ptr, false); | ||
113 | } | ||
114 | } | ||
115 | |||
116 | fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { | 56 | fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { |
117 | match (c, c1, c2) { | 57 | match (c, c1, c2) { |
118 | ('r', Some('"'), _) | | 58 | ('r', Some('"'), _) | |
diff --git a/src/lexer/numbers.rs b/src/lexer/numbers.rs new file mode 100644 index 000000000..4c7edfe1c --- /dev/null +++ b/src/lexer/numbers.rs | |||
@@ -0,0 +1,68 @@ | |||
1 | use lexer::ptr::Ptr; | ||
2 | use lexer::classes::*; | ||
3 | |||
4 | use {SyntaxKind}; | ||
5 | use syntax_kinds::*; | ||
6 | |||
7 | pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { | ||
8 | if c == '0' { | ||
9 | match ptr.next().unwrap_or('\0') { | ||
10 | 'b' | 'o' => { | ||
11 | ptr.bump(); | ||
12 | scan_digits(ptr, false); | ||
13 | } | ||
14 | 'x' => { | ||
15 | ptr.bump(); | ||
16 | scan_digits(ptr, true); | ||
17 | } | ||
18 | '0'...'9' | '_' | '.' | 'e' | 'E' => { | ||
19 | scan_digits(ptr, true); | ||
20 | } | ||
21 | _ => return INT_NUMBER, | ||
22 | } | ||
23 | } else { | ||
24 | scan_digits(ptr, false); | ||
25 | } | ||
26 | |||
27 | // might be a float, but don't be greedy if this is actually an | ||
28 | // integer literal followed by field/method access or a range pattern | ||
29 | // (`0..2` and `12.foo()`) | ||
30 | if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) { | ||
31 | // might have stuff after the ., and if it does, it needs to start | ||
32 | // with a number | ||
33 | ptr.bump(); | ||
34 | scan_digits(ptr, false); | ||
35 | scan_float_exponent(ptr); | ||
36 | return FLOAT_NUMBER; | ||
37 | } | ||
38 | // it might be a float if it has an exponent | ||
39 | if ptr.next_is('e') || ptr.next_is('E') { | ||
40 | scan_float_exponent(ptr); | ||
41 | return FLOAT_NUMBER; | ||
42 | } | ||
43 | INT_NUMBER | ||
44 | } | ||
45 | |||
46 | fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { | ||
47 | while let Some(c) = ptr.next() { | ||
48 | match c { | ||
49 | '_' | '0'...'9' => { | ||
50 | ptr.bump(); | ||
51 | } | ||
52 | 'a'...'f' | 'A' ... 'F' if allow_hex => { | ||
53 | ptr.bump(); | ||
54 | } | ||
55 | _ => return | ||
56 | } | ||
57 | } | ||
58 | } | ||
59 | |||
60 | fn scan_float_exponent(ptr: &mut Ptr) { | ||
61 | if ptr.next_is('e') || ptr.next_is('E') { | ||
62 | ptr.bump(); | ||
63 | if ptr.next_is('-') || ptr.next_is('+') { | ||
64 | ptr.bump(); | ||
65 | } | ||
66 | scan_digits(ptr, false); | ||
67 | } | ||
68 | } | ||
diff --git a/tests/data/lexer/0004_number.rs b/tests/data/lexer/0004_number.rs index af53ff2cd..0c0d37627 100644 --- a/tests/data/lexer/0004_number.rs +++ b/tests/data/lexer/0004_number.rs | |||
@@ -5,3 +5,4 @@ | |||
5 | 0e+1 | 5 | 0e+1 |
6 | 0.e+1 | 6 | 0.e+1 |
7 | 0.0E-2 | 7 | 0.0E-2 |
8 | 0___0.10000____0000e+111__ \ No newline at end of file | ||
diff --git a/tests/data/lexer/0004_number.txt b/tests/data/lexer/0004_number.txt index 7dedd2cac..94fe0302d 100644 --- a/tests/data/lexer/0004_number.txt +++ b/tests/data/lexer/0004_number.txt | |||
@@ -60,3 +60,4 @@ INT_NUMBER 1 "1" | |||
60 | WHITESPACE 1 "\n" | 60 | WHITESPACE 1 "\n" |
61 | FLOAT_NUMBER 6 "0.0E-2" | 61 | FLOAT_NUMBER 6 "0.0E-2" |
62 | WHITESPACE 1 "\n" | 62 | WHITESPACE 1 "\n" |
63 | FLOAT_NUMBER 26 "0___0.10000____0000e+111__" | ||