aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2017-12-30 12:56:52 +0000
committerAleksey Kladov <[email protected]>2017-12-30 12:56:58 +0000
commit3e91e8b77db443775eea2ccd40b0cf1e27dc77d8 (patch)
treedee794779536390e1becbc6939d23076e4137586
parent2d5217dbb76593a1dba1ff3db3ca0221c457e687 (diff)
Lexer: move numbers to a separate file
-rw-r--r--src/lexer/mod.rs66
-rw-r--r--src/lexer/numbers.rs68
-rw-r--r--tests/data/lexer/0004_number.rs1
-rw-r--r--tests/data/lexer/0004_number.txt1
4 files changed, 73 insertions, 63 deletions
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index afbbee4d0..e60dbbe8e 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -7,6 +7,9 @@ use self::ptr::Ptr;
7mod classes; 7mod classes;
8use self::classes::*; 8use self::classes::*;
9 9
10mod numbers;
11use self::numbers::scan_number;
12
10pub fn next_token(text: &str) -> Token { 13pub fn next_token(text: &str) -> Token {
11 assert!(!text.is_empty()); 14 assert!(!text.is_empty());
12 let mut ptr = Ptr::new(text); 15 let mut ptr = Ptr::new(text);
@@ -50,69 +53,6 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
50 IDENT 53 IDENT
51} 54}
52 55
53fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
54 if c == '0' {
55 match ptr.next().unwrap_or('\0') {
56 'b' | 'o' => {
57 ptr.bump();
58 scan_digits(ptr, false);
59 }
60 'x' => {
61 ptr.bump();
62 scan_digits(ptr, true);
63 }
64 '0'...'9' | '_' | '.' | 'e' | 'E' => {
65 scan_digits(ptr, true);
66 }
67 _ => return INT_NUMBER,
68 }
69 } else {
70 scan_digits(ptr, false);
71 }
72
73 // might be a float, but don't be greedy if this is actually an
74 // integer literal followed by field/method access or a range pattern
75 // (`0..2` and `12.foo()`)
76 if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) {
77 // might have stuff after the ., and if it does, it needs to start
78 // with a number
79 ptr.bump();
80 scan_digits(ptr, false);
81 scan_float_exponent(ptr);
82 return FLOAT_NUMBER;
83 }
84 // it might be a float if it has an exponent
85 if ptr.next_is('e') || ptr.next_is('E') {
86 scan_float_exponent(ptr);
87 return FLOAT_NUMBER;
88 }
89 INT_NUMBER
90}
91
92fn scan_digits(ptr: &mut Ptr, allow_hex: bool) {
93 while let Some(c) = ptr.next() {
94 match c {
95 '_' | '0'...'9' => {
96 ptr.bump();
97 }
98 'a'...'f' | 'A' ... 'F' if allow_hex => {
99 ptr.bump();
100 }
101 _ => return
102 }
103 }
104}
105
106fn scan_float_exponent(ptr: &mut Ptr) {
107 if ptr.next_is('e') || ptr.next_is('E') {
108 ptr.bump();
109 if ptr.next_is('-') || ptr.next_is('+') {
110 ptr.bump();
111 }
112 scan_digits(ptr, false);
113 }
114}
115
116fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { 56fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
117 match (c, c1, c2) { 57 match (c, c1, c2) {
118 ('r', Some('"'), _) | 58 ('r', Some('"'), _) |
diff --git a/src/lexer/numbers.rs b/src/lexer/numbers.rs
new file mode 100644
index 000000000..4c7edfe1c
--- /dev/null
+++ b/src/lexer/numbers.rs
@@ -0,0 +1,68 @@
1use lexer::ptr::Ptr;
2use lexer::classes::*;
3
4use {SyntaxKind};
5use syntax_kinds::*;
6
7pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
8 if c == '0' {
9 match ptr.next().unwrap_or('\0') {
10 'b' | 'o' => {
11 ptr.bump();
12 scan_digits(ptr, false);
13 }
14 'x' => {
15 ptr.bump();
16 scan_digits(ptr, true);
17 }
18 '0'...'9' | '_' | '.' | 'e' | 'E' => {
19 scan_digits(ptr, true);
20 }
21 _ => return INT_NUMBER,
22 }
23 } else {
24 scan_digits(ptr, false);
25 }
26
27 // might be a float, but don't be greedy if this is actually an
28 // integer literal followed by field/method access or a range pattern
29 // (`0..2` and `12.foo()`)
30 if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) {
31 // might have stuff after the ., and if it does, it needs to start
32 // with a number
33 ptr.bump();
34 scan_digits(ptr, false);
35 scan_float_exponent(ptr);
36 return FLOAT_NUMBER;
37 }
38 // it might be a float if it has an exponent
39 if ptr.next_is('e') || ptr.next_is('E') {
40 scan_float_exponent(ptr);
41 return FLOAT_NUMBER;
42 }
43 INT_NUMBER
44}
45
46fn scan_digits(ptr: &mut Ptr, allow_hex: bool) {
47 while let Some(c) = ptr.next() {
48 match c {
49 '_' | '0'...'9' => {
50 ptr.bump();
51 }
52 'a'...'f' | 'A' ... 'F' if allow_hex => {
53 ptr.bump();
54 }
55 _ => return
56 }
57 }
58}
59
60fn scan_float_exponent(ptr: &mut Ptr) {
61 if ptr.next_is('e') || ptr.next_is('E') {
62 ptr.bump();
63 if ptr.next_is('-') || ptr.next_is('+') {
64 ptr.bump();
65 }
66 scan_digits(ptr, false);
67 }
68}
diff --git a/tests/data/lexer/0004_number.rs b/tests/data/lexer/0004_number.rs
index af53ff2cd..0c0d37627 100644
--- a/tests/data/lexer/0004_number.rs
+++ b/tests/data/lexer/0004_number.rs
@@ -5,3 +5,4 @@
50e+1 50e+1
60.e+1 60.e+1
70.0E-2 70.0E-2
80___0.10000____0000e+111__ \ No newline at end of file
diff --git a/tests/data/lexer/0004_number.txt b/tests/data/lexer/0004_number.txt
index 7dedd2cac..94fe0302d 100644
--- a/tests/data/lexer/0004_number.txt
+++ b/tests/data/lexer/0004_number.txt
@@ -60,3 +60,4 @@ INT_NUMBER 1 "1"
60WHITESPACE 1 "\n" 60WHITESPACE 1 "\n"
61FLOAT_NUMBER 6 "0.0E-2" 61FLOAT_NUMBER 6 "0.0E-2"
62WHITESPACE 1 "\n" 62WHITESPACE 1 "\n"
63FLOAT_NUMBER 26 "0___0.10000____0000e+111__"