aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2017-12-31 10:32:00 +0000
committerAleksey Kladov <[email protected]>2017-12-31 10:32:00 +0000
commitf1a840cc385798fc2e3e9ac9ddb0dd57fd0ac509 (patch)
tree702e0d346b4d5aab3fe16c554aa878d08aa527c6
parent9d5138bf11eb0c979c49f904010d2d3690bdf74c (diff)
Lexer: extract string lexing to a separate file
-rw-r--r--src/lexer/mod.rs72
-rw-r--r--src/lexer/strings.rs65
2 files changed, 88 insertions, 49 deletions
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 320d59de5..ccc25b8c9 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -10,6 +10,9 @@ use self::classes::*;
10mod numbers; 10mod numbers;
11use self::numbers::scan_number; 11use self::numbers::scan_number;
12 12
13mod strings;
14use self::strings::{string_literal_start, scan_char, scan_byte_char_or_string};
15
13pub fn next_token(text: &str) -> Token { 16pub fn next_token(text: &str) -> Token {
14 assert!(!text.is_empty()); 17 assert!(!text.is_empty());
15 let mut ptr = Ptr::new(text); 18 let mut ptr = Ptr::new(text);
@@ -101,7 +104,26 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
101 _ => NOT, 104 _ => NOT,
102 }, 105 },
103 106
104 '\'' => return scan_char_or_lifetime(ptr), 107 // If the character is an ident start not followed by another single
108 // quote, then this is a lifetime name:
109 '\'' => return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') {
110 ptr.bump();
111 while ptr.next_is_p(is_ident_continue) {
112 ptr.bump();
113 }
114 // lifetimes shouldn't end with a single quote
115 // if we find one, then this is an invalid character literal
116 if ptr.next_is('\'') {
117 ptr.bump();
118 return CHAR; // TODO: error reporting
119 }
120 LIFETIME
121 } else {
122 scan_char(ptr);
123 scan_literal_suffix(ptr);
124 CHAR
125 },
126 'b' => return scan_byte_char_or_string(ptr),
105 _ => (), 127 _ => (),
106 } 128 }
107 ERROR 129 ERROR
@@ -120,57 +142,9 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
120 IDENT 142 IDENT
121} 143}
122 144
123fn scan_char_or_lifetime(ptr: &mut Ptr) -> SyntaxKind {
124 // Either a character constant 'a' OR a lifetime name 'abc
125 let c = match ptr.bump() {
126 Some(c) => c,
127 None => return CHAR, // TODO: error reporting is upper in the stack
128 };
129
130 // If the character is an ident start not followed by another single
131 // quote, then this is a lifetime name:
132 if is_ident_start(c) && !ptr.next_is('\'') {
133 while ptr.next_is_p(is_ident_continue) {
134 ptr.bump();
135 }
136
137 // lifetimes shouldn't end with a single quote
138 // if we find one, then this is an invalid character literal
139 if ptr.next_is('\'') {
140 ptr.bump();
141 return CHAR;
142 }
143 return LIFETIME;
144 }
145 scan_char_or_byte(ptr);
146 if !ptr.next_is('\'') {
147 return CHAR; // TODO: error reporting
148 }
149 ptr.bump();
150 scan_literal_suffix(ptr);
151 CHAR
152}
153
154fn scan_literal_suffix(ptr: &mut Ptr) { 145fn scan_literal_suffix(ptr: &mut Ptr) {
155 if ptr.next_is_p(is_ident_start) { 146 if ptr.next_is_p(is_ident_start) {
156 ptr.bump(); 147 ptr.bump();
157 } 148 }
158 ptr.bump_while(is_ident_continue); 149 ptr.bump_while(is_ident_continue);
159} 150}
160
161fn scan_char_or_byte(ptr: &mut Ptr) {
162 //FIXME: deal with escape sequencies
163 ptr.bump();
164}
165
166fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
167 match (c, c1, c2) {
168 ('r', Some('"'), _) |
169 ('r', Some('#'), _) |
170 ('b', Some('"'), _) |
171 ('b', Some('\''), _) |
172 ('b', Some('r'), Some('"')) |
173 ('b', Some('r'), Some('#')) => true,
174 _ => false
175 }
176}
diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs
new file mode 100644
index 000000000..40e5e4528
--- /dev/null
+++ b/src/lexer/strings.rs
@@ -0,0 +1,65 @@
1use {SyntaxKind};
2use syntax_kinds::*;
3
4use lexer::ptr::Ptr;
5
6pub(crate) fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
7 match (c, c1, c2) {
8 ('r', Some('"'), _) |
9 ('r', Some('#'), _) |
10 ('b', Some('"'), _) |
11 ('b', Some('\''), _) |
12 ('b', Some('r'), Some('"')) |
13 ('b', Some('r'), Some('#')) => true,
14 _ => false
15 }
16}
17
18pub(crate) fn scan_char(ptr: &mut Ptr) {
19 if ptr.bump().is_none() {
20 return; // TODO: error reporting is higher up in the stack
21 }
22 scan_char_or_byte(ptr);
23 if !ptr.next_is('\'') {
24 return; // TODO: error reporting
25 }
26 ptr.bump();
27}
28
29pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind {
30 // unwrapping and the non-exhaustive match are ok
31 // because of string_literal_start
32 let c = ptr.bump().unwrap();
33 match c {
34 '\'' => {
35 scan_byte(ptr);
36 CHAR
37 }
38 '"' => {
39 scan_byte_string(ptr);
40 CHAR
41 }
42 'r' => {
43 scan_raw_byte_string(ptr);
44 CHAR
45 }
46 _ => unreachable!(),
47 }
48}
49
50fn scan_byte(ptr: &mut Ptr) {
51
52}
53
54fn scan_byte_string(ptr: &mut Ptr) {
55
56}
57
58fn scan_raw_byte_string(ptr: &mut Ptr) {
59
60}
61
62fn scan_char_or_byte(ptr: &mut Ptr) {
63 //FIXME: deal with escape sequences
64 ptr.bump();
65} \ No newline at end of file