2 files changed, 88 insertions, 49 deletions
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 320d59de5..ccc25b8c9 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -10,6 +10,9 @@ use self::classes::*;
 mod numbers;
 use self::numbers::scan_number;
+mod strings;
+use self::strings::{string_literal_start, scan_char, scan_byte_char_or_string};
 pub fn next_token(text: &str) -> Token {
    assert!(!text.is_empty());
    let mut ptr = Ptr::new(text);
@@ -101,7 +104,26 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
            _ => NOT,
        },
-        '\'' => return scan_char_or_lifetime(ptr),
+        // If the character is an ident start not followed by another single
+        // quote, then this is a lifetime name:
+        '\'' => return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') {
+            ptr.bump();
+            while ptr.next_is_p(is_ident_continue) {
+                ptr.bump();
+            }
+            // lifetimes shouldn't end with a single quote
+            // if we find one, then this is an invalid character literal
+            if ptr.next_is('\'') {
+                ptr.bump();
+                return CHAR; // TODO: error reporting
+            }
+            LIFETIME
+        } else {
+            scan_char(ptr);
+            scan_literal_suffix(ptr);
+            CHAR
+        },
+        'b' => return scan_byte_char_or_string(ptr),
        _ => (),
    }
    ERROR
@@ -120,57 +142,9 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
    IDENT
 }
-fn scan_char_or_lifetime(ptr: &mut Ptr) -> SyntaxKind {
-    // Either a character constant 'a' OR a lifetime name 'abc
-    let c = match ptr.bump() {
-        Some(c) => c,
-        None => return CHAR, // TODO: error reporting is upper in the stack
-    };
-    // If the character is an ident start not followed by another single
-    // quote, then this is a lifetime name:
-    if is_ident_start(c) && !ptr.next_is('\'') {
-        while ptr.next_is_p(is_ident_continue) {
-            ptr.bump();
-        }
-        // lifetimes shouldn't end with a single quote
-        // if we find one, then this is an invalid character literal
-        if ptr.next_is('\'') {
-            ptr.bump();
-            return CHAR;
-        }
-        return LIFETIME;
-    }
-    scan_char_or_byte(ptr);
-    if !ptr.next_is('\'') {
-        return CHAR; // TODO: error reporting
-    }
-    ptr.bump();
-    scan_literal_suffix(ptr);
-    CHAR
-}
 fn scan_literal_suffix(ptr: &mut Ptr) {
    if ptr.next_is_p(is_ident_start) {
        ptr.bump();
    }
    ptr.bump_while(is_ident_continue);
 }
-fn scan_char_or_byte(ptr: &mut Ptr) {
-    //FIXME: deal with escape sequencies
-    ptr.bump();
-}
-fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
-    match (c, c1, c2) {
-        ('r', Some('"'), _) |
-        ('r', Some('#'), _) |
-        ('b', Some('"'), _) |
-        ('b', Some('\''), _) |
-        ('b', Some('r'), Some('"')) |
-        ('b', Some('r'), Some('#')) => true,
-        _ => false
-    }
-}
diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs
new file mode 100644
index 000000000..40e5e4528
--- /dev/null
+++ b/src/lexer/strings.rs
@@ -0,0 +1,65 @@
+use {SyntaxKind};
+use syntax_kinds::*;
+use lexer::ptr::Ptr;
+pub(crate) fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool { // Is `c` plus the lookahead `c1`, `c2` one of the literal openers matched below?
+    match (c, c1, c2) { // accepted openers, in order: r" r# b" b' br" br# (c2 only matters after `br`)
+        ('r', Some('"'), _) |
+        ('r', Some('#'), _) |
+        ('b', Some('"'), _) |
+        ('b', Some('\''), _) |
+        ('b', Some('r'), Some('"')) |
+        ('b', Some('r'), Some('#')) => true,
+        _ => false
+    }
+}
+pub(crate) fn scan_char(ptr: &mut Ptr) { // Scans the rest of a char literal; returns nothing, the caller picks the token kind.
+    if ptr.bump().is_none() {
+        return; // bump() yielded nothing (presumably end of input); TODO: error reporting is upper in the stack
+    }
+    scan_char_or_byte(ptr);
+    if !ptr.next_is('\'') {
+        return; // no closing quote at this position; TODO: error reporting
+    }
+    ptr.bump(); // step past the closing quote confirmed by the check above
+}
+pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind { // Dispatches on the char returned by `ptr.bump()` to one of the byte-literal scanners.
+    // The unwrap and the non-exhaustive match assume the caller has already
+    // confirmed a byte-literal prefix via string_literal_start.
+    let c = ptr.bump().unwrap(); // NOTE(review): panics if bump() returns None — confirm the caller's guard
+    match c {
+        '\'' => {
+            scan_byte(ptr);
+            CHAR
+        }
+        '"' => {
+            scan_byte_string(ptr);
+            CHAR // NOTE(review): this arm also yields CHAR — confirm whether a dedicated kind is intended
+        }
+        'r' => {
+            scan_raw_byte_string(ptr);
+            CHAR // NOTE(review): this arm also yields CHAR — confirm whether a dedicated kind is intended
+        }
+        _ => unreachable!(), // any other char means the precondition above was not upheld
+    }
+}
+fn scan_byte(ptr: &mut Ptr) { // stub: empty body, `ptr` is not touched yet
+}
+fn scan_byte_string(ptr: &mut Ptr) { // stub: empty body, `ptr` is not touched yet
+}
+fn scan_raw_byte_string(ptr: &mut Ptr) { // stub: empty body, `ptr` is not touched yet
+}
+fn scan_char_or_byte(ptr: &mut Ptr) { // Placeholder scanner for the body of a char or byte literal.
+    //FIXME: deal with escape sequences
+    ptr.bump(); // exactly one bump() call whatever the input; escapes are not recognized yet
+}
+\ No newline at end of file

diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 320d59de5..ccc25b8c9 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs
@@ -10,6 +10,9 @@ use self::classes::*;
10	mod numbers;	10	mod numbers;
11	use self::numbers::scan_number;	11	use self::numbers::scan_number;
12		12
		13	mod strings;
		14	use self::strings::{string_literal_start, scan_char, scan_byte_char_or_string};
		15
13	pub fn next_token(text: &str) -> Token {	16	pub fn next_token(text: &str) -> Token {
14	assert!(!text.is_empty());	17	assert!(!text.is_empty());
15	let mut ptr = Ptr::new(text);	18	let mut ptr = Ptr::new(text);
@@ -101,7 +104,26 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
101	_ => NOT,	104	_ => NOT,
102	},	105	},
103		106
104	'\'' => return scan_char_or_lifetime(ptr),	107	// If the character is an ident start not followed by another single
		108	// quote, then this is a lifetime name:
		109	'\'' => return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') {
		110	ptr.bump();
		111	while ptr.next_is_p(is_ident_continue) {
		112	ptr.bump();
		113	}
		114	// lifetimes shouldn't end with a single quote
		115	// if we find one, then this is an invalid character literal
		116	if ptr.next_is('\'') {
		117	ptr.bump();
		118	return CHAR; // TODO: error reporting
		119	}
		120	LIFETIME
		121	} else {
		122	scan_char(ptr);
		123	scan_literal_suffix(ptr);
		124	CHAR
		125	},
		126	'b' => return scan_byte_char_or_string(ptr),
105	_ => (),	127	_ => (),
106	}	128	}
107	ERROR	129	ERROR
@@ -120,57 +142,9 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
120	IDENT	142	IDENT
121	}	143	}
122		144
123	fn scan_char_or_lifetime(ptr: &mut Ptr) -> SyntaxKind {
124	// Either a character constant 'a' OR a lifetime name 'abc
125	let c = match ptr.bump() {
126	Some(c) => c,
127	None => return CHAR, // TODO: error reporting is upper in the stack
128	};
129
130	// If the character is an ident start not followed by another single
131	// quote, then this is a lifetime name:
132	if is_ident_start(c) && !ptr.next_is('\'') {
133	while ptr.next_is_p(is_ident_continue) {
134	ptr.bump();
135	}
136
137	// lifetimes shouldn't end with a single quote
138	// if we find one, then this is an invalid character literal
139	if ptr.next_is('\'') {
140	ptr.bump();
141	return CHAR;
142	}
143	return LIFETIME;
144	}
145	scan_char_or_byte(ptr);
146	if !ptr.next_is('\'') {
147	return CHAR; // TODO: error reporting
148	}
149	ptr.bump();
150	scan_literal_suffix(ptr);
151	CHAR
152	}
153
154	fn scan_literal_suffix(ptr: &mut Ptr) {	145	fn scan_literal_suffix(ptr: &mut Ptr) {
155	if ptr.next_is_p(is_ident_start) {	146	if ptr.next_is_p(is_ident_start) {
156	ptr.bump();	147	ptr.bump();
157	}	148	}
158	ptr.bump_while(is_ident_continue);	149	ptr.bump_while(is_ident_continue);
159	}	150	}
160
161	fn scan_char_or_byte(ptr: &mut Ptr) {
162	//FIXME: deal with escape sequencies
163	ptr.bump();
164	}
165
166	fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
167	match (c, c1, c2) {
168	('r', Some('"'), _) \|
169	('r', Some('#'), _) \|
170	('b', Some('"'), _) \|
171	('b', Some('\''), _) \|
172	('b', Some('r'), Some('"')) \|
173	('b', Some('r'), Some('#')) => true,
174	_ => false
175	}
176	}


diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs new file mode 100644 index 000000000..40e5e4528 --- /dev/null +++ b/src/lexer/strings.rs
@@ -0,0 +1,65 @@
		1	use {SyntaxKind};
		2	use syntax_kinds::*;
		3
		4	use lexer::ptr::Ptr;
		5
		6	pub(crate) fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
		7	match (c, c1, c2) {
		8	('r', Some('"'), _) \|
		9	('r', Some('#'), _) \|
		10	('b', Some('"'), _) \|
		11	('b', Some('\''), _) \|
		12	('b', Some('r'), Some('"')) \|
		13	('b', Some('r'), Some('#')) => true,
		14	_ => false
		15	}
		16	}
		17
		18	pub(crate) fn scan_char(ptr: &mut Ptr) {
		19	if ptr.bump().is_none() {
		20	return; // TODO: error reporting is upper in the stack
		21	}
		22	scan_char_or_byte(ptr);
		23	if !ptr.next_is('\'') {
		24	return; // TODO: error reporting
		25	}
		26	ptr.bump();
		27	}
		28
		29	pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind {
		30	// unwrapping and not-exhaustive match are ok
		31	// because of string_literal_start
		32	let c = ptr.bump().unwrap();
		33	match c {
		34	'\'' => {
		35	scan_byte(ptr);
		36	CHAR
		37	}
		38	'"' => {
		39	scan_byte_string(ptr);
		40	CHAR
		41	}
		42	'r' => {
		43	scan_raw_byte_string(ptr);
		44	CHAR
		45	}
		46	_ => unreachable!(),
		47	}
		48	}
		49
		50	fn scan_byte(ptr: &mut Ptr) {
		51
		52	}
		53
		54	fn scan_byte_string(ptr: &mut Ptr) {
		55
		56	}
		57
		58	fn scan_raw_byte_string(ptr: &mut Ptr) {
		59
		60	}
		61
		62	fn scan_char_or_byte(ptr: &mut Ptr) {
		63	//FIXME: deal with escape sequencies
		64	ptr.bump();
		65	} \ No newline at end of file