organizize

author: Aleksey Kladov <[email protected]> 2018-08-10 20:33:29 +0100
committer: Aleksey Kladov <[email protected]> 2018-08-10 20:33:29 +0100
commit: 7c67612b8a894187fa3b64725531a5459f9211bf (patch)
tree: 9e2a536efa0c880d921fd8d4d74423afc9451fd4 /src/lexer
parent: 26262aaf05983c5b7f41cc438e287523268fe1eb (diff)
6 files changed, 0 insertions, 539 deletions
diff --git a/src/lexer/classes.rs b/src/lexer/classes.rs
deleted file mode 100644
index 4235d2648..000000000
--- a/src/lexer/classes.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-use unicode_xid::UnicodeXID;
-pub fn is_ident_start(c: char) -> bool {
-    (c >= 'a' && c <= 'z')
-        || (c >= 'A' && c <= 'Z')
-        || c == '_'
-        || (c > '\x7f' && UnicodeXID::is_xid_start(c))
-}
-pub fn is_ident_continue(c: char) -> bool {
-    (c >= 'a' && c <= 'z')
-        || (c >= 'A' && c <= 'Z')
-        || (c >= '0' && c <= '9')
-        || c == '_'
-        || (c > '\x7f' && UnicodeXID::is_xid_continue(c))
-}
-pub fn is_whitespace(c: char) -> bool {
-    //FIXME: use is_pattern_whitespace
-    //https://github.com/behnam/rust-unic/issues/192
-    c.is_whitespace()
-}
-pub fn is_dec_digit(c: char) -> bool {
-    '0' <= c && c <= '9'
-}
diff --git a/src/lexer/comments.rs b/src/lexer/comments.rs
deleted file mode 100644
index 01acb6515..000000000
--- a/src/lexer/comments.rs
+++ /dev/null
@@ -1,57 +0,0 @@
-use lexer::ptr::Ptr;
-use SyntaxKind::{self, *};
-pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool {
-    if ptr.next_is('!') && ptr.nnext_is('/') {
-        ptr.bump();
-        ptr.bump();
-        bump_until_eol(ptr);
-        true
-    } else {
-        false
-    }
-}
-fn scan_block_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
-    if ptr.next_is('*') {
-        ptr.bump();
-        let mut depth: u32 = 1;
-        while depth > 0 {
-            if ptr.next_is('*') && ptr.nnext_is('/') {
-                depth -= 1;
-                ptr.bump();
-                ptr.bump();
-            } else if ptr.next_is('/') && ptr.nnext_is('*') {
-                depth += 1;
-                ptr.bump();
-                ptr.bump();
-            } else if ptr.bump().is_none() {
-                break;
-            }
-        }
-        Some(COMMENT)
-    } else {
-        None
-    }
-}
-pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
-    if ptr.next_is('/') {
-        bump_until_eol(ptr);
-        Some(COMMENT)
-    } else {
-        scan_block_comment(ptr)
-    }
-}
-fn bump_until_eol(ptr: &mut Ptr) {
-    loop {
-        if ptr.next_is('\n') || ptr.next_is('\r') && ptr.nnext_is('\n') {
-            return;
-        }
-        if ptr.bump().is_none() {
-            break;
-        }
-    }
-}
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
deleted file mode 100644
index f8fdc41ac..000000000
--- a/src/lexer/mod.rs
+++ /dev/null
@@ -1,209 +0,0 @@
-mod classes;
-mod comments;
-mod numbers;
-mod ptr;
-mod strings;
-use {
-    SyntaxKind::{self, *},
-    TextUnit,
-};
-use self::{
-    classes::*,
-    comments::{scan_comment, scan_shebang},
-    numbers::scan_number,
-    ptr::Ptr,
-    strings::{
-        is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, scan_string,
-    },
-};
-/// A token of Rust source.
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct Token {
-    /// The kind of token.
-    pub kind: SyntaxKind,
-    /// The length of the token.
-    pub len: TextUnit,
-}
-/// Break a string up into its component tokens
-pub fn tokenize(text: &str) -> Vec<Token> {
-    let mut text = text;
-    let mut acc = Vec::new();
-    while !text.is_empty() {
-        let token = next_token(text);
-        acc.push(token);
-        let len: u32 = token.len.into();
-        text = &text[len as usize..];
-    }
-    acc
-}
-/// Get the next token from a string
-pub fn next_token(text: &str) -> Token {
-    assert!(!text.is_empty());
-    let mut ptr = Ptr::new(text);
-    let c = ptr.bump().unwrap();
-    let kind = next_token_inner(c, &mut ptr);
-    let len = ptr.into_len();
-    Token { kind, len }
-}
-fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
-    if is_whitespace(c) {
-        ptr.bump_while(is_whitespace);
-        return WHITESPACE;
-    }
-    match c {
-        '#' => if scan_shebang(ptr) {
-            return SHEBANG;
-        },
-        '/' => if let Some(kind) = scan_comment(ptr) {
-            return kind;
-        },
-        _ => (),
-    }
-    let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.next(), ptr.nnext());
-    if ident_start {
-        return scan_ident(c, ptr);
-    }
-    if is_dec_digit(c) {
-        let kind = scan_number(c, ptr);
-        scan_literal_suffix(ptr);
-        return kind;
-    }
-    // One-byte tokens.
-    if let Some(kind) = SyntaxKind::from_char(c) {
-        return kind;
-    }
-    match c {
-        // Multi-byte tokens.
-        '.' => {
-            return match (ptr.next(), ptr.nnext()) {
-                (Some('.'), Some('.')) => {
-                    ptr.bump();
-                    ptr.bump();
-                    DOTDOTDOT
-                }
-                (Some('.'), Some('=')) => {
-                    ptr.bump();
-                    ptr.bump();
-                    DOTDOTEQ
-                }
-                (Some('.'), _) => {
-                    ptr.bump();
-                    DOTDOT
-                }
-                _ => DOT,
-            };
-        }
-        ':' => {
-            return match ptr.next() {
-                Some(':') => {
-                    ptr.bump();
-                    COLONCOLON
-                }
-                _ => COLON,
-            };
-        }
-        '=' => {
-            return match ptr.next() {
-                Some('=') => {
-                    ptr.bump();
-                    EQEQ
-                }
-                Some('>') => {
-                    ptr.bump();
-                    FAT_ARROW
-                }
-                _ => EQ,
-            };
-        }
-        '!' => {
-            return match ptr.next() {
-                Some('=') => {
-                    ptr.bump();
-                    NEQ
-                }
-                _ => EXCL,
-            };
-        }
-        '-' => {
-            return if ptr.next_is('>') {
-                ptr.bump();
-                THIN_ARROW
-            } else {
-                MINUS
-            };
-        }
-        // If the character is an ident start not followed by another single
-        // quote, then this is a lifetime name:
-        '\'' => {
-            return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') {
-                ptr.bump();
-                while ptr.next_is_p(is_ident_continue) {
-                    ptr.bump();
-                }
-                // lifetimes shouldn't end with a single quote
-                // if we find one, then this is an invalid character literal
-                if ptr.next_is('\'') {
-                    ptr.bump();
-                    return CHAR; // TODO: error reporting
-                }
-                LIFETIME
-            } else {
-                scan_char(ptr);
-                scan_literal_suffix(ptr);
-                CHAR
-            };
-        }
-        'b' => {
-            let kind = scan_byte_char_or_string(ptr);
-            scan_literal_suffix(ptr);
-            return kind;
-        }
-        '"' => {
-            scan_string(ptr);
-            scan_literal_suffix(ptr);
-            return STRING;
-        }
-        'r' => {
-            scan_raw_string(ptr);
-            scan_literal_suffix(ptr);
-            return RAW_STRING;
-        }
-        _ => (),
-    }
-    ERROR
-}
-fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
-    let is_single_letter = match ptr.next() {
-        None => true,
-        Some(c) if !is_ident_continue(c) => true,
-        _ => false,
-    };
-    if is_single_letter {
-        return if c == '_' { UNDERSCORE } else { IDENT };
-    }
-    ptr.bump_while(is_ident_continue);
-    if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) {
-        return kind;
-    }
-    IDENT
-}
-fn scan_literal_suffix(ptr: &mut Ptr) {
-    if ptr.next_is_p(is_ident_start) {
-        ptr.bump();
-    }
-    ptr.bump_while(is_ident_continue);
-}
diff --git a/src/lexer/numbers.rs b/src/lexer/numbers.rs
deleted file mode 100644
index 5c4641a2d..000000000
--- a/src/lexer/numbers.rs
+++ /dev/null
@@ -1,67 +0,0 @@
-use lexer::classes::*;
-use lexer::ptr::Ptr;
-use SyntaxKind::{self, *};
-pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
-    if c == '0' {
-        match ptr.next().unwrap_or('\0') {
-            'b' | 'o' => {
-                ptr.bump();
-                scan_digits(ptr, false);
-            }
-            'x' => {
-                ptr.bump();
-                scan_digits(ptr, true);
-            }
-            '0'...'9' | '_' | '.' | 'e' | 'E' => {
-                scan_digits(ptr, true);
-            }
-            _ => return INT_NUMBER,
-        }
-    } else {
-        scan_digits(ptr, false);
-    }
-    // might be a float, but don't be greedy if this is actually an
-    // integer literal followed by field/method access or a range pattern
-    // (`0..2` and `12.foo()`)
-    if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) {
-        // might have stuff after the ., and if it does, it needs to start
-        // with a number
-        ptr.bump();
-        scan_digits(ptr, false);
-        scan_float_exponent(ptr);
-        return FLOAT_NUMBER;
-    }
-    // it might be a float if it has an exponent
-    if ptr.next_is('e') || ptr.next_is('E') {
-        scan_float_exponent(ptr);
-        return FLOAT_NUMBER;
-    }
-    INT_NUMBER
-}
-fn scan_digits(ptr: &mut Ptr, allow_hex: bool) {
-    while let Some(c) = ptr.next() {
-        match c {
-            '_' | '0'...'9' => {
-                ptr.bump();
-            }
-            'a'...'f' | 'A'...'F' if allow_hex => {
-                ptr.bump();
-            }
-            _ => return,
-        }
-    }
-}
-fn scan_float_exponent(ptr: &mut Ptr) {
-    if ptr.next_is('e') || ptr.next_is('E') {
-        ptr.bump();
-        if ptr.next_is('-') || ptr.next_is('+') {
-            ptr.bump();
-        }
-        scan_digits(ptr, false);
-    }
-}
diff --git a/src/lexer/ptr.rs b/src/lexer/ptr.rs
deleted file mode 100644
index d1391fd5f..000000000
--- a/src/lexer/ptr.rs
+++ /dev/null
@@ -1,74 +0,0 @@
-use TextUnit;
-use std::str::Chars;
-pub(crate) struct Ptr<'s> {
-    text: &'s str,
-    len: TextUnit,
-}
-impl<'s> Ptr<'s> {
-    pub fn new(text: &'s str) -> Ptr<'s> {
-        Ptr {
-            text,
-            len: 0.into(),
-        }
-    }
-    pub fn into_len(self) -> TextUnit {
-        self.len
-    }
-    pub fn next(&self) -> Option<char> {
-        self.chars().next()
-    }
-    pub fn nnext(&self) -> Option<char> {
-        let mut chars = self.chars();
-        chars.next()?;
-        chars.next()
-    }
-    pub fn next_is(&self, c: char) -> bool {
-        self.next() == Some(c)
-    }
-    pub fn nnext_is(&self, c: char) -> bool {
-        self.nnext() == Some(c)
-    }
-    pub fn next_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
-        self.next().map(p) == Some(true)
-    }
-    pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
-        self.nnext().map(p) == Some(true)
-    }
-    pub fn bump(&mut self) -> Option<char> {
-        let ch = self.chars().next()?;
-        self.len += TextUnit::of_char(ch);
-        Some(ch)
-    }
-    pub fn bump_while<F: Fn(char) -> bool>(&mut self, pred: F) {
-        loop {
-            match self.next() {
-                Some(c) if pred(c) => {
-                    self.bump();
-                }
-                _ => return,
-            }
-        }
-    }
-    pub fn current_token_text(&self) -> &str {
-        let len: u32 = self.len.into();
-        &self.text[..len as usize]
-    }
-    fn chars(&self) -> Chars {
-        let len: u32 = self.len.into();
-        self.text[len as usize..].chars()
-    }
-}
diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs
deleted file mode 100644
index e3704fbb3..000000000
--- a/src/lexer/strings.rs
+++ /dev/null
@@ -1,106 +0,0 @@
-use SyntaxKind::{self, *};
-use lexer::ptr::Ptr;
-pub(crate) fn is_string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
-    match (c, c1, c2) {
-        ('r', Some('"'), _)
-        | ('r', Some('#'), _)
-        | ('b', Some('"'), _)
-        | ('b', Some('\''), _)
-        | ('b', Some('r'), Some('"'))
-        | ('b', Some('r'), Some('#')) => true,
-        _ => false,
-    }
-}
-pub(crate) fn scan_char(ptr: &mut Ptr) {
-    if ptr.bump().is_none() {
-        return; // TODO: error reporting is upper in the stack
-    }
-    scan_char_or_byte(ptr);
-    if !ptr.next_is('\'') {
-        return; // TODO: error reporting
-    }
-    ptr.bump();
-}
-pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind {
-    // unwrapping and not-exhaustive match are ok
-    // because of string_literal_start
-    let c = ptr.bump().unwrap();
-    match c {
-        '\'' => {
-            scan_byte(ptr);
-            BYTE
-        }
-        '"' => {
-            scan_byte_string(ptr);
-            BYTE_STRING
-        }
-        'r' => {
-            scan_raw_byte_string(ptr);
-            RAW_BYTE_STRING
-        }
-        _ => unreachable!(),
-    }
-}
-pub(crate) fn scan_string(ptr: &mut Ptr) {
-    while let Some(c) = ptr.bump() {
-        if c == '"' {
-            return;
-        }
-    }
-}
-pub(crate) fn scan_raw_string(ptr: &mut Ptr) {
-    if !ptr.next_is('"') {
-        return;
-    }
-    ptr.bump();
-    while let Some(c) = ptr.bump() {
-        if c == '"' {
-            return;
-        }
-    }
-}
-fn scan_byte(ptr: &mut Ptr) {
-    if ptr.next_is('\'') {
-        ptr.bump();
-        return;
-    }
-    ptr.bump();
-    if ptr.next_is('\'') {
-        ptr.bump();
-        return;
-    }
-}
-fn scan_byte_string(ptr: &mut Ptr) {
-    while let Some(c) = ptr.bump() {
-        if c == '"' {
-            return;
-        }
-    }
-}
-fn scan_raw_byte_string(ptr: &mut Ptr) {
-    if !ptr.next_is('"') {
-        return;
-    }
-    ptr.bump();
-    while let Some(c) = ptr.bump() {
-        if c == '"' {
-            return;
-        }
-    }
-}
-fn scan_char_or_byte(ptr: &mut Ptr) {
-    //FIXME: deal with escape sequencies
-    ptr.bump();
-}
author	Aleksey Kladov <[email protected]>	2018-08-10 20:33:29 +0100
committer	Aleksey Kladov <[email protected]>	2018-08-10 20:33:29 +0100
commit	7c67612b8a894187fa3b64725531a5459f9211bf (patch)
tree	9e2a536efa0c880d921fd8d4d74423afc9451fd4 /src/lexer
parent	26262aaf05983c5b7f41cc438e287523268fe1eb (diff)

diff --git a/src/lexer/classes.rs b/src/lexer/classes.rs
deleted file mode 100644
index 4235d2648..000000000
--- a/src/lexer/classes.rs
+++ /dev/null
@@ -1,26 +0,0 @@
1	use unicode_xid::UnicodeXID;
2
3	pub fn is_ident_start(c: char) -> bool {
4	(c >= 'a' && c <= 'z')
5	|| (c >= 'A' && c <= 'Z')
6	|| c == '_'
7	|| (c > '\x7f' && UnicodeXID::is_xid_start(c))
8	}
9
10	pub fn is_ident_continue(c: char) -> bool {
11	(c >= 'a' && c <= 'z')
12	|| (c >= 'A' && c <= 'Z')
13	|| (c >= '0' && c <= '9')
14	|| c == '_'
15	|| (c > '\x7f' && UnicodeXID::is_xid_continue(c))
16	}
17
18	pub fn is_whitespace(c: char) -> bool {
19	//FIXME: use is_pattern_whitespace
20	//https://github.com/behnam/rust-unic/issues/192
21	c.is_whitespace()
22	}
23
24	pub fn is_dec_digit(c: char) -> bool {
25	'0' <= c && c <= '9'
26	}


diff --git a/src/lexer/comments.rs b/src/lexer/comments.rs
deleted file mode 100644
index 01acb6515..000000000
--- a/src/lexer/comments.rs
+++ /dev/null
@@ -1,57 +0,0 @@
1	use lexer::ptr::Ptr;
2
3	use SyntaxKind::{self, *};
4
5	pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool {
6	if ptr.next_is('!') && ptr.nnext_is('/') {
7	ptr.bump();
8	ptr.bump();
9	bump_until_eol(ptr);
10	true
11	} else {
12	false
13	}
14	}
15
16	fn scan_block_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
17	if ptr.next_is('*') {
18	ptr.bump();
19	let mut depth: u32 = 1;
20	while depth > 0 {
21	if ptr.next_is('*') && ptr.nnext_is('/') {
22	depth -= 1;
23	ptr.bump();
24	ptr.bump();
25	} else if ptr.next_is('/') && ptr.nnext_is('*') {
26	depth += 1;
27	ptr.bump();
28	ptr.bump();
29	} else if ptr.bump().is_none() {
30	break;
31	}
32	}
33	Some(COMMENT)
34	} else {
35	None
36	}
37	}
38
39	pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
40	if ptr.next_is('/') {
41	bump_until_eol(ptr);
42	Some(COMMENT)
43	} else {
44	scan_block_comment(ptr)
45	}
46	}
47
48	fn bump_until_eol(ptr: &mut Ptr) {
49	loop {
50	if ptr.next_is('\n') || ptr.next_is('\r') && ptr.nnext_is('\n') {
51	return;
52	}
53	if ptr.bump().is_none() {
54	break;
55	}
56	}
57	}


diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
deleted file mode 100644
index f8fdc41ac..000000000
--- a/src/lexer/mod.rs
+++ /dev/null
@@ -1,209 +0,0 @@
1	mod classes;
2	mod comments;
3	mod numbers;
4	mod ptr;
5	mod strings;
6
7	use {
8	SyntaxKind::{self, *},
9	TextUnit,
10	};
11
12	use self::{
13	classes::*,
14	comments::{scan_comment, scan_shebang},
15	numbers::scan_number,
16	ptr::Ptr,
17	strings::{
18	is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, scan_string,
19	},
20	};
21
22	/// A token of Rust source.
23	#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
24	pub struct Token {
25	/// The kind of token.
26	pub kind: SyntaxKind,
27	/// The length of the token.
28	pub len: TextUnit,
29	}
30
31	/// Break a string up into its component tokens
32	pub fn tokenize(text: &str) -> Vec<Token> {
33	let mut text = text;
34	let mut acc = Vec::new();
35	while !text.is_empty() {
36	let token = next_token(text);
37	acc.push(token);
38	let len: u32 = token.len.into();
39	text = &text[len as usize..];
40	}
41	acc
42	}
43
44	/// Get the next token from a string
45	pub fn next_token(text: &str) -> Token {
46	assert!(!text.is_empty());
47	let mut ptr = Ptr::new(text);
48	let c = ptr.bump().unwrap();
49	let kind = next_token_inner(c, &mut ptr);
50	let len = ptr.into_len();
51	Token { kind, len }
52	}
53
54	fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
55	if is_whitespace(c) {
56	ptr.bump_while(is_whitespace);
57	return WHITESPACE;
58	}
59
60	match c {
61	'#' => if scan_shebang(ptr) {
62	return SHEBANG;
63	},
64	'/' => if let Some(kind) = scan_comment(ptr) {
65	return kind;
66	},
67	_ => (),
68	}
69
70	let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.next(), ptr.nnext());
71	if ident_start {
72	return scan_ident(c, ptr);
73	}
74
75	if is_dec_digit(c) {
76	let kind = scan_number(c, ptr);
77	scan_literal_suffix(ptr);
78	return kind;
79	}
80
81	// One-byte tokens.
82	if let Some(kind) = SyntaxKind::from_char(c) {
83	return kind;
84	}
85
86	match c {
87	// Multi-byte tokens.
88	'.' => {
89	return match (ptr.next(), ptr.nnext()) {
90	(Some('.'), Some('.')) => {
91	ptr.bump();
92	ptr.bump();
93	DOTDOTDOT
94	}
95	(Some('.'), Some('=')) => {
96	ptr.bump();
97	ptr.bump();
98	DOTDOTEQ
99	}
100	(Some('.'), _) => {
101	ptr.bump();
102	DOTDOT
103	}
104	_ => DOT,
105	};
106	}
107	':' => {
108	return match ptr.next() {
109	Some(':') => {
110	ptr.bump();
111	COLONCOLON
112	}
113	_ => COLON,
114	};
115	}
116	'=' => {
117	return match ptr.next() {
118	Some('=') => {
119	ptr.bump();
120	EQEQ
121	}
122	Some('>') => {
123	ptr.bump();
124	FAT_ARROW
125	}
126	_ => EQ,
127	};
128	}
129	'!' => {
130	return match ptr.next() {
131	Some('=') => {
132	ptr.bump();
133	NEQ
134	}
135	_ => EXCL,
136	};
137	}
138	'-' => {
139	return if ptr.next_is('>') {
140	ptr.bump();
141	THIN_ARROW
142	} else {
143	MINUS
144	};
145	}
146
147	// If the character is an ident start not followed by another single
148	// quote, then this is a lifetime name:
149	'\'' => {
150	return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') {
151	ptr.bump();
152	while ptr.next_is_p(is_ident_continue) {
153	ptr.bump();
154	}
155	// lifetimes shouldn't end with a single quote
156	// if we find one, then this is an invalid character literal
157	if ptr.next_is('\'') {
158	ptr.bump();
159	return CHAR; // TODO: error reporting
160	}
161	LIFETIME
162	} else {
163	scan_char(ptr);
164	scan_literal_suffix(ptr);
165	CHAR
166	};
167	}
168	'b' => {
169	let kind = scan_byte_char_or_string(ptr);
170	scan_literal_suffix(ptr);
171	return kind;
172	}
173	'"' => {
174	scan_string(ptr);
175	scan_literal_suffix(ptr);
176	return STRING;
177	}
178	'r' => {
179	scan_raw_string(ptr);
180	scan_literal_suffix(ptr);
181	return RAW_STRING;
182	}
183	_ => (),
184	}
185	ERROR
186	}
187
188	fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
189	let is_single_letter = match ptr.next() {
190	None => true,
191	Some(c) if !is_ident_continue(c) => true,
192	_ => false,
193	};
194	if is_single_letter {
195	return if c == '_' { UNDERSCORE } else { IDENT };
196	}
197	ptr.bump_while(is_ident_continue);
198	if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) {
199	return kind;
200	}
201	IDENT
202	}
203
204	fn scan_literal_suffix(ptr: &mut Ptr) {
205	if ptr.next_is_p(is_ident_start) {
206	ptr.bump();
207	}
208	ptr.bump_while(is_ident_continue);
209	}


diff --git a/src/lexer/numbers.rs b/src/lexer/numbers.rs
deleted file mode 100644
index 5c4641a2d..000000000
--- a/src/lexer/numbers.rs
+++ /dev/null
@@ -1,67 +0,0 @@
1	use lexer::classes::*;
2	use lexer::ptr::Ptr;
3
4	use SyntaxKind::{self, *};
5
6	pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
7	if c == '0' {
8	match ptr.next().unwrap_or('\0') {
9	'b' | 'o' => {
10	ptr.bump();
11	scan_digits(ptr, false);
12	}
13	'x' => {
14	ptr.bump();
15	scan_digits(ptr, true);
16	}
17	'0'...'9' | '_' | '.' | 'e' | 'E' => {
18	scan_digits(ptr, true);
19	}
20	_ => return INT_NUMBER,
21	}
22	} else {
23	scan_digits(ptr, false);
24	}
25
26	// might be a float, but don't be greedy if this is actually an
27	// integer literal followed by field/method access or a range pattern
28	// (`0..2` and `12.foo()`)
29	if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) {
30	// might have stuff after the ., and if it does, it needs to start
31	// with a number
32	ptr.bump();
33	scan_digits(ptr, false);
34	scan_float_exponent(ptr);
35	return FLOAT_NUMBER;
36	}
37	// it might be a float if it has an exponent
38	if ptr.next_is('e') || ptr.next_is('E') {
39	scan_float_exponent(ptr);
40	return FLOAT_NUMBER;
41	}
42	INT_NUMBER
43	}
44
45	fn scan_digits(ptr: &mut Ptr, allow_hex: bool) {
46	while let Some(c) = ptr.next() {
47	match c {
48	'_' | '0'...'9' => {
49	ptr.bump();
50	}
51	'a'...'f' | 'A'...'F' if allow_hex => {
52	ptr.bump();
53	}
54	_ => return,
55	}
56	}
57	}
58
59	fn scan_float_exponent(ptr: &mut Ptr) {
60	if ptr.next_is('e') || ptr.next_is('E') {
61	ptr.bump();
62	if ptr.next_is('-') || ptr.next_is('+') {
63	ptr.bump();
64	}
65	scan_digits(ptr, false);
66	}
67	}


diff --git a/src/lexer/ptr.rs b/src/lexer/ptr.rs
deleted file mode 100644
index d1391fd5f..000000000
--- a/src/lexer/ptr.rs
+++ /dev/null
@@ -1,74 +0,0 @@
1	use TextUnit;
2
3	use std::str::Chars;
4
5	pub(crate) struct Ptr<'s> {
6	text: &'s str,
7	len: TextUnit,
8	}
9
10	impl<'s> Ptr<'s> {
11	pub fn new(text: &'s str) -> Ptr<'s> {
12	Ptr {
13	text,
14	len: 0.into(),
15	}
16	}
17
18	pub fn into_len(self) -> TextUnit {
19	self.len
20	}
21
22	pub fn next(&self) -> Option<char> {
23	self.chars().next()
24	}
25
26	pub fn nnext(&self) -> Option<char> {
27	let mut chars = self.chars();
28	chars.next()?;
29	chars.next()
30	}
31
32	pub fn next_is(&self, c: char) -> bool {
33	self.next() == Some(c)
34	}
35
36	pub fn nnext_is(&self, c: char) -> bool {
37	self.nnext() == Some(c)
38	}
39
40	pub fn next_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
41	self.next().map(p) == Some(true)
42	}
43
44	pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
45	self.nnext().map(p) == Some(true)
46	}
47
48	pub fn bump(&mut self) -> Option<char> {
49	let ch = self.chars().next()?;
50	self.len += TextUnit::of_char(ch);
51	Some(ch)
52	}
53
54	pub fn bump_while<F: Fn(char) -> bool>(&mut self, pred: F) {
55	loop {
56	match self.next() {
57	Some(c) if pred(c) => {
58	self.bump();
59	}
60	_ => return,
61	}
62	}
63	}
64
65	pub fn current_token_text(&self) -> &str {
66	let len: u32 = self.len.into();
67	&self.text[..len as usize]
68	}
69
70	fn chars(&self) -> Chars {
71	let len: u32 = self.len.into();
72	self.text[len as usize..].chars()
73	}
74	}


diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs
deleted file mode 100644
index e3704fbb3..000000000
--- a/src/lexer/strings.rs
+++ /dev/null
@@ -1,106 +0,0 @@
1	use SyntaxKind::{self, *};
2
3	use lexer::ptr::Ptr;
4
5	pub(crate) fn is_string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
6	match (c, c1, c2) {
7	('r', Some('"'), _)
8	| ('r', Some('#'), _)
9	| ('b', Some('"'), _)
10	| ('b', Some('\''), _)
11	| ('b', Some('r'), Some('"'))
12	| ('b', Some('r'), Some('#')) => true,
13	_ => false,
14	}
15	}
16
17	pub(crate) fn scan_char(ptr: &mut Ptr) {
18	if ptr.bump().is_none() {
19	return; // TODO: error reporting is upper in the stack
20	}
21	scan_char_or_byte(ptr);
22	if !ptr.next_is('\'') {
23	return; // TODO: error reporting
24	}
25	ptr.bump();
26	}
27
28	pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind {
29	// unwrapping and not-exhaustive match are ok
30	// because of string_literal_start
31	let c = ptr.bump().unwrap();
32	match c {
33	'\'' => {
34	scan_byte(ptr);
35	BYTE
36	}
37	'"' => {
38	scan_byte_string(ptr);
39	BYTE_STRING
40	}
41	'r' => {
42	scan_raw_byte_string(ptr);
43	RAW_BYTE_STRING
44	}
45	_ => unreachable!(),
46	}
47	}
48
49	pub(crate) fn scan_string(ptr: &mut Ptr) {
50	while let Some(c) = ptr.bump() {
51	if c == '"' {
52	return;
53	}
54	}
55	}
56
57	pub(crate) fn scan_raw_string(ptr: &mut Ptr) {
58	if !ptr.next_is('"') {
59	return;
60	}
61	ptr.bump();
62
63	while let Some(c) = ptr.bump() {
64	if c == '"' {
65	return;
66	}
67	}
68	}
69
70	fn scan_byte(ptr: &mut Ptr) {
71	if ptr.next_is('\'') {
72	ptr.bump();
73	return;
74	}
75	ptr.bump();
76	if ptr.next_is('\'') {
77	ptr.bump();
78	return;
79	}
80	}
81
82	fn scan_byte_string(ptr: &mut Ptr) {
83	while let Some(c) = ptr.bump() {
84	if c == '"' {
85	return;
86	}
87	}
88	}
89
90	fn scan_raw_byte_string(ptr: &mut Ptr) {
91	if !ptr.next_is('"') {
92	return;
93	}
94	ptr.bump();
95
96	while let Some(c) = ptr.bump() {
97	if c == '"' {
98	return;
99	}
100	}
101	}
102
103	fn scan_char_or_byte(ptr: &mut Ptr) {
104	//FIXME: deal with escape sequencies
105	ptr.bump();
106	}