diff options
Diffstat (limited to 'src/lexer/mod.rs')
-rw-r--r-- | src/lexer/mod.rs | 50 |
1 files changed, 31 insertions, 19 deletions
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 65a994327..69cab5b57 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs | |||
@@ -1,21 +1,32 @@ | |||
1 | use {SyntaxKind, Token}; | ||
2 | use syntax_kinds::*; | ||
3 | |||
4 | mod ptr; | 1 | mod ptr; |
5 | use self::ptr::Ptr; | 2 | mod comments; |
6 | 3 | mod strings; | |
4 | mod numbers; | ||
7 | mod classes; | 5 | mod classes; |
8 | use self::classes::*; | ||
9 | 6 | ||
10 | mod numbers; | 7 | use { |
11 | use self::numbers::scan_number; | 8 | TextUnit, |
9 | SyntaxKind::{self, *}, | ||
10 | }; | ||
12 | 11 | ||
13 | mod strings; | 12 | use self::{ |
14 | use self::strings::{is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, | 13 | ptr::Ptr, |
15 | scan_string}; | 14 | classes::*, |
15 | numbers::scan_number, | ||
16 | strings::{ | ||
17 | is_string_literal_start, scan_byte_char_or_string, scan_char, | ||
18 | scan_raw_string, scan_string}, | ||
19 | comments::{scan_comment, scan_shebang}, | ||
20 | }; | ||
16 | 21 | ||
17 | mod comments; | 22 | /// A token of Rust source. |
18 | use self::comments::{scan_comment, scan_shebang}; | 23 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
24 | pub struct Token { | ||
25 | /// The kind of token. | ||
26 | pub kind: SyntaxKind, | ||
27 | /// The length of the token. | ||
28 | pub len: TextUnit, | ||
29 | } | ||
19 | 30 | ||
20 | /// Break a string up into its component tokens | 31 | /// Break a string up into its component tokens |
21 | pub fn tokenize(text: &str) -> Vec<Token> { | 32 | pub fn tokenize(text: &str) -> Vec<Token> { |
@@ -29,6 +40,7 @@ pub fn tokenize(text: &str) -> Vec<Token> { | |||
29 | } | 40 | } |
30 | acc | 41 | acc |
31 | } | 42 | } |
43 | |||
32 | /// Get the next token from a string | 44 | /// Get the next token from a string |
33 | pub fn next_token(text: &str) -> Token { | 45 | pub fn next_token(text: &str) -> Token { |
34 | assert!(!text.is_empty()); | 46 | assert!(!text.is_empty()); |
@@ -109,7 +121,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
109 | DOTDOT | 121 | DOTDOT |
110 | } | 122 | } |
111 | _ => DOT, | 123 | _ => DOT, |
112 | } | 124 | }; |
113 | } | 125 | } |
114 | ':' => { | 126 | ':' => { |
115 | return match ptr.next() { | 127 | return match ptr.next() { |
@@ -118,7 +130,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
118 | COLONCOLON | 130 | COLONCOLON |
119 | } | 131 | } |
120 | _ => COLON, | 132 | _ => COLON, |
121 | } | 133 | }; |
122 | } | 134 | } |
123 | '=' => { | 135 | '=' => { |
124 | return match ptr.next() { | 136 | return match ptr.next() { |
@@ -131,7 +143,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
131 | FAT_ARROW | 143 | FAT_ARROW |
132 | } | 144 | } |
133 | _ => EQ, | 145 | _ => EQ, |
134 | } | 146 | }; |
135 | } | 147 | } |
136 | '!' => { | 148 | '!' => { |
137 | return match ptr.next() { | 149 | return match ptr.next() { |
@@ -140,7 +152,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
140 | NEQ | 152 | NEQ |
141 | } | 153 | } |
142 | _ => EXCL, | 154 | _ => EXCL, |
143 | } | 155 | }; |
144 | } | 156 | } |
145 | '-' => { | 157 | '-' => { |
146 | return if ptr.next_is('>') { | 158 | return if ptr.next_is('>') { |
@@ -148,7 +160,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
148 | THIN_ARROW | 160 | THIN_ARROW |
149 | } else { | 161 | } else { |
150 | MINUS | 162 | MINUS |
151 | } | 163 | }; |
152 | } | 164 | } |
153 | 165 | ||
154 | // If the character is an ident start not followed by another single | 166 | // If the character is an ident start not followed by another single |
@@ -202,7 +214,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
202 | return if c == '_' { UNDERSCORE } else { IDENT }; | 214 | return if c == '_' { UNDERSCORE } else { IDENT }; |
203 | } | 215 | } |
204 | ptr.bump_while(is_ident_continue); | 216 | ptr.bump_while(is_ident_continue); |
205 | if let Some(kind) = ident_to_keyword(ptr.current_token_text()) { | 217 | if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) { |
206 | return kind; | 218 | return kind; |
207 | } | 219 | } |
208 | IDENT | 220 | IDENT |