aboutsummaryrefslogtreecommitdiff
path: root/src/lexer/mod.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lexer/mod.rs')
-rw-r--r--src/lexer/mod.rs209
1 files changed, 0 insertions, 209 deletions
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
deleted file mode 100644
index f8fdc41ac..000000000
--- a/src/lexer/mod.rs
+++ /dev/null
@@ -1,209 +0,0 @@
1mod classes;
2mod comments;
3mod numbers;
4mod ptr;
5mod strings;
6
7use {
8 SyntaxKind::{self, *},
9 TextUnit,
10};
11
12use self::{
13 classes::*,
14 comments::{scan_comment, scan_shebang},
15 numbers::scan_number,
16 ptr::Ptr,
17 strings::{
18 is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, scan_string,
19 },
20};
21
22/// A token of Rust source.
23#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
24pub struct Token {
25 /// The kind of token.
26 pub kind: SyntaxKind,
27 /// The length of the token.
28 pub len: TextUnit,
29}
30
31/// Break a string up into its component tokens
32pub fn tokenize(text: &str) -> Vec<Token> {
33 let mut text = text;
34 let mut acc = Vec::new();
35 while !text.is_empty() {
36 let token = next_token(text);
37 acc.push(token);
38 let len: u32 = token.len.into();
39 text = &text[len as usize..];
40 }
41 acc
42}
43
44/// Get the next token from a string
45pub fn next_token(text: &str) -> Token {
46 assert!(!text.is_empty());
47 let mut ptr = Ptr::new(text);
48 let c = ptr.bump().unwrap();
49 let kind = next_token_inner(c, &mut ptr);
50 let len = ptr.into_len();
51 Token { kind, len }
52}
53
54fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
55 if is_whitespace(c) {
56 ptr.bump_while(is_whitespace);
57 return WHITESPACE;
58 }
59
60 match c {
61 '#' => if scan_shebang(ptr) {
62 return SHEBANG;
63 },
64 '/' => if let Some(kind) = scan_comment(ptr) {
65 return kind;
66 },
67 _ => (),
68 }
69
70 let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.next(), ptr.nnext());
71 if ident_start {
72 return scan_ident(c, ptr);
73 }
74
75 if is_dec_digit(c) {
76 let kind = scan_number(c, ptr);
77 scan_literal_suffix(ptr);
78 return kind;
79 }
80
81 // One-byte tokens.
82 if let Some(kind) = SyntaxKind::from_char(c) {
83 return kind;
84 }
85
86 match c {
87 // Multi-byte tokens.
88 '.' => {
89 return match (ptr.next(), ptr.nnext()) {
90 (Some('.'), Some('.')) => {
91 ptr.bump();
92 ptr.bump();
93 DOTDOTDOT
94 }
95 (Some('.'), Some('=')) => {
96 ptr.bump();
97 ptr.bump();
98 DOTDOTEQ
99 }
100 (Some('.'), _) => {
101 ptr.bump();
102 DOTDOT
103 }
104 _ => DOT,
105 };
106 }
107 ':' => {
108 return match ptr.next() {
109 Some(':') => {
110 ptr.bump();
111 COLONCOLON
112 }
113 _ => COLON,
114 };
115 }
116 '=' => {
117 return match ptr.next() {
118 Some('=') => {
119 ptr.bump();
120 EQEQ
121 }
122 Some('>') => {
123 ptr.bump();
124 FAT_ARROW
125 }
126 _ => EQ,
127 };
128 }
129 '!' => {
130 return match ptr.next() {
131 Some('=') => {
132 ptr.bump();
133 NEQ
134 }
135 _ => EXCL,
136 };
137 }
138 '-' => {
139 return if ptr.next_is('>') {
140 ptr.bump();
141 THIN_ARROW
142 } else {
143 MINUS
144 };
145 }
146
147 // If the character is an ident start not followed by another single
148 // quote, then this is a lifetime name:
149 '\'' => {
150 return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') {
151 ptr.bump();
152 while ptr.next_is_p(is_ident_continue) {
153 ptr.bump();
154 }
155 // lifetimes shouldn't end with a single quote
156 // if we find one, then this is an invalid character literal
157 if ptr.next_is('\'') {
158 ptr.bump();
159 return CHAR; // TODO: error reporting
160 }
161 LIFETIME
162 } else {
163 scan_char(ptr);
164 scan_literal_suffix(ptr);
165 CHAR
166 };
167 }
168 'b' => {
169 let kind = scan_byte_char_or_string(ptr);
170 scan_literal_suffix(ptr);
171 return kind;
172 }
173 '"' => {
174 scan_string(ptr);
175 scan_literal_suffix(ptr);
176 return STRING;
177 }
178 'r' => {
179 scan_raw_string(ptr);
180 scan_literal_suffix(ptr);
181 return RAW_STRING;
182 }
183 _ => (),
184 }
185 ERROR
186}
187
188fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
189 let is_single_letter = match ptr.next() {
190 None => true,
191 Some(c) if !is_ident_continue(c) => true,
192 _ => false,
193 };
194 if is_single_letter {
195 return if c == '_' { UNDERSCORE } else { IDENT };
196 }
197 ptr.bump_while(is_ident_continue);
198 if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) {
199 return kind;
200 }
201 IDENT
202}
203
204fn scan_literal_suffix(ptr: &mut Ptr) {
205 if ptr.next_is_p(is_ident_start) {
206 ptr.bump();
207 }
208 ptr.bump_while(is_ident_continue);
209}