aboutsummaryrefslogtreecommitdiff
path: root/crates/libsyntax2/src/lexer
diff options
context:
space:
mode:
Diffstat (limited to 'crates/libsyntax2/src/lexer')
-rw-r--r-- crates/libsyntax2/src/lexer/comments.rs 12
-rw-r--r-- crates/libsyntax2/src/lexer/mod.rs 22
-rw-r--r-- crates/libsyntax2/src/lexer/numbers.rs 12
-rw-r--r-- crates/libsyntax2/src/lexer/ptr.rs 120
-rw-r--r-- crates/libsyntax2/src/lexer/strings.rs 16
5 files changed, 137 insertions, 45 deletions
diff --git a/crates/libsyntax2/src/lexer/comments.rs b/crates/libsyntax2/src/lexer/comments.rs
index 01acb6515..eb417c2dc 100644
--- a/crates/libsyntax2/src/lexer/comments.rs
+++ b/crates/libsyntax2/src/lexer/comments.rs
@@ -3,7 +3,7 @@ use lexer::ptr::Ptr;
3use SyntaxKind::{self, *}; 3use SyntaxKind::{self, *};
4 4
5pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { 5pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool {
6 if ptr.next_is('!') && ptr.nnext_is('/') { 6 if ptr.at_str("!/") {
7 ptr.bump(); 7 ptr.bump();
8 ptr.bump(); 8 ptr.bump();
9 bump_until_eol(ptr); 9 bump_until_eol(ptr);
@@ -14,15 +14,15 @@ pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool {
14} 14}
15 15
16fn scan_block_comment(ptr: &mut Ptr) -> Option<SyntaxKind> { 16fn scan_block_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
17 if ptr.next_is('*') { 17 if ptr.at('*') {
18 ptr.bump(); 18 ptr.bump();
19 let mut depth: u32 = 1; 19 let mut depth: u32 = 1;
20 while depth > 0 { 20 while depth > 0 {
21 if ptr.next_is('*') && ptr.nnext_is('/') { 21 if ptr.at_str("*/") {
22 depth -= 1; 22 depth -= 1;
23 ptr.bump(); 23 ptr.bump();
24 ptr.bump(); 24 ptr.bump();
25 } else if ptr.next_is('/') && ptr.nnext_is('*') { 25 } else if ptr.at_str("/*") {
26 depth += 1; 26 depth += 1;
27 ptr.bump(); 27 ptr.bump();
28 ptr.bump(); 28 ptr.bump();
@@ -37,7 +37,7 @@ fn scan_block_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
37} 37}
38 38
39pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> { 39pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
40 if ptr.next_is('/') { 40 if ptr.at('/') {
41 bump_until_eol(ptr); 41 bump_until_eol(ptr);
42 Some(COMMENT) 42 Some(COMMENT)
43 } else { 43 } else {
@@ -47,7 +47,7 @@ pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
47 47
48fn bump_until_eol(ptr: &mut Ptr) { 48fn bump_until_eol(ptr: &mut Ptr) {
49 loop { 49 loop {
50 if ptr.next_is('\n') || ptr.next_is('\r') && ptr.nnext_is('\n') { 50 if ptr.at('\n') || ptr.at_str("\r\n") {
51 return; 51 return;
52 } 52 }
53 if ptr.bump().is_none() { 53 if ptr.bump().is_none() {
diff --git a/crates/libsyntax2/src/lexer/mod.rs b/crates/libsyntax2/src/lexer/mod.rs
index f8fdc41ac..3e11db88b 100644
--- a/crates/libsyntax2/src/lexer/mod.rs
+++ b/crates/libsyntax2/src/lexer/mod.rs
@@ -67,7 +67,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
67 _ => (), 67 _ => (),
68 } 68 }
69 69
70 let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.next(), ptr.nnext()); 70 let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.current(), ptr.nth(1));
71 if ident_start { 71 if ident_start {
72 return scan_ident(c, ptr); 72 return scan_ident(c, ptr);
73 } 73 }
@@ -86,7 +86,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
86 match c { 86 match c {
87 // Multi-byte tokens. 87 // Multi-byte tokens.
88 '.' => { 88 '.' => {
89 return match (ptr.next(), ptr.nnext()) { 89 return match (ptr.current(), ptr.nth(1)) {
90 (Some('.'), Some('.')) => { 90 (Some('.'), Some('.')) => {
91 ptr.bump(); 91 ptr.bump();
92 ptr.bump(); 92 ptr.bump();
@@ -105,7 +105,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
105 }; 105 };
106 } 106 }
107 ':' => { 107 ':' => {
108 return match ptr.next() { 108 return match ptr.current() {
109 Some(':') => { 109 Some(':') => {
110 ptr.bump(); 110 ptr.bump();
111 COLONCOLON 111 COLONCOLON
@@ -114,7 +114,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
114 }; 114 };
115 } 115 }
116 '=' => { 116 '=' => {
117 return match ptr.next() { 117 return match ptr.current() {
118 Some('=') => { 118 Some('=') => {
119 ptr.bump(); 119 ptr.bump();
120 EQEQ 120 EQEQ
@@ -127,7 +127,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
127 }; 127 };
128 } 128 }
129 '!' => { 129 '!' => {
130 return match ptr.next() { 130 return match ptr.current() {
131 Some('=') => { 131 Some('=') => {
132 ptr.bump(); 132 ptr.bump();
133 NEQ 133 NEQ
@@ -136,7 +136,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
136 }; 136 };
137 } 137 }
138 '-' => { 138 '-' => {
139 return if ptr.next_is('>') { 139 return if ptr.at('>') {
140 ptr.bump(); 140 ptr.bump();
141 THIN_ARROW 141 THIN_ARROW
142 } else { 142 } else {
@@ -147,14 +147,14 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
147 // If the character is an ident start not followed by another single 147 // If the character is an ident start not followed by another single
148 // quote, then this is a lifetime name: 148 // quote, then this is a lifetime name:
149 '\'' => { 149 '\'' => {
150 return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') { 150 return if ptr.at_p(is_ident_start) && !ptr.at_str("''") {
151 ptr.bump(); 151 ptr.bump();
152 while ptr.next_is_p(is_ident_continue) { 152 while ptr.at_p(is_ident_continue) {
153 ptr.bump(); 153 ptr.bump();
154 } 154 }
155 // lifetimes shouldn't end with a single quote 155 // lifetimes shouldn't end with a single quote
156 // if we find one, then this is an invalid character literal 156 // if we find one, then this is an invalid character literal
157 if ptr.next_is('\'') { 157 if ptr.at('\'') {
158 ptr.bump(); 158 ptr.bump();
159 return CHAR; // TODO: error reporting 159 return CHAR; // TODO: error reporting
160 } 160 }
@@ -186,7 +186,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
186} 186}
187 187
188fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { 188fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
189 let is_single_letter = match ptr.next() { 189 let is_single_letter = match ptr.current() {
190 None => true, 190 None => true,
191 Some(c) if !is_ident_continue(c) => true, 191 Some(c) if !is_ident_continue(c) => true,
192 _ => false, 192 _ => false,
@@ -202,7 +202,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
202} 202}
203 203
204fn scan_literal_suffix(ptr: &mut Ptr) { 204fn scan_literal_suffix(ptr: &mut Ptr) {
205 if ptr.next_is_p(is_ident_start) { 205 if ptr.at_p(is_ident_start) {
206 ptr.bump(); 206 ptr.bump();
207 } 207 }
208 ptr.bump_while(is_ident_continue); 208 ptr.bump_while(is_ident_continue);
diff --git a/crates/libsyntax2/src/lexer/numbers.rs b/crates/libsyntax2/src/lexer/numbers.rs
index 5c4641a2d..22e7d4e99 100644
--- a/crates/libsyntax2/src/lexer/numbers.rs
+++ b/crates/libsyntax2/src/lexer/numbers.rs
@@ -5,7 +5,7 @@ use SyntaxKind::{self, *};
5 5
6pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { 6pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
7 if c == '0' { 7 if c == '0' {
8 match ptr.next().unwrap_or('\0') { 8 match ptr.current().unwrap_or('\0') {
9 'b' | 'o' => { 9 'b' | 'o' => {
10 ptr.bump(); 10 ptr.bump();
11 scan_digits(ptr, false); 11 scan_digits(ptr, false);
@@ -26,7 +26,7 @@ pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
26 // might be a float, but don't be greedy if this is actually an 26 // might be a float, but don't be greedy if this is actually an
27 // integer literal followed by field/method access or a range pattern 27 // integer literal followed by field/method access or a range pattern
28 // (`0..2` and `12.foo()`) 28 // (`0..2` and `12.foo()`)
29 if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) { 29 if ptr.at('.') && !(ptr.at_str("..") || ptr.nth_is_p(1, is_ident_start)) {
30 // might have stuff after the ., and if it does, it needs to start 30 // might have stuff after the ., and if it does, it needs to start
31 // with a number 31 // with a number
32 ptr.bump(); 32 ptr.bump();
@@ -35,7 +35,7 @@ pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
35 return FLOAT_NUMBER; 35 return FLOAT_NUMBER;
36 } 36 }
37 // it might be a float if it has an exponent 37 // it might be a float if it has an exponent
38 if ptr.next_is('e') || ptr.next_is('E') { 38 if ptr.at('e') || ptr.at('E') {
39 scan_float_exponent(ptr); 39 scan_float_exponent(ptr);
40 return FLOAT_NUMBER; 40 return FLOAT_NUMBER;
41 } 41 }
@@ -43,7 +43,7 @@ pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
43} 43}
44 44
45fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { 45fn scan_digits(ptr: &mut Ptr, allow_hex: bool) {
46 while let Some(c) = ptr.next() { 46 while let Some(c) = ptr.current() {
47 match c { 47 match c {
48 '_' | '0'...'9' => { 48 '_' | '0'...'9' => {
49 ptr.bump(); 49 ptr.bump();
@@ -57,9 +57,9 @@ fn scan_digits(ptr: &mut Ptr, allow_hex: bool) {
57} 57}
58 58
59fn scan_float_exponent(ptr: &mut Ptr) { 59fn scan_float_exponent(ptr: &mut Ptr) {
60 if ptr.next_is('e') || ptr.next_is('E') { 60 if ptr.at('e') || ptr.at('E') {
61 ptr.bump(); 61 ptr.bump();
62 if ptr.next_is('-') || ptr.next_is('+') { 62 if ptr.at('-') || ptr.at('+') {
63 ptr.bump(); 63 ptr.bump();
64 } 64 }
65 scan_digits(ptr, false); 65 scan_digits(ptr, false);
diff --git a/crates/libsyntax2/src/lexer/ptr.rs b/crates/libsyntax2/src/lexer/ptr.rs
index d1391fd5f..c9a5354ea 100644
--- a/crates/libsyntax2/src/lexer/ptr.rs
+++ b/crates/libsyntax2/src/lexer/ptr.rs
@@ -2,12 +2,14 @@ use TextUnit;
2 2
3use std::str::Chars; 3use std::str::Chars;
4 4
5/// A simple view into the characters of a string.
5pub(crate) struct Ptr<'s> { 6pub(crate) struct Ptr<'s> {
6 text: &'s str, 7 text: &'s str,
7 len: TextUnit, 8 len: TextUnit,
8} 9}
9 10
10impl<'s> Ptr<'s> { 11impl<'s> Ptr<'s> {
12 /// Creates a new `Ptr` from a string.
11 pub fn new(text: &'s str) -> Ptr<'s> { 13 pub fn new(text: &'s str) -> Ptr<'s> {
12 Ptr { 14 Ptr {
13 text, 15 text,
@@ -15,45 +17,55 @@ impl<'s> Ptr<'s> {
15 } 17 }
16 } 18 }
17 19
20 /// Consumes the `Ptr` and returns the length of the text bumped over so far.
18 pub fn into_len(self) -> TextUnit { 21 pub fn into_len(self) -> TextUnit {
19 self.len 22 self.len
20 } 23 }
21 24
22 pub fn next(&self) -> Option<char> { 25 /// Gets the current character, if one exists.
26 pub fn current(&self) -> Option<char> {
23 self.chars().next() 27 self.chars().next()
24 } 28 }
25 29
26 pub fn nnext(&self) -> Option<char> { 30 /// Gets the nth character from the current.
27 let mut chars = self.chars(); 31 /// For example, 0 will return the current character, 1 will return the next, etc.
28 chars.next()?; 32 pub fn nth(&self, n: u32) -> Option<char> {
29 chars.next() 33 let mut chars = self.chars().peekable();
34 chars.by_ref().skip(n as usize).next()
30 } 35 }
31 36
32 pub fn next_is(&self, c: char) -> bool { 37 /// Checks whether the current character is `c`.
33 self.next() == Some(c) 38 pub fn at(&self, c: char) -> bool {
39 self.current() == Some(c)
34 } 40 }
35 41
36 pub fn nnext_is(&self, c: char) -> bool { 42 /// Checks whether the remaining text, starting at the current character, begins with `s`.
37 self.nnext() == Some(c) 43 pub fn at_str(&self, s: &str) -> bool {
44 let chars = self.chars();
45 chars.as_str().starts_with(s)
38 } 46 }
39 47
40 pub fn next_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { 48 /// Checks whether the current character satisfies the predicate `p`.
41 self.next().map(p) == Some(true) 49 pub fn at_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
50 self.current().map(p) == Some(true)
42 } 51 }
43 52
44 pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { 53 /// Checks whether the nth character satisfies the predicate `p`.
45 self.nnext().map(p) == Some(true) 54 pub fn nth_is_p<P: Fn(char) -> bool>(&self, n: u32, p: P) -> bool {
55 self.nth(n).map(p) == Some(true)
46 } 56 }
47 57
58 /// Moves to the next character.
48 pub fn bump(&mut self) -> Option<char> { 59 pub fn bump(&mut self) -> Option<char> {
49 let ch = self.chars().next()?; 60 let ch = self.chars().next()?;
50 self.len += TextUnit::of_char(ch); 61 self.len += TextUnit::of_char(ch);
51 Some(ch) 62 Some(ch)
52 } 63 }
53 64
65 /// Moves to the next character as long as `pred` is satisfied.
54 pub fn bump_while<F: Fn(char) -> bool>(&mut self, pred: F) { 66 pub fn bump_while<F: Fn(char) -> bool>(&mut self, pred: F) {
55 loop { 67 loop {
56 match self.next() { 68 match self.current() {
57 Some(c) if pred(c) => { 69 Some(c) if pred(c) => {
58 self.bump(); 70 self.bump();
59 } 71 }
@@ -62,13 +74,93 @@ impl<'s> Ptr<'s> {
62 } 74 }
63 } 75 }
64 76
77 /// Returns the text up to the current point.
65 pub fn current_token_text(&self) -> &str { 78 pub fn current_token_text(&self) -> &str {
66 let len: u32 = self.len.into(); 79 let len: u32 = self.len.into();
67 &self.text[..len as usize] 80 &self.text[..len as usize]
68 } 81 }
69 82
83 /// Returns an iterator over the remaining characters.
70 fn chars(&self) -> Chars { 84 fn chars(&self) -> Chars {
71 let len: u32 = self.len.into(); 85 let len: u32 = self.len.into();
72 self.text[len as usize..].chars() 86 self.text[len as usize..].chars()
73 } 87 }
74} 88}
89
90#[cfg(test)]
91mod tests {
92 use super::*;
93
94 #[test]
95 fn test_current() {
96 let ptr = Ptr::new("test");
97 assert_eq!(ptr.current(), Some('t'));
98 }
99
100 #[test]
101 fn test_nth() {
102 let ptr = Ptr::new("test");
103 assert_eq!(ptr.nth(0), Some('t'));
104 assert_eq!(ptr.nth(1), Some('e'));
105 assert_eq!(ptr.nth(2), Some('s'));
106 assert_eq!(ptr.nth(3), Some('t'));
107 assert_eq!(ptr.nth(4), None);
108 }
109
110 #[test]
111 fn test_at() {
112 let ptr = Ptr::new("test");
113 assert!(ptr.at('t'));
114 assert!(!ptr.at('a'));
115 }
116
117 #[test]
118 fn test_at_str() {
119 let ptr = Ptr::new("test");
120 assert!(ptr.at_str("t"));
121 assert!(ptr.at_str("te"));
122 assert!(ptr.at_str("test"));
123 assert!(!ptr.at_str("tests"));
124 assert!(!ptr.at_str("rust"));
125 }
126
127 #[test]
128 fn test_at_p() {
129 let ptr = Ptr::new("test");
130 assert!(ptr.at_p(|c| c == 't'));
131 assert!(!ptr.at_p(|c| c == 'e'));
132 }
133
134 #[test]
135 fn test_nth_is_p() {
136 let ptr = Ptr::new("test");
137 assert!(ptr.nth_is_p(0,|c| c == 't'));
138 assert!(!ptr.nth_is_p(1,|c| c == 't'));
139 assert!(ptr.nth_is_p(3,|c| c == 't'));
140 assert!(!ptr.nth_is_p(150,|c| c == 't'));
141 }
142
143 #[test]
144 fn test_bump() {
145 let mut ptr = Ptr::new("test");
146 assert_eq!(ptr.current(), Some('t'));
147 ptr.bump();
148 assert_eq!(ptr.current(), Some('e'));
149 ptr.bump();
150 assert_eq!(ptr.current(), Some('s'));
151 ptr.bump();
152 assert_eq!(ptr.current(), Some('t'));
153 ptr.bump();
154 assert_eq!(ptr.current(), None);
155 ptr.bump();
156 assert_eq!(ptr.current(), None);
157 }
158
159 #[test]
160 fn test_bump_while() {
161 let mut ptr = Ptr::new("test");
162 assert_eq!(ptr.current(), Some('t'));
163 ptr.bump_while(|c| c != 's');
164 assert_eq!(ptr.current(), Some('s'));
165 }
166}
diff --git a/crates/libsyntax2/src/lexer/strings.rs b/crates/libsyntax2/src/lexer/strings.rs
index e6ade54a4..5ff483d14 100644
--- a/crates/libsyntax2/src/lexer/strings.rs
+++ b/crates/libsyntax2/src/lexer/strings.rs
@@ -15,11 +15,11 @@ pub(crate) fn is_string_literal_start(c: char, c1: Option<char>, c2: Option<char
15} 15}
16 16
17pub(crate) fn scan_char(ptr: &mut Ptr) { 17pub(crate) fn scan_char(ptr: &mut Ptr) {
18 while let Some(c) = ptr.next() { 18 while let Some(c) = ptr.current() {
19 match c { 19 match c {
20 '\\' => { 20 '\\' => {
21 ptr.bump(); 21 ptr.bump();
22 if ptr.next_is('\\') || ptr.next_is('\'') { 22 if ptr.at('\\') || ptr.at('\'') {
23 ptr.bump(); 23 ptr.bump();
24 } 24 }
25 } 25 }
@@ -57,11 +57,11 @@ pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind {
57} 57}
58 58
59pub(crate) fn scan_string(ptr: &mut Ptr) { 59pub(crate) fn scan_string(ptr: &mut Ptr) {
60 while let Some(c) = ptr.next() { 60 while let Some(c) = ptr.current() {
61 match c { 61 match c {
62 '\\' => { 62 '\\' => {
63 ptr.bump(); 63 ptr.bump();
64 if ptr.next_is('\\') || ptr.next_is('"') { 64 if ptr.at('\\') || ptr.at('"') {
65 ptr.bump(); 65 ptr.bump();
66 } 66 }
67 } 67 }
@@ -78,11 +78,11 @@ pub(crate) fn scan_string(ptr: &mut Ptr) {
78 78
79pub(crate) fn scan_raw_string(ptr: &mut Ptr) { 79pub(crate) fn scan_raw_string(ptr: &mut Ptr) {
80 let mut hashes = 0; 80 let mut hashes = 0;
81 while ptr.next_is('#') { 81 while ptr.at('#') {
82 hashes += 1; 82 hashes += 1;
83 ptr.bump(); 83 ptr.bump();
84 } 84 }
85 if !ptr.next_is('"') { 85 if !ptr.at('"') {
86 return; 86 return;
87 } 87 }
88 ptr.bump(); 88 ptr.bump();
@@ -90,7 +90,7 @@ pub(crate) fn scan_raw_string(ptr: &mut Ptr) {
90 while let Some(c) = ptr.bump() { 90 while let Some(c) = ptr.bump() {
91 if c == '"' { 91 if c == '"' {
92 let mut hashes_left = hashes; 92 let mut hashes_left = hashes;
93 while ptr.next_is('#') && hashes_left > 0{ 93 while ptr.at('#') && hashes_left > 0{
94 hashes_left -= 1; 94 hashes_left -= 1;
95 ptr.bump(); 95 ptr.bump();
96 } 96 }
@@ -110,7 +110,7 @@ fn scan_byte_string(ptr: &mut Ptr) {
110} 110}
111 111
112fn scan_raw_byte_string(ptr: &mut Ptr) { 112fn scan_raw_byte_string(ptr: &mut Ptr) {
113 if !ptr.next_is('"') { 113 if !ptr.at('"') {
114 return; 114 return;
115 } 115 }
116 ptr.bump(); 116 ptr.bump();