diff options
Diffstat (limited to 'crates/libsyntax2')
-rw-r--r-- | crates/libsyntax2/src/lexer/comments.rs | 12 | ||||
-rw-r--r-- | crates/libsyntax2/src/lexer/mod.rs | 22 | ||||
-rw-r--r-- | crates/libsyntax2/src/lexer/numbers.rs | 12 | ||||
-rw-r--r-- | crates/libsyntax2/src/lexer/ptr.rs | 42 | ||||
-rw-r--r-- | crates/libsyntax2/src/lexer/strings.rs | 16 |
5 files changed, 59 insertions, 45 deletions
diff --git a/crates/libsyntax2/src/lexer/comments.rs b/crates/libsyntax2/src/lexer/comments.rs index 01acb6515..eb417c2dc 100644 --- a/crates/libsyntax2/src/lexer/comments.rs +++ b/crates/libsyntax2/src/lexer/comments.rs | |||
@@ -3,7 +3,7 @@ use lexer::ptr::Ptr; | |||
3 | use SyntaxKind::{self, *}; | 3 | use SyntaxKind::{self, *}; |
4 | 4 | ||
5 | pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { | 5 | pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { |
6 | if ptr.next_is('!') && ptr.nnext_is('/') { | 6 | if ptr.at_str("!/") { |
7 | ptr.bump(); | 7 | ptr.bump(); |
8 | ptr.bump(); | 8 | ptr.bump(); |
9 | bump_until_eol(ptr); | 9 | bump_until_eol(ptr); |
@@ -14,15 +14,15 @@ pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { | |||
14 | } | 14 | } |
15 | 15 | ||
16 | fn scan_block_comment(ptr: &mut Ptr) -> Option<SyntaxKind> { | 16 | fn scan_block_comment(ptr: &mut Ptr) -> Option<SyntaxKind> { |
17 | if ptr.next_is('*') { | 17 | if ptr.at('*') { |
18 | ptr.bump(); | 18 | ptr.bump(); |
19 | let mut depth: u32 = 1; | 19 | let mut depth: u32 = 1; |
20 | while depth > 0 { | 20 | while depth > 0 { |
21 | if ptr.next_is('*') && ptr.nnext_is('/') { | 21 | if ptr.at_str("*/") { |
22 | depth -= 1; | 22 | depth -= 1; |
23 | ptr.bump(); | 23 | ptr.bump(); |
24 | ptr.bump(); | 24 | ptr.bump(); |
25 | } else if ptr.next_is('/') && ptr.nnext_is('*') { | 25 | } else if ptr.at_str("/*") { |
26 | depth += 1; | 26 | depth += 1; |
27 | ptr.bump(); | 27 | ptr.bump(); |
28 | ptr.bump(); | 28 | ptr.bump(); |
@@ -37,7 +37,7 @@ fn scan_block_comment(ptr: &mut Ptr) -> Option<SyntaxKind> { | |||
37 | } | 37 | } |
38 | 38 | ||
39 | pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> { | 39 | pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> { |
40 | if ptr.next_is('/') { | 40 | if ptr.at('/') { |
41 | bump_until_eol(ptr); | 41 | bump_until_eol(ptr); |
42 | Some(COMMENT) | 42 | Some(COMMENT) |
43 | } else { | 43 | } else { |
@@ -47,7 +47,7 @@ pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> { | |||
47 | 47 | ||
48 | fn bump_until_eol(ptr: &mut Ptr) { | 48 | fn bump_until_eol(ptr: &mut Ptr) { |
49 | loop { | 49 | loop { |
50 | if ptr.next_is('\n') || ptr.next_is('\r') && ptr.nnext_is('\n') { | 50 | if ptr.at('\n') || ptr.at_str("\r\n") { |
51 | return; | 51 | return; |
52 | } | 52 | } |
53 | if ptr.bump().is_none() { | 53 | if ptr.bump().is_none() { |
diff --git a/crates/libsyntax2/src/lexer/mod.rs b/crates/libsyntax2/src/lexer/mod.rs index f8fdc41ac..3e11db88b 100644 --- a/crates/libsyntax2/src/lexer/mod.rs +++ b/crates/libsyntax2/src/lexer/mod.rs | |||
@@ -67,7 +67,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
67 | _ => (), | 67 | _ => (), |
68 | } | 68 | } |
69 | 69 | ||
70 | let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.next(), ptr.nnext()); | 70 | let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.current(), ptr.nth(1)); |
71 | if ident_start { | 71 | if ident_start { |
72 | return scan_ident(c, ptr); | 72 | return scan_ident(c, ptr); |
73 | } | 73 | } |
@@ -86,7 +86,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
86 | match c { | 86 | match c { |
87 | // Multi-byte tokens. | 87 | // Multi-byte tokens. |
88 | '.' => { | 88 | '.' => { |
89 | return match (ptr.next(), ptr.nnext()) { | 89 | return match (ptr.current(), ptr.nth(1)) { |
90 | (Some('.'), Some('.')) => { | 90 | (Some('.'), Some('.')) => { |
91 | ptr.bump(); | 91 | ptr.bump(); |
92 | ptr.bump(); | 92 | ptr.bump(); |
@@ -105,7 +105,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
105 | }; | 105 | }; |
106 | } | 106 | } |
107 | ':' => { | 107 | ':' => { |
108 | return match ptr.next() { | 108 | return match ptr.current() { |
109 | Some(':') => { | 109 | Some(':') => { |
110 | ptr.bump(); | 110 | ptr.bump(); |
111 | COLONCOLON | 111 | COLONCOLON |
@@ -114,7 +114,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
114 | }; | 114 | }; |
115 | } | 115 | } |
116 | '=' => { | 116 | '=' => { |
117 | return match ptr.next() { | 117 | return match ptr.current() { |
118 | Some('=') => { | 118 | Some('=') => { |
119 | ptr.bump(); | 119 | ptr.bump(); |
120 | EQEQ | 120 | EQEQ |
@@ -127,7 +127,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
127 | }; | 127 | }; |
128 | } | 128 | } |
129 | '!' => { | 129 | '!' => { |
130 | return match ptr.next() { | 130 | return match ptr.current() { |
131 | Some('=') => { | 131 | Some('=') => { |
132 | ptr.bump(); | 132 | ptr.bump(); |
133 | NEQ | 133 | NEQ |
@@ -136,7 +136,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
136 | }; | 136 | }; |
137 | } | 137 | } |
138 | '-' => { | 138 | '-' => { |
139 | return if ptr.next_is('>') { | 139 | return if ptr.at('>') { |
140 | ptr.bump(); | 140 | ptr.bump(); |
141 | THIN_ARROW | 141 | THIN_ARROW |
142 | } else { | 142 | } else { |
@@ -147,14 +147,14 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
147 | // If the character is an ident start not followed by another single | 147 | // If the character is an ident start not followed by another single |
148 | // quote, then this is a lifetime name: | 148 | // quote, then this is a lifetime name: |
149 | '\'' => { | 149 | '\'' => { |
150 | return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') { | 150 | return if ptr.at_p(is_ident_start) && !ptr.at_str("''") { |
151 | ptr.bump(); | 151 | ptr.bump(); |
152 | while ptr.next_is_p(is_ident_continue) { | 152 | while ptr.at_p(is_ident_continue) { |
153 | ptr.bump(); | 153 | ptr.bump(); |
154 | } | 154 | } |
155 | // lifetimes shouldn't end with a single quote | 155 | // lifetimes shouldn't end with a single quote |
156 | // if we find one, then this is an invalid character literal | 156 | // if we find one, then this is an invalid character literal |
157 | if ptr.next_is('\'') { | 157 | if ptr.at('\'') { |
158 | ptr.bump(); | 158 | ptr.bump(); |
159 | return CHAR; // TODO: error reporting | 159 | return CHAR; // TODO: error reporting |
160 | } | 160 | } |
@@ -186,7 +186,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
186 | } | 186 | } |
187 | 187 | ||
188 | fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | 188 | fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { |
189 | let is_single_letter = match ptr.next() { | 189 | let is_single_letter = match ptr.current() { |
190 | None => true, | 190 | None => true, |
191 | Some(c) if !is_ident_continue(c) => true, | 191 | Some(c) if !is_ident_continue(c) => true, |
192 | _ => false, | 192 | _ => false, |
@@ -202,7 +202,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
202 | } | 202 | } |
203 | 203 | ||
204 | fn scan_literal_suffix(ptr: &mut Ptr) { | 204 | fn scan_literal_suffix(ptr: &mut Ptr) { |
205 | if ptr.next_is_p(is_ident_start) { | 205 | if ptr.at_p(is_ident_start) { |
206 | ptr.bump(); | 206 | ptr.bump(); |
207 | } | 207 | } |
208 | ptr.bump_while(is_ident_continue); | 208 | ptr.bump_while(is_ident_continue); |
diff --git a/crates/libsyntax2/src/lexer/numbers.rs b/crates/libsyntax2/src/lexer/numbers.rs index 5c4641a2d..22e7d4e99 100644 --- a/crates/libsyntax2/src/lexer/numbers.rs +++ b/crates/libsyntax2/src/lexer/numbers.rs | |||
@@ -5,7 +5,7 @@ use SyntaxKind::{self, *}; | |||
5 | 5 | ||
6 | pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { | 6 | pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { |
7 | if c == '0' { | 7 | if c == '0' { |
8 | match ptr.next().unwrap_or('\0') { | 8 | match ptr.current().unwrap_or('\0') { |
9 | 'b' | 'o' => { | 9 | 'b' | 'o' => { |
10 | ptr.bump(); | 10 | ptr.bump(); |
11 | scan_digits(ptr, false); | 11 | scan_digits(ptr, false); |
@@ -26,7 +26,7 @@ pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
26 | // might be a float, but don't be greedy if this is actually an | 26 | // might be a float, but don't be greedy if this is actually an |
27 | // integer literal followed by field/method access or a range pattern | 27 | // integer literal followed by field/method access or a range pattern |
28 | // (`0..2` and `12.foo()`) | 28 | // (`0..2` and `12.foo()`) |
29 | if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) { | 29 | if ptr.at('.') && !(ptr.at_str("..") || ptr.nth_is_p(1, is_ident_start)) { |
30 | // might have stuff after the ., and if it does, it needs to start | 30 | // might have stuff after the ., and if it does, it needs to start |
31 | // with a number | 31 | // with a number |
32 | ptr.bump(); | 32 | ptr.bump(); |
@@ -35,7 +35,7 @@ pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
35 | return FLOAT_NUMBER; | 35 | return FLOAT_NUMBER; |
36 | } | 36 | } |
37 | // it might be a float if it has an exponent | 37 | // it might be a float if it has an exponent |
38 | if ptr.next_is('e') || ptr.next_is('E') { | 38 | if ptr.at('e') || ptr.at('E') { |
39 | scan_float_exponent(ptr); | 39 | scan_float_exponent(ptr); |
40 | return FLOAT_NUMBER; | 40 | return FLOAT_NUMBER; |
41 | } | 41 | } |
@@ -43,7 +43,7 @@ pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
43 | } | 43 | } |
44 | 44 | ||
45 | fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { | 45 | fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { |
46 | while let Some(c) = ptr.next() { | 46 | while let Some(c) = ptr.current() { |
47 | match c { | 47 | match c { |
48 | '_' | '0'...'9' => { | 48 | '_' | '0'...'9' => { |
49 | ptr.bump(); | 49 | ptr.bump(); |
@@ -57,9 +57,9 @@ fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { | |||
57 | } | 57 | } |
58 | 58 | ||
59 | fn scan_float_exponent(ptr: &mut Ptr) { | 59 | fn scan_float_exponent(ptr: &mut Ptr) { |
60 | if ptr.next_is('e') || ptr.next_is('E') { | 60 | if ptr.at('e') || ptr.at('E') { |
61 | ptr.bump(); | 61 | ptr.bump(); |
62 | if ptr.next_is('-') || ptr.next_is('+') { | 62 | if ptr.at('-') || ptr.at('+') { |
63 | ptr.bump(); | 63 | ptr.bump(); |
64 | } | 64 | } |
65 | scan_digits(ptr, false); | 65 | scan_digits(ptr, false); |
diff --git a/crates/libsyntax2/src/lexer/ptr.rs b/crates/libsyntax2/src/lexer/ptr.rs index d1391fd5f..aa59e33cc 100644 --- a/crates/libsyntax2/src/lexer/ptr.rs +++ b/crates/libsyntax2/src/lexer/ptr.rs | |||
@@ -2,12 +2,14 @@ use TextUnit; | |||
2 | 2 | ||
3 | use std::str::Chars; | 3 | use std::str::Chars; |
4 | 4 | ||
5 | /// A simple view into the characters of a string. | ||
5 | pub(crate) struct Ptr<'s> { | 6 | pub(crate) struct Ptr<'s> { |
6 | text: &'s str, | 7 | text: &'s str, |
7 | len: TextUnit, | 8 | len: TextUnit, |
8 | } | 9 | } |
9 | 10 | ||
10 | impl<'s> Ptr<'s> { | 11 | impl<'s> Ptr<'s> { |
12 | /// Creates a new `Ptr` from a string. | ||
11 | pub fn new(text: &'s str) -> Ptr<'s> { | 13 | pub fn new(text: &'s str) -> Ptr<'s> { |
12 | Ptr { | 14 | Ptr { |
13 | text, | 15 | text, |
@@ -15,45 +17,55 @@ impl<'s> Ptr<'s> { | |||
15 | } | 17 | } |
16 | } | 18 | } |
17 | 19 | ||
20 | /// Gets the length of the text consumed so far. | ||
18 | pub fn into_len(self) -> TextUnit { | 21 | pub fn into_len(self) -> TextUnit { |
19 | self.len | 22 | self.len |
20 | } | 23 | } |
21 | 24 | ||
22 | pub fn next(&self) -> Option<char> { | 25 | /// Gets the current character, if one exists. |
26 | pub fn current(&self) -> Option<char> { | ||
23 | self.chars().next() | 27 | self.chars().next() |
24 | } | 28 | } |
25 | 29 | ||
26 | pub fn nnext(&self) -> Option<char> { | 30 | /// Gets the nth character from the current. |
27 | let mut chars = self.chars(); | 31 | /// For example, 0 will return the current character, 1 will return the next, etc. |
28 | chars.next()?; | 32 | pub fn nth(&self, n: u32) -> Option<char> { |
29 | chars.next() | 33 | let mut chars = self.chars().peekable(); |
34 | chars.by_ref().skip(n as usize).next() | ||
30 | } | 35 | } |
31 | 36 | ||
32 | pub fn next_is(&self, c: char) -> bool { | 37 | /// Checks whether the current character is `c`. |
33 | self.next() == Some(c) | 38 | pub fn at(&self, c: char) -> bool { |
39 | self.current() == Some(c) | ||
34 | } | 40 | } |
35 | 41 | ||
36 | pub fn nnext_is(&self, c: char) -> bool { | 42 | /// Checks whether the remaining text, starting at the current character, begins with `s`. |
37 | self.nnext() == Some(c) | 43 | pub fn at_str(&self, s: &str) -> bool { |
44 | let chars = self.chars(); | ||
45 | chars.as_str().starts_with(s) | ||
38 | } | 46 | } |
39 | 47 | ||
40 | pub fn next_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { | 48 | /// Checks whether the current character satisfies the predicate `p`. |
41 | self.next().map(p) == Some(true) | 49 | pub fn at_p<P: Fn(char) -> bool>(&self, p: P) -> bool { |
50 | self.current().map(p) == Some(true) | ||
42 | } | 51 | } |
43 | 52 | ||
44 | pub fn nnext_is_p<P: Fn(char) -> bool>(&self, p: P) -> bool { | 53 | /// Checks whether the nth character satisfies the predicate `p`. |
45 | self.nnext().map(p) == Some(true) | 54 | pub fn nth_is_p<P: Fn(char) -> bool>(&self, n: u32, p: P) -> bool { |
55 | self.nth(n).map(p) == Some(true) | ||
46 | } | 56 | } |
47 | 57 | ||
58 | /// Moves past the current character and returns it, or returns `None` at the end of the text. | ||
48 | pub fn bump(&mut self) -> Option<char> { | 59 | pub fn bump(&mut self) -> Option<char> { |
49 | let ch = self.chars().next()?; | 60 | let ch = self.chars().next()?; |
50 | self.len += TextUnit::of_char(ch); | 61 | self.len += TextUnit::of_char(ch); |
51 | Some(ch) | 62 | Some(ch) |
52 | } | 63 | } |
53 | 64 | ||
65 | /// Moves to the next character as long as `pred` is satisfied. | ||
54 | pub fn bump_while<F: Fn(char) -> bool>(&mut self, pred: F) { | 66 | pub fn bump_while<F: Fn(char) -> bool>(&mut self, pred: F) { |
55 | loop { | 67 | loop { |
56 | match self.next() { | 68 | match self.current() { |
57 | Some(c) if pred(c) => { | 69 | Some(c) if pred(c) => { |
58 | self.bump(); | 70 | self.bump(); |
59 | } | 71 | } |
@@ -62,11 +74,13 @@ impl<'s> Ptr<'s> { | |||
62 | } | 74 | } |
63 | } | 75 | } |
64 | 76 | ||
77 | /// Returns the text up to the current point. | ||
65 | pub fn current_token_text(&self) -> &str { | 78 | pub fn current_token_text(&self) -> &str { |
66 | let len: u32 = self.len.into(); | 79 | let len: u32 = self.len.into(); |
67 | &self.text[..len as usize] | 80 | &self.text[..len as usize] |
68 | } | 81 | } |
69 | 82 | ||
83 | /// Returns an iterator over the remaining characters. | ||
70 | fn chars(&self) -> Chars { | 84 | fn chars(&self) -> Chars { |
71 | let len: u32 = self.len.into(); | 85 | let len: u32 = self.len.into(); |
72 | self.text[len as usize..].chars() | 86 | self.text[len as usize..].chars() |
diff --git a/crates/libsyntax2/src/lexer/strings.rs b/crates/libsyntax2/src/lexer/strings.rs index e6ade54a4..5ff483d14 100644 --- a/crates/libsyntax2/src/lexer/strings.rs +++ b/crates/libsyntax2/src/lexer/strings.rs | |||
@@ -15,11 +15,11 @@ pub(crate) fn is_string_literal_start(c: char, c1: Option<char>, c2: Option<char | |||
15 | } | 15 | } |
16 | 16 | ||
17 | pub(crate) fn scan_char(ptr: &mut Ptr) { | 17 | pub(crate) fn scan_char(ptr: &mut Ptr) { |
18 | while let Some(c) = ptr.next() { | 18 | while let Some(c) = ptr.current() { |
19 | match c { | 19 | match c { |
20 | '\\' => { | 20 | '\\' => { |
21 | ptr.bump(); | 21 | ptr.bump(); |
22 | if ptr.next_is('\\') || ptr.next_is('\'') { | 22 | if ptr.at('\\') || ptr.at('\'') { |
23 | ptr.bump(); | 23 | ptr.bump(); |
24 | } | 24 | } |
25 | } | 25 | } |
@@ -57,11 +57,11 @@ pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind { | |||
57 | } | 57 | } |
58 | 58 | ||
59 | pub(crate) fn scan_string(ptr: &mut Ptr) { | 59 | pub(crate) fn scan_string(ptr: &mut Ptr) { |
60 | while let Some(c) = ptr.next() { | 60 | while let Some(c) = ptr.current() { |
61 | match c { | 61 | match c { |
62 | '\\' => { | 62 | '\\' => { |
63 | ptr.bump(); | 63 | ptr.bump(); |
64 | if ptr.next_is('\\') || ptr.next_is('"') { | 64 | if ptr.at('\\') || ptr.at('"') { |
65 | ptr.bump(); | 65 | ptr.bump(); |
66 | } | 66 | } |
67 | } | 67 | } |
@@ -78,11 +78,11 @@ pub(crate) fn scan_string(ptr: &mut Ptr) { | |||
78 | 78 | ||
79 | pub(crate) fn scan_raw_string(ptr: &mut Ptr) { | 79 | pub(crate) fn scan_raw_string(ptr: &mut Ptr) { |
80 | let mut hashes = 0; | 80 | let mut hashes = 0; |
81 | while ptr.next_is('#') { | 81 | while ptr.at('#') { |
82 | hashes += 1; | 82 | hashes += 1; |
83 | ptr.bump(); | 83 | ptr.bump(); |
84 | } | 84 | } |
85 | if !ptr.next_is('"') { | 85 | if !ptr.at('"') { |
86 | return; | 86 | return; |
87 | } | 87 | } |
88 | ptr.bump(); | 88 | ptr.bump(); |
@@ -90,7 +90,7 @@ pub(crate) fn scan_raw_string(ptr: &mut Ptr) { | |||
90 | while let Some(c) = ptr.bump() { | 90 | while let Some(c) = ptr.bump() { |
91 | if c == '"' { | 91 | if c == '"' { |
92 | let mut hashes_left = hashes; | 92 | let mut hashes_left = hashes; |
93 | while ptr.next_is('#') && hashes_left > 0{ | 93 | while ptr.at('#') && hashes_left > 0{ |
94 | hashes_left -= 1; | 94 | hashes_left -= 1; |
95 | ptr.bump(); | 95 | ptr.bump(); |
96 | } | 96 | } |
@@ -110,7 +110,7 @@ fn scan_byte_string(ptr: &mut Ptr) { | |||
110 | } | 110 | } |
111 | 111 | ||
112 | fn scan_raw_byte_string(ptr: &mut Ptr) { | 112 | fn scan_raw_byte_string(ptr: &mut Ptr) { |
113 | if !ptr.next_is('"') { | 113 | if !ptr.at('"') { |
114 | return; | 114 | return; |
115 | } | 115 | } |
116 | ptr.bump(); | 116 | ptr.bump(); |