diff options
author | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-04-02 17:15:46 +0100 |
---|---|---|
committer | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-04-02 17:15:46 +0100 |
commit | 5cdf525caa51311515854c4dd398d3fb907d1368 (patch) | |
tree | 86f744285152fd74db56fae579efd9ef589b6a3a /crates/ra_syntax | |
parent | 9e46400e0ca72099fb5f464fdf6eefbace10fbd4 (diff) | |
parent | cf1caf518122b84b9516e1b9f65ba778f1900bf3 (diff) |
Merge #1093
1093: simplify r=matklad a=matklad
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_syntax')
-rw-r--r-- | crates/ra_syntax/src/string_lexing.rs | 338 | ||||
-rw-r--r-- | crates/ra_syntax/src/string_lexing/parser.rs | 168 | ||||
-rw-r--r-- | crates/ra_syntax/src/string_lexing/string.rs | 222 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation/byte.rs | 2 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation/byte_string.rs | 2 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation/char.rs | 2 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation/string.rs | 2 |
7 files changed, 336 insertions, 400 deletions
diff --git a/crates/ra_syntax/src/string_lexing.rs b/crates/ra_syntax/src/string_lexing.rs index 349733f3f..4c3eea3d2 100644 --- a/crates/ra_syntax/src/string_lexing.rs +++ b/crates/ra_syntax/src/string_lexing.rs | |||
@@ -1,7 +1,333 @@ | |||
1 | mod parser; | 1 | use crate::{TextRange, TextUnit}; |
2 | mod string; | 2 | use self::StringComponentKind::*; |
3 | 3 | ||
4 | pub use self::{ | 4 | #[derive(Debug, Eq, PartialEq, Clone)] |
5 | parser::{StringComponent, StringComponentKind}, | 5 | pub(crate) struct StringComponent { |
6 | string::{parse_string_literal, parse_char_literal, parse_byte_literal, parse_byte_string_literal}, | 6 | pub(crate) range: TextRange, |
7 | }; | 7 | pub(crate) kind: StringComponentKind, |
8 | } | ||
9 | |||
10 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
11 | pub(crate) enum StringComponentKind { | ||
12 | IgnoreNewline, | ||
13 | CodePoint, | ||
14 | AsciiEscape, | ||
15 | AsciiCodeEscape, | ||
16 | UnicodeEscape, | ||
17 | } | ||
18 | |||
19 | pub(crate) fn parse_quoted_literal( | ||
20 | prefix: Option<char>, | ||
21 | quote: char, | ||
22 | src: &str, | ||
23 | ) -> StringComponentIter { | ||
24 | let prefix = prefix.map(|p| match p { | ||
25 | 'b' => b'b', | ||
26 | _ => panic!("invalid prefix"), | ||
27 | }); | ||
28 | let quote = match quote { | ||
29 | '\'' => b'\'', | ||
30 | '"' => b'"', | ||
31 | _ => panic!("invalid quote"), | ||
32 | }; | ||
33 | StringComponentIter { src, prefix, quote, pos: 0, has_closing_quote: false, suffix: None } | ||
34 | } | ||
35 | |||
36 | pub(crate) struct StringComponentIter<'a> { | ||
37 | src: &'a str, | ||
38 | prefix: Option<u8>, | ||
39 | quote: u8, | ||
40 | pos: usize, | ||
41 | pub(crate) has_closing_quote: bool, | ||
42 | pub(crate) suffix: Option<TextRange>, | ||
43 | } | ||
44 | |||
45 | impl<'a> Iterator for StringComponentIter<'a> { | ||
46 | type Item = StringComponent; | ||
47 | fn next(&mut self) -> Option<StringComponent> { | ||
48 | if self.pos == 0 { | ||
49 | if let Some(prefix) = self.prefix { | ||
50 | assert!( | ||
51 | self.advance() == prefix as char, | ||
52 | "literal should start with a {:?}", | ||
53 | prefix as char, | ||
54 | ); | ||
55 | } | ||
56 | assert!( | ||
57 | self.advance() == self.quote as char, | ||
58 | "literal should start with a {:?}", | ||
59 | self.quote as char, | ||
60 | ); | ||
61 | } | ||
62 | |||
63 | if let Some(component) = self.parse_component() { | ||
64 | return Some(component); | ||
65 | } | ||
66 | |||
67 | // We get here when there are no char components left to parse | ||
68 | if self.peek() == Some(self.quote as char) { | ||
69 | self.advance(); | ||
70 | self.has_closing_quote = true; | ||
71 | if let Some(range) = self.parse_suffix() { | ||
72 | self.suffix = Some(range); | ||
73 | } | ||
74 | } | ||
75 | |||
76 | assert!( | ||
77 | self.peek() == None, | ||
78 | "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", | ||
79 | self.src, | ||
80 | self.pos, | ||
81 | self.src.len() | ||
82 | ); | ||
83 | |||
84 | None | ||
85 | } | ||
86 | } | ||
87 | |||
88 | impl<'a> StringComponentIter<'a> { | ||
89 | fn peek(&self) -> Option<char> { | ||
90 | if self.pos == self.src.len() { | ||
91 | return None; | ||
92 | } | ||
93 | |||
94 | self.src[self.pos..].chars().next() | ||
95 | } | ||
96 | |||
97 | fn advance(&mut self) -> char { | ||
98 | let next = self.peek().expect("cannot advance if end of input is reached"); | ||
99 | self.pos += next.len_utf8(); | ||
100 | next | ||
101 | } | ||
102 | |||
103 | fn parse_component(&mut self) -> Option<StringComponent> { | ||
104 | let next = self.peek()?; | ||
105 | |||
106 | // Ignore string close | ||
107 | if next == self.quote as char { | ||
108 | return None; | ||
109 | } | ||
110 | |||
111 | let start = self.start_range(); | ||
112 | self.advance(); | ||
113 | |||
114 | if next == '\\' { | ||
115 | // Strings can use `\` to ignore newlines, so we first try to parse one of those | ||
116 | // before falling back to parsing char escapes | ||
117 | if self.quote == b'"' { | ||
118 | if let Some(component) = self.parse_ignore_newline(start) { | ||
119 | return Some(component); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | Some(self.parse_escape(start)) | ||
124 | } else { | ||
125 | Some(self.finish_component(start, CodePoint)) | ||
126 | } | ||
127 | } | ||
128 | |||
129 | fn parse_ignore_newline(&mut self, start: TextUnit) -> Option<StringComponent> { | ||
130 | // In string literals, when a `\` occurs immediately before the newline, the `\`, | ||
131 | // the newline, and all whitespace at the beginning of the next line are ignored | ||
132 | match self.peek() { | ||
133 | Some('\n') | Some('\r') => { | ||
134 | self.skip_whitespace(); | ||
135 | Some(self.finish_component(start, IgnoreNewline)) | ||
136 | } | ||
137 | _ => None, | ||
138 | } | ||
139 | } | ||
140 | |||
141 | fn skip_whitespace(&mut self) { | ||
142 | while self.peek().map(|c| c.is_whitespace()) == Some(true) { | ||
143 | self.advance(); | ||
144 | } | ||
145 | } | ||
146 | |||
147 | fn parse_escape(&mut self, start: TextUnit) -> StringComponent { | ||
148 | if self.peek().is_none() { | ||
149 | return self.finish_component(start, AsciiEscape); | ||
150 | } | ||
151 | |||
152 | let next = self.advance(); | ||
153 | match next { | ||
154 | 'x' => self.parse_ascii_code_escape(start), | ||
155 | 'u' => self.parse_unicode_escape(start), | ||
156 | _ => self.finish_component(start, AsciiEscape), | ||
157 | } | ||
158 | } | ||
159 | |||
160 | fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent { | ||
161 | match self.peek() { | ||
162 | Some('{') => { | ||
163 | self.advance(); | ||
164 | |||
165 | // Parse anything until we reach `}` | ||
166 | while let Some(next) = self.peek() { | ||
167 | self.advance(); | ||
168 | if next == '}' { | ||
169 | break; | ||
170 | } | ||
171 | } | ||
172 | |||
173 | self.finish_component(start, UnicodeEscape) | ||
174 | } | ||
175 | Some(_) | None => self.finish_component(start, UnicodeEscape), | ||
176 | } | ||
177 | } | ||
178 | |||
179 | fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent { | ||
180 | let code_start = self.pos; | ||
181 | while let Some(next) = self.peek() { | ||
182 | if next == '\'' || (self.pos - code_start == 2) { | ||
183 | break; | ||
184 | } | ||
185 | |||
186 | self.advance(); | ||
187 | } | ||
188 | self.finish_component(start, AsciiCodeEscape) | ||
189 | } | ||
190 | |||
191 | fn parse_suffix(&mut self) -> Option<TextRange> { | ||
192 | let start = self.start_range(); | ||
193 | let _ = self.peek()?; | ||
194 | while let Some(_) = self.peek() { | ||
195 | self.advance(); | ||
196 | } | ||
197 | Some(self.finish_range(start)) | ||
198 | } | ||
199 | |||
200 | fn start_range(&self) -> TextUnit { | ||
201 | TextUnit::from_usize(self.pos) | ||
202 | } | ||
203 | |||
204 | fn finish_range(&self, start: TextUnit) -> TextRange { | ||
205 | TextRange::from_to(start, TextUnit::from_usize(self.pos)) | ||
206 | } | ||
207 | |||
208 | fn finish_component(&self, start: TextUnit, kind: StringComponentKind) -> StringComponent { | ||
209 | let range = self.finish_range(start); | ||
210 | StringComponent { range, kind } | ||
211 | } | ||
212 | } | ||
213 | |||
214 | #[cfg(test)] | ||
215 | mod tests { | ||
216 | use super::*; | ||
217 | |||
218 | fn parse(src: &str) -> (bool, Vec<StringComponent>) { | ||
219 | let component_iterator = &mut parse_quoted_literal(None, '\'', src); | ||
220 | let components: Vec<_> = component_iterator.collect(); | ||
221 | (component_iterator.has_closing_quote, components) | ||
222 | } | ||
223 | |||
224 | fn unclosed_char_component(src: &str) -> StringComponent { | ||
225 | let (has_closing_quote, components) = parse(src); | ||
226 | assert!(!has_closing_quote, "char should not have closing quote"); | ||
227 | assert!(components.len() == 1); | ||
228 | components[0].clone() | ||
229 | } | ||
230 | |||
231 | fn closed_char_component(src: &str) -> StringComponent { | ||
232 | let (has_closing_quote, components) = parse(src); | ||
233 | assert!(has_closing_quote, "char should have closing quote"); | ||
234 | assert!(components.len() == 1, "Literal: {}\nComponents: {:#?}", src, components); | ||
235 | components[0].clone() | ||
236 | } | ||
237 | |||
238 | fn closed_char_components(src: &str) -> Vec<StringComponent> { | ||
239 | let (has_closing_quote, components) = parse(src); | ||
240 | assert!(has_closing_quote, "char should have closing quote"); | ||
241 | components | ||
242 | } | ||
243 | |||
244 | fn range_closed(src: &str) -> TextRange { | ||
245 | TextRange::from_to(1.into(), (src.len() as u32 - 1).into()) | ||
246 | } | ||
247 | |||
248 | fn range_unclosed(src: &str) -> TextRange { | ||
249 | TextRange::from_to(1.into(), (src.len() as u32).into()) | ||
250 | } | ||
251 | |||
252 | #[test] | ||
253 | fn test_unicode_escapes() { | ||
254 | let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""]; | ||
255 | for escape in unicode_escapes { | ||
256 | let escape_sequence = format!(r"'\u{}'", escape); | ||
257 | let component = closed_char_component(&escape_sequence); | ||
258 | let expected_range = range_closed(&escape_sequence); | ||
259 | assert_eq!(component.kind, UnicodeEscape); | ||
260 | assert_eq!(component.range, expected_range); | ||
261 | } | ||
262 | } | ||
263 | |||
264 | #[test] | ||
265 | fn test_unicode_escapes_unclosed() { | ||
266 | let unicode_escapes = &["{DEAD", "{BEEF", "{FF"]; | ||
267 | for escape in unicode_escapes { | ||
268 | let escape_sequence = format!(r"'\u{}'", escape); | ||
269 | let component = unclosed_char_component(&escape_sequence); | ||
270 | let expected_range = range_unclosed(&escape_sequence); | ||
271 | assert_eq!(component.kind, UnicodeEscape); | ||
272 | assert_eq!(component.range, expected_range); | ||
273 | } | ||
274 | } | ||
275 | |||
276 | #[test] | ||
277 | fn test_empty_char() { | ||
278 | let (has_closing_quote, components) = parse("''"); | ||
279 | assert!(has_closing_quote, "char should have closing quote"); | ||
280 | assert!(components.len() == 0); | ||
281 | } | ||
282 | |||
283 | #[test] | ||
284 | fn test_unclosed_char() { | ||
285 | let component = unclosed_char_component("'a"); | ||
286 | assert!(component.kind == CodePoint); | ||
287 | assert!(component.range == TextRange::from_to(1.into(), 2.into())); | ||
288 | } | ||
289 | |||
290 | #[test] | ||
291 | fn test_digit_escapes() { | ||
292 | let literals = &[r"", r"5", r"55"]; | ||
293 | |||
294 | for literal in literals { | ||
295 | let lit_text = format!(r"'\x{}'", literal); | ||
296 | let component = closed_char_component(&lit_text); | ||
297 | assert!(component.kind == AsciiCodeEscape); | ||
298 | assert!(component.range == range_closed(&lit_text)); | ||
299 | } | ||
300 | |||
301 | // More than 2 digits starts a new codepoint | ||
302 | let components = closed_char_components(r"'\x555'"); | ||
303 | assert!(components.len() == 2); | ||
304 | assert!(components[1].kind == CodePoint); | ||
305 | } | ||
306 | |||
307 | #[test] | ||
308 | fn test_ascii_escapes() { | ||
309 | let literals = &[ | ||
310 | r"\'", "\\\"", // equivalent to \" | ||
311 | r"\n", r"\r", r"\t", r"\\", r"\0", | ||
312 | ]; | ||
313 | |||
314 | for literal in literals { | ||
315 | let lit_text = format!("'{}'", literal); | ||
316 | let component = closed_char_component(&lit_text); | ||
317 | assert!(component.kind == AsciiEscape); | ||
318 | assert!(component.range == range_closed(&lit_text)); | ||
319 | } | ||
320 | } | ||
321 | |||
322 | #[test] | ||
323 | fn test_no_escapes() { | ||
324 | let literals = &['"', 'n', 'r', 't', '0', 'x', 'u']; | ||
325 | |||
326 | for &literal in literals { | ||
327 | let lit_text = format!("'{}'", literal); | ||
328 | let component = closed_char_component(&lit_text); | ||
329 | assert!(component.kind == CodePoint); | ||
330 | assert!(component.range == range_closed(&lit_text)); | ||
331 | } | ||
332 | } | ||
333 | } | ||
diff --git a/crates/ra_syntax/src/string_lexing/parser.rs b/crates/ra_syntax/src/string_lexing/parser.rs deleted file mode 100644 index 7469eb903..000000000 --- a/crates/ra_syntax/src/string_lexing/parser.rs +++ /dev/null | |||
@@ -1,168 +0,0 @@ | |||
1 | use rowan::{TextRange, TextUnit}; | ||
2 | |||
3 | use self::StringComponentKind::*; | ||
4 | |||
5 | pub struct Parser<'a> { | ||
6 | pub(super) quote: u8, | ||
7 | pub(super) src: &'a str, | ||
8 | pub(super) pos: usize, | ||
9 | } | ||
10 | |||
11 | impl<'a> Parser<'a> { | ||
12 | pub fn new(src: &'a str, quote: u8) -> Parser<'a> { | ||
13 | Parser { quote, src, pos: 0 } | ||
14 | } | ||
15 | |||
16 | // Utility methods | ||
17 | |||
18 | pub fn peek(&self) -> Option<char> { | ||
19 | if self.pos == self.src.len() { | ||
20 | return None; | ||
21 | } | ||
22 | |||
23 | self.src[self.pos..].chars().next() | ||
24 | } | ||
25 | |||
26 | pub fn advance(&mut self) -> char { | ||
27 | let next = self.peek().expect("cannot advance if end of input is reached"); | ||
28 | self.pos += next.len_utf8(); | ||
29 | next | ||
30 | } | ||
31 | |||
32 | pub fn skip_whitespace(&mut self) { | ||
33 | while self.peek().map(|c| c.is_whitespace()) == Some(true) { | ||
34 | self.advance(); | ||
35 | } | ||
36 | } | ||
37 | |||
38 | pub fn get_pos(&self) -> TextUnit { | ||
39 | (self.pos as u32).into() | ||
40 | } | ||
41 | |||
42 | // Char parsing methods | ||
43 | |||
44 | fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent { | ||
45 | match self.peek() { | ||
46 | Some('{') => { | ||
47 | self.advance(); | ||
48 | |||
49 | // Parse anything until we reach `}` | ||
50 | while let Some(next) = self.peek() { | ||
51 | self.advance(); | ||
52 | if next == '}' { | ||
53 | break; | ||
54 | } | ||
55 | } | ||
56 | |||
57 | let end = self.get_pos(); | ||
58 | StringComponent::new(TextRange::from_to(start, end), UnicodeEscape) | ||
59 | } | ||
60 | Some(_) | None => { | ||
61 | let end = self.get_pos(); | ||
62 | StringComponent::new(TextRange::from_to(start, end), UnicodeEscape) | ||
63 | } | ||
64 | } | ||
65 | } | ||
66 | |||
67 | fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent { | ||
68 | let code_start = self.get_pos(); | ||
69 | while let Some(next) = self.peek() { | ||
70 | if next == '\'' || (self.get_pos() - code_start == 2.into()) { | ||
71 | break; | ||
72 | } | ||
73 | |||
74 | self.advance(); | ||
75 | } | ||
76 | |||
77 | let end = self.get_pos(); | ||
78 | StringComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) | ||
79 | } | ||
80 | |||
81 | fn parse_escape(&mut self, start: TextUnit) -> StringComponent { | ||
82 | if self.peek().is_none() { | ||
83 | return StringComponent::new(TextRange::from_to(start, self.get_pos()), AsciiEscape); | ||
84 | } | ||
85 | |||
86 | let next = self.advance(); | ||
87 | let end = self.get_pos(); | ||
88 | let range = TextRange::from_to(start, end); | ||
89 | match next { | ||
90 | 'x' => self.parse_ascii_code_escape(start), | ||
91 | 'u' => self.parse_unicode_escape(start), | ||
92 | _ => StringComponent::new(range, AsciiEscape), | ||
93 | } | ||
94 | } | ||
95 | |||
96 | pub fn parse_ignore_newline(&mut self, start: TextUnit) -> Option<StringComponent> { | ||
97 | // In string literals, when a `\` occurs immediately before the newline, the `\`, | ||
98 | // the newline, and all whitespace at the beginning of the next line are ignored | ||
99 | match self.peek() { | ||
100 | Some('\n') | Some('\r') => { | ||
101 | self.skip_whitespace(); | ||
102 | Some(StringComponent::new( | ||
103 | TextRange::from_to(start, self.get_pos()), | ||
104 | StringComponentKind::IgnoreNewline, | ||
105 | )) | ||
106 | } | ||
107 | _ => None, | ||
108 | } | ||
109 | } | ||
110 | |||
111 | pub fn parse_component(&mut self) -> Option<StringComponent> { | ||
112 | let next = self.peek()?; | ||
113 | |||
114 | // Ignore string close | ||
115 | if next == self.quote as char { | ||
116 | return None; | ||
117 | } | ||
118 | |||
119 | let start = self.get_pos(); | ||
120 | self.advance(); | ||
121 | |||
122 | if next == '\\' { | ||
123 | // Strings can use `\` to ignore newlines, so we first try to parse one of those | ||
124 | // before falling back to parsing char escapes | ||
125 | if self.quote == b'"' { | ||
126 | if let Some(component) = self.parse_ignore_newline(start) { | ||
127 | return Some(component); | ||
128 | } | ||
129 | } | ||
130 | |||
131 | Some(self.parse_escape(start)) | ||
132 | } else { | ||
133 | let end = self.get_pos(); | ||
134 | Some(StringComponent::new(TextRange::from_to(start, end), CodePoint)) | ||
135 | } | ||
136 | } | ||
137 | |||
138 | pub fn parse_suffix(&mut self) -> Option<TextRange> { | ||
139 | let start = self.get_pos(); | ||
140 | let _ = self.peek()?; | ||
141 | while let Some(_) = self.peek() { | ||
142 | self.advance(); | ||
143 | } | ||
144 | let end = self.get_pos(); | ||
145 | Some(TextRange::from_to(start, end)) | ||
146 | } | ||
147 | } | ||
148 | |||
149 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
150 | pub struct StringComponent { | ||
151 | pub range: TextRange, | ||
152 | pub kind: StringComponentKind, | ||
153 | } | ||
154 | |||
155 | impl StringComponent { | ||
156 | fn new(range: TextRange, kind: StringComponentKind) -> StringComponent { | ||
157 | StringComponent { range, kind } | ||
158 | } | ||
159 | } | ||
160 | |||
161 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
162 | pub enum StringComponentKind { | ||
163 | IgnoreNewline, | ||
164 | CodePoint, | ||
165 | AsciiEscape, | ||
166 | AsciiCodeEscape, | ||
167 | UnicodeEscape, | ||
168 | } | ||
diff --git a/crates/ra_syntax/src/string_lexing/string.rs b/crates/ra_syntax/src/string_lexing/string.rs deleted file mode 100644 index a4742a0d1..000000000 --- a/crates/ra_syntax/src/string_lexing/string.rs +++ /dev/null | |||
@@ -1,222 +0,0 @@ | |||
1 | use crate::{ | ||
2 | TextRange, | ||
3 | string_lexing::{ | ||
4 | parser::Parser, | ||
5 | StringComponent, | ||
6 | }}; | ||
7 | |||
8 | pub fn parse_string_literal(src: &str) -> StringComponentIterator { | ||
9 | StringComponentIterator { | ||
10 | parser: Parser::new(src, b'"'), | ||
11 | has_closing_quote: false, | ||
12 | suffix: None, | ||
13 | prefix: None, | ||
14 | quote: b'"', | ||
15 | } | ||
16 | } | ||
17 | |||
18 | pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator { | ||
19 | StringComponentIterator { | ||
20 | parser: Parser::new(src, b'"'), | ||
21 | has_closing_quote: false, | ||
22 | suffix: None, | ||
23 | prefix: Some(b'b'), | ||
24 | quote: b'"', | ||
25 | } | ||
26 | } | ||
27 | |||
28 | pub fn parse_char_literal(src: &str) -> StringComponentIterator { | ||
29 | StringComponentIterator { | ||
30 | parser: Parser::new(src, b'\''), | ||
31 | has_closing_quote: false, | ||
32 | suffix: None, | ||
33 | prefix: None, | ||
34 | quote: b'\'', | ||
35 | } | ||
36 | } | ||
37 | |||
38 | pub fn parse_byte_literal(src: &str) -> StringComponentIterator { | ||
39 | StringComponentIterator { | ||
40 | parser: Parser::new(src, b'\''), | ||
41 | has_closing_quote: false, | ||
42 | suffix: None, | ||
43 | prefix: Some(b'b'), | ||
44 | quote: b'\'', | ||
45 | } | ||
46 | } | ||
47 | |||
48 | pub struct StringComponentIterator<'a> { | ||
49 | parser: Parser<'a>, | ||
50 | pub has_closing_quote: bool, | ||
51 | pub suffix: Option<TextRange>, | ||
52 | prefix: Option<u8>, | ||
53 | quote: u8, | ||
54 | } | ||
55 | |||
56 | impl<'a> Iterator for StringComponentIterator<'a> { | ||
57 | type Item = StringComponent; | ||
58 | fn next(&mut self) -> Option<StringComponent> { | ||
59 | if self.parser.pos == 0 { | ||
60 | if let Some(prefix) = self.prefix { | ||
61 | assert!( | ||
62 | self.parser.advance() == prefix as char, | ||
63 | "literal should start with a {:?}", | ||
64 | prefix as char, | ||
65 | ); | ||
66 | } | ||
67 | assert!( | ||
68 | self.parser.advance() == self.quote as char, | ||
69 | "literal should start with a {:?}", | ||
70 | self.quote as char, | ||
71 | ); | ||
72 | } | ||
73 | |||
74 | if let Some(component) = self.parser.parse_component() { | ||
75 | return Some(component); | ||
76 | } | ||
77 | |||
78 | // We get here when there are no char components left to parse | ||
79 | if self.parser.peek() == Some(self.quote as char) { | ||
80 | self.parser.advance(); | ||
81 | self.has_closing_quote = true; | ||
82 | if let Some(range) = self.parser.parse_suffix() { | ||
83 | self.suffix = Some(range); | ||
84 | } | ||
85 | } | ||
86 | |||
87 | assert!( | ||
88 | self.parser.peek() == None, | ||
89 | "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", | ||
90 | self.parser.src, | ||
91 | self.parser.pos, | ||
92 | self.parser.src.len() | ||
93 | ); | ||
94 | |||
95 | None | ||
96 | } | ||
97 | } | ||
98 | |||
99 | #[cfg(test)] | ||
100 | mod tests { | ||
101 | use rowan::TextRange; | ||
102 | use crate::string_lexing::{ | ||
103 | StringComponent, | ||
104 | StringComponentKind::*, | ||
105 | }; | ||
106 | |||
107 | fn parse(src: &str) -> (bool, Vec<StringComponent>) { | ||
108 | let component_iterator = &mut super::parse_char_literal(src); | ||
109 | let components: Vec<_> = component_iterator.collect(); | ||
110 | (component_iterator.has_closing_quote, components) | ||
111 | } | ||
112 | |||
113 | fn unclosed_char_component(src: &str) -> StringComponent { | ||
114 | let (has_closing_quote, components) = parse(src); | ||
115 | assert!(!has_closing_quote, "char should not have closing quote"); | ||
116 | assert!(components.len() == 1); | ||
117 | components[0].clone() | ||
118 | } | ||
119 | |||
120 | fn closed_char_component(src: &str) -> StringComponent { | ||
121 | let (has_closing_quote, components) = parse(src); | ||
122 | assert!(has_closing_quote, "char should have closing quote"); | ||
123 | assert!(components.len() == 1, "Literal: {}\nComponents: {:#?}", src, components); | ||
124 | components[0].clone() | ||
125 | } | ||
126 | |||
127 | fn closed_char_components(src: &str) -> Vec<StringComponent> { | ||
128 | let (has_closing_quote, components) = parse(src); | ||
129 | assert!(has_closing_quote, "char should have closing quote"); | ||
130 | components | ||
131 | } | ||
132 | |||
133 | fn range_closed(src: &str) -> TextRange { | ||
134 | TextRange::from_to(1.into(), (src.len() as u32 - 1).into()) | ||
135 | } | ||
136 | |||
137 | fn range_unclosed(src: &str) -> TextRange { | ||
138 | TextRange::from_to(1.into(), (src.len() as u32).into()) | ||
139 | } | ||
140 | |||
141 | #[test] | ||
142 | fn test_unicode_escapes() { | ||
143 | let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""]; | ||
144 | for escape in unicode_escapes { | ||
145 | let escape_sequence = format!(r"'\u{}'", escape); | ||
146 | let component = closed_char_component(&escape_sequence); | ||
147 | let expected_range = range_closed(&escape_sequence); | ||
148 | assert_eq!(component.kind, UnicodeEscape); | ||
149 | assert_eq!(component.range, expected_range); | ||
150 | } | ||
151 | } | ||
152 | |||
153 | #[test] | ||
154 | fn test_unicode_escapes_unclosed() { | ||
155 | let unicode_escapes = &["{DEAD", "{BEEF", "{FF"]; | ||
156 | for escape in unicode_escapes { | ||
157 | let escape_sequence = format!(r"'\u{}'", escape); | ||
158 | let component = unclosed_char_component(&escape_sequence); | ||
159 | let expected_range = range_unclosed(&escape_sequence); | ||
160 | assert_eq!(component.kind, UnicodeEscape); | ||
161 | assert_eq!(component.range, expected_range); | ||
162 | } | ||
163 | } | ||
164 | |||
165 | #[test] | ||
166 | fn test_empty_char() { | ||
167 | let (has_closing_quote, components) = parse("''"); | ||
168 | assert!(has_closing_quote, "char should have closing quote"); | ||
169 | assert!(components.len() == 0); | ||
170 | } | ||
171 | |||
172 | #[test] | ||
173 | fn test_unclosed_char() { | ||
174 | let component = unclosed_char_component("'a"); | ||
175 | assert!(component.kind == CodePoint); | ||
176 | assert!(component.range == TextRange::from_to(1.into(), 2.into())); | ||
177 | } | ||
178 | |||
179 | #[test] | ||
180 | fn test_digit_escapes() { | ||
181 | let literals = &[r"", r"5", r"55"]; | ||
182 | |||
183 | for literal in literals { | ||
184 | let lit_text = format!(r"'\x{}'", literal); | ||
185 | let component = closed_char_component(&lit_text); | ||
186 | assert!(component.kind == AsciiCodeEscape); | ||
187 | assert!(component.range == range_closed(&lit_text)); | ||
188 | } | ||
189 | |||
190 | // More than 2 digits starts a new codepoint | ||
191 | let components = closed_char_components(r"'\x555'"); | ||
192 | assert!(components.len() == 2); | ||
193 | assert!(components[1].kind == CodePoint); | ||
194 | } | ||
195 | |||
196 | #[test] | ||
197 | fn test_ascii_escapes() { | ||
198 | let literals = &[ | ||
199 | r"\'", "\\\"", // equivalent to \" | ||
200 | r"\n", r"\r", r"\t", r"\\", r"\0", | ||
201 | ]; | ||
202 | |||
203 | for literal in literals { | ||
204 | let lit_text = format!("'{}'", literal); | ||
205 | let component = closed_char_component(&lit_text); | ||
206 | assert!(component.kind == AsciiEscape); | ||
207 | assert!(component.range == range_closed(&lit_text)); | ||
208 | } | ||
209 | } | ||
210 | |||
211 | #[test] | ||
212 | fn test_no_escapes() { | ||
213 | let literals = &['"', 'n', 'r', 't', '0', 'x', 'u']; | ||
214 | |||
215 | for &literal in literals { | ||
216 | let lit_text = format!("'{}'", literal); | ||
217 | let component = closed_char_component(&lit_text); | ||
218 | assert!(component.kind == CodePoint); | ||
219 | assert!(component.range == range_closed(&lit_text)); | ||
220 | } | ||
221 | } | ||
222 | } | ||
diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs index 290f80fc6..f653e65d0 100644 --- a/crates/ra_syntax/src/validation/byte.rs +++ b/crates/ra_syntax/src/validation/byte.rs | |||
@@ -12,7 +12,7 @@ use crate::{ | |||
12 | pub(super) fn validate_byte_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) { | 12 | pub(super) fn validate_byte_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) { |
13 | let literal_text = node.text(); | 13 | let literal_text = node.text(); |
14 | let literal_range = node.range(); | 14 | let literal_range = node.range(); |
15 | let mut components = string_lexing::parse_byte_literal(literal_text); | 15 | let mut components = string_lexing::parse_quoted_literal(Some('b'), '\'', literal_text); |
16 | let mut len = 0; | 16 | let mut len = 0; |
17 | for component in &mut components { | 17 | for component in &mut components { |
18 | len += 1; | 18 | len += 1; |
diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs index eae395e9d..1d48c2d9b 100644 --- a/crates/ra_syntax/src/validation/byte_string.rs +++ b/crates/ra_syntax/src/validation/byte_string.rs | |||
@@ -10,7 +10,7 @@ use super::byte; | |||
10 | pub(crate) fn validate_byte_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) { | 10 | pub(crate) fn validate_byte_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) { |
11 | let literal_text = node.text(); | 11 | let literal_text = node.text(); |
12 | let literal_range = node.range(); | 12 | let literal_range = node.range(); |
13 | let mut components = string_lexing::parse_byte_string_literal(literal_text); | 13 | let mut components = string_lexing::parse_quoted_literal(Some('b'), '"', literal_text); |
14 | for component in &mut components { | 14 | for component in &mut components { |
15 | let range = component.range + literal_range.start(); | 15 | let range = component.range + literal_range.start(); |
16 | 16 | ||
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs index a385accdd..0f1885873 100644 --- a/crates/ra_syntax/src/validation/char.rs +++ b/crates/ra_syntax/src/validation/char.rs | |||
@@ -15,7 +15,7 @@ use crate::{ | |||
15 | pub(super) fn validate_char_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) { | 15 | pub(super) fn validate_char_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) { |
16 | let literal_text = node.text(); | 16 | let literal_text = node.text(); |
17 | let literal_range = node.range(); | 17 | let literal_range = node.range(); |
18 | let mut components = string_lexing::parse_char_literal(literal_text); | 18 | let mut components = string_lexing::parse_quoted_literal(None, '\'', literal_text); |
19 | let mut len = 0; | 19 | let mut len = 0; |
20 | for component in &mut components { | 20 | for component in &mut components { |
21 | len += 1; | 21 | len += 1; |
diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs index f7f5c02c0..fc2f1b992 100644 --- a/crates/ra_syntax/src/validation/string.rs +++ b/crates/ra_syntax/src/validation/string.rs | |||
@@ -10,7 +10,7 @@ use super::char; | |||
10 | pub(crate) fn validate_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) { | 10 | pub(crate) fn validate_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) { |
11 | let literal_text = node.text(); | 11 | let literal_text = node.text(); |
12 | let literal_range = node.range(); | 12 | let literal_range = node.range(); |
13 | let mut components = string_lexing::parse_string_literal(literal_text); | 13 | let mut components = string_lexing::parse_quoted_literal(None, '"', literal_text); |
14 | for component in &mut components { | 14 | for component in &mut components { |
15 | let range = component.range + literal_range.start(); | 15 | let range = component.range + literal_range.start(); |
16 | 16 | ||