diff options
author | Aleksey Kladov <[email protected]> | 2019-07-24 09:47:28 +0100 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2019-07-24 09:47:28 +0100 |
commit | 2473cb6a5cc2af6d703129adc01942b00c059810 (patch) | |
tree | 0b51a9d40266d0b89083199191573a8a4a194d99 /crates | |
parent | c79eea9fc1d7c3400031674b7ebb2b0671aa05e8 (diff) |
switch to upstream unescape
Diffstat (limited to 'crates')
-rw-r--r-- | crates/ra_syntax/Cargo.toml | 2 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation.rs | 91 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation/unescape.rs | 521 |
3 files changed, 88 insertions, 526 deletions
diff --git a/crates/ra_syntax/Cargo.toml b/crates/ra_syntax/Cargo.toml index 19e690f9e..40d63ef7a 100644 --- a/crates/ra_syntax/Cargo.toml +++ b/crates/ra_syntax/Cargo.toml | |||
@@ -11,7 +11,7 @@ repository = "https://github.com/rust-analyzer/rust-analyzer" | |||
11 | unicode-xid = "0.1.0" | 11 | unicode-xid = "0.1.0" |
12 | itertools = "0.8.0" | 12 | itertools = "0.8.0" |
13 | rowan = "0.6.0" | 13 | rowan = "0.6.0" |
14 | ra_rustc_lexer = { version = "0.1.0-pre.1", features = [ "unicode-xid" ] } | 14 | ra_rustc_lexer = { version = "0.1.0-pre.2" } |
15 | 15 | ||
16 | # ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here | 16 | # ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here |
17 | # to reduce number of compilations | 17 | # to reduce number of compilations |
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs index e03c02d1b..1f904434e 100644 --- a/crates/ra_syntax/src/validation.rs +++ b/crates/ra_syntax/src/validation.rs | |||
@@ -1,16 +1,99 @@ | |||
1 | mod unescape; | ||
2 | |||
3 | mod block; | 1 | mod block; |
4 | mod field_expr; | 2 | mod field_expr; |
5 | 3 | ||
4 | use ra_rustc_lexer::unescape; | ||
5 | |||
6 | use crate::{ | 6 | use crate::{ |
7 | algo::visit::{visitor_ctx, VisitorCtx}, | 7 | algo::visit::{visitor_ctx, VisitorCtx}, |
8 | ast, SyntaxError, | 8 | ast, SyntaxError, SyntaxErrorKind, |
9 | SyntaxKind::{BYTE, BYTE_STRING, CHAR, STRING}, | 9 | SyntaxKind::{BYTE, BYTE_STRING, CHAR, STRING}, |
10 | SyntaxNode, TextUnit, T, | 10 | SyntaxNode, TextUnit, T, |
11 | }; | 11 | }; |
12 | 12 | ||
13 | pub(crate) use unescape::EscapeError; | 13 | #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] |
14 | pub enum EscapeError { | ||
15 | ZeroChars, | ||
16 | MoreThanOneChar, | ||
17 | LoneSlash, | ||
18 | InvalidEscape, | ||
19 | BareCarriageReturn, | ||
20 | EscapeOnlyChar, | ||
21 | TooShortHexEscape, | ||
22 | InvalidCharInHexEscape, | ||
23 | OutOfRangeHexEscape, | ||
24 | NoBraceInUnicodeEscape, | ||
25 | InvalidCharInUnicodeEscape, | ||
26 | EmptyUnicodeEscape, | ||
27 | UnclosedUnicodeEscape, | ||
28 | LeadingUnderscoreUnicodeEscape, | ||
29 | OverlongUnicodeEscape, | ||
30 | LoneSurrogateUnicodeEscape, | ||
31 | OutOfRangeUnicodeEscape, | ||
32 | UnicodeEscapeInByte, | ||
33 | NonAsciiCharInByte, | ||
34 | } | ||
35 | |||
36 | impl From<ra_rustc_lexer::unescape::EscapeError> for EscapeError { | ||
37 | fn from(err: ra_rustc_lexer::unescape::EscapeError) -> Self { | ||
38 | match err { | ||
39 | ra_rustc_lexer::unescape::EscapeError::ZeroChars => EscapeError::ZeroChars, | ||
40 | ra_rustc_lexer::unescape::EscapeError::MoreThanOneChar => EscapeError::MoreThanOneChar, | ||
41 | ra_rustc_lexer::unescape::EscapeError::LoneSlash => EscapeError::LoneSlash, | ||
42 | ra_rustc_lexer::unescape::EscapeError::InvalidEscape => EscapeError::InvalidEscape, | ||
43 | ra_rustc_lexer::unescape::EscapeError::BareCarriageReturn | ||
44 | | ra_rustc_lexer::unescape::EscapeError::BareCarriageReturnInRawString => { | ||
45 | EscapeError::BareCarriageReturn | ||
46 | } | ||
47 | ra_rustc_lexer::unescape::EscapeError::EscapeOnlyChar => EscapeError::EscapeOnlyChar, | ||
48 | ra_rustc_lexer::unescape::EscapeError::TooShortHexEscape => { | ||
49 | EscapeError::TooShortHexEscape | ||
50 | } | ||
51 | ra_rustc_lexer::unescape::EscapeError::InvalidCharInHexEscape => { | ||
52 | EscapeError::InvalidCharInHexEscape | ||
53 | } | ||
54 | ra_rustc_lexer::unescape::EscapeError::OutOfRangeHexEscape => { | ||
55 | EscapeError::OutOfRangeHexEscape | ||
56 | } | ||
57 | ra_rustc_lexer::unescape::EscapeError::NoBraceInUnicodeEscape => { | ||
58 | EscapeError::NoBraceInUnicodeEscape | ||
59 | } | ||
60 | ra_rustc_lexer::unescape::EscapeError::InvalidCharInUnicodeEscape => { | ||
61 | EscapeError::InvalidCharInUnicodeEscape | ||
62 | } | ||
63 | ra_rustc_lexer::unescape::EscapeError::EmptyUnicodeEscape => { | ||
64 | EscapeError::EmptyUnicodeEscape | ||
65 | } | ||
66 | ra_rustc_lexer::unescape::EscapeError::UnclosedUnicodeEscape => { | ||
67 | EscapeError::UnclosedUnicodeEscape | ||
68 | } | ||
69 | ra_rustc_lexer::unescape::EscapeError::LeadingUnderscoreUnicodeEscape => { | ||
70 | EscapeError::LeadingUnderscoreUnicodeEscape | ||
71 | } | ||
72 | ra_rustc_lexer::unescape::EscapeError::OverlongUnicodeEscape => { | ||
73 | EscapeError::OverlongUnicodeEscape | ||
74 | } | ||
75 | ra_rustc_lexer::unescape::EscapeError::LoneSurrogateUnicodeEscape => { | ||
76 | EscapeError::LoneSurrogateUnicodeEscape | ||
77 | } | ||
78 | ra_rustc_lexer::unescape::EscapeError::OutOfRangeUnicodeEscape => { | ||
79 | EscapeError::OutOfRangeUnicodeEscape | ||
80 | } | ||
81 | ra_rustc_lexer::unescape::EscapeError::UnicodeEscapeInByte => { | ||
82 | EscapeError::UnicodeEscapeInByte | ||
83 | } | ||
84 | ra_rustc_lexer::unescape::EscapeError::NonAsciiCharInByte | ||
85 | | ra_rustc_lexer::unescape::EscapeError::NonAsciiCharInByteString => { | ||
86 | EscapeError::NonAsciiCharInByte | ||
87 | } | ||
88 | } | ||
89 | } | ||
90 | } | ||
91 | |||
92 | impl From<ra_rustc_lexer::unescape::EscapeError> for SyntaxErrorKind { | ||
93 | fn from(err: ra_rustc_lexer::unescape::EscapeError) -> Self { | ||
94 | SyntaxErrorKind::EscapeError(err.into()) | ||
95 | } | ||
96 | } | ||
14 | 97 | ||
15 | pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> { | 98 | pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> { |
16 | let mut errors = Vec::new(); | 99 | let mut errors = Vec::new(); |
diff --git a/crates/ra_syntax/src/validation/unescape.rs b/crates/ra_syntax/src/validation/unescape.rs deleted file mode 100644 index 7eed6c663..000000000 --- a/crates/ra_syntax/src/validation/unescape.rs +++ /dev/null | |||
@@ -1,521 +0,0 @@ | |||
1 | //! Utilities for validating string and char literals and turning them into | ||
2 | //! values they represent. | ||
3 | //! | ||
4 | //! This file is copy-pasted from the compiler | ||
5 | //! | ||
6 | //! https://github.com/rust-lang/rust/blob/c6ac57564852cb6e2d0db60f7b46d9eb98d4b449/src/libsyntax/parse/unescape.rs | ||
7 | //! | ||
8 | //! Hopefully, we'll share this code in a proper way some day | ||
9 | |||
10 | use std::ops::Range; | ||
11 | use std::str::Chars; | ||
12 | |||
13 | #[derive(Debug, PartialEq, Eq, Clone, Hash)] | ||
14 | pub enum EscapeError { | ||
15 | ZeroChars, | ||
16 | MoreThanOneChar, | ||
17 | |||
18 | LoneSlash, | ||
19 | InvalidEscape, | ||
20 | BareCarriageReturn, | ||
21 | EscapeOnlyChar, | ||
22 | |||
23 | TooShortHexEscape, | ||
24 | InvalidCharInHexEscape, | ||
25 | OutOfRangeHexEscape, | ||
26 | |||
27 | NoBraceInUnicodeEscape, | ||
28 | InvalidCharInUnicodeEscape, | ||
29 | EmptyUnicodeEscape, | ||
30 | UnclosedUnicodeEscape, | ||
31 | LeadingUnderscoreUnicodeEscape, | ||
32 | OverlongUnicodeEscape, | ||
33 | LoneSurrogateUnicodeEscape, | ||
34 | OutOfRangeUnicodeEscape, | ||
35 | |||
36 | UnicodeEscapeInByte, | ||
37 | NonAsciiCharInByte, | ||
38 | } | ||
39 | |||
40 | /// Takes the contents of a char literal (without quotes) and returns the | ||
41 | /// unescaped char or an error. | ||
42 | pub(crate) fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> { | ||
43 | let mut chars = literal_text.chars(); | ||
44 | unescape_char_or_byte(&mut chars, Mode::Char) | ||
45 | .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) | ||
46 | } | ||
47 | |||
48 | /// Takes the contents of a string literal (without quotes) and produces a | ||
49 | /// sequence of unescaped characters or errors. | ||
50 | pub(crate) fn unescape_str<F>(literal_text: &str, callback: &mut F) | ||
51 | where | ||
52 | F: FnMut(Range<usize>, Result<char, EscapeError>), | ||
53 | { | ||
54 | unescape_str_or_byte_str(literal_text, Mode::Str, callback) | ||
55 | } | ||
56 | |||
57 | pub(crate) fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> { | ||
58 | let mut chars = literal_text.chars(); | ||
59 | unescape_char_or_byte(&mut chars, Mode::Byte) | ||
60 | .map(byte_from_char) | ||
61 | .map_err(|err| (literal_text.len() - chars.as_str().len(), err)) | ||
62 | } | ||
63 | |||
64 | /// Takes the contents of a byte string literal (without quotes) and produces a | ||
65 | /// sequence of unescaped bytes or errors. | ||
66 | pub(crate) fn unescape_byte_str<F>(literal_text: &str, callback: &mut F) | ||
67 | where | ||
68 | F: FnMut(Range<usize>, Result<u8, EscapeError>), | ||
69 | { | ||
70 | unescape_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| { | ||
71 | callback(range, char.map(byte_from_char)) | ||
72 | }) | ||
73 | } | ||
74 | |||
75 | #[derive(Debug, Clone, Copy)] | ||
76 | pub(crate) enum Mode { | ||
77 | Char, | ||
78 | Str, | ||
79 | Byte, | ||
80 | ByteStr, | ||
81 | } | ||
82 | |||
83 | impl Mode { | ||
84 | fn in_single_quotes(self) -> bool { | ||
85 | match self { | ||
86 | Mode::Char | Mode::Byte => true, | ||
87 | Mode::Str | Mode::ByteStr => false, | ||
88 | } | ||
89 | } | ||
90 | |||
91 | pub(crate) fn in_double_quotes(self) -> bool { | ||
92 | !self.in_single_quotes() | ||
93 | } | ||
94 | |||
95 | pub(crate) fn is_bytes(self) -> bool { | ||
96 | match self { | ||
97 | Mode::Byte | Mode::ByteStr => true, | ||
98 | Mode::Char | Mode::Str => false, | ||
99 | } | ||
100 | } | ||
101 | } | ||
102 | |||
103 | fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> { | ||
104 | if first_char != '\\' { | ||
105 | return match first_char { | ||
106 | '\t' | '\n' => Err(EscapeError::EscapeOnlyChar), | ||
107 | '\r' => Err(if chars.clone().next() == Some('\n') { | ||
108 | EscapeError::EscapeOnlyChar | ||
109 | } else { | ||
110 | EscapeError::BareCarriageReturn | ||
111 | }), | ||
112 | '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar), | ||
113 | '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar), | ||
114 | _ => { | ||
115 | if mode.is_bytes() && !first_char.is_ascii() { | ||
116 | return Err(EscapeError::NonAsciiCharInByte); | ||
117 | } | ||
118 | Ok(first_char) | ||
119 | } | ||
120 | }; | ||
121 | } | ||
122 | |||
123 | let second_char = chars.next().ok_or(EscapeError::LoneSlash)?; | ||
124 | |||
125 | let res = match second_char { | ||
126 | '"' => '"', | ||
127 | 'n' => '\n', | ||
128 | 'r' => '\r', | ||
129 | 't' => '\t', | ||
130 | '\\' => '\\', | ||
131 | '\'' => '\'', | ||
132 | '0' => '\0', | ||
133 | |||
134 | 'x' => { | ||
135 | let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?; | ||
136 | let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; | ||
137 | |||
138 | let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?; | ||
139 | let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; | ||
140 | |||
141 | let value = hi * 16 + lo; | ||
142 | |||
143 | if !mode.is_bytes() && !is_ascii(value) { | ||
144 | return Err(EscapeError::OutOfRangeHexEscape); | ||
145 | } | ||
146 | let value = value as u8; | ||
147 | |||
148 | value as char | ||
149 | } | ||
150 | |||
151 | 'u' => { | ||
152 | if chars.next() != Some('{') { | ||
153 | return Err(EscapeError::NoBraceInUnicodeEscape); | ||
154 | } | ||
155 | |||
156 | let mut n_digits = 1; | ||
157 | let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { | ||
158 | '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), | ||
159 | '}' => return Err(EscapeError::EmptyUnicodeEscape), | ||
160 | c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?, | ||
161 | }; | ||
162 | |||
163 | loop { | ||
164 | match chars.next() { | ||
165 | None => return Err(EscapeError::UnclosedUnicodeEscape), | ||
166 | Some('_') => continue, | ||
167 | Some('}') => { | ||
168 | if n_digits > 6 { | ||
169 | return Err(EscapeError::OverlongUnicodeEscape); | ||
170 | } | ||
171 | if mode.is_bytes() { | ||
172 | return Err(EscapeError::UnicodeEscapeInByte); | ||
173 | } | ||
174 | |||
175 | break std::char::from_u32(value).ok_or_else(|| { | ||
176 | if value > 0x0010_FFFF { | ||
177 | EscapeError::OutOfRangeUnicodeEscape | ||
178 | } else { | ||
179 | EscapeError::LoneSurrogateUnicodeEscape | ||
180 | } | ||
181 | })?; | ||
182 | } | ||
183 | Some(c) => { | ||
184 | let digit = | ||
185 | c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?; | ||
186 | n_digits += 1; | ||
187 | if n_digits > 6 { | ||
188 | continue; | ||
189 | } | ||
190 | let digit = digit as u32; | ||
191 | value = value * 16 + digit; | ||
192 | } | ||
193 | }; | ||
194 | } | ||
195 | } | ||
196 | _ => return Err(EscapeError::InvalidEscape), | ||
197 | }; | ||
198 | Ok(res) | ||
199 | } | ||
200 | |||
201 | fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> { | ||
202 | let first_char = chars.next().ok_or(EscapeError::ZeroChars)?; | ||
203 | let res = scan_escape(first_char, chars, mode)?; | ||
204 | if chars.next().is_some() { | ||
205 | return Err(EscapeError::MoreThanOneChar); | ||
206 | } | ||
207 | Ok(res) | ||
208 | } | ||
209 | |||
210 | /// Takes the contents of a string or byte string literal (without quotes) and | ||
211 | /// produces a sequence of unescaped characters or errors. | ||
212 | fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F) | ||
213 | where | ||
214 | F: FnMut(Range<usize>, Result<char, EscapeError>), | ||
215 | { | ||
216 | assert!(mode.in_double_quotes()); | ||
217 | let initial_len = src.len(); | ||
218 | let mut chars = src.chars(); | ||
219 | while let Some(first_char) = chars.next() { | ||
220 | let start = initial_len - chars.as_str().len() - first_char.len_utf8(); | ||
221 | |||
222 | let unescaped_char = match first_char { | ||
223 | '\\' => { | ||
224 | let (second_char, third_char) = { | ||
225 | let mut chars = chars.clone(); | ||
226 | (chars.next(), chars.next()) | ||
227 | }; | ||
228 | match (second_char, third_char) { | ||
229 | (Some('\n'), _) | (Some('\r'), Some('\n')) => { | ||
230 | skip_ascii_whitespace(&mut chars); | ||
231 | continue; | ||
232 | } | ||
233 | _ => scan_escape(first_char, &mut chars, mode), | ||
234 | } | ||
235 | } | ||
236 | '\r' => { | ||
237 | let second_char = chars.clone().next(); | ||
238 | if second_char == Some('\n') { | ||
239 | chars.next(); | ||
240 | Ok('\n') | ||
241 | } else { | ||
242 | scan_escape(first_char, &mut chars, mode) | ||
243 | } | ||
244 | } | ||
245 | '\n' => Ok('\n'), | ||
246 | '\t' => Ok('\t'), | ||
247 | _ => scan_escape(first_char, &mut chars, mode), | ||
248 | }; | ||
249 | let end = initial_len - chars.as_str().len(); | ||
250 | callback(start..end, unescaped_char); | ||
251 | } | ||
252 | |||
253 | fn skip_ascii_whitespace(chars: &mut Chars<'_>) { | ||
254 | let str = chars.as_str(); | ||
255 | let first_non_space = str | ||
256 | .bytes() | ||
257 | .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') | ||
258 | .unwrap_or_else(|| str.len()); | ||
259 | *chars = str[first_non_space..].chars() | ||
260 | } | ||
261 | } | ||
262 | |||
263 | fn byte_from_char(c: char) -> u8 { | ||
264 | let res = c as u32; | ||
265 | assert!(res <= u32::from(u8::max_value()), "guaranteed because of Mode::Byte"); | ||
266 | res as u8 | ||
267 | } | ||
268 | |||
269 | fn is_ascii(x: u32) -> bool { | ||
270 | x <= 0x7F | ||
271 | } | ||
272 | |||
273 | #[cfg(test)] | ||
274 | mod tests { | ||
275 | use super::*; | ||
276 | |||
277 | #[test] | ||
278 | fn test_unescape_char_bad() { | ||
279 | fn check(literal_text: &str, expected_error: EscapeError) { | ||
280 | let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err); | ||
281 | assert_eq!(actual_result, Err(expected_error)); | ||
282 | } | ||
283 | |||
284 | check("", EscapeError::ZeroChars); | ||
285 | check(r"\", EscapeError::LoneSlash); | ||
286 | |||
287 | check("\n", EscapeError::EscapeOnlyChar); | ||
288 | check("\r\n", EscapeError::EscapeOnlyChar); | ||
289 | check("\t", EscapeError::EscapeOnlyChar); | ||
290 | check("'", EscapeError::EscapeOnlyChar); | ||
291 | check("\r", EscapeError::BareCarriageReturn); | ||
292 | |||
293 | check("spam", EscapeError::MoreThanOneChar); | ||
294 | check(r"\x0ff", EscapeError::MoreThanOneChar); | ||
295 | check(r#"\"a"#, EscapeError::MoreThanOneChar); | ||
296 | check(r"\na", EscapeError::MoreThanOneChar); | ||
297 | check(r"\ra", EscapeError::MoreThanOneChar); | ||
298 | check(r"\ta", EscapeError::MoreThanOneChar); | ||
299 | check(r"\\a", EscapeError::MoreThanOneChar); | ||
300 | check(r"\'a", EscapeError::MoreThanOneChar); | ||
301 | check(r"\0a", EscapeError::MoreThanOneChar); | ||
302 | check(r"\u{0}x", EscapeError::MoreThanOneChar); | ||
303 | check(r"\u{1F63b}}", EscapeError::MoreThanOneChar); | ||
304 | |||
305 | check(r"\v", EscapeError::InvalidEscape); | ||
306 | check(r"\💩", EscapeError::InvalidEscape); | ||
307 | check(r"\●", EscapeError::InvalidEscape); | ||
308 | |||
309 | check(r"\x", EscapeError::TooShortHexEscape); | ||
310 | check(r"\x0", EscapeError::TooShortHexEscape); | ||
311 | check(r"\xf", EscapeError::TooShortHexEscape); | ||
312 | check(r"\xa", EscapeError::TooShortHexEscape); | ||
313 | check(r"\xx", EscapeError::InvalidCharInHexEscape); | ||
314 | check(r"\xы", EscapeError::InvalidCharInHexEscape); | ||
315 | check(r"\x🦀", EscapeError::InvalidCharInHexEscape); | ||
316 | check(r"\xtt", EscapeError::InvalidCharInHexEscape); | ||
317 | check(r"\xff", EscapeError::OutOfRangeHexEscape); | ||
318 | check(r"\xFF", EscapeError::OutOfRangeHexEscape); | ||
319 | check(r"\x80", EscapeError::OutOfRangeHexEscape); | ||
320 | |||
321 | check(r"\u", EscapeError::NoBraceInUnicodeEscape); | ||
322 | check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape); | ||
323 | check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape); | ||
324 | check(r"\u{", EscapeError::UnclosedUnicodeEscape); | ||
325 | check(r"\u{0000", EscapeError::UnclosedUnicodeEscape); | ||
326 | check(r"\u{}", EscapeError::EmptyUnicodeEscape); | ||
327 | check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape); | ||
328 | check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape); | ||
329 | check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape); | ||
330 | check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); | ||
331 | check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape); | ||
332 | |||
333 | check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape); | ||
334 | check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape); | ||
335 | check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape); | ||
336 | |||
337 | check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape); | ||
338 | check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape); | ||
339 | check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape); | ||
340 | } | ||
341 | |||
342 | #[test] | ||
343 | fn test_unescape_char_good() { | ||
344 | fn check(literal_text: &str, expected_char: char) { | ||
345 | let actual_result = unescape_char(literal_text); | ||
346 | assert_eq!(actual_result, Ok(expected_char)); | ||
347 | } | ||
348 | |||
349 | check("a", 'a'); | ||
350 | check("ы", 'ы'); | ||
351 | check("🦀", '🦀'); | ||
352 | |||
353 | check(r#"\""#, '"'); | ||
354 | check(r"\n", '\n'); | ||
355 | check(r"\r", '\r'); | ||
356 | check(r"\t", '\t'); | ||
357 | check(r"\\", '\\'); | ||
358 | check(r"\'", '\''); | ||
359 | check(r"\0", '\0'); | ||
360 | |||
361 | check(r"\x00", '\0'); | ||
362 | check(r"\x5a", 'Z'); | ||
363 | check(r"\x5A", 'Z'); | ||
364 | check(r"\x7f", 127 as char); | ||
365 | |||
366 | check(r"\u{0}", '\0'); | ||
367 | check(r"\u{000000}", '\0'); | ||
368 | check(r"\u{41}", 'A'); | ||
369 | check(r"\u{0041}", 'A'); | ||
370 | check(r"\u{00_41}", 'A'); | ||
371 | check(r"\u{4__1__}", 'A'); | ||
372 | check(r"\u{1F63b}", '😻'); | ||
373 | } | ||
374 | |||
375 | #[test] | ||
376 | fn test_unescape_str_good() { | ||
377 | fn check(literal_text: &str, expected: &str) { | ||
378 | let mut buf = Ok(String::with_capacity(literal_text.len())); | ||
379 | unescape_str(literal_text, &mut |range, c| { | ||
380 | if let Ok(b) = &mut buf { | ||
381 | match c { | ||
382 | Ok(c) => b.push(c), | ||
383 | Err(e) => buf = Err((range, e)), | ||
384 | } | ||
385 | } | ||
386 | }); | ||
387 | let buf = buf.as_ref().map(|it| it.as_ref()); | ||
388 | assert_eq!(buf, Ok(expected)) | ||
389 | } | ||
390 | |||
391 | check("foo", "foo"); | ||
392 | check("", ""); | ||
393 | check(" \t\n\r\n", " \t\n\n"); | ||
394 | |||
395 | check("hello \\\n world", "hello world"); | ||
396 | check("hello \\\r\n world", "hello world"); | ||
397 | check("thread's", "thread's") | ||
398 | } | ||
399 | |||
400 | #[test] | ||
401 | fn test_unescape_byte_bad() { | ||
402 | fn check(literal_text: &str, expected_error: EscapeError) { | ||
403 | let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err); | ||
404 | assert_eq!(actual_result, Err(expected_error)); | ||
405 | } | ||
406 | |||
407 | check("", EscapeError::ZeroChars); | ||
408 | check(r"\", EscapeError::LoneSlash); | ||
409 | |||
410 | check("\n", EscapeError::EscapeOnlyChar); | ||
411 | check("\r\n", EscapeError::EscapeOnlyChar); | ||
412 | check("\t", EscapeError::EscapeOnlyChar); | ||
413 | check("'", EscapeError::EscapeOnlyChar); | ||
414 | check("\r", EscapeError::BareCarriageReturn); | ||
415 | |||
416 | check("spam", EscapeError::MoreThanOneChar); | ||
417 | check(r"\x0ff", EscapeError::MoreThanOneChar); | ||
418 | check(r#"\"a"#, EscapeError::MoreThanOneChar); | ||
419 | check(r"\na", EscapeError::MoreThanOneChar); | ||
420 | check(r"\ra", EscapeError::MoreThanOneChar); | ||
421 | check(r"\ta", EscapeError::MoreThanOneChar); | ||
422 | check(r"\\a", EscapeError::MoreThanOneChar); | ||
423 | check(r"\'a", EscapeError::MoreThanOneChar); | ||
424 | check(r"\0a", EscapeError::MoreThanOneChar); | ||
425 | |||
426 | check(r"\v", EscapeError::InvalidEscape); | ||
427 | check(r"\💩", EscapeError::InvalidEscape); | ||
428 | check(r"\●", EscapeError::InvalidEscape); | ||
429 | |||
430 | check(r"\x", EscapeError::TooShortHexEscape); | ||
431 | check(r"\x0", EscapeError::TooShortHexEscape); | ||
432 | check(r"\xa", EscapeError::TooShortHexEscape); | ||
433 | check(r"\xf", EscapeError::TooShortHexEscape); | ||
434 | check(r"\xx", EscapeError::InvalidCharInHexEscape); | ||
435 | check(r"\xы", EscapeError::InvalidCharInHexEscape); | ||
436 | check(r"\x🦀", EscapeError::InvalidCharInHexEscape); | ||
437 | check(r"\xtt", EscapeError::InvalidCharInHexEscape); | ||
438 | |||
439 | check(r"\u", EscapeError::NoBraceInUnicodeEscape); | ||
440 | check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape); | ||
441 | check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape); | ||
442 | check(r"\u{", EscapeError::UnclosedUnicodeEscape); | ||
443 | check(r"\u{0000", EscapeError::UnclosedUnicodeEscape); | ||
444 | check(r"\u{}", EscapeError::EmptyUnicodeEscape); | ||
445 | check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape); | ||
446 | check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape); | ||
447 | |||
448 | check("ы", EscapeError::NonAsciiCharInByte); | ||
449 | check("🦀", EscapeError::NonAsciiCharInByte); | ||
450 | |||
451 | check(r"\u{0}", EscapeError::UnicodeEscapeInByte); | ||
452 | check(r"\u{000000}", EscapeError::UnicodeEscapeInByte); | ||
453 | check(r"\u{41}", EscapeError::UnicodeEscapeInByte); | ||
454 | check(r"\u{0041}", EscapeError::UnicodeEscapeInByte); | ||
455 | check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte); | ||
456 | check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte); | ||
457 | check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte); | ||
458 | check(r"\u{0}x", EscapeError::UnicodeEscapeInByte); | ||
459 | check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte); | ||
460 | check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte); | ||
461 | check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); | ||
462 | check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte); | ||
463 | check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte); | ||
464 | check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte); | ||
465 | check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte); | ||
466 | check(r"\u{D800}", EscapeError::UnicodeEscapeInByte); | ||
467 | check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte); | ||
468 | check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte); | ||
469 | } | ||
470 | |||
471 | #[test] | ||
472 | fn test_unescape_byte_good() { | ||
473 | fn check(literal_text: &str, expected_byte: u8) { | ||
474 | let actual_result = unescape_byte(literal_text); | ||
475 | assert_eq!(actual_result, Ok(expected_byte)); | ||
476 | } | ||
477 | |||
478 | check("a", b'a'); | ||
479 | |||
480 | check(r#"\""#, b'"'); | ||
481 | check(r"\n", b'\n'); | ||
482 | check(r"\r", b'\r'); | ||
483 | check(r"\t", b'\t'); | ||
484 | check(r"\\", b'\\'); | ||
485 | check(r"\'", b'\''); | ||
486 | check(r"\0", b'\0'); | ||
487 | |||
488 | check(r"\x00", b'\0'); | ||
489 | check(r"\x5a", b'Z'); | ||
490 | check(r"\x5A", b'Z'); | ||
491 | check(r"\x7f", 127); | ||
492 | check(r"\x80", 128); | ||
493 | check(r"\xff", 255); | ||
494 | check(r"\xFF", 255); | ||
495 | } | ||
496 | |||
497 | #[test] | ||
498 | fn test_unescape_byte_str_good() { | ||
499 | fn check(literal_text: &str, expected: &[u8]) { | ||
500 | let mut buf = Ok(Vec::with_capacity(literal_text.len())); | ||
501 | unescape_byte_str(literal_text, &mut |range, c| { | ||
502 | if let Ok(b) = &mut buf { | ||
503 | match c { | ||
504 | Ok(c) => b.push(c), | ||
505 | Err(e) => buf = Err((range, e)), | ||
506 | } | ||
507 | } | ||
508 | }); | ||
509 | let buf = buf.as_ref().map(|it| it.as_ref()); | ||
510 | assert_eq!(buf, Ok(expected)) | ||
511 | } | ||
512 | |||
513 | check("foo", b"foo"); | ||
514 | check("", b""); | ||
515 | check(" \t\n\r\n", b" \t\n\n"); | ||
516 | |||
517 | check("hello \\\n world", b"hello world"); | ||
518 | check("hello \\\r\n world", b"hello world"); | ||
519 | check("thread's", b"thread's") | ||
520 | } | ||
521 | } | ||