aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_syntax/src')
-rw-r--r--crates/ra_syntax/src/lib.rs2
-rw-r--r--crates/ra_syntax/src/tests.rs101
-rw-r--r--crates/ra_syntax/src/validation.rs91
-rw-r--r--crates/ra_syntax/src/validation/unescape.rs521
4 files changed, 190 insertions, 525 deletions
diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs
index 7f69b86e1..4c4e0580a 100644
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@@ -24,6 +24,8 @@ mod syntax_error;
24mod parsing; 24mod parsing;
25mod validation; 25mod validation;
26mod ptr; 26mod ptr;
27#[cfg(test)]
28mod tests;
27 29
28pub mod algo; 30pub mod algo;
29pub mod ast; 31pub mod ast;
diff --git a/crates/ra_syntax/src/tests.rs b/crates/ra_syntax/src/tests.rs
new file mode 100644
index 000000000..fa5d2d5d8
--- /dev/null
+++ b/crates/ra_syntax/src/tests.rs
@@ -0,0 +1,101 @@
1use std::{
2 fmt::Write,
3 path::{Component, PathBuf},
4};
5
6use test_utils::{collect_tests, dir_tests, project_dir, read_text};
7
8use crate::{fuzz, SourceFile};
9
/// Runs the lexer over every fixture in the `lexer` directory of the test
/// data and hands the textual token dump back to `dir_tests`.
#[test]
fn lexer_tests() {
    let data_dir = test_data_dir();
    dir_tests(&data_dir, &["lexer"], |source, _| dump_tokens(&crate::tokenize(source), source))
}
17
/// Parses the parser fixtures: files in the `ok` directories must parse
/// without errors, files in the `err` directories must report at least one.
/// In both cases the debug dump of the parse is handed back to `dir_tests`.
#[test]
fn parser_tests() {
    let data_dir = test_data_dir();

    // Inputs that are expected to be valid.
    dir_tests(&data_dir, &["parser/inline/ok", "parser/ok"], |source, file| {
        let parsed = SourceFile::parse(source);
        assert_eq!(
            parsed.errors(),
            &[] as &[crate::SyntaxError],
            "There should be no errors in the file {:?}",
            file.display(),
        );
        parsed.debug_dump()
    });

    // Inputs that are expected to be rejected.
    dir_tests(&data_dir, &["parser/err", "parser/inline/err"], |source, file| {
        let parsed = SourceFile::parse(source);
        let has_errors = !parsed.errors().is_empty();
        assert!(has_errors, "There should be errors in the file {:?}", file.display());
        parsed.debug_dump()
    });
}
38
/// Replays every input stored under `parser/fuzz-failures` through the
/// parser fuzz check.
#[test]
fn parser_fuzz_tests() {
    let data_dir = test_data_dir();
    for (_path, source) in collect_tests(&data_dir, &["parser/fuzz-failures"]) {
        fuzz::check_parser(&source)
    }
}
45
/// Replays every input stored under `reparse/fuzz-failures` through the
/// reparse fuzz check, printing each decoded case before running it.
#[test]
fn reparse_fuzz_tests() {
    let data_dir = test_data_dir();
    for (_path, source) in collect_tests(&data_dir, &["reparse/fuzz-failures"]) {
        let case = fuzz::CheckReparse::from_data(source.as_bytes()).unwrap();
        println!("{:?}", case);
        case.run();
    }
}
54
/// Tests that rust-analyzer can parse and validate its own source code:
/// every `.rs` file under `crates/` (outside of `test_data` directories)
/// must parse without errors.
/// FIXME: Use this as a benchmark
#[test]
fn self_hosting_parsing() {
    use std::ffi::OsStr;
    let dir = project_dir().join("crates");
    let mut count = 0;
    for entry in walkdir::WalkDir::new(dir)
        .into_iter()
        .filter_entry(|entry| {
            !entry.path().components().any(|component| {
                // Skip anything below a `test_data` directory; the fixtures in
                // `crates/ra_syntax/test_data` include files that must fail to parse.
                component == Component::Normal(OsStr::new("test_data"))
            })
        })
        .map(|e| e.unwrap())
        .filter(|entry| {
            // Keep only `.rs` files
            !entry.path().is_dir() && (entry.path().extension() == Some(OsStr::new("rs")))
        })
    {
        count += 1;
        let text = read_text(entry.path());
        // Name the offending file so a failure can be tracked down without re-running.
        let message =
            format!("There should be no errors in the file {:?}", entry.path().display());
        SourceFile::parse(&text).ok().expect(&message);
    }
    // Guards against the walk silently visiting (almost) nothing.
    assert!(
        count > 30,
        "self_hosting_parsing found too few files - is it running in the right directory?"
    )
}
85
86fn test_data_dir() -> PathBuf {
87 project_dir().join("crates/ra_syntax/test_data")
88}
89
90fn dump_tokens(tokens: &[crate::Token], text: &str) -> String {
91 let mut acc = String::new();
92 let mut offset = 0;
93 for token in tokens {
94 let len: u32 = token.len.into();
95 let len = len as usize;
96 let token_text = &text[offset..offset + len];
97 offset += len;
98 write!(acc, "{:?} {} {:?}\n", token.kind, token.len, token_text).unwrap()
99 }
100 acc
101}
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs
index e03c02d1b..1f904434e 100644
--- a/crates/ra_syntax/src/validation.rs
+++ b/crates/ra_syntax/src/validation.rs
@@ -1,16 +1,99 @@
1mod unescape;
2
3mod block; 1mod block;
4mod field_expr; 2mod field_expr;
5 3
4use ra_rustc_lexer::unescape;
5
6use crate::{ 6use crate::{
7 algo::visit::{visitor_ctx, VisitorCtx}, 7 algo::visit::{visitor_ctx, VisitorCtx},
8 ast, SyntaxError, 8 ast, SyntaxError, SyntaxErrorKind,
9 SyntaxKind::{BYTE, BYTE_STRING, CHAR, STRING}, 9 SyntaxKind::{BYTE, BYTE_STRING, CHAR, STRING},
10 SyntaxNode, TextUnit, T, 10 SyntaxNode, TextUnit, T,
11}; 11};
12 12
/// Reasons a char, byte, string or byte-string literal can fail to unescape.
///
/// This is the crate's own copy of `ra_rustc_lexer::unescape::EscapeError`;
/// see the `From` impl below for how the lexer's variants map onto it.
/// The variant order is significant because `PartialOrd`/`Ord` are derived.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum EscapeError {
    // Wrong number of characters in a char/byte literal.
    ZeroChars,
    MoreThanOneChar,

    // Problems with a backslash or with a character that must be escaped.
    LoneSlash,
    InvalidEscape,
    BareCarriageReturn,
    EscapeOnlyChar,

    // Problems inside a `\x..` escape.
    TooShortHexEscape,
    InvalidCharInHexEscape,
    OutOfRangeHexEscape,

    // Problems inside a `\u{..}` escape.
    NoBraceInUnicodeEscape,
    InvalidCharInUnicodeEscape,
    EmptyUnicodeEscape,
    UnclosedUnicodeEscape,
    LeadingUnderscoreUnicodeEscape,
    OverlongUnicodeEscape,
    LoneSurrogateUnicodeEscape,
    OutOfRangeUnicodeEscape,

    // Problems specific to byte and byte-string literals.
    UnicodeEscapeInByte,
    NonAsciiCharInByte,
}
35
36impl From<ra_rustc_lexer::unescape::EscapeError> for EscapeError {
37 fn from(err: ra_rustc_lexer::unescape::EscapeError) -> Self {
38 match err {
39 ra_rustc_lexer::unescape::EscapeError::ZeroChars => EscapeError::ZeroChars,
40 ra_rustc_lexer::unescape::EscapeError::MoreThanOneChar => EscapeError::MoreThanOneChar,
41 ra_rustc_lexer::unescape::EscapeError::LoneSlash => EscapeError::LoneSlash,
42 ra_rustc_lexer::unescape::EscapeError::InvalidEscape => EscapeError::InvalidEscape,
43 ra_rustc_lexer::unescape::EscapeError::BareCarriageReturn
44 | ra_rustc_lexer::unescape::EscapeError::BareCarriageReturnInRawString => {
45 EscapeError::BareCarriageReturn
46 }
47 ra_rustc_lexer::unescape::EscapeError::EscapeOnlyChar => EscapeError::EscapeOnlyChar,
48 ra_rustc_lexer::unescape::EscapeError::TooShortHexEscape => {
49 EscapeError::TooShortHexEscape
50 }
51 ra_rustc_lexer::unescape::EscapeError::InvalidCharInHexEscape => {
52 EscapeError::InvalidCharInHexEscape
53 }
54 ra_rustc_lexer::unescape::EscapeError::OutOfRangeHexEscape => {
55 EscapeError::OutOfRangeHexEscape
56 }
57 ra_rustc_lexer::unescape::EscapeError::NoBraceInUnicodeEscape => {
58 EscapeError::NoBraceInUnicodeEscape
59 }
60 ra_rustc_lexer::unescape::EscapeError::InvalidCharInUnicodeEscape => {
61 EscapeError::InvalidCharInUnicodeEscape
62 }
63 ra_rustc_lexer::unescape::EscapeError::EmptyUnicodeEscape => {
64 EscapeError::EmptyUnicodeEscape
65 }
66 ra_rustc_lexer::unescape::EscapeError::UnclosedUnicodeEscape => {
67 EscapeError::UnclosedUnicodeEscape
68 }
69 ra_rustc_lexer::unescape::EscapeError::LeadingUnderscoreUnicodeEscape => {
70 EscapeError::LeadingUnderscoreUnicodeEscape
71 }
72 ra_rustc_lexer::unescape::EscapeError::OverlongUnicodeEscape => {
73 EscapeError::OverlongUnicodeEscape
74 }
75 ra_rustc_lexer::unescape::EscapeError::LoneSurrogateUnicodeEscape => {
76 EscapeError::LoneSurrogateUnicodeEscape
77 }
78 ra_rustc_lexer::unescape::EscapeError::OutOfRangeUnicodeEscape => {
79 EscapeError::OutOfRangeUnicodeEscape
80 }
81 ra_rustc_lexer::unescape::EscapeError::UnicodeEscapeInByte => {
82 EscapeError::UnicodeEscapeInByte
83 }
84 ra_rustc_lexer::unescape::EscapeError::NonAsciiCharInByte
85 | ra_rustc_lexer::unescape::EscapeError::NonAsciiCharInByteString => {
86 EscapeError::NonAsciiCharInByte
87 }
88 }
89 }
90}
91
92impl From<ra_rustc_lexer::unescape::EscapeError> for SyntaxErrorKind {
93 fn from(err: ra_rustc_lexer::unescape::EscapeError) -> Self {
94 SyntaxErrorKind::EscapeError(err.into())
95 }
96}
14 97
15pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> { 98pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
16 let mut errors = Vec::new(); 99 let mut errors = Vec::new();
diff --git a/crates/ra_syntax/src/validation/unescape.rs b/crates/ra_syntax/src/validation/unescape.rs
deleted file mode 100644
index 7eed6c663..000000000
--- a/crates/ra_syntax/src/validation/unescape.rs
+++ /dev/null
@@ -1,521 +0,0 @@
1//! Utilities for validating string and char literals and turning them into
2//! values they represent.
3//!
4//! This file is copy-pasted from the compiler
5//!
6//! https://github.com/rust-lang/rust/blob/c6ac57564852cb6e2d0db60f7b46d9eb98d4b449/src/libsyntax/parse/unescape.rs
7//!
8//! Hopefully, we'll share this code in a proper way some day
9
10use std::ops::Range;
11use std::str::Chars;
12
/// Reasons why the contents of a char, byte, string or byte-string literal
/// could not be unescaped.
#[derive(Debug, PartialEq, Eq, Clone, Hash)]
pub enum EscapeError {
    /// A char or byte literal with nothing in it.
    ZeroChars,
    /// A char or byte literal with something left over after the first char.
    MoreThanOneChar,

    /// A backslash with nothing after it.
    LoneSlash,
    /// A backslash followed by a character that does not start an escape.
    InvalidEscape,
    /// A raw `\r` that is not followed by `\n`.
    BareCarriageReturn,
    /// A raw character that has to be written as an escape (tab, newline,
    /// `\r\n`, or the quote that delimits the literal).
    EscapeOnlyChar,

    /// Input ended before both digits of a `\x` escape were read.
    TooShortHexEscape,
    /// A non-hexadecimal character inside a `\x` escape.
    InvalidCharInHexEscape,
    /// A `\x` escape above `0x7F` outside of byte/byte-string literals.
    OutOfRangeHexEscape,

    /// `\u` not followed by `{`.
    NoBraceInUnicodeEscape,
    /// A non-hexadecimal character inside `\u{...}`.
    InvalidCharInUnicodeEscape,
    /// `\u{}`.
    EmptyUnicodeEscape,
    /// Input ended before the closing `}` of a `\u{...}` escape.
    UnclosedUnicodeEscape,
    /// `\u{_...}`: the first character after `{` is an underscore.
    LeadingUnderscoreUnicodeEscape,
    /// More than six hexadecimal digits inside `\u{...}`.
    OverlongUnicodeEscape,
    /// A `\u{...}` value of at most `0x10FFFF` that is not a valid `char`.
    LoneSurrogateUnicodeEscape,
    /// A `\u{...}` value above `0x10FFFF`.
    OutOfRangeUnicodeEscape,

    /// A `\u{...}` escape inside a byte or byte-string literal.
    UnicodeEscapeInByte,
    /// A raw non-ASCII character inside a byte or byte-string literal.
    NonAsciiCharInByte,
}
39
40/// Takes a contents of a char literal (without quotes), and returns an
41/// unescaped char or an error
42pub(crate) fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
43 let mut chars = literal_text.chars();
44 unescape_char_or_byte(&mut chars, Mode::Char)
45 .map_err(|err| (literal_text.len() - chars.as_str().len(), err))
46}
47
48/// Takes a contents of a string literal (without quotes) and produces a
49/// sequence of escaped characters or errors.
50pub(crate) fn unescape_str<F>(literal_text: &str, callback: &mut F)
51where
52 F: FnMut(Range<usize>, Result<char, EscapeError>),
53{
54 unescape_str_or_byte_str(literal_text, Mode::Str, callback)
55}
56
57pub(crate) fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
58 let mut chars = literal_text.chars();
59 unescape_char_or_byte(&mut chars, Mode::Byte)
60 .map(byte_from_char)
61 .map_err(|err| (literal_text.len() - chars.as_str().len(), err))
62}
63
64/// Takes a contents of a string literal (without quotes) and produces a
65/// sequence of escaped characters or errors.
66pub(crate) fn unescape_byte_str<F>(literal_text: &str, callback: &mut F)
67where
68 F: FnMut(Range<usize>, Result<u8, EscapeError>),
69{
70 unescape_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
71 callback(range, char.map(byte_from_char))
72 })
73}
74
/// The kind of literal being unescaped.
#[derive(Debug, Clone, Copy)]
pub(crate) enum Mode {
    Char,
    Str,
    Byte,
    ByteStr,
}

impl Mode {
    /// `true` for the literal kinds delimited by `'` (char and byte).
    fn in_single_quotes(self) -> bool {
        match self {
            Mode::Str | Mode::ByteStr => false,
            Mode::Char | Mode::Byte => true,
        }
    }

    /// `true` for the literal kinds delimited by `"` (string and byte string).
    pub(crate) fn in_double_quotes(self) -> bool {
        match self {
            Mode::Str | Mode::ByteStr => true,
            Mode::Char | Mode::Byte => false,
        }
    }

    /// `true` for the byte-oriented literal kinds (byte and byte string).
    pub(crate) fn is_bytes(self) -> bool {
        match self {
            Mode::Char | Mode::Str => false,
            Mode::Byte | Mode::ByteStr => true,
        }
    }
}
102
103fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
104 if first_char != '\\' {
105 return match first_char {
106 '\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
107 '\r' => Err(if chars.clone().next() == Some('\n') {
108 EscapeError::EscapeOnlyChar
109 } else {
110 EscapeError::BareCarriageReturn
111 }),
112 '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
113 '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
114 _ => {
115 if mode.is_bytes() && !first_char.is_ascii() {
116 return Err(EscapeError::NonAsciiCharInByte);
117 }
118 Ok(first_char)
119 }
120 };
121 }
122
123 let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
124
125 let res = match second_char {
126 '"' => '"',
127 'n' => '\n',
128 'r' => '\r',
129 't' => '\t',
130 '\\' => '\\',
131 '\'' => '\'',
132 '0' => '\0',
133
134 'x' => {
135 let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
136 let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
137
138 let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
139 let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
140
141 let value = hi * 16 + lo;
142
143 if !mode.is_bytes() && !is_ascii(value) {
144 return Err(EscapeError::OutOfRangeHexEscape);
145 }
146 let value = value as u8;
147
148 value as char
149 }
150
151 'u' => {
152 if chars.next() != Some('{') {
153 return Err(EscapeError::NoBraceInUnicodeEscape);
154 }
155
156 let mut n_digits = 1;
157 let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
158 '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
159 '}' => return Err(EscapeError::EmptyUnicodeEscape),
160 c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
161 };
162
163 loop {
164 match chars.next() {
165 None => return Err(EscapeError::UnclosedUnicodeEscape),
166 Some('_') => continue,
167 Some('}') => {
168 if n_digits > 6 {
169 return Err(EscapeError::OverlongUnicodeEscape);
170 }
171 if mode.is_bytes() {
172 return Err(EscapeError::UnicodeEscapeInByte);
173 }
174
175 break std::char::from_u32(value).ok_or_else(|| {
176 if value > 0x0010_FFFF {
177 EscapeError::OutOfRangeUnicodeEscape
178 } else {
179 EscapeError::LoneSurrogateUnicodeEscape
180 }
181 })?;
182 }
183 Some(c) => {
184 let digit =
185 c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
186 n_digits += 1;
187 if n_digits > 6 {
188 continue;
189 }
190 let digit = digit as u32;
191 value = value * 16 + digit;
192 }
193 };
194 }
195 }
196 _ => return Err(EscapeError::InvalidEscape),
197 };
198 Ok(res)
199}
200
201fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
202 let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
203 let res = scan_escape(first_char, chars, mode)?;
204 if chars.next().is_some() {
205 return Err(EscapeError::MoreThanOneChar);
206 }
207 Ok(res)
208}
209
210/// Takes a contents of a string literal (without quotes) and produces a
211/// sequence of escaped characters or errors.
212fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
213where
214 F: FnMut(Range<usize>, Result<char, EscapeError>),
215{
216 assert!(mode.in_double_quotes());
217 let initial_len = src.len();
218 let mut chars = src.chars();
219 while let Some(first_char) = chars.next() {
220 let start = initial_len - chars.as_str().len() - first_char.len_utf8();
221
222 let unescaped_char = match first_char {
223 '\\' => {
224 let (second_char, third_char) = {
225 let mut chars = chars.clone();
226 (chars.next(), chars.next())
227 };
228 match (second_char, third_char) {
229 (Some('\n'), _) | (Some('\r'), Some('\n')) => {
230 skip_ascii_whitespace(&mut chars);
231 continue;
232 }
233 _ => scan_escape(first_char, &mut chars, mode),
234 }
235 }
236 '\r' => {
237 let second_char = chars.clone().next();
238 if second_char == Some('\n') {
239 chars.next();
240 Ok('\n')
241 } else {
242 scan_escape(first_char, &mut chars, mode)
243 }
244 }
245 '\n' => Ok('\n'),
246 '\t' => Ok('\t'),
247 _ => scan_escape(first_char, &mut chars, mode),
248 };
249 let end = initial_len - chars.as_str().len();
250 callback(start..end, unescaped_char);
251 }
252
253 fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
254 let str = chars.as_str();
255 let first_non_space = str
256 .bytes()
257 .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
258 .unwrap_or_else(|| str.len());
259 *chars = str[first_non_space..].chars()
260 }
261}
262
/// Narrows a `char` to the byte with the same numeric value.
///
/// Panics if the value does not fit into a `u8`.
fn byte_from_char(c: char) -> u8 {
    let code = u32::from(c);
    assert!(code <= u32::from(u8::max_value()), "guaranteed because of Mode::Byte");
    code as u8
}
268
/// Whether `x` lies in the ASCII range `0x00..=0x7F`.
fn is_ascii(x: u32) -> bool {
    x < 0x80
}
272
/// Unit tests for the unescaping entry points. Each test defines a local
/// `check` helper and feeds it literal contents (without the surrounding
/// quotes) together with the expected outcome.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_unescape_char_bad() {
        // Only the error kind is compared; the reported offset is ignored.
        fn check(literal_text: &str, expected_error: EscapeError) {
            let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err);
            assert_eq!(actual_result, Err(expected_error));
        }

        check("", EscapeError::ZeroChars);
        check(r"\", EscapeError::LoneSlash);

        check("\n", EscapeError::EscapeOnlyChar);
        check("\r\n", EscapeError::EscapeOnlyChar);
        check("\t", EscapeError::EscapeOnlyChar);
        check("'", EscapeError::EscapeOnlyChar);
        check("\r", EscapeError::BareCarriageReturn);

        check("spam", EscapeError::MoreThanOneChar);
        check(r"\x0ff", EscapeError::MoreThanOneChar);
        check(r#"\"a"#, EscapeError::MoreThanOneChar);
        check(r"\na", EscapeError::MoreThanOneChar);
        check(r"\ra", EscapeError::MoreThanOneChar);
        check(r"\ta", EscapeError::MoreThanOneChar);
        check(r"\\a", EscapeError::MoreThanOneChar);
        check(r"\'a", EscapeError::MoreThanOneChar);
        check(r"\0a", EscapeError::MoreThanOneChar);
        check(r"\u{0}x", EscapeError::MoreThanOneChar);
        check(r"\u{1F63b}}", EscapeError::MoreThanOneChar);

        check(r"\v", EscapeError::InvalidEscape);
        check(r"\💩", EscapeError::InvalidEscape);
        check(r"\●", EscapeError::InvalidEscape);

        check(r"\x", EscapeError::TooShortHexEscape);
        check(r"\x0", EscapeError::TooShortHexEscape);
        check(r"\xf", EscapeError::TooShortHexEscape);
        check(r"\xa", EscapeError::TooShortHexEscape);
        check(r"\xx", EscapeError::InvalidCharInHexEscape);
        check(r"\xы", EscapeError::InvalidCharInHexEscape);
        check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
        check(r"\xtt", EscapeError::InvalidCharInHexEscape);
        check(r"\xff", EscapeError::OutOfRangeHexEscape);
        check(r"\xFF", EscapeError::OutOfRangeHexEscape);
        check(r"\x80", EscapeError::OutOfRangeHexEscape);

        check(r"\u", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
        check(r"\u{", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{}", EscapeError::EmptyUnicodeEscape);
        check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
        check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);
        check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape);
        check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape);

        check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape);

        check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape);
        check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape);
    }

    #[test]
    fn test_unescape_char_good() {
        fn check(literal_text: &str, expected_char: char) {
            let actual_result = unescape_char(literal_text);
            assert_eq!(actual_result, Ok(expected_char));
        }

        check("a", 'a');
        check("ы", 'ы');
        check("🦀", '🦀');

        check(r#"\""#, '"');
        check(r"\n", '\n');
        check(r"\r", '\r');
        check(r"\t", '\t');
        check(r"\\", '\\');
        check(r"\'", '\'');
        check(r"\0", '\0');

        check(r"\x00", '\0');
        check(r"\x5a", 'Z');
        check(r"\x5A", 'Z');
        check(r"\x7f", 127 as char);

        check(r"\u{0}", '\0');
        check(r"\u{000000}", '\0');
        check(r"\u{41}", 'A');
        check(r"\u{0041}", 'A');
        check(r"\u{00_41}", 'A');
        check(r"\u{4__1__}", 'A');
        check(r"\u{1F63b}", '😻');
    }

    #[test]
    fn test_unescape_str_good() {
        // Collects the unescaped chars, keeping only the first error (if any).
        fn check(literal_text: &str, expected: &str) {
            let mut buf = Ok(String::with_capacity(literal_text.len()));
            unescape_str(literal_text, &mut |range, c| {
                if let Ok(b) = &mut buf {
                    match c {
                        Ok(c) => b.push(c),
                        Err(e) => buf = Err((range, e)),
                    }
                }
            });
            let buf = buf.as_ref().map(|it| it.as_ref());
            assert_eq!(buf, Ok(expected))
        }

        check("foo", "foo");
        check("", "");
        check(" \t\n\r\n", " \t\n\n");

        check("hello \\\n world", "hello world");
        check("hello \\\r\n world", "hello world");
        check("thread's", "thread's")
    }

    #[test]
    fn test_unescape_byte_bad() {
        // Only the error kind is compared; the reported offset is ignored.
        fn check(literal_text: &str, expected_error: EscapeError) {
            let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err);
            assert_eq!(actual_result, Err(expected_error));
        }

        check("", EscapeError::ZeroChars);
        check(r"\", EscapeError::LoneSlash);

        check("\n", EscapeError::EscapeOnlyChar);
        check("\r\n", EscapeError::EscapeOnlyChar);
        check("\t", EscapeError::EscapeOnlyChar);
        check("'", EscapeError::EscapeOnlyChar);
        check("\r", EscapeError::BareCarriageReturn);

        check("spam", EscapeError::MoreThanOneChar);
        check(r"\x0ff", EscapeError::MoreThanOneChar);
        check(r#"\"a"#, EscapeError::MoreThanOneChar);
        check(r"\na", EscapeError::MoreThanOneChar);
        check(r"\ra", EscapeError::MoreThanOneChar);
        check(r"\ta", EscapeError::MoreThanOneChar);
        check(r"\\a", EscapeError::MoreThanOneChar);
        check(r"\'a", EscapeError::MoreThanOneChar);
        check(r"\0a", EscapeError::MoreThanOneChar);

        check(r"\v", EscapeError::InvalidEscape);
        check(r"\💩", EscapeError::InvalidEscape);
        check(r"\●", EscapeError::InvalidEscape);

        check(r"\x", EscapeError::TooShortHexEscape);
        check(r"\x0", EscapeError::TooShortHexEscape);
        check(r"\xa", EscapeError::TooShortHexEscape);
        check(r"\xf", EscapeError::TooShortHexEscape);
        check(r"\xx", EscapeError::InvalidCharInHexEscape);
        check(r"\xы", EscapeError::InvalidCharInHexEscape);
        check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
        check(r"\xtt", EscapeError::InvalidCharInHexEscape);

        check(r"\u", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
        check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
        check(r"\u{", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
        check(r"\u{}", EscapeError::EmptyUnicodeEscape);
        check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
        check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);

        check("ы", EscapeError::NonAsciiCharInByte);
        check("🦀", EscapeError::NonAsciiCharInByte);

        // Any well-formed `\u{...}` escape is rejected in a byte literal,
        // whatever value it denotes and whatever follows it.
        check(r"\u{0}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{000000}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{41}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{0041}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{0}x", EscapeError::UnicodeEscapeInByte);
        check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{D800}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte);
        check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte);
    }

    #[test]
    fn test_unescape_byte_good() {
        fn check(literal_text: &str, expected_byte: u8) {
            let actual_result = unescape_byte(literal_text);
            assert_eq!(actual_result, Ok(expected_byte));
        }

        check("a", b'a');

        check(r#"\""#, b'"');
        check(r"\n", b'\n');
        check(r"\r", b'\r');
        check(r"\t", b'\t');
        check(r"\\", b'\\');
        check(r"\'", b'\'');
        check(r"\0", b'\0');

        check(r"\x00", b'\0');
        check(r"\x5a", b'Z');
        check(r"\x5A", b'Z');
        check(r"\x7f", 127);
        check(r"\x80", 128);
        check(r"\xff", 255);
        check(r"\xFF", 255);
    }

    #[test]
    fn test_unescape_byte_str_good() {
        // Collects the unescaped bytes, keeping only the first error (if any).
        fn check(literal_text: &str, expected: &[u8]) {
            let mut buf = Ok(Vec::with_capacity(literal_text.len()));
            unescape_byte_str(literal_text, &mut |range, c| {
                if let Ok(b) = &mut buf {
                    match c {
                        Ok(c) => b.push(c),
                        Err(e) => buf = Err((range, e)),
                    }
                }
            });
            let buf = buf.as_ref().map(|it| it.as_ref());
            assert_eq!(buf, Ok(expected))
        }

        check("foo", b"foo");
        check("", b"");
        check(" \t\n\r\n", b" \t\n\n");

        check("hello \\\n world", b"hello world");
        check("hello \\\r\n world", b"hello world");
        check("thread's", b"thread's")
    }
}