switch to upstream unescape

author: Aleksey Kladov <[email protected]> 2019-07-24 09:47:28 +0100
committer: Aleksey Kladov <[email protected]> 2019-07-24 09:47:28 +0100
commit: 2473cb6a5cc2af6d703129adc01942b00c059810 (patch)
tree: 0b51a9d40266d0b89083199191573a8a4a194d99 /crates/ra_syntax/src/validation
parent: c79eea9fc1d7c3400031674b7ebb2b0671aa05e8 (diff)
1 files changed, 0 insertions, 521 deletions
diff --git a/crates/ra_syntax/src/validation/unescape.rs b/crates/ra_syntax/src/validation/unescape.rs
deleted file mode 100644
index 7eed6c663..000000000
--- a/crates/ra_syntax/src/validation/unescape.rs
+++ /dev/null
@@ -1,521 +0,0 @@
-//! Utilities for validating  string and char literals and turning them into
-//! values they represent.
-//!
-//! This file is copy-pasted from the compiler
-//!
-//! https://github.com/rust-lang/rust/blob/c6ac57564852cb6e2d0db60f7b46d9eb98d4b449/src/libsyntax/parse/unescape.rs
-//!
-//! Hopefully, we'll share this code in a proper way some day
-use std::ops::Range;
-use std::str::Chars;
-#[derive(Debug, PartialEq, Eq, Clone, Hash)]
-pub enum EscapeError {
-    ZeroChars,
-    MoreThanOneChar,
-    LoneSlash,
-    InvalidEscape,
-    BareCarriageReturn,
-    EscapeOnlyChar,
-    TooShortHexEscape,
-    InvalidCharInHexEscape,
-    OutOfRangeHexEscape,
-    NoBraceInUnicodeEscape,
-    InvalidCharInUnicodeEscape,
-    EmptyUnicodeEscape,
-    UnclosedUnicodeEscape,
-    LeadingUnderscoreUnicodeEscape,
-    OverlongUnicodeEscape,
-    LoneSurrogateUnicodeEscape,
-    OutOfRangeUnicodeEscape,
-    UnicodeEscapeInByte,
-    NonAsciiCharInByte,
-}
-/// Takes a contents of a char literal (without quotes), and returns an
-/// unescaped char or an error
-pub(crate) fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
-    let mut chars = literal_text.chars();
-    unescape_char_or_byte(&mut chars, Mode::Char)
-        .map_err(|err| (literal_text.len() - chars.as_str().len(), err))
-}
-/// Takes a contents of a string literal (without quotes) and produces a
-/// sequence of escaped characters or errors.
-pub(crate) fn unescape_str<F>(literal_text: &str, callback: &mut F)
-where
-    F: FnMut(Range<usize>, Result<char, EscapeError>),
-{
-    unescape_str_or_byte_str(literal_text, Mode::Str, callback)
-}
-pub(crate) fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
-    let mut chars = literal_text.chars();
-    unescape_char_or_byte(&mut chars, Mode::Byte)
-        .map(byte_from_char)
-        .map_err(|err| (literal_text.len() - chars.as_str().len(), err))
-}
-/// Takes a contents of a string literal (without quotes) and produces a
-/// sequence of escaped characters or errors.
-pub(crate) fn unescape_byte_str<F>(literal_text: &str, callback: &mut F)
-where
-    F: FnMut(Range<usize>, Result<u8, EscapeError>),
-{
-    unescape_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
-        callback(range, char.map(byte_from_char))
-    })
-}
-#[derive(Debug, Clone, Copy)]
-pub(crate) enum Mode {
-    Char,
-    Str,
-    Byte,
-    ByteStr,
-}
-impl Mode {
-    fn in_single_quotes(self) -> bool {
-        match self {
-            Mode::Char | Mode::Byte => true,
-            Mode::Str | Mode::ByteStr => false,
-        }
-    }
-    pub(crate) fn in_double_quotes(self) -> bool {
-        !self.in_single_quotes()
-    }
-    pub(crate) fn is_bytes(self) -> bool {
-        match self {
-            Mode::Byte | Mode::ByteStr => true,
-            Mode::Char | Mode::Str => false,
-        }
-    }
-}
-fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
-    if first_char != '\\' {
-        return match first_char {
-            '\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
-            '\r' => Err(if chars.clone().next() == Some('\n') {
-                EscapeError::EscapeOnlyChar
-            } else {
-                EscapeError::BareCarriageReturn
-            }),
-            '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
-            '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
-            _ => {
-                if mode.is_bytes() && !first_char.is_ascii() {
-                    return Err(EscapeError::NonAsciiCharInByte);
-                }
-                Ok(first_char)
-            }
-        };
-    }
-    let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
-    let res = match second_char {
-        '"' => '"',
-        'n' => '\n',
-        'r' => '\r',
-        't' => '\t',
-        '\\' => '\\',
-        '\'' => '\'',
-        '0' => '\0',
-        'x' => {
-            let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
-            let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
-            let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
-            let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
-            let value = hi * 16 + lo;
-            if !mode.is_bytes() && !is_ascii(value) {
-                return Err(EscapeError::OutOfRangeHexEscape);
-            }
-            let value = value as u8;
-            value as char
-        }
-        'u' => {
-            if chars.next() != Some('{') {
-                return Err(EscapeError::NoBraceInUnicodeEscape);
-            }
-            let mut n_digits = 1;
-            let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
-                '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
-                '}' => return Err(EscapeError::EmptyUnicodeEscape),
-                c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
-            };
-            loop {
-                match chars.next() {
-                    None => return Err(EscapeError::UnclosedUnicodeEscape),
-                    Some('_') => continue,
-                    Some('}') => {
-                        if n_digits > 6 {
-                            return Err(EscapeError::OverlongUnicodeEscape);
-                        }
-                        if mode.is_bytes() {
-                            return Err(EscapeError::UnicodeEscapeInByte);
-                        }
-                        break std::char::from_u32(value).ok_or_else(|| {
-                            if value > 0x0010_FFFF {
-                                EscapeError::OutOfRangeUnicodeEscape
-                            } else {
-                                EscapeError::LoneSurrogateUnicodeEscape
-                            }
-                        })?;
-                    }
-                    Some(c) => {
-                        let digit =
-                            c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
-                        n_digits += 1;
-                        if n_digits > 6 {
-                            continue;
-                        }
-                        let digit = digit as u32;
-                        value = value * 16 + digit;
-                    }
-                };
-            }
-        }
-        _ => return Err(EscapeError::InvalidEscape),
-    };
-    Ok(res)
-}
-fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
-    let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
-    let res = scan_escape(first_char, chars, mode)?;
-    if chars.next().is_some() {
-        return Err(EscapeError::MoreThanOneChar);
-    }
-    Ok(res)
-}
-/// Takes a contents of a string literal (without quotes) and produces a
-/// sequence of escaped characters or errors.
-fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
-where
-    F: FnMut(Range<usize>, Result<char, EscapeError>),
-{
-    assert!(mode.in_double_quotes());
-    let initial_len = src.len();
-    let mut chars = src.chars();
-    while let Some(first_char) = chars.next() {
-        let start = initial_len - chars.as_str().len() - first_char.len_utf8();
-        let unescaped_char = match first_char {
-            '\\' => {
-                let (second_char, third_char) = {
-                    let mut chars = chars.clone();
-                    (chars.next(), chars.next())
-                };
-                match (second_char, third_char) {
-                    (Some('\n'), _) | (Some('\r'), Some('\n')) => {
-                        skip_ascii_whitespace(&mut chars);
-                        continue;
-                    }
-                    _ => scan_escape(first_char, &mut chars, mode),
-                }
-            }
-            '\r' => {
-                let second_char = chars.clone().next();
-                if second_char == Some('\n') {
-                    chars.next();
-                    Ok('\n')
-                } else {
-                    scan_escape(first_char, &mut chars, mode)
-                }
-            }
-            '\n' => Ok('\n'),
-            '\t' => Ok('\t'),
-            _ => scan_escape(first_char, &mut chars, mode),
-        };
-        let end = initial_len - chars.as_str().len();
-        callback(start..end, unescaped_char);
-    }
-    fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
-        let str = chars.as_str();
-        let first_non_space = str
-            .bytes()
-            .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
-            .unwrap_or_else(|| str.len());
-        *chars = str[first_non_space..].chars()
-    }
-}
-fn byte_from_char(c: char) -> u8 {
-    let res = c as u32;
-    assert!(res <= u32::from(u8::max_value()), "guaranteed because of Mode::Byte");
-    res as u8
-}
-fn is_ascii(x: u32) -> bool {
-    x <= 0x7F
-}
-#[cfg(test)]
-mod tests {
-    use super::*;
-    #[test]
-    fn test_unescape_char_bad() {
-        fn check(literal_text: &str, expected_error: EscapeError) {
-            let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err);
-            assert_eq!(actual_result, Err(expected_error));
-        }
-        check("", EscapeError::ZeroChars);
-        check(r"\", EscapeError::LoneSlash);
-        check("\n", EscapeError::EscapeOnlyChar);
-        check("\r\n", EscapeError::EscapeOnlyChar);
-        check("\t", EscapeError::EscapeOnlyChar);
-        check("'", EscapeError::EscapeOnlyChar);
-        check("\r", EscapeError::BareCarriageReturn);
-        check("spam", EscapeError::MoreThanOneChar);
-        check(r"\x0ff", EscapeError::MoreThanOneChar);
-        check(r#"\"a"#, EscapeError::MoreThanOneChar);
-        check(r"\na", EscapeError::MoreThanOneChar);
-        check(r"\ra", EscapeError::MoreThanOneChar);
-        check(r"\ta", EscapeError::MoreThanOneChar);
-        check(r"\\a", EscapeError::MoreThanOneChar);
-        check(r"\'a", EscapeError::MoreThanOneChar);
-        check(r"\0a", EscapeError::MoreThanOneChar);
-        check(r"\u{0}x", EscapeError::MoreThanOneChar);
-        check(r"\u{1F63b}}", EscapeError::MoreThanOneChar);
-        check(r"\v", EscapeError::InvalidEscape);
-        check(r"\💩", EscapeError::InvalidEscape);
-        check(r"\●", EscapeError::InvalidEscape);
-        check(r"\x", EscapeError::TooShortHexEscape);
-        check(r"\x0", EscapeError::TooShortHexEscape);
-        check(r"\xf", EscapeError::TooShortHexEscape);
-        check(r"\xa", EscapeError::TooShortHexEscape);
-        check(r"\xx", EscapeError::InvalidCharInHexEscape);
-        check(r"\xы", EscapeError::InvalidCharInHexEscape);
-        check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
-        check(r"\xtt", EscapeError::InvalidCharInHexEscape);
-        check(r"\xff", EscapeError::OutOfRangeHexEscape);
-        check(r"\xFF", EscapeError::OutOfRangeHexEscape);
-        check(r"\x80", EscapeError::OutOfRangeHexEscape);
-        check(r"\u", EscapeError::NoBraceInUnicodeEscape);
-        check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
-        check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
-        check(r"\u{", EscapeError::UnclosedUnicodeEscape);
-        check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
-        check(r"\u{}", EscapeError::EmptyUnicodeEscape);
-        check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
-        check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);
-        check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape);
-        check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape);
-        check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape);
-        check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape);
-        check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape);
-        check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape);
-        check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape);
-        check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape);
-        check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape);
-    }
-    #[test]
-    fn test_unescape_char_good() {
-        fn check(literal_text: &str, expected_char: char) {
-            let actual_result = unescape_char(literal_text);
-            assert_eq!(actual_result, Ok(expected_char));
-        }
-        check("a", 'a');
-        check("ы", 'ы');
-        check("🦀", '🦀');
-        check(r#"\""#, '"');
-        check(r"\n", '\n');
-        check(r"\r", '\r');
-        check(r"\t", '\t');
-        check(r"\\", '\\');
-        check(r"\'", '\'');
-        check(r"\0", '\0');
-        check(r"\x00", '\0');
-        check(r"\x5a", 'Z');
-        check(r"\x5A", 'Z');
-        check(r"\x7f", 127 as char);
-        check(r"\u{0}", '\0');
-        check(r"\u{000000}", '\0');
-        check(r"\u{41}", 'A');
-        check(r"\u{0041}", 'A');
-        check(r"\u{00_41}", 'A');
-        check(r"\u{4__1__}", 'A');
-        check(r"\u{1F63b}", '😻');
-    }
-    #[test]
-    fn test_unescape_str_good() {
-        fn check(literal_text: &str, expected: &str) {
-            let mut buf = Ok(String::with_capacity(literal_text.len()));
-            unescape_str(literal_text, &mut |range, c| {
-                if let Ok(b) = &mut buf {
-                    match c {
-                        Ok(c) => b.push(c),
-                        Err(e) => buf = Err((range, e)),
-                    }
-                }
-            });
-            let buf = buf.as_ref().map(|it| it.as_ref());
-            assert_eq!(buf, Ok(expected))
-        }
-        check("foo", "foo");
-        check("", "");
-        check(" \t\n\r\n", " \t\n\n");
-        check("hello \\\n     world", "hello world");
-        check("hello \\\r\n     world", "hello world");
-        check("thread's", "thread's")
-    }
-    #[test]
-    fn test_unescape_byte_bad() {
-        fn check(literal_text: &str, expected_error: EscapeError) {
-            let actual_result = unescape_byte(literal_text).map_err(|(_offset, err)| err);
-            assert_eq!(actual_result, Err(expected_error));
-        }
-        check("", EscapeError::ZeroChars);
-        check(r"\", EscapeError::LoneSlash);
-        check("\n", EscapeError::EscapeOnlyChar);
-        check("\r\n", EscapeError::EscapeOnlyChar);
-        check("\t", EscapeError::EscapeOnlyChar);
-        check("'", EscapeError::EscapeOnlyChar);
-        check("\r", EscapeError::BareCarriageReturn);
-        check("spam", EscapeError::MoreThanOneChar);
-        check(r"\x0ff", EscapeError::MoreThanOneChar);
-        check(r#"\"a"#, EscapeError::MoreThanOneChar);
-        check(r"\na", EscapeError::MoreThanOneChar);
-        check(r"\ra", EscapeError::MoreThanOneChar);
-        check(r"\ta", EscapeError::MoreThanOneChar);
-        check(r"\\a", EscapeError::MoreThanOneChar);
-        check(r"\'a", EscapeError::MoreThanOneChar);
-        check(r"\0a", EscapeError::MoreThanOneChar);
-        check(r"\v", EscapeError::InvalidEscape);
-        check(r"\💩", EscapeError::InvalidEscape);
-        check(r"\●", EscapeError::InvalidEscape);
-        check(r"\x", EscapeError::TooShortHexEscape);
-        check(r"\x0", EscapeError::TooShortHexEscape);
-        check(r"\xa", EscapeError::TooShortHexEscape);
-        check(r"\xf", EscapeError::TooShortHexEscape);
-        check(r"\xx", EscapeError::InvalidCharInHexEscape);
-        check(r"\xы", EscapeError::InvalidCharInHexEscape);
-        check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
-        check(r"\xtt", EscapeError::InvalidCharInHexEscape);
-        check(r"\u", EscapeError::NoBraceInUnicodeEscape);
-        check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
-        check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
-        check(r"\u{", EscapeError::UnclosedUnicodeEscape);
-        check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
-        check(r"\u{}", EscapeError::EmptyUnicodeEscape);
-        check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
-        check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);
-        check("ы", EscapeError::NonAsciiCharInByte);
-        check("🦀", EscapeError::NonAsciiCharInByte);
-        check(r"\u{0}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{000000}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{41}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{0041}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{00_41}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{4__1__}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{1F63b}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{0}x", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{ffffff}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{DC00}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{DDDD}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{DFFF}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{D800}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{DAAA}", EscapeError::UnicodeEscapeInByte);
-        check(r"\u{DBFF}", EscapeError::UnicodeEscapeInByte);
-    }
-    #[test]
-    fn test_unescape_byte_good() {
-        fn check(literal_text: &str, expected_byte: u8) {
-            let actual_result = unescape_byte(literal_text);
-            assert_eq!(actual_result, Ok(expected_byte));
-        }
-        check("a", b'a');
-        check(r#"\""#, b'"');
-        check(r"\n", b'\n');
-        check(r"\r", b'\r');
-        check(r"\t", b'\t');
-        check(r"\\", b'\\');
-        check(r"\'", b'\'');
-        check(r"\0", b'\0');
-        check(r"\x00", b'\0');
-        check(r"\x5a", b'Z');
-        check(r"\x5A", b'Z');
-        check(r"\x7f", 127);
-        check(r"\x80", 128);
-        check(r"\xff", 255);
-        check(r"\xFF", 255);
-    }
-    #[test]
-    fn test_unescape_byte_str_good() {
-        fn check(literal_text: &str, expected: &[u8]) {
-            let mut buf = Ok(Vec::with_capacity(literal_text.len()));
-            unescape_byte_str(literal_text, &mut |range, c| {
-                if let Ok(b) = &mut buf {
-                    match c {
-                        Ok(c) => b.push(c),
-                        Err(e) => buf = Err((range, e)),
-                    }
-                }
-            });
-            let buf = buf.as_ref().map(|it| it.as_ref());
-            assert_eq!(buf, Ok(expected))
-        }
-        check("foo", b"foo");
-        check("", b"");
-        check(" \t\n\r\n", b" \t\n\n");
-        check("hello \\\n     world", b"hello world");
-        check("hello \\\r\n     world", b"hello world");
-        check("thread's", b"thread's")
-    }
-}
author	Aleksey Kladov <[email protected]>	2019-07-24 09:47:28 +0100
committer	Aleksey Kladov <[email protected]>	2019-07-24 09:47:28 +0100
commit	2473cb6a5cc2af6d703129adc01942b00c059810 (patch)
tree	0b51a9d40266d0b89083199191573a8a4a194d99 /crates/ra_syntax/src/validation
parent	c79eea9fc1d7c3400031674b7ebb2b0671aa05e8 (diff)

diff --git a/crates/ra_syntax/src/validation/unescape.rs b/crates/ra_syntax/src/validation/unescape.rs
deleted file mode 100644
index 7eed6c663..000000000
--- a/crates/ra_syntax/src/validation/unescape.rs
+++ /dev/null
@@ -1,521 +0,0 @@
1	//! Utilities for validating string and char literals and turning them into
2	//! values they represent.
3	//!
4	//! This file is copy-pasted from the compiler
5	//!
6	//! https://github.com/rust-lang/rust/blob/c6ac57564852cb6e2d0db60f7b46d9eb98d4b449/src/libsyntax/parse/unescape.rs
7	//!
8	//! Hopefully, we'll share this code in a proper way some day
9
10	use std::ops::Range;
11	use std::str::Chars;
12
13	#[derive(Debug, PartialEq, Eq, Clone, Hash)]
14	pub enum EscapeError {
15	ZeroChars,
16	MoreThanOneChar,
17
18	LoneSlash,
19	InvalidEscape,
20	BareCarriageReturn,
21	EscapeOnlyChar,
22
23	TooShortHexEscape,
24	InvalidCharInHexEscape,
25	OutOfRangeHexEscape,
26
27	NoBraceInUnicodeEscape,
28	InvalidCharInUnicodeEscape,
29	EmptyUnicodeEscape,
30	UnclosedUnicodeEscape,
31	LeadingUnderscoreUnicodeEscape,
32	OverlongUnicodeEscape,
33	LoneSurrogateUnicodeEscape,
34	OutOfRangeUnicodeEscape,
35
36	UnicodeEscapeInByte,
37	NonAsciiCharInByte,
38	}
39
40	/// Takes a contents of a char literal (without quotes), and returns an
41	/// unescaped char or an error
42	pub(crate) fn unescape_char(literal_text: &str) -> Result<char, (usize, EscapeError)> {
43	let mut chars = literal_text.chars();
44	unescape_char_or_byte(&mut chars, Mode::Char)
45	.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
46	}
47
48	/// Takes a contents of a string literal (without quotes) and produces a
49	/// sequence of escaped characters or errors.
50	pub(crate) fn unescape_str<F>(literal_text: &str, callback: &mut F)
51	where
52	F: FnMut(Range<usize>, Result<char, EscapeError>),
53	{
54	unescape_str_or_byte_str(literal_text, Mode::Str, callback)
55	}
56
57	pub(crate) fn unescape_byte(literal_text: &str) -> Result<u8, (usize, EscapeError)> {
58	let mut chars = literal_text.chars();
59	unescape_char_or_byte(&mut chars, Mode::Byte)
60	.map(byte_from_char)
61	.map_err(|err| (literal_text.len() - chars.as_str().len(), err))
62	}
63
64	/// Takes a contents of a string literal (without quotes) and produces a
65	/// sequence of escaped characters or errors.
66	pub(crate) fn unescape_byte_str<F>(literal_text: &str, callback: &mut F)
67	where
68	F: FnMut(Range<usize>, Result<u8, EscapeError>),
69	{
70	unescape_str_or_byte_str(literal_text, Mode::ByteStr, &mut |range, char| {
71	callback(range, char.map(byte_from_char))
72	})
73	}
74
75	#[derive(Debug, Clone, Copy)]
76	pub(crate) enum Mode {
77	Char,
78	Str,
79	Byte,
80	ByteStr,
81	}
82
83	impl Mode {
84	fn in_single_quotes(self) -> bool {
85	match self {
86	Mode::Char | Mode::Byte => true,
87	Mode::Str | Mode::ByteStr => false,
88	}
89	}
90
91	pub(crate) fn in_double_quotes(self) -> bool {
92	!self.in_single_quotes()
93	}
94
95	pub(crate) fn is_bytes(self) -> bool {
96	match self {
97	Mode::Byte | Mode::ByteStr => true,
98	Mode::Char | Mode::Str => false,
99	}
100	}
101	}
102
103	fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
104	if first_char != '\\' {
105	return match first_char {
106	'\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
107	'\r' => Err(if chars.clone().next() == Some('\n') {
108	EscapeError::EscapeOnlyChar
109	} else {
110	EscapeError::BareCarriageReturn
111	}),
112	'\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
113	'"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
114	_ => {
115	if mode.is_bytes() && !first_char.is_ascii() {
116	return Err(EscapeError::NonAsciiCharInByte);
117	}
118	Ok(first_char)
119	}
120	};
121	}
122
123	let second_char = chars.next().ok_or(EscapeError::LoneSlash)?;
124
125	let res = match second_char {
126	'"' => '"',
127	'n' => '\n',
128	'r' => '\r',
129	't' => '\t',
130	'\\' => '\\',
131	'\'' => '\'',
132	'0' => '\0',
133
134	'x' => {
135	let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
136	let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
137
138	let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?;
139	let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?;
140
141	let value = hi * 16 + lo;
142
143	if !mode.is_bytes() && !is_ascii(value) {
144	return Err(EscapeError::OutOfRangeHexEscape);
145	}
146	let value = value as u8;
147
148	value as char
149	}
150
151	'u' => {
152	if chars.next() != Some('{') {
153	return Err(EscapeError::NoBraceInUnicodeEscape);
154	}
155
156	let mut n_digits = 1;
157	let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? {
158	'_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape),
159	'}' => return Err(EscapeError::EmptyUnicodeEscape),
160	c => c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?,
161	};
162
163	loop {
164	match chars.next() {
165	None => return Err(EscapeError::UnclosedUnicodeEscape),
166	Some('_') => continue,
167	Some('}') => {
168	if n_digits > 6 {
169	return Err(EscapeError::OverlongUnicodeEscape);
170	}
171	if mode.is_bytes() {
172	return Err(EscapeError::UnicodeEscapeInByte);
173	}
174
175	break std::char::from_u32(value).ok_or_else(|| {
176	if value > 0x0010_FFFF {
177	EscapeError::OutOfRangeUnicodeEscape
178	} else {
179	EscapeError::LoneSurrogateUnicodeEscape
180	}
181	})?;
182	}
183	Some(c) => {
184	let digit =
185	c.to_digit(16).ok_or(EscapeError::InvalidCharInUnicodeEscape)?;
186	n_digits += 1;
187	if n_digits > 6 {
188	continue;
189	}
190	let digit = digit as u32;
191	value = value * 16 + digit;
192	}
193	};
194	}
195	}
196	_ => return Err(EscapeError::InvalidEscape),
197	};
198	Ok(res)
199	}
200
201	fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result<char, EscapeError> {
202	let first_char = chars.next().ok_or(EscapeError::ZeroChars)?;
203	let res = scan_escape(first_char, chars, mode)?;
204	if chars.next().is_some() {
205	return Err(EscapeError::MoreThanOneChar);
206	}
207	Ok(res)
208	}
209
210	/// Takes a contents of a string literal (without quotes) and produces a
211	/// sequence of escaped characters or errors.
212	fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)
213	where
214	F: FnMut(Range<usize>, Result<char, EscapeError>),
215	{
216	assert!(mode.in_double_quotes());
217	let initial_len = src.len();
218	let mut chars = src.chars();
219	while let Some(first_char) = chars.next() {
220	let start = initial_len - chars.as_str().len() - first_char.len_utf8();
221
222	let unescaped_char = match first_char {
223	'\\' => {
224	let (second_char, third_char) = {
225	let mut chars = chars.clone();
226	(chars.next(), chars.next())
227	};
228	match (second_char, third_char) {
229	(Some('\n'), _) | (Some('\r'), Some('\n')) => {
230	skip_ascii_whitespace(&mut chars);
231	continue;
232	}
233	_ => scan_escape(first_char, &mut chars, mode),
234	}
235	}
236	'\r' => {
237	let second_char = chars.clone().next();
238	if second_char == Some('\n') {
239	chars.next();
240	Ok('\n')
241	} else {
242	scan_escape(first_char, &mut chars, mode)
243	}
244	}
245	'\n' => Ok('\n'),
246	'\t' => Ok('\t'),
247	_ => scan_escape(first_char, &mut chars, mode),
248	};
249	let end = initial_len - chars.as_str().len();
250	callback(start..end, unescaped_char);
251	}
252
253	fn skip_ascii_whitespace(chars: &mut Chars<'_>) {
254	let str = chars.as_str();
255	let first_non_space = str
256	.bytes()
257	.position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r')
258	.unwrap_or_else(|| str.len());
259	*chars = str[first_non_space..].chars()
260	}
261	}
262
263	fn byte_from_char(c: char) -> u8 {
264	let res = c as u32;
265	assert!(res <= u32::from(u8::max_value()), "guaranteed because of Mode::Byte");
266	res as u8
267	}
268
269	fn is_ascii(x: u32) -> bool {
270	x <= 0x7F
271	}
272
273	#[cfg(test)]
274	mod tests {
275	use super::*;
276
277	#[test]
278	fn test_unescape_char_bad() {
279	fn check(literal_text: &str, expected_error: EscapeError) {
280	let actual_result = unescape_char(literal_text).map_err(|(_offset, err)| err);
281	assert_eq!(actual_result, Err(expected_error));
282	}
283
284	check("", EscapeError::ZeroChars);
285	check(r"\", EscapeError::LoneSlash);
286
287	check("\n", EscapeError::EscapeOnlyChar);
288	check("\r\n", EscapeError::EscapeOnlyChar);
289	check("\t", EscapeError::EscapeOnlyChar);
290	check("'", EscapeError::EscapeOnlyChar);
291	check("\r", EscapeError::BareCarriageReturn);
292
293	check("spam", EscapeError::MoreThanOneChar);
294	check(r"\x0ff", EscapeError::MoreThanOneChar);
295	check(r#"\"a"#, EscapeError::MoreThanOneChar);
296	check(r"\na", EscapeError::MoreThanOneChar);
297	check(r"\ra", EscapeError::MoreThanOneChar);
298	check(r"\ta", EscapeError::MoreThanOneChar);
299	check(r"\\a", EscapeError::MoreThanOneChar);
300	check(r"\'a", EscapeError::MoreThanOneChar);
301	check(r"\0a", EscapeError::MoreThanOneChar);
302	check(r"\u{0}x", EscapeError::MoreThanOneChar);
303	check(r"\u{1F63b}}", EscapeError::MoreThanOneChar);
304
305	check(r"\v", EscapeError::InvalidEscape);
306	check(r"\💩", EscapeError::InvalidEscape);
307	check(r"\●", EscapeError::InvalidEscape);
308
309	check(r"\x", EscapeError::TooShortHexEscape);
310	check(r"\x0", EscapeError::TooShortHexEscape);
311	check(r"\xf", EscapeError::TooShortHexEscape);
312	check(r"\xa", EscapeError::TooShortHexEscape);
313	check(r"\xx", EscapeError::InvalidCharInHexEscape);
314	check(r"\xы", EscapeError::InvalidCharInHexEscape);
315	check(r"\x🦀", EscapeError::InvalidCharInHexEscape);
316	check(r"\xtt", EscapeError::InvalidCharInHexEscape);
317	check(r"\xff", EscapeError::OutOfRangeHexEscape);
318	check(r"\xFF", EscapeError::OutOfRangeHexEscape);
319	check(r"\x80", EscapeError::OutOfRangeHexEscape);
320
321	check(r"\u", EscapeError::NoBraceInUnicodeEscape);
322	check(r"\u[0123]", EscapeError::NoBraceInUnicodeEscape);
323	check(r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape);
324	check(r"\u{", EscapeError::UnclosedUnicodeEscape);
325	check(r"\u{0000", EscapeError::UnclosedUnicodeEscape);
326	check(r"\u{}", EscapeError::EmptyUnicodeEscape);
327	check(r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape);
328	check(r"\u{0000000}", EscapeError::OverlongUnicodeEscape);
329	check(r"\u{FFFFFF}", EscapeError::OutOfRangeUnicodeEscape);
330	check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape);
331	check(r"\u{ffffff}", EscapeError::OutOfRangeUnicodeEscape);
332
333	check(r"\u{DC00}", EscapeError::LoneSurrogateUnicodeEscape);
334	check(r"\u{DDDD}", EscapeError::LoneSurrogateUnicodeEscape);
335	check(r"\u{DFFF}", EscapeError::LoneSurrogateUnicodeEscape);
336
337	check(r"\u{D800}", EscapeError::LoneSurrogateUnicodeEscape);
338	check(r"\u{DAAA}", EscapeError::LoneSurrogateUnicodeEscape);
339	check(r"\u{DBFF}", EscapeError::LoneSurrogateUnicodeEscape);
340	}
341
342	#[test]
343	fn test_unescape_char_good() {
344	fn check(literal_text: &str, expected_char: char) {
345	let actual_result = unescape_char(literal_text);
346	assert_eq!(actual_result, Ok(expected_char));
347	}
348
349	check("a", 'a');
350	check("ы", 'ы');
351	check("🦀", '🦀');
352
353	check(r#"\""#, '"');
354	check(r"\n", '\n');
355	check(r"\r", '\r');
356	check(r"\t", '\t');
357	check(r"\\", '\\');
358	check(r"\'", '\'');
359	check(r"\0", '\0');
360
361	check(r"\x00", '\0');
362	check(r"\x5a", 'Z');
363	check(r"\x5A", 'Z');
364	check(r"\x7f", 127 as char);
365
366	check(r"\u{0}", '\0');
367	check(r"\u{000000}", '\0');
368	check(r"\u{41}", 'A');
369	check(r"\u{0041}", 'A');
370	check(r"\u{00_41}", 'A');
371	check(r"\u{4__1__}", 'A');
372	check(r"\u{1F63b}", '😻');
373	}
374
375	#[test]
376	fn test_unescape_str_good() {
377	fn check(literal_text: &str, expected: &str) {
378	let mut buf = Ok(String::with_capacity(literal_text.len()));
379	unescape_str(literal_text, &mut |range, c| {
380	if let Ok(b) = &mut buf {
381	match c {
382	Ok(c) => b.push(c),
383	Err(e) => buf = Err((range, e)),
384	}
385	}
386	});
387	let buf = buf.as_ref().map(\|it\| it.as_ref());
388	assert_eq!(buf, Ok(expected))
389	}
390
391	check("foo", "foo");
392	check("", "");
393	check(" \t\n\r\n", " \t\n\n");
394
395	check("hello \\\n world", "hello world");
396	check("hello \\\r\n world", "hello world");
397	check("thread's", "thread's")
398	}
399
/// Every byte literal body listed here must be rejected with the paired
/// `EscapeError`; the offset reported alongside the error is not checked.
#[test]
fn test_unescape_byte_bad() {
    // (literal text without the surrounding quotes, expected error kind)
    let cases = [
        // Empty input and a backslash with nothing after it.
        ("", EscapeError::ZeroChars),
        (r"\", EscapeError::LoneSlash),
        // Raw characters that are only accepted in escaped form.
        ("\n", EscapeError::EscapeOnlyChar),
        ("\r\n", EscapeError::EscapeOnlyChar),
        ("\t", EscapeError::EscapeOnlyChar),
        ("'", EscapeError::EscapeOnlyChar),
        ("\r", EscapeError::BareCarriageReturn),
        // Anything left over after the first byte.
        ("spam", EscapeError::MoreThanOneChar),
        (r"\x0ff", EscapeError::MoreThanOneChar),
        (r#"\"a"#, EscapeError::MoreThanOneChar),
        (r"\na", EscapeError::MoreThanOneChar),
        (r"\ra", EscapeError::MoreThanOneChar),
        (r"\ta", EscapeError::MoreThanOneChar),
        (r"\\a", EscapeError::MoreThanOneChar),
        (r"\'a", EscapeError::MoreThanOneChar),
        (r"\0a", EscapeError::MoreThanOneChar),
        // Unknown escape characters.
        (r"\v", EscapeError::InvalidEscape),
        (r"\💩", EscapeError::InvalidEscape),
        (r"\●", EscapeError::InvalidEscape),
        // Malformed hex escapes.
        (r"\x", EscapeError::TooShortHexEscape),
        (r"\x0", EscapeError::TooShortHexEscape),
        (r"\xa", EscapeError::TooShortHexEscape),
        (r"\xf", EscapeError::TooShortHexEscape),
        (r"\xx", EscapeError::InvalidCharInHexEscape),
        (r"\xы", EscapeError::InvalidCharInHexEscape),
        (r"\x🦀", EscapeError::InvalidCharInHexEscape),
        (r"\xtt", EscapeError::InvalidCharInHexEscape),
        // Malformed unicode escapes are reported with their specific error.
        (r"\u", EscapeError::NoBraceInUnicodeEscape),
        (r"\u[0123]", EscapeError::NoBraceInUnicodeEscape),
        (r"\u{0x}", EscapeError::InvalidCharInUnicodeEscape),
        (r"\u{", EscapeError::UnclosedUnicodeEscape),
        (r"\u{0000", EscapeError::UnclosedUnicodeEscape),
        (r"\u{}", EscapeError::EmptyUnicodeEscape),
        (r"\u{_0000}", EscapeError::LeadingUnderscoreUnicodeEscape),
        (r"\u{0000000}", EscapeError::OverlongUnicodeEscape),
        // Non-ASCII characters.
        ("ы", EscapeError::NonAsciiCharInByte),
        ("🦀", EscapeError::NonAsciiCharInByte),
        // Syntactically complete unicode escapes are reported as
        // `UnicodeEscapeInByte` whatever value they carry, including when
        // extra characters follow the escape.
        (r"\u{0}", EscapeError::UnicodeEscapeInByte),
        (r"\u{000000}", EscapeError::UnicodeEscapeInByte),
        (r"\u{41}", EscapeError::UnicodeEscapeInByte),
        (r"\u{0041}", EscapeError::UnicodeEscapeInByte),
        (r"\u{00_41}", EscapeError::UnicodeEscapeInByte),
        (r"\u{4__1__}", EscapeError::UnicodeEscapeInByte),
        (r"\u{1F63b}", EscapeError::UnicodeEscapeInByte),
        (r"\u{0}x", EscapeError::UnicodeEscapeInByte),
        (r"\u{1F63b}}", EscapeError::UnicodeEscapeInByte),
        (r"\u{FFFFFF}", EscapeError::UnicodeEscapeInByte),
        (r"\u{ffffff}", EscapeError::UnicodeEscapeInByte),
        (r"\u{DC00}", EscapeError::UnicodeEscapeInByte),
        (r"\u{DDDD}", EscapeError::UnicodeEscapeInByte),
        (r"\u{DFFF}", EscapeError::UnicodeEscapeInByte),
        (r"\u{D800}", EscapeError::UnicodeEscapeInByte),
        (r"\u{DAAA}", EscapeError::UnicodeEscapeInByte),
        (r"\u{DBFF}", EscapeError::UnicodeEscapeInByte),
    ];

    for &(literal_text, ref expected_error) in cases.iter() {
        // Drop the offset; only the error kind is compared.
        let actual_result = unescape_byte(literal_text).map_err(|(_offset, kind)| kind);
        // Name the offending literal so a failure points at the table row.
        assert_eq!(
            actual_result,
            Err(expected_error.clone()),
            "literal: {:?}",
            literal_text
        );
    }
}
470
/// Every byte literal body listed here must unescape to the paired `u8`.
#[test]
fn test_unescape_byte_good() {
    // (literal text without the surrounding quotes, expected value)
    let cases = [
        // A character that stands for itself.
        ("a", b'a'),
        // Single-character backslash escapes.
        (r#"\""#, b'"'),
        (r"\n", b'\n'),
        (r"\r", b'\r'),
        (r"\t", b'\t'),
        (r"\\", b'\\'),
        (r"\'", b'\''),
        (r"\0", b'\0'),
        // Two-digit hex escapes; values above 0x7f are accepted for bytes.
        (r"\x00", b'\0'),
        (r"\x5a", b'Z'),
        (r"\x5A", b'Z'),
        (r"\x7f", 127),
        (r"\x80", 128),
        (r"\xff", 255),
        (r"\xFF", 255),
    ];

    for &(literal_text, expected_byte) in cases.iter() {
        assert_eq!(unescape_byte(literal_text), Ok(expected_byte));
    }
}
496
/// Byte string literal bodies that must unescape without reporting any error.
#[test]
fn test_unescape_byte_str_good() {
    /// Runs `unescape_byte_str` over `literal_text`, collecting the produced
    /// bytes, and asserts that the collected bytes equal `expected`.
    fn check(literal_text: &str, expected: &[u8]) {
        let mut unescaped = Vec::with_capacity(literal_text.len());
        // Only the first reported error is recorded; once it is set, every
        // later callback invocation is ignored.
        let mut first_error = None;
        unescape_byte_str(literal_text, &mut |range, unescaped_byte| {
            if first_error.is_some() {
                return;
            }
            match unescaped_byte {
                Ok(byte) => unescaped.push(byte),
                Err(err) => first_error = Some((range, err)),
            }
        });
        let outcome = match first_error {
            Some(failure) => Err(failure),
            None => Ok(unescaped.as_slice()),
        };
        assert_eq!(outcome, Ok(expected));
    }

    // (literal text without the surrounding quotes, expected unescaped bytes)
    let cases: [(&str, &[u8]); 6] = [
        ("foo", b"foo"),
        ("", b""),
        // "\r\n" in the input comes out as a single "\n".
        (" \t\n\r\n", b" \t\n\n"),
        // A backslash followed by a line break drops the break and the
        // whitespace that follows it.
        ("hello \\\n world", b"hello world"),
        ("hello \\\r\n world", b"hello world"),
        // A single quote needs no escaping here.
        ("thread's", b"thread's"),
    ];

    for &(literal_text, expected) in cases.iter() {
        check(literal_text, expected);
    }
}
521	}