5 files changed, 857 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs
new file mode 100644
index 000000000..43c0d7edd
--- /dev/null
+++ b/crates/ra_syntax/src/validation/byte.rs
@@ -0,0 +1,211 @@
+//! Validation of byte literals
+use crate::{
+    ast::{self, AstNode},
+    string_lexing::{self, CharComponentKind},
+    TextRange,
+    validation::char,
+    yellow::{
+        SyntaxError,
+        SyntaxErrorKind::*,
+    },
+};
+/// Validates a byte literal node, appending every problem found to `errors`.
+///
+/// Each component produced by `string_lexing::parse_byte_literal` is checked
+/// individually; afterwards the literal as a whole is checked for a closing
+/// quote and for containing exactly one component.
+pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec<SyntaxError>) {
+    let source = node.text();
+    let node_range = node.syntax().range();
+    let mut lexed = string_lexing::parse_byte_literal(source);
+    // Iterate through a mutable borrow so that `has_closing_quote` can still be
+    // read once all components have been consumed.
+    let mut component_count = 0;
+    for piece in &mut lexed {
+        component_count += 1;
+        let piece_text = &source[piece.range];
+        // Component ranges are relative to the literal; shift them by its start.
+        let piece_range = piece.range + node_range.start();
+        validate_byte_component(piece_text, piece.kind, piece_range, errors);
+    }
+    if !lexed.has_closing_quote {
+        errors.push(SyntaxError::new(UnclosedByte, node_range));
+    }
+    match component_count {
+        0 => errors.push(SyntaxError::new(EmptyByte, node_range)),
+        1 => { /* exactly one component: nothing to report */ }
+        _ => errors.push(SyntaxError::new(OverlongByte, node_range)),
+    }
+}
+/// Validates a single lexed component of a byte or byte string literal.
+///
+/// * `text` is the component's source text and `kind` its lexer classification.
+/// * `range` is attached to every error reported for this component.
+/// * Problems are appended to `errors`; nothing is returned.
+///
+/// Unicode escapes are always rejected. A plain code point must be ASCII and
+/// must not be a raw tab, carriage return or line feed.
+///
+/// # Panics
+///
+/// Panics if `kind` is `CodePoint` and `text` is empty.
+pub(super) fn validate_byte_component(
+    text: &str,
+    kind: CharComponentKind,
+    range: TextRange,
+    errors: &mut Vec<SyntaxError>,
+) {
+    match kind {
+        CharComponentKind::AsciiEscape => validate_byte_escape(text, range, errors),
+        CharComponentKind::AsciiCodeEscape => validate_byte_code_escape(text, range, errors),
+        CharComponentKind::UnicodeEscape => {
+            errors.push(SyntaxError::new(UnicodeEscapeForbidden, range));
+        }
+        CharComponentKind::CodePoint => {
+            let first = text
+                .chars()
+                .next()
+                .expect("Code points should be one character long");
+            match first {
+                // These bytes must always be escaped
+                '\t' | '\r' | '\n' => errors.push(SyntaxError::new(UnescapedByte, range)),
+                // Only ASCII bytes are allowed
+                other if !other.is_ascii() => {
+                    errors.push(SyntaxError::new(ByteOutOfRange, range));
+                }
+                _ => { /* any other ASCII code point is fine */ }
+            }
+        }
+    }
+}
+/// Validates a simple backslash escape inside a byte literal.
+///
+/// Reports `EmptyByteEscape` when `text` is one byte long (only the leading
+/// `\`), and `InvalidByteEscape` when the character after the backslash is not
+/// accepted by `char::is_ascii_escape`.
+///
+/// # Panics
+///
+/// Panics if `text` is not exactly one byte long and has no second character.
+fn validate_byte_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
+    if text.len() == 1 {
+        // Escape sequence consists only of leading `\`
+        errors.push(SyntaxError::new(EmptyByteEscape, range));
+        return;
+    }
+    let escaped = text.chars().nth(1).unwrap();
+    if char::is_ascii_escape(escaped) {
+        return;
+    }
+    errors.push(SyntaxError::new(InvalidByteEscape, range));
+}
+/// Validates a byte code escape such as `\xDD` inside a byte literal.
+///
+/// `text` is the whole escape including the leading `\x`; `range` is attached
+/// to any reported error and problems are appended to `errors`.
+///
+/// Reports `TooShortByteCodeEscape` when `text` is shorter than four bytes and
+/// `MalformedByteCodeEscape` when the part after `\x` is not exactly two ASCII
+/// hexadecimal digits.
+///
+/// # Panics
+///
+/// Panics if `text` is at least four bytes long and has more than four
+/// characters.
+fn validate_byte_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
+    // A ByteCodeEscape has 4 chars, example: `\xDD`
+    if text.len() < 4 {
+        errors.push(SyntaxError::new(TooShortByteCodeEscape, range));
+        return;
+    }
+    // `len()` counts bytes, so a multi-byte character after `\x` gets past the
+    // check above with fewer than four chars; only an overlong escape is a
+    // broken invariant worth panicking over.
+    assert!(
+        text.chars().count() <= 4,
+        "ByteCodeEscape cannot be longer than 4 chars"
+    );
+    // Check the digits by hand: `u8::from_str_radix` would also accept a
+    // leading `+` sign (e.g. `\x+F`), which is not a valid escape. Any two hex
+    // digits fit in a `u8`, so no further range check is needed for bytes.
+    let digits = &text[2..];
+    let well_formed = digits.len() == 2 && digits.bytes().all(|b| b.is_ascii_hexdigit());
+    if !well_formed {
+        errors.push(SyntaxError::new(MalformedByteCodeEscape, range));
+    }
+}
+#[cfg(test)]
+mod test {
+    use crate::SourceFileNode;
+    /// Parses a source file holding one `u8` constant initialised with the
+    /// byte literal `b'<literal>'`.
+    fn build_file(literal: &str) -> SourceFileNode {
+        SourceFileNode::parse(&format!("const C: u8 = b'{}';", literal))
+    }
+    /// Asserts that parsing `b'<literal>'` reports no errors.
+    fn assert_valid_byte(literal: &str) {
+        let parsed = build_file(literal);
+        assert!(
+            parsed.errors().len() == 0,
+            "Errors for literal '{}': {:?}",
+            literal,
+            parsed.errors()
+        );
+    }
+    /// Asserts that parsing `b'<literal>'` reports at least one error.
+    fn assert_invalid_byte(literal: &str) {
+        let parsed = build_file(literal);
+        assert!(parsed.errors().len() > 0);
+    }
+    /// Raw code points: the lower half is valid apart from the bytes that must
+    /// be escaped, the upper half is always rejected.
+    #[test]
+    fn test_ansi_codepoints() {
+        for byte in 0..128u8 {
+            let literal = (byte as char).to_string();
+            match byte {
+                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
+                b'\n' | b'\r' | b'\t' => assert_invalid_byte(&literal),
+                _ => assert_valid_byte(&literal),
+            }
+        }
+        for byte in 128..=255u8 {
+            let literal = (byte as char).to_string();
+            assert_invalid_byte(&literal);
+        }
+    }
+    /// Single non-ASCII code points are rejected in byte literals.
+    #[test]
+    fn test_unicode_codepoints() {
+        let invalid = ["Ƒ", "バ", "メ", "﷽"];
+        for &literal in &invalid {
+            assert_invalid_byte(literal);
+        }
+    }
+    /// Sequences of several code points are rejected in byte literals.
+    #[test]
+    fn test_unicode_multiple_codepoints() {
+        let invalid = ["नी", "👨‍👨‍"];
+        for &literal in &invalid {
+            assert_invalid_byte(literal);
+        }
+    }
+    /// Every simple escape (and an unescaped double quote) is accepted.
+    #[test]
+    fn test_valid_byte_escape() {
+        let valid = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"];
+        for &literal in &valid {
+            assert_valid_byte(literal);
+        }
+    }
+    /// Unknown escapes and a lone backslash are rejected.
+    #[test]
+    fn test_invalid_byte_escape() {
+        let invalid = [r"\a", r"\?", r"\"];
+        for &literal in &invalid {
+            assert_invalid_byte(literal);
+        }
+    }
+    /// Two-digit `\x` escapes are accepted, including values above `\x7F`.
+    #[test]
+    fn test_valid_byte_code_escape() {
+        let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
+        for &literal in &valid {
+            assert_valid_byte(literal);
+        }
+    }
+    /// `\x` escapes with fewer than two digits are rejected.
+    #[test]
+    fn test_invalid_byte_code_escape() {
+        let invalid = [r"\x", r"\x7"];
+        for &literal in &invalid {
+            assert_invalid_byte(literal);
+        }
+    }
+    /// Unicode escapes are rejected in byte literals whether or not they are
+    /// well-formed.
+    #[test]
+    fn test_invalid_unicode_escape() {
+        let well_formed = [
+            r"\u{FF}",
+            r"\u{0}",
+            r"\u{F}",
+            r"\u{10FFFF}",
+            r"\u{1_0__FF___FF_____}",
+        ];
+        let invalid = [
+            r"\u",
+            r"\u{}",
+            r"\u{",
+            r"\u{FF",
+            r"\u{FFFFFF}",
+            r"\u{_F}",
+            r"\u{00FFFFF}",
+            r"\u{110000}",
+        ];
+        for &literal in well_formed.iter().chain(invalid.iter()) {
+            assert_invalid_byte(literal);
+        }
+    }
+}
diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs
new file mode 100644
index 000000000..7b830e97c
--- /dev/null
+++ b/crates/ra_syntax/src/validation/byte_string.rs
@@ -0,0 +1,178 @@
+use crate::{
+    ast::{self, AstNode},
+    string_lexing::{self, StringComponentKind},
+    yellow::{
+        SyntaxError,
+        SyntaxErrorKind::*,
+    },
+};
+use super::byte;
+/// Validates a byte string literal node, appending every problem found to
+/// `errors`.
+///
+/// Character components are delegated to `byte::validate_byte_component`,
+/// except for raw tab, line feed and carriage return, which are accepted as-is.
+/// A missing closing quote is reported as `UnclosedString`.
+pub(crate) fn validate_byte_string_node(node: ast::ByteString, errors: &mut Vec<SyntaxError>) {
+    let source = node.text();
+    let node_range = node.syntax().range();
+    let mut lexed = string_lexing::parse_byte_string_literal(source);
+    // Iterate through a mutable borrow so that `has_closing_quote` can still be
+    // read after the loop.
+    for piece in &mut lexed {
+        // Component ranges are relative to the literal; shift them by its start.
+        let piece_range = piece.range + node_range.start();
+        let kind = match piece.kind {
+            StringComponentKind::IgnoreNewline => continue, // always valid
+            StringComponentKind::Char(kind) => kind,
+        };
+        // Chars must escape \t, \n and \r codepoints, but strings don't
+        let piece_text = &source[piece.range];
+        let is_raw_whitespace = piece_text == "\t" || piece_text == "\n" || piece_text == "\r";
+        if !is_raw_whitespace {
+            byte::validate_byte_component(piece_text, kind, piece_range, errors);
+        }
+    }
+    if !lexed.has_closing_quote {
+        errors.push(SyntaxError::new(UnclosedString, node_range));
+    }
+}
+#[cfg(test)]
+mod test {
+    use crate::SourceFileNode;
+    /// Parses a source file holding one constant initialised with the byte
+    /// string literal `b"<literal>"`, printing the generated source first.
+    fn build_file(literal: &str) -> SourceFileNode {
+        let source = format!(r#"const S: &'static [u8] = b"{}";"#, literal);
+        println!("Source: {}", source);
+        SourceFileNode::parse(&source)
+    }
+    /// Asserts that parsing `b"<literal>"` reports no errors.
+    fn assert_valid_str(literal: &str) {
+        let parsed = build_file(literal);
+        assert!(
+            parsed.errors().len() == 0,
+            "Errors for literal '{}': {:?}",
+            literal,
+            parsed.errors()
+        );
+    }
+    /// Asserts that parsing `b"<literal>"` reports at least one error.
+    fn assert_invalid_str(literal: &str) {
+        let parsed = build_file(literal);
+        assert!(parsed.errors().len() > 0);
+    }
+    /// Raw code points: the lower half is valid, the upper half is rejected.
+    #[test]
+    fn test_ansi_codepoints() {
+        for byte in 0..128u8 {
+            if byte == b'\"' || byte == b'\\' {
+                // Ignore string close and backslash
+                continue;
+            }
+            assert_valid_str(&(byte as char).to_string());
+        }
+        for byte in 128..=255u8 {
+            let literal = (byte as char).to_string();
+            assert_invalid_str(&literal);
+        }
+    }
+    /// Single non-ASCII code points are rejected in byte strings.
+    #[test]
+    fn test_unicode_codepoints() {
+        let invalid = ["Ƒ", "バ", "メ", "﷽"];
+        for &literal in &invalid {
+            assert_invalid_str(literal);
+        }
+    }
+    /// Sequences of non-ASCII code points are rejected in byte strings.
+    #[test]
+    fn test_unicode_multiple_codepoints() {
+        let invalid = ["नी", "👨‍👨‍"];
+        for &literal in &invalid {
+            assert_invalid_str(literal);
+        }
+    }
+    /// Simple escapes and plain ASCII letters are accepted.
+    #[test]
+    fn test_valid_ascii_escape() {
+        let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
+        for &literal in &valid {
+            assert_valid_str(literal);
+        }
+    }
+    /// Unknown escapes and a trailing backslash are rejected.
+    #[test]
+    fn test_invalid_ascii_escape() {
+        let invalid = [r"\a", r"\?", r"\"];
+        for &literal in &invalid {
+            assert_invalid_str(literal);
+        }
+    }
+    /// Two-digit `\x` escapes are accepted, including values above `\x7F`.
+    #[test]
+    fn test_valid_ascii_code_escape() {
+        let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
+        for &literal in &valid {
+            assert_valid_str(literal);
+        }
+    }
+    /// `\x` escapes with fewer than two digits are rejected.
+    #[test]
+    fn test_invalid_ascii_code_escape() {
+        let invalid = [r"\x", r"\x7"];
+        for &literal in &invalid {
+            assert_invalid_str(literal);
+        }
+    }
+    /// Unicode escapes are rejected in byte strings whether or not they are
+    /// well-formed.
+    #[test]
+    fn test_invalid_unicode_escape() {
+        let well_formed = [
+            r"\u{FF}",
+            r"\u{0}",
+            r"\u{F}",
+            r"\u{10FFFF}",
+            r"\u{1_0__FF___FF_____}",
+        ];
+        let invalid = [
+            r"\u",
+            r"\u{}",
+            r"\u{",
+            r"\u{FF",
+            r"\u{FFFFFF}",
+            r"\u{_F}",
+            r"\u{00FFFFF}",
+            r"\u{110000}",
+        ];
+        for &literal in well_formed.iter().chain(invalid.iter()) {
+            assert_invalid_str(literal);
+        }
+    }
+    /// A longer text mixing emoji, a unicode escape and non-ASCII characters
+    /// is rejected.
+    #[test]
+    fn test_mixed_invalid() {
+        let literal = r"This is the tale of a string
+with a newline in between, some emoji (👨‍👨‍) here and there,
+unicode escapes like this: \u{1FFBB} and weird stuff like
+this ﷽";
+        assert_invalid_str(literal);
+    }
+    /// A longer ASCII-only text containing raw newlines is accepted.
+    #[test]
+    fn test_mixed_valid() {
+        let literal = r"This is the tale of a string
+with a newline in between, no emoji at all,
+nor unicode escapes or weird stuff";
+        assert_valid_str(literal);
+    }
+    /// Checks that a short two-word byte string is accepted.
+    #[test]
+    fn test_ignore_newline() {
+        let literal = "Hello \
+             World";
+        assert_valid_str(literal);
+    }
+}
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
new file mode 100644
index 000000000..4728c85e6
--- /dev/null
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -0,0 +1,276 @@
+//! Validation of char literals
+use std::u32;
+use arrayvec::ArrayString;
+use crate::{
+    ast::{self, AstNode},
+    string_lexing::{self, CharComponentKind},
+    TextRange,
+    yellow::{
+        SyntaxError,
+        SyntaxErrorKind::*,
+    },
+};
+/// Validates a char literal node, appending every problem found to `errors`.
+///
+/// Each component produced by `string_lexing::parse_char_literal` is checked
+/// individually; afterwards the literal as a whole is checked for a closing
+/// quote and for containing exactly one component.
+pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
+    let source = node.text();
+    let node_range = node.syntax().range();
+    let mut lexed = string_lexing::parse_char_literal(source);
+    // Iterate through a mutable borrow so that `has_closing_quote` can still be
+    // read once all components have been consumed.
+    let mut component_count = 0;
+    for piece in &mut lexed {
+        component_count += 1;
+        let piece_text = &source[piece.range];
+        // Component ranges are relative to the literal; shift them by its start.
+        let piece_range = piece.range + node_range.start();
+        validate_char_component(piece_text, piece.kind, piece_range, errors);
+    }
+    if !lexed.has_closing_quote {
+        errors.push(SyntaxError::new(UnclosedChar, node_range));
+    }
+    match component_count {
+        0 => errors.push(SyntaxError::new(EmptyChar, node_range)),
+        1 => { /* exactly one component: nothing to report */ }
+        _ => errors.push(SyntaxError::new(OverlongChar, node_range)),
+    }
+}
+/// Validates a single lexed component of a char or string literal.
+///
+/// * `text` is the component's source text and `kind` its lexer classification.
+/// * `range` is attached to every error reported for this component.
+/// * Problems are appended to `errors`; nothing is returned.
+///
+/// Escapes are handed to the matching escape validator. A plain code point is
+/// rejected only when it is a raw tab, carriage return or line feed.
+pub(super) fn validate_char_component(
+    text: &str,
+    kind: CharComponentKind,
+    range: TextRange,
+    errors: &mut Vec<SyntaxError>,
+) {
+    match kind {
+        CharComponentKind::AsciiEscape => validate_ascii_escape(text, range, errors),
+        CharComponentKind::AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
+        CharComponentKind::UnicodeEscape => validate_unicode_escape(text, range, errors),
+        CharComponentKind::CodePoint => match text {
+            // These code points must always be escaped
+            "\t" | "\r" | "\n" => errors.push(SyntaxError::new(UnescapedCodepoint, range)),
+            _ => { /* any other code point is fine */ }
+        },
+    }
+}
+/// Validates a simple backslash escape inside a char or string literal.
+///
+/// Reports `EmptyAsciiEscape` when `text` is one byte long (only the leading
+/// `\`), and `InvalidAsciiEscape` when the character after the backslash is
+/// not accepted by `is_ascii_escape`.
+///
+/// # Panics
+///
+/// Panics if `text` is not exactly one byte long and has no second character.
+fn validate_ascii_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
+    if text.len() == 1 {
+        // Escape sequence consists only of leading `\`
+        errors.push(SyntaxError::new(EmptyAsciiEscape, range));
+        return;
+    }
+    let escaped = text.chars().nth(1).unwrap();
+    if is_ascii_escape(escaped) {
+        return;
+    }
+    errors.push(SyntaxError::new(InvalidAsciiEscape, range));
+}
+/// Returns `true` when `code` is one of the characters allowed directly after
+/// a backslash in a simple escape: `\`, `'`, `"`, `n`, `r`, `t` or `0`.
+pub(super) fn is_ascii_escape(code: char) -> bool {
+    const SIMPLE_ESCAPES: &[char] = &['\\', '\'', '"', 'n', 'r', 't', '0'];
+    SIMPLE_ESCAPES.contains(&code)
+}
+/// Validates an ASCII code escape such as `\x7F` inside a char or string
+/// literal.
+///
+/// `text` is the whole escape including the leading `\x`; `range` is attached
+/// to any reported error and problems are appended to `errors`.
+///
+/// Reports `TooShortAsciiCodeEscape` when `text` is shorter than four bytes,
+/// `MalformedAsciiCodeEscape` when the part after `\x` is not exactly two ASCII
+/// hexadecimal digits, and `AsciiCodeEscapeOutOfRange` when the value is 128
+/// or above.
+///
+/// # Panics
+///
+/// Panics if `text` is at least four bytes long and has more than four
+/// characters.
+fn validate_ascii_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
+    // An AsciiCodeEscape has 4 chars, example: `\xDD`
+    if text.len() < 4 {
+        errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
+        return;
+    }
+    // `len()` counts bytes, so a multi-byte character after `\x` gets past the
+    // check above with fewer than four chars; only an overlong escape is a
+    // broken invariant worth panicking over.
+    assert!(
+        text.chars().count() <= 4,
+        "AsciiCodeEscape cannot be longer than 4 chars"
+    );
+    // Check the digits by hand first: `u8::from_str_radix` would also accept a
+    // leading `+` sign (e.g. `\x+F`), which is not a valid escape.
+    let digits = &text[2..];
+    if digits.len() != 2 || !digits.bytes().all(|b| b.is_ascii_hexdigit()) {
+        errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range));
+        return;
+    }
+    match u8::from_str_radix(digits, 16) {
+        Ok(code) if code < 128 => { /* Escape code is valid */ }
+        Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
+        // Not expected after the digit check above; kept so that a parse
+        // failure is still reported rather than ignored.
+        Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
+    }
+}
+/// Validates a unicode escape of the form `\u{…}`.
+///
+/// `text` is the whole escape including the leading `\u`; `range` is attached
+/// to every reported error and problems are appended to `errors`.
+///
+/// Up to six hexadecimal digits are collected between the braces, with `_`
+/// allowed as a separator anywhere except before the first digit. The
+/// resulting value must be a valid `char`: at most `10FFFF` and not a
+/// surrogate code point.
+///
+/// # Panics
+///
+/// Panics if `text` does not start with `\u`, or if any character follows the
+/// closing `}`.
+fn validate_unicode_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
+    assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
+    if text.len() == 2 {
+        // No starting `{`
+        errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+        return;
+    }
+    if text.len() == 3 {
+        // Only starting `{`
+        errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
+        return;
+    }
+    // Hex digits seen so far, with separators stripped.
+    let mut code = ArrayString::<[_; 6]>::new();
+    let mut closed = false;
+    for c in text[3..].chars() {
+        assert!(!closed, "no characters after escape is closed");
+        if c.is_digit(16) {
+            if code.len() == 6 {
+                errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
+                return;
+            }
+            code.push(c);
+        } else if c == '_' {
+            // Reject leading _
+            if code.is_empty() {
+                errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+                return;
+            }
+        } else if c == '}' {
+            closed = true;
+        } else {
+            errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+            return;
+        }
+    }
+    if !closed {
+        // Keep going: the digits collected so far are still checked below.
+        errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
+    }
+    if code.is_empty() {
+        errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
+        return;
+    }
+    match u32::from_str_radix(&code, 16) {
+        // `from_u32` returns `None` both for values above 10FFFF and for
+        // surrogates (D800..=DFFF); neither can be written as a unicode escape.
+        Ok(code_u32) if std::char::from_u32(code_u32).is_none() => {
+            errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
+        }
+        Ok(_) => {
+            // Valid escape code
+        }
+        Err(_) => {
+            errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+        }
+    }
+}
+#[cfg(test)]
+mod test {
+    use crate::SourceFileNode;
+    /// Parses a source file holding one `char` constant initialised with the
+    /// char literal `'<literal>'`.
+    fn build_file(literal: &str) -> SourceFileNode {
+        SourceFileNode::parse(&format!("const C: char = '{}';", literal))
+    }
+    /// Asserts that parsing `'<literal>'` reports no errors.
+    fn assert_valid_char(literal: &str) {
+        let parsed = build_file(literal);
+        assert!(
+            parsed.errors().len() == 0,
+            "Errors for literal '{}': {:?}",
+            literal,
+            parsed.errors()
+        );
+    }
+    /// Asserts that parsing `'<literal>'` reports at least one error.
+    fn assert_invalid_char(literal: &str) {
+        let parsed = build_file(literal);
+        assert!(parsed.errors().len() > 0);
+    }
+    /// Every code point up to 255 is valid except the ones that must be
+    /// escaped.
+    #[test]
+    fn test_ansi_codepoints() {
+        for byte in 0..=255u8 {
+            let literal = (byte as char).to_string();
+            match byte {
+                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
+                b'\n' | b'\r' | b'\t' => assert_invalid_char(&literal),
+                _ => assert_valid_char(&literal),
+            }
+        }
+    }
+    /// Single non-ASCII code points are accepted in char literals.
+    #[test]
+    fn test_unicode_codepoints() {
+        let valid = ["Ƒ", "バ", "メ", "﷽"];
+        for &literal in &valid {
+            assert_valid_char(literal);
+        }
+    }
+    /// Sequences of several code points are rejected in char literals.
+    #[test]
+    fn test_unicode_multiple_codepoints() {
+        let invalid = ["नी", "👨‍👨‍"];
+        for &literal in &invalid {
+            assert_invalid_char(literal);
+        }
+    }
+    /// Every simple escape (and an unescaped double quote) is accepted.
+    #[test]
+    fn test_valid_ascii_escape() {
+        let valid = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"];
+        for &literal in &valid {
+            assert_valid_char(literal);
+        }
+    }
+    /// Unknown escapes and a lone backslash are rejected.
+    #[test]
+    fn test_invalid_ascii_escape() {
+        let invalid = [r"\a", r"\?", r"\"];
+        for &literal in &invalid {
+            assert_invalid_char(literal);
+        }
+    }
+    /// Two-digit `\x` escapes up to `\x7F` are accepted.
+    #[test]
+    fn test_valid_ascii_code_escape() {
+        let valid = [r"\x00", r"\x7F", r"\x55"];
+        for &literal in &valid {
+            assert_valid_char(literal);
+        }
+    }
+    /// Short `\x` escapes and values above `\x7F` are rejected.
+    #[test]
+    fn test_invalid_ascii_code_escape() {
+        let invalid = [r"\x", r"\x7", r"\xF0"];
+        for &literal in &invalid {
+            assert_invalid_char(literal);
+        }
+    }
+    /// Well-formed unicode escapes are accepted, including `_` separators.
+    #[test]
+    fn test_valid_unicode_escape() {
+        let valid = [
+            r"\u{FF}",
+            r"\u{0}",
+            r"\u{F}",
+            r"\u{10FFFF}",
+            r"\u{1_0__FF___FF_____}",
+        ];
+        for &literal in &valid {
+            assert_valid_char(literal);
+        }
+    }
+    /// Truncated, empty, overlong and out-of-range unicode escapes are
+    /// rejected.
+    #[test]
+    fn test_invalid_unicode_escape() {
+        let invalid = [
+            r"\u",
+            r"\u{}",
+            r"\u{",
+            r"\u{FF",
+            r"\u{FFFFFF}",
+            r"\u{_F}",
+            r"\u{00FFFFF}",
+            r"\u{110000}",
+        ];
+        for &literal in &invalid {
+            assert_invalid_char(literal);
+        }
+    }
+}
diff --git a/crates/ra_syntax/src/validation/mod.rs b/crates/ra_syntax/src/validation/mod.rs
new file mode 100644
index 000000000..bdee8120c
--- /dev/null
+++ b/crates/ra_syntax/src/validation/mod.rs
@@ -0,0 +1,24 @@
+use crate::{
+    algo::visit::{visitor_ctx, VisitorCtx},
+    ast,
+    SourceFileNode,
+    yellow::SyntaxError,
+};
+mod byte;
+mod byte_string;
+mod char;
+mod string;
+/// Runs the byte, byte string, char and string literal validators over every
+/// descendant node of `file` and returns the errors they collected.
+pub(crate) fn validate(file: &SourceFileNode) -> Vec<SyntaxError> {
+    let mut collected = Vec::new();
+    file.syntax().descendants().for_each(|node| {
+        // A fresh visitor is built for each node; the value returned by
+        // `accept` is deliberately discarded.
+        let _ = visitor_ctx(&mut collected)
+            .visit::<ast::Byte, _>(self::byte::validate_byte_node)
+            .visit::<ast::ByteString, _>(self::byte_string::validate_byte_string_node)
+            .visit::<ast::Char, _>(self::char::validate_char_node)
+            .visit::<ast::String, _>(self::string::validate_string_node)
+            .accept(node);
+    });
+    collected
+}
diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs
new file mode 100644
index 000000000..089879d15
--- /dev/null
+++ b/crates/ra_syntax/src/validation/string.rs
@@ -0,0 +1,168 @@
+use crate::{
+    ast::{self, AstNode},
+    string_lexing::{self, StringComponentKind},
+    yellow::{
+        SyntaxError,
+        SyntaxErrorKind::*,
+    },
+};
+use super::char;
+/// Validates a string literal node, appending every problem found to `errors`.
+///
+/// Character components are delegated to `char::validate_char_component`,
+/// except for raw tab, line feed and carriage return, which are accepted as-is.
+/// A missing closing quote is reported as `UnclosedString`.
+pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxError>) {
+    let source = node.text();
+    let node_range = node.syntax().range();
+    let mut lexed = string_lexing::parse_string_literal(source);
+    // Iterate through a mutable borrow so that `has_closing_quote` can still be
+    // read after the loop.
+    for piece in &mut lexed {
+        // Component ranges are relative to the literal; shift them by its start.
+        let piece_range = piece.range + node_range.start();
+        let kind = match piece.kind {
+            StringComponentKind::IgnoreNewline => continue, // always valid
+            StringComponentKind::Char(kind) => kind,
+        };
+        // Chars must escape \t, \n and \r codepoints, but strings don't
+        let piece_text = &source[piece.range];
+        let is_raw_whitespace = piece_text == "\t" || piece_text == "\n" || piece_text == "\r";
+        if !is_raw_whitespace {
+            char::validate_char_component(piece_text, kind, piece_range, errors);
+        }
+    }
+    if !lexed.has_closing_quote {
+        errors.push(SyntaxError::new(UnclosedString, node_range));
+    }
+}
+#[cfg(test)]
+mod test {
+    use crate::SourceFileNode;
+    /// Parses a source file holding one constant initialised with the string
+    /// literal `"<literal>"`, printing the generated source first.
+    fn build_file(literal: &str) -> SourceFileNode {
+        let source = format!(r#"const S: &'static str = "{}";"#, literal);
+        println!("Source: {}", source);
+        SourceFileNode::parse(&source)
+    }
+    /// Asserts that parsing `"<literal>"` reports no errors.
+    fn assert_valid_str(literal: &str) {
+        let parsed = build_file(literal);
+        assert!(
+            parsed.errors().len() == 0,
+            "Errors for literal '{}': {:?}",
+            literal,
+            parsed.errors()
+        );
+    }
+    /// Asserts that parsing `"<literal>"` reports at least one error.
+    fn assert_invalid_str(literal: &str) {
+        let parsed = build_file(literal);
+        assert!(parsed.errors().len() > 0);
+    }
+    /// Every code point up to 255 is valid inside a string.
+    #[test]
+    fn test_ansi_codepoints() {
+        for byte in 0..=255u8 {
+            if byte == b'\"' || byte == b'\\' {
+                // Ignore string close and backslash
+                continue;
+            }
+            assert_valid_str(&(byte as char).to_string());
+        }
+    }
+    /// Single non-ASCII code points are accepted in strings.
+    #[test]
+    fn test_unicode_codepoints() {
+        let valid = ["Ƒ", "バ", "メ", "﷽"];
+        for &literal in &valid {
+            assert_valid_str(literal);
+        }
+    }
+    /// Sequences of several code points are accepted in strings.
+    #[test]
+    fn test_unicode_multiple_codepoints() {
+        let valid = ["नी", "👨‍👨‍"];
+        for &literal in &valid {
+            assert_valid_str(literal);
+        }
+    }
+    /// Simple escapes and plain ASCII letters are accepted.
+    #[test]
+    fn test_valid_ascii_escape() {
+        let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
+        for &literal in &valid {
+            assert_valid_str(literal);
+        }
+    }
+    /// Unknown escapes and a trailing backslash are rejected.
+    #[test]
+    fn test_invalid_ascii_escape() {
+        let invalid = [r"\a", r"\?", r"\"];
+        for &literal in &invalid {
+            assert_invalid_str(literal);
+        }
+    }
+    /// Two-digit `\x` escapes up to `\x7F` are accepted.
+    #[test]
+    fn test_valid_ascii_code_escape() {
+        let valid = [r"\x00", r"\x7F", r"\x55"];
+        for &literal in &valid {
+            assert_valid_str(literal);
+        }
+    }
+    /// Short `\x` escapes and values above `\x7F` are rejected.
+    #[test]
+    fn test_invalid_ascii_code_escape() {
+        let invalid = [r"\x", r"\x7", r"\xF0"];
+        for &literal in &invalid {
+            assert_invalid_str(literal);
+        }
+    }
+    /// Well-formed unicode escapes are accepted, including `_` separators.
+    #[test]
+    fn test_valid_unicode_escape() {
+        let valid = [
+            r"\u{FF}",
+            r"\u{0}",
+            r"\u{F}",
+            r"\u{10FFFF}",
+            r"\u{1_0__FF___FF_____}",
+        ];
+        for &literal in &valid {
+            assert_valid_str(literal);
+        }
+    }
+    /// Truncated, empty, overlong and out-of-range unicode escapes are
+    /// rejected.
+    #[test]
+    fn test_invalid_unicode_escape() {
+        let invalid = [
+            r"\u",
+            r"\u{}",
+            r"\u{",
+            r"\u{FF",
+            r"\u{FFFFFF}",
+            r"\u{_F}",
+            r"\u{00FFFFF}",
+            r"\u{110000}",
+        ];
+        for &literal in &invalid {
+            assert_invalid_str(literal);
+        }
+    }
+    /// A longer text mixing raw newlines, emoji, a unicode escape and
+    /// non-ASCII characters is accepted.
+    #[test]
+    fn test_mixed() {
+        let literal = r"This is the tale of a string
+with a newline in between, some emoji (👨‍👨‍) here and there,
+unicode escapes like this: \u{1FFBB} and weird stuff like
+this ﷽";
+        assert_valid_str(literal);
+    }
+    /// Checks that a short two-word string is accepted.
+    #[test]
+    fn test_ignore_newline() {
+        let literal = "Hello \
+             World";
+        assert_valid_str(literal);
+    }
+}

diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs new file mode 100644 index 000000000..43c0d7edd --- /dev/null +++ b/crates/ra_syntax/src/validation/byte.rs
@@ -0,0 +1,211 @@
	1	//! Validation of byte literals
	2
	3	use crate::{
	4	ast::{self, AstNode},
	5	string_lexing::{self, CharComponentKind},
	6	TextRange,
	7	validation::char,
	8	yellow::{
	9	SyntaxError,
	10	SyntaxErrorKind::*,
	11	},
	12	};
	13
	14	pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec<SyntaxError>) {
	15	let literal_text = node.text();
	16	let literal_range = node.syntax().range();
	17	let mut components = string_lexing::parse_byte_literal(literal_text);
	18	let mut len = 0;
	19	for component in &mut components {
	20	len += 1;
	21	let text = &literal_text[component.range];
	22	let range = component.range + literal_range.start();
	23	validate_byte_component(text, component.kind, range, errors);
	24	}
	25
	26	if !components.has_closing_quote {
	27	errors.push(SyntaxError::new(UnclosedByte, literal_range));
	28	}
	29
	30	if len == 0 {
	31	errors.push(SyntaxError::new(EmptyByte, literal_range));
	32	}
	33
	34	if len > 1 {
	35	errors.push(SyntaxError::new(OverlongByte, literal_range));
	36	}
	37	}
	38
	39	pub(super) fn validate_byte_component(
	40	text: &str,
	41	kind: CharComponentKind,
	42	range: TextRange,
	43	errors: &mut Vec<SyntaxError>,
	44	) {
	45	use self::CharComponentKind::*;
	46	match kind {
	47	AsciiEscape => validate_byte_escape(text, range, errors),
	48	AsciiCodeEscape => validate_byte_code_escape(text, range, errors),
	49	UnicodeEscape => errors.push(SyntaxError::new(UnicodeEscapeForbidden, range)),
	50	CodePoint => {
	51	let c = text
	52	.chars()
	53	.next()
	54	.expect("Code points should be one character long");
	55
	56	// These bytes must always be escaped
	57	if c == '\t' || c == '\r' || c == '\n' {
	58	errors.push(SyntaxError::new(UnescapedByte, range));
	59	}
	60
	61	// Only ASCII bytes are allowed
	62	if c > 0x7F as char {
	63	errors.push(SyntaxError::new(ByteOutOfRange, range));
	64	}
	65	}
	66	}
	67	}
	68
	69	fn validate_byte_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
	70	if text.len() == 1 {
	71	// Escape sequence consists only of leading `\`
	72	errors.push(SyntaxError::new(EmptyByteEscape, range));
	73	} else {
	74	let escape_code = text.chars().skip(1).next().unwrap();
	75	if !char::is_ascii_escape(escape_code) {
	76	errors.push(SyntaxError::new(InvalidByteEscape, range));
	77	}
	78	}
	79	}
	80
	81	fn validate_byte_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
	82	// A ByteCodeEscape has 4 chars, example: `\xDD`
	83	if text.len() < 4 {
	84	errors.push(SyntaxError::new(TooShortByteCodeEscape, range));
	85	} else {
	86	assert!(
	87	text.chars().count() == 4,
	88	"ByteCodeEscape cannot be longer than 4 chars"
	89	);
	90
	91	if u8::from_str_radix(&text[2..], 16).is_err() {
	92	errors.push(SyntaxError::new(MalformedByteCodeEscape, range));
	93	}
	94	}
	95	}
	96
	97	#[cfg(test)]
	98	mod test {
	99	use crate::SourceFileNode;
	100
	101	fn build_file(literal: &str) -> SourceFileNode {
	102	let src = format!("const C: u8 = b'{}';", literal);
	103	SourceFileNode::parse(&src)
	104	}
	105
	106	fn assert_valid_byte(literal: &str) {
	107	let file = build_file(literal);
	108	assert!(
	109	file.errors().len() == 0,
	110	"Errors for literal '{}': {:?}",
	111	literal,
	112	file.errors()
	113	);
	114	}
	115
	116	fn assert_invalid_byte(literal: &str) {
	117	let file = build_file(literal);
	118	assert!(file.errors().len() > 0);
	119	}
	120
	121	#[test]
	122	fn test_ansi_codepoints() {
	123	for byte in 0..128 {
	124	match byte {
	125	b'\n' | b'\r' | b'\t' => assert_invalid_byte(&(byte as char).to_string()),
	126	b'\'' | b'\\' => { /* Ignore character close and backslash */ }
	127	_ => assert_valid_byte(&(byte as char).to_string()),
	128	}
	129	}
	130
	131	for byte in 128..=255u8 {
	132	assert_invalid_byte(&(byte as char).to_string());
	133	}
	134	}
	135
	136	#[test]
	137	fn test_unicode_codepoints() {
	138	let invalid = ["Ƒ", "バ", "メ", "﷽"];
	139	for c in &invalid {
	140	assert_invalid_byte(c);
	141	}
	142	}
	143
	144	#[test]
	145	fn test_unicode_multiple_codepoints() {
	146	let invalid = ["नी", "👨‍👨‍"];
	147	for c in &invalid {
	148	assert_invalid_byte(c);
	149	}
	150	}
	151
	152	#[test]
	153	fn test_valid_byte_escape() {
	154	let valid = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"];
	155	for c in &valid {
	156	assert_valid_byte(c);
	157	}
	158	}
	159
	160	#[test]
	161	fn test_invalid_byte_escape() {
	162	let invalid = [r"\a", r"\?", r"\"];
	163	for c in &invalid {
	164	assert_invalid_byte(c);
	165	}
	166	}
	167
	168	#[test]
	169	fn test_valid_byte_code_escape() {
	170	let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
	171	for c in &valid {
	172	assert_valid_byte(c);
	173	}
	174	}
	175
	176	#[test]
	177	fn test_invalid_byte_code_escape() {
	178	let invalid = [r"\x", r"\x7"];
	179	for c in &invalid {
	180	assert_invalid_byte(c);
	181	}
	182	}
	183
	184	#[test]
	185	fn test_invalid_unicode_escape() {
	186	let well_formed = [
	187	r"\u{FF}",
	188	r"\u{0}",
	189	r"\u{F}",
	190	r"\u{10FFFF}",
	191	r"\u{1_0__FF___FF_____}",
	192	];
	193	for c in &well_formed {
	194	assert_invalid_byte(c);
	195	}
	196
	197	let invalid = [
	198	r"\u",
	199	r"\u{}",
	200	r"\u{",
	201	r"\u{FF",
	202	r"\u{FFFFFF}",
	203	r"\u{_F}",
	204	r"\u{00FFFFF}",
	205	r"\u{110000}",
	206	];
	207	for c in &invalid {
	208	assert_invalid_byte(c);
	209	}
	210	}
	211	}


diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs new file mode 100644 index 000000000..7b830e97c --- /dev/null +++ b/crates/ra_syntax/src/validation/byte_string.rs
@@ -0,0 +1,178 @@
	1	use crate::{
	2	ast::{self, AstNode},
	3	string_lexing::{self, StringComponentKind},
	4	yellow::{
	5	SyntaxError,
	6	SyntaxErrorKind::*,
	7	},
	8	};
	9
	10	use super::byte;
	11
	12	pub(crate) fn validate_byte_string_node(node: ast::ByteString, errors: &mut Vec<SyntaxError>) {
	13	let literal_text = node.text();
	14	let literal_range = node.syntax().range();
	15	let mut components = string_lexing::parse_byte_string_literal(literal_text);
	16	for component in &mut components {
	17	let range = component.range + literal_range.start();
	18
	19	match component.kind {
	20	StringComponentKind::Char(kind) => {
	21	// Chars must escape \t, \n and \r codepoints, but strings don't
	22	let text = &literal_text[component.range];
	23	match text {
	24	"\t" \| "\n" \| "\r" => { /* always valid */ }
	25	_ => byte::validate_byte_component(text, kind, range, errors),
	26	}
	27	}
	28	StringComponentKind::IgnoreNewline => { /* always valid */ }
	29	}
	30	}
	31
	32	if !components.has_closing_quote {
	33	errors.push(SyntaxError::new(UnclosedString, literal_range));
	34	}
	35	}
	36
	37	#[cfg(test)]
	38	mod test {
	39	use crate::SourceFileNode;
	40
	41	fn build_file(literal: &str) -> SourceFileNode {
	42	let src = format!(r#"const S: &'static [u8] = b"{}";"#, literal);
	43	println!("Source: {}", src);
	44	SourceFileNode::parse(&src)
	45	}
	46
	47	fn assert_valid_str(literal: &str) {
	48	let file = build_file(literal);
	49	assert!(
	50	file.errors().len() == 0,
	51	"Errors for literal '{}': {:?}",
	52	literal,
	53	file.errors()
	54	);
	55	}
	56
	57	fn assert_invalid_str(literal: &str) {
	58	let file = build_file(literal);
	59	assert!(file.errors().len() > 0);
	60	}
	61
	62	#[test]
	63	fn test_ansi_codepoints() {
	64	for byte in 0..128 {
	65	match byte {
	66	b'\"' \| b'\\' => { /* Ignore string close and backslash */ }
	67	_ => assert_valid_str(&(byte as char).to_string()),
	68	}
	69	}
	70
	71	for byte in 128..=255u8 {
	72	assert_invalid_str(&(byte as char).to_string());
	73	}
	74	}
	75
	76	#[test]
	77	fn test_unicode_codepoints() {
	78	let invalid = ["Ƒ", "バ", "メ", "﷽"];
	79	for c in &invalid {
	80	assert_invalid_str(c);
	81	}
	82	}
	83
	84	#[test]
	85	fn test_unicode_multiple_codepoints() {
	86	let invalid = ["नी", "👨‍👨‍"];
	87	for c in &invalid {
	88	assert_invalid_str(c);
	89	}
	90	}
	91
	92	#[test]
	93	fn test_valid_ascii_escape() {
	94	let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
	95	for c in &valid {
	96	assert_valid_str(c);
	97	}
	98	}
	99
	100	#[test]
	101	fn test_invalid_ascii_escape() {
	102	let invalid = [r"\a", r"\?", r"\"];
	103	for c in &invalid {
	104	assert_invalid_str(c);
	105	}
	106	}
	107
	108	#[test]
	109	fn test_valid_ascii_code_escape() {
	110	let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
	111	for c in &valid {
	112	assert_valid_str(c);
	113	}
	114	}
	115
	116	#[test]
	117	fn test_invalid_ascii_code_escape() {
	118	let invalid = [r"\x", r"\x7"];
	119	for c in &invalid {
	120	assert_invalid_str(c);
	121	}
	122	}
	123
	124	#[test]
	125	fn test_invalid_unicode_escape() {
	126	let well_formed = [
	127	r"\u{FF}",
	128	r"\u{0}",
	129	r"\u{F}",
	130	r"\u{10FFFF}",
	131	r"\u{1_0__FF___FF_____}",
	132	];
	133	for c in &well_formed {
	134	assert_invalid_str(c);
	135	}
	136
	137	let invalid = [
	138	r"\u",
	139	r"\u{}",
	140	r"\u{",
	141	r"\u{FF",
	142	r"\u{FFFFFF}",
	143	r"\u{_F}",
	144	r"\u{00FFFFF}",
	145	r"\u{110000}",
	146	];
	147	for c in &invalid {
	148	assert_invalid_str(c);
	149	}
	150	}
	151
	152	#[test]
	153	fn test_mixed_invalid() {
	154	assert_invalid_str(
	155	r"This is the tale of a string
	156	with a newline in between, some emoji (👨‍👨‍) here and there,
	157	unicode escapes like this: \u{1FFBB} and weird stuff like
	158	this ﷽",
	159	);
	160	}
	161
	162	#[test]
	163	fn test_mixed_valid() {
	164	assert_valid_str(
	165	r"This is the tale of a string
	166	with a newline in between, no emoji at all,
	167	nor unicode escapes or weird stuff",
	168	);
	169	}
	170
	171	#[test]
	172	fn test_ignore_newline() {
	173	assert_valid_str(
	174	"Hello \
	175	World",
	176	);
	177	}
	178	}


diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs new file mode 100644 index 000000000..4728c85e6 --- /dev/null +++ b/crates/ra_syntax/src/validation/char.rs
@@ -0,0 +1,276 @@
	1	//! Validation of char literals
	2
	3	use std::u32;
	4
	5	use arrayvec::ArrayString;
	6
	7	use crate::{
	8	ast::{self, AstNode},
	9	string_lexing::{self, CharComponentKind},
	10	TextRange,
	11	yellow::{
	12	SyntaxError,
	13	SyntaxErrorKind::*,
	14	},
	15	};
	16
	17	pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
	18	let literal_text = node.text();
	19	let literal_range = node.syntax().range();
	20	let mut components = string_lexing::parse_char_literal(literal_text);
	21	let mut len = 0;
	22	for component in &mut components {
	23	len += 1;
	24	let text = &literal_text[component.range];
	25	let range = component.range + literal_range.start();
	26	validate_char_component(text, component.kind, range, errors);
	27	}
	28
	29	if !components.has_closing_quote {
	30	errors.push(SyntaxError::new(UnclosedChar, literal_range));
	31	}
	32
	33	if len == 0 {
	34	errors.push(SyntaxError::new(EmptyChar, literal_range));
	35	}
	36
	37	if len > 1 {
	38	errors.push(SyntaxError::new(OverlongChar, literal_range));
	39	}
	40	}
	41
	42	pub(super) fn validate_char_component(
	43	text: &str,
	44	kind: CharComponentKind,
	45	range: TextRange,
	46	errors: &mut Vec<SyntaxError>,
	47	) {
	48	// Validate escapes
	49	use self::CharComponentKind::*;
	50	match kind {
	51	AsciiEscape => validate_ascii_escape(text, range, errors),
	52	AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
	53	UnicodeEscape => validate_unicode_escape(text, range, errors),
	54	CodePoint => {
	55	// These code points must always be escaped
	56	if text == "\t" \|\| text == "\r" \|\| text == "\n" {
	57	errors.push(SyntaxError::new(UnescapedCodepoint, range));
	58	}
	59	}
	60	}
	61	}
	62
	63	fn validate_ascii_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
	64	if text.len() == 1 {
	65	// Escape sequence consists only of leading `\`
	66	errors.push(SyntaxError::new(EmptyAsciiEscape, range));
	67	} else {
	68	let escape_code = text.chars().skip(1).next().unwrap();
	69	if !is_ascii_escape(escape_code) {
	70	errors.push(SyntaxError::new(InvalidAsciiEscape, range));
	71	}
	72	}
	73	}
	74
	75	pub(super) fn is_ascii_escape(code: char) -> bool {
	76	match code {
	77	'\\' \| '\'' \| '"' \| 'n' \| 'r' \| 't' \| '0' => true,
	78	_ => false,
	79	}
	80	}
	81
	82	fn validate_ascii_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
	83	// An AsciiCodeEscape has 4 chars, example: `\xDD`
	84	if text.len() < 4 {
	85	errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
	86	} else {
	87	assert!(
	88	text.chars().count() == 4,
	89	"AsciiCodeEscape cannot be longer than 4 chars"
	90	);
	91
	92	match u8::from_str_radix(&text[2..], 16) {
	93	Ok(code) if code < 128 => { /* Escape code is valid */ }
	94	Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
	95	Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
	96	}
	97	}
	98	}
	99
	100	fn validate_unicode_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
	101	assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
	102
	103	if text.len() == 2 {
	104	// No starting `{`
	105	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
	106	return;
	107	}
	108
	109	if text.len() == 3 {
	110	// Only starting `{`
	111	errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
	112	return;
	113	}
	114
	115	let mut code = ArrayString::<[_; 6]>::new();
	116	let mut closed = false;
	117	for c in text[3..].chars() {
	118	assert!(!closed, "no characters after escape is closed");
	119
	120	if c.is_digit(16) {
	121	if code.len() == 6 {
	122	errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
	123	return;
	124	}
	125
	126	code.push(c);
	127	} else if c == '_' {
	128	// Reject leading _
	129	if code.len() == 0 {
	130	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
	131	return;
	132	}
	133	} else if c == '}' {
	134	closed = true;
	135	} else {
	136	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
	137	return;
	138	}
	139	}
	140
	141	if !closed {
	142	errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
	143	}
	144
	145	if code.len() == 0 {
	146	errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
	147	return;
	148	}
	149
	150	match u32::from_str_radix(&code, 16) {
	151	Ok(code_u32) if code_u32 > 0x10FFFF => {
	152	errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
	153	}
	154	Ok(_) => {
	155	// Valid escape code
	156	}
	157	Err(_) => {
	158	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
	159	}
	160	}
	161	}
	162
	163	#[cfg(test)]
	164	mod test {
	165	use crate::SourceFileNode;
	166
	167	fn build_file(literal: &str) -> SourceFileNode {
	168	let src = format!("const C: char = '{}';", literal);
	169	SourceFileNode::parse(&src)
	170	}
	171
	172	fn assert_valid_char(literal: &str) {
	173	let file = build_file(literal);
	174	assert!(
	175	file.errors().len() == 0,
	176	"Errors for literal '{}': {:?}",
	177	literal,
	178	file.errors()
	179	);
	180	}
	181
	182	fn assert_invalid_char(literal: &str) {
	183	let file = build_file(literal);
	184	assert!(file.errors().len() > 0);
	185	}
	186
	187	#[test]
	188	fn test_ansi_codepoints() {
	189	for byte in 0..=255u8 {
	190	match byte {
	191	b'\n' \| b'\r' \| b'\t' => assert_invalid_char(&(byte as char).to_string()),
	192	b'\'' \| b'\\' => { /* Ignore character close and backslash */ }
	193	_ => assert_valid_char(&(byte as char).to_string()),
	194	}
	195	}
	196	}
	197
	198	#[test]
	199	fn test_unicode_codepoints() {
	200	let valid = ["Ƒ", "バ", "メ", "﷽"];
	201	for c in &valid {
	202	assert_valid_char(c);
	203	}
	204	}
	205
	206	#[test]
	207	fn test_unicode_multiple_codepoints() {
	208	let invalid = ["नी", "👨‍👨‍"];
	209	for c in &invalid {
	210	assert_invalid_char(c);
	211	}
	212	}
	213
	214	#[test]
	215	fn test_valid_ascii_escape() {
	216	let valid = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"];
	217	for c in &valid {
	218	assert_valid_char(c);
	219	}
	220	}
	221
	222	#[test]
	223	fn test_invalid_ascii_escape() {
	224	let invalid = [r"\a", r"\?", r"\"];
	225	for c in &invalid {
	226	assert_invalid_char(c);
	227	}
	228	}
	229
	230	#[test]
	231	fn test_valid_ascii_code_escape() {
	232	let valid = [r"\x00", r"\x7F", r"\x55"];
	233	for c in &valid {
	234	assert_valid_char(c);
	235	}
	236	}
	237
	238	#[test]
	239	fn test_invalid_ascii_code_escape() {
	240	let invalid = [r"\x", r"\x7", r"\xF0"];
	241	for c in &invalid {
	242	assert_invalid_char(c);
	243	}
	244	}
	245
	246	#[test]
	247	fn test_valid_unicode_escape() {
	248	let valid = [
	249	r"\u{FF}",
	250	r"\u{0}",
	251	r"\u{F}",
	252	r"\u{10FFFF}",
	253	r"\u{1_0__FF___FF_____}",
	254	];
	255	for c in &valid {
	256	assert_valid_char(c);
	257	}
	258	}
	259
	260	#[test]
	261	fn test_invalid_unicode_escape() {
	262	let invalid = [
	263	r"\u",
	264	r"\u{}",
	265	r"\u{",
	266	r"\u{FF",
	267	r"\u{FFFFFF}",
	268	r"\u{_F}",
	269	r"\u{00FFFFF}",
	270	r"\u{110000}",
	271	];
	272	for c in &invalid {
	273	assert_invalid_char(c);
	274	}
	275	}
	276	}


diff --git a/crates/ra_syntax/src/validation/mod.rs b/crates/ra_syntax/src/validation/mod.rs new file mode 100644 index 000000000..bdee8120c --- /dev/null +++ b/crates/ra_syntax/src/validation/mod.rs
@@ -0,0 +1,24 @@
	1	use crate::{
	2	algo::visit::{visitor_ctx, VisitorCtx},
	3	ast,
	4	SourceFileNode,
	5	yellow::SyntaxError,
	6	};
	7
	8	mod byte;
	9	mod byte_string;
	10	mod char;
	11	mod string;
	12
	13	pub(crate) fn validate(file: &SourceFileNode) -> Vec<SyntaxError> {
	14	let mut errors = Vec::new();
	15	for node in file.syntax().descendants() {
	16	let _ = visitor_ctx(&mut errors)
	17	.visit::<ast::Byte, _>(self::byte::validate_byte_node)
	18	.visit::<ast::ByteString, _>(self::byte_string::validate_byte_string_node)
	19	.visit::<ast::Char, _>(self::char::validate_char_node)
	20	.visit::<ast::String, _>(self::string::validate_string_node)
	21	.accept(node);
	22	}
	23	errors
	24	}


diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs new file mode 100644 index 000000000..089879d15 --- /dev/null +++ b/crates/ra_syntax/src/validation/string.rs
@@ -0,0 +1,168 @@
	1	use crate::{
	2	ast::{self, AstNode},
	3	string_lexing::{self, StringComponentKind},
	4	yellow::{
	5	SyntaxError,
	6	SyntaxErrorKind::*,
	7	},
	8	};
	9
	10	use super::char;
	11
	12	pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxError>) {
	13	let literal_text = node.text();
	14	let literal_range = node.syntax().range();
	15	let mut components = string_lexing::parse_string_literal(literal_text);
	16	for component in &mut components {
	17	let range = component.range + literal_range.start();
	18
	19	match component.kind {
	20	StringComponentKind::Char(kind) => {
	21	// Chars must escape \t, \n and \r codepoints, but strings don't
	22	let text = &literal_text[component.range];
	23	match text {
	24	"\t" \| "\n" \| "\r" => { /* always valid */ }
	25	_ => char::validate_char_component(text, kind, range, errors),
	26	}
	27	}
	28	StringComponentKind::IgnoreNewline => { /* always valid */ }
	29	}
	30	}
	31
	32	if !components.has_closing_quote {
	33	errors.push(SyntaxError::new(UnclosedString, literal_range));
	34	}
	35	}
	36
	37	#[cfg(test)]
	38	mod test {
	39	use crate::SourceFileNode;
	40
	41	fn build_file(literal: &str) -> SourceFileNode {
	42	let src = format!(r#"const S: &'static str = "{}";"#, literal);
	43	println!("Source: {}", src);
	44	SourceFileNode::parse(&src)
	45	}
	46
	47	fn assert_valid_str(literal: &str) {
	48	let file = build_file(literal);
	49	assert!(
	50	file.errors().len() == 0,
	51	"Errors for literal '{}': {:?}",
	52	literal,
	53	file.errors()
	54	);
	55	}
	56
	57	fn assert_invalid_str(literal: &str) {
	58	let file = build_file(literal);
	59	assert!(file.errors().len() > 0);
	60	}
	61
	62	#[test]
	63	fn test_ansi_codepoints() {
	64	for byte in 0..=255u8 {
	65	match byte {
	66	b'\"' \| b'\\' => { /* Ignore string close and backslash */ }
	67	_ => assert_valid_str(&(byte as char).to_string()),
	68	}
	69	}
	70	}
	71
	72	#[test]
	73	fn test_unicode_codepoints() {
	74	let valid = ["Ƒ", "バ", "メ", "﷽"];
	75	for c in &valid {
	76	assert_valid_str(c);
	77	}
	78	}
	79
	80	#[test]
	81	fn test_unicode_multiple_codepoints() {
	82	let valid = ["नी", "👨‍👨‍"];
	83	for c in &valid {
	84	assert_valid_str(c);
	85	}
	86	}
	87
	88	#[test]
	89	fn test_valid_ascii_escape() {
	90	let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
	91	for c in &valid {
	92	assert_valid_str(c);
	93	}
	94	}
	95
	96	#[test]
	97	fn test_invalid_ascii_escape() {
	98	let invalid = [r"\a", r"\?", r"\"];
	99	for c in &invalid {
	100	assert_invalid_str(c);
	101	}
	102	}
	103
	104	#[test]
	105	fn test_valid_ascii_code_escape() {
	106	let valid = [r"\x00", r"\x7F", r"\x55"];
	107	for c in &valid {
	108	assert_valid_str(c);
	109	}
	110	}
	111
	112	#[test]
	113	fn test_invalid_ascii_code_escape() {
	114	let invalid = [r"\x", r"\x7", r"\xF0"];
	115	for c in &invalid {
	116	assert_invalid_str(c);
	117	}
	118	}
	119
	120	#[test]
	121	fn test_valid_unicode_escape() {
	122	let valid = [
	123	r"\u{FF}",
	124	r"\u{0}",
	125	r"\u{F}",
	126	r"\u{10FFFF}",
	127	r"\u{1_0__FF___FF_____}",
	128	];
	129	for c in &valid {
	130	assert_valid_str(c);
	131	}
	132	}
	133
	134	#[test]
	135	fn test_invalid_unicode_escape() {
	136	let invalid = [
	137	r"\u",
	138	r"\u{}",
	139	r"\u{",
	140	r"\u{FF",
	141	r"\u{FFFFFF}",
	142	r"\u{_F}",
	143	r"\u{00FFFFF}",
	144	r"\u{110000}",
	145	];
	146	for c in &invalid {
	147	assert_invalid_str(c);
	148	}
	149	}
	150
	151	#[test]
	152	fn test_mixed() {
	153	assert_valid_str(
	154	r"This is the tale of a string
	155	with a newline in between, some emoji (👨‍👨‍) here and there,
	156	unicode escapes like this: \u{1FFBB} and weird stuff like
	157	this ﷽",
	158	);
	159	}
	160
	161	#[test]
	162	fn test_ignore_newline() {
	163	assert_valid_str(
	164	"Hello \
	165	World",
	166	);
	167	}
	168	}