3 files changed, 458 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
new file mode 100644
index 000000000..63f9bad24
--- /dev/null
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -0,0 +1,270 @@
+use std::u32;
+use arrayvec::ArrayString;
+use crate::{
+    ast::{self, AstNode},
+    string_lexing::{self, CharComponentKind},
+    TextRange,
+    yellow::{
+        SyntaxError,
+        SyntaxErrorKind::*,
+    },
+};
+/// Checks one `char` literal node and appends every problem found to `errors`.
+///
+/// Each component produced by `string_lexing::parse_char_literal` is validated
+/// on its own; afterwards the literal as a whole is checked for a missing
+/// closing quote and for holding anything other than exactly one component.
+pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
+    let literal_text = node.text();
+    let literal_range = node.syntax().range();
+    let mut components = string_lexing::parse_char_literal(literal_text);
+    let mut component_count = 0;
+    for piece in &mut components {
+        // Component ranges index into the literal text; shift them by the
+        // literal's start so the reported range is relative to the node's range.
+        let absolute_range = piece.range + literal_range.start();
+        validate_char_component(&literal_text[piece.range], piece.kind, absolute_range, errors);
+        component_count += 1;
+    }
+    if !components.has_closing_quote {
+        errors.push(SyntaxError::new(UnclosedChar, literal_range));
+    }
+    match component_count {
+        0 => errors.push(SyntaxError::new(EmptyChar, literal_range)),
+        1 => { /* exactly one component: nothing to report */ }
+        _ => errors.push(SyntaxError::new(OverlongChar, literal_range)),
+    }
+}
+/// Validates one component of a char or string literal.
+///
+/// `text` is the component's source text, `kind` is the classification the
+/// literal lexer assigned to it, and `range` is the range attached to every
+/// error reported for this component. Problems are appended to `errors`;
+/// nothing is returned.
+///
+/// # Panics
+///
+/// Panics when an invariant expected from the lexer is violated: an
+/// `AsciiCodeEscape` with more than two characters after `\x`, a
+/// `UnicodeEscape` that does not start with `\u`, or a `UnicodeEscape` with
+/// characters after its closing `}`.
+pub(crate) fn validate_char_component(
+    text: &str,
+    kind: CharComponentKind,
+    range: TextRange,
+    errors: &mut Vec<SyntaxError>,
+) {
+    // Validate escapes
+    use self::CharComponentKind::*;
+    match kind {
+        // The first char is the leading `\`; the second, if present, is the escape code.
+        AsciiEscape => match text.chars().nth(1) {
+            // Escape sequence consists only of leading `\`
+            None => errors.push(SyntaxError::new(EmptyAsciiEscape, range)),
+            Some(escape_code) => {
+                if !is_ascii_escape(escape_code) {
+                    errors.push(SyntaxError::new(InvalidAsciiEscape, range));
+                }
+            }
+        },
+        AsciiCodeEscape => {
+            // An AsciiCodeEscape has 4 chars, example: `\xDD`.
+            // Count chars rather than bytes: a multi-byte char after `\x`
+            // (e.g. `\xé`) is 4 bytes long but only 3 chars, and must be
+            // reported as an error instead of tripping the assertion below.
+            let digits = text.get(2..).unwrap_or("");
+            let digit_count = digits.chars().count();
+            if digit_count < 2 {
+                errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
+            } else {
+                assert!(
+                    digit_count == 2,
+                    "AsciiCodeEscape cannot be longer than 4 chars"
+                );
+                if !digits.chars().all(|c| c.is_digit(16)) {
+                    // Checked explicitly because `from_str_radix` tolerates a
+                    // leading `+`, which would let `\x+7` through as valid.
+                    errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range));
+                } else {
+                    match u8::from_str_radix(digits, 16) {
+                        Ok(code) if code < 128 => { /* Escape code is valid */ }
+                        Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
+                        Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
+                    }
+                }
+            }
+        }
+        UnicodeEscape => {
+            assert!(text.starts_with("\\u"), "UnicodeEscape always starts with \\u");
+            if text.len() == 2 {
+                // No starting `{`
+                errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+                return;
+            }
+            if text.len() == 3 {
+                // Only starting `{`
+                errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
+                return;
+            }
+            // Collect the hex digits between the braces; `_` separators are
+            // skipped, so at most 6 digits ever need to be stored.
+            // NOTE(review): assumes the third byte is `{` — confirm against the lexer.
+            let mut code = ArrayString::<[_; 6]>::new();
+            let mut closed = false;
+            for c in text[3..].chars() {
+                assert!(!closed, "no characters after escape is closed");
+                if c.is_digit(16) {
+                    if code.len() == 6 {
+                        errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
+                        return;
+                    }
+                    code.push(c);
+                } else if c == '_' {
+                    // Reject leading _
+                    if code.is_empty() {
+                        errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+                        return;
+                    }
+                } else if c == '}' {
+                    closed = true;
+                } else {
+                    errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+                    return;
+                }
+            }
+            // An unclosed escape is reported, but the digits seen so far are
+            // still validated below, so a second error may follow.
+            if !closed {
+                errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
+            }
+            if code.is_empty() {
+                errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
+                return;
+            }
+            match u32::from_str_radix(&code, 16) {
+                Ok(code_u32) if code_u32 > 0x10FFFF => {
+                    errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
+                }
+                Ok(_) => {
+                    // Valid escape code
+                }
+                Err(_) => {
+                    errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+                }
+            }
+        }
+        CodePoint => {
+            // These code points must always be escaped in a char literal.
+            // The string validator filters them out before delegating here,
+            // so strings may still contain them unescaped.
+            if text == "\t" || text == "\n" || text == "\r" {
+                errors.push(SyntaxError::new(UnescapedCodepoint, range));
+            }
+        }
+    }
+}
+/// Returns `true` when `code` is one of the characters accepted after the
+/// leading `\` of a simple escape: `\`, `'`, `"`, `n`, `r`, `t` or `0`.
+fn is_ascii_escape(code: char) -> bool {
+    const ESCAPE_CODES: [char; 7] = ['\\', '\'', '"', 'n', 'r', 't', '0'];
+    ESCAPE_CODES.contains(&code)
+}
+#[cfg(test)]
+mod test {
+    use crate::SourceFileNode;
+    /// Parses a source file that wraps `literal` in single quotes as the
+    /// value of a `char` constant.
+    fn build_file(literal: &str) -> SourceFileNode {
+        let src = format!("const C: char = '{}';", literal);
+        SourceFileNode::parse(&src)
+    }
+    /// Asserts that the file built around `literal` reports no errors.
+    fn assert_valid_char(literal: &str) {
+        let file = build_file(literal);
+        assert!(
+            file.errors().len() == 0,
+            "Errors for literal '{}': {:?}",
+            literal,
+            file.errors()
+        );
+    }
+    /// Asserts that the file built around `literal` reports at least one error.
+    fn assert_invalid_char(literal: &str) {
+        let file = build_file(literal);
+        // Name the offending literal: these helpers are called from loops, so a
+        // bare assertion failure would not say which input was accepted.
+        assert!(
+            file.errors().len() > 0,
+            "Expected errors for literal {:?}, but none were reported",
+            literal
+        );
+    }
+    /// Every single-byte code point is accepted unescaped, except `\n`, `\r`
+    /// and `\t`, which must be rejected.
+    #[test]
+    fn test_ansi_codepoints() {
+        for byte in 0..=255u8 {
+            match byte {
+                b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()),
+                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
+                _ => assert_valid_char(&(byte as char).to_string()),
+            }
+        }
+    }
+    /// Single non-ASCII code points are valid char literals.
+    #[test]
+    fn test_unicode_codepoints() {
+        let valid = ["Ƒ", "バ", "メ", "﷽"];
+        for c in &valid {
+            assert_valid_char(c);
+        }
+    }
+    /// Sequences made of several code points are not a single char.
+    #[test]
+    fn test_unicode_multiple_codepoints() {
+        let invalid = ["नी", "👨‍👨‍"];
+        for c in &invalid {
+            assert_invalid_char(c);
+        }
+    }
+    #[test]
+    fn test_valid_ascii_escape() {
+        let valid = [
+            r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b",
+        ];
+        for c in &valid {
+            assert_valid_char(c);
+        }
+    }
+    #[test]
+    fn test_invalid_ascii_escape() {
+        let invalid = [r"\a", r"\?", r"\"];
+        for c in &invalid {
+            assert_invalid_char(c);
+        }
+    }
+    #[test]
+    fn test_valid_ascii_code_escape() {
+        let valid = [r"\x00", r"\x7F", r"\x55"];
+        for c in &valid {
+            assert_valid_char(c);
+        }
+    }
+    /// Covers a missing digit pair, a single digit, and a code above 0x7F.
+    #[test]
+    fn test_invalid_ascii_code_escape() {
+        let invalid = [r"\x", r"\x7", r"\xF0"];
+        for c in &invalid {
+            assert_invalid_char(c);
+        }
+    }
+    #[test]
+    fn test_valid_unicode_escape() {
+        let valid = [
+            r"\u{FF}",
+            r"\u{0}",
+            r"\u{F}",
+            r"\u{10FFFF}",
+            r"\u{1_0__FF___FF_____}",
+        ];
+        for c in &valid {
+            assert_valid_char(c);
+        }
+    }
+    #[test]
+    fn test_invalid_unicode_escape() {
+        let invalid = [
+            r"\u",
+            r"\u{}",
+            r"\u{",
+            r"\u{FF",
+            r"\u{FFFFFF}",
+            r"\u{_F}",
+            r"\u{00FFFFF}",
+            r"\u{110000}",
+        ];
+        for c in &invalid {
+            assert_invalid_char(c);
+        }
+    }
+}
diff --git a/crates/ra_syntax/src/validation/mod.rs b/crates/ra_syntax/src/validation/mod.rs
new file mode 100644
index 000000000..2ff0bc26d
--- /dev/null
+++ b/crates/ra_syntax/src/validation/mod.rs
@@ -0,0 +1,20 @@
+use crate::{
+    algo::visit::{visitor_ctx, VisitorCtx},
+    ast,
+    SourceFileNode,
+    yellow::SyntaxError,
+};
+mod char;
+mod string;
+/// Runs the literal validators over a parsed file and returns the errors they report.
+///
+/// Every descendant of the file's syntax node is offered to a visitor:
+/// `ast::Char` nodes are handed to `char::validate_char_node` and
+/// `ast::String` nodes to `string::validate_string_node`. The value returned
+/// by `accept` is deliberately discarded.
+pub(crate) fn validate(file: &SourceFileNode) -> Vec<SyntaxError> {
+    let mut syntax_errors = Vec::new();
+    for descendant in file.syntax().descendants() {
+        let dispatcher = visitor_ctx(&mut syntax_errors)
+            .visit::<ast::Char, _>(self::char::validate_char_node)
+            .visit::<ast::String, _>(self::string::validate_string_node);
+        let _ = dispatcher.accept(descendant);
+    }
+    syntax_errors
+}
diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs
new file mode 100644
index 000000000..089879d15
--- /dev/null
+++ b/crates/ra_syntax/src/validation/string.rs
@@ -0,0 +1,168 @@
+use crate::{
+    ast::{self, AstNode},
+    string_lexing::{self, StringComponentKind},
+    yellow::{
+        SyntaxError,
+        SyntaxErrorKind::*,
+    },
+};
+use super::char;
+/// Checks one string literal node and appends every problem found to `errors`.
+///
+/// Char-like components are delegated to `char::validate_char_component`,
+/// except raw `\t`, `\n` and `\r` code points, which are always accepted in a
+/// string. A missing closing quote is reported against the whole literal.
+pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxError>) {
+    let literal_text = node.text();
+    let literal_range = node.syntax().range();
+    let mut components = string_lexing::parse_string_literal(literal_text);
+    for piece in &mut components {
+        let kind = match piece.kind {
+            // An escaped line break is always valid.
+            StringComponentKind::IgnoreNewline => continue,
+            StringComponentKind::Char(kind) => kind,
+        };
+        // Chars must escape \t, \n and \r codepoints, but strings don't
+        let piece_text = &literal_text[piece.range];
+        let allowed_unescaped = piece_text == "\t" || piece_text == "\n" || piece_text == "\r";
+        if !allowed_unescaped {
+            // Component ranges index into the literal text; shift them by the
+            // literal's start before reporting.
+            let absolute_range = piece.range + literal_range.start();
+            char::validate_char_component(piece_text, kind, absolute_range, errors);
+        }
+    }
+    if !components.has_closing_quote {
+        errors.push(SyntaxError::new(UnclosedString, literal_range));
+    }
+}
+#[cfg(test)]
+mod test {
+    use crate::SourceFileNode;
+    /// Parses a source file that wraps `literal` in double quotes as the
+    /// value of a `&'static str` constant, echoing the generated source.
+    fn build_file(literal: &str) -> SourceFileNode {
+        let src = format!(r#"const S: &'static str = "{}";"#, literal);
+        println!("Source: {}", src);
+        SourceFileNode::parse(&src)
+    }
+    /// Asserts that the file built around `literal` reports no errors.
+    fn assert_valid_str(literal: &str) {
+        let file = build_file(literal);
+        assert!(
+            file.errors().len() == 0,
+            "Errors for literal '{}': {:?}",
+            literal,
+            file.errors()
+        );
+    }
+    /// Asserts that the file built around `literal` reports at least one error.
+    fn assert_invalid_str(literal: &str) {
+        let file = build_file(literal);
+        // Name the offending literal: these helpers are called from loops, so a
+        // bare assertion failure would not say which input was accepted.
+        assert!(
+            file.errors().len() > 0,
+            "Expected errors for literal {:?}, but none were reported",
+            literal
+        );
+    }
+    /// Unlike chars, strings accept every single-byte code point unescaped,
+    /// including `\n`, `\r` and `\t`.
+    #[test]
+    fn test_ansi_codepoints() {
+        for byte in 0..=255u8 {
+            match byte {
+                b'\"' | b'\\' => { /* Ignore string close and backslash */ }
+                _ => assert_valid_str(&(byte as char).to_string()),
+            }
+        }
+    }
+    #[test]
+    fn test_unicode_codepoints() {
+        let valid = ["Ƒ", "バ", "メ", "﷽"];
+        for c in &valid {
+            assert_valid_str(c);
+        }
+    }
+    /// Sequences of several code points are fine inside a string.
+    #[test]
+    fn test_unicode_multiple_codepoints() {
+        let valid = ["नी", "👨‍👨‍"];
+        for c in &valid {
+            assert_valid_str(c);
+        }
+    }
+    #[test]
+    fn test_valid_ascii_escape() {
+        let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
+        for c in &valid {
+            assert_valid_str(c);
+        }
+    }
+    #[test]
+    fn test_invalid_ascii_escape() {
+        let invalid = [r"\a", r"\?", r"\"];
+        for c in &invalid {
+            assert_invalid_str(c);
+        }
+    }
+    #[test]
+    fn test_valid_ascii_code_escape() {
+        let valid = [r"\x00", r"\x7F", r"\x55"];
+        for c in &valid {
+            assert_valid_str(c);
+        }
+    }
+    /// Covers a missing digit pair, a single digit, and a code above 0x7F.
+    #[test]
+    fn test_invalid_ascii_code_escape() {
+        let invalid = [r"\x", r"\x7", r"\xF0"];
+        for c in &invalid {
+            assert_invalid_str(c);
+        }
+    }
+    #[test]
+    fn test_valid_unicode_escape() {
+        let valid = [
+            r"\u{FF}",
+            r"\u{0}",
+            r"\u{F}",
+            r"\u{10FFFF}",
+            r"\u{1_0__FF___FF_____}",
+        ];
+        for c in &valid {
+            assert_valid_str(c);
+        }
+    }
+    #[test]
+    fn test_invalid_unicode_escape() {
+        let invalid = [
+            r"\u",
+            r"\u{}",
+            r"\u{",
+            r"\u{FF",
+            r"\u{FFFFFF}",
+            r"\u{_F}",
+            r"\u{00FFFFF}",
+            r"\u{110000}",
+        ];
+        for c in &invalid {
+            assert_invalid_str(c);
+        }
+    }
+    /// A multi-line string mixing raw newlines, emoji and a unicode escape.
+    #[test]
+    fn test_mixed() {
+        assert_valid_str(
+            r"This is the tale of a string
+with a newline in between, some emoji (👨‍👨‍) here and there,
+unicode escapes like this: \u{1FFBB} and weird stuff like
+this ﷽",
+        );
+    }
+    /// A backslash directly before a line break is accepted.
+    #[test]
+    fn test_ignore_newline() {
+        assert_valid_str(
+            "Hello \
+             World",
+        );
+    }
+}

diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs new file mode 100644 index 000000000..63f9bad24 --- /dev/null +++ b/crates/ra_syntax/src/validation/char.rs
@@ -0,0 +1,270 @@
	1	use std::u32;
	2
	3	use arrayvec::ArrayString;
	4
	5	use crate::{
	6	ast::{self, AstNode},
	7	string_lexing::{self, CharComponentKind},
	8	TextRange,
	9	yellow::{
	10	SyntaxError,
	11	SyntaxErrorKind::*,
	12	},
	13	};
	14
	15	pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
	16	let literal_text = node.text();
	17	let literal_range = node.syntax().range();
	18	let mut components = string_lexing::parse_char_literal(literal_text);
	19	let mut len = 0;
	20	for component in &mut components {
	21	len += 1;
	22	let text = &literal_text[component.range];
	23	let range = component.range + literal_range.start();
	24	validate_char_component(text, component.kind, range, errors);
	25	}
	26
	27	if !components.has_closing_quote {
	28	errors.push(SyntaxError::new(UnclosedChar, literal_range));
	29	}
	30
	31	if len == 0 {
	32	errors.push(SyntaxError::new(EmptyChar, literal_range));
	33	}
	34
	35	if len > 1 {
	36	errors.push(SyntaxError::new(OverlongChar, literal_range));
	37	}
	38	}
	39
	40	pub(crate) fn validate_char_component(
	41	text: &str,
	42	kind: CharComponentKind,
	43	range: TextRange,
	44	errors: &mut Vec<SyntaxError>,
	45	) {
	46	// Validate escapes
	47	use self::CharComponentKind::*;
	48	match kind {
	49	AsciiEscape => {
	50	if text.len() == 1 {
	51	// Escape sequence consists only of leading `\`
	52	errors.push(SyntaxError::new(EmptyAsciiEscape, range));
	53	} else {
	54	let escape_code = text.chars().skip(1).next().unwrap();
	55	if !is_ascii_escape(escape_code) {
	56	errors.push(SyntaxError::new(InvalidAsciiEscape, range));
	57	}
	58	}
	59	}
	60	AsciiCodeEscape => {
	61	// An AsciiCodeEscape has 4 chars, example: `\xDD`
	62	if text.len() < 4 {
	63	errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
	64	} else {
	65	assert!(
	66	text.chars().count() == 4,
	67	"AsciiCodeEscape cannot be longer than 4 chars"
	68	);
	69
	70	match u8::from_str_radix(&text[2..], 16) {
	71	Ok(code) if code < 128 => { /* Escape code is valid */ }
	72	Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
	73	Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
	74	}
	75	}
	76	}
	77	UnicodeEscape => {
	78	assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
	79
	80	if text.len() == 2 {
	81	// No starting `{`
	82	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
	83	return;
	84	}
	85
	86	if text.len() == 3 {
	87	// Only starting `{`
	88	errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
	89	return;
	90	}
	91
	92	let mut code = ArrayString::<[_; 6]>::new();
	93	let mut closed = false;
	94	for c in text[3..].chars() {
	95	assert!(!closed, "no characters after escape is closed");
	96
	97	if c.is_digit(16) {
	98	if code.len() == 6 {
	99	errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
	100	return;
	101	}
	102
	103	code.push(c);
	104	} else if c == '_' {
	105	// Reject leading _
	106	if code.len() == 0 {
	107	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
	108	return;
	109	}
	110	} else if c == '}' {
	111	closed = true;
	112	} else {
	113	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
	114	return;
	115	}
	116	}
	117
	118	if !closed {
	119	errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
	120	}
	121
	122	if code.len() == 0 {
	123	errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
	124	return;
	125	}
	126
	127	match u32::from_str_radix(&code, 16) {
	128	Ok(code_u32) if code_u32 > 0x10FFFF => {
	129	errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
	130	}
	131	Ok(_) => {
	132	// Valid escape code
	133	}
	134	Err(_) => {
	135	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
	136	}
	137	}
	138	}
	139	CodePoint => {
	140	// These code points must always be escaped
	141	if text == "\t" || text == "\r" {
	142	errors.push(SyntaxError::new(UnescapedCodepoint, range));
	143	}
	144	}
	145	}
	146	}
	147
	148	fn is_ascii_escape(code: char) -> bool {
	149	match code {
	150	'\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,
	151	_ => false,
	152	}
	153	}
	154
	155	#[cfg(test)]
	156	mod test {
	157	use crate::SourceFileNode;
	158
	159	fn build_file(literal: &str) -> SourceFileNode {
	160	let src = format!("const C: char = '{}';", literal);
	161	SourceFileNode::parse(&src)
	162	}
	163
	164	fn assert_valid_char(literal: &str) {
	165	let file = build_file(literal);
	166	assert!(
	167	file.errors().len() == 0,
	168	"Errors for literal '{}': {:?}",
	169	literal,
	170	file.errors()
	171	);
	172	}
	173
	174	fn assert_invalid_char(literal: &str) {
	175	let file = build_file(literal);
	176	assert!(file.errors().len() > 0);
	177	}
	178
	179	#[test]
	180	fn test_ansi_codepoints() {
	181	for byte in 0..=255u8 {
	182	match byte {
	183	b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()),
	184	b'\'' | b'\\' => { /* Ignore character close and backslash */ }
	185	_ => assert_valid_char(&(byte as char).to_string()),
	186	}
	187	}
	188	}
	189
	190	#[test]
	191	fn test_unicode_codepoints() {
	192	let valid = ["Ƒ", "バ", "メ", "﷽"];
	193	for c in &valid {
	194	assert_valid_char(c);
	195	}
	196	}
	197
	198	#[test]
	199	fn test_unicode_multiple_codepoints() {
	200	let invalid = ["नी", "👨‍👨‍"];
	201	for c in &invalid {
	202	assert_invalid_char(c);
	203	}
	204	}
	205
	206	#[test]
	207	fn test_valid_ascii_escape() {
	208	let valid = [
	209	r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b",
	210	];
	211	for c in &valid {
	212	assert_valid_char(c);
	213	}
	214	}
	215
	216	#[test]
	217	fn test_invalid_ascii_escape() {
	218	let invalid = [r"\a", r"\?", r"\"];
	219	for c in &invalid {
	220	assert_invalid_char(c);
	221	}
	222	}
	223
	224	#[test]
	225	fn test_valid_ascii_code_escape() {
	226	let valid = [r"\x00", r"\x7F", r"\x55"];
	227	for c in &valid {
	228	assert_valid_char(c);
	229	}
	230	}
	231
	232	#[test]
	233	fn test_invalid_ascii_code_escape() {
	234	let invalid = [r"\x", r"\x7", r"\xF0"];
	235	for c in &invalid {
	236	assert_invalid_char(c);
	237	}
	238	}
	239
	240	#[test]
	241	fn test_valid_unicode_escape() {
	242	let valid = [
	243	r"\u{FF}",
	244	r"\u{0}",
	245	r"\u{F}",
	246	r"\u{10FFFF}",
	247	r"\u{1_0__FF___FF_____}",
	248	];
	249	for c in &valid {
	250	assert_valid_char(c);
	251	}
	252	}
	253
	254	#[test]
	255	fn test_invalid_unicode_escape() {
	256	let invalid = [
	257	r"\u",
	258	r"\u{}",
	259	r"\u{",
	260	r"\u{FF",
	261	r"\u{FFFFFF}",
	262	r"\u{_F}",
	263	r"\u{00FFFFF}",
	264	r"\u{110000}",
	265	];
	266	for c in &invalid {
	267	assert_invalid_char(c);
	268	}
	269	}
	270	}


diff --git a/crates/ra_syntax/src/validation/mod.rs b/crates/ra_syntax/src/validation/mod.rs new file mode 100644 index 000000000..2ff0bc26d --- /dev/null +++ b/crates/ra_syntax/src/validation/mod.rs
@@ -0,0 +1,20 @@
	1	use crate::{
	2	algo::visit::{visitor_ctx, VisitorCtx},
	3	ast,
	4	SourceFileNode,
	5	yellow::SyntaxError,
	6	};
	7
	8	mod char;
	9	mod string;
	10
	11	pub(crate) fn validate(file: &SourceFileNode) -> Vec<SyntaxError> {
	12	let mut errors = Vec::new();
	13	for node in file.syntax().descendants() {
	14	let _ = visitor_ctx(&mut errors)
	15	.visit::<ast::Char, _>(self::char::validate_char_node)
	16	.visit::<ast::String, _>(self::string::validate_string_node)
	17	.accept(node);
	18	}
	19	errors
	20	}


diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs new file mode 100644 index 000000000..089879d15 --- /dev/null +++ b/crates/ra_syntax/src/validation/string.rs
@@ -0,0 +1,168 @@
	1	use crate::{
	2	ast::{self, AstNode},
	3	string_lexing::{self, StringComponentKind},
	4	yellow::{
	5	SyntaxError,
	6	SyntaxErrorKind::*,
	7	},
	8	};
	9
	10	use super::char;
	11
	12	pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxError>) {
	13	let literal_text = node.text();
	14	let literal_range = node.syntax().range();
	15	let mut components = string_lexing::parse_string_literal(literal_text);
	16	for component in &mut components {
	17	let range = component.range + literal_range.start();
	18
	19	match component.kind {
	20	StringComponentKind::Char(kind) => {
	21	// Chars must escape \t, \n and \r codepoints, but strings don't
	22	let text = &literal_text[component.range];
	23	match text {
	24	"\t" | "\n" | "\r" => { /* always valid */ }
	25	_ => char::validate_char_component(text, kind, range, errors),
	26	}
	27	}
	28	StringComponentKind::IgnoreNewline => { /* always valid */ }
	29	}
	30	}
	31
	32	if !components.has_closing_quote {
	33	errors.push(SyntaxError::new(UnclosedString, literal_range));
	34	}
	35	}
	36
	37	#[cfg(test)]
	38	mod test {
	39	use crate::SourceFileNode;
	40
	41	fn build_file(literal: &str) -> SourceFileNode {
	42	let src = format!(r#"const S: &'static str = "{}";"#, literal);
	43	println!("Source: {}", src);
	44	SourceFileNode::parse(&src)
	45	}
	46
	47	fn assert_valid_str(literal: &str) {
	48	let file = build_file(literal);
	49	assert!(
	50	file.errors().len() == 0,
	51	"Errors for literal '{}': {:?}",
	52	literal,
	53	file.errors()
	54	);
	55	}
	56
	57	fn assert_invalid_str(literal: &str) {
	58	let file = build_file(literal);
	59	assert!(file.errors().len() > 0);
	60	}
	61
	62	#[test]
	63	fn test_ansi_codepoints() {
	64	for byte in 0..=255u8 {
	65	match byte {
	66	b'\"' | b'\\' => { /* Ignore string close and backslash */ }
	67	_ => assert_valid_str(&(byte as char).to_string()),
	68	}
	69	}
	70	}
	71
	72	#[test]
	73	fn test_unicode_codepoints() {
	74	let valid = ["Ƒ", "バ", "メ", "﷽"];
	75	for c in &valid {
	76	assert_valid_str(c);
	77	}
	78	}
	79
	80	#[test]
	81	fn test_unicode_multiple_codepoints() {
	82	let valid = ["नी", "👨‍👨‍"];
	83	for c in &valid {
	84	assert_valid_str(c);
	85	}
	86	}
	87
	88	#[test]
	89	fn test_valid_ascii_escape() {
	90	let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
	91	for c in &valid {
	92	assert_valid_str(c);
	93	}
	94	}
	95
	96	#[test]
	97	fn test_invalid_ascii_escape() {
	98	let invalid = [r"\a", r"\?", r"\"];
	99	for c in &invalid {
	100	assert_invalid_str(c);
	101	}
	102	}
	103
	104	#[test]
	105	fn test_valid_ascii_code_escape() {
	106	let valid = [r"\x00", r"\x7F", r"\x55"];
	107	for c in &valid {
	108	assert_valid_str(c);
	109	}
	110	}
	111
	112	#[test]
	113	fn test_invalid_ascii_code_escape() {
	114	let invalid = [r"\x", r"\x7", r"\xF0"];
	115	for c in &invalid {
	116	assert_invalid_str(c);
	117	}
	118	}
	119
	120	#[test]
	121	fn test_valid_unicode_escape() {
	122	let valid = [
	123	r"\u{FF}",
	124	r"\u{0}",
	125	r"\u{F}",
	126	r"\u{10FFFF}",
	127	r"\u{1_0__FF___FF_____}",
	128	];
	129	for c in &valid {
	130	assert_valid_str(c);
	131	}
	132	}
	133
	134	#[test]
	135	fn test_invalid_unicode_escape() {
	136	let invalid = [
	137	r"\u",
	138	r"\u{}",
	139	r"\u{",
	140	r"\u{FF",
	141	r"\u{FFFFFF}",
	142	r"\u{_F}",
	143	r"\u{00FFFFF}",
	144	r"\u{110000}",
	145	];
	146	for c in &invalid {
	147	assert_invalid_str(c);
	148	}
	149	}
	150
	151	#[test]
	152	fn test_mixed() {
	153	assert_valid_str(
	154	r"This is the tale of a string
	155	with a newline in between, some emoji (👨‍👨‍) here and there,
	156	unicode escapes like this: \u{1FFBB} and weird stuff like
	157	this ﷽",
	158	);
	159	}
	160
	161	#[test]
	162	fn test_ignore_newline() {
	163	assert_valid_str(
	164	"Hello \
	165	World",
	166	);
	167	}
	168	}