1 files changed, 168 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs
new file mode 100644
index 000000000..089879d15
--- /dev/null
+++ b/crates/ra_syntax/src/validation/string.rs
@@ -0,0 +1,168 @@
+use crate::{
+    ast::{self, AstNode},
+    string_lexing::{self, StringComponentKind},
+    yellow::{
+        SyntaxError,
+        SyntaxErrorKind::*,
+    },
+};
+use super::char;
+/// Validates a string literal node, appending every problem found to `errors`.
+///
+/// The literal text is split into components by
+/// `string_lexing::parse_string_literal`. Each `Char` component is handed to
+/// `char::validate_char_component`, except for a literal tab, line feed or
+/// carriage return, which a string may contain unescaped. `IgnoreNewline`
+/// components are accepted as they are. Once every component has been visited,
+/// an `UnclosedString` error spanning the whole literal is pushed if the lexer
+/// did not record a closing quote.
+pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxError>) {
+    let text = node.text();
+    let node_range = node.syntax().range();
+    let mut lexer = string_lexing::parse_string_literal(text);
+    // Iterate through `&mut lexer` so the lexer is still available afterwards
+    // for the `has_closing_quote` check.
+    for piece in &mut lexer {
+        // Component ranges are relative to the literal; shift them by the
+        // literal's start offset before reporting.
+        let absolute_range = piece.range + node_range.start();
+        match piece.kind {
+            StringComponentKind::IgnoreNewline => { /* always valid */ }
+            StringComponentKind::Char(kind) => {
+                let piece_text = &text[piece.range];
+                // Chars must escape \t, \n and \r codepoints, but strings don't
+                let is_raw_whitespace =
+                    piece_text == "\t" || piece_text == "\n" || piece_text == "\r";
+                if !is_raw_whitespace {
+                    char::validate_char_component(piece_text, kind, absolute_range, errors);
+                }
+            }
+        }
+    }
+    if !lexer.has_closing_quote {
+        errors.push(SyntaxError::new(UnclosedString, node_range));
+    }
+}
+#[cfg(test)]
+mod test {
+    use crate::SourceFileNode;
+    /// Parses a source file whose only item is a `const` initialised with
+    /// `literal` wrapped in double quotes.
+    fn build_file(literal: &str) -> SourceFileNode {
+        let src = format!(r#"const S: &'static str = "{}";"#, literal);
+        // The test harness captures stdout and shows it for failing tests only,
+        // so this documents the exact source that was parsed.
+        println!("Source: {}", src);
+        SourceFileNode::parse(&src)
+    }
+    /// Asserts that parsing a file containing `"literal"` reports no errors.
+    fn assert_valid_str(literal: &str) {
+        let file = build_file(literal);
+        let errors = file.errors();
+        assert!(
+            errors.is_empty(),
+            "Errors for literal '{}': {:?}",
+            literal,
+            errors
+        );
+    }
+    /// Asserts that parsing a file containing `"literal"` reports at least one
+    /// error.
+    fn assert_invalid_str(literal: &str) {
+        let file = build_file(literal);
+        assert!(
+            !file.errors().is_empty(),
+            "Expected errors for literal '{}', but none were reported",
+            literal
+        );
+    }
+    /// Every code point in U+0000..=U+00FF is accepted unescaped, apart from
+    /// the two that would change how the literal itself is lexed.
+    #[test]
+    fn test_ansi_codepoints() {
+        for byte in 0..=255u8 {
+            match byte {
+                b'\"' | b'\\' => { /* Ignore string close and backslash */ }
+                _ => assert_valid_str(&(byte as char).to_string()),
+            }
+        }
+    }
+    /// Single non-ASCII code points are accepted.
+    #[test]
+    fn test_unicode_codepoints() {
+        let valid = ["Ƒ", "バ", "メ", "﷽"];
+        for c in &valid {
+            assert_valid_str(c);
+        }
+    }
+    /// Sequences made of several code points are accepted.
+    #[test]
+    fn test_unicode_multiple_codepoints() {
+        let valid = ["नी", "👨‍👨‍"];
+        for c in &valid {
+            assert_valid_str(c);
+        }
+    }
+    /// The simple backslash escapes, and plain ASCII letters, are accepted.
+    #[test]
+    fn test_valid_ascii_escape() {
+        let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
+        for c in &valid {
+            assert_valid_str(c);
+        }
+    }
+    /// Unknown escapes are rejected. The lone backslash case escapes the
+    /// closing quote of the generated source.
+    #[test]
+    fn test_invalid_ascii_escape() {
+        let invalid = [r"\a", r"\?", r"\"];
+        for c in &invalid {
+            assert_invalid_str(c);
+        }
+    }
+    /// Two-digit `\x` escapes up to `\x7F` are accepted.
+    #[test]
+    fn test_valid_ascii_code_escape() {
+        let valid = [r"\x00", r"\x7F", r"\x55"];
+        for c in &valid {
+            assert_valid_str(c);
+        }
+    }
+    /// `\x` escapes with too few digits, or with a value above `\x7F`, are
+    /// rejected.
+    #[test]
+    fn test_invalid_ascii_code_escape() {
+        let invalid = [r"\x", r"\x7", r"\xF0"];
+        for c in &invalid {
+            assert_invalid_str(c);
+        }
+    }
+    /// Well-formed `\u{...}` escapes are accepted, including ones that contain
+    /// underscores after the first digit.
+    #[test]
+    fn test_valid_unicode_escape() {
+        let valid = [
+            r"\u{FF}",
+            r"\u{0}",
+            r"\u{F}",
+            r"\u{10FFFF}",
+            r"\u{1_0__FF___FF_____}",
+        ];
+        for c in &valid {
+            assert_valid_str(c);
+        }
+    }
+    /// Malformed `\u` escapes are rejected: missing, empty or unterminated
+    /// braces, a leading underscore, too many digits, and values above
+    /// `10FFFF`.
+    #[test]
+    fn test_invalid_unicode_escape() {
+        let invalid = [
+            r"\u",
+            r"\u{}",
+            r"\u{",
+            r"\u{FF",
+            r"\u{FFFFFF}",
+            r"\u{_F}",
+            r"\u{00FFFFF}",
+            r"\u{110000}",
+        ];
+        for c in &invalid {
+            assert_invalid_str(c);
+        }
+    }
+    /// A multi-line literal mixing raw newlines, emoji and a unicode escape is
+    /// accepted.
+    #[test]
+    fn test_mixed() {
+        assert_valid_str(
+            r"This is the tale of a string
+with a newline in between, some emoji (👨‍👨‍) here and there,
+unicode escapes like this: \u{1FFBB} and weird stuff like
+this ﷽",
+        );
+    }
+    /// A simple multi-word literal is accepted. The trailing backslash below is
+    /// a line continuation in this test's own source, so the literal that gets
+    /// validated is `Hello World`.
+    #[test]
+    fn test_ignore_newline() {
+        assert_valid_str(
+            "Hello \
+             World",
+        );
+    }
+}

diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs new file mode 100644 index 000000000..089879d15 --- /dev/null +++ b/crates/ra_syntax/src/validation/string.rs
@@ -0,0 +1,168 @@
	1	use crate::{
	2	ast::{self, AstNode},
	3	string_lexing::{self, StringComponentKind},
	4	yellow::{
	5	SyntaxError,
	6	SyntaxErrorKind::*,
	7	},
	8	};
	9
	10	use super::char;
	11
	12	pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxError>) {
	13	let literal_text = node.text();
	14	let literal_range = node.syntax().range();
	15	let mut components = string_lexing::parse_string_literal(literal_text);
	16	for component in &mut components {
	17	let range = component.range + literal_range.start();
	18
	19	match component.kind {
	20	StringComponentKind::Char(kind) => {
	21	// Chars must escape \t, \n and \r codepoints, but strings don't
	22	let text = &literal_text[component.range];
	23	match text {
	24	"\t" | "\n" | "\r" => { /* always valid */ }
	25	_ => char::validate_char_component(text, kind, range, errors),
	26	}
	27	}
	28	StringComponentKind::IgnoreNewline => { /* always valid */ }
	29	}
	30	}
	31
	32	if !components.has_closing_quote {
	33	errors.push(SyntaxError::new(UnclosedString, literal_range));
	34	}
	35	}
	36
	37	#[cfg(test)]
	38	mod test {
	39	use crate::SourceFileNode;
	40
	41	fn build_file(literal: &str) -> SourceFileNode {
	42	let src = format!(r#"const S: &'static str = "{}";"#, literal);
	43	println!("Source: {}", src);
	44	SourceFileNode::parse(&src)
	45	}
	46
	47	fn assert_valid_str(literal: &str) {
	48	let file = build_file(literal);
	49	assert!(
	50	file.errors().len() == 0,
	51	"Errors for literal '{}': {:?}",
	52	literal,
	53	file.errors()
	54	);
	55	}
	56
	57	fn assert_invalid_str(literal: &str) {
	58	let file = build_file(literal);
	59	assert!(file.errors().len() > 0);
	60	}
	61
	62	#[test]
	63	fn test_ansi_codepoints() {
	64	for byte in 0..=255u8 {
	65	match byte {
	66	b'\"' | b'\\' => { /* Ignore string close and backslash */ }
	67	_ => assert_valid_str(&(byte as char).to_string()),
	68	}
	69	}
	70	}
	71
	72	#[test]
	73	fn test_unicode_codepoints() {
	74	let valid = ["Ƒ", "バ", "メ", "﷽"];
	75	for c in &valid {
	76	assert_valid_str(c);
	77	}
	78	}
	79
	80	#[test]
	81	fn test_unicode_multiple_codepoints() {
	82	let valid = ["नी", "👨‍👨‍"];
	83	for c in &valid {
	84	assert_valid_str(c);
	85	}
	86	}
	87
	88	#[test]
	89	fn test_valid_ascii_escape() {
	90	let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
	91	for c in &valid {
	92	assert_valid_str(c);
	93	}
	94	}
	95
	96	#[test]
	97	fn test_invalid_ascii_escape() {
	98	let invalid = [r"\a", r"\?", r"\"];
	99	for c in &invalid {
	100	assert_invalid_str(c);
	101	}
	102	}
	103
	104	#[test]
	105	fn test_valid_ascii_code_escape() {
	106	let valid = [r"\x00", r"\x7F", r"\x55"];
	107	for c in &valid {
	108	assert_valid_str(c);
	109	}
	110	}
	111
	112	#[test]
	113	fn test_invalid_ascii_code_escape() {
	114	let invalid = [r"\x", r"\x7", r"\xF0"];
	115	for c in &invalid {
	116	assert_invalid_str(c);
	117	}
	118	}
	119
	120	#[test]
	121	fn test_valid_unicode_escape() {
	122	let valid = [
	123	r"\u{FF}",
	124	r"\u{0}",
	125	r"\u{F}",
	126	r"\u{10FFFF}",
	127	r"\u{1_0__FF___FF_____}",
	128	];
	129	for c in &valid {
	130	assert_valid_str(c);
	131	}
	132	}
	133
	134	#[test]
	135	fn test_invalid_unicode_escape() {
	136	let invalid = [
	137	r"\u",
	138	r"\u{}",
	139	r"\u{",
	140	r"\u{FF",
	141	r"\u{FFFFFF}",
	142	r"\u{_F}",
	143	r"\u{00FFFFF}",
	144	r"\u{110000}",
	145	];
	146	for c in &invalid {
	147	assert_invalid_str(c);
	148	}
	149	}
	150
	151	#[test]
	152	fn test_mixed() {
	153	assert_valid_str(
	154	r"This is the tale of a string
	155	with a newline in between, some emoji (👨‍👨‍) here and there,
	156	unicode escapes like this: \u{1FFBB} and weird stuff like
	157	this ﷽",
	158	);
	159	}
	160
	161	#[test]
	162	fn test_ignore_newline() {
	163	assert_valid_str(
	164	"Hello \
	165	World",
	166	);
	167	}
	168	}