Merge #1253

1253: Share literal validation logic with compiler r=matklad a=matklad This is neat: the unescape module is literally what the compiler is using right now: https://github.com/rust-lang/rust/blob/c6ac57564852cb6e2d0db60f7b46d9eb98d4b449/src/libsyntax/parse/unescape.rs So, yeah, code sharing via copy-paste! Co-authored-by: Aleksey Kladov <[email protected]>
author: bors[bot] <bors[bot]@users.noreply.github.com> 2019-05-07 17:43:10 +0100
committer: bors[bot] <bors[bot]@users.noreply.github.com> 2019-05-07 17:43:10 +0100
commit: d3efedb752bb2198796603d8a479a5e3ee472a97 (patch)
tree: ca6a4aee6ad4077a869a932a18c6c8d134406f8c /crates/ra_syntax/src/validation/byte_string.rs
parent: ef782adc293deb287128f005dbab2038ba3ccdc1 (diff)
parent: 313314e14b629ebf50389dbd2d440bda922f6ae7 (diff)
1 files changed, 0 insertions, 169 deletions
diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs
deleted file mode 100644
index 1d48c2d9b..000000000
--- a/crates/ra_syntax/src/validation/byte_string.rs
+++ /dev/null
@@ -1,169 +0,0 @@
-use crate::{
-    string_lexing::{self, StringComponentKind},
-    SyntaxError,
-    SyntaxErrorKind::*,
-    SyntaxToken,
-};
-use super::byte;
-pub(crate) fn validate_byte_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) {
-    let literal_text = node.text();
-    let literal_range = node.range();
-    let mut components = string_lexing::parse_quoted_literal(Some('b'), '"', literal_text);
-    for component in &mut components {
-        let range = component.range + literal_range.start();
-        match component.kind {
-            StringComponentKind::IgnoreNewline => { /* always valid */ }
-            _ => {
-                // Chars must escape \t, \n and \r codepoints, but strings don't
-                let text = &literal_text[component.range];
-                match text {
-                    "\t" | "\n" | "\r" => { /* always valid */ }
-                    _ => byte::validate_byte_component(text, component.kind, range, errors),
-                }
-            }
-        }
-    }
-    if !components.has_closing_quote {
-        errors.push(SyntaxError::new(UnclosedString, literal_range));
-    }
-    if let Some(range) = components.suffix {
-        errors.push(SyntaxError::new(InvalidSuffix, range + literal_range.start()));
-    }
-}
-#[cfg(test)]
-mod test {
-    use crate::{SourceFile, TreeArc};
-    fn build_file(literal: &str) -> TreeArc<SourceFile> {
-        let src = format!(r#"const S: &'static [u8] = b"{}";"#, literal);
-        println!("Source: {}", src);
-        SourceFile::parse(&src)
-    }
-    fn assert_valid_str(literal: &str) {
-        let file = build_file(literal);
-        assert!(file.errors().len() == 0, "Errors for literal '{}': {:?}", literal, file.errors());
-    }
-    fn assert_invalid_str(literal: &str) {
-        let file = build_file(literal);
-        assert!(file.errors().len() > 0);
-    }
-    #[test]
-    fn test_ansi_codepoints() {
-        for byte in 0..128 {
-            match byte {
-                b'\"' | b'\\' => { /* Ignore string close and backslash */ }
-                _ => assert_valid_str(&(byte as char).to_string()),
-            }
-        }
-        for byte in 128..=255u8 {
-            assert_invalid_str(&(byte as char).to_string());
-        }
-    }
-    #[test]
-    fn test_unicode_codepoints() {
-        let invalid = ["Ƒ", "バ", "メ", "﷽"];
-        for c in &invalid {
-            assert_invalid_str(c);
-        }
-    }
-    #[test]
-    fn test_unicode_multiple_codepoints() {
-        let invalid = ["नी", "👨‍👨‍"];
-        for c in &invalid {
-            assert_invalid_str(c);
-        }
-    }
-    #[test]
-    fn test_valid_ascii_escape() {
-        let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
-        for c in &valid {
-            assert_valid_str(c);
-        }
-    }
-    #[test]
-    fn test_invalid_ascii_escape() {
-        let invalid = [r"\a", r"\?", r"\"];
-        for c in &invalid {
-            assert_invalid_str(c);
-        }
-    }
-    #[test]
-    fn test_valid_ascii_code_escape() {
-        let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
-        for c in &valid {
-            assert_valid_str(c);
-        }
-    }
-    #[test]
-    fn test_invalid_ascii_code_escape() {
-        let invalid = [r"\x", r"\x7"];
-        for c in &invalid {
-            assert_invalid_str(c);
-        }
-    }
-    #[test]
-    fn test_invalid_unicode_escape() {
-        let well_formed = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
-        for c in &well_formed {
-            assert_invalid_str(c);
-        }
-        let invalid = [
-            r"\u",
-            r"\u{}",
-            r"\u{",
-            r"\u{FF",
-            r"\u{FFFFFF}",
-            r"\u{_F}",
-            r"\u{00FFFFF}",
-            r"\u{110000}",
-        ];
-        for c in &invalid {
-            assert_invalid_str(c);
-        }
-    }
-    #[test]
-    fn test_mixed_invalid() {
-        assert_invalid_str(
-            r"This is the tale of a string
-with a newline in between, some emoji (👨‍👨‍) here and there,
-unicode escapes like this: \u{1FFBB} and weird stuff like
-this ﷽",
-        );
-    }
-    #[test]
-    fn test_mixed_valid() {
-        assert_valid_str(
-            r"This is the tale of a string
-with a newline in between, no emoji at all,
-nor unicode escapes or weird stuff",
-        );
-    }
-    #[test]
-    fn test_ignore_newline() {
-        assert_valid_str(
-            "Hello \
-             World",
-        );
-    }
-}
author	bors[bot] <bors[bot]@users.noreply.github.com>	2019-05-07 17:43:10 +0100
committer	bors[bot] <bors[bot]@users.noreply.github.com>	2019-05-07 17:43:10 +0100
commit	d3efedb752bb2198796603d8a479a5e3ee472a97 (patch)
tree	ca6a4aee6ad4077a869a932a18c6c8d134406f8c /crates/ra_syntax/src/validation/byte_string.rs
parent	ef782adc293deb287128f005dbab2038ba3ccdc1 (diff)
parent	313314e14b629ebf50389dbd2d440bda922f6ae7 (diff)

diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs deleted file mode 100644 index 1d48c2d9b..000000000 --- a/crates/ra_syntax/src/validation/byte_string.rs +++ /dev/null
@@ -1,169 +0,0 @@
1	use crate::{
2	string_lexing::{self, StringComponentKind},
3	SyntaxError,
4	SyntaxErrorKind::*,
5	SyntaxToken,
6	};
7
8	use super::byte;
9
10	pub(crate) fn validate_byte_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) {
11	let literal_text = node.text();
12	let literal_range = node.range();
13	let mut components = string_lexing::parse_quoted_literal(Some('b'), '"', literal_text);
14	for component in &mut components {
15	let range = component.range + literal_range.start();
16
17	match component.kind {
18	StringComponentKind::IgnoreNewline => { /* always valid */ }
19	_ => {
20	// Chars must escape \t, \n and \r codepoints, but strings don't
21	let text = &literal_text[component.range];
22	match text {
23	"\t" | "\n" | "\r" => { /* always valid */ }
24	_ => byte::validate_byte_component(text, component.kind, range, errors),
25	}
26	}
27	}
28	}
29
30	if !components.has_closing_quote {
31	errors.push(SyntaxError::new(UnclosedString, literal_range));
32	}
33
34	if let Some(range) = components.suffix {
35	errors.push(SyntaxError::new(InvalidSuffix, range + literal_range.start()));
36	}
37	}
38
39	#[cfg(test)]
40	mod test {
41	use crate::{SourceFile, TreeArc};
42
43	fn build_file(literal: &str) -> TreeArc<SourceFile> {
44	let src = format!(r#"const S: &'static [u8] = b"{}";"#, literal);
45	println!("Source: {}", src);
46	SourceFile::parse(&src)
47	}
48
49	fn assert_valid_str(literal: &str) {
50	let file = build_file(literal);
51	assert!(file.errors().len() == 0, "Errors for literal '{}': {:?}", literal, file.errors());
52	}
53
54	fn assert_invalid_str(literal: &str) {
55	let file = build_file(literal);
56	assert!(file.errors().len() > 0);
57	}
58
59	#[test]
60	fn test_ansi_codepoints() {
61	for byte in 0..128 {
62	match byte {
63	b'\"' | b'\\' => { /* Ignore string close and backslash */ }
64	_ => assert_valid_str(&(byte as char).to_string()),
65	}
66	}
67
68	for byte in 128..=255u8 {
69	assert_invalid_str(&(byte as char).to_string());
70	}
71	}
72
73	#[test]
74	fn test_unicode_codepoints() {
75	let invalid = ["Ƒ", "バ", "メ", "﷽"];
76	for c in &invalid {
77	assert_invalid_str(c);
78	}
79	}
80
81	#[test]
82	fn test_unicode_multiple_codepoints() {
83	let invalid = ["नी", "👨‍👨‍"];
84	for c in &invalid {
85	assert_invalid_str(c);
86	}
87	}
88
89	#[test]
90	fn test_valid_ascii_escape() {
91	let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
92	for c in &valid {
93	assert_valid_str(c);
94	}
95	}
96
97	#[test]
98	fn test_invalid_ascii_escape() {
99	let invalid = [r"\a", r"\?", r"\"];
100	for c in &invalid {
101	assert_invalid_str(c);
102	}
103	}
104
105	#[test]
106	fn test_valid_ascii_code_escape() {
107	let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
108	for c in &valid {
109	assert_valid_str(c);
110	}
111	}
112
113	#[test]
114	fn test_invalid_ascii_code_escape() {
115	let invalid = [r"\x", r"\x7"];
116	for c in &invalid {
117	assert_invalid_str(c);
118	}
119	}
120
121	#[test]
122	fn test_invalid_unicode_escape() {
123	let well_formed = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
124	for c in &well_formed {
125	assert_invalid_str(c);
126	}
127
128	let invalid = [
129	r"\u",
130	r"\u{}",
131	r"\u{",
132	r"\u{FF",
133	r"\u{FFFFFF}",
134	r"\u{_F}",
135	r"\u{00FFFFF}",
136	r"\u{110000}",
137	];
138	for c in &invalid {
139	assert_invalid_str(c);
140	}
141	}
142
143	#[test]
144	fn test_mixed_invalid() {
145	assert_invalid_str(
146	r"This is the tale of a string
147	with a newline in between, some emoji (👨‍👨‍) here and there,
148	unicode escapes like this: \u{1FFBB} and weird stuff like
149	this ﷽",
150	);
151	}
152
153	#[test]
154	fn test_mixed_valid() {
155	assert_valid_str(
156	r"This is the tale of a string
157	with a newline in between, no emoji at all,
158	nor unicode escapes or weird stuff",
159	);
160	}
161
162	#[test]
163	fn test_ignore_newline() {
164	assert_valid_str(
165	"Hello \
166	World",
167	);
168	}
169	}