Validate string literals

author: Adolfo Ochagavía <[email protected]> 2018-11-08 14:42:00 +0000
committer: Adolfo Ochagavía <[email protected]> 2018-11-09 13:52:17 +0000
commit: 3b4c02c19e4af645fd37e8bff774b05d546dc0b6 (patch)
tree: 42c40e9201adf64d1c06bc1c69524f5688ee6e9f /crates/ra_syntax/src/validation.rs
parent: 5a9150df9bcdaf5faed5b500c22333f1f7c99f32 (diff)
1 files changed, 0 insertions, 271 deletions
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs
deleted file mode 100644
index a10b297c0..000000000
--- a/crates/ra_syntax/src/validation.rs
+++ /dev/null
@@ -1,271 +0,0 @@
-use std::u32;
-use arrayvec::ArrayString;
-use crate::{
-    algo::visit::{visitor_ctx, VisitorCtx},
-    ast::{self, AstNode},
-    SourceFileNode,
-    string_lexing::{self, CharComponentKind},
-    yellow::{
-        SyntaxError,
-        SyntaxErrorKind::*,
-    },
-};
-/// Collects the syntax errors found by the validators registered below.
-///
-/// Every descendant of the file's syntax tree is offered to a visitor that
-/// dispatches `ast::Char` nodes to `validate_char`; the errors pushed by that
-/// callback are accumulated and returned.
-pub(crate) fn validate(file: &SourceFileNode) -> Vec<SyntaxError> {
-    let mut found: Vec<SyntaxError> = Vec::new();
-    file.syntax().descendants().for_each(|descendant| {
-        let visitor = visitor_ctx(&mut found).visit::<ast::Char, _>(validate_char);
-        // The visitor's result is deliberately discarded, as in the original code.
-        let _ = visitor.accept(descendant);
-    });
-    found
-}
-/// Validates one character literal, appending every problem found to `errors`.
-///
-/// The literal's text is split into components by
-/// `string_lexing::parse_char_literal`; each component is checked according to
-/// its kind (simple escape, `\xNN` escape, `\u{...}` escape or plain code
-/// point). Once all components have been inspected, the literal as a whole is
-/// checked for a closing quote and for containing exactly one component.
-///
-/// A malformed escape only aborts the checks for that component: the remaining
-/// components and the whole-literal checks still run.
-fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) {
-    let mut components = string_lexing::parse_char_literal(node.text());
-    let mut len = 0;
-    'components: for component in &mut components {
-        len += 1;
-        // Validate escapes
-        let text = &node.text()[component.range];
-        // Component ranges are relative to the literal; shift them to file offsets.
-        let range = component.range + node.syntax().range().start();
-        use self::CharComponentKind::*;
-        match component.kind {
-            AsciiEscape => {
-                // The second character, if present, is the escape code after `\`.
-                match text.chars().nth(1) {
-                    // Escape sequence consists only of leading `\`
-                    None => errors.push(SyntaxError::new(EmptyAsciiEscape, range)),
-                    Some(escape_code) if !is_ascii_escape(escape_code) => {
-                        errors.push(SyntaxError::new(InvalidAsciiEscape, range));
-                    }
-                    Some(_) => { /* Escape is valid */ }
-                }
-            }
-            AsciiCodeEscape => {
-                // An AsciiCodeEscape has 4 chars, example: `\xDD`.
-                // Count chars rather than bytes so that the length check and the
-                // assertion below agree with each other.
-                // NOTE(review): assumes the lexer may hand back a component holding
-                // a multi-byte character after `\x`; with a byte-length check such
-                // input reached the assertion and panicked -- confirm against
-                // `string_lexing`.
-                let char_count = text.chars().count();
-                if char_count < 4 {
-                    errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
-                } else {
-                    assert!(
-                        char_count == 4,
-                        "AsciiCodeEscape cannot be longer than 4 chars"
-                    );
-                    match u8::from_str_radix(&text[2..], 16) {
-                        Ok(code) if code < 128 => { /* Escape code is valid */ }
-                        Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
-                        Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
-                    }
-                }
-            }
-            UnicodeEscape => {
-                assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
-                if text.len() == 2 {
-                    // No starting `{`
-                    errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                    continue 'components;
-                }
-                if text.len() == 3 {
-                    // Only starting `{`
-                    errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
-                    continue 'components;
-                }
-                // Hex digits between the braces, with `_` separators dropped.
-                let mut code = ArrayString::<[_; 6]>::new();
-                let mut closed = false;
-                for c in text[3..].chars() {
-                    assert!(!closed, "no characters after escape is closed");
-                    if c.is_digit(16) {
-                        // The buffer holds at most 6 digits; a 7th is an error.
-                        if code.len() == 6 {
-                            errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
-                            continue 'components;
-                        }
-                        code.push(c);
-                    } else if c == '_' {
-                        // Reject leading _
-                        if code.is_empty() {
-                            errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                            continue 'components;
-                        }
-                    } else if c == '}' {
-                        closed = true;
-                    } else {
-                        errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                        continue 'components;
-                    }
-                }
-                // A missing `}` is reported, but the digits are still checked below.
-                if !closed {
-                    errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
-                }
-                if code.is_empty() {
-                    errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
-                    continue 'components;
-                }
-                match u32::from_str_radix(&code, 16) {
-                    Ok(code_u32) if code_u32 > 0x10FFFF => {
-                        errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
-                    }
-                    Ok(_) => {
-                        // Valid escape code
-                    }
-                    Err(_) => {
-                        errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                    }
-                }
-            }
-            CodePoint => {
-                // These code points must always be escaped
-                if text == "\t" || text == "\r" {
-                    errors.push(SyntaxError::new(UnescapedCodepoint, range));
-                }
-            }
-        }
-    }
-    // Whole-literal checks; these are reported against the full node range.
-    if !components.has_closing_quote {
-        errors.push(SyntaxError::new(UnclosedChar, node.syntax().range()));
-    }
-    if len == 0 {
-        errors.push(SyntaxError::new(EmptyChar, node.syntax().range()));
-    }
-    if len > 1 {
-        errors.push(SyntaxError::new(LongChar, node.syntax().range()));
-    }
-}
-/// Reports whether `code` may follow a backslash in a simple escape:
-/// one of `\`, `'`, `"`, `n`, `r`, `t` or `0`.
-fn is_ascii_escape(code: char) -> bool {
-    const ESCAPABLE: [char; 7] = ['\\', '\'', '"', 'n', 'r', 't', '0'];
-    ESCAPABLE.contains(&code)
-}
-/// Tests for char-literal validation, driven through `SourceFileNode::parse`.
-#[cfg(test)]
-mod test {
-    use crate::SourceFileNode;
-    /// Parses a one-item source file whose constant is initialised with
-    /// `literal` wrapped in single quotes.
-    fn build_file(literal: &str) -> SourceFileNode {
-        let src = format!("const C: char = '{}';", literal);
-        SourceFileNode::parse(&src)
-    }
-    /// Asserts that a file built around `literal` reports no errors.
-    fn assert_valid_char(literal: &str) {
-        let file = build_file(literal);
-        assert!(
-            file.errors().len() == 0,
-            "Errors for literal '{}': {:?}",
-            literal,
-            file.errors()
-        );
-    }
-    /// Asserts that a file built around `literal` reports at least one error.
-    fn assert_invalid_char(literal: &str) {
-        let file = build_file(literal);
-        // Name the literal: these helpers run inside loops, so a bare assertion
-        // failure would not say which input was wrongly accepted.
-        assert!(
-            file.errors().len() > 0,
-            "Expected errors for literal '{}', but none were reported",
-            literal
-        );
-    }
-    #[test]
-    fn test_ansi_codepoints() {
-        for byte in 0..=255u8 {
-            match byte {
-                b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()),
-                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
-                _ => assert_valid_char(&(byte as char).to_string()),
-            }
-        }
-    }
-    #[test]
-    fn test_unicode_codepoints() {
-        let valid = ["Ƒ", "バ", "メ", "﷽"];
-        for c in &valid {
-            assert_valid_char(c);
-        }
-    }
-    #[test]
-    fn test_unicode_multiple_codepoints() {
-        let invalid = ["नी", "👨‍👨‍"];
-        for c in &invalid {
-            assert_invalid_char(c);
-        }
-    }
-    #[test]
-    fn test_valid_ascii_escape() {
-        let valid = [
-            r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b",
-        ];
-        for c in &valid {
-            assert_valid_char(c);
-        }
-    }
-    #[test]
-    fn test_invalid_ascii_escape() {
-        let invalid = [r"\a", r"\?", r"\"];
-        for c in &invalid {
-            assert_invalid_char(c);
-        }
-    }
-    #[test]
-    fn test_valid_ascii_code_escape() {
-        let valid = [r"\x00", r"\x7F", r"\x55"];
-        for c in &valid {
-            assert_valid_char(c);
-        }
-    }
-    #[test]
-    fn test_invalid_ascii_code_escape() {
-        let invalid = [r"\x", r"\x7", r"\xF0"];
-        for c in &invalid {
-            assert_invalid_char(c);
-        }
-    }
-    #[test]
-    fn test_valid_unicode_escape() {
-        let valid = [
-            r"\u{FF}",
-            r"\u{0}",
-            r"\u{F}",
-            r"\u{10FFFF}",
-            r"\u{1_0__FF___FF_____}",
-        ];
-        for c in &valid {
-            assert_valid_char(c);
-        }
-    }
-    #[test]
-    fn test_invalid_unicode_escape() {
-        let invalid = [
-            r"\u",
-            r"\u{}",
-            r"\u{",
-            r"\u{FF",
-            r"\u{FFFFFF}",
-            r"\u{_F}",
-            r"\u{00FFFFF}",
-            r"\u{110000}",
-        ];
-        for c in &invalid {
-            assert_invalid_char(c);
-        }
-    }
-}
author	Adolfo Ochagavía <[email protected]>	2018-11-08 14:42:00 +0000
committer	Adolfo Ochagavía <[email protected]>	2018-11-09 13:52:17 +0000
commit	3b4c02c19e4af645fd37e8bff774b05d546dc0b6 (patch)
tree	42c40e9201adf64d1c06bc1c69524f5688ee6e9f /crates/ra_syntax/src/validation.rs
parent	5a9150df9bcdaf5faed5b500c22333f1f7c99f32 (diff)

diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs deleted file mode 100644 index a10b297c0..000000000 --- a/crates/ra_syntax/src/validation.rs +++ /dev/null
@@ -1,271 +0,0 @@
1	use std::u32;
2
3	use arrayvec::ArrayString;
4
5	use crate::{
6	algo::visit::{visitor_ctx, VisitorCtx},
7	ast::{self, AstNode},
8	SourceFileNode,
9	string_lexing::{self, CharComponentKind},
10	yellow::{
11	SyntaxError,
12	SyntaxErrorKind::*,
13	},
14	};
15
16	pub(crate) fn validate(file: &SourceFileNode) -> Vec<SyntaxError> {
17	let mut errors = Vec::new();
18	for node in file.syntax().descendants() {
19	let _ = visitor_ctx(&mut errors)
20	.visit::<ast::Char, _>(validate_char)
21	.accept(node);
22	}
23	errors
24	}
25
26	fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) {
27	let mut components = string_lexing::parse_char_literal(node.text());
28	let mut len = 0;
29	for component in &mut components {
30	len += 1;
31
32	// Validate escapes
33	let text = &node.text()[component.range];
34	let range = component.range + node.syntax().range().start();
35	use self::CharComponentKind::*;
36	match component.kind {
37	AsciiEscape => {
38	if text.len() == 1 {
39	// Escape sequence consists only of leading `\`
40	errors.push(SyntaxError::new(EmptyAsciiEscape, range));
41	} else {
42	let escape_code = text.chars().skip(1).next().unwrap();
43	if !is_ascii_escape(escape_code) {
44	errors.push(SyntaxError::new(InvalidAsciiEscape, range));
45	}
46	}
47	}
48	AsciiCodeEscape => {
49	// An AsciiCodeEscape has 4 chars, example: `\xDD`
50	if text.len() < 4 {
51	errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
52	} else {
53	assert!(
54	text.chars().count() == 4,
55	"AsciiCodeEscape cannot be longer than 4 chars"
56	);
57
58	match u8::from_str_radix(&text[2..], 16) {
59	Ok(code) if code < 128 => { /* Escape code is valid */ }
60	Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
61	Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
62	}
63	}
64	}
65	UnicodeEscape => {
66	assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
67
68	if text.len() == 2 {
69	// No starting `{`
70	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
71	return;
72	}
73
74	if text.len() == 3 {
75	// Only starting `{`
76	errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
77	return;
78	}
79
80	let mut code = ArrayString::<[_; 6]>::new();
81	let mut closed = false;
82	for c in text[3..].chars() {
83	assert!(!closed, "no characters after escape is closed");
84
85	if c.is_digit(16) {
86	if code.len() == 6 {
87	errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
88	return;
89	}
90
91	code.push(c);
92	} else if c == '_' {
93	// Reject leading _
94	if code.len() == 0 {
95	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
96	return;
97	}
98	} else if c == '}' {
99	closed = true;
100	} else {
101	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
102	return;
103	}
104	}
105
106	if !closed {
107	errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
108	}
109
110	if code.len() == 0 {
111	errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
112	return;
113	}
114
115	match u32::from_str_radix(&code, 16) {
116	Ok(code_u32) if code_u32 > 0x10FFFF => {
117	errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
118	}
119	Ok(_) => {
120	// Valid escape code
121	}
122	Err(_) => {
123	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
124	}
125	}
126	}
127	CodePoint => {
128	// These code points must always be escaped
129	if text == "\t" || text == "\r" {
130	errors.push(SyntaxError::new(UnescapedCodepoint, range));
131	}
132	}
133	}
134	}
135
136	if !components.has_closing_quote {
137	errors.push(SyntaxError::new(UnclosedChar, node.syntax().range()));
138	}
139
140	if len == 0 {
141	errors.push(SyntaxError::new(EmptyChar, node.syntax().range()));
142	}
143
144	if len > 1 {
145	errors.push(SyntaxError::new(LongChar, node.syntax().range()));
146	}
147	}
148
149	fn is_ascii_escape(code: char) -> bool {
150	match code {
151	'\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,
152	_ => false,
153	}
154	}
155
156	#[cfg(test)]
157	mod test {
158	use crate::SourceFileNode;
159
160	fn build_file(literal: &str) -> SourceFileNode {
161	let src = format!("const C: char = '{}';", literal);
162	SourceFileNode::parse(&src)
163	}
164
165	fn assert_valid_char(literal: &str) {
166	let file = build_file(literal);
167	assert!(
168	file.errors().len() == 0,
169	"Errors for literal '{}': {:?}",
170	literal,
171	file.errors()
172	);
173	}
174
175	fn assert_invalid_char(literal: &str) {
176	let file = build_file(literal);
177	assert!(file.errors().len() > 0);
178	}
179
180	#[test]
181	fn test_ansi_codepoints() {
182	for byte in 0..=255u8 {
183	match byte {
184	b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()),
185	b'\'' | b'\\' => { /* Ignore character close and backslash */ }
186	_ => assert_valid_char(&(byte as char).to_string()),
187	}
188	}
189	}
190
191	#[test]
192	fn test_unicode_codepoints() {
193	let valid = ["Ƒ", "バ", "メ", "﷽"];
194	for c in &valid {
195	assert_valid_char(c);
196	}
197	}
198
199	#[test]
200	fn test_unicode_multiple_codepoints() {
201	let invalid = ["नी", "👨‍👨‍"];
202	for c in &invalid {
203	assert_invalid_char(c);
204	}
205	}
206
207	#[test]
208	fn test_valid_ascii_escape() {
209	let valid = [
210	r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b",
211	];
212	for c in &valid {
213	assert_valid_char(c);
214	}
215	}
216
217	#[test]
218	fn test_invalid_ascii_escape() {
219	let invalid = [r"\a", r"\?", r"\"];
220	for c in &invalid {
221	assert_invalid_char(c);
222	}
223	}
224
225	#[test]
226	fn test_valid_ascii_code_escape() {
227	let valid = [r"\x00", r"\x7F", r"\x55"];
228	for c in &valid {
229	assert_valid_char(c);
230	}
231	}
232
233	#[test]
234	fn test_invalid_ascii_code_escape() {
235	let invalid = [r"\x", r"\x7", r"\xF0"];
236	for c in &invalid {
237	assert_invalid_char(c);
238	}
239	}
240
241	#[test]
242	fn test_valid_unicode_escape() {
243	let valid = [
244	r"\u{FF}",
245	r"\u{0}",
246	r"\u{F}",
247	r"\u{10FFFF}",
248	r"\u{1_0__FF___FF_____}",
249	];
250	for c in &valid {
251	assert_valid_char(c);
252	}
253	}
254
255	#[test]
256	fn test_invalid_unicode_escape() {
257	let invalid = [
258	r"\u",
259	r"\u{}",
260	r"\u{",
261	r"\u{FF",
262	r"\u{FFFFFF}",
263	r"\u{_F}",
264	r"\u{00FFFFF}",
265	r"\u{110000}",
266	];
267	for c in &invalid {
268	assert_invalid_char(c);
269	}
270	}
271	}