aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/validation.rs
diff options
context:
space:
mode:
authorAdolfo Ochagavía <[email protected]>2018-11-08 14:42:00 +0000
committerAdolfo Ochagavía <[email protected]>2018-11-09 13:52:17 +0000
commit3b4c02c19e4af645fd37e8bff774b05d546dc0b6 (patch)
tree42c40e9201adf64d1c06bc1c69524f5688ee6e9f /crates/ra_syntax/src/validation.rs
parent5a9150df9bcdaf5faed5b500c22333f1f7c99f32 (diff)
Validate string literals
Diffstat (limited to 'crates/ra_syntax/src/validation.rs')
-rw-r--r--crates/ra_syntax/src/validation.rs271
1 files changed, 0 insertions, 271 deletions
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs
deleted file mode 100644
index a10b297c0..000000000
--- a/crates/ra_syntax/src/validation.rs
+++ /dev/null
@@ -1,271 +0,0 @@
1use std::u32;
2
3use arrayvec::ArrayString;
4
5use crate::{
6 algo::visit::{visitor_ctx, VisitorCtx},
7 ast::{self, AstNode},
8 SourceFileNode,
9 string_lexing::{self, CharComponentKind},
10 yellow::{
11 SyntaxError,
12 SyntaxErrorKind::*,
13 },
14};
15
16pub(crate) fn validate(file: &SourceFileNode) -> Vec<SyntaxError> {
17 let mut errors = Vec::new();
18 for node in file.syntax().descendants() {
19 let _ = visitor_ctx(&mut errors)
20 .visit::<ast::Char, _>(validate_char)
21 .accept(node);
22 }
23 errors
24}
25
26fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) {
27 let mut components = string_lexing::parse_char_literal(node.text());
28 let mut len = 0;
29 for component in &mut components {
30 len += 1;
31
32 // Validate escapes
33 let text = &node.text()[component.range];
34 let range = component.range + node.syntax().range().start();
35 use self::CharComponentKind::*;
36 match component.kind {
37 AsciiEscape => {
38 if text.len() == 1 {
39 // Escape sequence consists only of leading `\`
40 errors.push(SyntaxError::new(EmptyAsciiEscape, range));
41 } else {
42 let escape_code = text.chars().skip(1).next().unwrap();
43 if !is_ascii_escape(escape_code) {
44 errors.push(SyntaxError::new(InvalidAsciiEscape, range));
45 }
46 }
47 }
48 AsciiCodeEscape => {
49 // An AsciiCodeEscape has 4 chars, example: `\xDD`
50 if text.len() < 4 {
51 errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
52 } else {
53 assert!(
54 text.chars().count() == 4,
55 "AsciiCodeEscape cannot be longer than 4 chars"
56 );
57
58 match u8::from_str_radix(&text[2..], 16) {
59 Ok(code) if code < 128 => { /* Escape code is valid */ }
60 Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
61 Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
62 }
63 }
64 }
65 UnicodeEscape => {
66 assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
67
68 if text.len() == 2 {
69 // No starting `{`
70 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
71 return;
72 }
73
74 if text.len() == 3 {
75 // Only starting `{`
76 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
77 return;
78 }
79
80 let mut code = ArrayString::<[_; 6]>::new();
81 let mut closed = false;
82 for c in text[3..].chars() {
83 assert!(!closed, "no characters after escape is closed");
84
85 if c.is_digit(16) {
86 if code.len() == 6 {
87 errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
88 return;
89 }
90
91 code.push(c);
92 } else if c == '_' {
93 // Reject leading _
94 if code.len() == 0 {
95 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
96 return;
97 }
98 } else if c == '}' {
99 closed = true;
100 } else {
101 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
102 return;
103 }
104 }
105
106 if !closed {
107 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
108 }
109
110 if code.len() == 0 {
111 errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
112 return;
113 }
114
115 match u32::from_str_radix(&code, 16) {
116 Ok(code_u32) if code_u32 > 0x10FFFF => {
117 errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
118 }
119 Ok(_) => {
120 // Valid escape code
121 }
122 Err(_) => {
123 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
124 }
125 }
126 }
127 CodePoint => {
128 // These code points must always be escaped
129 if text == "\t" || text == "\r" {
130 errors.push(SyntaxError::new(UnescapedCodepoint, range));
131 }
132 }
133 }
134 }
135
136 if !components.has_closing_quote {
137 errors.push(SyntaxError::new(UnclosedChar, node.syntax().range()));
138 }
139
140 if len == 0 {
141 errors.push(SyntaxError::new(EmptyChar, node.syntax().range()));
142 }
143
144 if len > 1 {
145 errors.push(SyntaxError::new(LongChar, node.syntax().range()));
146 }
147}
148
/// Returns `true` when `code` is a character that may directly follow the `\` of a
/// simple escape: `\\`, `\'`, `\"`, `\n`, `\r`, `\t` or `\0`.
fn is_ascii_escape(code: char) -> bool {
    const SIMPLE_ESCAPE_CODES: [char; 7] = ['\\', '\'', '"', 'n', 'r', 't', '0'];
    SIMPLE_ESCAPE_CODES.contains(&code)
}
155
#[cfg(test)]
mod test {
    use crate::SourceFileNode;

    /// Parses a tiny source file whose only char literal wraps `literal` verbatim.
    fn build_file(literal: &str) -> SourceFileNode {
        SourceFileNode::parse(&format!("const C: char = '{}';", literal))
    }

    /// Passes when parsing `'<literal>'` reports no errors at all.
    fn assert_valid_char(literal: &str) {
        let file = build_file(literal);
        let errors = file.errors();
        assert!(
            errors.is_empty(),
            "Errors for literal '{}': {:?}",
            literal,
            errors
        );
    }

    /// Passes when parsing `'<literal>'` reports at least one error.
    fn assert_invalid_char(literal: &str) {
        let file = build_file(literal);
        assert!(file.errors().len() > 0);
    }

    #[test]
    fn test_ansi_codepoints() {
        // Every single-byte code point is tried as an unescaped literal.
        for byte in 0..=255u8 {
            let literal = char::from(byte).to_string();
            match byte {
                b'\n' | b'\r' | b'\t' => assert_invalid_char(&literal),
                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
                _ => assert_valid_char(&literal),
            }
        }
    }

    #[test]
    fn test_unicode_codepoints() {
        let valid = ["Ƒ", "バ", "メ", "﷽"];
        valid.iter().for_each(|c| assert_valid_char(c));
    }

    #[test]
    fn test_unicode_multiple_codepoints() {
        // Each entry is made of several code points, so it cannot be one `char`.
        let invalid = ["नी", "👨‍👨‍"];
        invalid.iter().for_each(|c| assert_invalid_char(c));
    }

    #[test]
    fn test_valid_ascii_escape() {
        let valid = [
            r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b",
        ];
        valid.iter().for_each(|c| assert_valid_char(c));
    }

    #[test]
    fn test_invalid_ascii_escape() {
        let invalid = [r"\a", r"\?", r"\"];
        invalid.iter().for_each(|c| assert_invalid_char(c));
    }

    #[test]
    fn test_valid_ascii_code_escape() {
        let valid = [r"\x00", r"\x7F", r"\x55"];
        valid.iter().for_each(|c| assert_valid_char(c));
    }

    #[test]
    fn test_invalid_ascii_code_escape() {
        let invalid = [r"\x", r"\x7", r"\xF0"];
        invalid.iter().for_each(|c| assert_invalid_char(c));
    }

    #[test]
    fn test_valid_unicode_escape() {
        let valid = [
            r"\u{FF}",
            r"\u{0}",
            r"\u{F}",
            r"\u{10FFFF}",
            r"\u{1_0__FF___FF_____}",
        ];
        valid.iter().for_each(|c| assert_valid_char(c));
    }

    #[test]
    fn test_invalid_unicode_escape() {
        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        invalid.iter().for_each(|c| assert_invalid_char(c));
    }
}