aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/validation/char.rs
diff options
context:
space:
mode:
authorAdolfo Ochagavía <[email protected]>2018-11-08 14:42:00 +0000
committerAdolfo Ochagavía <[email protected]>2018-11-09 13:52:17 +0000
commit3b4c02c19e4af645fd37e8bff774b05d546dc0b6 (patch)
tree42c40e9201adf64d1c06bc1c69524f5688ee6e9f /crates/ra_syntax/src/validation/char.rs
parent5a9150df9bcdaf5faed5b500c22333f1f7c99f32 (diff)
Validate string literals
Diffstat (limited to 'crates/ra_syntax/src/validation/char.rs')
-rw-r--r--crates/ra_syntax/src/validation/char.rs270
1 files changed, 270 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
new file mode 100644
index 000000000..63f9bad24
--- /dev/null
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -0,0 +1,270 @@
1use std::u32;
2
3use arrayvec::ArrayString;
4
5use crate::{
6 ast::{self, AstNode},
7 string_lexing::{self, CharComponentKind},
8 TextRange,
9 yellow::{
10 SyntaxError,
11 SyntaxErrorKind::*,
12 },
13};
14
15pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
16 let literal_text = node.text();
17 let literal_range = node.syntax().range();
18 let mut components = string_lexing::parse_char_literal(literal_text);
19 let mut len = 0;
20 for component in &mut components {
21 len += 1;
22 let text = &literal_text[component.range];
23 let range = component.range + literal_range.start();
24 validate_char_component(text, component.kind, range, errors);
25 }
26
27 if !components.has_closing_quote {
28 errors.push(SyntaxError::new(UnclosedChar, literal_range));
29 }
30
31 if len == 0 {
32 errors.push(SyntaxError::new(EmptyChar, literal_range));
33 }
34
35 if len > 1 {
36 errors.push(SyntaxError::new(OverlongChar, literal_range));
37 }
38}
39
40pub(crate) fn validate_char_component(
41 text: &str,
42 kind: CharComponentKind,
43 range: TextRange,
44 errors: &mut Vec<SyntaxError>,
45) {
46 // Validate escapes
47 use self::CharComponentKind::*;
48 match kind {
49 AsciiEscape => {
50 if text.len() == 1 {
51 // Escape sequence consists only of leading `\`
52 errors.push(SyntaxError::new(EmptyAsciiEscape, range));
53 } else {
54 let escape_code = text.chars().skip(1).next().unwrap();
55 if !is_ascii_escape(escape_code) {
56 errors.push(SyntaxError::new(InvalidAsciiEscape, range));
57 }
58 }
59 }
60 AsciiCodeEscape => {
61 // An AsciiCodeEscape has 4 chars, example: `\xDD`
62 if text.len() < 4 {
63 errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
64 } else {
65 assert!(
66 text.chars().count() == 4,
67 "AsciiCodeEscape cannot be longer than 4 chars"
68 );
69
70 match u8::from_str_radix(&text[2..], 16) {
71 Ok(code) if code < 128 => { /* Escape code is valid */ }
72 Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
73 Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
74 }
75 }
76 }
77 UnicodeEscape => {
78 assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
79
80 if text.len() == 2 {
81 // No starting `{`
82 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
83 return;
84 }
85
86 if text.len() == 3 {
87 // Only starting `{`
88 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
89 return;
90 }
91
92 let mut code = ArrayString::<[_; 6]>::new();
93 let mut closed = false;
94 for c in text[3..].chars() {
95 assert!(!closed, "no characters after escape is closed");
96
97 if c.is_digit(16) {
98 if code.len() == 6 {
99 errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
100 return;
101 }
102
103 code.push(c);
104 } else if c == '_' {
105 // Reject leading _
106 if code.len() == 0 {
107 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
108 return;
109 }
110 } else if c == '}' {
111 closed = true;
112 } else {
113 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
114 return;
115 }
116 }
117
118 if !closed {
119 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
120 }
121
122 if code.len() == 0 {
123 errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
124 return;
125 }
126
127 match u32::from_str_radix(&code, 16) {
128 Ok(code_u32) if code_u32 > 0x10FFFF => {
129 errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
130 }
131 Ok(_) => {
132 // Valid escape code
133 }
134 Err(_) => {
135 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
136 }
137 }
138 }
139 CodePoint => {
140 // These code points must always be escaped
141 if text == "\t" || text == "\r" {
142 errors.push(SyntaxError::new(UnescapedCodepoint, range));
143 }
144 }
145 }
146}
147
/// Returns `true` if `code` is one of the characters that may directly follow
/// a backslash in a simple escape: `\\`, `\'`, `\"`, `\n`, `\r`, `\t` or `\0`.
fn is_ascii_escape(code: char) -> bool {
    const SIMPLE_ESCAPES: [char; 7] = ['\\', '\'', '"', 'n', 'r', 't', '0'];
    SIMPLE_ESCAPES.contains(&code)
}
154
#[cfg(test)]
mod test {
    use crate::SourceFileNode;

    /// Parses a source file declaring a `char` constant whose literal body
    /// (the text between the single quotes) is `literal`.
    fn build_file(literal: &str) -> SourceFileNode {
        SourceFileNode::parse(&format!("const C: char = '{}';", literal))
    }

    /// Asserts that a file built around `literal` reports no errors.
    fn assert_valid_char(literal: &str) {
        let file = build_file(literal);
        let errors = file.errors();
        assert!(
            errors.is_empty(),
            "Errors for literal '{}': {:?}",
            literal,
            errors
        );
    }

    /// Asserts that a file built around `literal` reports at least one error.
    fn assert_invalid_char(literal: &str) {
        let file = build_file(literal);
        assert!(!file.errors().is_empty());
    }

    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..=255u8 {
            let literal = (byte as char).to_string();
            match byte {
                // Ignore character close and backslash
                b'\'' | b'\\' => {}
                b'\n' | b'\r' | b'\t' => assert_invalid_char(&literal),
                _ => assert_valid_char(&literal),
            }
        }
    }

    #[test]
    fn test_unicode_codepoints() {
        let valid = ["Ƒ", "バ", "メ", "﷽"];
        valid.iter().for_each(|c| assert_valid_char(c));
    }

    #[test]
    fn test_unicode_multiple_codepoints() {
        let invalid = ["नी", "👨‍👨‍"];
        invalid.iter().for_each(|c| assert_invalid_char(c));
    }

    #[test]
    fn test_valid_ascii_escape() {
        let valid = [
            r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b",
        ];
        valid.iter().for_each(|c| assert_valid_char(c));
    }

    #[test]
    fn test_invalid_ascii_escape() {
        let invalid = [r"\a", r"\?", r"\"];
        invalid.iter().for_each(|c| assert_invalid_char(c));
    }

    #[test]
    fn test_valid_ascii_code_escape() {
        let valid = [r"\x00", r"\x7F", r"\x55"];
        valid.iter().for_each(|c| assert_valid_char(c));
    }

    #[test]
    fn test_invalid_ascii_code_escape() {
        let invalid = [r"\x", r"\x7", r"\xF0"];
        invalid.iter().for_each(|c| assert_invalid_char(c));
    }

    #[test]
    fn test_valid_unicode_escape() {
        let valid = [
            r"\u{FF}",
            r"\u{0}",
            r"\u{F}",
            r"\u{10FFFF}",
            r"\u{1_0__FF___FF_____}",
        ];
        valid.iter().for_each(|c| assert_valid_char(c));
    }

    #[test]
    fn test_invalid_unicode_escape() {
        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        invalid.iter().for_each(|c| assert_invalid_char(c));
    }
}