aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/validation/char.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_syntax/src/validation/char.rs')
-rw-r--r--crates/ra_syntax/src/validation/char.rs276
1 files changed, 276 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
new file mode 100644
index 000000000..4728c85e6
--- /dev/null
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -0,0 +1,276 @@
1//! Validation of char literals
2
3use std::u32;
4
5use arrayvec::ArrayString;
6
7use crate::{
8 ast::{self, AstNode},
9 string_lexing::{self, CharComponentKind},
10 TextRange,
11 yellow::{
12 SyntaxError,
13 SyntaxErrorKind::*,
14 },
15};
16
17pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
18 let literal_text = node.text();
19 let literal_range = node.syntax().range();
20 let mut components = string_lexing::parse_char_literal(literal_text);
21 let mut len = 0;
22 for component in &mut components {
23 len += 1;
24 let text = &literal_text[component.range];
25 let range = component.range + literal_range.start();
26 validate_char_component(text, component.kind, range, errors);
27 }
28
29 if !components.has_closing_quote {
30 errors.push(SyntaxError::new(UnclosedChar, literal_range));
31 }
32
33 if len == 0 {
34 errors.push(SyntaxError::new(EmptyChar, literal_range));
35 }
36
37 if len > 1 {
38 errors.push(SyntaxError::new(OverlongChar, literal_range));
39 }
40}
41
42pub(super) fn validate_char_component(
43 text: &str,
44 kind: CharComponentKind,
45 range: TextRange,
46 errors: &mut Vec<SyntaxError>,
47) {
48 // Validate escapes
49 use self::CharComponentKind::*;
50 match kind {
51 AsciiEscape => validate_ascii_escape(text, range, errors),
52 AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
53 UnicodeEscape => validate_unicode_escape(text, range, errors),
54 CodePoint => {
55 // These code points must always be escaped
56 if text == "\t" || text == "\r" || text == "\n" {
57 errors.push(SyntaxError::new(UnescapedCodepoint, range));
58 }
59 }
60 }
61}
62
63fn validate_ascii_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
64 if text.len() == 1 {
65 // Escape sequence consists only of leading `\`
66 errors.push(SyntaxError::new(EmptyAsciiEscape, range));
67 } else {
68 let escape_code = text.chars().skip(1).next().unwrap();
69 if !is_ascii_escape(escape_code) {
70 errors.push(SyntaxError::new(InvalidAsciiEscape, range));
71 }
72 }
73}
74
75pub(super) fn is_ascii_escape(code: char) -> bool {
76 match code {
77 '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,
78 _ => false,
79 }
80}
81
82fn validate_ascii_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
83 // An AsciiCodeEscape has 4 chars, example: `\xDD`
84 if text.len() < 4 {
85 errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
86 } else {
87 assert!(
88 text.chars().count() == 4,
89 "AsciiCodeEscape cannot be longer than 4 chars"
90 );
91
92 match u8::from_str_radix(&text[2..], 16) {
93 Ok(code) if code < 128 => { /* Escape code is valid */ }
94 Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
95 Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
96 }
97 }
98}
99
100fn validate_unicode_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
101 assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
102
103 if text.len() == 2 {
104 // No starting `{`
105 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
106 return;
107 }
108
109 if text.len() == 3 {
110 // Only starting `{`
111 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
112 return;
113 }
114
115 let mut code = ArrayString::<[_; 6]>::new();
116 let mut closed = false;
117 for c in text[3..].chars() {
118 assert!(!closed, "no characters after escape is closed");
119
120 if c.is_digit(16) {
121 if code.len() == 6 {
122 errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
123 return;
124 }
125
126 code.push(c);
127 } else if c == '_' {
128 // Reject leading _
129 if code.len() == 0 {
130 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
131 return;
132 }
133 } else if c == '}' {
134 closed = true;
135 } else {
136 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
137 return;
138 }
139 }
140
141 if !closed {
142 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
143 }
144
145 if code.len() == 0 {
146 errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
147 return;
148 }
149
150 match u32::from_str_radix(&code, 16) {
151 Ok(code_u32) if code_u32 > 0x10FFFF => {
152 errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
153 }
154 Ok(_) => {
155 // Valid escape code
156 }
157 Err(_) => {
158 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
159 }
160 }
161}
162
#[cfg(test)]
mod test {
    use crate::SourceFileNode;

    /// Parses a source file declaring one `char` constant whose initialiser is
    /// `literal` wrapped in single quotes.
    fn build_file(literal: &str) -> SourceFileNode {
        SourceFileNode::parse(&format!("const C: char = '{}';", literal))
    }

    /// Fails the test if parsing `'<literal>'` reports any error.
    fn assert_valid_char(literal: &str) {
        let parsed = build_file(literal);
        let error_count = parsed.errors().len();
        assert!(
            error_count == 0,
            "Errors for literal '{}': {:?}",
            literal,
            parsed.errors()
        );
    }

    /// Fails the test if parsing `'<literal>'` reports no error at all.
    fn assert_invalid_char(literal: &str) {
        let file = build_file(literal);
        assert!(file.errors().len() > 0);
    }

    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..=255u8 {
            let literal = (byte as char).to_string();
            match byte {
                // Whitespace control characters must be written as escapes.
                b'\n' | b'\r' | b'\t' => assert_invalid_char(&literal),
                // Ignore character close and backslash
                b'\'' | b'\\' => {}
                _ => assert_valid_char(&literal),
            }
        }
    }

    #[test]
    fn test_unicode_codepoints() {
        for literal in &["Ƒ", "バ", "メ", "﷽"] {
            assert_valid_char(literal);
        }
    }

    #[test]
    fn test_unicode_multiple_codepoints() {
        for literal in &["नी", "👨‍👨‍"] {
            assert_invalid_char(literal);
        }
    }

    #[test]
    fn test_valid_ascii_escape() {
        let accepted = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"];
        for literal in &accepted {
            assert_valid_char(literal);
        }
    }

    #[test]
    fn test_invalid_ascii_escape() {
        for literal in &[r"\a", r"\?", r"\"] {
            assert_invalid_char(literal);
        }
    }

    #[test]
    fn test_valid_ascii_code_escape() {
        for literal in &[r"\x00", r"\x7F", r"\x55"] {
            assert_valid_char(literal);
        }
    }

    #[test]
    fn test_invalid_ascii_code_escape() {
        for literal in &[r"\x", r"\x7", r"\xF0"] {
            assert_invalid_char(literal);
        }
    }

    #[test]
    fn test_valid_unicode_escape() {
        let accepted = [
            r"\u{FF}",
            r"\u{0}",
            r"\u{F}",
            r"\u{10FFFF}",
            r"\u{1_0__FF___FF_____}",
        ];
        for literal in &accepted {
            assert_valid_char(literal);
        }
    }

    #[test]
    fn test_invalid_unicode_escape() {
        let rejected = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for literal in &rejected {
            assert_invalid_char(literal);
        }
    }
}