aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/validation/char.rs
diff options
context:
space:
mode:
authorbors[bot] <bors[bot]@users.noreply.github.com>2019-05-07 17:43:10 +0100
committerbors[bot] <bors[bot]@users.noreply.github.com>2019-05-07 17:43:10 +0100
commitd3efedb752bb2198796603d8a479a5e3ee472a97 (patch)
treeca6a4aee6ad4077a869a932a18c6c8d134406f8c /crates/ra_syntax/src/validation/char.rs
parentef782adc293deb287128f005dbab2038ba3ccdc1 (diff)
parent313314e14b629ebf50389dbd2d440bda922f6ae7 (diff)
Merge #1253
1253: Share literal validation logic with compiler r=matklad a=matklad This is neat: the unescape module is literally what the compiler is using right now: https://github.com/rust-lang/rust/blob/c6ac57564852cb6e2d0db60f7b46d9eb98d4b449/src/libsyntax/parse/unescape.rs So, yeah, code sharing via copy-paste! Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_syntax/src/validation/char.rs')
-rw-r--r--crates/ra_syntax/src/validation/char.rs273
1 files changed, 0 insertions, 273 deletions
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
deleted file mode 100644
index 0f1885873..000000000
--- a/crates/ra_syntax/src/validation/char.rs
+++ /dev/null
@@ -1,273 +0,0 @@
1//! Validation of char literals
2
3use std::u32;
4
5use arrayvec::ArrayString;
6
7use crate::{
8 string_lexing::{self, StringComponentKind},
9 TextRange,
10 SyntaxError,
11 SyntaxErrorKind::*,
12 SyntaxToken,
13};
14
15pub(super) fn validate_char_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) {
16 let literal_text = node.text();
17 let literal_range = node.range();
18 let mut components = string_lexing::parse_quoted_literal(None, '\'', literal_text);
19 let mut len = 0;
20 for component in &mut components {
21 len += 1;
22 let text = &literal_text[component.range];
23 let range = component.range + literal_range.start();
24 validate_char_component(text, component.kind, range, errors);
25 }
26
27 if !components.has_closing_quote {
28 errors.push(SyntaxError::new(UnclosedChar, literal_range));
29 }
30
31 if let Some(range) = components.suffix {
32 errors.push(SyntaxError::new(InvalidSuffix, range + literal_range.start()));
33 }
34
35 if len == 0 {
36 errors.push(SyntaxError::new(EmptyChar, literal_range));
37 }
38
39 if len > 1 {
40 errors.push(SyntaxError::new(OverlongChar, literal_range));
41 }
42}
43
44pub(super) fn validate_char_component(
45 text: &str,
46 kind: StringComponentKind,
47 range: TextRange,
48 errors: &mut Vec<SyntaxError>,
49) {
50 // Validate escapes
51 use self::StringComponentKind::*;
52 match kind {
53 AsciiEscape => validate_ascii_escape(text, range, errors),
54 AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
55 UnicodeEscape => validate_unicode_escape(text, range, errors),
56 CodePoint => {
57 // These code points must always be escaped
58 if text == "\t" || text == "\r" || text == "\n" {
59 errors.push(SyntaxError::new(UnescapedCodepoint, range));
60 }
61 }
62 StringComponentKind::IgnoreNewline => { /* always valid */ }
63 }
64}
65
66fn validate_ascii_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
67 if text.len() == 1 {
68 // Escape sequence consists only of leading `\` (only occurs at EOF, otherwise e.g. '\' is treated as an unclosed char containing a single quote `'`)
69 errors.push(SyntaxError::new(EmptyAsciiEscape, range));
70 } else {
71 let escape_code = text.chars().skip(1).next().unwrap();
72 if !is_ascii_escape(escape_code) {
73 errors.push(SyntaxError::new(InvalidAsciiEscape, range));
74 }
75 }
76}
77
78pub(super) fn is_ascii_escape(code: char) -> bool {
79 match code {
80 '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,
81 _ => false,
82 }
83}
84
85fn validate_ascii_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
86 // An AsciiCodeEscape has 4 chars, example: `\xDD`
87 if !text.is_ascii() {
88 // FIXME: Give a more precise error message (say what the invalid character was)
89 errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range));
90 } else if text.chars().count() < 4 {
91 errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
92 } else {
93 assert_eq!(
94 text.chars().count(),
95 4,
96 "AsciiCodeEscape cannot be longer than 4 chars, but text '{}' is",
97 text,
98 );
99
100 match u8::from_str_radix(&text[2..], 16) {
101 Ok(code) if code < 128 => { /* Escape code is valid */ }
102 Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
103 Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
104 }
105 }
106}
107
108fn validate_unicode_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
109 assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
110
111 if text.len() == 2 {
112 // No starting `{`
113 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
114 return;
115 }
116
117 if text.len() == 3 {
118 // Only starting `{`
119 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
120 return;
121 }
122
123 let mut code = ArrayString::<[_; 6]>::new();
124 let mut closed = false;
125 for c in text[3..].chars() {
126 assert!(!closed, "no characters after escape is closed");
127
128 if c.is_digit(16) {
129 if code.len() == 6 {
130 errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
131 return;
132 }
133
134 code.push(c);
135 } else if c == '_' {
136 // Reject leading _
137 if code.len() == 0 {
138 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
139 return;
140 }
141 } else if c == '}' {
142 closed = true;
143 } else {
144 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
145 return;
146 }
147 }
148
149 if !closed {
150 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
151 }
152
153 if code.len() == 0 {
154 errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
155 return;
156 }
157
158 match u32::from_str_radix(&code, 16) {
159 Ok(code_u32) if code_u32 > 0x10FFFF => {
160 errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
161 }
162 Ok(_) => {
163 // Valid escape code
164 }
165 Err(_) => {
166 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
167 }
168 }
169}
170
#[cfg(test)]
mod test {
    use crate::{SourceFile, TreeArc};

    /// Parses a one-line source file declaring a `char` constant whose
    /// literal body (the part between the quotes) is `literal`.
    fn build_file(literal: &str) -> TreeArc<SourceFile> {
        SourceFile::parse(&format!("const C: char = '{}';", literal))
    }

    /// Asserts that parsing `'<literal>'` reports no errors at all.
    fn assert_valid_char(literal: &str) {
        let file = build_file(literal);
        let errors = file.errors();
        assert!(errors.len() == 0, "Errors for literal '{}': {:?}", literal, errors);
    }

    /// Asserts that parsing `'<literal>'` reports at least one error.
    fn assert_invalid_char(literal: &str) {
        let file = build_file(literal);
        assert!(file.errors().len() > 0);
    }

    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..=255u8 {
            let literal = (byte as char).to_string();
            match byte {
                // The closing quote and the backslash cannot stand alone.
                b'\'' | b'\\' => {}
                // These must be written as escapes.
                b'\n' | b'\r' | b'\t' => assert_invalid_char(&literal),
                _ => assert_valid_char(&literal),
            }
        }
    }

    #[test]
    fn test_unicode_codepoints() {
        for &literal in &["Ƒ", "バ", "メ", "﷽"] {
            assert_valid_char(literal);
        }
    }

    #[test]
    fn test_unicode_multiple_codepoints() {
        for &literal in &["नी", "👨‍👨‍"] {
            assert_invalid_char(literal);
        }
    }

    #[test]
    fn test_valid_ascii_escape() {
        for &literal in &[r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"] {
            assert_valid_char(literal);
        }
    }

    #[test]
    fn test_invalid_ascii_escape() {
        for &literal in &[r"\a", r"\?", r"\"] {
            assert_invalid_char(literal);
        }
    }

    #[test]
    fn test_valid_ascii_code_escape() {
        for &literal in &[r"\x00", r"\x7F", r"\x55"] {
            assert_valid_char(literal);
        }
    }

    #[test]
    fn test_invalid_ascii_code_escape() {
        for &literal in &[r"\x", r"\x7", r"\xF0"] {
            assert_invalid_char(literal);
        }
    }

    #[test]
    fn test_valid_unicode_escape() {
        let cases = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
        for &literal in &cases {
            assert_valid_char(literal);
        }
    }

    #[test]
    fn test_invalid_unicode_escape() {
        let cases = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for &literal in &cases {
            assert_invalid_char(literal);
        }
    }
}