diff options
Diffstat (limited to 'crates/ra_syntax/src/validation/byte.rs')
-rw-r--r-- | crates/ra_syntax/src/validation/byte.rs | 199 |
1 files changed, 0 insertions, 199 deletions
diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs deleted file mode 100644 index f653e65d0..000000000 --- a/crates/ra_syntax/src/validation/byte.rs +++ /dev/null | |||
@@ -1,199 +0,0 @@ | |||
1 | //! Validation of byte literals | ||
2 | |||
3 | use crate::{ | ||
4 | string_lexing::{self, StringComponentKind}, | ||
5 | TextRange, | ||
6 | validation::char, | ||
7 | SyntaxError, | ||
8 | SyntaxErrorKind::*, | ||
9 | SyntaxToken, | ||
10 | }; | ||
11 | |||
12 | pub(super) fn validate_byte_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) { | ||
13 | let literal_text = node.text(); | ||
14 | let literal_range = node.range(); | ||
15 | let mut components = string_lexing::parse_quoted_literal(Some('b'), '\'', literal_text); | ||
16 | let mut len = 0; | ||
17 | for component in &mut components { | ||
18 | len += 1; | ||
19 | let text = &literal_text[component.range]; | ||
20 | let range = component.range + literal_range.start(); | ||
21 | validate_byte_component(text, component.kind, range, errors); | ||
22 | } | ||
23 | |||
24 | if !components.has_closing_quote { | ||
25 | errors.push(SyntaxError::new(UnclosedByte, literal_range)); | ||
26 | } | ||
27 | |||
28 | if let Some(range) = components.suffix { | ||
29 | errors.push(SyntaxError::new(InvalidSuffix, range + literal_range.start())); | ||
30 | } | ||
31 | |||
32 | if len == 0 { | ||
33 | errors.push(SyntaxError::new(EmptyByte, literal_range)); | ||
34 | } | ||
35 | |||
36 | if len > 1 { | ||
37 | errors.push(SyntaxError::new(OverlongByte, literal_range)); | ||
38 | } | ||
39 | } | ||
40 | |||
41 | pub(super) fn validate_byte_component( | ||
42 | text: &str, | ||
43 | kind: StringComponentKind, | ||
44 | range: TextRange, | ||
45 | errors: &mut Vec<SyntaxError>, | ||
46 | ) { | ||
47 | use self::StringComponentKind::*; | ||
48 | match kind { | ||
49 | AsciiEscape => validate_byte_escape(text, range, errors), | ||
50 | AsciiCodeEscape => validate_byte_code_escape(text, range, errors), | ||
51 | UnicodeEscape => errors.push(SyntaxError::new(UnicodeEscapeForbidden, range)), | ||
52 | CodePoint => { | ||
53 | let c = text.chars().next().expect("Code points should be one character long"); | ||
54 | |||
55 | // These bytes must always be escaped | ||
56 | if c == '\t' || c == '\r' || c == '\n' { | ||
57 | errors.push(SyntaxError::new(UnescapedByte, range)); | ||
58 | } | ||
59 | |||
60 | // Only ASCII bytes are allowed | ||
61 | if c > 0x7F as char { | ||
62 | errors.push(SyntaxError::new(ByteOutOfRange, range)); | ||
63 | } | ||
64 | } | ||
65 | IgnoreNewline => { /* always valid */ } | ||
66 | } | ||
67 | } | ||
68 | |||
69 | fn validate_byte_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) { | ||
70 | if text.len() == 1 { | ||
71 | // Escape sequence consists only of leading `\` | ||
72 | errors.push(SyntaxError::new(EmptyByteEscape, range)); | ||
73 | } else { | ||
74 | let escape_code = text.chars().skip(1).next().unwrap(); | ||
75 | if !char::is_ascii_escape(escape_code) { | ||
76 | errors.push(SyntaxError::new(InvalidByteEscape, range)); | ||
77 | } | ||
78 | } | ||
79 | } | ||
80 | |||
81 | fn validate_byte_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) { | ||
82 | // A ByteCodeEscape has 4 chars, example: `\xDD` | ||
83 | if !text.is_ascii() { | ||
84 | errors.push(SyntaxError::new(MalformedByteCodeEscape, range)); | ||
85 | } else if text.chars().count() < 4 { | ||
86 | errors.push(SyntaxError::new(TooShortByteCodeEscape, range)); | ||
87 | } else { | ||
88 | assert!(text.chars().count() == 4, "ByteCodeEscape cannot be longer than 4 chars"); | ||
89 | |||
90 | if u8::from_str_radix(&text[2..], 16).is_err() { | ||
91 | errors.push(SyntaxError::new(MalformedByteCodeEscape, range)); | ||
92 | } | ||
93 | } | ||
94 | } | ||
95 | |||
#[cfg(test)]
mod test {
    use crate::{SourceFile, TreeArc};

    /// Parses a tiny source file whose only item is a constant initialised with
    /// the byte literal `b'<literal>'`.
    fn build_file(literal: &str) -> TreeArc<SourceFile> {
        SourceFile::parse(&format!("const C: u8 = b'{}';", literal))
    }

    /// Asserts that parsing `b'<literal>'` reports no errors.
    fn assert_valid_byte(literal: &str) {
        let file = build_file(literal);
        assert!(
            file.errors().is_empty(),
            "Errors for literal '{}': {:?}",
            literal,
            file.errors()
        );
    }

    /// Asserts that parsing `b'<literal>'` reports at least one error.
    fn assert_invalid_byte(literal: &str) {
        let file = build_file(literal);
        assert!(!file.errors().is_empty());
    }

    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..128 {
            let literal = (byte as char).to_string();
            match byte {
                b'\n' | b'\r' | b'\t' => assert_invalid_byte(&literal),
                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
                _ => assert_valid_byte(&literal),
            }
        }

        // Everything above the ASCII range must be rejected.
        for byte in 128..=255u8 {
            let literal = (byte as char).to_string();
            assert_invalid_byte(&literal);
        }
    }

    #[test]
    fn test_unicode_codepoints() {
        let invalid = ["Ƒ", "バ", "メ", "﷽"];
        for &literal in invalid.iter() {
            assert_invalid_byte(literal);
        }
    }

    #[test]
    fn test_unicode_multiple_codepoints() {
        let invalid = ["नी", "👨👨"];
        for &literal in invalid.iter() {
            assert_invalid_byte(literal);
        }
    }

    #[test]
    fn test_valid_byte_escape() {
        let valid = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"];
        for &literal in valid.iter() {
            assert_valid_byte(literal);
        }
    }

    #[test]
    fn test_invalid_byte_escape() {
        let invalid = [r"\a", r"\?", r"\"];
        for &literal in invalid.iter() {
            assert_invalid_byte(literal);
        }
    }

    #[test]
    fn test_valid_byte_code_escape() {
        let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
        for &literal in valid.iter() {
            assert_valid_byte(literal);
        }
    }

    #[test]
    fn test_invalid_byte_code_escape() {
        let invalid = [r"\x", r"\x7"];
        for &literal in invalid.iter() {
            assert_invalid_byte(literal);
        }
    }

    #[test]
    fn test_invalid_unicode_escape() {
        // Syntactically well-formed unicode escapes are still forbidden in byte literals.
        let well_formed = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
        for &literal in well_formed.iter() {
            assert_invalid_byte(literal);
        }

        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for &literal in invalid.iter() {
            assert_invalid_byte(literal);
        }
    }
}