aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/validation/byte.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_syntax/src/validation/byte.rs')
-rw-r--r--crates/ra_syntax/src/validation/byte.rs211
1 files changed, 211 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs
new file mode 100644
index 000000000..43c0d7edd
--- /dev/null
+++ b/crates/ra_syntax/src/validation/byte.rs
@@ -0,0 +1,211 @@
1//! Validation of byte literals
2
3use crate::{
4 ast::{self, AstNode},
5 string_lexing::{self, CharComponentKind},
6 TextRange,
7 validation::char,
8 yellow::{
9 SyntaxError,
10 SyntaxErrorKind::*,
11 },
12};
13
14pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec<SyntaxError>) {
15 let literal_text = node.text();
16 let literal_range = node.syntax().range();
17 let mut components = string_lexing::parse_byte_literal(literal_text);
18 let mut len = 0;
19 for component in &mut components {
20 len += 1;
21 let text = &literal_text[component.range];
22 let range = component.range + literal_range.start();
23 validate_byte_component(text, component.kind, range, errors);
24 }
25
26 if !components.has_closing_quote {
27 errors.push(SyntaxError::new(UnclosedByte, literal_range));
28 }
29
30 if len == 0 {
31 errors.push(SyntaxError::new(EmptyByte, literal_range));
32 }
33
34 if len > 1 {
35 errors.push(SyntaxError::new(OverlongByte, literal_range));
36 }
37}
38
39pub(super) fn validate_byte_component(
40 text: &str,
41 kind: CharComponentKind,
42 range: TextRange,
43 errors: &mut Vec<SyntaxError>,
44) {
45 use self::CharComponentKind::*;
46 match kind {
47 AsciiEscape => validate_byte_escape(text, range, errors),
48 AsciiCodeEscape => validate_byte_code_escape(text, range, errors),
49 UnicodeEscape => errors.push(SyntaxError::new(UnicodeEscapeForbidden, range)),
50 CodePoint => {
51 let c = text
52 .chars()
53 .next()
54 .expect("Code points should be one character long");
55
56 // These bytes must always be escaped
57 if c == '\t' || c == '\r' || c == '\n' {
58 errors.push(SyntaxError::new(UnescapedByte, range));
59 }
60
61 // Only ASCII bytes are allowed
62 if c > 0x7F as char {
63 errors.push(SyntaxError::new(ByteOutOfRange, range));
64 }
65 }
66 }
67}
68
69fn validate_byte_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
70 if text.len() == 1 {
71 // Escape sequence consists only of leading `\`
72 errors.push(SyntaxError::new(EmptyByteEscape, range));
73 } else {
74 let escape_code = text.chars().skip(1).next().unwrap();
75 if !char::is_ascii_escape(escape_code) {
76 errors.push(SyntaxError::new(InvalidByteEscape, range));
77 }
78 }
79}
80
81fn validate_byte_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
82 // A ByteCodeEscape has 4 chars, example: `\xDD`
83 if text.len() < 4 {
84 errors.push(SyntaxError::new(TooShortByteCodeEscape, range));
85 } else {
86 assert!(
87 text.chars().count() == 4,
88 "ByteCodeEscape cannot be longer than 4 chars"
89 );
90
91 if u8::from_str_radix(&text[2..], 16).is_err() {
92 errors.push(SyntaxError::new(MalformedByteCodeEscape, range));
93 }
94 }
95}
96
#[cfg(test)]
mod test {
    use crate::SourceFileNode;

    /// Parses a source file whose only item is a `u8` constant initialised
    /// with the byte literal `b'<literal>'`.
    fn build_file(literal: &str) -> SourceFileNode {
        let src = format!("const C: u8 = b'{}';", literal);
        SourceFileNode::parse(&src)
    }

    /// Asserts that the file built around `literal` reports no errors at all.
    fn assert_valid_byte(literal: &str) {
        let file = build_file(literal);
        assert!(
            file.errors().len() == 0,
            "Errors for literal '{}': {:?}",
            literal,
            file.errors()
        );
    }

    /// Asserts that the file built around `literal` reports at least one error.
    ///
    /// Any error reported for the file counts, not only byte-validation ones.
    fn assert_invalid_byte(literal: &str) {
        let file = build_file(literal);
        // Name the offending literal: this helper is driven from loops over
        // many inputs, so a bare failure would not say which one was accepted.
        assert!(
            file.errors().len() > 0,
            "Expected errors for literal '{}', but none were reported",
            literal
        );
    }

    /// Every code point in `0..128` except tab, CR and LF is accepted
    /// unescaped; every code point in `128..=255` is rejected.
    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..128 {
            match byte {
                b'\n' | b'\r' | b'\t' => assert_invalid_byte(&(byte as char).to_string()),
                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
                _ => assert_valid_byte(&(byte as char).to_string()),
            }
        }

        for byte in 128..=255u8 {
            assert_invalid_byte(&(byte as char).to_string());
        }
    }

    /// Single non-ASCII code points are rejected.
    #[test]
    fn test_unicode_codepoints() {
        let invalid = ["Ƒ", "バ", "メ", "﷽"];
        for c in &invalid {
            assert_invalid_byte(c);
        }
    }

    /// Sequences made of several non-ASCII code points are rejected.
    #[test]
    fn test_unicode_multiple_codepoints() {
        let invalid = ["नी", "👨‍👨‍"];
        for c in &invalid {
            assert_invalid_byte(c);
        }
    }

    /// Simple escapes (and an unescaped double quote) are accepted.
    #[test]
    fn test_valid_byte_escape() {
        let valid = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"];
        for c in &valid {
            assert_valid_byte(c);
        }
    }

    /// Unknown escape characters and a lone backslash are rejected.
    #[test]
    fn test_invalid_byte_escape() {
        let invalid = [r"\a", r"\?", r"\"];
        for c in &invalid {
            assert_invalid_byte(c);
        }
    }

    /// Two-digit hexadecimal escapes are accepted, including values above 0x7F.
    #[test]
    fn test_valid_byte_code_escape() {
        let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
        for c in &valid {
            assert_valid_byte(c);
        }
    }

    /// Hexadecimal escapes with fewer than two digits are rejected.
    #[test]
    fn test_invalid_byte_code_escape() {
        let invalid = [r"\x", r"\x7"];
        for c in &invalid {
            assert_invalid_byte(c);
        }
    }

    /// Unicode escapes are rejected in byte literals whether or not the
    /// escape itself is well formed.
    #[test]
    fn test_invalid_unicode_escape() {
        let well_formed = [
            r"\u{FF}",
            r"\u{0}",
            r"\u{F}",
            r"\u{10FFFF}",
            r"\u{1_0__FF___FF_____}",
        ];
        for c in &well_formed {
            assert_invalid_byte(c);
        }

        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for c in &invalid {
            assert_invalid_byte(c);
        }
    }
}