diff options
author | Adolfo Ochagavía <[email protected]> | 2018-11-11 19:27:00 +0000 |
---|---|---|
committer | Adolfo Ochagavía <[email protected]> | 2018-11-11 19:27:00 +0000 |
commit | c258b4fdb0e421813330c2428985c4537c787582 (patch) | |
tree | e53263f28c0cd07911a1e9c9ef6538c8ff0227fd /crates/ra_syntax/src/validation/byte.rs | |
parent | a4f7d7a7cd85a5b9b64a935dd84ad493b6860236 (diff) |
Add validator for byte
Diffstat (limited to 'crates/ra_syntax/src/validation/byte.rs')
-rw-r--r-- | crates/ra_syntax/src/validation/byte.rs | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs new file mode 100644 index 000000000..3d2806c4e --- /dev/null +++ b/crates/ra_syntax/src/validation/byte.rs | |||
@@ -0,0 +1,202 @@ | |||
1 | //! Validation of byte literals | ||
2 | |||
3 | use crate::{ | ||
4 | ast::{self, AstNode}, | ||
5 | string_lexing::{self, CharComponentKind}, | ||
6 | TextRange, | ||
7 | validation::char, | ||
8 | yellow::{ | ||
9 | SyntaxError, | ||
10 | SyntaxErrorKind::*, | ||
11 | }, | ||
12 | }; | ||
13 | |||
14 | pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec<SyntaxError>) { | ||
15 | let literal_text = node.text(); | ||
16 | let literal_range = node.syntax().range(); | ||
17 | let mut components = string_lexing::parse_byte_literal(literal_text); | ||
18 | let mut len = 0; | ||
19 | for component in &mut components { | ||
20 | len += 1; | ||
21 | let text = &literal_text[component.range]; | ||
22 | let range = component.range + literal_range.start(); | ||
23 | |||
24 | use self::CharComponentKind::*; | ||
25 | match component.kind { | ||
26 | AsciiEscape => validate_byte_escape(text, range, errors), | ||
27 | AsciiCodeEscape => validate_byte_code_escape(text, range, errors), | ||
28 | UnicodeEscape => errors.push(SyntaxError::new(UnicodeEscapeForbidden, range)), | ||
29 | CodePoint => { | ||
30 | let c = text.chars().next().expect("Code points should be one character long"); | ||
31 | |||
32 | // These bytes must always be escaped | ||
33 | if c == '\t' || c == '\r' || c == '\n' { | ||
34 | errors.push(SyntaxError::new(UnescapedByte, range)); | ||
35 | } | ||
36 | |||
37 | // Only ASCII bytes are allowed | ||
38 | if c > 0x7F as char { | ||
39 | errors.push(SyntaxError::new(ByteOutOfRange, range)); | ||
40 | } | ||
41 | } | ||
42 | } | ||
43 | } | ||
44 | |||
45 | if !components.has_closing_quote { | ||
46 | errors.push(SyntaxError::new(UnclosedByte, literal_range)); | ||
47 | } | ||
48 | |||
49 | if len == 0 { | ||
50 | errors.push(SyntaxError::new(EmptyByte, literal_range)); | ||
51 | } | ||
52 | |||
53 | if len > 1 { | ||
54 | errors.push(SyntaxError::new(OverlongByte, literal_range)); | ||
55 | } | ||
56 | } | ||
57 | |||
58 | fn validate_byte_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) { | ||
59 | if text.len() == 1 { | ||
60 | // Escape sequence consists only of leading `\` | ||
61 | errors.push(SyntaxError::new(EmptyByteEscape, range)); | ||
62 | } else { | ||
63 | let escape_code = text.chars().skip(1).next().unwrap(); | ||
64 | if !char::is_ascii_escape(escape_code) { | ||
65 | errors.push(SyntaxError::new(InvalidByteEscape, range)); | ||
66 | } | ||
67 | } | ||
68 | } | ||
69 | |||
70 | fn validate_byte_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) { | ||
71 | // A ByteCodeEscape has 4 chars, example: `\xDD` | ||
72 | if text.len() < 4 { | ||
73 | errors.push(SyntaxError::new(TooShortByteCodeEscape, range)); | ||
74 | } else { | ||
75 | assert!( | ||
76 | text.chars().count() == 4, | ||
77 | "ByteCodeEscape cannot be longer than 4 chars" | ||
78 | ); | ||
79 | |||
80 | if u8::from_str_radix(&text[2..], 16).is_err() { | ||
81 | errors.push(SyntaxError::new(MalformedByteCodeEscape, range)); | ||
82 | } | ||
83 | } | ||
84 | } | ||
85 | |||
#[cfg(test)]
mod test {
    use crate::SourceFileNode;

    /// Parses a source file whose only item is a constant initialised with `b'<literal>'`.
    fn build_file(literal: &str) -> SourceFileNode {
        SourceFileNode::parse(&format!("const C: u8 = b'{}';", literal))
    }

    /// Fails the test if parsing `b'<literal>'` reports any error.
    fn assert_valid_byte(literal: &str) {
        let file = build_file(literal);
        let no_errors = file.errors().len() == 0;
        assert!(
            no_errors,
            "Errors for literal '{}': {:?}",
            literal,
            file.errors()
        );
    }

    /// Fails the test if parsing `b'<literal>'` reports no error at all.
    fn assert_invalid_byte(literal: &str) {
        let file = build_file(literal);
        let has_errors = file.errors().len() > 0;
        assert!(has_errors);
    }

    /// Applies `assert_valid_byte` to every literal, in order.
    fn assert_all_valid(literals: &[&str]) {
        for literal in literals.iter() {
            assert_valid_byte(literal);
        }
    }

    /// Applies `assert_invalid_byte` to every literal, in order.
    fn assert_all_invalid(literals: &[&str]) {
        for literal in literals.iter() {
            assert_invalid_byte(literal);
        }
    }

    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..128u8 {
            let literal = (byte as char).to_string();
            match byte {
                // Must be written as escapes
                b'\n' | b'\r' | b'\t' => assert_invalid_byte(&literal),
                // Ignore character close and backslash
                b'\'' | b'\\' => {}
                _ => assert_valid_byte(&literal),
            }
        }

        // Everything above the ASCII range is rejected
        for byte in 128..=255u8 {
            let literal = (byte as char).to_string();
            assert_invalid_byte(&literal);
        }
    }

    #[test]
    fn test_unicode_codepoints() {
        assert_all_invalid(&["Ƒ", "バ", "メ", "﷽"]);
    }

    #[test]
    fn test_unicode_multiple_codepoints() {
        assert_all_invalid(&["नी", "👨👨"]);
    }

    #[test]
    fn test_valid_byte_escape() {
        assert_all_valid(&[
            r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b",
        ]);
    }

    #[test]
    fn test_invalid_byte_escape() {
        assert_all_invalid(&[r"\a", r"\?", r"\"]);
    }

    #[test]
    fn test_valid_byte_code_escape() {
        assert_all_valid(&[r"\x00", r"\x7F", r"\x55", r"\xF0"]);
    }

    #[test]
    fn test_invalid_byte_code_escape() {
        assert_all_invalid(&[r"\x", r"\x7"]);
    }

    #[test]
    fn test_invalid_unicode_escape() {
        // Syntactically well-formed unicode escapes are still forbidden in byte literals
        assert_all_invalid(&[
            r"\u{FF}",
            r"\u{0}",
            r"\u{F}",
            r"\u{10FFFF}",
            r"\u{1_0__FF___FF_____}",
        ]);

        // Malformed unicode escapes
        assert_all_invalid(&[
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ]);
    }
}