diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2020-08-12 17:31:42 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2020-08-12 17:31:42 +0100 |
commit | d583f2c46d22cf8d643ebf98be9cb7059a304431 (patch) | |
tree | 9d898eb9600b0c36a74e4f95238f679c683fa566 /crates/syntax/src/validation.rs | |
parent | 3d6889cba72a9d02199f7adaa2ecc69bc30af834 (diff) | |
parent | a1c187eef3ba08076aedb5154929f7eda8d1b424 (diff) |
Merge #5729
5729: Rename ra_syntax -> syntax
r=matklad a=matklad
bors r+
🤖
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/syntax/src/validation.rs')
-rw-r--r-- | crates/syntax/src/validation.rs | 303 |
1 files changed, 303 insertions, 0 deletions
diff --git a/crates/syntax/src/validation.rs b/crates/syntax/src/validation.rs new file mode 100644 index 000000000..2dddaf09a --- /dev/null +++ b/crates/syntax/src/validation.rs | |||
@@ -0,0 +1,303 @@ | |||
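1 | //! Extra validation over a parsed syntax tree: reports `SyntaxError`s for literal escapes, block expressions, numeric field names, visibility qualifiers, range expressions and path keywords. | |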
2 | |||
3 | mod block; | ||
4 | |||
5 | use crate::{ | ||
6 | ast, match_ast, AstNode, SyntaxError, | ||
7 | SyntaxKind::{BYTE, BYTE_STRING, CHAR, CONST, FN, INT_NUMBER, STRING, TYPE_ALIAS}, | ||
8 | SyntaxNode, SyntaxToken, TextSize, T, | ||
9 | }; | ||
10 | use rustc_lexer::unescape::{ | ||
11 | self, unescape_byte, unescape_byte_literal, unescape_char, unescape_literal, Mode, | ||
12 | }; | ||
13 | use std::convert::TryFrom; | ||
14 | |||
15 | fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> &'static str { | ||
16 | use unescape::EscapeError as EE; | ||
17 | |||
18 | #[rustfmt::skip] | ||
19 | let err_message = match err { | ||
20 | EE::ZeroChars => { | ||
21 | "Literal must not be empty" | ||
22 | } | ||
23 | EE::MoreThanOneChar => { | ||
24 | "Literal must be one character long" | ||
25 | } | ||
26 | EE::LoneSlash => { | ||
27 | "Character must be escaped: `\\`" | ||
28 | } | ||
29 | EE::InvalidEscape => { | ||
30 | "Invalid escape" | ||
31 | } | ||
32 | EE::BareCarriageReturn | EE::BareCarriageReturnInRawString => { | ||
33 | "Character must be escaped: `\r`" | ||
34 | } | ||
35 | EE::EscapeOnlyChar => { | ||
36 | "Escape character `\\` must be escaped itself" | ||
37 | } | ||
38 | EE::TooShortHexEscape => { | ||
39 | "ASCII hex escape code must have exactly two digits" | ||
40 | } | ||
41 | EE::InvalidCharInHexEscape => { | ||
42 | "ASCII hex escape code must contain only hex characters" | ||
43 | } | ||
44 | EE::OutOfRangeHexEscape => { | ||
45 | "ASCII hex escape code must be at most 0x7F" | ||
46 | } | ||
47 | EE::NoBraceInUnicodeEscape => { | ||
48 | "Missing `{` to begin the unicode escape" | ||
49 | } | ||
50 | EE::InvalidCharInUnicodeEscape => { | ||
51 | "Unicode escape must contain only hex characters and underscores" | ||
52 | } | ||
53 | EE::EmptyUnicodeEscape => { | ||
54 | "Unicode escape must not be empty" | ||
55 | } | ||
56 | EE::UnclosedUnicodeEscape => { | ||
57 | "Missing `}` to terminate the unicode escape" | ||
58 | } | ||
59 | EE::LeadingUnderscoreUnicodeEscape => { | ||
60 | "Unicode escape code must not begin with an underscore" | ||
61 | } | ||
62 | EE::OverlongUnicodeEscape => { | ||
63 | "Unicode escape code must have at most 6 digits" | ||
64 | } | ||
65 | EE::LoneSurrogateUnicodeEscape => { | ||
66 | "Unicode escape code must not be a surrogate" | ||
67 | } | ||
68 | EE::OutOfRangeUnicodeEscape => { | ||
69 | "Unicode escape code must be at most 0x10FFFF" | ||
70 | } | ||
71 | EE::UnicodeEscapeInByte => { | ||
72 | "Byte literals must not contain unicode escapes" | ||
73 | } | ||
74 | EE::NonAsciiCharInByte | EE::NonAsciiCharInByteString => { | ||
75 | "Byte literals must not contain non-ASCII characters" | ||
76 | } | ||
77 | }; | ||
78 | |||
79 | err_message | ||
80 | } | ||
81 | |||
82 | pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> { | ||
83 | // FIXME: | ||
84 | // * Add unescape validation of raw string literals and raw byte string literals | ||
85 | // * Add validation of doc comments are being attached to nodes | ||
86 | |||
87 | let mut errors = Vec::new(); | ||
88 | for node in root.descendants() { | ||
89 | match_ast! { | ||
90 | match node { | ||
91 | ast::Literal(it) => validate_literal(it, &mut errors), | ||
92 | ast::BlockExpr(it) => block::validate_block_expr(it, &mut errors), | ||
93 | ast::FieldExpr(it) => validate_numeric_name(it.name_ref(), &mut errors), | ||
94 | ast::RecordExprField(it) => validate_numeric_name(it.name_ref(), &mut errors), | ||
95 | ast::Visibility(it) => validate_visibility(it, &mut errors), | ||
96 | ast::RangeExpr(it) => validate_range_expr(it, &mut errors), | ||
97 | ast::PathSegment(it) => validate_path_keywords(it, &mut errors), | ||
98 | _ => (), | ||
99 | } | ||
100 | } | ||
101 | } | ||
102 | errors | ||
103 | } | ||
104 | |||
105 | fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) { | ||
106 | // FIXME: move this function to outer scope (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366196658) | ||
107 | fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> { | ||
108 | text.rfind(end_delimiter).and_then(|end| text.get(prefix_len..end)) | ||
109 | } | ||
110 | |||
111 | let token = literal.token(); | ||
112 | let text = token.text().as_str(); | ||
113 | |||
114 | // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366199205) | ||
115 | let mut push_err = |prefix_len, (off, err): (usize, unescape::EscapeError)| { | ||
116 | let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap(); | ||
117 | acc.push(SyntaxError::new_at_offset(rustc_unescape_error_to_string(err), off)); | ||
118 | }; | ||
119 | |||
120 | match token.kind() { | ||
121 | BYTE => { | ||
122 | if let Some(Err(e)) = unquote(text, 2, '\'').map(unescape_byte) { | ||
123 | push_err(2, e); | ||
124 | } | ||
125 | } | ||
126 | CHAR => { | ||
127 | if let Some(Err(e)) = unquote(text, 1, '\'').map(unescape_char) { | ||
128 | push_err(1, e); | ||
129 | } | ||
130 | } | ||
131 | BYTE_STRING => { | ||
132 | if let Some(without_quotes) = unquote(text, 2, '"') { | ||
133 | unescape_byte_literal(without_quotes, Mode::ByteStr, &mut |range, char| { | ||
134 | if let Err(err) = char { | ||
135 | push_err(2, (range.start, err)); | ||
136 | } | ||
137 | }) | ||
138 | } | ||
139 | } | ||
140 | STRING => { | ||
141 | if let Some(without_quotes) = unquote(text, 1, '"') { | ||
142 | unescape_literal(without_quotes, Mode::Str, &mut |range, char| { | ||
143 | if let Err(err) = char { | ||
144 | push_err(1, (range.start, err)); | ||
145 | } | ||
146 | }) | ||
147 | } | ||
148 | } | ||
149 | _ => (), | ||
150 | } | ||
151 | } | ||
152 | |||
153 | pub(crate) fn validate_block_structure(root: &SyntaxNode) { | ||
154 | let mut stack = Vec::new(); | ||
155 | for node in root.descendants() { | ||
156 | match node.kind() { | ||
157 | T!['{'] => stack.push(node), | ||
158 | T!['}'] => { | ||
159 | if let Some(pair) = stack.pop() { | ||
160 | assert_eq!( | ||
161 | node.parent(), | ||
162 | pair.parent(), | ||
163 | "\nunpaired curleys:\n{}\n{:#?}\n", | ||
164 | root.text(), | ||
165 | root, | ||
166 | ); | ||
167 | assert!( | ||
168 | node.next_sibling().is_none() && pair.prev_sibling().is_none(), | ||
169 | "\nfloating curlys at {:?}\nfile:\n{}\nerror:\n{}\n", | ||
170 | node, | ||
171 | root.text(), | ||
172 | node.text(), | ||
173 | ); | ||
174 | } | ||
175 | } | ||
176 | _ => (), | ||
177 | } | ||
178 | } | ||
179 | } | ||
180 | |||
181 | fn validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>) { | ||
182 | if let Some(int_token) = int_token(name_ref) { | ||
183 | if int_token.text().chars().any(|c| !c.is_digit(10)) { | ||
184 | errors.push(SyntaxError::new( | ||
185 | "Tuple (struct) field access is only allowed through \ | ||
186 | decimal integers with no underscores or suffix", | ||
187 | int_token.text_range(), | ||
188 | )); | ||
189 | } | ||
190 | } | ||
191 | |||
192 | fn int_token(name_ref: Option<ast::NameRef>) -> Option<SyntaxToken> { | ||
193 | name_ref?.syntax().first_child_or_token()?.into_token().filter(|it| it.kind() == INT_NUMBER) | ||
194 | } | ||
195 | } | ||
196 | |||
197 | fn validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>) { | ||
198 | let parent = match vis.syntax().parent() { | ||
199 | Some(it) => it, | ||
200 | None => return, | ||
201 | }; | ||
202 | match parent.kind() { | ||
203 | FN | CONST | TYPE_ALIAS => (), | ||
204 | _ => return, | ||
205 | } | ||
206 | |||
207 | let impl_def = match parent.parent().and_then(|it| it.parent()).and_then(ast::Impl::cast) { | ||
208 | Some(it) => it, | ||
209 | None => return, | ||
210 | }; | ||
211 | if impl_def.trait_().is_some() { | ||
212 | errors.push(SyntaxError::new("Unnecessary visibility qualifier", vis.syntax.text_range())); | ||
213 | } | ||
214 | } | ||
215 | |||
216 | fn validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>) { | ||
217 | if expr.op_kind() == Some(ast::RangeOp::Inclusive) && expr.end().is_none() { | ||
218 | errors.push(SyntaxError::new( | ||
219 | "An inclusive range must have an end expression", | ||
220 | expr.syntax().text_range(), | ||
221 | )); | ||
222 | } | ||
223 | } | ||
224 | |||
225 | fn validate_path_keywords(segment: ast::PathSegment, errors: &mut Vec<SyntaxError>) { | ||
226 | use ast::PathSegmentKind; | ||
227 | |||
228 | let path = segment.parent_path(); | ||
229 | let is_path_start = segment.coloncolon_token().is_none() && path.qualifier().is_none(); | ||
230 | |||
231 | if let Some(token) = segment.self_token() { | ||
232 | if !is_path_start { | ||
233 | errors.push(SyntaxError::new( | ||
234 | "The `self` keyword is only allowed as the first segment of a path", | ||
235 | token.text_range(), | ||
236 | )); | ||
237 | } | ||
238 | } else if let Some(token) = segment.crate_token() { | ||
239 | if !is_path_start || use_prefix(path).is_some() { | ||
240 | errors.push(SyntaxError::new( | ||
241 | "The `crate` keyword is only allowed as the first segment of a path", | ||
242 | token.text_range(), | ||
243 | )); | ||
244 | } | ||
245 | } else if let Some(token) = segment.super_token() { | ||
246 | if !all_supers(&path) { | ||
247 | errors.push(SyntaxError::new( | ||
248 | "The `super` keyword may only be preceded by other `super`s", | ||
249 | token.text_range(), | ||
250 | )); | ||
251 | return; | ||
252 | } | ||
253 | |||
254 | let mut curr_path = path; | ||
255 | while let Some(prefix) = use_prefix(curr_path) { | ||
256 | if !all_supers(&prefix) { | ||
257 | errors.push(SyntaxError::new( | ||
258 | "The `super` keyword may only be preceded by other `super`s", | ||
259 | token.text_range(), | ||
260 | )); | ||
261 | return; | ||
262 | } | ||
263 | curr_path = prefix; | ||
264 | } | ||
265 | } | ||
266 | |||
267 | fn use_prefix(mut path: ast::Path) -> Option<ast::Path> { | ||
268 | for node in path.syntax().ancestors().skip(1) { | ||
269 | match_ast! { | ||
270 | match node { | ||
271 | ast::UseTree(it) => if let Some(tree_path) = it.path() { | ||
272 | // Even a top-level path exists within a `UseTree` so we must explicitly | ||
273 | // allow our path but disallow anything else | ||
274 | if tree_path != path { | ||
275 | return Some(tree_path); | ||
276 | } | ||
277 | }, | ||
278 | ast::UseTreeList(_it) => continue, | ||
279 | ast::Path(parent) => path = parent, | ||
280 | _ => return None, | ||
281 | } | ||
282 | }; | ||
283 | } | ||
284 | return None; | ||
285 | } | ||
286 | |||
287 | fn all_supers(path: &ast::Path) -> bool { | ||
288 | let segment = match path.segment() { | ||
289 | Some(it) => it, | ||
290 | None => return false, | ||
291 | }; | ||
292 | |||
293 | if segment.kind() != Some(PathSegmentKind::SuperKw) { | ||
294 | return false; | ||
295 | } | ||
296 | |||
297 | if let Some(ref subpath) = path.qualifier() { | ||
298 | return all_supers(subpath); | ||
299 | } | ||
300 | |||
301 | return true; | ||
302 | } | ||
303 | } | ||