aboutsummaryrefslogtreecommitdiff
path: root/crates/syntax/src/validation.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/syntax/src/validation.rs')
-rw-r--r--crates/syntax/src/validation.rs303
1 files changed, 303 insertions, 0 deletions
diff --git a/crates/syntax/src/validation.rs b/crates/syntax/src/validation.rs
new file mode 100644
index 000000000..2dddaf09a
--- /dev/null
+++ b/crates/syntax/src/validation.rs
@@ -0,0 +1,303 @@
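//! Syntax-tree validation: checks that run over an already-built tree and
//! report `SyntaxError`s for literals, block expressions, numeric field names,
//! visibility qualifiers, range expressions and path keywords.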
2
3mod block;
4
5use crate::{
6 ast, match_ast, AstNode, SyntaxError,
7 SyntaxKind::{BYTE, BYTE_STRING, CHAR, CONST, FN, INT_NUMBER, STRING, TYPE_ALIAS},
8 SyntaxNode, SyntaxToken, TextSize, T,
9};
10use rustc_lexer::unescape::{
11 self, unescape_byte, unescape_byte_literal, unescape_char, unescape_literal, Mode,
12};
13use std::convert::TryFrom;
14
15fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> &'static str {
16 use unescape::EscapeError as EE;
17
18 #[rustfmt::skip]
19 let err_message = match err {
20 EE::ZeroChars => {
21 "Literal must not be empty"
22 }
23 EE::MoreThanOneChar => {
24 "Literal must be one character long"
25 }
26 EE::LoneSlash => {
27 "Character must be escaped: `\\`"
28 }
29 EE::InvalidEscape => {
30 "Invalid escape"
31 }
32 EE::BareCarriageReturn | EE::BareCarriageReturnInRawString => {
33 "Character must be escaped: `\r`"
34 }
35 EE::EscapeOnlyChar => {
36 "Escape character `\\` must be escaped itself"
37 }
38 EE::TooShortHexEscape => {
39 "ASCII hex escape code must have exactly two digits"
40 }
41 EE::InvalidCharInHexEscape => {
42 "ASCII hex escape code must contain only hex characters"
43 }
44 EE::OutOfRangeHexEscape => {
45 "ASCII hex escape code must be at most 0x7F"
46 }
47 EE::NoBraceInUnicodeEscape => {
48 "Missing `{` to begin the unicode escape"
49 }
50 EE::InvalidCharInUnicodeEscape => {
51 "Unicode escape must contain only hex characters and underscores"
52 }
53 EE::EmptyUnicodeEscape => {
54 "Unicode escape must not be empty"
55 }
56 EE::UnclosedUnicodeEscape => {
57 "Missing `}` to terminate the unicode escape"
58 }
59 EE::LeadingUnderscoreUnicodeEscape => {
60 "Unicode escape code must not begin with an underscore"
61 }
62 EE::OverlongUnicodeEscape => {
63 "Unicode escape code must have at most 6 digits"
64 }
65 EE::LoneSurrogateUnicodeEscape => {
66 "Unicode escape code must not be a surrogate"
67 }
68 EE::OutOfRangeUnicodeEscape => {
69 "Unicode escape code must be at most 0x10FFFF"
70 }
71 EE::UnicodeEscapeInByte => {
72 "Byte literals must not contain unicode escapes"
73 }
74 EE::NonAsciiCharInByte | EE::NonAsciiCharInByteString => {
75 "Byte literals must not contain non-ASCII characters"
76 }
77 };
78
79 err_message
80}
81
82pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
83 // FIXME:
84 // * Add unescape validation of raw string literals and raw byte string literals
85 // * Add validation of doc comments are being attached to nodes
86
87 let mut errors = Vec::new();
88 for node in root.descendants() {
89 match_ast! {
90 match node {
91 ast::Literal(it) => validate_literal(it, &mut errors),
92 ast::BlockExpr(it) => block::validate_block_expr(it, &mut errors),
93 ast::FieldExpr(it) => validate_numeric_name(it.name_ref(), &mut errors),
94 ast::RecordExprField(it) => validate_numeric_name(it.name_ref(), &mut errors),
95 ast::Visibility(it) => validate_visibility(it, &mut errors),
96 ast::RangeExpr(it) => validate_range_expr(it, &mut errors),
97 ast::PathSegment(it) => validate_path_keywords(it, &mut errors),
98 _ => (),
99 }
100 }
101 }
102 errors
103}
104
105fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
106 // FIXME: move this function to outer scope (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366196658)
107 fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
108 text.rfind(end_delimiter).and_then(|end| text.get(prefix_len..end))
109 }
110
111 let token = literal.token();
112 let text = token.text().as_str();
113
114 // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366199205)
115 let mut push_err = |prefix_len, (off, err): (usize, unescape::EscapeError)| {
116 let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap();
117 acc.push(SyntaxError::new_at_offset(rustc_unescape_error_to_string(err), off));
118 };
119
120 match token.kind() {
121 BYTE => {
122 if let Some(Err(e)) = unquote(text, 2, '\'').map(unescape_byte) {
123 push_err(2, e);
124 }
125 }
126 CHAR => {
127 if let Some(Err(e)) = unquote(text, 1, '\'').map(unescape_char) {
128 push_err(1, e);
129 }
130 }
131 BYTE_STRING => {
132 if let Some(without_quotes) = unquote(text, 2, '"') {
133 unescape_byte_literal(without_quotes, Mode::ByteStr, &mut |range, char| {
134 if let Err(err) = char {
135 push_err(2, (range.start, err));
136 }
137 })
138 }
139 }
140 STRING => {
141 if let Some(without_quotes) = unquote(text, 1, '"') {
142 unescape_literal(without_quotes, Mode::Str, &mut |range, char| {
143 if let Err(err) = char {
144 push_err(1, (range.start, err));
145 }
146 })
147 }
148 }
149 _ => (),
150 }
151}
152
153pub(crate) fn validate_block_structure(root: &SyntaxNode) {
154 let mut stack = Vec::new();
155 for node in root.descendants() {
156 match node.kind() {
157 T!['{'] => stack.push(node),
158 T!['}'] => {
159 if let Some(pair) = stack.pop() {
160 assert_eq!(
161 node.parent(),
162 pair.parent(),
163 "\nunpaired curleys:\n{}\n{:#?}\n",
164 root.text(),
165 root,
166 );
167 assert!(
168 node.next_sibling().is_none() && pair.prev_sibling().is_none(),
169 "\nfloating curlys at {:?}\nfile:\n{}\nerror:\n{}\n",
170 node,
171 root.text(),
172 node.text(),
173 );
174 }
175 }
176 _ => (),
177 }
178 }
179}
180
181fn validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>) {
182 if let Some(int_token) = int_token(name_ref) {
183 if int_token.text().chars().any(|c| !c.is_digit(10)) {
184 errors.push(SyntaxError::new(
185 "Tuple (struct) field access is only allowed through \
186 decimal integers with no underscores or suffix",
187 int_token.text_range(),
188 ));
189 }
190 }
191
192 fn int_token(name_ref: Option<ast::NameRef>) -> Option<SyntaxToken> {
193 name_ref?.syntax().first_child_or_token()?.into_token().filter(|it| it.kind() == INT_NUMBER)
194 }
195}
196
197fn validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>) {
198 let parent = match vis.syntax().parent() {
199 Some(it) => it,
200 None => return,
201 };
202 match parent.kind() {
203 FN | CONST | TYPE_ALIAS => (),
204 _ => return,
205 }
206
207 let impl_def = match parent.parent().and_then(|it| it.parent()).and_then(ast::Impl::cast) {
208 Some(it) => it,
209 None => return,
210 };
211 if impl_def.trait_().is_some() {
212 errors.push(SyntaxError::new("Unnecessary visibility qualifier", vis.syntax.text_range()));
213 }
214}
215
216fn validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>) {
217 if expr.op_kind() == Some(ast::RangeOp::Inclusive) && expr.end().is_none() {
218 errors.push(SyntaxError::new(
219 "An inclusive range must have an end expression",
220 expr.syntax().text_range(),
221 ));
222 }
223}
224
225fn validate_path_keywords(segment: ast::PathSegment, errors: &mut Vec<SyntaxError>) {
226 use ast::PathSegmentKind;
227
228 let path = segment.parent_path();
229 let is_path_start = segment.coloncolon_token().is_none() && path.qualifier().is_none();
230
231 if let Some(token) = segment.self_token() {
232 if !is_path_start {
233 errors.push(SyntaxError::new(
234 "The `self` keyword is only allowed as the first segment of a path",
235 token.text_range(),
236 ));
237 }
238 } else if let Some(token) = segment.crate_token() {
239 if !is_path_start || use_prefix(path).is_some() {
240 errors.push(SyntaxError::new(
241 "The `crate` keyword is only allowed as the first segment of a path",
242 token.text_range(),
243 ));
244 }
245 } else if let Some(token) = segment.super_token() {
246 if !all_supers(&path) {
247 errors.push(SyntaxError::new(
248 "The `super` keyword may only be preceded by other `super`s",
249 token.text_range(),
250 ));
251 return;
252 }
253
254 let mut curr_path = path;
255 while let Some(prefix) = use_prefix(curr_path) {
256 if !all_supers(&prefix) {
257 errors.push(SyntaxError::new(
258 "The `super` keyword may only be preceded by other `super`s",
259 token.text_range(),
260 ));
261 return;
262 }
263 curr_path = prefix;
264 }
265 }
266
267 fn use_prefix(mut path: ast::Path) -> Option<ast::Path> {
268 for node in path.syntax().ancestors().skip(1) {
269 match_ast! {
270 match node {
271 ast::UseTree(it) => if let Some(tree_path) = it.path() {
272 // Even a top-level path exists within a `UseTree` so we must explicitly
273 // allow our path but disallow anything else
274 if tree_path != path {
275 return Some(tree_path);
276 }
277 },
278 ast::UseTreeList(_it) => continue,
279 ast::Path(parent) => path = parent,
280 _ => return None,
281 }
282 };
283 }
284 return None;
285 }
286
287 fn all_supers(path: &ast::Path) -> bool {
288 let segment = match path.segment() {
289 Some(it) => it,
290 None => return false,
291 };
292
293 if segment.kind() != Some(PathSegmentKind::SuperKw) {
294 return false;
295 }
296
297 if let Some(ref subpath) = path.qualifier() {
298 return all_supers(subpath);
299 }
300
301 return true;
302 }
303}