Merge #5729

5729: Rename ra_syntax -> syntax r=matklad a=matklad bors r+ 🤖 Co-authored-by: Aleksey Kladov <[email protected]>
author: bors[bot] <26634292+bors[bot]@users.noreply.github.com> 2020-08-12 17:31:42 +0100
committer: GitHub <[email protected]> 2020-08-12 17:31:42 +0100
commit: d583f2c46d22cf8d643ebf98be9cb7059a304431 (patch)
tree: 9d898eb9600b0c36a74e4f95238f679c683fa566 /crates/syntax/src/validation.rs
parent: 3d6889cba72a9d02199f7adaa2ecc69bc30af834 (diff)
parent: a1c187eef3ba08076aedb5154929f7eda8d1b424 (diff)
1 files changed, 303 insertions, 0 deletions
diff --git a/crates/syntax/src/validation.rs b/crates/syntax/src/validation.rs
new file mode 100644
index 000000000..2dddaf09a
--- /dev/null
+++ b/crates/syntax/src/validation.rs
@@ -0,0 +1,303 @@
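+//! Validation passes over a parsed syntax tree: they report errors in literals, block
+//! expressions, numeric field names, visibility qualifiers, range expressions and path keywords.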
+mod block;
+use crate::{
+    ast, match_ast, AstNode, SyntaxError,
+    SyntaxKind::{BYTE, BYTE_STRING, CHAR, CONST, FN, INT_NUMBER, STRING, TYPE_ALIAS},
+    SyntaxNode, SyntaxToken, TextSize, T,
+};
+use rustc_lexer::unescape::{
+    self, unescape_byte, unescape_byte_literal, unescape_char, unescape_literal, Mode,
+};
+use std::convert::TryFrom;
+fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> &'static str {
+    use unescape::EscapeError as EE;
+    #[rustfmt::skip]
+    let err_message = match err {
+        EE::ZeroChars => {
+            "Literal must not be empty"
+        }
+        EE::MoreThanOneChar => {
+            "Literal must be one character long"
+        }
+        EE::LoneSlash => {
+            "Character must be escaped: `\\`"
+        }
+        EE::InvalidEscape => {
+            "Invalid escape"
+        }
+        EE::BareCarriageReturn | EE::BareCarriageReturnInRawString => {
+            "Character must be escaped: `\r`"
+        }
+        EE::EscapeOnlyChar => {
+            "Escape character `\\` must be escaped itself"
+        }
+        EE::TooShortHexEscape => {
+            "ASCII hex escape code must have exactly two digits"
+        }
+        EE::InvalidCharInHexEscape => {
+            "ASCII hex escape code must contain only hex characters"
+        }
+        EE::OutOfRangeHexEscape => {
+            "ASCII hex escape code must be at most 0x7F"
+        }
+        EE::NoBraceInUnicodeEscape => {
+            "Missing `{` to begin the unicode escape"
+        }
+        EE::InvalidCharInUnicodeEscape => {
+            "Unicode escape must contain only hex characters and underscores"
+        }
+        EE::EmptyUnicodeEscape => {
+            "Unicode escape must not be empty"
+        }
+        EE::UnclosedUnicodeEscape => {
+            "Missing `}` to terminate the unicode escape"
+        }
+        EE::LeadingUnderscoreUnicodeEscape => {
+            "Unicode escape code must not begin with an underscore"
+        }
+        EE::OverlongUnicodeEscape => {
+            "Unicode escape code must have at most 6 digits"
+        }
+        EE::LoneSurrogateUnicodeEscape => {
+            "Unicode escape code must not be a surrogate"
+        }
+        EE::OutOfRangeUnicodeEscape => {
+            "Unicode escape code must be at most 0x10FFFF"
+        }
+        EE::UnicodeEscapeInByte => {
+            "Byte literals must not contain unicode escapes"
+        }
+        EE::NonAsciiCharInByte | EE::NonAsciiCharInByteString => {
+            "Byte literals must not contain non-ASCII characters"
+        }
+    };
+    err_message
+}
+/// Visits every descendant node of `root`, runs the validator matching the node's AST type and
+/// returns all syntax errors those validators recorded.
+pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
+    // FIXME:
+    // * Add unescape validation of raw string literals and raw byte string literals
+    // * Add validation of doc comments are being attached to nodes
+    let mut errors = Vec::new();
+    root.descendants().for_each(|node| {
+        match_ast! {
+            match node {
+                ast::Literal(it) => validate_literal(it, &mut errors),
+                ast::BlockExpr(it) => block::validate_block_expr(it, &mut errors),
+                ast::FieldExpr(it) => validate_numeric_name(it.name_ref(), &mut errors),
+                ast::RecordExprField(it) => validate_numeric_name(it.name_ref(), &mut errors),
+                ast::Visibility(it) => validate_visibility(it, &mut errors),
+                ast::RangeExpr(it) => validate_range_expr(it, &mut errors),
+                ast::PathSegment(it) => validate_path_keywords(it, &mut errors),
+                _ => (),
+            }
+        }
+    });
+    errors
+}
+fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
+    // FIXME: move this function to outer scope (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366196658)
+    fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
+        text.rfind(end_delimiter).and_then(|end| text.get(prefix_len..end))
+    }
+    let token = literal.token();
+    let text = token.text().as_str();
+    // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366199205)
+    let mut push_err = |prefix_len, (off, err): (usize, unescape::EscapeError)| {
+        let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap();
+        acc.push(SyntaxError::new_at_offset(rustc_unescape_error_to_string(err), off));
+    };
+    match token.kind() {
+        BYTE => {
+            if let Some(Err(e)) = unquote(text, 2, '\'').map(unescape_byte) {
+                push_err(2, e);
+            }
+        }
+        CHAR => {
+            if let Some(Err(e)) = unquote(text, 1, '\'').map(unescape_char) {
+                push_err(1, e);
+            }
+        }
+        BYTE_STRING => {
+            if let Some(without_quotes) = unquote(text, 2, '"') {
+                unescape_byte_literal(without_quotes, Mode::ByteStr, &mut |range, char| {
+                    if let Err(err) = char {
+                        push_err(2, (range.start, err));
+                    }
+                })
+            }
+        }
+        STRING => {
+            if let Some(without_quotes) = unquote(text, 1, '"') {
+                unescape_literal(without_quotes, Mode::Str, &mut |range, char| {
+                    if let Err(err) = char {
+                        push_err(1, (range.start, err));
+                    }
+                })
+            }
+        }
+        _ => (),
+    }
+}
+/// Asserts that curly braces in the tree rooted at `root` are properly paired.
+///
+/// Every `}` is matched against the most recently seen unmatched `{`. Both braces must share
+/// the same parent, the `{` must be the first element of that parent and the `}` the last one.
+/// A `}` with no pending `{` is ignored.
+///
+/// # Panics
+///
+/// Panics when a matched pair of braces violates either of the conditions above.
+pub(crate) fn validate_block_structure(root: &SyntaxNode) {
+    let mut stack = Vec::new();
+    // Braces are tokens, and `descendants()` yields nodes only, so the walk has to include
+    // tokens; otherwise neither brace arm can ever match and the checks below never run.
+    for element in root.descendants_with_tokens() {
+        match element.kind() {
+            T!['{'] => stack.push(element),
+            T!['}'] => {
+                if let Some(pair) = stack.pop() {
+                    assert_eq!(
+                        element.parent(),
+                        pair.parent(),
+                        "\nunpaired curlys:\n{}\n{:#?}\n",
+                        root.text(),
+                        root,
+                    );
+                    // Siblings are checked at token granularity for the same reason.
+                    assert!(
+                        element.next_sibling_or_token().is_none()
+                            && pair.prev_sibling_or_token().is_none(),
+                        "\nfloating curlys at {:?}\nfile:\n{}\nerror:\n{:?}\n",
+                        element,
+                        root.text(),
+                        element.parent(),
+                    );
+                }
+            }
+            _ => (),
+        }
+    }
+}
+fn validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>) {
+    if let Some(int_token) = int_token(name_ref) {
+        if int_token.text().chars().any(|c| !c.is_digit(10)) {
+            errors.push(SyntaxError::new(
+                "Tuple (struct) field access is only allowed through \
+                decimal integers with no underscores or suffix",
+                int_token.text_range(),
+            ));
+        }
+    }
+    fn int_token(name_ref: Option<ast::NameRef>) -> Option<SyntaxToken> {
+        name_ref?.syntax().first_child_or_token()?.into_token().filter(|it| it.kind() == INT_NUMBER)
+    }
+}
+/// Reports a visibility qualifier attached to an `fn`, `const` or type alias whose grandparent
+/// node is an `impl` that names a trait.
+fn validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>) {
+    let inside_trait_impl = vis
+        .syntax()
+        .parent()
+        // Only these three item kinds are checked; any other owner is left alone.
+        .filter(|owner| match owner.kind() {
+            FN | CONST | TYPE_ALIAS => true,
+            _ => false,
+        })
+        .and_then(|owner| owner.parent())
+        .and_then(|outer| outer.parent())
+        .and_then(ast::Impl::cast)
+        .map_or(false, |impl_def| impl_def.trait_().is_some());
+    if inside_trait_impl {
+        errors
+            .push(SyntaxError::new("Unnecessary visibility qualifier", vis.syntax().text_range()));
+    }
+}
+/// Reports an inclusive range expression that has no end expression.
+fn validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>) {
+    let is_inclusive = expr.op_kind() == Some(ast::RangeOp::Inclusive);
+    if !is_inclusive || expr.end().is_some() {
+        return;
+    }
+    errors.push(SyntaxError::new(
+        "An inclusive range must have an end expression",
+        expr.syntax().text_range(),
+    ));
+}
+/// Reports `self`, `crate` and `super` keywords that appear in a disallowed position of a path.
+///
+/// * `self` and `crate` must be the first segment of their path; `crate` additionally must not
+///   sit behind a `use` prefix.
+/// * `super` may only be preceded by other `super` segments, both in its own path and in every
+///   enclosing `use` prefix.
+fn validate_path_keywords(segment: ast::PathSegment, errors: &mut Vec<SyntaxError>) {
+    use ast::PathSegmentKind;
+    let path = segment.parent_path();
+    let is_path_start = segment.coloncolon_token().is_none() && path.qualifier().is_none();
+    if let Some(token) = segment.self_token() {
+        if !is_path_start {
+            errors.push(SyntaxError::new(
+                "The `self` keyword is only allowed as the first segment of a path",
+                token.text_range(),
+            ));
+        }
+    } else if let Some(token) = segment.crate_token() {
+        if !is_path_start || use_prefix(path).is_some() {
+            errors.push(SyntaxError::new(
+                "The `crate` keyword is only allowed as the first segment of a path",
+                token.text_range(),
+            ));
+        }
+    } else if let Some(token) = segment.super_token() {
+        // Check the segment's own path first, then each enclosing `use` prefix in turn,
+        // stopping at the first one that is not made of `super`s only.
+        let mut current = path;
+        loop {
+            if !all_supers(&current) {
+                errors.push(SyntaxError::new(
+                    "The `super` keyword may only be preceded by other `super`s",
+                    token.text_range(),
+                ));
+                return;
+            }
+            match use_prefix(current) {
+                Some(prefix) => current = prefix,
+                None => break,
+            }
+        }
+    }
+    /// Walks up from `path` through enclosing paths, use trees and use-tree lists and returns
+    /// the path of the first use tree whose path differs from the outermost path seen so far.
+    fn use_prefix(mut path: ast::Path) -> Option<ast::Path> {
+        for ancestor in path.syntax().ancestors().skip(1) {
+            match_ast! {
+                match ancestor {
+                    // Even a top-level path exists within a `UseTree` so we must explicitly
+                    // allow our path but disallow anything else
+                    ast::UseTree(tree) => match tree.path() {
+                        Some(tree_path) if tree_path != path => return Some(tree_path),
+                        _ => (),
+                    },
+                    ast::UseTreeList(_list) => continue,
+                    ast::Path(outer) => path = outer,
+                    _ => return None,
+                }
+            };
+        }
+        None
+    }
+    /// Returns `true` when `path` and all of its qualifiers consist of `super` segments only.
+    fn all_supers(path: &ast::Path) -> bool {
+        let ends_with_super =
+            path.segment().map_or(false, |seg| seg.kind() == Some(PathSegmentKind::SuperKw));
+        ends_with_super && path.qualifier().map_or(true, |qualifier| all_supers(&qualifier))
+    }
+}
author	bors[bot] <26634292+bors[bot]@users.noreply.github.com>	2020-08-12 17:31:42 +0100
committer	GitHub <[email protected]>	2020-08-12 17:31:42 +0100
commit	d583f2c46d22cf8d643ebf98be9cb7059a304431 (patch)
tree	9d898eb9600b0c36a74e4f95238f679c683fa566 /crates/syntax/src/validation.rs
parent	3d6889cba72a9d02199f7adaa2ecc69bc30af834 (diff)
parent	a1c187eef3ba08076aedb5154929f7eda8d1b424 (diff)

diff --git a/crates/syntax/src/validation.rs b/crates/syntax/src/validation.rs new file mode 100644 index 000000000..2dddaf09a --- /dev/null +++ b/crates/syntax/src/validation.rs
@@ -0,0 +1,303 @@
	1	//! FIXME: write short doc here
	2
	3	mod block;
	4
	5	use crate::{
	6	ast, match_ast, AstNode, SyntaxError,
	7	SyntaxKind::{BYTE, BYTE_STRING, CHAR, CONST, FN, INT_NUMBER, STRING, TYPE_ALIAS},
	8	SyntaxNode, SyntaxToken, TextSize, T,
	9	};
	10	use rustc_lexer::unescape::{
	11	self, unescape_byte, unescape_byte_literal, unescape_char, unescape_literal, Mode,
	12	};
	13	use std::convert::TryFrom;
	14
	15	fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> &'static str {
	16	use unescape::EscapeError as EE;
	17
	18	#[rustfmt::skip]
	19	let err_message = match err {
	20	EE::ZeroChars => {
	21	"Literal must not be empty"
	22	}
	23	EE::MoreThanOneChar => {
	24	"Literal must be one character long"
	25	}
	26	EE::LoneSlash => {
	27	"Character must be escaped: `\\`"
	28	}
	29	EE::InvalidEscape => {
	30	"Invalid escape"
	31	}
	32	EE::BareCarriageReturn \| EE::BareCarriageReturnInRawString => {
	33	"Character must be escaped: `\r`"
	34	}
	35	EE::EscapeOnlyChar => {
	36	"Escape character `\\` must be escaped itself"
	37	}
	38	EE::TooShortHexEscape => {
	39	"ASCII hex escape code must have exactly two digits"
	40	}
	41	EE::InvalidCharInHexEscape => {
	42	"ASCII hex escape code must contain only hex characters"
	43	}
	44	EE::OutOfRangeHexEscape => {
	45	"ASCII hex escape code must be at most 0x7F"
	46	}
	47	EE::NoBraceInUnicodeEscape => {
	48	"Missing `{` to begin the unicode escape"
	49	}
	50	EE::InvalidCharInUnicodeEscape => {
	51	"Unicode escape must contain only hex characters and underscores"
	52	}
	53	EE::EmptyUnicodeEscape => {
	54	"Unicode escape must not be empty"
	55	}
	56	EE::UnclosedUnicodeEscape => {
	57	"Missing `}` to terminate the unicode escape"
	58	}
	59	EE::LeadingUnderscoreUnicodeEscape => {
	60	"Unicode escape code must not begin with an underscore"
	61	}
	62	EE::OverlongUnicodeEscape => {
	63	"Unicode escape code must have at most 6 digits"
	64	}
	65	EE::LoneSurrogateUnicodeEscape => {
	66	"Unicode escape code must not be a surrogate"
	67	}
	68	EE::OutOfRangeUnicodeEscape => {
	69	"Unicode escape code must be at most 0x10FFFF"
	70	}
	71	EE::UnicodeEscapeInByte => {
	72	"Byte literals must not contain unicode escapes"
	73	}
	74	EE::NonAsciiCharInByte \| EE::NonAsciiCharInByteString => {
	75	"Byte literals must not contain non-ASCII characters"
	76	}
	77	};
	78
	79	err_message
	80	}
	81
	82	pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
	83	// FIXME:
	84	// * Add unescape validation of raw string literals and raw byte string literals
	85	// * Add validation of doc comments are being attached to nodes
	86
	87	let mut errors = Vec::new();
	88	for node in root.descendants() {
	89	match_ast! {
	90	match node {
	91	ast::Literal(it) => validate_literal(it, &mut errors),
	92	ast::BlockExpr(it) => block::validate_block_expr(it, &mut errors),
	93	ast::FieldExpr(it) => validate_numeric_name(it.name_ref(), &mut errors),
	94	ast::RecordExprField(it) => validate_numeric_name(it.name_ref(), &mut errors),
	95	ast::Visibility(it) => validate_visibility(it, &mut errors),
	96	ast::RangeExpr(it) => validate_range_expr(it, &mut errors),
	97	ast::PathSegment(it) => validate_path_keywords(it, &mut errors),
	98	_ => (),
	99	}
	100	}
	101	}
	102	errors
	103	}
	104
	105	fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
	106	// FIXME: move this function to outer scope (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366196658)
	107	fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
	108	text.rfind(end_delimiter).and_then(\|end\| text.get(prefix_len..end))
	109	}
	110
	111	let token = literal.token();
	112	let text = token.text().as_str();
	113
	114	// FIXME: lift this lambda refactor to `fn` (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366199205)
	115	let mut push_err = \|prefix_len, (off, err): (usize, unescape::EscapeError)\| {
	116	let off = token.text_range().start() + TextSize::try_from(off + prefix_len).unwrap();
	117	acc.push(SyntaxError::new_at_offset(rustc_unescape_error_to_string(err), off));
	118	};
	119
	120	match token.kind() {
	121	BYTE => {
	122	if let Some(Err(e)) = unquote(text, 2, '\'').map(unescape_byte) {
	123	push_err(2, e);
	124	}
	125	}
	126	CHAR => {
	127	if let Some(Err(e)) = unquote(text, 1, '\'').map(unescape_char) {
	128	push_err(1, e);
	129	}
	130	}
	131	BYTE_STRING => {
	132	if let Some(without_quotes) = unquote(text, 2, '"') {
	133	unescape_byte_literal(without_quotes, Mode::ByteStr, &mut \|range, char\| {
	134	if let Err(err) = char {
	135	push_err(2, (range.start, err));
	136	}
	137	})
	138	}
	139	}
	140	STRING => {
	141	if let Some(without_quotes) = unquote(text, 1, '"') {
	142	unescape_literal(without_quotes, Mode::Str, &mut \|range, char\| {
	143	if let Err(err) = char {
	144	push_err(1, (range.start, err));
	145	}
	146	})
	147	}
	148	}
	149	_ => (),
	150	}
	151	}
	152
	153	pub(crate) fn validate_block_structure(root: &SyntaxNode) {
	154	let mut stack = Vec::new();
	155	for node in root.descendants() {
	156	match node.kind() {
	157	T!['{'] => stack.push(node),
	158	T!['}'] => {
	159	if let Some(pair) = stack.pop() {
	160	assert_eq!(
	161	node.parent(),
	162	pair.parent(),
	163	"\nunpaired curleys:\n{}\n{:#?}\n",
	164	root.text(),
	165	root,
	166	);
	167	assert!(
	168	node.next_sibling().is_none() && pair.prev_sibling().is_none(),
	169	"\nfloating curlys at {:?}\nfile:\n{}\nerror:\n{}\n",
	170	node,
	171	root.text(),
	172	node.text(),
	173	);
	174	}
	175	}
	176	_ => (),
	177	}
	178	}
	179	}
	180
	181	fn validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<SyntaxError>) {
	182	if let Some(int_token) = int_token(name_ref) {
	183	if int_token.text().chars().any(\|c\| !c.is_digit(10)) {
	184	errors.push(SyntaxError::new(
	185	"Tuple (struct) field access is only allowed through \
	186	decimal integers with no underscores or suffix",
	187	int_token.text_range(),
	188	));
	189	}
	190	}
	191
	192	fn int_token(name_ref: Option<ast::NameRef>) -> Option<SyntaxToken> {
	193	name_ref?.syntax().first_child_or_token()?.into_token().filter(\|it\| it.kind() == INT_NUMBER)
	194	}
	195	}
	196
	197	fn validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>) {
	198	let parent = match vis.syntax().parent() {
	199	Some(it) => it,
	200	None => return,
	201	};
	202	match parent.kind() {
	203	FN \| CONST \| TYPE_ALIAS => (),
	204	_ => return,
	205	}
	206
	207	let impl_def = match parent.parent().and_then(\|it\| it.parent()).and_then(ast::Impl::cast) {
	208	Some(it) => it,
	209	None => return,
	210	};
	211	if impl_def.trait_().is_some() {
	212	errors.push(SyntaxError::new("Unnecessary visibility qualifier", vis.syntax.text_range()));
	213	}
	214	}
	215
	216	fn validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>) {
	217	if expr.op_kind() == Some(ast::RangeOp::Inclusive) && expr.end().is_none() {
	218	errors.push(SyntaxError::new(
	219	"An inclusive range must have an end expression",
	220	expr.syntax().text_range(),
	221	));
	222	}
	223	}
	224
	225	fn validate_path_keywords(segment: ast::PathSegment, errors: &mut Vec<SyntaxError>) {
	226	use ast::PathSegmentKind;
	227
	228	let path = segment.parent_path();
	229	let is_path_start = segment.coloncolon_token().is_none() && path.qualifier().is_none();
	230
	231	if let Some(token) = segment.self_token() {
	232	if !is_path_start {
	233	errors.push(SyntaxError::new(
	234	"The `self` keyword is only allowed as the first segment of a path",
	235	token.text_range(),
	236	));
	237	}
	238	} else if let Some(token) = segment.crate_token() {
	239	if !is_path_start \|\| use_prefix(path).is_some() {
	240	errors.push(SyntaxError::new(
	241	"The `crate` keyword is only allowed as the first segment of a path",
	242	token.text_range(),
	243	));
	244	}
	245	} else if let Some(token) = segment.super_token() {
	246	if !all_supers(&path) {
	247	errors.push(SyntaxError::new(
	248	"The `super` keyword may only be preceded by other `super`s",
	249	token.text_range(),
	250	));
	251	return;
	252	}
	253
	254	let mut curr_path = path;
	255	while let Some(prefix) = use_prefix(curr_path) {
	256	if !all_supers(&prefix) {
	257	errors.push(SyntaxError::new(
	258	"The `super` keyword may only be preceded by other `super`s",
	259	token.text_range(),
	260	));
	261	return;
	262	}
	263	curr_path = prefix;
	264	}
	265	}
	266
	267	fn use_prefix(mut path: ast::Path) -> Option<ast::Path> {
	268	for node in path.syntax().ancestors().skip(1) {
	269	match_ast! {
	270	match node {
	271	ast::UseTree(it) => if let Some(tree_path) = it.path() {
	272	// Even a top-level path exists within a `UseTree` so we must explicitly
	273	// allow our path but disallow anything else
	274	if tree_path != path {
	275	return Some(tree_path);
	276	}
	277	},
	278	ast::UseTreeList(_it) => continue,
	279	ast::Path(parent) => path = parent,
	280	_ => return None,
	281	}
	282	};
	283	}
	284	return None;
	285	}
	286
	287	fn all_supers(path: &ast::Path) -> bool {
	288	let segment = match path.segment() {
	289	Some(it) => it,
	290	None => return false,
	291	};
	292
	293	if segment.kind() != Some(PathSegmentKind::SuperKw) {
	294	return false;
	295	}
	296
	297	if let Some(ref subpath) = path.qualifier() {
	298	return all_supers(subpath);
	299	}
	300
	301	return true;
	302	}
	303	}