3 files changed, 125 insertions, 6 deletions
diff --git a/crates/ra_syntax/src/lexer/ptr.rs b/crates/ra_syntax/src/lexer/ptr.rs
index 4c291b9c4..7e4df51aa 100644
--- a/crates/ra_syntax/src/lexer/ptr.rs
+++ b/crates/ra_syntax/src/lexer/ptr.rs
@@ -30,8 +30,7 @@ impl<'s> Ptr<'s> {
    /// Gets the nth character from the current.
    /// For example, 0 will return the current character, 1 will return the next, etc.
    pub fn nth(&self, n: u32) -> Option<char> {
-        let mut chars = self.chars().peekable();
+        self.chars().nth(n as usize)
-        chars.by_ref().nth(n as usize)
    }
    /// Checks whether the current character is `c`.
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs
index b560e5e85..1778f4b88 100644
--- a/crates/ra_syntax/src/validation.rs
+++ b/crates/ra_syntax/src/validation.rs
@@ -121,11 +121,13 @@ fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) {
                        errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
                    }
                }
-                // FIXME: we really need tests for this
            }
-            // Code points are always valid
+            CodePoint => {
-            CodePoint => (),
+                // These code points must always be escaped
+                if text == "\t" || text == "\r" {
+                    errors.push(SyntaxError::new(UnescapedCodepoint, range));
+                }
+            },
        }
    }
@@ -148,3 +150,115 @@ fn is_ascii_escape(code: char) -> bool {
        _ => false,
    }
 }
+#[cfg(test)]
+mod test {
+    use crate::File;
+
+    /// Wraps `literal` in single quotes inside a `const C: char = '...';` item
+    /// and parses the resulting source with `File::parse`.
+    fn build_file(literal: &str) -> File {
+        let src = format!("const C: char = '{}';", literal);
+        File::parse(&src)
+    }
+
+    /// Asserts that a file built from `literal` reports no errors at all.
+    fn assert_valid_char(literal: &str) {
+        let file = build_file(literal);
+        let errors = file.errors();
+        assert!(errors.is_empty(), "Errors for literal '{}': {:?}", literal, errors);
+    }
+
+    /// Asserts that a file built from `literal` reports at least one error.
+    ///
+    /// Only the presence of an error is checked, not its kind.
+    fn assert_invalid_char(literal: &str) {
+        let file = build_file(literal);
+        assert!(!file.errors().is_empty(), "Expected errors for literal '{}'", literal);
+    }
+
+    /// Runs `assert_valid_char` on every literal in `literals`.
+    fn assert_all_valid(literals: &[&str]) {
+        for literal in literals {
+            assert_valid_char(literal);
+        }
+    }
+
+    /// Runs `assert_invalid_char` on every literal in `literals`.
+    fn assert_all_invalid(literals: &[&str]) {
+        for literal in literals {
+            assert_invalid_char(literal);
+        }
+    }
+
+    #[test]
+    fn test_ansi_codepoints() {
+        // Despite the name, this walks every `u8` value; `byte as char` yields the
+        // code point with the same numeric value, i.e. U+0000 through U+00FF.
+        for byte in 0..=255u8 {
+            let literal = (byte as char).to_string();
+            match byte {
+                b'\n' | b'\r' | b'\t' => assert_invalid_char(&literal),
+                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
+                _ => assert_valid_char(&literal),
+            }
+        }
+    }
+
+    #[test]
+    fn test_unicode_codepoints() {
+        assert_all_valid(&["Ƒ", "バ", "メ", "﷽"]);
+    }
+
+    #[test]
+    fn test_unicode_multiple_codepoints() {
+        assert_all_invalid(&["नी", "👨‍👨‍"]);
+    }
+
+    #[test]
+    fn test_valid_ascii_escape() {
+        assert_all_valid(&[r"\'", "\"", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b"]);
+    }
+
+    #[test]
+    fn test_invalid_ascii_escape() {
+        assert_all_invalid(&[r"\a", r"\?", r"\"]);
+    }
+
+    #[test]
+    fn test_valid_ascii_code_escape() {
+        assert_all_valid(&[r"\x00", r"\x7F", r"\x55"]);
+    }
+
+    #[test]
+    fn test_invalid_ascii_code_escape() {
+        assert_all_invalid(&[r"\x", r"\x7", r"\xF0"]);
+    }
+
+    #[test]
+    fn test_valid_unicode_escape() {
+        assert_all_valid(&[
+            r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"
+        ]);
+    }
+
+    #[test]
+    fn test_invalid_unicode_escape() {
+        assert_all_invalid(&[
+            r"\u", r"\u{}", r"\u{", r"\u{FF", r"\u{FFFFFF}", r"\u{_F}", r"\u{00FFFFF}", r"\u{110000}"
+        ]);
+    }
+}
diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs
index 9aed9e81e..e2e660975 100644
--- a/crates/ra_syntax/src/yellow/syntax_error.rs
+++ b/crates/ra_syntax/src/yellow/syntax_error.rs
@@ -34,6 +34,10 @@ impl SyntaxError {
        }
    }
+    pub fn kind(&self) -> SyntaxErrorKind {
+        self.kind.clone()
+    }
    /// Returns a clone of the location stored in this error.
    pub fn location(&self) -> Location {
        self.location.clone()
    }
@@ -64,6 +68,7 @@ impl fmt::Display for SyntaxError {
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum SyntaxErrorKind {
    ParseError(ParseError),
+    UnescapedCodepoint,
    EmptyChar,
    UnclosedChar,
    LongChar,
@@ -86,6 +91,7 @@ impl fmt::Display for SyntaxErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use self::SyntaxErrorKind::*;
        match self {
+            UnescapedCodepoint => write!(f, "This codepoint should always be escaped"),
            EmptyAsciiEscape => write!(f, "Empty escape sequence"),
            InvalidAsciiEscape => write!(f, "Invalid escape sequence"),
            EmptyChar => write!(f, "Empty char literal"),

diff --git a/crates/ra_syntax/src/lexer/ptr.rs b/crates/ra_syntax/src/lexer/ptr.rs index 4c291b9c4..7e4df51aa 100644 --- a/crates/ra_syntax/src/lexer/ptr.rs +++ b/crates/ra_syntax/src/lexer/ptr.rs
@@ -30,8 +30,7 @@ impl<'s> Ptr<'s> {
30	/// Gets the nth character from the current.	30	/// Gets the nth character from the current.
31	/// For example, 0 will return the current token, 1 will return the next, etc.	31	/// For example, 0 will return the current token, 1 will return the next, etc.
32	pub fn nth(&self, n: u32) -> Option<char> {	32	pub fn nth(&self, n: u32) -> Option<char> {
33	let mut chars = self.chars().peekable();	33	self.chars().nth(n as usize)
34	chars.by_ref().nth(n as usize)
35	}	34	}
36		35
37	/// Checks whether the current character is `c`.	36	/// Checks whether the current character is `c`.


diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs index b560e5e85..1778f4b88 100644 --- a/crates/ra_syntax/src/validation.rs +++ b/crates/ra_syntax/src/validation.rs
@@ -121,11 +121,13 @@ fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) {
121	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));	121	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
122	}	122	}
123	}	123	}
124
125	// FIXME: we really need tests for this
126	}	124	}
127	// Code points are always valid	125	CodePoint => {
128	CodePoint => (),	126	// These code points must always be escaped
		127	if text == "\t" || text == "\r" {
		128	errors.push(SyntaxError::new(UnescapedCodepoint, range));
		129	}
		130	},
129	}	131	}
130	}	132	}
131		133
@@ -148,3 +150,115 @@ fn is_ascii_escape(code: char) -> bool {
148	_ => false,	150	_ => false,
149	}	151	}
150	}	152	}
		153
		154	#[cfg(test)]
		155	mod test {
		156	use crate::File;
		157
		158	fn build_file(literal: &str) -> File {
		159	let src = format!("const C: char = '{}';", literal);
		160	File::parse(&src)
		161	}
		162
		163	fn assert_valid_char(literal: &str) {
		164	let file = build_file(literal);
		165	assert!(file.errors().len() == 0, "Errors for literal '{}': {:?}", literal, file.errors());
		166	}
		167
		168	fn assert_invalid_char(literal: &str) { //, expected_errors: HashSet<SyntaxErrorKind>) {
		169	let file = build_file(literal);
		170	assert!(file.errors().len() > 0);
		171	//let found_errors = file.errors().iter().map(|e| e.kind()).collect();
		172	}
		173
		174	#[test]
		175	fn test_ansi_codepoints() {
		176	for byte in 0..=255u8 {
		177	match byte {
		178	b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()),
		179	b'\'' | b'\\' => { /* Ignore character close and backslash */ }
		180	_ => assert_valid_char(&(byte as char).to_string()),
		181	}
		182	}
		183	}
		184
		185	#[test]
		186	fn test_unicode_codepoints() {
		187	let valid = [
		188	"Ƒ", "バ", "メ", "﷽"
		189	];
		190	for c in &valid {
		191	assert_valid_char(c);
		192	}
		193	}
		194
		195	#[test]
		196	fn test_unicode_multiple_codepoints() {
		197	let invalid = [
		198	"नी", "👨‍👨‍"
		199	];
		200	for c in &invalid {
		201	assert_invalid_char(c);
		202	}
		203	}
		204
		205	#[test]
		206	fn test_valid_ascii_escape() {
		207	let valid = [
		208	r"\'", "\"", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b"
		209	];
		210	for c in &valid {
		211	assert_valid_char(c);
		212	}
		213	}
		214
		215	#[test]
		216	fn test_invalid_ascii_escape() {
		217	let invalid = [
		218	r"\a", r"\?", r"\"
		219	];
		220	for c in &invalid {
		221	assert_invalid_char(c);
		222	}
		223	}
		224
		225	#[test]
		226	fn test_valid_ascii_code_escape() {
		227	let valid = [
		228	r"\x00", r"\x7F", r"\x55"
		229	];
		230	for c in &valid {
		231	assert_valid_char(c);
		232	}
		233	}
		234
		235	#[test]
		236	fn test_invalid_ascii_code_escape() {
		237	let invalid = [
		238	r"\x", r"\x7", r"\xF0"
		239	];
		240	for c in &invalid {
		241	assert_invalid_char(c);
		242	}
		243	}
		244
		245	#[test]
		246	fn test_valid_unicode_escape() {
		247	let valid = [
		248	r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"
		249	];
		250	for c in &valid {
		251	assert_valid_char(c);
		252	}
		253	}
		254
		255	#[test]
		256	fn test_invalid_unicode_escape() {
		257	let invalid = [
		258	r"\u", r"\u{}", r"\u{", r"\u{FF", r"\u{FFFFFF}", r"\u{_F}", r"\u{00FFFFF}", r"\u{110000}"
		259	];
		260	for c in &invalid {
		261	assert_invalid_char(c);
		262	}
		263	}
		264	}


diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs index 9aed9e81e..e2e660975 100644 --- a/crates/ra_syntax/src/yellow/syntax_error.rs +++ b/crates/ra_syntax/src/yellow/syntax_error.rs
@@ -34,6 +34,10 @@ impl SyntaxError {
34	}	34	}
35	}	35	}
36		36
		37	pub fn kind(&self) -> SyntaxErrorKind {
		38	self.kind.clone()
		39	}
		40
37	pub fn location(&self) -> Location {	41	pub fn location(&self) -> Location {
38	self.location.clone()	42	self.location.clone()
39	}	43	}
@@ -64,6 +68,7 @@ impl fmt::Display for SyntaxError {
64	#[derive(Debug, Clone, PartialEq, Eq, Hash)]	68	#[derive(Debug, Clone, PartialEq, Eq, Hash)]
65	pub enum SyntaxErrorKind {	69	pub enum SyntaxErrorKind {
66	ParseError(ParseError),	70	ParseError(ParseError),
		71	UnescapedCodepoint,
67	EmptyChar,	72	EmptyChar,
68	UnclosedChar,	73	UnclosedChar,
69	LongChar,	74	LongChar,
@@ -86,6 +91,7 @@ impl fmt::Display for SyntaxErrorKind {
86	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {	91	fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
87	use self::SyntaxErrorKind::*;	92	use self::SyntaxErrorKind::*;
88	match self {	93	match self {
		94	UnescapedCodepoint => write!(f, "This codepoint should always be escaped"),
89	EmptyAsciiEscape => write!(f, "Empty escape sequence"),	95	EmptyAsciiEscape => write!(f, "Empty escape sequence"),
90	InvalidAsciiEscape => write!(f, "Invalid escape sequence"),	96	InvalidAsciiEscape => write!(f, "Invalid escape sequence"),
91	EmptyChar => write!(f, "Empty char literal"),	97	EmptyChar => write!(f, "Empty char literal"),