about | summary | refs | log | tree | commit | diff
path: root/crates/ra_syntax/src
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_syntax/src')
-rw-r--r-- crates/ra_syntax/src/lexer/ptr.rs 3
-rw-r--r-- crates/ra_syntax/src/validation.rs 122
-rw-r--r-- crates/ra_syntax/src/yellow/syntax_error.rs 6
3 files changed, 125 insertions, 6 deletions
diff --git a/crates/ra_syntax/src/lexer/ptr.rs b/crates/ra_syntax/src/lexer/ptr.rs
index 4c291b9c4..7e4df51aa 100644
--- a/crates/ra_syntax/src/lexer/ptr.rs
+++ b/crates/ra_syntax/src/lexer/ptr.rs
@@ -30,8 +30,7 @@ impl<'s> Ptr<'s> {
30 /// Gets the nth character from the current. 30 /// Gets the nth character from the current.
31 /// For example, 0 will return the current token, 1 will return the next, etc. 31 /// For example, 0 will return the current token, 1 will return the next, etc.
32 pub fn nth(&self, n: u32) -> Option<char> { 32 pub fn nth(&self, n: u32) -> Option<char> {
33 let mut chars = self.chars().peekable(); 33 self.chars().nth(n as usize)
34 chars.by_ref().nth(n as usize)
35 } 34 }
36 35
37 /// Checks whether the current character is `c`. 36 /// Checks whether the current character is `c`.
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs
index b560e5e85..1778f4b88 100644
--- a/crates/ra_syntax/src/validation.rs
+++ b/crates/ra_syntax/src/validation.rs
@@ -121,11 +121,13 @@ fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) {
121 errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); 121 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
122 } 122 }
123 } 123 }
124
125 // FIXME: we really need tests for this
126 } 124 }
127 // Code points are always valid 125 CodePoint => {
128 CodePoint => (), 126 // These code points must always be escaped
127 if text == "\t" || text == "\r" {
128 errors.push(SyntaxError::new(UnescapedCodepoint, range));
129 }
130 },
129 } 131 }
130 } 132 }
131 133
@@ -148,3 +150,115 @@ fn is_ascii_escape(code: char) -> bool {
148 _ => false, 150 _ => false,
149 } 151 }
150} 152}
153
154#[cfg(test)]
155mod test {
156 use crate::File;
157
158 fn build_file(literal: &str) -> File {
159 let src = format!("const C: char = '{}';", literal);
160 File::parse(&src)
161 }
162
163 fn assert_valid_char(literal: &str) {
164 let file = build_file(literal);
165 assert!(file.errors().len() == 0, "Errors for literal '{}': {:?}", literal, file.errors());
166 }
167
168 fn assert_invalid_char(literal: &str) { //, expected_errors: HashSet<SyntaxErrorKind>) {
169 let file = build_file(literal);
170 assert!(file.errors().len() > 0);
171 //let found_errors = file.errors().iter().map(|e| e.kind()).collect();
172 }
173
174 #[test]
175 fn test_ansi_codepoints() {
176 for byte in 0..=255u8 {
177 match byte {
178 b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()),
179 b'\'' | b'\\' => { /* Ignore character close and backslash */ }
180 _ => assert_valid_char(&(byte as char).to_string()),
181 }
182 }
183 }
184
185 #[test]
186 fn test_unicode_codepoints() {
187 let valid = [
188 "Ƒ", "バ", "メ", "﷽"
189 ];
190 for c in &valid {
191 assert_valid_char(c);
192 }
193 }
194
195 #[test]
196 fn test_unicode_multiple_codepoints() {
197 let invalid = [
198 "नी", "👨‍👨‍"
199 ];
200 for c in &invalid {
201 assert_invalid_char(c);
202 }
203 }
204
205 #[test]
206 fn test_valid_ascii_escape() {
207 let valid = [
208 r"\'", "\"", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b"
209 ];
210 for c in &valid {
211 assert_valid_char(c);
212 }
213 }
214
215 #[test]
216 fn test_invalid_ascii_escape() {
217 let invalid = [
218 r"\a", r"\?", r"\"
219 ];
220 for c in &invalid {
221 assert_invalid_char(c);
222 }
223 }
224
225 #[test]
226 fn test_valid_ascii_code_escape() {
227 let valid = [
228 r"\x00", r"\x7F", r"\x55"
229 ];
230 for c in &valid {
231 assert_valid_char(c);
232 }
233 }
234
235 #[test]
236 fn test_invalid_ascii_code_escape() {
237 let invalid = [
238 r"\x", r"\x7", r"\xF0"
239 ];
240 for c in &invalid {
241 assert_invalid_char(c);
242 }
243 }
244
245 #[test]
246 fn test_valid_unicode_escape() {
247 let valid = [
248 r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"
249 ];
250 for c in &valid {
251 assert_valid_char(c);
252 }
253 }
254
255 #[test]
256 fn test_invalid_unicode_escape() {
257 let invalid = [
258 r"\u", r"\u{}", r"\u{", r"\u{FF", r"\u{FFFFFF}", r"\u{_F}", r"\u{00FFFFF}", r"\u{110000}"
259 ];
260 for c in &invalid {
261 assert_invalid_char(c);
262 }
263 }
264}
diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs
index 9aed9e81e..e2e660975 100644
--- a/crates/ra_syntax/src/yellow/syntax_error.rs
+++ b/crates/ra_syntax/src/yellow/syntax_error.rs
@@ -34,6 +34,10 @@ impl SyntaxError {
34 } 34 }
35 } 35 }
36 36
37 pub fn kind(&self) -> SyntaxErrorKind {
38 self.kind.clone()
39 }
40
37 pub fn location(&self) -> Location { 41 pub fn location(&self) -> Location {
38 self.location.clone() 42 self.location.clone()
39 } 43 }
@@ -64,6 +68,7 @@ impl fmt::Display for SyntaxError {
64#[derive(Debug, Clone, PartialEq, Eq, Hash)] 68#[derive(Debug, Clone, PartialEq, Eq, Hash)]
65pub enum SyntaxErrorKind { 69pub enum SyntaxErrorKind {
66 ParseError(ParseError), 70 ParseError(ParseError),
71 UnescapedCodepoint,
67 EmptyChar, 72 EmptyChar,
68 UnclosedChar, 73 UnclosedChar,
69 LongChar, 74 LongChar,
@@ -86,6 +91,7 @@ impl fmt::Display for SyntaxErrorKind {
86 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 91 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
87 use self::SyntaxErrorKind::*; 92 use self::SyntaxErrorKind::*;
88 match self { 93 match self {
94 UnescapedCodepoint => write!(f, "This codepoint should always be escaped"),
89 EmptyAsciiEscape => write!(f, "Empty escape sequence"), 95 EmptyAsciiEscape => write!(f, "Empty escape sequence"),
90 InvalidAsciiEscape => write!(f, "Invalid escape sequence"), 96 InvalidAsciiEscape => write!(f, "Invalid escape sequence"),
91 EmptyChar => write!(f, "Empty char literal"), 97 EmptyChar => write!(f, "Empty char literal"),