diff options
Diffstat (limited to 'crates/ra_syntax/src')
-rw-r--r-- | crates/ra_syntax/src/lexer/ptr.rs | 3 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation.rs | 122 | ||||
-rw-r--r-- | crates/ra_syntax/src/yellow/syntax_error.rs | 6 |
3 files changed, 125 insertions, 6 deletions
diff --git a/crates/ra_syntax/src/lexer/ptr.rs b/crates/ra_syntax/src/lexer/ptr.rs index 4c291b9c4..7e4df51aa 100644 --- a/crates/ra_syntax/src/lexer/ptr.rs +++ b/crates/ra_syntax/src/lexer/ptr.rs | |||
@@ -30,8 +30,7 @@ impl<'s> Ptr<'s> { | |||
30 | /// Gets the nth character from the current. | 30 | /// Gets the nth character from the current. |
31 | /// For example, 0 will return the current character, 1 will return the next, etc. | 31 | /// For example, 0 will return the current character, 1 will return the next, etc. |
32 | pub fn nth(&self, n: u32) -> Option<char> { | 32 | pub fn nth(&self, n: u32) -> Option<char> { |
33 | let mut chars = self.chars().peekable(); | 33 | self.chars().nth(n as usize) |
34 | chars.by_ref().nth(n as usize) | ||
35 | } | 34 | } |
36 | 35 | ||
37 | /// Checks whether the current character is `c`. | 36 | /// Checks whether the current character is `c`. |
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs index b560e5e85..1778f4b88 100644 --- a/crates/ra_syntax/src/validation.rs +++ b/crates/ra_syntax/src/validation.rs | |||
@@ -121,11 +121,13 @@ fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) { | |||
121 | errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); | 121 | errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); |
122 | } | 122 | } |
123 | } | 123 | } |
124 | |||
125 | // FIXME: we really need tests for this | ||
126 | } | 124 | } |
127 | // Code points are always valid | 125 | CodePoint => { |
128 | CodePoint => (), | 126 | // These code points must always be escaped |
127 | if text == "\t" || text == "\r" { | ||
128 | errors.push(SyntaxError::new(UnescapedCodepoint, range)); | ||
129 | } | ||
130 | }, | ||
129 | } | 131 | } |
130 | } | 132 | } |
131 | 133 | ||
@@ -148,3 +150,115 @@ fn is_ascii_escape(code: char) -> bool { | |||
148 | _ => false, | 150 | _ => false, |
149 | } | 151 | } |
150 | } | 152 | } |
153 | |||
154 | #[cfg(test)] | ||
155 | mod test { | ||
156 | use crate::File; | ||
157 | |||
158 | fn build_file(literal: &str) -> File { | ||
159 | let src = format!("const C: char = '{}';", literal); | ||
160 | File::parse(&src) | ||
161 | } | ||
162 | |||
163 | fn assert_valid_char(literal: &str) { | ||
164 | let file = build_file(literal); | ||
165 | assert!(file.errors().len() == 0, "Errors for literal '{}': {:?}", literal, file.errors()); | ||
166 | } | ||
167 | |||
168 | fn assert_invalid_char(literal: &str) { //, expected_errors: HashSet<SyntaxErrorKind>) { | ||
169 | let file = build_file(literal); | ||
170 | assert!(file.errors().len() > 0); | ||
171 | //let found_errors = file.errors().iter().map(|e| e.kind()).collect(); | ||
172 | } | ||
173 | |||
174 | #[test] | ||
175 | fn test_ansi_codepoints() { | ||
176 | for byte in 0..=255u8 { | ||
177 | match byte { | ||
178 | b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()), | ||
179 | b'\'' | b'\\' => { /* Ignore character close and backslash */ } | ||
180 | _ => assert_valid_char(&(byte as char).to_string()), | ||
181 | } | ||
182 | } | ||
183 | } | ||
184 | |||
185 | #[test] | ||
186 | fn test_unicode_codepoints() { | ||
187 | let valid = [ | ||
188 | "Ƒ", "バ", "メ", "﷽" | ||
189 | ]; | ||
190 | for c in &valid { | ||
191 | assert_valid_char(c); | ||
192 | } | ||
193 | } | ||
194 | |||
195 | #[test] | ||
196 | fn test_unicode_multiple_codepoints() { | ||
197 | let invalid = [ | ||
198 | "नी", "👨👨" | ||
199 | ]; | ||
200 | for c in &invalid { | ||
201 | assert_invalid_char(c); | ||
202 | } | ||
203 | } | ||
204 | |||
205 | #[test] | ||
206 | fn test_valid_ascii_escape() { | ||
207 | let valid = [ | ||
208 | r"\'", "\"", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b" | ||
209 | ]; | ||
210 | for c in &valid { | ||
211 | assert_valid_char(c); | ||
212 | } | ||
213 | } | ||
214 | |||
215 | #[test] | ||
216 | fn test_invalid_ascii_escape() { | ||
217 | let invalid = [ | ||
218 | r"\a", r"\?", r"\" | ||
219 | ]; | ||
220 | for c in &invalid { | ||
221 | assert_invalid_char(c); | ||
222 | } | ||
223 | } | ||
224 | |||
225 | #[test] | ||
226 | fn test_valid_ascii_code_escape() { | ||
227 | let valid = [ | ||
228 | r"\x00", r"\x7F", r"\x55" | ||
229 | ]; | ||
230 | for c in &valid { | ||
231 | assert_valid_char(c); | ||
232 | } | ||
233 | } | ||
234 | |||
235 | #[test] | ||
236 | fn test_invalid_ascii_code_escape() { | ||
237 | let invalid = [ | ||
238 | r"\x", r"\x7", r"\xF0" | ||
239 | ]; | ||
240 | for c in &invalid { | ||
241 | assert_invalid_char(c); | ||
242 | } | ||
243 | } | ||
244 | |||
245 | #[test] | ||
246 | fn test_valid_unicode_escape() { | ||
247 | let valid = [ | ||
248 | r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}" | ||
249 | ]; | ||
250 | for c in &valid { | ||
251 | assert_valid_char(c); | ||
252 | } | ||
253 | } | ||
254 | |||
255 | #[test] | ||
256 | fn test_invalid_unicode_escape() { | ||
257 | let invalid = [ | ||
258 | r"\u", r"\u{}", r"\u{", r"\u{FF", r"\u{FFFFFF}", r"\u{_F}", r"\u{00FFFFF}", r"\u{110000}" | ||
259 | ]; | ||
260 | for c in &invalid { | ||
261 | assert_invalid_char(c); | ||
262 | } | ||
263 | } | ||
264 | } | ||
diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs index 9aed9e81e..e2e660975 100644 --- a/crates/ra_syntax/src/yellow/syntax_error.rs +++ b/crates/ra_syntax/src/yellow/syntax_error.rs | |||
@@ -34,6 +34,10 @@ impl SyntaxError { | |||
34 | } | 34 | } |
35 | } | 35 | } |
36 | 36 | ||
37 | pub fn kind(&self) -> SyntaxErrorKind { | ||
38 | self.kind.clone() | ||
39 | } | ||
40 | |||
37 | pub fn location(&self) -> Location { | 41 | pub fn location(&self) -> Location { |
38 | self.location.clone() | 42 | self.location.clone() |
39 | } | 43 | } |
@@ -64,6 +68,7 @@ impl fmt::Display for SyntaxError { | |||
64 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | 68 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] |
65 | pub enum SyntaxErrorKind { | 69 | pub enum SyntaxErrorKind { |
66 | ParseError(ParseError), | 70 | ParseError(ParseError), |
71 | UnescapedCodepoint, | ||
67 | EmptyChar, | 72 | EmptyChar, |
68 | UnclosedChar, | 73 | UnclosedChar, |
69 | LongChar, | 74 | LongChar, |
@@ -86,6 +91,7 @@ impl fmt::Display for SyntaxErrorKind { | |||
86 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | 91 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
87 | use self::SyntaxErrorKind::*; | 92 | use self::SyntaxErrorKind::*; |
88 | match self { | 93 | match self { |
94 | UnescapedCodepoint => write!(f, "This codepoint should always be escaped"), | ||
89 | EmptyAsciiEscape => write!(f, "Empty escape sequence"), | 95 | EmptyAsciiEscape => write!(f, "Empty escape sequence"), |
90 | InvalidAsciiEscape => write!(f, "Invalid escape sequence"), | 96 | InvalidAsciiEscape => write!(f, "Invalid escape sequence"), |
91 | EmptyChar => write!(f, "Empty char literal"), | 97 | EmptyChar => write!(f, "Empty char literal"), |