diff options
-rw-r--r-- | crates/ra_syntax/src/string_lexing/mod.rs | 2 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation.rs | 83 | ||||
-rw-r--r-- | crates/ra_syntax/src/yellow/syntax_error.rs | 16 |
3 files changed, 93 insertions, 8 deletions
diff --git a/crates/ra_syntax/src/string_lexing/mod.rs b/crates/ra_syntax/src/string_lexing/mod.rs index f0812ff28..cc53e0aba 100644 --- a/crates/ra_syntax/src/string_lexing/mod.rs +++ b/crates/ra_syntax/src/string_lexing/mod.rs | |||
@@ -219,7 +219,7 @@ mod tests { | |||
219 | 219 | ||
220 | #[test] | 220 | #[test] |
221 | fn test_unicode_escapes() { | 221 | fn test_unicode_escapes() { |
222 | let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", ""]; | 222 | let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""]; |
223 | for escape in unicode_escapes { | 223 | for escape in unicode_escapes { |
224 | let escape_sequence = format!(r"'\u{}'", escape); | 224 | let escape_sequence = format!(r"'\u{}'", escape); |
225 | let component = closed_char_component(&escape_sequence); | 225 | let component = closed_char_component(&escape_sequence); |
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs index 009f5052f..a550ce0ab 100644 --- a/crates/ra_syntax/src/validation.rs +++ b/crates/ra_syntax/src/validation.rs | |||
@@ -1,3 +1,5 @@ | |||
1 | use std::u32; | ||
2 | |||
1 | use crate::{ | 3 | use crate::{ |
2 | algo::visit::{visitor_ctx, VisitorCtx}, | 4 | algo::visit::{visitor_ctx, VisitorCtx}, |
3 | ast::{self, AstNode}, | 5 | ast::{self, AstNode}, |
@@ -42,15 +44,82 @@ fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) { | |||
42 | } | 44 | } |
43 | } | 45 | } |
44 | AsciiCodeEscape => { | 46 | AsciiCodeEscape => { |
45 | // TODO: | 47 | // An AsciiCodeEscape has 4 chars, example: `\xDD` |
46 | // * First digit is octal | 48 | if text.len() < 4 { |
47 | // * Second digit is hex | 49 | errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range)); |
50 | } else { | ||
51 | assert!(text.chars().count() == 4, "AsciiCodeEscape cannot be longer than 4 chars"); | ||
52 | |||
53 | match u8::from_str_radix(&text[2..], 16) { | ||
54 | Ok(code) if code < 128 => { /* Escape code is valid */ }, | ||
55 | Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)), | ||
56 | Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)), | ||
57 | } | ||
58 | |||
59 | } | ||
48 | } | 60 | } |
49 | UnicodeEscape => { | 61 | UnicodeEscape => { |
50 | // TODO: | 62 | assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u"); |
51 | // * Only hex digits or underscores allowed | 63 | |
52 | // * Max 6 chars | 64 | if text.len() == 2 { |
53 | // * Within allowed range (must be at most 10FFFF) | 65 | // No starting `{` |
66 | errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); | ||
67 | return; | ||
68 | } | ||
69 | |||
70 | if text.len() == 3 { | ||
71 | // Only starting `{` | ||
72 | errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)); | ||
73 | return; | ||
74 | } | ||
75 | |||
76 | let mut code = String::new(); | ||
77 | let mut closed = false; | ||
78 | for c in text[3..].chars() { | ||
79 | assert!(!closed, "no characters after escape is closed"); | ||
80 | |||
81 | if c.is_digit(16) { | ||
82 | code.push(c); | ||
83 | } else if c == '_' { | ||
84 | // Reject leading _ | ||
85 | if code.len() == 0 { | ||
86 | errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); | ||
87 | return; | ||
88 | } | ||
89 | } else if c == '}' { | ||
90 | closed = true; | ||
91 | } else { | ||
92 | errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); | ||
93 | return; | ||
94 | } | ||
95 | } | ||
96 | |||
97 | if !closed { | ||
98 | errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)) | ||
99 | } | ||
100 | |||
101 | if code.len() == 0 { | ||
102 | errors.push(SyntaxError::new(EmptyUnicodeEcape, range)); | ||
103 | return; | ||
104 | } | ||
105 | |||
106 | if code.len() > 6 { | ||
107 | errors.push(SyntaxError::new(OverlongUnicodeEscape, range)); | ||
108 | } | ||
109 | |||
110 | match u32::from_str_radix(&code, 16) { | ||
111 | Ok(code_u32) if code_u32 > 0x10FFFF => { | ||
112 | errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range)); | ||
113 | } | ||
114 | Ok(_) => { | ||
115 | // Valid escape code | ||
116 | } | ||
117 | Err(_) => { | ||
118 | errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); | ||
119 | } | ||
120 | } | ||
121 | |||
122 | // FIXME: we really need tests for this | ||
54 | } | 123 | } |
55 | // Code points are always valid | 124 | // Code points are always valid |
56 | CodePoint => (), | 125 | CodePoint => (), |
diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs index f3df6bc15..9aed9e81e 100644 --- a/crates/ra_syntax/src/yellow/syntax_error.rs +++ b/crates/ra_syntax/src/yellow/syntax_error.rs | |||
@@ -69,6 +69,14 @@ pub enum SyntaxErrorKind { | |||
69 | LongChar, | 69 | LongChar, |
70 | EmptyAsciiEscape, | 70 | EmptyAsciiEscape, |
71 | InvalidAsciiEscape, | 71 | InvalidAsciiEscape, |
72 | TooShortAsciiCodeEscape, | ||
73 | AsciiCodeEscapeOutOfRange, | ||
74 | MalformedAsciiCodeEscape, | ||
75 | UnclosedUnicodeEscape, | ||
76 | MalformedUnicodeEscape, | ||
77 | EmptyUnicodeEcape, | ||
78 | OverlongUnicodeEscape, | ||
79 | UnicodeEscapeOutOfRange, | ||
72 | } | 80 | } |
73 | 81 | ||
74 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | 82 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] |
@@ -83,6 +91,14 @@ impl fmt::Display for SyntaxErrorKind { | |||
83 | EmptyChar => write!(f, "Empty char literal"), | 91 | EmptyChar => write!(f, "Empty char literal"), |
84 | UnclosedChar => write!(f, "Unclosed char literal"), | 92 | UnclosedChar => write!(f, "Unclosed char literal"), |
85 | LongChar => write!(f, "Char literal should be one character long"), | 93 | LongChar => write!(f, "Char literal should be one character long"), |
94 | TooShortAsciiCodeEscape => write!(f, "Escape sequence should have two digits"), | ||
95 | AsciiCodeEscapeOutOfRange => write!(f, "Escape sequence should be between \\x00 and \\x7F"), | ||
96 | MalformedAsciiCodeEscape => write!(f, "Escape sequence should be a hexadecimal number"), | ||
97 | UnclosedUnicodeEscape => write!(f, "Missing `}}`"), | ||
98 | MalformedUnicodeEscape => write!(f, "Malformed unicode escape sequence"), | ||
99 | EmptyUnicodeEcape => write!(f, "Empty unicode escape sequence"), | ||
100 | OverlongUnicodeEscape => write!(f, "Unicode escape sequence should have at most 6 digits"), | ||
101 | UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"), | ||
86 | ParseError(msg) => write!(f, "{}", msg.0), | 102 | ParseError(msg) => write!(f, "{}", msg.0), |
87 | } | 103 | } |
88 | } | 104 | } |