aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbors[bot] <bors[bot]@users.noreply.github.com>2018-11-10 14:36:45 +0000
committerbors[bot] <bors[bot]@users.noreply.github.com>2018-11-10 14:36:45 +0000
commit477de790b0211196256a772befe4f577d1a8ba14 (patch)
tree42c40e9201adf64d1c06bc1c69524f5688ee6e9f
parent5a9150df9bcdaf5faed5b500c22333f1f7c99f32 (diff)
parent3b4c02c19e4af645fd37e8bff774b05d546dc0b6 (diff)
Merge #222
222: Validate string literals r=aochagavia a=aochagavia Related: #6 (some validators are still missing), fixes #27 Co-authored-by: Adolfo Ochagavía <[email protected]>
-rw-r--r--crates/ra_syntax/src/ast/generated.rs37
-rw-r--r--crates/ra_syntax/src/ast/mod.rs9
-rw-r--r--crates/ra_syntax/src/grammar.ron1
-rw-r--r--crates/ra_syntax/src/string_lexing.rs (renamed from crates/ra_syntax/src/string_lexing/mod.rs)113
-rw-r--r--crates/ra_syntax/src/validation.rs271
-rw-r--r--crates/ra_syntax/src/validation/char.rs270
-rw-r--r--crates/ra_syntax/src/validation/mod.rs20
-rw-r--r--crates/ra_syntax/src/validation/string.rs168
-rw-r--r--crates/ra_syntax/src/yellow/syntax_error.rs6
9 files changed, 621 insertions, 274 deletions
diff --git a/crates/ra_syntax/src/ast/generated.rs b/crates/ra_syntax/src/ast/generated.rs
index 5b5f71ee7..2e9ae263a 100644
--- a/crates/ra_syntax/src/ast/generated.rs
+++ b/crates/ra_syntax/src/ast/generated.rs
@@ -3236,6 +3236,43 @@ impl<'a> AstNode<'a> for Stmt<'a> {
3236 3236
3237impl<'a> Stmt<'a> {} 3237impl<'a> Stmt<'a> {}
3238 3238
3239// String
3240#[derive(Debug, Clone, Copy,)]
3241pub struct StringNode<R: TreeRoot<RaTypes> = OwnedRoot> {
3242 pub(crate) syntax: SyntaxNode<R>,
3243}
3244pub type String<'a> = StringNode<RefRoot<'a>>;
3245
3246impl<R1: TreeRoot<RaTypes>, R2: TreeRoot<RaTypes>> PartialEq<StringNode<R1>> for StringNode<R2> {
3247 fn eq(&self, other: &StringNode<R1>) -> bool { self.syntax == other.syntax }
3248}
3249impl<R: TreeRoot<RaTypes>> Eq for StringNode<R> {}
3250impl<R: TreeRoot<RaTypes>> Hash for StringNode<R> {
3251 fn hash<H: Hasher>(&self, state: &mut H) { self.syntax.hash(state) }
3252}
3253
3254impl<'a> AstNode<'a> for String<'a> {
3255 fn cast(syntax: SyntaxNodeRef<'a>) -> Option<Self> {
3256 match syntax.kind() {
3257 STRING => Some(String { syntax }),
3258 _ => None,
3259 }
3260 }
3261 fn syntax(self) -> SyntaxNodeRef<'a> { self.syntax }
3262}
3263
3264impl<R: TreeRoot<RaTypes>> StringNode<R> {
3265 pub fn borrowed(&self) -> String {
3266 StringNode { syntax: self.syntax.borrowed() }
3267 }
3268 pub fn owned(&self) -> StringNode {
3269 StringNode { syntax: self.syntax.owned() }
3270 }
3271}
3272
3273
3274impl<'a> String<'a> {}
3275
3239// StructDef 3276// StructDef
3240#[derive(Debug, Clone, Copy,)] 3277#[derive(Debug, Clone, Copy,)]
3241pub struct StructDefNode<R: TreeRoot<RaTypes> = OwnedRoot> { 3278pub struct StructDefNode<R: TreeRoot<RaTypes> = OwnedRoot> {
diff --git a/crates/ra_syntax/src/ast/mod.rs b/crates/ra_syntax/src/ast/mod.rs
index 6b0d62610..f20714ede 100644
--- a/crates/ra_syntax/src/ast/mod.rs
+++ b/crates/ra_syntax/src/ast/mod.rs
@@ -1,6 +1,7 @@
1mod generated; 1mod generated;
2 2
3use std::marker::PhantomData; 3use std::marker::PhantomData;
4use std::string::String as RustString;
4 5
5use itertools::Itertools; 6use itertools::Itertools;
6 7
@@ -76,7 +77,7 @@ pub trait DocCommentsOwner<'a>: AstNode<'a> {
76 77
77 /// Returns the textual content of a doc comment block as a single string. 78 /// Returns the textual content of a doc comment block as a single string.
78 /// That is, strips leading `///` and joins lines 79 /// That is, strips leading `///` and joins lines
79 fn doc_comment_text(self) -> String { 80 fn doc_comment_text(self) -> RustString {
80 self.doc_comments() 81 self.doc_comments()
81 .map(|comment| { 82 .map(|comment| {
82 let prefix = comment.prefix(); 83 let prefix = comment.prefix();
@@ -133,6 +134,12 @@ impl<'a> Char<'a> {
133 } 134 }
134} 135}
135 136
137impl<'a> String<'a> {
138 pub fn text(&self) -> &SmolStr {
139 &self.syntax().leaf_text().unwrap()
140 }
141}
142
136impl<'a> Comment<'a> { 143impl<'a> Comment<'a> {
137 pub fn text(&self) -> &SmolStr { 144 pub fn text(&self) -> &SmolStr {
138 self.syntax().leaf_text().unwrap() 145 self.syntax().leaf_text().unwrap()
diff --git a/crates/ra_syntax/src/grammar.ron b/crates/ra_syntax/src/grammar.ron
index a92844415..c3184667e 100644
--- a/crates/ra_syntax/src/grammar.ron
+++ b/crates/ra_syntax/src/grammar.ron
@@ -411,6 +411,7 @@ Grammar(
411 "PrefixExpr": (), 411 "PrefixExpr": (),
412 "RangeExpr": (), 412 "RangeExpr": (),
413 "BinExpr": (), 413 "BinExpr": (),
414 "String": (),
414 "Char": (), 415 "Char": (),
415 "Literal": (), 416 "Literal": (),
416 417
diff --git a/crates/ra_syntax/src/string_lexing/mod.rs b/crates/ra_syntax/src/string_lexing.rs
index cc53e0aba..d613bb042 100644
--- a/crates/ra_syntax/src/string_lexing/mod.rs
+++ b/crates/ra_syntax/src/string_lexing.rs
@@ -1,6 +1,68 @@
1use self::CharComponentKind::*; 1use self::CharComponentKind::*;
2use rowan::{TextRange, TextUnit}; 2use rowan::{TextRange, TextUnit};
3 3
4pub fn parse_string_literal(src: &str) -> StringComponentIterator {
5 StringComponentIterator {
6 parser: Parser::new(src),
7 has_closing_quote: false,
8 }
9}
10
11#[derive(Debug, Eq, PartialEq, Clone)]
12pub struct StringComponent {
13 pub range: TextRange,
14 pub kind: StringComponentKind,
15}
16
17impl StringComponent {
18 fn new(range: TextRange, kind: StringComponentKind) -> StringComponent {
19 StringComponent { range, kind }
20 }
21}
22
23#[derive(Debug, Eq, PartialEq, Clone)]
24pub enum StringComponentKind {
25 IgnoreNewline,
26 Char(CharComponentKind),
27}
28
29pub struct StringComponentIterator<'a> {
30 parser: Parser<'a>,
31 pub has_closing_quote: bool,
32}
33
34impl<'a> Iterator for StringComponentIterator<'a> {
35 type Item = StringComponent;
36 fn next(&mut self) -> Option<StringComponent> {
37 if self.parser.pos == 0 {
38 assert!(
39 self.parser.advance() == '"',
40 "string literal should start with double quotes"
41 );
42 }
43
44 if let Some(component) = self.parser.parse_string_component() {
45 return Some(component);
46 }
47
48 // We get here when there are no char components left to parse
49 if self.parser.peek() == Some('"') {
50 self.parser.advance();
51 self.has_closing_quote = true;
52 }
53
54 assert!(
55 self.parser.peek() == None,
56 "string literal should leave no unparsed input: src = {}, pos = {}, length = {}",
57 self.parser.src,
58 self.parser.pos,
59 self.parser.src.len()
60 );
61
62 None
63 }
64}
65
4pub fn parse_char_literal(src: &str) -> CharComponentIterator { 66pub fn parse_char_literal(src: &str) -> CharComponentIterator {
5 CharComponentIterator { 67 CharComponentIterator {
6 parser: Parser::new(src), 68 parser: Parser::new(src),
@@ -93,6 +155,12 @@ impl<'a> Parser<'a> {
93 next 155 next
94 } 156 }
95 157
158 pub fn skip_whitespace(&mut self) {
159 while self.peek().map(|c| c.is_whitespace()) == Some(true) {
160 self.advance();
161 }
162 }
163
96 pub fn get_pos(&self) -> TextUnit { 164 pub fn get_pos(&self) -> TextUnit {
97 (self.pos as u32).into() 165 (self.pos as u32).into()
98 } 166 }
@@ -172,6 +240,51 @@ impl<'a> Parser<'a> {
172 )) 240 ))
173 } 241 }
174 } 242 }
243
244 pub fn parse_ignore_newline(&mut self, start: TextUnit) -> Option<StringComponent> {
245 // In string literals, when a `\` occurs immediately before the newline, the `\`,
246 // the newline, and all whitespace at the beginning of the next line are ignored
247 match self.peek() {
248 Some('\n') | Some('\r') => {
249 self.skip_whitespace();
250 Some(StringComponent::new(
251 TextRange::from_to(start, self.get_pos()),
252 StringComponentKind::IgnoreNewline,
253 ))
254 }
255 _ => None,
256 }
257 }
258
259 pub fn parse_string_component(&mut self) -> Option<StringComponent> {
260 let next = self.peek()?;
261
262 // Ignore string close
263 if next == '"' {
264 return None;
265 }
266
267 let start = self.get_pos();
268 self.advance();
269
270 if next == '\\' {
271 // Strings can use `\` to ignore newlines, so we first try to parse one of those
272 // before falling back to parsing char escapes
273 self.parse_ignore_newline(start).or_else(|| {
274 let char_component = self.parse_escape(start);
275 Some(StringComponent::new(
276 char_component.range,
277 StringComponentKind::Char(char_component.kind),
278 ))
279 })
280 } else {
281 let end = self.get_pos();
282 Some(StringComponent::new(
283 TextRange::from_to(start, end),
284 StringComponentKind::Char(CodePoint),
285 ))
286 }
287 }
175} 288}
176 289
177#[cfg(test)] 290#[cfg(test)]
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs
deleted file mode 100644
index a10b297c0..000000000
--- a/crates/ra_syntax/src/validation.rs
+++ /dev/null
@@ -1,271 +0,0 @@
1use std::u32;
2
3use arrayvec::ArrayString;
4
5use crate::{
6 algo::visit::{visitor_ctx, VisitorCtx},
7 ast::{self, AstNode},
8 SourceFileNode,
9 string_lexing::{self, CharComponentKind},
10 yellow::{
11 SyntaxError,
12 SyntaxErrorKind::*,
13 },
14};
15
16pub(crate) fn validate(file: &SourceFileNode) -> Vec<SyntaxError> {
17 let mut errors = Vec::new();
18 for node in file.syntax().descendants() {
19 let _ = visitor_ctx(&mut errors)
20 .visit::<ast::Char, _>(validate_char)
21 .accept(node);
22 }
23 errors
24}
25
26fn validate_char(node: ast::Char, errors: &mut Vec<SyntaxError>) {
27 let mut components = string_lexing::parse_char_literal(node.text());
28 let mut len = 0;
29 for component in &mut components {
30 len += 1;
31
32 // Validate escapes
33 let text = &node.text()[component.range];
34 let range = component.range + node.syntax().range().start();
35 use self::CharComponentKind::*;
36 match component.kind {
37 AsciiEscape => {
38 if text.len() == 1 {
39 // Escape sequence consists only of leading `\`
40 errors.push(SyntaxError::new(EmptyAsciiEscape, range));
41 } else {
42 let escape_code = text.chars().skip(1).next().unwrap();
43 if !is_ascii_escape(escape_code) {
44 errors.push(SyntaxError::new(InvalidAsciiEscape, range));
45 }
46 }
47 }
48 AsciiCodeEscape => {
49 // An AsciiCodeEscape has 4 chars, example: `\xDD`
50 if text.len() < 4 {
51 errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
52 } else {
53 assert!(
54 text.chars().count() == 4,
55 "AsciiCodeEscape cannot be longer than 4 chars"
56 );
57
58 match u8::from_str_radix(&text[2..], 16) {
59 Ok(code) if code < 128 => { /* Escape code is valid */ }
60 Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
61 Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
62 }
63 }
64 }
65 UnicodeEscape => {
66 assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
67
68 if text.len() == 2 {
69 // No starting `{`
70 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
71 return;
72 }
73
74 if text.len() == 3 {
75 // Only starting `{`
76 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
77 return;
78 }
79
80 let mut code = ArrayString::<[_; 6]>::new();
81 let mut closed = false;
82 for c in text[3..].chars() {
83 assert!(!closed, "no characters after escape is closed");
84
85 if c.is_digit(16) {
86 if code.len() == 6 {
87 errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
88 return;
89 }
90
91 code.push(c);
92 } else if c == '_' {
93 // Reject leading _
94 if code.len() == 0 {
95 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
96 return;
97 }
98 } else if c == '}' {
99 closed = true;
100 } else {
101 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
102 return;
103 }
104 }
105
106 if !closed {
107 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
108 }
109
110 if code.len() == 0 {
111 errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
112 return;
113 }
114
115 match u32::from_str_radix(&code, 16) {
116 Ok(code_u32) if code_u32 > 0x10FFFF => {
117 errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
118 }
119 Ok(_) => {
120 // Valid escape code
121 }
122 Err(_) => {
123 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
124 }
125 }
126 }
127 CodePoint => {
128 // These code points must always be escaped
129 if text == "\t" || text == "\r" {
130 errors.push(SyntaxError::new(UnescapedCodepoint, range));
131 }
132 }
133 }
134 }
135
136 if !components.has_closing_quote {
137 errors.push(SyntaxError::new(UnclosedChar, node.syntax().range()));
138 }
139
140 if len == 0 {
141 errors.push(SyntaxError::new(EmptyChar, node.syntax().range()));
142 }
143
144 if len > 1 {
145 errors.push(SyntaxError::new(LongChar, node.syntax().range()));
146 }
147}
148
149fn is_ascii_escape(code: char) -> bool {
150 match code {
151 '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,
152 _ => false,
153 }
154}
155
156#[cfg(test)]
157mod test {
158 use crate::SourceFileNode;
159
160 fn build_file(literal: &str) -> SourceFileNode {
161 let src = format!("const C: char = '{}';", literal);
162 SourceFileNode::parse(&src)
163 }
164
165 fn assert_valid_char(literal: &str) {
166 let file = build_file(literal);
167 assert!(
168 file.errors().len() == 0,
169 "Errors for literal '{}': {:?}",
170 literal,
171 file.errors()
172 );
173 }
174
175 fn assert_invalid_char(literal: &str) {
176 let file = build_file(literal);
177 assert!(file.errors().len() > 0);
178 }
179
180 #[test]
181 fn test_ansi_codepoints() {
182 for byte in 0..=255u8 {
183 match byte {
184 b'\n' | b'\r' | b'\t' => assert_invalid_char(&(byte as char).to_string()),
185 b'\'' | b'\\' => { /* Ignore character close and backslash */ }
186 _ => assert_valid_char(&(byte as char).to_string()),
187 }
188 }
189 }
190
191 #[test]
192 fn test_unicode_codepoints() {
193 let valid = ["Ƒ", "バ", "メ", "﷽"];
194 for c in &valid {
195 assert_valid_char(c);
196 }
197 }
198
199 #[test]
200 fn test_unicode_multiple_codepoints() {
201 let invalid = ["नी", "👨‍👨‍"];
202 for c in &invalid {
203 assert_invalid_char(c);
204 }
205 }
206
207 #[test]
208 fn test_valid_ascii_escape() {
209 let valid = [
210 r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b",
211 ];
212 for c in &valid {
213 assert_valid_char(c);
214 }
215 }
216
217 #[test]
218 fn test_invalid_ascii_escape() {
219 let invalid = [r"\a", r"\?", r"\"];
220 for c in &invalid {
221 assert_invalid_char(c);
222 }
223 }
224
225 #[test]
226 fn test_valid_ascii_code_escape() {
227 let valid = [r"\x00", r"\x7F", r"\x55"];
228 for c in &valid {
229 assert_valid_char(c);
230 }
231 }
232
233 #[test]
234 fn test_invalid_ascii_code_escape() {
235 let invalid = [r"\x", r"\x7", r"\xF0"];
236 for c in &invalid {
237 assert_invalid_char(c);
238 }
239 }
240
241 #[test]
242 fn test_valid_unicode_escape() {
243 let valid = [
244 r"\u{FF}",
245 r"\u{0}",
246 r"\u{F}",
247 r"\u{10FFFF}",
248 r"\u{1_0__FF___FF_____}",
249 ];
250 for c in &valid {
251 assert_valid_char(c);
252 }
253 }
254
255 #[test]
256 fn test_invalid_unicode_escape() {
257 let invalid = [
258 r"\u",
259 r"\u{}",
260 r"\u{",
261 r"\u{FF",
262 r"\u{FFFFFF}",
263 r"\u{_F}",
264 r"\u{00FFFFF}",
265 r"\u{110000}",
266 ];
267 for c in &invalid {
268 assert_invalid_char(c);
269 }
270 }
271}
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
new file mode 100644
index 000000000..63f9bad24
--- /dev/null
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -0,0 +1,270 @@
1use std::u32;
2
3use arrayvec::ArrayString;
4
5use crate::{
6 ast::{self, AstNode},
7 string_lexing::{self, CharComponentKind},
8 TextRange,
9 yellow::{
10 SyntaxError,
11 SyntaxErrorKind::*,
12 },
13};
14
15pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
16 let literal_text = node.text();
17 let literal_range = node.syntax().range();
18 let mut components = string_lexing::parse_char_literal(literal_text);
19 let mut len = 0;
20 for component in &mut components {
21 len += 1;
22 let text = &literal_text[component.range];
23 let range = component.range + literal_range.start();
24 validate_char_component(text, component.kind, range, errors);
25 }
26
27 if !components.has_closing_quote {
28 errors.push(SyntaxError::new(UnclosedChar, literal_range));
29 }
30
31 if len == 0 {
32 errors.push(SyntaxError::new(EmptyChar, literal_range));
33 }
34
35 if len > 1 {
36 errors.push(SyntaxError::new(OverlongChar, literal_range));
37 }
38}
39
40pub(crate) fn validate_char_component(
41 text: &str,
42 kind: CharComponentKind,
43 range: TextRange,
44 errors: &mut Vec<SyntaxError>,
45) {
46 // Validate escapes
47 use self::CharComponentKind::*;
48 match kind {
49 AsciiEscape => {
50 if text.len() == 1 {
51 // Escape sequence consists only of leading `\`
52 errors.push(SyntaxError::new(EmptyAsciiEscape, range));
53 } else {
54 let escape_code = text.chars().skip(1).next().unwrap();
55 if !is_ascii_escape(escape_code) {
56 errors.push(SyntaxError::new(InvalidAsciiEscape, range));
57 }
58 }
59 }
60 AsciiCodeEscape => {
61 // An AsciiCodeEscape has 4 chars, example: `\xDD`
62 if text.len() < 4 {
63 errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
64 } else {
65 assert!(
66 text.chars().count() == 4,
67 "AsciiCodeEscape cannot be longer than 4 chars"
68 );
69
70 match u8::from_str_radix(&text[2..], 16) {
71 Ok(code) if code < 128 => { /* Escape code is valid */ }
72 Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
73 Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
74 }
75 }
76 }
77 UnicodeEscape => {
78 assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
79
80 if text.len() == 2 {
81 // No starting `{`
82 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
83 return;
84 }
85
86 if text.len() == 3 {
87 // Only starting `{`
88 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
89 return;
90 }
91
92 let mut code = ArrayString::<[_; 6]>::new();
93 let mut closed = false;
94 for c in text[3..].chars() {
95 assert!(!closed, "no characters after escape is closed");
96
97 if c.is_digit(16) {
98 if code.len() == 6 {
99 errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
100 return;
101 }
102
103 code.push(c);
104 } else if c == '_' {
105 // Reject leading _
106 if code.len() == 0 {
107 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
108 return;
109 }
110 } else if c == '}' {
111 closed = true;
112 } else {
113 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
114 return;
115 }
116 }
117
118 if !closed {
119 errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
120 }
121
122 if code.len() == 0 {
123 errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
124 return;
125 }
126
127 match u32::from_str_radix(&code, 16) {
128 Ok(code_u32) if code_u32 > 0x10FFFF => {
129 errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
130 }
131 Ok(_) => {
132 // Valid escape code
133 }
134 Err(_) => {
135 errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
136 }
137 }
138 }
139 CodePoint => {
140 // These code points must always be escaped
141 if text == "\t" || text == "\r" {
142 errors.push(SyntaxError::new(UnescapedCodepoint, range));
143 }
144 }
145 }
146}
147
/// Returns `true` if `code` is one of the characters that may follow `\`
/// in a simple escape: `\\`, `\'`, `\"`, `\n`, `\r`, `\t` or `\0`.
fn is_ascii_escape(code: char) -> bool {
    const ESCAPE_CODES: [char; 7] = ['\\', '\'', '"', 'n', 'r', 't', '0'];
    ESCAPE_CODES.contains(&code)
}
154
#[cfg(test)]
mod test {
    use crate::SourceFileNode;

    /// Parses a source file whose only item is a constant initialised with `'<literal>'`.
    fn build_file(literal: &str) -> SourceFileNode {
        SourceFileNode::parse(&format!("const C: char = '{}';", literal))
    }

    /// Asserts that the file built around `literal` has no errors.
    fn assert_valid_char(literal: &str) {
        let file = build_file(literal);
        assert!(
            file.errors().len() == 0,
            "Errors for literal '{}': {:?}",
            literal,
            file.errors()
        );
    }

    /// Asserts that the file built around `literal` has at least one error.
    fn assert_invalid_char(literal: &str) {
        let file = build_file(literal);
        assert!(file.errors().len() > 0);
    }

    /// Runs `assert_valid_char` on every literal, in order.
    fn assert_all_valid(literals: &[&str]) {
        for literal in literals {
            assert_valid_char(literal);
        }
    }

    /// Runs `assert_invalid_char` on every literal, in order.
    fn assert_all_invalid(literals: &[&str]) {
        for literal in literals {
            assert_invalid_char(literal);
        }
    }

    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..=255u8 {
            let literal = (byte as char).to_string();
            match byte {
                b'\n' | b'\r' | b'\t' => assert_invalid_char(&literal),
                b'\'' | b'\\' => { /* Ignore character close and backslash */ }
                _ => assert_valid_char(&literal),
            }
        }
    }

    #[test]
    fn test_unicode_codepoints() {
        assert_all_valid(&["Ƒ", "バ", "メ", "﷽"]);
    }

    #[test]
    fn test_unicode_multiple_codepoints() {
        assert_all_invalid(&["नी", "👨‍👨‍"]);
    }

    #[test]
    fn test_valid_ascii_escape() {
        assert_all_valid(&[
            r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b",
        ]);
    }

    #[test]
    fn test_invalid_ascii_escape() {
        assert_all_invalid(&[r"\a", r"\?", r"\"]);
    }

    #[test]
    fn test_valid_ascii_code_escape() {
        assert_all_valid(&[r"\x00", r"\x7F", r"\x55"]);
    }

    #[test]
    fn test_invalid_ascii_code_escape() {
        assert_all_invalid(&[r"\x", r"\x7", r"\xF0"]);
    }

    #[test]
    fn test_valid_unicode_escape() {
        assert_all_valid(&[
            r"\u{FF}",
            r"\u{0}",
            r"\u{F}",
            r"\u{10FFFF}",
            r"\u{1_0__FF___FF_____}",
        ]);
    }

    #[test]
    fn test_invalid_unicode_escape() {
        assert_all_invalid(&[
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ]);
    }
}
diff --git a/crates/ra_syntax/src/validation/mod.rs b/crates/ra_syntax/src/validation/mod.rs
new file mode 100644
index 000000000..2ff0bc26d
--- /dev/null
+++ b/crates/ra_syntax/src/validation/mod.rs
@@ -0,0 +1,20 @@
1use crate::{
2 algo::visit::{visitor_ctx, VisitorCtx},
3 ast,
4 SourceFileNode,
5 yellow::SyntaxError,
6};
7
8mod char;
9mod string;
10
11pub(crate) fn validate(file: &SourceFileNode) -> Vec<SyntaxError> {
12 let mut errors = Vec::new();
13 for node in file.syntax().descendants() {
14 let _ = visitor_ctx(&mut errors)
15 .visit::<ast::Char, _>(self::char::validate_char_node)
16 .visit::<ast::String, _>(self::string::validate_string_node)
17 .accept(node);
18 }
19 errors
20}
diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs
new file mode 100644
index 000000000..089879d15
--- /dev/null
+++ b/crates/ra_syntax/src/validation/string.rs
@@ -0,0 +1,168 @@
1use crate::{
2 ast::{self, AstNode},
3 string_lexing::{self, StringComponentKind},
4 yellow::{
5 SyntaxError,
6 SyntaxErrorKind::*,
7 },
8};
9
10use super::char;
11
12pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxError>) {
13 let literal_text = node.text();
14 let literal_range = node.syntax().range();
15 let mut components = string_lexing::parse_string_literal(literal_text);
16 for component in &mut components {
17 let range = component.range + literal_range.start();
18
19 match component.kind {
20 StringComponentKind::Char(kind) => {
21 // Chars must escape \t, \n and \r codepoints, but strings don't
22 let text = &literal_text[component.range];
23 match text {
24 "\t" | "\n" | "\r" => { /* always valid */ }
25 _ => char::validate_char_component(text, kind, range, errors),
26 }
27 }
28 StringComponentKind::IgnoreNewline => { /* always valid */ }
29 }
30 }
31
32 if !components.has_closing_quote {
33 errors.push(SyntaxError::new(UnclosedString, literal_range));
34 }
35}
36
37#[cfg(test)]
38mod test {
39 use crate::SourceFileNode;
40
41 fn build_file(literal: &str) -> SourceFileNode {
42 let src = format!(r#"const S: &'static str = "{}";"#, literal);
43 println!("Source: {}", src);
44 SourceFileNode::parse(&src)
45 }
46
47 fn assert_valid_str(literal: &str) {
48 let file = build_file(literal);
49 assert!(
50 file.errors().len() == 0,
51 "Errors for literal '{}': {:?}",
52 literal,
53 file.errors()
54 );
55 }
56
57 fn assert_invalid_str(literal: &str) {
58 let file = build_file(literal);
59 assert!(file.errors().len() > 0);
60 }
61
62 #[test]
63 fn test_ansi_codepoints() {
64 for byte in 0..=255u8 {
65 match byte {
66 b'\"' | b'\\' => { /* Ignore string close and backslash */ }
67 _ => assert_valid_str(&(byte as char).to_string()),
68 }
69 }
70 }
71
72 #[test]
73 fn test_unicode_codepoints() {
74 let valid = ["Ƒ", "バ", "メ", "﷽"];
75 for c in &valid {
76 assert_valid_str(c);
77 }
78 }
79
80 #[test]
81 fn test_unicode_multiple_codepoints() {
82 let valid = ["नी", "👨‍👨‍"];
83 for c in &valid {
84 assert_valid_str(c);
85 }
86 }
87
88 #[test]
89 fn test_valid_ascii_escape() {
90 let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
91 for c in &valid {
92 assert_valid_str(c);
93 }
94 }
95
96 #[test]
97 fn test_invalid_ascii_escape() {
98 let invalid = [r"\a", r"\?", r"\"];
99 for c in &invalid {
100 assert_invalid_str(c);
101 }
102 }
103
104 #[test]
105 fn test_valid_ascii_code_escape() {
106 let valid = [r"\x00", r"\x7F", r"\x55"];
107 for c in &valid {
108 assert_valid_str(c);
109 }
110 }
111
112 #[test]
113 fn test_invalid_ascii_code_escape() {
114 let invalid = [r"\x", r"\x7", r"\xF0"];
115 for c in &invalid {
116 assert_invalid_str(c);
117 }
118 }
119
120 #[test]
121 fn test_valid_unicode_escape() {
122 let valid = [
123 r"\u{FF}",
124 r"\u{0}",
125 r"\u{F}",
126 r"\u{10FFFF}",
127 r"\u{1_0__FF___FF_____}",
128 ];
129 for c in &valid {
130 assert_valid_str(c);
131 }
132 }
133
134 #[test]
135 fn test_invalid_unicode_escape() {
136 let invalid = [
137 r"\u",
138 r"\u{}",
139 r"\u{",
140 r"\u{FF",
141 r"\u{FFFFFF}",
142 r"\u{_F}",
143 r"\u{00FFFFF}",
144 r"\u{110000}",
145 ];
146 for c in &invalid {
147 assert_invalid_str(c);
148 }
149 }
150
151 #[test]
152 fn test_mixed() {
153 assert_valid_str(
154 r"This is the tale of a string
155with a newline in between, some emoji (👨‍👨‍) here and there,
156unicode escapes like this: \u{1FFBB} and weird stuff like
157this ﷽",
158 );
159 }
160
161 #[test]
162 fn test_ignore_newline() {
163 assert_valid_str(
164 "Hello \
165 World",
166 );
167 }
168}
diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs
index c524adf39..cf7b1d495 100644
--- a/crates/ra_syntax/src/yellow/syntax_error.rs
+++ b/crates/ra_syntax/src/yellow/syntax_error.rs
@@ -71,7 +71,7 @@ pub enum SyntaxErrorKind {
71 UnescapedCodepoint, 71 UnescapedCodepoint,
72 EmptyChar, 72 EmptyChar,
73 UnclosedChar, 73 UnclosedChar,
74 LongChar, 74 OverlongChar,
75 EmptyAsciiEscape, 75 EmptyAsciiEscape,
76 InvalidAsciiEscape, 76 InvalidAsciiEscape,
77 TooShortAsciiCodeEscape, 77 TooShortAsciiCodeEscape,
@@ -82,6 +82,7 @@ pub enum SyntaxErrorKind {
82 EmptyUnicodeEcape, 82 EmptyUnicodeEcape,
83 OverlongUnicodeEscape, 83 OverlongUnicodeEscape,
84 UnicodeEscapeOutOfRange, 84 UnicodeEscapeOutOfRange,
85 UnclosedString,
85} 86}
86 87
87#[derive(Debug, Clone, PartialEq, Eq, Hash)] 88#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -96,7 +97,7 @@ impl fmt::Display for SyntaxErrorKind {
96 InvalidAsciiEscape => write!(f, "Invalid escape sequence"), 97 InvalidAsciiEscape => write!(f, "Invalid escape sequence"),
97 EmptyChar => write!(f, "Empty char literal"), 98 EmptyChar => write!(f, "Empty char literal"),
98 UnclosedChar => write!(f, "Unclosed char literal"), 99 UnclosedChar => write!(f, "Unclosed char literal"),
99 LongChar => write!(f, "Char literal should be one character long"), 100 OverlongChar => write!(f, "Char literal should be one character long"),
100 TooShortAsciiCodeEscape => write!(f, "Escape sequence should have two digits"), 101 TooShortAsciiCodeEscape => write!(f, "Escape sequence should have two digits"),
101 AsciiCodeEscapeOutOfRange => { 102 AsciiCodeEscapeOutOfRange => {
102 write!(f, "Escape sequence should be between \\x00 and \\x7F") 103 write!(f, "Escape sequence should be between \\x00 and \\x7F")
@@ -109,6 +110,7 @@ impl fmt::Display for SyntaxErrorKind {
109 write!(f, "Unicode escape sequence should have at most 6 digits") 110 write!(f, "Unicode escape sequence should have at most 6 digits")
110 } 111 }
111 UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"), 112 UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"),
113 UnclosedString => write!(f, "Unclosed string literal"),
112 ParseError(msg) => write!(f, "{}", msg.0), 114 ParseError(msg) => write!(f, "{}", msg.0),
113 } 115 }
114 } 116 }