diff options
Diffstat (limited to 'crates/ra_syntax/src/validation/string.rs')
-rw-r--r-- | crates/ra_syntax/src/validation/string.rs | 168 |
1 files changed, 168 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs new file mode 100644 index 000000000..089879d15 --- /dev/null +++ b/crates/ra_syntax/src/validation/string.rs | |||
@@ -0,0 +1,168 @@ | |||
1 | use crate::{ | ||
2 | ast::{self, AstNode}, | ||
3 | string_lexing::{self, StringComponentKind}, | ||
4 | yellow::{ | ||
5 | SyntaxError, | ||
6 | SyntaxErrorKind::*, | ||
7 | }, | ||
8 | }; | ||
9 | |||
10 | use super::char; | ||
11 | |||
12 | pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxError>) { | ||
13 | let literal_text = node.text(); | ||
14 | let literal_range = node.syntax().range(); | ||
15 | let mut components = string_lexing::parse_string_literal(literal_text); | ||
16 | for component in &mut components { | ||
17 | let range = component.range + literal_range.start(); | ||
18 | |||
19 | match component.kind { | ||
20 | StringComponentKind::Char(kind) => { | ||
21 | // Chars must escape \t, \n and \r codepoints, but strings don't | ||
22 | let text = &literal_text[component.range]; | ||
23 | match text { | ||
24 | "\t" | "\n" | "\r" => { /* always valid */ } | ||
25 | _ => char::validate_char_component(text, kind, range, errors), | ||
26 | } | ||
27 | } | ||
28 | StringComponentKind::IgnoreNewline => { /* always valid */ } | ||
29 | } | ||
30 | } | ||
31 | |||
32 | if !components.has_closing_quote { | ||
33 | errors.push(SyntaxError::new(UnclosedString, literal_range)); | ||
34 | } | ||
35 | } | ||
36 | |||
#[cfg(test)]
mod test {
    use crate::SourceFileNode;

    /// Wraps `literal` in a `const` string item and parses the resulting source.
    ///
    /// `literal` is spliced verbatim between the double quotes, so callers must pass the
    /// exact characters the parser should see (e.g. use raw strings for escape sequences).
    fn build_file(literal: &str) -> SourceFileNode {
        let src = format!(r#"const S: &'static str = "{}";"#, literal);
        // Captured by the test harness; only shown when a test fails.
        println!("Source: {}", src);
        SourceFileNode::parse(&src)
    }

    /// Asserts that a file containing `literal` as a string produces no errors.
    fn assert_valid_str(literal: &str) {
        let file = build_file(literal);
        let errors = file.errors();
        assert!(
            errors.is_empty(),
            "Errors for literal '{}': {:?}",
            literal,
            errors
        );
    }

    /// Asserts that a file containing `literal` as a string produces at least one error.
    fn assert_invalid_str(literal: &str) {
        let file = build_file(literal);
        let errors = file.errors();
        assert!(
            !errors.is_empty(),
            "Expected errors for literal '{}', but found none",
            literal
        );
    }

    /// Every codepoint in U+0000..=U+00FF is accepted unescaped, apart from the closing
    /// quote and the backslash, which would change how the literal is lexed.
    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..=255u8 {
            match byte {
                b'\"' | b'\\' => { /* Ignore string close and backslash */ }
                _ => assert_valid_str(&(byte as char).to_string()),
            }
        }
    }

    #[test]
    fn test_unicode_codepoints() {
        let valid = ["Ƒ", "バ", "メ", "﷽"];
        for c in &valid {
            assert_valid_str(c);
        }
    }

    #[test]
    fn test_unicode_multiple_codepoints() {
        let valid = ["नी", "👨👨"];
        for c in &valid {
            assert_valid_str(c);
        }
    }

    #[test]
    fn test_valid_ascii_escape() {
        let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
        for c in &valid {
            assert_valid_str(c);
        }
    }

    #[test]
    fn test_invalid_ascii_escape() {
        let invalid = [r"\a", r"\?", r"\"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }

    #[test]
    fn test_valid_ascii_code_escape() {
        let valid = [r"\x00", r"\x7F", r"\x55"];
        for c in &valid {
            assert_valid_str(c);
        }
    }

    #[test]
    fn test_invalid_ascii_code_escape() {
        let invalid = [r"\x", r"\x7", r"\xF0"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }

    #[test]
    fn test_valid_unicode_escape() {
        let valid = [
            r"\u{FF}",
            r"\u{0}",
            r"\u{F}",
            r"\u{10FFFF}",
            r"\u{1_0__FF___FF_____}",
        ];
        for c in &valid {
            assert_valid_str(c);
        }
    }

    #[test]
    fn test_invalid_unicode_escape() {
        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }

    #[test]
    fn test_mixed() {
        assert_valid_str(
            r"This is the tale of a string
with a newline in between, some emoji (👨👨) here and there,
unicode escapes like this: \u{1FFBB} and weird stuff like
this ﷽",
        );
    }

    /// A backslash directly followed by a newline must be accepted inside a string.
    #[test]
    fn test_ignore_newline() {
        // The backslash and the newline are spelled out as escapes on purpose: a literal
        // written as `"Hello \<newline>World"` has its continuation resolved by the Rust
        // compiler, so the parser under test would only ever see `Hello World`.
        assert_valid_str("Hello \\\n             World");
    }
}