aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/validation/string.rs
diff options
context:
space:
mode:
authorbors[bot] <bors[bot]@users.noreply.github.com>2019-05-07 17:43:10 +0100
committerbors[bot] <bors[bot]@users.noreply.github.com>2019-05-07 17:43:10 +0100
commitd3efedb752bb2198796603d8a479a5e3ee472a97 (patch)
treeca6a4aee6ad4077a869a932a18c6c8d134406f8c /crates/ra_syntax/src/validation/string.rs
parentef782adc293deb287128f005dbab2038ba3ccdc1 (diff)
parent313314e14b629ebf50389dbd2d440bda922f6ae7 (diff)
Merge #1253
1253: Share literal validation logic with compiler r=matklad a=matklad This is neat: the unescape module is literally what the compiler is using right now: https://github.com/rust-lang/rust/blob/c6ac57564852cb6e2d0db60f7b46d9eb98d4b449/src/libsyntax/parse/unescape.rs So, yeah, code sharing via copy-paste! Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_syntax/src/validation/string.rs')
-rw-r--r--crates/ra_syntax/src/validation/string.rs154
1 files changed, 0 insertions, 154 deletions
diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs
deleted file mode 100644
index fc2f1b992..000000000
--- a/crates/ra_syntax/src/validation/string.rs
+++ /dev/null
@@ -1,154 +0,0 @@
1use crate::{
2 string_lexing,
3 SyntaxError,
4 SyntaxErrorKind::*,
5 SyntaxToken,
6};
7
8use super::char;
9
10pub(crate) fn validate_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) {
11 let literal_text = node.text();
12 let literal_range = node.range();
13 let mut components = string_lexing::parse_quoted_literal(None, '"', literal_text);
14 for component in &mut components {
15 let range = component.range + literal_range.start();
16
17 // Chars must escape \t, \n and \r codepoints, but strings don't
18 let text = &literal_text[component.range];
19 match text {
20 "\t" | "\n" | "\r" => { /* always valid */ }
21 _ => char::validate_char_component(text, component.kind, range, errors),
22 }
23 }
24
25 if !components.has_closing_quote {
26 errors.push(SyntaxError::new(UnclosedString, literal_range));
27 }
28
29 if let Some(range) = components.suffix {
30 errors.push(SyntaxError::new(InvalidSuffix, range + literal_range.start()));
31 }
32}
33
#[cfg(test)]
mod test {
    use crate::{SourceFile, TreeArc};

    /// Embeds `literal` between double quotes in a `const` declaration and
    /// parses the resulting source text.
    ///
    /// The generated source is printed so that it shows up in the captured
    /// output of a failing test.
    fn build_file(literal: &str) -> TreeArc<SourceFile> {
        let src = format!(r#"const S: &'static str = "{}";"#, literal);
        println!("Source: {}", src);
        SourceFile::parse(&src)
    }

    /// Asserts that parsing a string literal with the given contents reports no errors.
    fn assert_valid_str(literal: &str) {
        let file = build_file(literal);
        let errors = file.errors();
        assert!(errors.is_empty(), "Errors for literal '{}': {:?}", literal, errors);
    }

    /// Asserts that parsing a string literal with the given contents reports at least one error.
    fn assert_invalid_str(literal: &str) {
        let file = build_file(literal);
        assert!(
            !file.errors().is_empty(),
            "Expected errors for literal '{}', but none were reported",
            literal
        );
    }

    /// Runs `assert_valid_str` on every entry of `literals`.
    fn assert_all_valid(literals: &[&str]) {
        for literal in literals {
            assert_valid_str(literal);
        }
    }

    /// Runs `assert_invalid_str` on every entry of `literals`.
    fn assert_all_invalid(literals: &[&str]) {
        for literal in literals {
            assert_invalid_str(literal);
        }
    }

    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..=255u8 {
            match byte {
                b'\"' | b'\\' => { /* Ignore string close and backslash */ }
                _ => assert_valid_str(&(byte as char).to_string()),
            }
        }
    }

    #[test]
    fn test_unicode_codepoints() {
        assert_all_valid(&["Ƒ", "バ", "メ", "﷽"]);
    }

    #[test]
    fn test_unicode_multiple_codepoints() {
        assert_all_valid(&["नी", "👨‍👨‍"]);
    }

    #[test]
    fn test_valid_ascii_escape() {
        assert_all_valid(&[r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"]);
    }

    #[test]
    fn test_invalid_ascii_escape() {
        assert_all_invalid(&[r"\a", r"\?", r"\"]);
    }

    #[test]
    fn test_valid_ascii_code_escape() {
        assert_all_valid(&[r"\x00", r"\x7F", r"\x55"]);
    }

    #[test]
    fn test_invalid_ascii_code_escape() {
        assert_all_invalid(&[r"\x", r"\x7", r"\xF0"]);
    }

    #[test]
    fn test_valid_unicode_escape() {
        assert_all_valid(&[
            r"\u{FF}",
            r"\u{0}",
            r"\u{F}",
            r"\u{10FFFF}",
            r"\u{1_0__FF___FF_____}",
        ]);
    }

    #[test]
    fn test_invalid_unicode_escape() {
        assert_all_invalid(&[
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ]);
    }

    #[test]
    fn test_mixed() {
        assert_valid_str(
            r"This is the tale of a string
with a newline in between, some emoji (👨‍👨‍) here and there,
unicode escapes like this: \u{1FFBB} and weird stuff like
this ﷽",
        );
    }

    #[test]
    fn test_ignore_newline() {
        // The backslash and the newline are spelled with escapes so that they
        // survive into the generated source. Writing a real line continuation
        // in this (non-raw) literal would let the Rust compiler strip it, and
        // the parsed source would only ever contain `Hello World`.
        assert_valid_str("Hello \\\n             World");
    }
}