aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/validation/byte_string.rs
diff options
context:
space:
mode:
author bors[bot] <bors[bot]@users.noreply.github.com> 2019-05-07 17:43:10 +0100
committer bors[bot] <bors[bot]@users.noreply.github.com> 2019-05-07 17:43:10 +0100
commit d3efedb752bb2198796603d8a479a5e3ee472a97 (patch)
tree ca6a4aee6ad4077a869a932a18c6c8d134406f8c /crates/ra_syntax/src/validation/byte_string.rs
parent ef782adc293deb287128f005dbab2038ba3ccdc1 (diff)
parent 313314e14b629ebf50389dbd2d440bda922f6ae7 (diff)
Merge #1253
1253: Share literal validation logic with compiler r=matklad a=matklad This is neat: the unescape module is literally what the compiler is using right now: https://github.com/rust-lang/rust/blob/c6ac57564852cb6e2d0db60f7b46d9eb98d4b449/src/libsyntax/parse/unescape.rs So, yeah, code sharing via copy-paste! Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_syntax/src/validation/byte_string.rs')
-rw-r--r-- crates/ra_syntax/src/validation/byte_string.rs 169
1 files changed, 0 insertions, 169 deletions
diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs
deleted file mode 100644
index 1d48c2d9b..000000000
--- a/crates/ra_syntax/src/validation/byte_string.rs
+++ /dev/null
@@ -1,169 +0,0 @@
1use crate::{
2 string_lexing::{self, StringComponentKind},
3 SyntaxError,
4 SyntaxErrorKind::*,
5 SyntaxToken,
6};
7
8use super::byte;
9
10pub(crate) fn validate_byte_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) {
11 let literal_text = node.text();
12 let literal_range = node.range();
13 let mut components = string_lexing::parse_quoted_literal(Some('b'), '"', literal_text);
14 for component in &mut components {
15 let range = component.range + literal_range.start();
16
17 match component.kind {
18 StringComponentKind::IgnoreNewline => { /* always valid */ }
19 _ => {
20 // Chars must escape \t, \n and \r codepoints, but strings don't
21 let text = &literal_text[component.range];
22 match text {
23 "\t" | "\n" | "\r" => { /* always valid */ }
24 _ => byte::validate_byte_component(text, component.kind, range, errors),
25 }
26 }
27 }
28 }
29
30 if !components.has_closing_quote {
31 errors.push(SyntaxError::new(UnclosedString, literal_range));
32 }
33
34 if let Some(range) = components.suffix {
35 errors.push(SyntaxError::new(InvalidSuffix, range + literal_range.start()));
36 }
37}
38
39#[cfg(test)]
40mod test {
41 use crate::{SourceFile, TreeArc};
42
43 fn build_file(literal: &str) -> TreeArc<SourceFile> {
44 let src = format!(r#"const S: &'static [u8] = b"{}";"#, literal);
45 println!("Source: {}", src);
46 SourceFile::parse(&src)
47 }
48
49 fn assert_valid_str(literal: &str) {
50 let file = build_file(literal);
51 assert!(file.errors().len() == 0, "Errors for literal '{}': {:?}", literal, file.errors());
52 }
53
54 fn assert_invalid_str(literal: &str) {
55 let file = build_file(literal);
56 assert!(file.errors().len() > 0);
57 }
58
59 #[test]
60 fn test_ansi_codepoints() {
61 for byte in 0..128 {
62 match byte {
63 b'\"' | b'\\' => { /* Ignore string close and backslash */ }
64 _ => assert_valid_str(&(byte as char).to_string()),
65 }
66 }
67
68 for byte in 128..=255u8 {
69 assert_invalid_str(&(byte as char).to_string());
70 }
71 }
72
73 #[test]
74 fn test_unicode_codepoints() {
75 let invalid = ["Ƒ", "バ", "メ", "﷽"];
76 for c in &invalid {
77 assert_invalid_str(c);
78 }
79 }
80
81 #[test]
82 fn test_unicode_multiple_codepoints() {
83 let invalid = ["नी", "👨‍👨‍"];
84 for c in &invalid {
85 assert_invalid_str(c);
86 }
87 }
88
89 #[test]
90 fn test_valid_ascii_escape() {
91 let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
92 for c in &valid {
93 assert_valid_str(c);
94 }
95 }
96
97 #[test]
98 fn test_invalid_ascii_escape() {
99 let invalid = [r"\a", r"\?", r"\"];
100 for c in &invalid {
101 assert_invalid_str(c);
102 }
103 }
104
105 #[test]
106 fn test_valid_ascii_code_escape() {
107 let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
108 for c in &valid {
109 assert_valid_str(c);
110 }
111 }
112
113 #[test]
114 fn test_invalid_ascii_code_escape() {
115 let invalid = [r"\x", r"\x7"];
116 for c in &invalid {
117 assert_invalid_str(c);
118 }
119 }
120
121 #[test]
122 fn test_invalid_unicode_escape() {
123 let well_formed = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
124 for c in &well_formed {
125 assert_invalid_str(c);
126 }
127
128 let invalid = [
129 r"\u",
130 r"\u{}",
131 r"\u{",
132 r"\u{FF",
133 r"\u{FFFFFF}",
134 r"\u{_F}",
135 r"\u{00FFFFF}",
136 r"\u{110000}",
137 ];
138 for c in &invalid {
139 assert_invalid_str(c);
140 }
141 }
142
143 #[test]
144 fn test_mixed_invalid() {
145 assert_invalid_str(
146 r"This is the tale of a string
147with a newline in between, some emoji (👨‍👨‍) here and there,
148unicode escapes like this: \u{1FFBB} and weird stuff like
149this ﷽",
150 );
151 }
152
153 #[test]
154 fn test_mixed_valid() {
155 assert_valid_str(
156 r"This is the tale of a string
157with a newline in between, no emoji at all,
158nor unicode escapes or weird stuff",
159 );
160 }
161
162 #[test]
163 fn test_ignore_newline() {
164 assert_valid_str(
165 "Hello \
166 World",
167 );
168 }
169}