diff options
author | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-05-07 17:43:10 +0100 |
---|---|---|
committer | bors[bot] <bors[bot]@users.noreply.github.com> | 2019-05-07 17:43:10 +0100 |
commit | d3efedb752bb2198796603d8a479a5e3ee472a97 (patch) | |
tree | ca6a4aee6ad4077a869a932a18c6c8d134406f8c /crates/ra_syntax/src/validation/byte_string.rs | |
parent | ef782adc293deb287128f005dbab2038ba3ccdc1 (diff) | |
parent | 313314e14b629ebf50389dbd2d440bda922f6ae7 (diff) |
Merge #1253
1253: Share literal validation logic with compiler r=matklad a=matklad
This is neat: the unescape module is literally what the compiler is using right now:
https://github.com/rust-lang/rust/blob/c6ac57564852cb6e2d0db60f7b46d9eb98d4b449/src/libsyntax/parse/unescape.rs
So, yeah, code sharing via copy-paste!
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_syntax/src/validation/byte_string.rs')
-rw-r--r-- | crates/ra_syntax/src/validation/byte_string.rs | 169 |
1 files changed, 0 insertions, 169 deletions
diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs deleted file mode 100644 index 1d48c2d9b..000000000 --- a/crates/ra_syntax/src/validation/byte_string.rs +++ /dev/null | |||
@@ -1,169 +0,0 @@ | |||
1 | use crate::{ | ||
2 | string_lexing::{self, StringComponentKind}, | ||
3 | SyntaxError, | ||
4 | SyntaxErrorKind::*, | ||
5 | SyntaxToken, | ||
6 | }; | ||
7 | |||
8 | use super::byte; | ||
9 | |||
10 | pub(crate) fn validate_byte_string_node(node: SyntaxToken, errors: &mut Vec<SyntaxError>) { | ||
11 | let literal_text = node.text(); | ||
12 | let literal_range = node.range(); | ||
13 | let mut components = string_lexing::parse_quoted_literal(Some('b'), '"', literal_text); | ||
14 | for component in &mut components { | ||
15 | let range = component.range + literal_range.start(); | ||
16 | |||
17 | match component.kind { | ||
18 | StringComponentKind::IgnoreNewline => { /* always valid */ } | ||
19 | _ => { | ||
20 | // Chars must escape \t, \n and \r codepoints, but strings don't | ||
21 | let text = &literal_text[component.range]; | ||
22 | match text { | ||
23 | "\t" | "\n" | "\r" => { /* always valid */ } | ||
24 | _ => byte::validate_byte_component(text, component.kind, range, errors), | ||
25 | } | ||
26 | } | ||
27 | } | ||
28 | } | ||
29 | |||
30 | if !components.has_closing_quote { | ||
31 | errors.push(SyntaxError::new(UnclosedString, literal_range)); | ||
32 | } | ||
33 | |||
34 | if let Some(range) = components.suffix { | ||
35 | errors.push(SyntaxError::new(InvalidSuffix, range + literal_range.start())); | ||
36 | } | ||
37 | } | ||
38 | |||
/// Tests for byte string literal validation, driven through the full `SourceFile` parser.
#[cfg(test)]
mod test {
    use crate::{SourceFile, TreeArc};

    /// Parses a source file containing a single byte string constant whose body is `literal`.
    fn build_file(literal: &str) -> TreeArc<SourceFile> {
        let src = format!(r#"const S: &'static [u8] = b"{}";"#, literal);
        // Shown by the test harness only when a test fails.
        println!("Source: {}", src);
        SourceFile::parse(&src)
    }

    /// Asserts that `literal`, used as the body of a byte string, produces no syntax errors.
    fn assert_valid_str(literal: &str) {
        let errors = build_file(literal).errors();
        assert!(errors.is_empty(), "Errors for literal '{}': {:?}", literal, errors);
    }

    /// Asserts that `literal`, used as the body of a byte string, produces at least one error.
    fn assert_invalid_str(literal: &str) {
        let errors = build_file(literal).errors();
        assert!(
            !errors.is_empty(),
            "Expected errors for literal '{}', but none were reported",
            literal
        );
    }

    #[test]
    fn test_ansi_codepoints() {
        for byte in 0..128u8 {
            match byte {
                b'\"' | b'\\' => { /* Ignore string close and backslash */ }
                _ => assert_valid_str(&(byte as char).to_string()),
            }
        }

        for byte in 128..=255u8 {
            assert_invalid_str(&(byte as char).to_string());
        }
    }

    #[test]
    fn test_unicode_codepoints() {
        let invalid = ["Ƒ", "バ", "メ", "﷽"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }

    #[test]
    fn test_unicode_multiple_codepoints() {
        let invalid = ["नी", "👨👨"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }

    #[test]
    fn test_valid_ascii_escape() {
        let valid = [r"\'", r#"\""#, r"\\", r"\n", r"\r", r"\t", r"\0", "a", "b"];
        for c in &valid {
            assert_valid_str(c);
        }
    }

    #[test]
    fn test_invalid_ascii_escape() {
        let invalid = [r"\a", r"\?", r"\"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }

    #[test]
    fn test_valid_ascii_code_escape() {
        let valid = [r"\x00", r"\x7F", r"\x55", r"\xF0"];
        for c in &valid {
            assert_valid_str(c);
        }
    }

    #[test]
    fn test_invalid_ascii_code_escape() {
        let invalid = [r"\x", r"\x7"];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }

    #[test]
    fn test_invalid_unicode_escape() {
        // Unicode escapes are rejected in byte strings even when they are well formed.
        let well_formed = [r"\u{FF}", r"\u{0}", r"\u{F}", r"\u{10FFFF}", r"\u{1_0__FF___FF_____}"];
        for c in &well_formed {
            assert_invalid_str(c);
        }

        let invalid = [
            r"\u",
            r"\u{}",
            r"\u{",
            r"\u{FF",
            r"\u{FFFFFF}",
            r"\u{_F}",
            r"\u{00FFFFF}",
            r"\u{110000}",
        ];
        for c in &invalid {
            assert_invalid_str(c);
        }
    }

    #[test]
    fn test_mixed_invalid() {
        assert_invalid_str(
            r"This is the tale of a string
with a newline in between, some emoji (👨👨) here and there,
unicode escapes like this: \u{1FFBB} and weird stuff like
this ﷽",
        );
    }

    #[test]
    fn test_mixed_valid() {
        assert_valid_str(
            r"This is the tale of a string
with a newline in between, no emoji at all,
nor unicode escapes or weird stuff",
        );
    }

    #[test]
    fn test_ignore_newline() {
        // The backslash is escaped so that the `\` + newline continuation reaches the parser
        // under test. Written as a plain `"Hello \<newline>World"` literal, the continuation
        // would be collapsed while compiling this test and the validator would only ever see
        // `b"Hello World"`.
        assert_valid_str("Hello \\\n             World");
    }
}