aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/string_lexing
diff options
context:
space:
mode:
authorAdolfo Ochagavía <[email protected]>2018-11-08 14:42:00 +0000
committerAdolfo Ochagavía <[email protected]>2018-11-09 13:52:17 +0000
commit3b4c02c19e4af645fd37e8bff774b05d546dc0b6 (patch)
tree42c40e9201adf64d1c06bc1c69524f5688ee6e9f /crates/ra_syntax/src/string_lexing
parent5a9150df9bcdaf5faed5b500c22333f1f7c99f32 (diff)
Validate string literals
Diffstat (limited to 'crates/ra_syntax/src/string_lexing')
-rw-r--r--crates/ra_syntax/src/string_lexing/mod.rs301
1 files changed, 0 insertions, 301 deletions
diff --git a/crates/ra_syntax/src/string_lexing/mod.rs b/crates/ra_syntax/src/string_lexing/mod.rs
deleted file mode 100644
index cc53e0aba..000000000
--- a/crates/ra_syntax/src/string_lexing/mod.rs
+++ /dev/null
@@ -1,301 +0,0 @@
1use self::CharComponentKind::*;
2use rowan::{TextRange, TextUnit};
3
4pub fn parse_char_literal(src: &str) -> CharComponentIterator {
5 CharComponentIterator {
6 parser: Parser::new(src),
7 has_closing_quote: false,
8 }
9}
10
11#[derive(Debug, Eq, PartialEq, Clone)]
12pub struct CharComponent {
13 pub range: TextRange,
14 pub kind: CharComponentKind,
15}
16
17impl CharComponent {
18 fn new(range: TextRange, kind: CharComponentKind) -> CharComponent {
19 CharComponent { range, kind }
20 }
21}
22
/// The syntactic category of a single component inside a char literal.
///
/// The enum carries no data, so it is `Copy`: a kind can be compared and
/// passed around by value without cloning.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub enum CharComponentKind {
    /// A character that is not part of an escape sequence.
    CodePoint,
    /// A backslash followed by any character other than `x` or `u`.
    AsciiEscape,
    /// A `\x` escape together with the code characters that follow it.
    AsciiCodeEscape,
    /// A `\u` escape, including its `{...}` body when one is present.
    UnicodeEscape,
}
30
31pub struct CharComponentIterator<'a> {
32 parser: Parser<'a>,
33 pub has_closing_quote: bool,
34}
35
36impl<'a> Iterator for CharComponentIterator<'a> {
37 type Item = CharComponent;
38 fn next(&mut self) -> Option<CharComponent> {
39 if self.parser.pos == 0 {
40 assert!(
41 self.parser.advance() == '\'',
42 "char literal should start with a quote"
43 );
44 }
45
46 if let Some(component) = self.parser.parse_char_component() {
47 return Some(component);
48 }
49
50 // We get here when there are no char components left to parse
51 if self.parser.peek() == Some('\'') {
52 self.parser.advance();
53 self.has_closing_quote = true;
54 }
55
56 assert!(
57 self.parser.peek() == None,
58 "char literal should leave no unparsed input: src = {}, pos = {}, length = {}",
59 self.parser.src,
60 self.parser.pos,
61 self.parser.src.len()
62 );
63
64 None
65 }
66}
67
68pub struct Parser<'a> {
69 src: &'a str,
70 pos: usize,
71}
72
73impl<'a> Parser<'a> {
74 pub fn new(src: &'a str) -> Parser<'a> {
75 Parser { src, pos: 0 }
76 }
77
78 // Utility methods
79
80 pub fn peek(&self) -> Option<char> {
81 if self.pos == self.src.len() {
82 return None;
83 }
84
85 self.src[self.pos..].chars().next()
86 }
87
88 pub fn advance(&mut self) -> char {
89 let next = self
90 .peek()
91 .expect("cannot advance if end of input is reached");
92 self.pos += next.len_utf8();
93 next
94 }
95
96 pub fn get_pos(&self) -> TextUnit {
97 (self.pos as u32).into()
98 }
99
100 // Char parsing methods
101
102 fn parse_unicode_escape(&mut self, start: TextUnit) -> CharComponent {
103 match self.peek() {
104 Some('{') => {
105 self.advance();
106
107 // Parse anything until we reach `}`
108 while let Some(next) = self.peek() {
109 self.advance();
110 if next == '}' {
111 break;
112 }
113 }
114
115 let end = self.get_pos();
116 CharComponent::new(TextRange::from_to(start, end), UnicodeEscape)
117 }
118 Some(_) | None => {
119 let end = self.get_pos();
120 CharComponent::new(TextRange::from_to(start, end), UnicodeEscape)
121 }
122 }
123 }
124
125 fn parse_ascii_code_escape(&mut self, start: TextUnit) -> CharComponent {
126 let code_start = self.get_pos();
127 while let Some(next) = self.peek() {
128 if next == '\'' || (self.get_pos() - code_start == 2.into()) {
129 break;
130 }
131
132 self.advance();
133 }
134
135 let end = self.get_pos();
136 CharComponent::new(TextRange::from_to(start, end), AsciiCodeEscape)
137 }
138
139 fn parse_escape(&mut self, start: TextUnit) -> CharComponent {
140 if self.peek().is_none() {
141 return CharComponent::new(TextRange::from_to(start, start), AsciiEscape);
142 }
143
144 let next = self.advance();
145 let end = self.get_pos();
146 let range = TextRange::from_to(start, end);
147 match next {
148 'x' => self.parse_ascii_code_escape(start),
149 'u' => self.parse_unicode_escape(start),
150 _ => CharComponent::new(range, AsciiEscape),
151 }
152 }
153
154 pub fn parse_char_component(&mut self) -> Option<CharComponent> {
155 let next = self.peek()?;
156
157 // Ignore character close
158 if next == '\'' {
159 return None;
160 }
161
162 let start = self.get_pos();
163 self.advance();
164
165 if next == '\\' {
166 Some(self.parse_escape(start))
167 } else {
168 let end = self.get_pos();
169 Some(CharComponent::new(
170 TextRange::from_to(start, end),
171 CodePoint,
172 ))
173 }
174 }
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `src` to exhaustion and reports whether a closing quote was
    /// seen, together with every component produced.
    fn parse(src: &str) -> (bool, Vec<CharComponent>) {
        let mut iter = super::parse_char_literal(src);
        let components: Vec<_> = iter.by_ref().collect();
        (iter.has_closing_quote, components)
    }

    /// Lexes a literal expected to hold exactly one component and no closing quote.
    fn unclosed_char_component(src: &str) -> CharComponent {
        let (closed, mut components) = parse(src);
        assert!(!closed, "char should not have closing quote");
        assert!(components.len() == 1);
        components.remove(0)
    }

    /// Lexes a literal expected to hold exactly one component and a closing quote.
    fn closed_char_component(src: &str) -> CharComponent {
        let (closed, mut components) = parse(src);
        assert!(closed, "char should have closing quote");
        assert!(
            components.len() == 1,
            "Literal: {}\nComponents: {:#?}",
            src,
            components
        );
        components.remove(0)
    }

    /// Lexes a literal expected to have a closing quote, returning all components.
    fn closed_char_components(src: &str) -> Vec<CharComponent> {
        let (closed, components) = parse(src);
        assert!(closed, "char should have closing quote");
        components
    }

    /// Range spanning everything between the opening and the closing quote.
    fn range_closed(src: &str) -> TextRange {
        let end = src.len() as u32 - 1;
        TextRange::from_to(1.into(), end.into())
    }

    /// Range spanning everything after the opening quote.
    fn range_unclosed(src: &str) -> TextRange {
        let end = src.len() as u32;
        TextRange::from_to(1.into(), end.into())
    }

    #[test]
    fn test_unicode_escapes() {
        let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
        for escape in unicode_escapes.iter() {
            let literal = format!(r"'\u{}'", escape);
            let component = closed_char_component(&literal);
            assert_eq!(component.kind, CharComponentKind::UnicodeEscape);
            assert_eq!(component.range, range_closed(&literal));
        }
    }

    #[test]
    fn test_unicode_escapes_unclosed() {
        let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
        for escape in unicode_escapes.iter() {
            let literal = format!(r"'\u{}'", escape);
            let component = unclosed_char_component(&literal);
            assert_eq!(component.kind, CharComponentKind::UnicodeEscape);
            assert_eq!(component.range, range_unclosed(&literal));
        }
    }

    #[test]
    fn test_empty_char() {
        let (closed, components) = parse("''");
        assert!(closed, "char should have closing quote");
        assert!(components.is_empty());
    }

    #[test]
    fn test_unclosed_char() {
        let component = unclosed_char_component("'a");
        assert_eq!(component.kind, CodePoint);
        assert_eq!(component.range, TextRange::from_to(1.into(), 2.into()));
    }

    #[test]
    fn test_digit_escapes() {
        let literals = &[r"", r"5", r"55"];

        for literal in literals.iter() {
            let lit_text = format!(r"'\x{}'", literal);
            let component = closed_char_component(&lit_text);
            assert_eq!(component.kind, CharComponentKind::AsciiCodeEscape);
            assert_eq!(component.range, range_closed(&lit_text));
        }

        // More than 2 digits starts a new codepoint
        let components = closed_char_components(r"'\x555'");
        assert_eq!(components.len(), 2);
        assert_eq!(components[1].kind, CharComponentKind::CodePoint);
    }

    #[test]
    fn test_ascii_escapes() {
        let literals = &[
            r"\'", "\\\"", // equivalent to \"
            r"\n", r"\r", r"\t", r"\\", r"\0",
        ];

        for literal in literals.iter() {
            let lit_text = format!("'{}'", literal);
            let component = closed_char_component(&lit_text);
            assert_eq!(component.kind, CharComponentKind::AsciiEscape);
            assert_eq!(component.range, range_closed(&lit_text));
        }
    }

    #[test]
    fn test_no_escapes() {
        let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];

        for literal in literals.iter().cloned() {
            let lit_text = format!("'{}'", literal);
            let component = closed_char_component(&lit_text);
            assert_eq!(component.kind, CharComponentKind::CodePoint);
            assert_eq!(component.range, range_closed(&lit_text));
        }
    }
}