diff options
author | Aleksey Kladov <[email protected]> | 2018-12-27 11:42:46 +0000 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2018-12-27 11:42:46 +0000 |
commit | 73ded3c63ca2522b7bb6ca8eb7834c5adc1a3511 (patch) | |
tree | 521f3b0fa02e32b0b2a6e9a9d1370845d922657f /crates/ra_syntax/src/string_lexing/string.rs | |
parent | dbf03b674e7e1a49d9b32ec5ed656df2aedd3ed3 (diff) |
dedupe literal parsers
Diffstat (limited to 'crates/ra_syntax/src/string_lexing/string.rs')
-rw-r--r-- | crates/ra_syntax/src/string_lexing/string.rs | 187 |
1 files changed, 179 insertions, 8 deletions
diff --git a/crates/ra_syntax/src/string_lexing/string.rs b/crates/ra_syntax/src/string_lexing/string.rs index d8351e9af..7476fea13 100644 --- a/crates/ra_syntax/src/string_lexing/string.rs +++ b/crates/ra_syntax/src/string_lexing/string.rs | |||
@@ -1,41 +1,82 @@ | |||
1 | use super::parser::Parser; | 1 | use crate::string_lexing::{ |
2 | use super::StringComponent; | 2 | parser::Parser, |
3 | StringComponent, | ||
4 | }; | ||
3 | 5 | ||
4 | pub fn parse_string_literal(src: &str) -> StringComponentIterator { | 6 | pub fn parse_string_literal(src: &str) -> StringComponentIterator { |
5 | StringComponentIterator { | 7 | StringComponentIterator { |
6 | parser: Parser::new(src), | 8 | parser: Parser::new(src, b'"'), |
7 | has_closing_quote: false, | 9 | has_closing_quote: false, |
10 | prefix: None, | ||
11 | quote: b'"', | ||
12 | } | ||
13 | } | ||
14 | |||
15 | pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator { | ||
16 | StringComponentIterator { | ||
17 | parser: Parser::new(src, b'"'), | ||
18 | has_closing_quote: false, | ||
19 | prefix: Some(b'b'), | ||
20 | quote: b'"', | ||
21 | } | ||
22 | } | ||
23 | |||
24 | pub fn parse_char_literal(src: &str) -> StringComponentIterator { | ||
25 | StringComponentIterator { | ||
26 | parser: Parser::new(src, b'\''), | ||
27 | has_closing_quote: false, | ||
28 | prefix: None, | ||
29 | quote: b'\'', | ||
30 | } | ||
31 | } | ||
32 | |||
33 | pub fn parse_byte_literal(src: &str) -> StringComponentIterator { | ||
34 | StringComponentIterator { | ||
35 | parser: Parser::new(src, b'\''), | ||
36 | has_closing_quote: false, | ||
37 | prefix: Some(b'b'), | ||
38 | quote: b'\'', | ||
8 | } | 39 | } |
9 | } | 40 | } |
10 | 41 | ||
11 | pub struct StringComponentIterator<'a> { | 42 | pub struct StringComponentIterator<'a> { |
12 | parser: Parser<'a>, | 43 | parser: Parser<'a>, |
13 | pub has_closing_quote: bool, | 44 | pub has_closing_quote: bool, |
45 | prefix: Option<u8>, | ||
46 | quote: u8, | ||
14 | } | 47 | } |
15 | 48 | ||
16 | impl<'a> Iterator for StringComponentIterator<'a> { | 49 | impl<'a> Iterator for StringComponentIterator<'a> { |
17 | type Item = StringComponent; | 50 | type Item = StringComponent; |
18 | fn next(&mut self) -> Option<StringComponent> { | 51 | fn next(&mut self) -> Option<StringComponent> { |
19 | if self.parser.pos == 0 { | 52 | if self.parser.pos == 0 { |
53 | if let Some(prefix) = self.prefix { | ||
54 | assert!( | ||
55 | self.parser.advance() == prefix as char, | ||
56 | "literal should start with a {:?}", | ||
57 | prefix as char, | ||
58 | ); | ||
59 | } | ||
20 | assert!( | 60 | assert!( |
21 | self.parser.advance() == '"', | 61 | self.parser.advance() == self.quote as char, |
22 | "string literal should start with double quotes" | 62 | "literal should start with a {:?}", |
63 | self.quote as char, | ||
23 | ); | 64 | ); |
24 | } | 65 | } |
25 | 66 | ||
26 | if let Some(component) = self.parser.parse_string_component() { | 67 | if let Some(component) = self.parser.parse_component() { |
27 | return Some(component); | 68 | return Some(component); |
28 | } | 69 | } |
29 | 70 | ||
30 | // We get here when there are no char components left to parse | 71 | // We get here when there are no char components left to parse |
31 | if self.parser.peek() == Some('"') { | 72 | if self.parser.peek() == Some(self.quote as char) { |
32 | self.parser.advance(); | 73 | self.parser.advance(); |
33 | self.has_closing_quote = true; | 74 | self.has_closing_quote = true; |
34 | } | 75 | } |
35 | 76 | ||
36 | assert!( | 77 | assert!( |
37 | self.parser.peek() == None, | 78 | self.parser.peek() == None, |
38 | "string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", | 79 | "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", |
39 | self.parser.src, | 80 | self.parser.src, |
40 | self.parser.pos, | 81 | self.parser.pos, |
41 | self.parser.src.len() | 82 | self.parser.src.len() |
@@ -44,3 +85,133 @@ impl<'a> Iterator for StringComponentIterator<'a> { | |||
44 | None | 85 | None |
45 | } | 86 | } |
46 | } | 87 | } |
88 | |||
89 | #[cfg(test)] | ||
90 | mod tests { | ||
91 | use rowan::TextRange; | ||
92 | use crate::string_lexing::{ | ||
93 | StringComponent, | ||
94 | StringComponentKind::*, | ||
95 | }; | ||
96 | |||
97 | fn parse(src: &str) -> (bool, Vec<StringComponent>) { | ||
98 | let component_iterator = &mut super::parse_char_literal(src); | ||
99 | let components: Vec<_> = component_iterator.collect(); | ||
100 | (component_iterator.has_closing_quote, components) | ||
101 | } | ||
102 | |||
103 | fn unclosed_char_component(src: &str) -> StringComponent { | ||
104 | let (has_closing_quote, components) = parse(src); | ||
105 | assert!(!has_closing_quote, "char should not have closing quote"); | ||
106 | assert!(components.len() == 1); | ||
107 | components[0].clone() | ||
108 | } | ||
109 | |||
110 | fn closed_char_component(src: &str) -> StringComponent { | ||
111 | let (has_closing_quote, components) = parse(src); | ||
112 | assert!(has_closing_quote, "char should have closing quote"); | ||
113 | assert!( | ||
114 | components.len() == 1, | ||
115 | "Literal: {}\nComponents: {:#?}", | ||
116 | src, | ||
117 | components | ||
118 | ); | ||
119 | components[0].clone() | ||
120 | } | ||
121 | |||
122 | fn closed_char_components(src: &str) -> Vec<StringComponent> { | ||
123 | let (has_closing_quote, components) = parse(src); | ||
124 | assert!(has_closing_quote, "char should have closing quote"); | ||
125 | components | ||
126 | } | ||
127 | |||
128 | fn range_closed(src: &str) -> TextRange { | ||
129 | TextRange::from_to(1.into(), (src.len() as u32 - 1).into()) | ||
130 | } | ||
131 | |||
132 | fn range_unclosed(src: &str) -> TextRange { | ||
133 | TextRange::from_to(1.into(), (src.len() as u32).into()) | ||
134 | } | ||
135 | |||
136 | #[test] | ||
137 | fn test_unicode_escapes() { | ||
138 | let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""]; | ||
139 | for escape in unicode_escapes { | ||
140 | let escape_sequence = format!(r"'\u{}'", escape); | ||
141 | let component = closed_char_component(&escape_sequence); | ||
142 | let expected_range = range_closed(&escape_sequence); | ||
143 | assert_eq!(component.kind, UnicodeEscape); | ||
144 | assert_eq!(component.range, expected_range); | ||
145 | } | ||
146 | } | ||
147 | |||
148 | #[test] | ||
149 | fn test_unicode_escapes_unclosed() { | ||
150 | let unicode_escapes = &["{DEAD", "{BEEF", "{FF"]; | ||
151 | for escape in unicode_escapes { | ||
152 | let escape_sequence = format!(r"'\u{}'", escape); | ||
153 | let component = unclosed_char_component(&escape_sequence); | ||
154 | let expected_range = range_unclosed(&escape_sequence); | ||
155 | assert_eq!(component.kind, UnicodeEscape); | ||
156 | assert_eq!(component.range, expected_range); | ||
157 | } | ||
158 | } | ||
159 | |||
160 | #[test] | ||
161 | fn test_empty_char() { | ||
162 | let (has_closing_quote, components) = parse("''"); | ||
163 | assert!(has_closing_quote, "char should have closing quote"); | ||
164 | assert!(components.len() == 0); | ||
165 | } | ||
166 | |||
167 | #[test] | ||
168 | fn test_unclosed_char() { | ||
169 | let component = unclosed_char_component("'a"); | ||
170 | assert!(component.kind == CodePoint); | ||
171 | assert!(component.range == TextRange::from_to(1.into(), 2.into())); | ||
172 | } | ||
173 | |||
174 | #[test] | ||
175 | fn test_digit_escapes() { | ||
176 | let literals = &[r"", r"5", r"55"]; | ||
177 | |||
178 | for literal in literals { | ||
179 | let lit_text = format!(r"'\x{}'", literal); | ||
180 | let component = closed_char_component(&lit_text); | ||
181 | assert!(component.kind == AsciiCodeEscape); | ||
182 | assert!(component.range == range_closed(&lit_text)); | ||
183 | } | ||
184 | |||
185 | // More than 2 digits starts a new codepoint | ||
186 | let components = closed_char_components(r"'\x555'"); | ||
187 | assert!(components.len() == 2); | ||
188 | assert!(components[1].kind == CodePoint); | ||
189 | } | ||
190 | |||
191 | #[test] | ||
192 | fn test_ascii_escapes() { | ||
193 | let literals = &[ | ||
194 | r"\'", "\\\"", // equivalent to \" | ||
195 | r"\n", r"\r", r"\t", r"\\", r"\0", | ||
196 | ]; | ||
197 | |||
198 | for literal in literals { | ||
199 | let lit_text = format!("'{}'", literal); | ||
200 | let component = closed_char_component(&lit_text); | ||
201 | assert!(component.kind == AsciiEscape); | ||
202 | assert!(component.range == range_closed(&lit_text)); | ||
203 | } | ||
204 | } | ||
205 | |||
206 | #[test] | ||
207 | fn test_no_escapes() { | ||
208 | let literals = &['"', 'n', 'r', 't', '0', 'x', 'u']; | ||
209 | |||
210 | for &literal in literals { | ||
211 | let lit_text = format!("'{}'", literal); | ||
212 | let component = closed_char_component(&lit_text); | ||
213 | assert!(component.kind == CodePoint); | ||
214 | assert!(component.range == range_closed(&lit_text)); | ||
215 | } | ||
216 | } | ||
217 | } | ||