aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/string_lexing/string.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_syntax/src/string_lexing/string.rs')
-rw-r--r--crates/ra_syntax/src/string_lexing/string.rs197
1 files changed, 189 insertions, 8 deletions
diff --git a/crates/ra_syntax/src/string_lexing/string.rs b/crates/ra_syntax/src/string_lexing/string.rs
index d8351e9af..064f08544 100644
--- a/crates/ra_syntax/src/string_lexing/string.rs
+++ b/crates/ra_syntax/src/string_lexing/string.rs
@@ -1,41 +1,92 @@
1use super::parser::Parser; 1use crate::{
2use super::StringComponent; 2 TextRange,
3 string_lexing::{
4 parser::Parser,
5 StringComponent,
6}};
3 7
/// Creates a `StringComponentIterator` over the string literal text `src`.
///
/// Rows that carry two line numbers show the old line (left) next to the new
/// line (right); this note describes the new version. There the double-quote
/// byte is handed to `Parser::new` and also stored as `quote`, and no prefix
/// byte is expected. `has_closing_quote` starts as false and `suffix` as
/// `None`; the iterator's `next` is what updates them.
4pub fn parse_string_literal(src: &str) -> StringComponentIterator { 8pub fn parse_string_literal(src: &str) -> StringComponentIterator {
5 StringComponentIterator { 9 StringComponentIterator {
6 parser: Parser::new(src), 10 parser: Parser::new(src, b'"'),
7 has_closing_quote: false, 11 has_closing_quote: false,
12 suffix: None,
13 prefix: None,
14 quote: b'"',
15 }
16}
17
/// Creates a `StringComponentIterator` over the byte string literal text `src`.
///
/// The delimiter is the double-quote byte (passed to `Parser::new` and stored
/// as `quote`), and `prefix` is set to the byte `b`, which the iterator's
/// `next` asserts is present in front of the opening quote.
18pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator {
19 StringComponentIterator {
20 parser: Parser::new(src, b'"'),
21 has_closing_quote: false,
22 suffix: None,
23 prefix: Some(b'b'),
24 quote: b'"',
25 }
26}
27
/// Creates a `StringComponentIterator` over the char literal text `src`.
///
/// The delimiter is the single-quote byte (passed to `Parser::new` and stored
/// as `quote`) and no prefix byte is expected.
28pub fn parse_char_literal(src: &str) -> StringComponentIterator {
29 StringComponentIterator {
30 parser: Parser::new(src, b'\''),
31 has_closing_quote: false,
32 suffix: None,
33 prefix: None,
34 quote: b'\'',
35 }
36}
37
/// Creates a `StringComponentIterator` over the byte literal text `src`.
///
/// The delimiter is the single-quote byte (passed to `Parser::new` and stored
/// as `quote`), and `prefix` is set to the byte `b`, which the iterator's
/// `next` asserts is present in front of the opening quote.
38pub fn parse_byte_literal(src: &str) -> StringComponentIterator {
39 StringComponentIterator {
40 parser: Parser::new(src, b'\''),
41 has_closing_quote: false,
42 suffix: None,
43 prefix: Some(b'b'),
44 quote: b'\'',
8 } 45 }
9} 46}
10 47
/// Iterator yielding the `StringComponent`s of a single literal.
///
/// Instances are built by the `parse_*_literal` functions in this file, which
/// differ only in the `prefix` and `quote` they configure.
11pub struct StringComponentIterator<'a> { 48pub struct StringComponentIterator<'a> {
    // Underlying parser; `next` drives it via advance / peek / parse_component.
12 parser: Parser<'a>, 49 parser: Parser<'a>,
    // Set to true by `next` once the closing quote has been seen and consumed.
13 pub has_closing_quote: bool, 50 pub has_closing_quote: bool,
    // Range returned by the parser's `parse_suffix`, which is only called after
    // a closing quote was found; stays `None` otherwise.
51 pub suffix: Option<TextRange>,
    // Byte that must precede the opening quote, if any.
52 prefix: Option<u8>,
    // Byte used as the opening and closing delimiter of the literal.
53 quote: u8,
14} 54}
15 55
// Rows with two line numbers pair the old line (left) with the new line
// (right); the notes below describe the new version. The hunk marker further
// down means a couple of lines of this body are not shown here.
16impl<'a> Iterator for StringComponentIterator<'a> { 56impl<'a> Iterator for StringComponentIterator<'a> {
17 type Item = StringComponent; 57 type Item = StringComponent;
    /// Yields the next component of the literal, or `None` when no component
    /// is left.
    ///
    /// # Panics
    ///
    /// Panics (via `assert!`) if the literal does not begin with the configured
    /// prefix and quote, or if input remains after the components and the
    /// optional closing quote plus suffix have been handled.
18 fn next(&mut self) -> Option<StringComponent> { 58 fn next(&mut self) -> Option<StringComponent> {
        // Only while the parser is still at position 0: check the optional
        // prefix, then the opening quote. `advance` presumably consumes and
        // returns the next char -- confirm against the parser module.
19 if self.parser.pos == 0 { 59 if self.parser.pos == 0 {
60 if let Some(prefix) = self.prefix {
61 assert!(
62 self.parser.advance() == prefix as char,
63 "literal should start with a {:?}",
64 prefix as char,
65 );
66 }
20 assert!( 67 assert!(
21 self.parser.advance() == '"', 68 self.parser.advance() == self.quote as char,
22 "string literal should start with double quotes" 69 "literal should start with a {:?}",
70 self.quote as char,
23 ); 71 );
24 } 72 }
25 73
        // Hand out components for as long as the parser produces them.
26 if let Some(component) = self.parser.parse_string_component() { 74 if let Some(component) = self.parser.parse_component() {
27 return Some(component); 75 return Some(component);
28 } 76 }
29 77
30 // We get here when there are no char components left to parse 78 // We get here when there are no char components left to parse
        // A closing quote is optional: when present it is consumed, recorded in
        // `has_closing_quote`, and the parser is asked for a trailing suffix.
31 if self.parser.peek() == Some('"') { 79 if self.parser.peek() == Some(self.quote as char) {
32 self.parser.advance(); 80 self.parser.advance();
33 self.has_closing_quote = true; 81 self.has_closing_quote = true;
82 if let Some(range) = self.parser.parse_suffix() {
83 self.suffix = Some(range);
84 }
34 } 85 }
35 86
        // Whatever the outcome above, nothing may be left unparsed.
36 assert!( 87 assert!(
37 self.parser.peek() == None, 88 self.parser.peek() == None,
38 "string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", 89 "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
39 self.parser.src, 90 self.parser.src,
40 self.parser.pos, 91 self.parser.pos,
41 self.parser.src.len() 92 self.parser.src.len()
@@ -44,3 +95,133 @@ impl<'a> Iterator for StringComponentIterator<'a> {
44 None 95 None
45 } 96 }
46} 97}
98
// Unit tests for the component iterator. Every test here goes through
// `parse_char_literal`, so only the single-quote configuration is exercised.
99#[cfg(test)]
100mod tests {
101 use rowan::TextRange;
102 use crate::string_lexing::{
103 StringComponent,
104 StringComponentKind::*,
105};
106
    // Lexes `src` as a char literal and returns the closing-quote flag together
    // with every component produced. The iterator is taken by mutable reference
    // so that collecting drains it while `has_closing_quote` can still be read
    // afterwards.
107 fn parse(src: &str) -> (bool, Vec<StringComponent>) {
108 let component_iterator = &mut super::parse_char_literal(src);
109 let components: Vec<_> = component_iterator.collect();
110 (component_iterator.has_closing_quote, components)
111 }
112
    // Returns the only component of `src`, asserting that no closing quote was
    // found and that exactly one component was produced.
113 fn unclosed_char_component(src: &str) -> StringComponent {
114 let (has_closing_quote, components) = parse(src);
115 assert!(!has_closing_quote, "char should not have closing quote");
116 assert!(components.len() == 1);
117 components[0].clone()
118 }
119
    // Returns the only component of `src`, asserting that a closing quote was
    // found and that exactly one component was produced.
120 fn closed_char_component(src: &str) -> StringComponent {
121 let (has_closing_quote, components) = parse(src);
122 assert!(has_closing_quote, "char should have closing quote");
123 assert!(
124 components.len() == 1,
125 "Literal: {}\nComponents: {:#?}",
126 src,
127 components
128 );
129 components[0].clone()
130 }
131
    // Returns all components of `src`, asserting only that a closing quote was
    // found (any number of components is accepted).
132 fn closed_char_components(src: &str) -> Vec<StringComponent> {
133 let (has_closing_quote, components) = parse(src);
134 assert!(has_closing_quote, "char should have closing quote");
135 components
136 }
137
    // Expected range of a lone component in a closed literal: from offset 1 to
    // `src.len() - 1`, i.e. everything but the first and the last byte.
138 fn range_closed(src: &str) -> TextRange {
139 TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
140 }
141
    // Expected range of a lone component in an unclosed literal: from offset 1
    // to the end of `src`.
142 fn range_unclosed(src: &str) -> TextRange {
143 TextRange::from_to(1.into(), (src.len() as u32).into())
144 }
145
    // A backslash-u escape yields a single UnicodeEscape component covering the
    // whole literal body. The last two inputs (empty braces, no braces at all)
    // are expected to be classified the same way.
146 #[test]
147 fn test_unicode_escapes() {
148 let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
149 for escape in unicode_escapes {
150 let escape_sequence = format!(r"'\u{}'", escape);
151 let component = closed_char_component(&escape_sequence);
152 let expected_range = range_closed(&escape_sequence);
153 assert_eq!(component.kind, UnicodeEscape);
154 assert_eq!(component.range, expected_range);
155 }
156 }
157
    // Escapes whose brace is never closed. The formatted text still ends with a
    // single quote, yet the test expects no closing quote to be reported and
    // the UnicodeEscape component to run to the end of the input.
158 #[test]
159 fn test_unicode_escapes_unclosed() {
160 let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
161 for escape in unicode_escapes {
162 let escape_sequence = format!(r"'\u{}'", escape);
163 let component = unclosed_char_component(&escape_sequence);
164 let expected_range = range_unclosed(&escape_sequence);
165 assert_eq!(component.kind, UnicodeEscape);
166 assert_eq!(component.range, expected_range);
167 }
168 }
169
    // Two quotes with nothing in between: closing quote seen, zero components.
170 #[test]
171 fn test_empty_char() {
172 let (has_closing_quote, components) = parse("''");
173 assert!(has_closing_quote, "char should have closing quote");
174 assert!(components.len() == 0);
175 }
176
    // Opening quote plus one character and no closing quote: one CodePoint
    // component covering offsets 1 to 2.
177 #[test]
178 fn test_unclosed_char() {
179 let component = unclosed_char_component("'a");
180 assert!(component.kind == CodePoint);
181 assert!(component.range == TextRange::from_to(1.into(), 2.into()));
182 }
183
    // A backslash-x escape followed by zero, one or two digits is a single
    // AsciiCodeEscape component covering the whole literal body.
184 #[test]
185 fn test_digit_escapes() {
186 let literals = &[r"", r"5", r"55"];
187
188 for literal in literals {
189 let lit_text = format!(r"'\x{}'", literal);
190 let component = closed_char_component(&lit_text);
191 assert!(component.kind == AsciiCodeEscape);
192 assert!(component.range == range_closed(&lit_text));
193 }
194
195 // More than 2 digits starts a new codepoint
196 let components = closed_char_components(r"'\x555'");
197 assert!(components.len() == 2);
198 assert!(components[1].kind == CodePoint);
199 }
200
    // Each listed backslash escape is a single AsciiEscape component covering
    // the whole literal body.
201 #[test]
202 fn test_ascii_escapes() {
203 let literals = &[
204 r"\'", "\\\"", // equivalent to \"
205 r"\n", r"\r", r"\t", r"\\", r"\0",
206 ];
207
208 for literal in literals {
209 let lit_text = format!("'{}'", literal);
210 let component = closed_char_component(&lit_text);
211 assert!(component.kind == AsciiEscape);
212 assert!(component.range == range_closed(&lit_text));
213 }
214 }
215
    // The same characters without a leading backslash are ordinary CodePoint
    // components, not escapes.
216 #[test]
217 fn test_no_escapes() {
218 let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
219
220 for &literal in literals {
221 let lit_text = format!("'{}'", literal);
222 let component = closed_char_component(&lit_text);
223 assert!(component.kind == CodePoint);
224 assert!(component.range == range_closed(&lit_text));
225 }
226 }
227}