about summary refs log tree commit diff
path: root/crates/ra_syntax/src/string_lexing/string.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_syntax/src/string_lexing/string.rs')
-rw-r--r-- crates/ra_syntax/src/string_lexing/string.rs 187
1 files changed, 179 insertions, 8 deletions
diff --git a/crates/ra_syntax/src/string_lexing/string.rs b/crates/ra_syntax/src/string_lexing/string.rs
index d8351e9af..7476fea13 100644
--- a/crates/ra_syntax/src/string_lexing/string.rs
+++ b/crates/ra_syntax/src/string_lexing/string.rs
@@ -1,41 +1,82 @@
1use super::parser::Parser; 1use crate::string_lexing::{
2use super::StringComponent; 2 parser::Parser,
3 StringComponent,
4};
3 5
// Diff view: on fused lines the left column is the old text and the right
// column is the new text. The notes below describe the right-hand version.
/// Builds a `StringComponentIterator` over the string literal text `src`.
///
/// The iterator is configured with `"` as the delimiter, no prefix byte, and
/// `has_closing_quote` initially `false`.
///
/// Nothing is validated here. The first call to `next` on the returned
/// iterator asserts that `src` starts with the opening `"` and panics
/// otherwise.
4pub fn parse_string_literal(src: &str) -> StringComponentIterator { 6pub fn parse_string_literal(src: &str) -> StringComponentIterator {
5 StringComponentIterator { 7 StringComponentIterator {
6 parser: Parser::new(src), 8 parser: Parser::new(src, b'"'),
7 has_closing_quote: false, 9 has_closing_quote: false,
10 prefix: None,
11 quote: b'"',
12 }
13}
14
/// Builds a `StringComponentIterator` over the byte string literal text `src`.
///
/// Same configuration as `parse_string_literal` (delimiter `"`), except that
/// the prefix byte `b` is expected in front of the opening quote.
///
/// Nothing is validated here. The first call to `next` on the returned
/// iterator asserts that `src` starts with `b` followed by `"` and panics
/// otherwise.
15pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator {
16 StringComponentIterator {
17 parser: Parser::new(src, b'"'),
18 has_closing_quote: false,
19 prefix: Some(b'b'),
20 quote: b'"',
21 }
22}
23
/// Builds a `StringComponentIterator` over the char literal text `src`.
///
/// The iterator is configured with a single quote as the delimiter, no prefix
/// byte, and `has_closing_quote` initially `false`.
///
/// Nothing is validated here. The first call to `next` on the returned
/// iterator asserts that `src` starts with the opening single quote and panics
/// otherwise.
24pub fn parse_char_literal(src: &str) -> StringComponentIterator {
25 StringComponentIterator {
26 parser: Parser::new(src, b'\''),
27 has_closing_quote: false,
28 prefix: None,
29 quote: b'\'',
30 }
31}
32
/// Builds a `StringComponentIterator` over the byte literal text `src`.
///
/// Same configuration as `parse_char_literal` (single-quote delimiter), except
/// that the prefix byte `b` is expected in front of the opening quote.
///
/// Nothing is validated here. The first call to `next` on the returned
/// iterator asserts that `src` starts with `b` followed by a single quote and
/// panics otherwise.
// Diff view: the last two lines are fused with the closing braces of the old
// `parse_string_literal` (left column).
33pub fn parse_byte_literal(src: &str) -> StringComponentIterator {
34 StringComponentIterator {
35 parser: Parser::new(src, b'\''),
36 has_closing_quote: false,
37 prefix: Some(b'b'),
38 quote: b'\'',
8 } 39 }
9} 40}
10 41
/// Iterator yielding the `StringComponent`s of a quoted literal.
///
/// Fields (right-hand, new column of the diff):
/// - `parser`: the `Parser` positioned over the literal text.
/// - `has_closing_quote`: public flag, `false` on construction; `next` sets it
///   to `true` when, after the last component, the next character is the
///   delimiter and gets consumed.
/// - `prefix`: optional byte that must precede the opening quote (`Some(b'b')`
///   for the byte variants, `None` otherwise); checked on the first `next`.
/// - `quote`: the delimiter byte, used for both the opening check and the
///   closing-quote detection.
11pub struct StringComponentIterator<'a> { 42pub struct StringComponentIterator<'a> {
12 parser: Parser<'a>, 43 parser: Parser<'a>,
13 pub has_closing_quote: bool, 44 pub has_closing_quote: bool,
45 prefix: Option<u8>,
46 quote: u8,
14} 47}
15 48
16impl<'a> Iterator for StringComponentIterator<'a> { 49impl<'a> Iterator for StringComponentIterator<'a> {
17 type Item = StringComponent; 50 type Item = StringComponent;
18 fn next(&mut self) -> Option<StringComponent> { 51 fn next(&mut self) -> Option<StringComponent> {
19 if self.parser.pos == 0 { 52 if self.parser.pos == 0 {
53 if let Some(prefix) = self.prefix {
54 assert!(
55 self.parser.advance() == prefix as char,
56 "literal should start with a {:?}",
57 prefix as char,
58 );
59 }
20 assert!( 60 assert!(
21 self.parser.advance() == '"', 61 self.parser.advance() == self.quote as char,
22 "string literal should start with double quotes" 62 "literal should start with a {:?}",
63 self.quote as char,
23 ); 64 );
24 } 65 }
25 66
26 if let Some(component) = self.parser.parse_string_component() { 67 if let Some(component) = self.parser.parse_component() {
27 return Some(component); 68 return Some(component);
28 } 69 }
29 70
30 // We get here when there are no char components left to parse 71 // We get here when there are no char components left to parse
31 if self.parser.peek() == Some('"') { 72 if self.parser.peek() == Some(self.quote as char) {
32 self.parser.advance(); 73 self.parser.advance();
33 self.has_closing_quote = true; 74 self.has_closing_quote = true;
34 } 75 }
35 76
36 assert!( 77 assert!(
37 self.parser.peek() == None, 78 self.parser.peek() == None,
38 "string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", 79 "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
39 self.parser.src, 80 self.parser.src,
40 self.parser.pos, 81 self.parser.pos,
41 self.parser.src.len() 82 self.parser.src.len()
@@ -44,3 +85,133 @@ impl<'a> Iterator for StringComponentIterator<'a> {
44 None 85 None
45 } 86 }
46} 87}
88
89#[cfg(test)]
90mod tests {
91 use rowan::TextRange;
92 use crate::string_lexing::{
93 StringComponent,
94 StringComponentKind::*,
95};
96
/// Test helper: runs `parse_char_literal` over `src` until it is exhausted.
///
/// Returns the iterator's `has_closing_quote` flag together with every
/// component it produced. The iterator is held behind a `&mut` borrow so that
/// `collect` does not consume it and the flag can still be read afterwards.
97 fn parse(src: &str) -> (bool, Vec<StringComponent>) {
98 let component_iterator = &mut super::parse_char_literal(src);
99 let components: Vec<_> = component_iterator.collect();
100 (component_iterator.has_closing_quote, components)
101 }
102
/// Test helper: parses `src` and returns its only component.
///
/// Panics (fails the test) if the parser reported a closing quote or if the
/// number of components is not exactly one.
103 fn unclosed_char_component(src: &str) -> StringComponent {
104 let (has_closing_quote, components) = parse(src);
105 assert!(!has_closing_quote, "char should not have closing quote");
106 assert!(components.len() == 1);
107 components[0].clone()
108 }
109
/// Test helper: parses `src` and returns its only component.
///
/// Panics (fails the test) if the parser did not report a closing quote or if
/// the number of components is not exactly one; the latter failure prints the
/// literal and the components that were actually produced.
110 fn closed_char_component(src: &str) -> StringComponent {
111 let (has_closing_quote, components) = parse(src);
112 assert!(has_closing_quote, "char should have closing quote");
113 assert!(
114 components.len() == 1,
115 "Literal: {}\nComponents: {:#?}",
116 src,
117 components
118 );
119 components[0].clone()
120 }
121
/// Test helper: parses `src` and returns all of its components.
///
/// Panics (fails the test) if the parser did not report a closing quote. No
/// constraint is placed on how many components there are.
122 fn closed_char_components(src: &str) -> Vec<StringComponent> {
123 let (has_closing_quote, components) = parse(src);
124 assert!(has_closing_quote, "char should have closing quote");
125 components
126 }
127
/// Test helper: the range expected for a single component that fills a closed
/// literal, running from offset 1 to offset `src.len() - 1`.
///
/// `src` must be non-empty, otherwise the `u32` subtraction underflows.
128 fn range_closed(src: &str) -> TextRange {
129 TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
130 }
131
/// Test helper: the range expected for a single component that runs to the
/// end of an unclosed literal, from offset 1 to offset `src.len()`.
132 fn range_unclosed(src: &str) -> TextRange {
133 TextRange::from_to(1.into(), (src.len() as u32).into())
134 }
135
/// A `\u` escape inside a closed char literal is reported as one
/// `UnicodeEscape` component whose range equals `range_closed` of the literal.
///
/// Covered payloads: braced hex digits, empty braces, and no braces at all.
136 #[test]
137 fn test_unicode_escapes() {
138 let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
139 for escape in unicode_escapes {
140 let escape_sequence = format!(r"'\u{}'", escape);
141 let component = closed_char_component(&escape_sequence);
142 let expected_range = range_closed(&escape_sequence);
143 assert_eq!(component.kind, UnicodeEscape);
144 assert_eq!(component.range, expected_range);
145 }
146 }
147
/// A `\u` escape whose opening brace is never closed.
///
/// The literal text built here still ends with a single quote, yet the test
/// expects no closing quote to be reported and the single `UnicodeEscape`
/// component to span up to the very end of the input (`range_unclosed`), so
/// the trailing quote is expected to fall inside the escape's range.
148 #[test]
149 fn test_unicode_escapes_unclosed() {
150 let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
151 for escape in unicode_escapes {
152 let escape_sequence = format!(r"'\u{}'", escape);
153 let component = unclosed_char_component(&escape_sequence);
154 let expected_range = range_unclosed(&escape_sequence);
155 assert_eq!(component.kind, UnicodeEscape);
156 assert_eq!(component.range, expected_range);
157 }
158 }
159
/// Two single quotes with nothing between them: the closing quote is
/// reported and no components are produced.
160 #[test]
161 fn test_empty_char() {
162 let (has_closing_quote, components) = parse("''");
163 assert!(has_closing_quote, "char should have closing quote");
164 assert!(components.len() == 0);
165 }
166
/// An opening quote followed by one character and no closing quote: a single
/// `CodePoint` component with range 1 to 2, and no closing quote reported.
167 #[test]
168 fn test_unclosed_char() {
169 let component = unclosed_char_component("'a");
170 assert!(component.kind == CodePoint);
171 assert!(component.range == TextRange::from_to(1.into(), 2.into()));
172 }
173
/// `\x` escapes followed by zero, one or two digits are each reported as one
/// `AsciiCodeEscape` component filling the closed literal.
///
/// With three digits the literal splits into two components, and the second
/// one is a plain `CodePoint`. The kind of the first component is not checked
/// in that case.
174 #[test]
175 fn test_digit_escapes() {
176 let literals = &[r"", r"5", r"55"];
177
178 for literal in literals {
179 let lit_text = format!(r"'\x{}'", literal);
180 let component = closed_char_component(&lit_text);
181 assert!(component.kind == AsciiCodeEscape);
182 assert!(component.range == range_closed(&lit_text));
183 }
184
185 // More than 2 digits starts a new codepoint
186 let components = closed_char_components(r"'\x555'");
187 assert!(components.len() == 2);
188 assert!(components[1].kind == CodePoint);
189 }
190
/// Each backslash escape in the list (single quote, double quote, `n`, `r`,
/// `t`, backslash, `0`), wrapped in single quotes, is reported as one
/// `AsciiEscape` component filling the closed literal.
191 #[test]
192 fn test_ascii_escapes() {
193 let literals = &[
194 r"\'", "\\\"", // equivalent to \"
195 r"\n", r"\r", r"\t", r"\\", r"\0",
196 ];
197
198 for literal in literals {
199 let lit_text = format!("'{}'", literal);
200 let component = closed_char_component(&lit_text);
201 assert!(component.kind == AsciiEscape);
202 assert!(component.range == range_closed(&lit_text));
203 }
204 }
205
/// The characters that act as escape letters in the other tests of this
/// module (double quote, `n`, `r`, `t`, `0`, `x`, `u`) are plain `CodePoint`
/// components when they appear without a leading backslash.
206 #[test]
207 fn test_no_escapes() {
208 let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
209
210 for &literal in literals {
211 let lit_text = format!("'{}'", literal);
212 let component = closed_char_component(&lit_text);
213 assert!(component.kind == CodePoint);
214 assert!(component.range == range_closed(&lit_text));
215 }
216 }
217}