diff options
author | Aleksey Kladov <[email protected]> | 2018-12-27 11:42:46 +0000 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2018-12-27 11:42:46 +0000 |
commit | 73ded3c63ca2522b7bb6ca8eb7834c5adc1a3511 (patch) | |
tree | 521f3b0fa02e32b0b2a6e9a9d1370845d922657f /crates/ra_syntax/src | |
parent | dbf03b674e7e1a49d9b32ec5ed656df2aedd3ed3 (diff) |
dedupe literal parsers
Diffstat (limited to 'crates/ra_syntax/src')
-rw-r--r-- | crates/ra_syntax/src/string_lexing.rs | 10 | ||||
-rw-r--r-- | crates/ra_syntax/src/string_lexing/byte.rs | 51 | ||||
-rw-r--r-- | crates/ra_syntax/src/string_lexing/byte_string.rs | 51 | ||||
-rw-r--r-- | crates/ra_syntax/src/string_lexing/char.rs | 176 | ||||
-rw-r--r-- | crates/ra_syntax/src/string_lexing/parser.rs | 82 | ||||
-rw-r--r-- | crates/ra_syntax/src/string_lexing/string.rs | 187 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation/byte.rs | 7 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation/byte_string.rs | 6 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation/char.rs | 7 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation/string.rs | 17 |
10 files changed, 220 insertions, 374 deletions
diff --git a/crates/ra_syntax/src/string_lexing.rs b/crates/ra_syntax/src/string_lexing.rs index 94853331f..349733f3f 100644 --- a/crates/ra_syntax/src/string_lexing.rs +++ b/crates/ra_syntax/src/string_lexing.rs | |||
@@ -1,13 +1,7 @@ | |||
1 | mod parser; | 1 | mod parser; |
2 | mod byte; | ||
3 | mod byte_string; | ||
4 | mod char; | ||
5 | mod string; | 2 | mod string; |
6 | 3 | ||
7 | pub use self::{ | 4 | pub use self::{ |
8 | byte::parse_byte_literal, | 5 | parser::{StringComponent, StringComponentKind}, |
9 | byte_string::parse_byte_string_literal, | 6 | string::{parse_string_literal, parse_char_literal, parse_byte_literal, parse_byte_string_literal}, |
10 | char::parse_char_literal, | ||
11 | parser::{CharComponent, CharComponentKind, StringComponent, StringComponentKind}, | ||
12 | string::parse_string_literal, | ||
13 | }; | 7 | }; |
diff --git a/crates/ra_syntax/src/string_lexing/byte.rs b/crates/ra_syntax/src/string_lexing/byte.rs deleted file mode 100644 index b3228d6ca..000000000 --- a/crates/ra_syntax/src/string_lexing/byte.rs +++ /dev/null | |||
@@ -1,51 +0,0 @@ | |||
1 | use super::parser::Parser; | ||
2 | use super::CharComponent; | ||
3 | |||
4 | pub fn parse_byte_literal(src: &str) -> ByteComponentIterator { | ||
5 | ByteComponentIterator { | ||
6 | parser: Parser::new(src), | ||
7 | has_closing_quote: false, | ||
8 | } | ||
9 | } | ||
10 | |||
11 | pub struct ByteComponentIterator<'a> { | ||
12 | parser: Parser<'a>, | ||
13 | pub has_closing_quote: bool, | ||
14 | } | ||
15 | |||
16 | impl<'a> Iterator for ByteComponentIterator<'a> { | ||
17 | type Item = CharComponent; | ||
18 | fn next(&mut self) -> Option<CharComponent> { | ||
19 | if self.parser.pos == 0 { | ||
20 | assert!( | ||
21 | self.parser.advance() == 'b', | ||
22 | "Byte literal should start with a `b`" | ||
23 | ); | ||
24 | |||
25 | assert!( | ||
26 | self.parser.advance() == '\'', | ||
27 | "Byte literal should start with a `b`, followed by a quote" | ||
28 | ); | ||
29 | } | ||
30 | |||
31 | if let Some(component) = self.parser.parse_char_component() { | ||
32 | return Some(component); | ||
33 | } | ||
34 | |||
35 | // We get here when there are no char components left to parse | ||
36 | if self.parser.peek() == Some('\'') { | ||
37 | self.parser.advance(); | ||
38 | self.has_closing_quote = true; | ||
39 | } | ||
40 | |||
41 | assert!( | ||
42 | self.parser.peek() == None, | ||
43 | "byte literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", | ||
44 | self.parser.src, | ||
45 | self.parser.pos, | ||
46 | self.parser.src.len() | ||
47 | ); | ||
48 | |||
49 | None | ||
50 | } | ||
51 | } | ||
diff --git a/crates/ra_syntax/src/string_lexing/byte_string.rs b/crates/ra_syntax/src/string_lexing/byte_string.rs deleted file mode 100644 index a6056159b..000000000 --- a/crates/ra_syntax/src/string_lexing/byte_string.rs +++ /dev/null | |||
@@ -1,51 +0,0 @@ | |||
1 | use super::parser::Parser; | ||
2 | use super::StringComponent; | ||
3 | |||
4 | pub fn parse_byte_string_literal(src: &str) -> ByteStringComponentIterator { | ||
5 | ByteStringComponentIterator { | ||
6 | parser: Parser::new(src), | ||
7 | has_closing_quote: false, | ||
8 | } | ||
9 | } | ||
10 | |||
11 | pub struct ByteStringComponentIterator<'a> { | ||
12 | parser: Parser<'a>, | ||
13 | pub has_closing_quote: bool, | ||
14 | } | ||
15 | |||
16 | impl<'a> Iterator for ByteStringComponentIterator<'a> { | ||
17 | type Item = StringComponent; | ||
18 | fn next(&mut self) -> Option<StringComponent> { | ||
19 | if self.parser.pos == 0 { | ||
20 | assert!( | ||
21 | self.parser.advance() == 'b', | ||
22 | "byte string literal should start with a `b`" | ||
23 | ); | ||
24 | |||
25 | assert!( | ||
26 | self.parser.advance() == '"', | ||
27 | "byte string literal should start with a `b`, followed by double quotes" | ||
28 | ); | ||
29 | } | ||
30 | |||
31 | if let Some(component) = self.parser.parse_string_component() { | ||
32 | return Some(component); | ||
33 | } | ||
34 | |||
35 | // We get here when there are no char components left to parse | ||
36 | if self.parser.peek() == Some('"') { | ||
37 | self.parser.advance(); | ||
38 | self.has_closing_quote = true; | ||
39 | } | ||
40 | |||
41 | assert!( | ||
42 | self.parser.peek() == None, | ||
43 | "byte string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", | ||
44 | self.parser.src, | ||
45 | self.parser.pos, | ||
46 | self.parser.src.len() | ||
47 | ); | ||
48 | |||
49 | None | ||
50 | } | ||
51 | } | ||
diff --git a/crates/ra_syntax/src/string_lexing/char.rs b/crates/ra_syntax/src/string_lexing/char.rs deleted file mode 100644 index e01813176..000000000 --- a/crates/ra_syntax/src/string_lexing/char.rs +++ /dev/null | |||
@@ -1,176 +0,0 @@ | |||
1 | use super::parser::Parser; | ||
2 | use super::CharComponent; | ||
3 | |||
4 | pub fn parse_char_literal(src: &str) -> CharComponentIterator { | ||
5 | CharComponentIterator { | ||
6 | parser: Parser::new(src), | ||
7 | has_closing_quote: false, | ||
8 | } | ||
9 | } | ||
10 | |||
11 | pub struct CharComponentIterator<'a> { | ||
12 | parser: Parser<'a>, | ||
13 | pub has_closing_quote: bool, | ||
14 | } | ||
15 | |||
16 | impl<'a> Iterator for CharComponentIterator<'a> { | ||
17 | type Item = CharComponent; | ||
18 | fn next(&mut self) -> Option<CharComponent> { | ||
19 | if self.parser.pos == 0 { | ||
20 | assert!( | ||
21 | self.parser.advance() == '\'', | ||
22 | "char literal should start with a quote" | ||
23 | ); | ||
24 | } | ||
25 | |||
26 | if let Some(component) = self.parser.parse_char_component() { | ||
27 | return Some(component); | ||
28 | } | ||
29 | |||
30 | // We get here when there are no char components left to parse | ||
31 | if self.parser.peek() == Some('\'') { | ||
32 | self.parser.advance(); | ||
33 | self.has_closing_quote = true; | ||
34 | } | ||
35 | |||
36 | assert!( | ||
37 | self.parser.peek() == None, | ||
38 | "char literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", | ||
39 | self.parser.src, | ||
40 | self.parser.pos, | ||
41 | self.parser.src.len() | ||
42 | ); | ||
43 | |||
44 | None | ||
45 | } | ||
46 | } | ||
47 | |||
48 | #[cfg(test)] | ||
49 | mod tests { | ||
50 | use rowan::TextRange; | ||
51 | use crate::string_lexing::{ | ||
52 | CharComponent, | ||
53 | CharComponentKind::*, | ||
54 | }; | ||
55 | |||
56 | fn parse(src: &str) -> (bool, Vec<CharComponent>) { | ||
57 | let component_iterator = &mut super::parse_char_literal(src); | ||
58 | let components: Vec<_> = component_iterator.collect(); | ||
59 | (component_iterator.has_closing_quote, components) | ||
60 | } | ||
61 | |||
62 | fn unclosed_char_component(src: &str) -> CharComponent { | ||
63 | let (has_closing_quote, components) = parse(src); | ||
64 | assert!(!has_closing_quote, "char should not have closing quote"); | ||
65 | assert!(components.len() == 1); | ||
66 | components[0].clone() | ||
67 | } | ||
68 | |||
69 | fn closed_char_component(src: &str) -> CharComponent { | ||
70 | let (has_closing_quote, components) = parse(src); | ||
71 | assert!(has_closing_quote, "char should have closing quote"); | ||
72 | assert!( | ||
73 | components.len() == 1, | ||
74 | "Literal: {}\nComponents: {:#?}", | ||
75 | src, | ||
76 | components | ||
77 | ); | ||
78 | components[0].clone() | ||
79 | } | ||
80 | |||
81 | fn closed_char_components(src: &str) -> Vec<CharComponent> { | ||
82 | let (has_closing_quote, components) = parse(src); | ||
83 | assert!(has_closing_quote, "char should have closing quote"); | ||
84 | components | ||
85 | } | ||
86 | |||
87 | fn range_closed(src: &str) -> TextRange { | ||
88 | TextRange::from_to(1.into(), (src.len() as u32 - 1).into()) | ||
89 | } | ||
90 | |||
91 | fn range_unclosed(src: &str) -> TextRange { | ||
92 | TextRange::from_to(1.into(), (src.len() as u32).into()) | ||
93 | } | ||
94 | |||
95 | #[test] | ||
96 | fn test_unicode_escapes() { | ||
97 | let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""]; | ||
98 | for escape in unicode_escapes { | ||
99 | let escape_sequence = format!(r"'\u{}'", escape); | ||
100 | let component = closed_char_component(&escape_sequence); | ||
101 | let expected_range = range_closed(&escape_sequence); | ||
102 | assert_eq!(component.kind, UnicodeEscape); | ||
103 | assert_eq!(component.range, expected_range); | ||
104 | } | ||
105 | } | ||
106 | |||
107 | #[test] | ||
108 | fn test_unicode_escapes_unclosed() { | ||
109 | let unicode_escapes = &["{DEAD", "{BEEF", "{FF"]; | ||
110 | for escape in unicode_escapes { | ||
111 | let escape_sequence = format!(r"'\u{}'", escape); | ||
112 | let component = unclosed_char_component(&escape_sequence); | ||
113 | let expected_range = range_unclosed(&escape_sequence); | ||
114 | assert_eq!(component.kind, UnicodeEscape); | ||
115 | assert_eq!(component.range, expected_range); | ||
116 | } | ||
117 | } | ||
118 | |||
119 | #[test] | ||
120 | fn test_empty_char() { | ||
121 | let (has_closing_quote, components) = parse("''"); | ||
122 | assert!(has_closing_quote, "char should have closing quote"); | ||
123 | assert!(components.len() == 0); | ||
124 | } | ||
125 | |||
126 | #[test] | ||
127 | fn test_unclosed_char() { | ||
128 | let component = unclosed_char_component("'a"); | ||
129 | assert!(component.kind == CodePoint); | ||
130 | assert!(component.range == TextRange::from_to(1.into(), 2.into())); | ||
131 | } | ||
132 | |||
133 | #[test] | ||
134 | fn test_digit_escapes() { | ||
135 | let literals = &[r"", r"5", r"55"]; | ||
136 | |||
137 | for literal in literals { | ||
138 | let lit_text = format!(r"'\x{}'", literal); | ||
139 | let component = closed_char_component(&lit_text); | ||
140 | assert!(component.kind == AsciiCodeEscape); | ||
141 | assert!(component.range == range_closed(&lit_text)); | ||
142 | } | ||
143 | |||
144 | // More than 2 digits starts a new codepoint | ||
145 | let components = closed_char_components(r"'\x555'"); | ||
146 | assert!(components.len() == 2); | ||
147 | assert!(components[1].kind == CodePoint); | ||
148 | } | ||
149 | |||
150 | #[test] | ||
151 | fn test_ascii_escapes() { | ||
152 | let literals = &[ | ||
153 | r"\'", "\\\"", // equivalent to \" | ||
154 | r"\n", r"\r", r"\t", r"\\", r"\0", | ||
155 | ]; | ||
156 | |||
157 | for literal in literals { | ||
158 | let lit_text = format!("'{}'", literal); | ||
159 | let component = closed_char_component(&lit_text); | ||
160 | assert!(component.kind == AsciiEscape); | ||
161 | assert!(component.range == range_closed(&lit_text)); | ||
162 | } | ||
163 | } | ||
164 | |||
165 | #[test] | ||
166 | fn test_no_escapes() { | ||
167 | let literals = &['"', 'n', 'r', 't', '0', 'x', 'u']; | ||
168 | |||
169 | for &literal in literals { | ||
170 | let lit_text = format!("'{}'", literal); | ||
171 | let component = closed_char_component(&lit_text); | ||
172 | assert!(component.kind == CodePoint); | ||
173 | assert!(component.range == range_closed(&lit_text)); | ||
174 | } | ||
175 | } | ||
176 | } | ||
diff --git a/crates/ra_syntax/src/string_lexing/parser.rs b/crates/ra_syntax/src/string_lexing/parser.rs index 4a6d5bc93..13f3db889 100644 --- a/crates/ra_syntax/src/string_lexing/parser.rs +++ b/crates/ra_syntax/src/string_lexing/parser.rs | |||
@@ -1,15 +1,16 @@ | |||
1 | use rowan::{TextRange, TextUnit}; | 1 | use rowan::{TextRange, TextUnit}; |
2 | 2 | ||
3 | use self::CharComponentKind::*; | 3 | use self::StringComponentKind::*; |
4 | 4 | ||
5 | pub struct Parser<'a> { | 5 | pub struct Parser<'a> { |
6 | pub(super) quote: u8, | ||
6 | pub(super) src: &'a str, | 7 | pub(super) src: &'a str, |
7 | pub(super) pos: usize, | 8 | pub(super) pos: usize, |
8 | } | 9 | } |
9 | 10 | ||
10 | impl<'a> Parser<'a> { | 11 | impl<'a> Parser<'a> { |
11 | pub fn new(src: &'a str) -> Parser<'a> { | 12 | pub fn new(src: &'a str, quote: u8) -> Parser<'a> { |
12 | Parser { src, pos: 0 } | 13 | Parser { quote, src, pos: 0 } |
13 | } | 14 | } |
14 | 15 | ||
15 | // Utility methods | 16 | // Utility methods |
@@ -42,7 +43,7 @@ impl<'a> Parser<'a> { | |||
42 | 43 | ||
43 | // Char parsing methods | 44 | // Char parsing methods |
44 | 45 | ||
45 | fn parse_unicode_escape(&mut self, start: TextUnit) -> CharComponent { | 46 | fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent { |
46 | match self.peek() { | 47 | match self.peek() { |
47 | Some('{') => { | 48 | Some('{') => { |
48 | self.advance(); | 49 | self.advance(); |
@@ -56,16 +57,16 @@ impl<'a> Parser<'a> { | |||
56 | } | 57 | } |
57 | 58 | ||
58 | let end = self.get_pos(); | 59 | let end = self.get_pos(); |
59 | CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) | 60 | StringComponent::new(TextRange::from_to(start, end), UnicodeEscape) |
60 | } | 61 | } |
61 | Some(_) | None => { | 62 | Some(_) | None => { |
62 | let end = self.get_pos(); | 63 | let end = self.get_pos(); |
63 | CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) | 64 | StringComponent::new(TextRange::from_to(start, end), UnicodeEscape) |
64 | } | 65 | } |
65 | } | 66 | } |
66 | } | 67 | } |
67 | 68 | ||
68 | fn parse_ascii_code_escape(&mut self, start: TextUnit) -> CharComponent { | 69 | fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent { |
69 | let code_start = self.get_pos(); | 70 | let code_start = self.get_pos(); |
70 | while let Some(next) = self.peek() { | 71 | while let Some(next) = self.peek() { |
71 | if next == '\'' || (self.get_pos() - code_start == 2.into()) { | 72 | if next == '\'' || (self.get_pos() - code_start == 2.into()) { |
@@ -76,12 +77,12 @@ impl<'a> Parser<'a> { | |||
76 | } | 77 | } |
77 | 78 | ||
78 | let end = self.get_pos(); | 79 | let end = self.get_pos(); |
79 | CharComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) | 80 | StringComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) |
80 | } | 81 | } |
81 | 82 | ||
82 | fn parse_escape(&mut self, start: TextUnit) -> CharComponent { | 83 | fn parse_escape(&mut self, start: TextUnit) -> StringComponent { |
83 | if self.peek().is_none() { | 84 | if self.peek().is_none() { |
84 | return CharComponent::new(TextRange::from_to(start, start), AsciiEscape); | 85 | return StringComponent::new(TextRange::from_to(start, start), AsciiEscape); |
85 | } | 86 | } |
86 | 87 | ||
87 | let next = self.advance(); | 88 | let next = self.advance(); |
@@ -90,29 +91,7 @@ impl<'a> Parser<'a> { | |||
90 | match next { | 91 | match next { |
91 | 'x' => self.parse_ascii_code_escape(start), | 92 | 'x' => self.parse_ascii_code_escape(start), |
92 | 'u' => self.parse_unicode_escape(start), | 93 | 'u' => self.parse_unicode_escape(start), |
93 | _ => CharComponent::new(range, AsciiEscape), | 94 | _ => StringComponent::new(range, AsciiEscape), |
94 | } | ||
95 | } | ||
96 | |||
97 | pub fn parse_char_component(&mut self) -> Option<CharComponent> { | ||
98 | let next = self.peek()?; | ||
99 | |||
100 | // Ignore character close | ||
101 | if next == '\'' { | ||
102 | return None; | ||
103 | } | ||
104 | |||
105 | let start = self.get_pos(); | ||
106 | self.advance(); | ||
107 | |||
108 | if next == '\\' { | ||
109 | Some(self.parse_escape(start)) | ||
110 | } else { | ||
111 | let end = self.get_pos(); | ||
112 | Some(CharComponent::new( | ||
113 | TextRange::from_to(start, end), | ||
114 | CodePoint, | ||
115 | )) | ||
116 | } | 95 | } |
117 | } | 96 | } |
118 | 97 | ||
@@ -131,11 +110,11 @@ impl<'a> Parser<'a> { | |||
131 | } | 110 | } |
132 | } | 111 | } |
133 | 112 | ||
134 | pub fn parse_string_component(&mut self) -> Option<StringComponent> { | 113 | pub fn parse_component(&mut self) -> Option<StringComponent> { |
135 | let next = self.peek()?; | 114 | let next = self.peek()?; |
136 | 115 | ||
137 | // Ignore string close | 116 | // Ignore string close |
138 | if next == '"' { | 117 | if next == self.quote as char { |
139 | return None; | 118 | return None; |
140 | } | 119 | } |
141 | 120 | ||
@@ -145,18 +124,18 @@ impl<'a> Parser<'a> { | |||
145 | if next == '\\' { | 124 | if next == '\\' { |
146 | // Strings can use `\` to ignore newlines, so we first try to parse one of those | 125 | // Strings can use `\` to ignore newlines, so we first try to parse one of those |
147 | // before falling back to parsing char escapes | 126 | // before falling back to parsing char escapes |
148 | self.parse_ignore_newline(start).or_else(|| { | 127 | if self.quote == b'"' { |
149 | let char_component = self.parse_escape(start); | 128 | if let Some(component) = self.parse_ignore_newline(start) { |
150 | Some(StringComponent::new( | 129 | return Some(component); |
151 | char_component.range, | 130 | } |
152 | StringComponentKind::Char(char_component.kind), | 131 | } |
153 | )) | 132 | |
154 | }) | 133 | Some(self.parse_escape(start)) |
155 | } else { | 134 | } else { |
156 | let end = self.get_pos(); | 135 | let end = self.get_pos(); |
157 | Some(StringComponent::new( | 136 | Some(StringComponent::new( |
158 | TextRange::from_to(start, end), | 137 | TextRange::from_to(start, end), |
159 | StringComponentKind::Char(CodePoint), | 138 | CodePoint, |
160 | )) | 139 | )) |
161 | } | 140 | } |
162 | } | 141 | } |
@@ -177,23 +156,6 @@ impl StringComponent { | |||
177 | #[derive(Debug, Eq, PartialEq, Clone)] | 156 | #[derive(Debug, Eq, PartialEq, Clone)] |
178 | pub enum StringComponentKind { | 157 | pub enum StringComponentKind { |
179 | IgnoreNewline, | 158 | IgnoreNewline, |
180 | Char(CharComponentKind), | ||
181 | } | ||
182 | |||
183 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
184 | pub struct CharComponent { | ||
185 | pub range: TextRange, | ||
186 | pub kind: CharComponentKind, | ||
187 | } | ||
188 | |||
189 | impl CharComponent { | ||
190 | fn new(range: TextRange, kind: CharComponentKind) -> CharComponent { | ||
191 | CharComponent { range, kind } | ||
192 | } | ||
193 | } | ||
194 | |||
195 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
196 | pub enum CharComponentKind { | ||
197 | CodePoint, | 159 | CodePoint, |
198 | AsciiEscape, | 160 | AsciiEscape, |
199 | AsciiCodeEscape, | 161 | AsciiCodeEscape, |
diff --git a/crates/ra_syntax/src/string_lexing/string.rs b/crates/ra_syntax/src/string_lexing/string.rs index d8351e9af..7476fea13 100644 --- a/crates/ra_syntax/src/string_lexing/string.rs +++ b/crates/ra_syntax/src/string_lexing/string.rs | |||
@@ -1,41 +1,82 @@ | |||
1 | use super::parser::Parser; | 1 | use crate::string_lexing::{ |
2 | use super::StringComponent; | 2 | parser::Parser, |
3 | StringComponent, | ||
4 | }; | ||
3 | 5 | ||
4 | pub fn parse_string_literal(src: &str) -> StringComponentIterator { | 6 | pub fn parse_string_literal(src: &str) -> StringComponentIterator { |
5 | StringComponentIterator { | 7 | StringComponentIterator { |
6 | parser: Parser::new(src), | 8 | parser: Parser::new(src, b'"'), |
7 | has_closing_quote: false, | 9 | has_closing_quote: false, |
10 | prefix: None, | ||
11 | quote: b'"', | ||
12 | } | ||
13 | } | ||
14 | |||
15 | pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator { | ||
16 | StringComponentIterator { | ||
17 | parser: Parser::new(src, b'"'), | ||
18 | has_closing_quote: false, | ||
19 | prefix: Some(b'b'), | ||
20 | quote: b'"', | ||
21 | } | ||
22 | } | ||
23 | |||
24 | pub fn parse_char_literal(src: &str) -> StringComponentIterator { | ||
25 | StringComponentIterator { | ||
26 | parser: Parser::new(src, b'\''), | ||
27 | has_closing_quote: false, | ||
28 | prefix: None, | ||
29 | quote: b'\'', | ||
30 | } | ||
31 | } | ||
32 | |||
33 | pub fn parse_byte_literal(src: &str) -> StringComponentIterator { | ||
34 | StringComponentIterator { | ||
35 | parser: Parser::new(src, b'\''), | ||
36 | has_closing_quote: false, | ||
37 | prefix: Some(b'b'), | ||
38 | quote: b'\'', | ||
8 | } | 39 | } |
9 | } | 40 | } |
10 | 41 | ||
11 | pub struct StringComponentIterator<'a> { | 42 | pub struct StringComponentIterator<'a> { |
12 | parser: Parser<'a>, | 43 | parser: Parser<'a>, |
13 | pub has_closing_quote: bool, | 44 | pub has_closing_quote: bool, |
45 | prefix: Option<u8>, | ||
46 | quote: u8, | ||
14 | } | 47 | } |
15 | 48 | ||
16 | impl<'a> Iterator for StringComponentIterator<'a> { | 49 | impl<'a> Iterator for StringComponentIterator<'a> { |
17 | type Item = StringComponent; | 50 | type Item = StringComponent; |
18 | fn next(&mut self) -> Option<StringComponent> { | 51 | fn next(&mut self) -> Option<StringComponent> { |
19 | if self.parser.pos == 0 { | 52 | if self.parser.pos == 0 { |
53 | if let Some(prefix) = self.prefix { | ||
54 | assert!( | ||
55 | self.parser.advance() == prefix as char, | ||
56 | "literal should start with a {:?}", | ||
57 | prefix as char, | ||
58 | ); | ||
59 | } | ||
20 | assert!( | 60 | assert!( |
21 | self.parser.advance() == '"', | 61 | self.parser.advance() == self.quote as char, |
22 | "string literal should start with double quotes" | 62 | "literal should start with a {:?}", |
63 | self.quote as char, | ||
23 | ); | 64 | ); |
24 | } | 65 | } |
25 | 66 | ||
26 | if let Some(component) = self.parser.parse_string_component() { | 67 | if let Some(component) = self.parser.parse_component() { |
27 | return Some(component); | 68 | return Some(component); |
28 | } | 69 | } |
29 | 70 | ||
30 | // We get here when there are no char components left to parse | 71 | // We get here when there are no char components left to parse |
31 | if self.parser.peek() == Some('"') { | 72 | if self.parser.peek() == Some(self.quote as char) { |
32 | self.parser.advance(); | 73 | self.parser.advance(); |
33 | self.has_closing_quote = true; | 74 | self.has_closing_quote = true; |
34 | } | 75 | } |
35 | 76 | ||
36 | assert!( | 77 | assert!( |
37 | self.parser.peek() == None, | 78 | self.parser.peek() == None, |
38 | "string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", | 79 | "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", |
39 | self.parser.src, | 80 | self.parser.src, |
40 | self.parser.pos, | 81 | self.parser.pos, |
41 | self.parser.src.len() | 82 | self.parser.src.len() |
@@ -44,3 +85,133 @@ impl<'a> Iterator for StringComponentIterator<'a> { | |||
44 | None | 85 | None |
45 | } | 86 | } |
46 | } | 87 | } |
88 | |||
89 | #[cfg(test)] | ||
90 | mod tests { | ||
91 | use rowan::TextRange; | ||
92 | use crate::string_lexing::{ | ||
93 | StringComponent, | ||
94 | StringComponentKind::*, | ||
95 | }; | ||
96 | |||
97 | fn parse(src: &str) -> (bool, Vec<StringComponent>) { | ||
98 | let component_iterator = &mut super::parse_char_literal(src); | ||
99 | let components: Vec<_> = component_iterator.collect(); | ||
100 | (component_iterator.has_closing_quote, components) | ||
101 | } | ||
102 | |||
103 | fn unclosed_char_component(src: &str) -> StringComponent { | ||
104 | let (has_closing_quote, components) = parse(src); | ||
105 | assert!(!has_closing_quote, "char should not have closing quote"); | ||
106 | assert!(components.len() == 1); | ||
107 | components[0].clone() | ||
108 | } | ||
109 | |||
110 | fn closed_char_component(src: &str) -> StringComponent { | ||
111 | let (has_closing_quote, components) = parse(src); | ||
112 | assert!(has_closing_quote, "char should have closing quote"); | ||
113 | assert!( | ||
114 | components.len() == 1, | ||
115 | "Literal: {}\nComponents: {:#?}", | ||
116 | src, | ||
117 | components | ||
118 | ); | ||
119 | components[0].clone() | ||
120 | } | ||
121 | |||
122 | fn closed_char_components(src: &str) -> Vec<StringComponent> { | ||
123 | let (has_closing_quote, components) = parse(src); | ||
124 | assert!(has_closing_quote, "char should have closing quote"); | ||
125 | components | ||
126 | } | ||
127 | |||
128 | fn range_closed(src: &str) -> TextRange { | ||
129 | TextRange::from_to(1.into(), (src.len() as u32 - 1).into()) | ||
130 | } | ||
131 | |||
132 | fn range_unclosed(src: &str) -> TextRange { | ||
133 | TextRange::from_to(1.into(), (src.len() as u32).into()) | ||
134 | } | ||
135 | |||
136 | #[test] | ||
137 | fn test_unicode_escapes() { | ||
138 | let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""]; | ||
139 | for escape in unicode_escapes { | ||
140 | let escape_sequence = format!(r"'\u{}'", escape); | ||
141 | let component = closed_char_component(&escape_sequence); | ||
142 | let expected_range = range_closed(&escape_sequence); | ||
143 | assert_eq!(component.kind, UnicodeEscape); | ||
144 | assert_eq!(component.range, expected_range); | ||
145 | } | ||
146 | } | ||
147 | |||
148 | #[test] | ||
149 | fn test_unicode_escapes_unclosed() { | ||
150 | let unicode_escapes = &["{DEAD", "{BEEF", "{FF"]; | ||
151 | for escape in unicode_escapes { | ||
152 | let escape_sequence = format!(r"'\u{}'", escape); | ||
153 | let component = unclosed_char_component(&escape_sequence); | ||
154 | let expected_range = range_unclosed(&escape_sequence); | ||
155 | assert_eq!(component.kind, UnicodeEscape); | ||
156 | assert_eq!(component.range, expected_range); | ||
157 | } | ||
158 | } | ||
159 | |||
160 | #[test] | ||
161 | fn test_empty_char() { | ||
162 | let (has_closing_quote, components) = parse("''"); | ||
163 | assert!(has_closing_quote, "char should have closing quote"); | ||
164 | assert!(components.len() == 0); | ||
165 | } | ||
166 | |||
167 | #[test] | ||
168 | fn test_unclosed_char() { | ||
169 | let component = unclosed_char_component("'a"); | ||
170 | assert!(component.kind == CodePoint); | ||
171 | assert!(component.range == TextRange::from_to(1.into(), 2.into())); | ||
172 | } | ||
173 | |||
174 | #[test] | ||
175 | fn test_digit_escapes() { | ||
176 | let literals = &[r"", r"5", r"55"]; | ||
177 | |||
178 | for literal in literals { | ||
179 | let lit_text = format!(r"'\x{}'", literal); | ||
180 | let component = closed_char_component(&lit_text); | ||
181 | assert!(component.kind == AsciiCodeEscape); | ||
182 | assert!(component.range == range_closed(&lit_text)); | ||
183 | } | ||
184 | |||
185 | // More than 2 digits starts a new codepoint | ||
186 | let components = closed_char_components(r"'\x555'"); | ||
187 | assert!(components.len() == 2); | ||
188 | assert!(components[1].kind == CodePoint); | ||
189 | } | ||
190 | |||
191 | #[test] | ||
192 | fn test_ascii_escapes() { | ||
193 | let literals = &[ | ||
194 | r"\'", "\\\"", // equivalent to \" | ||
195 | r"\n", r"\r", r"\t", r"\\", r"\0", | ||
196 | ]; | ||
197 | |||
198 | for literal in literals { | ||
199 | let lit_text = format!("'{}'", literal); | ||
200 | let component = closed_char_component(&lit_text); | ||
201 | assert!(component.kind == AsciiEscape); | ||
202 | assert!(component.range == range_closed(&lit_text)); | ||
203 | } | ||
204 | } | ||
205 | |||
206 | #[test] | ||
207 | fn test_no_escapes() { | ||
208 | let literals = &['"', 'n', 'r', 't', '0', 'x', 'u']; | ||
209 | |||
210 | for &literal in literals { | ||
211 | let lit_text = format!("'{}'", literal); | ||
212 | let component = closed_char_component(&lit_text); | ||
213 | assert!(component.kind == CodePoint); | ||
214 | assert!(component.range == range_closed(&lit_text)); | ||
215 | } | ||
216 | } | ||
217 | } | ||
diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs index 43c0d7edd..e3603e761 100644 --- a/crates/ra_syntax/src/validation/byte.rs +++ b/crates/ra_syntax/src/validation/byte.rs | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | use crate::{ | 3 | use crate::{ |
4 | ast::{self, AstNode}, | 4 | ast::{self, AstNode}, |
5 | string_lexing::{self, CharComponentKind}, | 5 | string_lexing::{self, StringComponentKind}, |
6 | TextRange, | 6 | TextRange, |
7 | validation::char, | 7 | validation::char, |
8 | yellow::{ | 8 | yellow::{ |
@@ -38,11 +38,11 @@ pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec<SyntaxError>) | |||
38 | 38 | ||
39 | pub(super) fn validate_byte_component( | 39 | pub(super) fn validate_byte_component( |
40 | text: &str, | 40 | text: &str, |
41 | kind: CharComponentKind, | 41 | kind: StringComponentKind, |
42 | range: TextRange, | 42 | range: TextRange, |
43 | errors: &mut Vec<SyntaxError>, | 43 | errors: &mut Vec<SyntaxError>, |
44 | ) { | 44 | ) { |
45 | use self::CharComponentKind::*; | 45 | use self::StringComponentKind::*; |
46 | match kind { | 46 | match kind { |
47 | AsciiEscape => validate_byte_escape(text, range, errors), | 47 | AsciiEscape => validate_byte_escape(text, range, errors), |
48 | AsciiCodeEscape => validate_byte_code_escape(text, range, errors), | 48 | AsciiCodeEscape => validate_byte_code_escape(text, range, errors), |
@@ -63,6 +63,7 @@ pub(super) fn validate_byte_component( | |||
63 | errors.push(SyntaxError::new(ByteOutOfRange, range)); | 63 | errors.push(SyntaxError::new(ByteOutOfRange, range)); |
64 | } | 64 | } |
65 | } | 65 | } |
66 | IgnoreNewline => { /* always valid */ } | ||
66 | } | 67 | } |
67 | } | 68 | } |
68 | 69 | ||
diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs index 7b830e97c..2f98472f4 100644 --- a/crates/ra_syntax/src/validation/byte_string.rs +++ b/crates/ra_syntax/src/validation/byte_string.rs | |||
@@ -17,15 +17,15 @@ pub(crate) fn validate_byte_string_node(node: ast::ByteString, errors: &mut Vec< | |||
17 | let range = component.range + literal_range.start(); | 17 | let range = component.range + literal_range.start(); |
18 | 18 | ||
19 | match component.kind { | 19 | match component.kind { |
20 | StringComponentKind::Char(kind) => { | 20 | StringComponentKind::IgnoreNewline => { /* always valid */ } |
21 | _ => { | ||
21 | // Chars must escape \t, \n and \r codepoints, but strings don't | 22 | // Chars must escape \t, \n and \r codepoints, but strings don't |
22 | let text = &literal_text[component.range]; | 23 | let text = &literal_text[component.range]; |
23 | match text { | 24 | match text { |
24 | "\t" | "\n" | "\r" => { /* always valid */ } | 25 | "\t" | "\n" | "\r" => { /* always valid */ } |
25 | _ => byte::validate_byte_component(text, kind, range, errors), | 26 | _ => byte::validate_byte_component(text, component.kind, range, errors), |
26 | } | 27 | } |
27 | } | 28 | } |
28 | StringComponentKind::IgnoreNewline => { /* always valid */ } | ||
29 | } | 29 | } |
30 | } | 30 | } |
31 | 31 | ||
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs index 4728c85e6..deb5b0a9e 100644 --- a/crates/ra_syntax/src/validation/char.rs +++ b/crates/ra_syntax/src/validation/char.rs | |||
@@ -6,7 +6,7 @@ use arrayvec::ArrayString; | |||
6 | 6 | ||
7 | use crate::{ | 7 | use crate::{ |
8 | ast::{self, AstNode}, | 8 | ast::{self, AstNode}, |
9 | string_lexing::{self, CharComponentKind}, | 9 | string_lexing::{self, StringComponentKind}, |
10 | TextRange, | 10 | TextRange, |
11 | yellow::{ | 11 | yellow::{ |
12 | SyntaxError, | 12 | SyntaxError, |
@@ -41,12 +41,12 @@ pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) | |||
41 | 41 | ||
42 | pub(super) fn validate_char_component( | 42 | pub(super) fn validate_char_component( |
43 | text: &str, | 43 | text: &str, |
44 | kind: CharComponentKind, | 44 | kind: StringComponentKind, |
45 | range: TextRange, | 45 | range: TextRange, |
46 | errors: &mut Vec<SyntaxError>, | 46 | errors: &mut Vec<SyntaxError>, |
47 | ) { | 47 | ) { |
48 | // Validate escapes | 48 | // Validate escapes |
49 | use self::CharComponentKind::*; | 49 | use self::StringComponentKind::*; |
50 | match kind { | 50 | match kind { |
51 | AsciiEscape => validate_ascii_escape(text, range, errors), | 51 | AsciiEscape => validate_ascii_escape(text, range, errors), |
52 | AsciiCodeEscape => validate_ascii_code_escape(text, range, errors), | 52 | AsciiCodeEscape => validate_ascii_code_escape(text, range, errors), |
@@ -57,6 +57,7 @@ pub(super) fn validate_char_component( | |||
57 | errors.push(SyntaxError::new(UnescapedCodepoint, range)); | 57 | errors.push(SyntaxError::new(UnescapedCodepoint, range)); |
58 | } | 58 | } |
59 | } | 59 | } |
60 | StringComponentKind::IgnoreNewline => { /* always valid */ } | ||
60 | } | 61 | } |
61 | } | 62 | } |
62 | 63 | ||
diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs index 089879d15..456180ab6 100644 --- a/crates/ra_syntax/src/validation/string.rs +++ b/crates/ra_syntax/src/validation/string.rs | |||
@@ -1,6 +1,6 @@ | |||
1 | use crate::{ | 1 | use crate::{ |
2 | ast::{self, AstNode}, | 2 | ast::{self, AstNode}, |
3 | string_lexing::{self, StringComponentKind}, | 3 | string_lexing, |
4 | yellow::{ | 4 | yellow::{ |
5 | SyntaxError, | 5 | SyntaxError, |
6 | SyntaxErrorKind::*, | 6 | SyntaxErrorKind::*, |
@@ -16,16 +16,11 @@ pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxErr | |||
16 | for component in &mut components { | 16 | for component in &mut components { |
17 | let range = component.range + literal_range.start(); | 17 | let range = component.range + literal_range.start(); |
18 | 18 | ||
19 | match component.kind { | 19 | // Chars must escape \t, \n and \r codepoints, but strings don't |
20 | StringComponentKind::Char(kind) => { | 20 | let text = &literal_text[component.range]; |
21 | // Chars must escape \t, \n and \r codepoints, but strings don't | 21 | match text { |
22 | let text = &literal_text[component.range]; | 22 | "\t" | "\n" | "\r" => { /* always valid */ } |
23 | match text { | 23 | _ => char::validate_char_component(text, component.kind, range, errors), |
24 | "\t" | "\n" | "\r" => { /* always valid */ } | ||
25 | _ => char::validate_char_component(text, kind, range, errors), | ||
26 | } | ||
27 | } | ||
28 | StringComponentKind::IgnoreNewline => { /* always valid */ } | ||
29 | } | 24 | } |
30 | } | 25 | } |
31 | 26 | ||