aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2018-12-27 11:42:46 +0000
committerAleksey Kladov <[email protected]>2018-12-27 11:42:46 +0000
commit73ded3c63ca2522b7bb6ca8eb7834c5adc1a3511 (patch)
tree521f3b0fa02e32b0b2a6e9a9d1370845d922657f
parentdbf03b674e7e1a49d9b32ec5ed656df2aedd3ed3 (diff)
dedupe literal parsers
-rw-r--r--crates/ra_syntax/src/string_lexing.rs10
-rw-r--r--crates/ra_syntax/src/string_lexing/byte.rs51
-rw-r--r--crates/ra_syntax/src/string_lexing/byte_string.rs51
-rw-r--r--crates/ra_syntax/src/string_lexing/char.rs176
-rw-r--r--crates/ra_syntax/src/string_lexing/parser.rs82
-rw-r--r--crates/ra_syntax/src/string_lexing/string.rs187
-rw-r--r--crates/ra_syntax/src/validation/byte.rs7
-rw-r--r--crates/ra_syntax/src/validation/byte_string.rs6
-rw-r--r--crates/ra_syntax/src/validation/char.rs7
-rw-r--r--crates/ra_syntax/src/validation/string.rs17
10 files changed, 220 insertions, 374 deletions
diff --git a/crates/ra_syntax/src/string_lexing.rs b/crates/ra_syntax/src/string_lexing.rs
index 94853331f..349733f3f 100644
--- a/crates/ra_syntax/src/string_lexing.rs
+++ b/crates/ra_syntax/src/string_lexing.rs
@@ -1,13 +1,7 @@
1mod parser; 1mod parser;
2mod byte;
3mod byte_string;
4mod char;
5mod string; 2mod string;
6 3
7pub use self::{ 4pub use self::{
8 byte::parse_byte_literal, 5 parser::{StringComponent, StringComponentKind},
9 byte_string::parse_byte_string_literal, 6 string::{parse_string_literal, parse_char_literal, parse_byte_literal, parse_byte_string_literal},
10 char::parse_char_literal,
11 parser::{CharComponent, CharComponentKind, StringComponent, StringComponentKind},
12 string::parse_string_literal,
13}; 7};
diff --git a/crates/ra_syntax/src/string_lexing/byte.rs b/crates/ra_syntax/src/string_lexing/byte.rs
deleted file mode 100644
index b3228d6ca..000000000
--- a/crates/ra_syntax/src/string_lexing/byte.rs
+++ /dev/null
@@ -1,51 +0,0 @@
1use super::parser::Parser;
2use super::CharComponent;
3
4pub fn parse_byte_literal(src: &str) -> ByteComponentIterator {
5 ByteComponentIterator {
6 parser: Parser::new(src),
7 has_closing_quote: false,
8 }
9}
10
11pub struct ByteComponentIterator<'a> {
12 parser: Parser<'a>,
13 pub has_closing_quote: bool,
14}
15
16impl<'a> Iterator for ByteComponentIterator<'a> {
17 type Item = CharComponent;
18 fn next(&mut self) -> Option<CharComponent> {
19 if self.parser.pos == 0 {
20 assert!(
21 self.parser.advance() == 'b',
22 "Byte literal should start with a `b`"
23 );
24
25 assert!(
26 self.parser.advance() == '\'',
27 "Byte literal should start with a `b`, followed by a quote"
28 );
29 }
30
31 if let Some(component) = self.parser.parse_char_component() {
32 return Some(component);
33 }
34
35 // We get here when there are no char components left to parse
36 if self.parser.peek() == Some('\'') {
37 self.parser.advance();
38 self.has_closing_quote = true;
39 }
40
41 assert!(
42 self.parser.peek() == None,
43 "byte literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
44 self.parser.src,
45 self.parser.pos,
46 self.parser.src.len()
47 );
48
49 None
50 }
51}
diff --git a/crates/ra_syntax/src/string_lexing/byte_string.rs b/crates/ra_syntax/src/string_lexing/byte_string.rs
deleted file mode 100644
index a6056159b..000000000
--- a/crates/ra_syntax/src/string_lexing/byte_string.rs
+++ /dev/null
@@ -1,51 +0,0 @@
1use super::parser::Parser;
2use super::StringComponent;
3
4pub fn parse_byte_string_literal(src: &str) -> ByteStringComponentIterator {
5 ByteStringComponentIterator {
6 parser: Parser::new(src),
7 has_closing_quote: false,
8 }
9}
10
11pub struct ByteStringComponentIterator<'a> {
12 parser: Parser<'a>,
13 pub has_closing_quote: bool,
14}
15
16impl<'a> Iterator for ByteStringComponentIterator<'a> {
17 type Item = StringComponent;
18 fn next(&mut self) -> Option<StringComponent> {
19 if self.parser.pos == 0 {
20 assert!(
21 self.parser.advance() == 'b',
22 "byte string literal should start with a `b`"
23 );
24
25 assert!(
26 self.parser.advance() == '"',
27 "byte string literal should start with a `b`, followed by double quotes"
28 );
29 }
30
31 if let Some(component) = self.parser.parse_string_component() {
32 return Some(component);
33 }
34
35 // We get here when there are no char components left to parse
36 if self.parser.peek() == Some('"') {
37 self.parser.advance();
38 self.has_closing_quote = true;
39 }
40
41 assert!(
42 self.parser.peek() == None,
43 "byte string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
44 self.parser.src,
45 self.parser.pos,
46 self.parser.src.len()
47 );
48
49 None
50 }
51}
diff --git a/crates/ra_syntax/src/string_lexing/char.rs b/crates/ra_syntax/src/string_lexing/char.rs
deleted file mode 100644
index e01813176..000000000
--- a/crates/ra_syntax/src/string_lexing/char.rs
+++ /dev/null
@@ -1,176 +0,0 @@
1use super::parser::Parser;
2use super::CharComponent;
3
4pub fn parse_char_literal(src: &str) -> CharComponentIterator {
5 CharComponentIterator {
6 parser: Parser::new(src),
7 has_closing_quote: false,
8 }
9}
10
11pub struct CharComponentIterator<'a> {
12 parser: Parser<'a>,
13 pub has_closing_quote: bool,
14}
15
16impl<'a> Iterator for CharComponentIterator<'a> {
17 type Item = CharComponent;
18 fn next(&mut self) -> Option<CharComponent> {
19 if self.parser.pos == 0 {
20 assert!(
21 self.parser.advance() == '\'',
22 "char literal should start with a quote"
23 );
24 }
25
26 if let Some(component) = self.parser.parse_char_component() {
27 return Some(component);
28 }
29
30 // We get here when there are no char components left to parse
31 if self.parser.peek() == Some('\'') {
32 self.parser.advance();
33 self.has_closing_quote = true;
34 }
35
36 assert!(
37 self.parser.peek() == None,
38 "char literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
39 self.parser.src,
40 self.parser.pos,
41 self.parser.src.len()
42 );
43
44 None
45 }
46}
47
48#[cfg(test)]
49mod tests {
50 use rowan::TextRange;
51 use crate::string_lexing::{
52 CharComponent,
53 CharComponentKind::*,
54};
55
56 fn parse(src: &str) -> (bool, Vec<CharComponent>) {
57 let component_iterator = &mut super::parse_char_literal(src);
58 let components: Vec<_> = component_iterator.collect();
59 (component_iterator.has_closing_quote, components)
60 }
61
62 fn unclosed_char_component(src: &str) -> CharComponent {
63 let (has_closing_quote, components) = parse(src);
64 assert!(!has_closing_quote, "char should not have closing quote");
65 assert!(components.len() == 1);
66 components[0].clone()
67 }
68
69 fn closed_char_component(src: &str) -> CharComponent {
70 let (has_closing_quote, components) = parse(src);
71 assert!(has_closing_quote, "char should have closing quote");
72 assert!(
73 components.len() == 1,
74 "Literal: {}\nComponents: {:#?}",
75 src,
76 components
77 );
78 components[0].clone()
79 }
80
81 fn closed_char_components(src: &str) -> Vec<CharComponent> {
82 let (has_closing_quote, components) = parse(src);
83 assert!(has_closing_quote, "char should have closing quote");
84 components
85 }
86
87 fn range_closed(src: &str) -> TextRange {
88 TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
89 }
90
91 fn range_unclosed(src: &str) -> TextRange {
92 TextRange::from_to(1.into(), (src.len() as u32).into())
93 }
94
95 #[test]
96 fn test_unicode_escapes() {
97 let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
98 for escape in unicode_escapes {
99 let escape_sequence = format!(r"'\u{}'", escape);
100 let component = closed_char_component(&escape_sequence);
101 let expected_range = range_closed(&escape_sequence);
102 assert_eq!(component.kind, UnicodeEscape);
103 assert_eq!(component.range, expected_range);
104 }
105 }
106
107 #[test]
108 fn test_unicode_escapes_unclosed() {
109 let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
110 for escape in unicode_escapes {
111 let escape_sequence = format!(r"'\u{}'", escape);
112 let component = unclosed_char_component(&escape_sequence);
113 let expected_range = range_unclosed(&escape_sequence);
114 assert_eq!(component.kind, UnicodeEscape);
115 assert_eq!(component.range, expected_range);
116 }
117 }
118
119 #[test]
120 fn test_empty_char() {
121 let (has_closing_quote, components) = parse("''");
122 assert!(has_closing_quote, "char should have closing quote");
123 assert!(components.len() == 0);
124 }
125
126 #[test]
127 fn test_unclosed_char() {
128 let component = unclosed_char_component("'a");
129 assert!(component.kind == CodePoint);
130 assert!(component.range == TextRange::from_to(1.into(), 2.into()));
131 }
132
133 #[test]
134 fn test_digit_escapes() {
135 let literals = &[r"", r"5", r"55"];
136
137 for literal in literals {
138 let lit_text = format!(r"'\x{}'", literal);
139 let component = closed_char_component(&lit_text);
140 assert!(component.kind == AsciiCodeEscape);
141 assert!(component.range == range_closed(&lit_text));
142 }
143
144 // More than 2 digits starts a new codepoint
145 let components = closed_char_components(r"'\x555'");
146 assert!(components.len() == 2);
147 assert!(components[1].kind == CodePoint);
148 }
149
150 #[test]
151 fn test_ascii_escapes() {
152 let literals = &[
153 r"\'", "\\\"", // equivalent to \"
154 r"\n", r"\r", r"\t", r"\\", r"\0",
155 ];
156
157 for literal in literals {
158 let lit_text = format!("'{}'", literal);
159 let component = closed_char_component(&lit_text);
160 assert!(component.kind == AsciiEscape);
161 assert!(component.range == range_closed(&lit_text));
162 }
163 }
164
165 #[test]
166 fn test_no_escapes() {
167 let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
168
169 for &literal in literals {
170 let lit_text = format!("'{}'", literal);
171 let component = closed_char_component(&lit_text);
172 assert!(component.kind == CodePoint);
173 assert!(component.range == range_closed(&lit_text));
174 }
175 }
176}
diff --git a/crates/ra_syntax/src/string_lexing/parser.rs b/crates/ra_syntax/src/string_lexing/parser.rs
index 4a6d5bc93..13f3db889 100644
--- a/crates/ra_syntax/src/string_lexing/parser.rs
+++ b/crates/ra_syntax/src/string_lexing/parser.rs
@@ -1,15 +1,16 @@
1use rowan::{TextRange, TextUnit}; 1use rowan::{TextRange, TextUnit};
2 2
3use self::CharComponentKind::*; 3use self::StringComponentKind::*;
4 4
5pub struct Parser<'a> { 5pub struct Parser<'a> {
6 pub(super) quote: u8,
6 pub(super) src: &'a str, 7 pub(super) src: &'a str,
7 pub(super) pos: usize, 8 pub(super) pos: usize,
8} 9}
9 10
10impl<'a> Parser<'a> { 11impl<'a> Parser<'a> {
11 pub fn new(src: &'a str) -> Parser<'a> { 12 pub fn new(src: &'a str, quote: u8) -> Parser<'a> {
12 Parser { src, pos: 0 } 13 Parser { quote, src, pos: 0 }
13 } 14 }
14 15
15 // Utility methods 16 // Utility methods
@@ -42,7 +43,7 @@ impl<'a> Parser<'a> {
42 43
43 // Char parsing methods 44 // Char parsing methods
44 45
45 fn parse_unicode_escape(&mut self, start: TextUnit) -> CharComponent { 46 fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent {
46 match self.peek() { 47 match self.peek() {
47 Some('{') => { 48 Some('{') => {
48 self.advance(); 49 self.advance();
@@ -56,16 +57,16 @@ impl<'a> Parser<'a> {
56 } 57 }
57 58
58 let end = self.get_pos(); 59 let end = self.get_pos();
59 CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) 60 StringComponent::new(TextRange::from_to(start, end), UnicodeEscape)
60 } 61 }
61 Some(_) | None => { 62 Some(_) | None => {
62 let end = self.get_pos(); 63 let end = self.get_pos();
63 CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) 64 StringComponent::new(TextRange::from_to(start, end), UnicodeEscape)
64 } 65 }
65 } 66 }
66 } 67 }
67 68
68 fn parse_ascii_code_escape(&mut self, start: TextUnit) -> CharComponent { 69 fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent {
69 let code_start = self.get_pos(); 70 let code_start = self.get_pos();
70 while let Some(next) = self.peek() { 71 while let Some(next) = self.peek() {
71 if next == '\'' || (self.get_pos() - code_start == 2.into()) { 72 if next == '\'' || (self.get_pos() - code_start == 2.into()) {
@@ -76,12 +77,12 @@ impl<'a> Parser<'a> {
76 } 77 }
77 78
78 let end = self.get_pos(); 79 let end = self.get_pos();
79 CharComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) 80 StringComponent::new(TextRange::from_to(start, end), AsciiCodeEscape)
80 } 81 }
81 82
82 fn parse_escape(&mut self, start: TextUnit) -> CharComponent { 83 fn parse_escape(&mut self, start: TextUnit) -> StringComponent {
83 if self.peek().is_none() { 84 if self.peek().is_none() {
84 return CharComponent::new(TextRange::from_to(start, start), AsciiEscape); 85 return StringComponent::new(TextRange::from_to(start, start), AsciiEscape);
85 } 86 }
86 87
87 let next = self.advance(); 88 let next = self.advance();
@@ -90,29 +91,7 @@ impl<'a> Parser<'a> {
90 match next { 91 match next {
91 'x' => self.parse_ascii_code_escape(start), 92 'x' => self.parse_ascii_code_escape(start),
92 'u' => self.parse_unicode_escape(start), 93 'u' => self.parse_unicode_escape(start),
93 _ => CharComponent::new(range, AsciiEscape), 94 _ => StringComponent::new(range, AsciiEscape),
94 }
95 }
96
97 pub fn parse_char_component(&mut self) -> Option<CharComponent> {
98 let next = self.peek()?;
99
100 // Ignore character close
101 if next == '\'' {
102 return None;
103 }
104
105 let start = self.get_pos();
106 self.advance();
107
108 if next == '\\' {
109 Some(self.parse_escape(start))
110 } else {
111 let end = self.get_pos();
112 Some(CharComponent::new(
113 TextRange::from_to(start, end),
114 CodePoint,
115 ))
116 } 95 }
117 } 96 }
118 97
@@ -131,11 +110,11 @@ impl<'a> Parser<'a> {
131 } 110 }
132 } 111 }
133 112
134 pub fn parse_string_component(&mut self) -> Option<StringComponent> { 113 pub fn parse_component(&mut self) -> Option<StringComponent> {
135 let next = self.peek()?; 114 let next = self.peek()?;
136 115
137 // Ignore string close 116 // Ignore string close
138 if next == '"' { 117 if next == self.quote as char {
139 return None; 118 return None;
140 } 119 }
141 120
@@ -145,18 +124,18 @@ impl<'a> Parser<'a> {
145 if next == '\\' { 124 if next == '\\' {
146 // Strings can use `\` to ignore newlines, so we first try to parse one of those 125 // Strings can use `\` to ignore newlines, so we first try to parse one of those
147 // before falling back to parsing char escapes 126 // before falling back to parsing char escapes
148 self.parse_ignore_newline(start).or_else(|| { 127 if self.quote == b'"' {
149 let char_component = self.parse_escape(start); 128 if let Some(component) = self.parse_ignore_newline(start) {
150 Some(StringComponent::new( 129 return Some(component);
151 char_component.range, 130 }
152 StringComponentKind::Char(char_component.kind), 131 }
153 )) 132
154 }) 133 Some(self.parse_escape(start))
155 } else { 134 } else {
156 let end = self.get_pos(); 135 let end = self.get_pos();
157 Some(StringComponent::new( 136 Some(StringComponent::new(
158 TextRange::from_to(start, end), 137 TextRange::from_to(start, end),
159 StringComponentKind::Char(CodePoint), 138 CodePoint,
160 )) 139 ))
161 } 140 }
162 } 141 }
@@ -177,23 +156,6 @@ impl StringComponent {
177#[derive(Debug, Eq, PartialEq, Clone)] 156#[derive(Debug, Eq, PartialEq, Clone)]
178pub enum StringComponentKind { 157pub enum StringComponentKind {
179 IgnoreNewline, 158 IgnoreNewline,
180 Char(CharComponentKind),
181}
182
183#[derive(Debug, Eq, PartialEq, Clone)]
184pub struct CharComponent {
185 pub range: TextRange,
186 pub kind: CharComponentKind,
187}
188
189impl CharComponent {
190 fn new(range: TextRange, kind: CharComponentKind) -> CharComponent {
191 CharComponent { range, kind }
192 }
193}
194
195#[derive(Debug, Eq, PartialEq, Clone)]
196pub enum CharComponentKind {
197 CodePoint, 159 CodePoint,
198 AsciiEscape, 160 AsciiEscape,
199 AsciiCodeEscape, 161 AsciiCodeEscape,
diff --git a/crates/ra_syntax/src/string_lexing/string.rs b/crates/ra_syntax/src/string_lexing/string.rs
index d8351e9af..7476fea13 100644
--- a/crates/ra_syntax/src/string_lexing/string.rs
+++ b/crates/ra_syntax/src/string_lexing/string.rs
@@ -1,41 +1,82 @@
1use super::parser::Parser; 1use crate::string_lexing::{
2use super::StringComponent; 2 parser::Parser,
3 StringComponent,
4};
3 5
4pub fn parse_string_literal(src: &str) -> StringComponentIterator { 6pub fn parse_string_literal(src: &str) -> StringComponentIterator {
5 StringComponentIterator { 7 StringComponentIterator {
6 parser: Parser::new(src), 8 parser: Parser::new(src, b'"'),
7 has_closing_quote: false, 9 has_closing_quote: false,
10 prefix: None,
11 quote: b'"',
12 }
13}
14
15pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator {
16 StringComponentIterator {
17 parser: Parser::new(src, b'"'),
18 has_closing_quote: false,
19 prefix: Some(b'b'),
20 quote: b'"',
21 }
22}
23
24pub fn parse_char_literal(src: &str) -> StringComponentIterator {
25 StringComponentIterator {
26 parser: Parser::new(src, b'\''),
27 has_closing_quote: false,
28 prefix: None,
29 quote: b'\'',
30 }
31}
32
33pub fn parse_byte_literal(src: &str) -> StringComponentIterator {
34 StringComponentIterator {
35 parser: Parser::new(src, b'\''),
36 has_closing_quote: false,
37 prefix: Some(b'b'),
38 quote: b'\'',
8 } 39 }
9} 40}
10 41
11pub struct StringComponentIterator<'a> { 42pub struct StringComponentIterator<'a> {
12 parser: Parser<'a>, 43 parser: Parser<'a>,
13 pub has_closing_quote: bool, 44 pub has_closing_quote: bool,
45 prefix: Option<u8>,
46 quote: u8,
14} 47}
15 48
16impl<'a> Iterator for StringComponentIterator<'a> { 49impl<'a> Iterator for StringComponentIterator<'a> {
17 type Item = StringComponent; 50 type Item = StringComponent;
18 fn next(&mut self) -> Option<StringComponent> { 51 fn next(&mut self) -> Option<StringComponent> {
19 if self.parser.pos == 0 { 52 if self.parser.pos == 0 {
53 if let Some(prefix) = self.prefix {
54 assert!(
55 self.parser.advance() == prefix as char,
56 "literal should start with a {:?}",
57 prefix as char,
58 );
59 }
20 assert!( 60 assert!(
21 self.parser.advance() == '"', 61 self.parser.advance() == self.quote as char,
22 "string literal should start with double quotes" 62 "literal should start with a {:?}",
63 self.quote as char,
23 ); 64 );
24 } 65 }
25 66
26 if let Some(component) = self.parser.parse_string_component() { 67 if let Some(component) = self.parser.parse_component() {
27 return Some(component); 68 return Some(component);
28 } 69 }
29 70
30 // We get here when there are no char components left to parse 71 // We get here when there are no char components left to parse
31 if self.parser.peek() == Some('"') { 72 if self.parser.peek() == Some(self.quote as char) {
32 self.parser.advance(); 73 self.parser.advance();
33 self.has_closing_quote = true; 74 self.has_closing_quote = true;
34 } 75 }
35 76
36 assert!( 77 assert!(
37 self.parser.peek() == None, 78 self.parser.peek() == None,
38 "string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", 79 "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
39 self.parser.src, 80 self.parser.src,
40 self.parser.pos, 81 self.parser.pos,
41 self.parser.src.len() 82 self.parser.src.len()
@@ -44,3 +85,133 @@ impl<'a> Iterator for StringComponentIterator<'a> {
44 None 85 None
45 } 86 }
46} 87}
88
89#[cfg(test)]
90mod tests {
91 use rowan::TextRange;
92 use crate::string_lexing::{
93 StringComponent,
94 StringComponentKind::*,
95};
96
97 fn parse(src: &str) -> (bool, Vec<StringComponent>) {
98 let component_iterator = &mut super::parse_char_literal(src);
99 let components: Vec<_> = component_iterator.collect();
100 (component_iterator.has_closing_quote, components)
101 }
102
103 fn unclosed_char_component(src: &str) -> StringComponent {
104 let (has_closing_quote, components) = parse(src);
105 assert!(!has_closing_quote, "char should not have closing quote");
106 assert!(components.len() == 1);
107 components[0].clone()
108 }
109
110 fn closed_char_component(src: &str) -> StringComponent {
111 let (has_closing_quote, components) = parse(src);
112 assert!(has_closing_quote, "char should have closing quote");
113 assert!(
114 components.len() == 1,
115 "Literal: {}\nComponents: {:#?}",
116 src,
117 components
118 );
119 components[0].clone()
120 }
121
122 fn closed_char_components(src: &str) -> Vec<StringComponent> {
123 let (has_closing_quote, components) = parse(src);
124 assert!(has_closing_quote, "char should have closing quote");
125 components
126 }
127
128 fn range_closed(src: &str) -> TextRange {
129 TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
130 }
131
132 fn range_unclosed(src: &str) -> TextRange {
133 TextRange::from_to(1.into(), (src.len() as u32).into())
134 }
135
136 #[test]
137 fn test_unicode_escapes() {
138 let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
139 for escape in unicode_escapes {
140 let escape_sequence = format!(r"'\u{}'", escape);
141 let component = closed_char_component(&escape_sequence);
142 let expected_range = range_closed(&escape_sequence);
143 assert_eq!(component.kind, UnicodeEscape);
144 assert_eq!(component.range, expected_range);
145 }
146 }
147
148 #[test]
149 fn test_unicode_escapes_unclosed() {
150 let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
151 for escape in unicode_escapes {
152 let escape_sequence = format!(r"'\u{}'", escape);
153 let component = unclosed_char_component(&escape_sequence);
154 let expected_range = range_unclosed(&escape_sequence);
155 assert_eq!(component.kind, UnicodeEscape);
156 assert_eq!(component.range, expected_range);
157 }
158 }
159
160 #[test]
161 fn test_empty_char() {
162 let (has_closing_quote, components) = parse("''");
163 assert!(has_closing_quote, "char should have closing quote");
164 assert!(components.len() == 0);
165 }
166
167 #[test]
168 fn test_unclosed_char() {
169 let component = unclosed_char_component("'a");
170 assert!(component.kind == CodePoint);
171 assert!(component.range == TextRange::from_to(1.into(), 2.into()));
172 }
173
174 #[test]
175 fn test_digit_escapes() {
176 let literals = &[r"", r"5", r"55"];
177
178 for literal in literals {
179 let lit_text = format!(r"'\x{}'", literal);
180 let component = closed_char_component(&lit_text);
181 assert!(component.kind == AsciiCodeEscape);
182 assert!(component.range == range_closed(&lit_text));
183 }
184
185 // More than 2 digits starts a new codepoint
186 let components = closed_char_components(r"'\x555'");
187 assert!(components.len() == 2);
188 assert!(components[1].kind == CodePoint);
189 }
190
191 #[test]
192 fn test_ascii_escapes() {
193 let literals = &[
194 r"\'", "\\\"", // equivalent to \"
195 r"\n", r"\r", r"\t", r"\\", r"\0",
196 ];
197
198 for literal in literals {
199 let lit_text = format!("'{}'", literal);
200 let component = closed_char_component(&lit_text);
201 assert!(component.kind == AsciiEscape);
202 assert!(component.range == range_closed(&lit_text));
203 }
204 }
205
206 #[test]
207 fn test_no_escapes() {
208 let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
209
210 for &literal in literals {
211 let lit_text = format!("'{}'", literal);
212 let component = closed_char_component(&lit_text);
213 assert!(component.kind == CodePoint);
214 assert!(component.range == range_closed(&lit_text));
215 }
216 }
217}
diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs
index 43c0d7edd..e3603e761 100644
--- a/crates/ra_syntax/src/validation/byte.rs
+++ b/crates/ra_syntax/src/validation/byte.rs
@@ -2,7 +2,7 @@
2 2
3use crate::{ 3use crate::{
4 ast::{self, AstNode}, 4 ast::{self, AstNode},
5 string_lexing::{self, CharComponentKind}, 5 string_lexing::{self, StringComponentKind},
6 TextRange, 6 TextRange,
7 validation::char, 7 validation::char,
8 yellow::{ 8 yellow::{
@@ -38,11 +38,11 @@ pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec<SyntaxError>)
38 38
39pub(super) fn validate_byte_component( 39pub(super) fn validate_byte_component(
40 text: &str, 40 text: &str,
41 kind: CharComponentKind, 41 kind: StringComponentKind,
42 range: TextRange, 42 range: TextRange,
43 errors: &mut Vec<SyntaxError>, 43 errors: &mut Vec<SyntaxError>,
44) { 44) {
45 use self::CharComponentKind::*; 45 use self::StringComponentKind::*;
46 match kind { 46 match kind {
47 AsciiEscape => validate_byte_escape(text, range, errors), 47 AsciiEscape => validate_byte_escape(text, range, errors),
48 AsciiCodeEscape => validate_byte_code_escape(text, range, errors), 48 AsciiCodeEscape => validate_byte_code_escape(text, range, errors),
@@ -63,6 +63,7 @@ pub(super) fn validate_byte_component(
63 errors.push(SyntaxError::new(ByteOutOfRange, range)); 63 errors.push(SyntaxError::new(ByteOutOfRange, range));
64 } 64 }
65 } 65 }
66 IgnoreNewline => { /* always valid */ }
66 } 67 }
67} 68}
68 69
diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs
index 7b830e97c..2f98472f4 100644
--- a/crates/ra_syntax/src/validation/byte_string.rs
+++ b/crates/ra_syntax/src/validation/byte_string.rs
@@ -17,15 +17,15 @@ pub(crate) fn validate_byte_string_node(node: ast::ByteString, errors: &mut Vec<
17 let range = component.range + literal_range.start(); 17 let range = component.range + literal_range.start();
18 18
19 match component.kind { 19 match component.kind {
20 StringComponentKind::Char(kind) => { 20 StringComponentKind::IgnoreNewline => { /* always valid */ }
21 _ => {
21 // Chars must escape \t, \n and \r codepoints, but strings don't 22 // Chars must escape \t, \n and \r codepoints, but strings don't
22 let text = &literal_text[component.range]; 23 let text = &literal_text[component.range];
23 match text { 24 match text {
24 "\t" | "\n" | "\r" => { /* always valid */ } 25 "\t" | "\n" | "\r" => { /* always valid */ }
25 _ => byte::validate_byte_component(text, kind, range, errors), 26 _ => byte::validate_byte_component(text, component.kind, range, errors),
26 } 27 }
27 } 28 }
28 StringComponentKind::IgnoreNewline => { /* always valid */ }
29 } 29 }
30 } 30 }
31 31
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
index 4728c85e6..deb5b0a9e 100644
--- a/crates/ra_syntax/src/validation/char.rs
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -6,7 +6,7 @@ use arrayvec::ArrayString;
6 6
7use crate::{ 7use crate::{
8 ast::{self, AstNode}, 8 ast::{self, AstNode},
9 string_lexing::{self, CharComponentKind}, 9 string_lexing::{self, StringComponentKind},
10 TextRange, 10 TextRange,
11 yellow::{ 11 yellow::{
12 SyntaxError, 12 SyntaxError,
@@ -41,12 +41,12 @@ pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>)
41 41
42pub(super) fn validate_char_component( 42pub(super) fn validate_char_component(
43 text: &str, 43 text: &str,
44 kind: CharComponentKind, 44 kind: StringComponentKind,
45 range: TextRange, 45 range: TextRange,
46 errors: &mut Vec<SyntaxError>, 46 errors: &mut Vec<SyntaxError>,
47) { 47) {
48 // Validate escapes 48 // Validate escapes
49 use self::CharComponentKind::*; 49 use self::StringComponentKind::*;
50 match kind { 50 match kind {
51 AsciiEscape => validate_ascii_escape(text, range, errors), 51 AsciiEscape => validate_ascii_escape(text, range, errors),
52 AsciiCodeEscape => validate_ascii_code_escape(text, range, errors), 52 AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
@@ -57,6 +57,7 @@ pub(super) fn validate_char_component(
57 errors.push(SyntaxError::new(UnescapedCodepoint, range)); 57 errors.push(SyntaxError::new(UnescapedCodepoint, range));
58 } 58 }
59 } 59 }
60 StringComponentKind::IgnoreNewline => { /* always valid */ }
60 } 61 }
61} 62}
62 63
diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs
index 089879d15..456180ab6 100644
--- a/crates/ra_syntax/src/validation/string.rs
+++ b/crates/ra_syntax/src/validation/string.rs
@@ -1,6 +1,6 @@
1use crate::{ 1use crate::{
2 ast::{self, AstNode}, 2 ast::{self, AstNode},
3 string_lexing::{self, StringComponentKind}, 3 string_lexing,
4 yellow::{ 4 yellow::{
5 SyntaxError, 5 SyntaxError,
6 SyntaxErrorKind::*, 6 SyntaxErrorKind::*,
@@ -16,16 +16,11 @@ pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxErr
16 for component in &mut components { 16 for component in &mut components {
17 let range = component.range + literal_range.start(); 17 let range = component.range + literal_range.start();
18 18
19 match component.kind { 19 // Chars must escape \t, \n and \r codepoints, but strings don't
20 StringComponentKind::Char(kind) => { 20 let text = &literal_text[component.range];
21 // Chars must escape \t, \n and \r codepoints, but strings don't 21 match text {
22 let text = &literal_text[component.range]; 22 "\t" | "\n" | "\r" => { /* always valid */ }
23 match text { 23 _ => char::validate_char_component(text, component.kind, range, errors),
24 "\t" | "\n" | "\r" => { /* always valid */ }
25 _ => char::validate_char_component(text, kind, range, errors),
26 }
27 }
28 StringComponentKind::IgnoreNewline => { /* always valid */ }
29 } 24 }
30 } 25 }
31 26