about summary refs log tree commit diff
path: root/crates
diff options
context:
space:
mode:
Diffstat (limited to 'crates')
-rw-r--r-- crates/ra_syntax/src/string_lexing.rs 10
-rw-r--r-- crates/ra_syntax/src/string_lexing/byte.rs 51
-rw-r--r-- crates/ra_syntax/src/string_lexing/byte_string.rs 51
-rw-r--r-- crates/ra_syntax/src/string_lexing/char.rs 176
-rw-r--r-- crates/ra_syntax/src/string_lexing/parser.rs 92
-rw-r--r-- crates/ra_syntax/src/string_lexing/string.rs 197
-rw-r--r-- crates/ra_syntax/src/utils.rs 7
-rw-r--r-- crates/ra_syntax/src/validation/byte.rs 14
-rw-r--r-- crates/ra_syntax/src/validation/byte_string.rs 13
-rw-r--r-- crates/ra_syntax/src/validation/char.rs 14
-rw-r--r-- crates/ra_syntax/src/validation/string.rs 24
-rw-r--r-- crates/ra_syntax/src/yellow/syntax_error.rs 2
-rw-r--r-- crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs 6
-rw-r--r-- crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt 73
14 files changed, 354 insertions, 376 deletions
diff --git a/crates/ra_syntax/src/string_lexing.rs b/crates/ra_syntax/src/string_lexing.rs
index 94853331f..349733f3f 100644
--- a/crates/ra_syntax/src/string_lexing.rs
+++ b/crates/ra_syntax/src/string_lexing.rs
@@ -1,13 +1,7 @@
1mod parser; 1mod parser;
2mod byte;
3mod byte_string;
4mod char;
5mod string; 2mod string;
6 3
7pub use self::{ 4pub use self::{
8 byte::parse_byte_literal, 5 parser::{StringComponent, StringComponentKind},
9 byte_string::parse_byte_string_literal, 6 string::{parse_string_literal, parse_char_literal, parse_byte_literal, parse_byte_string_literal},
10 char::parse_char_literal,
11 parser::{CharComponent, CharComponentKind, StringComponent, StringComponentKind},
12 string::parse_string_literal,
13}; 7};
diff --git a/crates/ra_syntax/src/string_lexing/byte.rs b/crates/ra_syntax/src/string_lexing/byte.rs
deleted file mode 100644
index b3228d6ca..000000000
--- a/crates/ra_syntax/src/string_lexing/byte.rs
+++ /dev/null
@@ -1,51 +0,0 @@
1use super::parser::Parser;
2use super::CharComponent;
3
/// Builds a component iterator over the byte literal text `src`.
/// Nothing is parsed here: the parser starts at position 0 and the
/// closing-quote flag starts out `false`; work happens in `next`.
4pub fn parse_byte_literal(src: &str) -> ByteComponentIterator {
5 ByteComponentIterator {
6 parser: Parser::new(src),
7 has_closing_quote: false,
8 }
9}
10
/// Iterator over the `CharComponent`s of a byte literal.
11pub struct ByteComponentIterator<'a> {
// Parser holding the literal text and the current position in it.
12 parser: Parser<'a>,
// Set to `true` by `next` once the closing `'` has been consumed.
13 pub has_closing_quote: bool,
14}
15
// Drives the parser one component at a time over a byte literal.
16impl<'a> Iterator for ByteComponentIterator<'a> {
17 type Item = CharComponent;
// Returns the next component, or `None` once no component is left.
//
// Panics (through `assert!`) when the text does not open with `b'`, or when
// input is still left after the optional closing quote.
18 fn next(&mut self) -> Option<CharComponent> {
// While the parser position is still 0, consume the `b'` opener.
19 if self.parser.pos == 0 {
20 assert!(
21 self.parser.advance() == 'b',
22 "Byte literal should start with a `b`"
23 );
24
25 assert!(
26 self.parser.advance() == '\'',
27 "Byte literal should start with a `b`, followed by a quote"
28 );
29 }
30
// `parse_char_component` yields `None` at end of input or at a `'`.
31 if let Some(component) = self.parser.parse_char_component() {
32 return Some(component);
33 }
34
35 // We get here when there are no char components left to parse
36 if self.parser.peek() == Some('\'') {
37 self.parser.advance();
38 self.has_closing_quote = true;
39 }
40
// Anything left after the closing quote is treated as a caller bug.
41 assert!(
42 self.parser.peek() == None,
43 "byte literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
44 self.parser.src,
45 self.parser.pos,
46 self.parser.src.len()
47 );
48
49 None
50 }
51}
diff --git a/crates/ra_syntax/src/string_lexing/byte_string.rs b/crates/ra_syntax/src/string_lexing/byte_string.rs
deleted file mode 100644
index a6056159b..000000000
--- a/crates/ra_syntax/src/string_lexing/byte_string.rs
+++ /dev/null
@@ -1,51 +0,0 @@
1use super::parser::Parser;
2use super::StringComponent;
3
/// Builds a component iterator over the byte string literal text `src`.
/// Nothing is parsed here: the parser starts at position 0 and the
/// closing-quote flag starts out `false`; work happens in `next`.
4pub fn parse_byte_string_literal(src: &str) -> ByteStringComponentIterator {
5 ByteStringComponentIterator {
6 parser: Parser::new(src),
7 has_closing_quote: false,
8 }
9}
10
/// Iterator over the `StringComponent`s of a byte string literal.
11pub struct ByteStringComponentIterator<'a> {
// Parser holding the literal text and the current position in it.
12 parser: Parser<'a>,
// Set to `true` by `next` once the closing `"` has been consumed.
13 pub has_closing_quote: bool,
14}
15
// Drives the parser one component at a time over a byte string literal.
16impl<'a> Iterator for ByteStringComponentIterator<'a> {
17 type Item = StringComponent;
// Returns the next component, or `None` once no component is left.
//
// Panics (through `assert!`) when the text does not open with `b"`, or when
// input is still left after the optional closing quote.
18 fn next(&mut self) -> Option<StringComponent> {
// While the parser position is still 0, consume the `b"` opener.
19 if self.parser.pos == 0 {
20 assert!(
21 self.parser.advance() == 'b',
22 "byte string literal should start with a `b`"
23 );
24
25 assert!(
26 self.parser.advance() == '"',
27 "byte string literal should start with a `b`, followed by double quotes"
28 );
29 }
30
// `parse_string_component` yields `None` at end of input or at a `"`.
31 if let Some(component) = self.parser.parse_string_component() {
32 return Some(component);
33 }
34
35 // We get here when there are no char components left to parse
36 if self.parser.peek() == Some('"') {
37 self.parser.advance();
38 self.has_closing_quote = true;
39 }
40
// Anything left after the closing quote is treated as a caller bug.
41 assert!(
42 self.parser.peek() == None,
43 "byte string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
44 self.parser.src,
45 self.parser.pos,
46 self.parser.src.len()
47 );
48
49 None
50 }
51}
diff --git a/crates/ra_syntax/src/string_lexing/char.rs b/crates/ra_syntax/src/string_lexing/char.rs
deleted file mode 100644
index e01813176..000000000
--- a/crates/ra_syntax/src/string_lexing/char.rs
+++ /dev/null
@@ -1,176 +0,0 @@
1use super::parser::Parser;
2use super::CharComponent;
3
/// Builds a component iterator over the char literal text `src`.
/// Nothing is parsed here: the parser starts at position 0 and the
/// closing-quote flag starts out `false`; work happens in `next`.
4pub fn parse_char_literal(src: &str) -> CharComponentIterator {
5 CharComponentIterator {
6 parser: Parser::new(src),
7 has_closing_quote: false,
8 }
9}
10
/// Iterator over the `CharComponent`s of a char literal.
11pub struct CharComponentIterator<'a> {
// Parser holding the literal text and the current position in it.
12 parser: Parser<'a>,
// Set to `true` by `next` once the closing `'` has been consumed.
13 pub has_closing_quote: bool,
14}
15
// Drives the parser one component at a time over a char literal.
16impl<'a> Iterator for CharComponentIterator<'a> {
17 type Item = CharComponent;
// Returns the next component, or `None` once no component is left.
//
// Panics (through `assert!`) when the text does not open with `'`, or when
// input is still left after the optional closing quote.
18 fn next(&mut self) -> Option<CharComponent> {
// While the parser position is still 0, consume the opening quote.
19 if self.parser.pos == 0 {
20 assert!(
21 self.parser.advance() == '\'',
22 "char literal should start with a quote"
23 );
24 }
25
// `parse_char_component` yields `None` at end of input or at a `'`.
26 if let Some(component) = self.parser.parse_char_component() {
27 return Some(component);
28 }
29
30 // We get here when there are no char components left to parse
31 if self.parser.peek() == Some('\'') {
32 self.parser.advance();
33 self.has_closing_quote = true;
34 }
35
// Anything left after the closing quote is treated as a caller bug.
36 assert!(
37 self.parser.peek() == None,
38 "char literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
39 self.parser.src,
40 self.parser.pos,
41 self.parser.src.len()
42 );
43
44 None
45 }
46}
47
// Unit tests for char literal lexing: component kinds, component ranges and
// closing-quote detection.
48#[cfg(test)]
49mod tests {
50 use rowan::TextRange;
51 use crate::string_lexing::{
52 CharComponent,
53 CharComponentKind::*,
54};
55
// Lexes `src` as a char literal and returns (closing quote seen, components).
// The flag is read only after `collect` has run the iterator to the end.
56 fn parse(src: &str) -> (bool, Vec<CharComponent>) {
57 let component_iterator = &mut super::parse_char_literal(src);
58 let components: Vec<_> = component_iterator.collect();
59 (component_iterator.has_closing_quote, components)
60 }
61
// Expects exactly one component and no closing quote; returns that component.
62 fn unclosed_char_component(src: &str) -> CharComponent {
63 let (has_closing_quote, components) = parse(src);
64 assert!(!has_closing_quote, "char should not have closing quote");
65 assert!(components.len() == 1);
66 components[0].clone()
67 }
68
// Expects exactly one component followed by a closing quote; returns that component.
69 fn closed_char_component(src: &str) -> CharComponent {
70 let (has_closing_quote, components) = parse(src);
71 assert!(has_closing_quote, "char should have closing quote");
72 assert!(
73 components.len() == 1,
74 "Literal: {}\nComponents: {:#?}",
75 src,
76 components
77 );
78 components[0].clone()
79 }
80
// Expects a closing quote and returns every component, however many there are.
81 fn closed_char_components(src: &str) -> Vec<CharComponent> {
82 let (has_closing_quote, components) = parse(src);
83 assert!(has_closing_quote, "char should have closing quote");
84 components
85 }
86
// Range from offset 1 to `len - 1`: everything between the two quotes.
87 fn range_closed(src: &str) -> TextRange {
88 TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
89 }
90
// Range from offset 1 to `len`: everything after the opening quote.
91 fn range_unclosed(src: &str) -> TextRange {
92 TextRange::from_to(1.into(), (src.len() as u32).into())
93 }
94
// `\u` followed by a braced body (including `{}`) or by nothing is expected
// to lex as a single UnicodeEscape filling the space between the quotes.
95 #[test]
96 fn test_unicode_escapes() {
97 let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
98 for escape in unicode_escapes {
99 let escape_sequence = format!(r"'\u{}'", escape);
100 let component = closed_char_component(&escape_sequence);
101 let expected_range = range_closed(&escape_sequence);
102 assert_eq!(component.kind, UnicodeEscape);
103 assert_eq!(component.range, expected_range);
104 }
105 }
106
// With the `}` missing, the expected range runs to the end of the text and
// no closing quote is reported, even though the input ends in `'`.
107 #[test]
108 fn test_unicode_escapes_unclosed() {
109 let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
110 for escape in unicode_escapes {
111 let escape_sequence = format!(r"'\u{}'", escape);
112 let component = unclosed_char_component(&escape_sequence);
113 let expected_range = range_unclosed(&escape_sequence);
114 assert_eq!(component.kind, UnicodeEscape);
115 assert_eq!(component.range, expected_range);
116 }
117 }
118
// `''` has a closing quote and zero components.
119 #[test]
120 fn test_empty_char() {
121 let (has_closing_quote, components) = parse("''");
122 assert!(has_closing_quote, "char should have closing quote");
123 assert!(components.len() == 0);
124 }
125
// `'a` without a closing quote still yields the CodePoint at range 1..2.
126 #[test]
127 fn test_unclosed_char() {
128 let component = unclosed_char_component("'a");
129 assert!(component.kind == CodePoint);
130 assert!(component.range == TextRange::from_to(1.into(), 2.into()));
131 }
132
// `\x` followed by zero, one or two characters is one AsciiCodeEscape.
133 #[test]
134 fn test_digit_escapes() {
135 let literals = &[r"", r"5", r"55"];
136
137 for literal in literals {
138 let lit_text = format!(r"'\x{}'", literal);
139 let component = closed_char_component(&lit_text);
140 assert!(component.kind == AsciiCodeEscape);
141 assert!(component.range == range_closed(&lit_text));
142 }
143
144 // More than 2 digits starts a new codepoint
145 let components = closed_char_components(r"'\x555'");
146 assert!(components.len() == 2);
147 assert!(components[1].kind == CodePoint);
148 }
149
// A backslash followed by one of these characters is one AsciiEscape.
150 #[test]
151 fn test_ascii_escapes() {
152 let literals = &[
153 r"\'", "\\\"", // equivalent to \"
154 r"\n", r"\r", r"\t", r"\\", r"\0",
155 ];
156
157 for literal in literals {
158 let lit_text = format!("'{}'", literal);
159 let component = closed_char_component(&lit_text);
160 assert!(component.kind == AsciiEscape);
161 assert!(component.range == range_closed(&lit_text));
162 }
163 }
164
// The same characters without a leading backslash are plain CodePoints.
165 #[test]
166 fn test_no_escapes() {
167 let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
168
169 for &literal in literals {
170 let lit_text = format!("'{}'", literal);
171 let component = closed_char_component(&lit_text);
172 assert!(component.kind == CodePoint);
173 assert!(component.range == range_closed(&lit_text));
174 }
175 }
176}
diff --git a/crates/ra_syntax/src/string_lexing/parser.rs b/crates/ra_syntax/src/string_lexing/parser.rs
index 4a6d5bc93..14c6015c2 100644
--- a/crates/ra_syntax/src/string_lexing/parser.rs
+++ b/crates/ra_syntax/src/string_lexing/parser.rs
@@ -1,15 +1,16 @@
1use rowan::{TextRange, TextUnit}; 1use rowan::{TextRange, TextUnit};
2 2
3use self::CharComponentKind::*; 3use self::StringComponentKind::*;
4 4
5pub struct Parser<'a> { 5pub struct Parser<'a> {
6 pub(super) quote: u8,
6 pub(super) src: &'a str, 7 pub(super) src: &'a str,
7 pub(super) pos: usize, 8 pub(super) pos: usize,
8} 9}
9 10
10impl<'a> Parser<'a> { 11impl<'a> Parser<'a> {
11 pub fn new(src: &'a str) -> Parser<'a> { 12 pub fn new(src: &'a str, quote: u8) -> Parser<'a> {
12 Parser { src, pos: 0 } 13 Parser { quote, src, pos: 0 }
13 } 14 }
14 15
15 // Utility methods 16 // Utility methods
@@ -42,7 +43,7 @@ impl<'a> Parser<'a> {
42 43
43 // Char parsing methods 44 // Char parsing methods
44 45
45 fn parse_unicode_escape(&mut self, start: TextUnit) -> CharComponent { 46 fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent {
46 match self.peek() { 47 match self.peek() {
47 Some('{') => { 48 Some('{') => {
48 self.advance(); 49 self.advance();
@@ -56,16 +57,16 @@ impl<'a> Parser<'a> {
56 } 57 }
57 58
58 let end = self.get_pos(); 59 let end = self.get_pos();
59 CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) 60 StringComponent::new(TextRange::from_to(start, end), UnicodeEscape)
60 } 61 }
61 Some(_) | None => { 62 Some(_) | None => {
62 let end = self.get_pos(); 63 let end = self.get_pos();
63 CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) 64 StringComponent::new(TextRange::from_to(start, end), UnicodeEscape)
64 } 65 }
65 } 66 }
66 } 67 }
67 68
68 fn parse_ascii_code_escape(&mut self, start: TextUnit) -> CharComponent { 69 fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent {
69 let code_start = self.get_pos(); 70 let code_start = self.get_pos();
70 while let Some(next) = self.peek() { 71 while let Some(next) = self.peek() {
71 if next == '\'' || (self.get_pos() - code_start == 2.into()) { 72 if next == '\'' || (self.get_pos() - code_start == 2.into()) {
@@ -76,12 +77,12 @@ impl<'a> Parser<'a> {
76 } 77 }
77 78
78 let end = self.get_pos(); 79 let end = self.get_pos();
79 CharComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) 80 StringComponent::new(TextRange::from_to(start, end), AsciiCodeEscape)
80 } 81 }
81 82
82 fn parse_escape(&mut self, start: TextUnit) -> CharComponent { 83 fn parse_escape(&mut self, start: TextUnit) -> StringComponent {
83 if self.peek().is_none() { 84 if self.peek().is_none() {
84 return CharComponent::new(TextRange::from_to(start, start), AsciiEscape); 85 return StringComponent::new(TextRange::from_to(start, start), AsciiEscape);
85 } 86 }
86 87
87 let next = self.advance(); 88 let next = self.advance();
@@ -90,29 +91,7 @@ impl<'a> Parser<'a> {
90 match next { 91 match next {
91 'x' => self.parse_ascii_code_escape(start), 92 'x' => self.parse_ascii_code_escape(start),
92 'u' => self.parse_unicode_escape(start), 93 'u' => self.parse_unicode_escape(start),
93 _ => CharComponent::new(range, AsciiEscape), 94 _ => StringComponent::new(range, AsciiEscape),
94 }
95 }
96
97 pub fn parse_char_component(&mut self) -> Option<CharComponent> {
98 let next = self.peek()?;
99
100 // Ignore character close
101 if next == '\'' {
102 return None;
103 }
104
105 let start = self.get_pos();
106 self.advance();
107
108 if next == '\\' {
109 Some(self.parse_escape(start))
110 } else {
111 let end = self.get_pos();
112 Some(CharComponent::new(
113 TextRange::from_to(start, end),
114 CodePoint,
115 ))
116 } 95 }
117 } 96 }
118 97
@@ -131,11 +110,11 @@ impl<'a> Parser<'a> {
131 } 110 }
132 } 111 }
133 112
134 pub fn parse_string_component(&mut self) -> Option<StringComponent> { 113 pub fn parse_component(&mut self) -> Option<StringComponent> {
135 let next = self.peek()?; 114 let next = self.peek()?;
136 115
137 // Ignore string close 116 // Ignore string close
138 if next == '"' { 117 if next == self.quote as char {
139 return None; 118 return None;
140 } 119 }
141 120
@@ -145,21 +124,31 @@ impl<'a> Parser<'a> {
145 if next == '\\' { 124 if next == '\\' {
146 // Strings can use `\` to ignore newlines, so we first try to parse one of those 125 // Strings can use `\` to ignore newlines, so we first try to parse one of those
147 // before falling back to parsing char escapes 126 // before falling back to parsing char escapes
148 self.parse_ignore_newline(start).or_else(|| { 127 if self.quote == b'"' {
149 let char_component = self.parse_escape(start); 128 if let Some(component) = self.parse_ignore_newline(start) {
150 Some(StringComponent::new( 129 return Some(component);
151 char_component.range, 130 }
152 StringComponentKind::Char(char_component.kind), 131 }
153 )) 132
154 }) 133 Some(self.parse_escape(start))
155 } else { 134 } else {
156 let end = self.get_pos(); 135 let end = self.get_pos();
157 Some(StringComponent::new( 136 Some(StringComponent::new(
158 TextRange::from_to(start, end), 137 TextRange::from_to(start, end),
159 StringComponentKind::Char(CodePoint), 138 CodePoint,
160 )) 139 ))
161 } 140 }
162 } 141 }
142
// Consumes whatever input is left and returns the range it covered.
// Returns `None` when nothing is left, via the early `?` on `peek`.
// No check is made on what the remaining characters are.
143 pub fn parse_suffix(&mut self) -> Option<TextRange> {
144 let start = self.get_pos();
// Bail out before building a range if the input is already exhausted.
145 let _ = self.peek()?;
146 while let Some(_) = self.peek() {
147 self.advance();
148 }
149 let end = self.get_pos();
150 Some(TextRange::from_to(start, end))
151 }
163} 152}
164 153
165#[derive(Debug, Eq, PartialEq, Clone)] 154#[derive(Debug, Eq, PartialEq, Clone)]
@@ -177,23 +166,6 @@ impl StringComponent {
177#[derive(Debug, Eq, PartialEq, Clone)] 166#[derive(Debug, Eq, PartialEq, Clone)]
178pub enum StringComponentKind { 167pub enum StringComponentKind {
179 IgnoreNewline, 168 IgnoreNewline,
180 Char(CharComponentKind),
181}
182
183#[derive(Debug, Eq, PartialEq, Clone)]
184pub struct CharComponent {
185 pub range: TextRange,
186 pub kind: CharComponentKind,
187}
188
189impl CharComponent {
190 fn new(range: TextRange, kind: CharComponentKind) -> CharComponent {
191 CharComponent { range, kind }
192 }
193}
194
195#[derive(Debug, Eq, PartialEq, Clone)]
196pub enum CharComponentKind {
197 CodePoint, 169 CodePoint,
198 AsciiEscape, 170 AsciiEscape,
199 AsciiCodeEscape, 171 AsciiCodeEscape,
diff --git a/crates/ra_syntax/src/string_lexing/string.rs b/crates/ra_syntax/src/string_lexing/string.rs
index d8351e9af..064f08544 100644
--- a/crates/ra_syntax/src/string_lexing/string.rs
+++ b/crates/ra_syntax/src/string_lexing/string.rs
@@ -1,41 +1,92 @@
1use super::parser::Parser; 1use crate::{
2use super::StringComponent; 2 TextRange,
3 string_lexing::{
4 parser::Parser,
5 StringComponent,
6}};
3 7
4pub fn parse_string_literal(src: &str) -> StringComponentIterator { 8pub fn parse_string_literal(src: &str) -> StringComponentIterator {
5 StringComponentIterator { 9 StringComponentIterator {
6 parser: Parser::new(src), 10 parser: Parser::new(src, b'"'),
7 has_closing_quote: false, 11 has_closing_quote: false,
12 suffix: None,
13 prefix: None,
14 quote: b'"',
15 }
16}
17
/// Builds the component iterator for a byte string literal: a `b` prefix
/// and `"` as the quote character. Nothing is parsed here; the
/// closing-quote flag starts out `false` and `suffix` starts out `None`.
18pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator {
19 StringComponentIterator {
// The same quote byte goes to both the parser and the iterator.
20 parser: Parser::new(src, b'"'),
21 has_closing_quote: false,
22 suffix: None,
23 prefix: Some(b'b'),
24 quote: b'"',
25 }
26}
27
/// Builds the component iterator for a char literal: no prefix and `'`
/// as the quote character. Nothing is parsed here; the closing-quote
/// flag starts out `false` and `suffix` starts out `None`.
28pub fn parse_char_literal(src: &str) -> StringComponentIterator {
29 StringComponentIterator {
// The same quote byte goes to both the parser and the iterator.
30 parser: Parser::new(src, b'\''),
31 has_closing_quote: false,
32 suffix: None,
33 prefix: None,
34 quote: b'\'',
35 }
36}
37
38pub fn parse_byte_literal(src: &str) -> StringComponentIterator {
39 StringComponentIterator {
40 parser: Parser::new(src, b'\''),
41 has_closing_quote: false,
42 suffix: None,
43 prefix: Some(b'b'),
44 quote: b'\'',
8 } 45 }
9} 46}
10 47
11pub struct StringComponentIterator<'a> { 48pub struct StringComponentIterator<'a> {
12 parser: Parser<'a>, 49 parser: Parser<'a>,
13 pub has_closing_quote: bool, 50 pub has_closing_quote: bool,
51 pub suffix: Option<TextRange>,
52 prefix: Option<u8>,
53 quote: u8,
14} 54}
15 55
16impl<'a> Iterator for StringComponentIterator<'a> { 56impl<'a> Iterator for StringComponentIterator<'a> {
17 type Item = StringComponent; 57 type Item = StringComponent;
18 fn next(&mut self) -> Option<StringComponent> { 58 fn next(&mut self) -> Option<StringComponent> {
19 if self.parser.pos == 0 { 59 if self.parser.pos == 0 {
60 if let Some(prefix) = self.prefix {
61 assert!(
62 self.parser.advance() == prefix as char,
63 "literal should start with a {:?}",
64 prefix as char,
65 );
66 }
20 assert!( 67 assert!(
21 self.parser.advance() == '"', 68 self.parser.advance() == self.quote as char,
22 "string literal should start with double quotes" 69 "literal should start with a {:?}",
70 self.quote as char,
23 ); 71 );
24 } 72 }
25 73
26 if let Some(component) = self.parser.parse_string_component() { 74 if let Some(component) = self.parser.parse_component() {
27 return Some(component); 75 return Some(component);
28 } 76 }
29 77
30 // We get here when there are no char components left to parse 78 // We get here when there are no char components left to parse
31 if self.parser.peek() == Some('"') { 79 if self.parser.peek() == Some(self.quote as char) {
32 self.parser.advance(); 80 self.parser.advance();
33 self.has_closing_quote = true; 81 self.has_closing_quote = true;
82 if let Some(range) = self.parser.parse_suffix() {
83 self.suffix = Some(range);
84 }
34 } 85 }
35 86
36 assert!( 87 assert!(
37 self.parser.peek() == None, 88 self.parser.peek() == None,
38 "string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", 89 "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}",
39 self.parser.src, 90 self.parser.src,
40 self.parser.pos, 91 self.parser.pos,
41 self.parser.src.len() 92 self.parser.src.len()
@@ -44,3 +95,133 @@ impl<'a> Iterator for StringComponentIterator<'a> {
44 None 95 None
45 } 96 }
46} 97}
98
// Unit tests for literal lexing: component kinds, component ranges and
// closing-quote detection.
// NOTE(review): every test goes through `parse`, which only calls
// `parse_char_literal`; the string, byte and byte-string entry points and the
// `suffix` field are not exercised in this module.
99#[cfg(test)]
100mod tests {
101 use rowan::TextRange;
102 use crate::string_lexing::{
103 StringComponent,
104 StringComponentKind::*,
105};
106
// Lexes `src` as a char literal and returns (closing quote seen, components).
// The flag is read only after `collect` has run the iterator to the end.
107 fn parse(src: &str) -> (bool, Vec<StringComponent>) {
108 let component_iterator = &mut super::parse_char_literal(src);
109 let components: Vec<_> = component_iterator.collect();
110 (component_iterator.has_closing_quote, components)
111 }
112
// Expects exactly one component and no closing quote; returns that component.
113 fn unclosed_char_component(src: &str) -> StringComponent {
114 let (has_closing_quote, components) = parse(src);
115 assert!(!has_closing_quote, "char should not have closing quote");
116 assert!(components.len() == 1);
117 components[0].clone()
118 }
119
// Expects exactly one component followed by a closing quote; returns that component.
120 fn closed_char_component(src: &str) -> StringComponent {
121 let (has_closing_quote, components) = parse(src);
122 assert!(has_closing_quote, "char should have closing quote");
123 assert!(
124 components.len() == 1,
125 "Literal: {}\nComponents: {:#?}",
126 src,
127 components
128 );
129 components[0].clone()
130 }
131
// Expects a closing quote and returns every component, however many there are.
132 fn closed_char_components(src: &str) -> Vec<StringComponent> {
133 let (has_closing_quote, components) = parse(src);
134 assert!(has_closing_quote, "char should have closing quote");
135 components
136 }
137
// Range from offset 1 to `len - 1`: everything between the two quotes.
138 fn range_closed(src: &str) -> TextRange {
139 TextRange::from_to(1.into(), (src.len() as u32 - 1).into())
140 }
141
// Range from offset 1 to `len`: everything after the opening quote.
142 fn range_unclosed(src: &str) -> TextRange {
143 TextRange::from_to(1.into(), (src.len() as u32).into())
144 }
145
// `\u` followed by a braced body (including `{}`) or by nothing is expected
// to lex as a single UnicodeEscape filling the space between the quotes.
146 #[test]
147 fn test_unicode_escapes() {
148 let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""];
149 for escape in unicode_escapes {
150 let escape_sequence = format!(r"'\u{}'", escape);
151 let component = closed_char_component(&escape_sequence);
152 let expected_range = range_closed(&escape_sequence);
153 assert_eq!(component.kind, UnicodeEscape);
154 assert_eq!(component.range, expected_range);
155 }
156 }
157
// With the `}` missing, the expected range runs to the end of the text and
// no closing quote is reported, even though the input ends in `'`.
158 #[test]
159 fn test_unicode_escapes_unclosed() {
160 let unicode_escapes = &["{DEAD", "{BEEF", "{FF"];
161 for escape in unicode_escapes {
162 let escape_sequence = format!(r"'\u{}'", escape);
163 let component = unclosed_char_component(&escape_sequence);
164 let expected_range = range_unclosed(&escape_sequence);
165 assert_eq!(component.kind, UnicodeEscape);
166 assert_eq!(component.range, expected_range);
167 }
168 }
169
// `''` has a closing quote and zero components.
170 #[test]
171 fn test_empty_char() {
172 let (has_closing_quote, components) = parse("''");
173 assert!(has_closing_quote, "char should have closing quote");
174 assert!(components.len() == 0);
175 }
176
// `'a` without a closing quote still yields the CodePoint at range 1..2.
177 #[test]
178 fn test_unclosed_char() {
179 let component = unclosed_char_component("'a");
180 assert!(component.kind == CodePoint);
181 assert!(component.range == TextRange::from_to(1.into(), 2.into()));
182 }
183
// `\x` followed by zero, one or two characters is one AsciiCodeEscape.
184 #[test]
185 fn test_digit_escapes() {
186 let literals = &[r"", r"5", r"55"];
187
188 for literal in literals {
189 let lit_text = format!(r"'\x{}'", literal);
190 let component = closed_char_component(&lit_text);
191 assert!(component.kind == AsciiCodeEscape);
192 assert!(component.range == range_closed(&lit_text));
193 }
194
195 // More than 2 digits starts a new codepoint
196 let components = closed_char_components(r"'\x555'");
197 assert!(components.len() == 2);
198 assert!(components[1].kind == CodePoint);
199 }
200
// A backslash followed by one of these characters is one AsciiEscape.
201 #[test]
202 fn test_ascii_escapes() {
203 let literals = &[
204 r"\'", "\\\"", // equivalent to \"
205 r"\n", r"\r", r"\t", r"\\", r"\0",
206 ];
207
208 for literal in literals {
209 let lit_text = format!("'{}'", literal);
210 let component = closed_char_component(&lit_text);
211 assert!(component.kind == AsciiEscape);
212 assert!(component.range == range_closed(&lit_text));
213 }
214 }
215
// The same characters without a leading backslash are plain CodePoints.
216 #[test]
217 fn test_no_escapes() {
218 let literals = &['"', 'n', 'r', 't', '0', 'x', 'u'];
219
220 for &literal in literals {
221 let lit_text = format!("'{}'", literal);
222 let component = closed_char_component(&lit_text);
223 assert!(component.kind == CodePoint);
224 assert!(component.range == range_closed(&lit_text));
225 }
226 }
227}
diff --git a/crates/ra_syntax/src/utils.rs b/crates/ra_syntax/src/utils.rs
index 5bbdf80bb..0a2b6afbc 100644
--- a/crates/ra_syntax/src/utils.rs
+++ b/crates/ra_syntax/src/utils.rs
@@ -1,10 +1,13 @@
1use crate::{SourceFileNode, SyntaxKind, SyntaxNodeRef, WalkEvent}; 1use crate::{SourceFileNode, SyntaxKind, SyntaxNodeRef, WalkEvent, AstNode};
2use std::fmt::Write; 2use std::fmt::Write;
3use std::str; 3use std::str;
4 4
5/// Parse a file and create a string representation of the resulting parse tree. 5/// Parse a file and create a string representation of the resulting parse tree.
6pub fn dump_tree(syntax: SyntaxNodeRef) -> String { 6pub fn dump_tree(syntax: SyntaxNodeRef) -> String {
7 let mut errors: Vec<_> = syntax.root_data().to_vec(); 7 let mut errors: Vec<_> = match syntax.ancestors().find_map(SourceFileNode::cast) {
8 Some(file) => file.owned().errors(),
9 None => syntax.root_data().to_vec(),
10 };
8 errors.sort_by_key(|e| e.offset()); 11 errors.sort_by_key(|e| e.offset());
9 let mut err_pos = 0; 12 let mut err_pos = 0;
10 let mut level = 0; 13 let mut level = 0;
diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs
index 43c0d7edd..d0897eeed 100644
--- a/crates/ra_syntax/src/validation/byte.rs
+++ b/crates/ra_syntax/src/validation/byte.rs
@@ -2,7 +2,7 @@
2 2
3use crate::{ 3use crate::{
4 ast::{self, AstNode}, 4 ast::{self, AstNode},
5 string_lexing::{self, CharComponentKind}, 5 string_lexing::{self, StringComponentKind},
6 TextRange, 6 TextRange,
7 validation::char, 7 validation::char,
8 yellow::{ 8 yellow::{
@@ -27,6 +27,13 @@ pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec<SyntaxError>)
27 errors.push(SyntaxError::new(UnclosedByte, literal_range)); 27 errors.push(SyntaxError::new(UnclosedByte, literal_range));
28 } 28 }
29 29
30 if let Some(range) = components.suffix {
31 errors.push(SyntaxError::new(
32 InvalidSuffix,
33 range + literal_range.start(),
34 ));
35 }
36
30 if len == 0 { 37 if len == 0 {
31 errors.push(SyntaxError::new(EmptyByte, literal_range)); 38 errors.push(SyntaxError::new(EmptyByte, literal_range));
32 } 39 }
@@ -38,11 +45,11 @@ pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec<SyntaxError>)
38 45
39pub(super) fn validate_byte_component( 46pub(super) fn validate_byte_component(
40 text: &str, 47 text: &str,
41 kind: CharComponentKind, 48 kind: StringComponentKind,
42 range: TextRange, 49 range: TextRange,
43 errors: &mut Vec<SyntaxError>, 50 errors: &mut Vec<SyntaxError>,
44) { 51) {
45 use self::CharComponentKind::*; 52 use self::StringComponentKind::*;
46 match kind { 53 match kind {
47 AsciiEscape => validate_byte_escape(text, range, errors), 54 AsciiEscape => validate_byte_escape(text, range, errors),
48 AsciiCodeEscape => validate_byte_code_escape(text, range, errors), 55 AsciiCodeEscape => validate_byte_code_escape(text, range, errors),
@@ -63,6 +70,7 @@ pub(super) fn validate_byte_component(
63 errors.push(SyntaxError::new(ByteOutOfRange, range)); 70 errors.push(SyntaxError::new(ByteOutOfRange, range));
64 } 71 }
65 } 72 }
73 IgnoreNewline => { /* always valid */ }
66 } 74 }
67} 75}
68 76
diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs
index 7b830e97c..f7a4fb156 100644
--- a/crates/ra_syntax/src/validation/byte_string.rs
+++ b/crates/ra_syntax/src/validation/byte_string.rs
@@ -17,21 +17,28 @@ pub(crate) fn validate_byte_string_node(node: ast::ByteString, errors: &mut Vec<
17 let range = component.range + literal_range.start(); 17 let range = component.range + literal_range.start();
18 18
19 match component.kind { 19 match component.kind {
20 StringComponentKind::Char(kind) => { 20 StringComponentKind::IgnoreNewline => { /* always valid */ }
21 _ => {
21 // Chars must escape \t, \n and \r codepoints, but strings don't 22 // Chars must escape \t, \n and \r codepoints, but strings don't
22 let text = &literal_text[component.range]; 23 let text = &literal_text[component.range];
23 match text { 24 match text {
24 "\t" | "\n" | "\r" => { /* always valid */ } 25 "\t" | "\n" | "\r" => { /* always valid */ }
25 _ => byte::validate_byte_component(text, kind, range, errors), 26 _ => byte::validate_byte_component(text, component.kind, range, errors),
26 } 27 }
27 } 28 }
28 StringComponentKind::IgnoreNewline => { /* always valid */ }
29 } 29 }
30 } 30 }
31 31
32 if !components.has_closing_quote { 32 if !components.has_closing_quote {
33 errors.push(SyntaxError::new(UnclosedString, literal_range)); 33 errors.push(SyntaxError::new(UnclosedString, literal_range));
34 } 34 }
35
36 if let Some(range) = components.suffix {
37 errors.push(SyntaxError::new(
38 InvalidSuffix,
39 range + literal_range.start(),
40 ));
41 }
35} 42}
36 43
37#[cfg(test)] 44#[cfg(test)]
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
index 4728c85e6..19cd3830f 100644
--- a/crates/ra_syntax/src/validation/char.rs
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -6,7 +6,7 @@ use arrayvec::ArrayString;
6 6
7use crate::{ 7use crate::{
8 ast::{self, AstNode}, 8 ast::{self, AstNode},
9 string_lexing::{self, CharComponentKind}, 9 string_lexing::{self, StringComponentKind},
10 TextRange, 10 TextRange,
11 yellow::{ 11 yellow::{
12 SyntaxError, 12 SyntaxError,
@@ -30,6 +30,13 @@ pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>)
30 errors.push(SyntaxError::new(UnclosedChar, literal_range)); 30 errors.push(SyntaxError::new(UnclosedChar, literal_range));
31 } 31 }
32 32
33 if let Some(range) = components.suffix {
34 errors.push(SyntaxError::new(
35 InvalidSuffix,
36 range + literal_range.start(),
37 ));
38 }
39
33 if len == 0 { 40 if len == 0 {
34 errors.push(SyntaxError::new(EmptyChar, literal_range)); 41 errors.push(SyntaxError::new(EmptyChar, literal_range));
35 } 42 }
@@ -41,12 +48,12 @@ pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>)
41 48
42pub(super) fn validate_char_component( 49pub(super) fn validate_char_component(
43 text: &str, 50 text: &str,
44 kind: CharComponentKind, 51 kind: StringComponentKind,
45 range: TextRange, 52 range: TextRange,
46 errors: &mut Vec<SyntaxError>, 53 errors: &mut Vec<SyntaxError>,
47) { 54) {
48 // Validate escapes 55 // Validate escapes
49 use self::CharComponentKind::*; 56 use self::StringComponentKind::*;
50 match kind { 57 match kind {
51 AsciiEscape => validate_ascii_escape(text, range, errors), 58 AsciiEscape => validate_ascii_escape(text, range, errors),
52 AsciiCodeEscape => validate_ascii_code_escape(text, range, errors), 59 AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
@@ -57,6 +64,7 @@ pub(super) fn validate_char_component(
57 errors.push(SyntaxError::new(UnescapedCodepoint, range)); 64 errors.push(SyntaxError::new(UnescapedCodepoint, range));
58 } 65 }
59 } 66 }
67 StringComponentKind::IgnoreNewline => { /* always valid */ }
60 } 68 }
61} 69}
62 70
diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs
index 089879d15..1371bb1f0 100644
--- a/crates/ra_syntax/src/validation/string.rs
+++ b/crates/ra_syntax/src/validation/string.rs
@@ -1,6 +1,6 @@
1use crate::{ 1use crate::{
2 ast::{self, AstNode}, 2 ast::{self, AstNode},
3 string_lexing::{self, StringComponentKind}, 3 string_lexing,
4 yellow::{ 4 yellow::{
5 SyntaxError, 5 SyntaxError,
6 SyntaxErrorKind::*, 6 SyntaxErrorKind::*,
@@ -16,22 +16,24 @@ pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec<SyntaxErr
16 for component in &mut components { 16 for component in &mut components {
17 let range = component.range + literal_range.start(); 17 let range = component.range + literal_range.start();
18 18
19 match component.kind { 19 // Chars must escape \t, \n and \r codepoints, but strings don't
20 StringComponentKind::Char(kind) => { 20 let text = &literal_text[component.range];
21 // Chars must escape \t, \n and \r codepoints, but strings don't 21 match text {
22 let text = &literal_text[component.range]; 22 "\t" | "\n" | "\r" => { /* always valid */ }
23 match text { 23 _ => char::validate_char_component(text, component.kind, range, errors),
24 "\t" | "\n" | "\r" => { /* always valid */ }
25 _ => char::validate_char_component(text, kind, range, errors),
26 }
27 }
28 StringComponentKind::IgnoreNewline => { /* always valid */ }
29 } 24 }
30 } 25 }
31 26
32 if !components.has_closing_quote { 27 if !components.has_closing_quote {
33 errors.push(SyntaxError::new(UnclosedString, literal_range)); 28 errors.push(SyntaxError::new(UnclosedString, literal_range));
34 } 29 }
30
31 if let Some(range) = components.suffix {
32 errors.push(SyntaxError::new(
33 InvalidSuffix,
34 range + literal_range.start(),
35 ));
36 }
35} 37}
36 38
37#[cfg(test)] 39#[cfg(test)]
diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs
index c32ee650d..534f3511e 100644
--- a/crates/ra_syntax/src/yellow/syntax_error.rs
+++ b/crates/ra_syntax/src/yellow/syntax_error.rs
@@ -93,6 +93,7 @@ pub enum SyntaxErrorKind {
93 OverlongUnicodeEscape, 93 OverlongUnicodeEscape,
94 UnicodeEscapeOutOfRange, 94 UnicodeEscapeOutOfRange,
95 UnclosedString, 95 UnclosedString,
96 InvalidSuffix,
96} 97}
97 98
98#[derive(Debug, Clone, PartialEq, Eq, Hash)] 99#[derive(Debug, Clone, PartialEq, Eq, Hash)]
@@ -134,6 +135,7 @@ impl fmt::Display for SyntaxErrorKind {
134 } 135 }
135 UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"), 136 UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"),
136 UnclosedString => write!(f, "Unclosed string literal"), 137 UnclosedString => write!(f, "Unclosed string literal"),
138 InvalidSuffix => write!(f, "Invalid literal suffix"),
137 ParseError(msg) => write!(f, "{}", msg.0), 139 ParseError(msg) => write!(f, "{}", msg.0),
138 } 140 }
139 } 141 }
diff --git a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs b/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs
new file mode 100644
index 000000000..261aad1fb
--- /dev/null
+++ b/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs
@@ -0,0 +1,6 @@
1fn main() {
2 let _ = 'c'u32;
3 let _ = "string"invalid;
4 let _ = b'b'_suff;
5 let _ = b"bs"invalid;
6}
diff --git a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt b/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt
new file mode 100644
index 000000000..dc10e9ee7
--- /dev/null
+++ b/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt
@@ -0,0 +1,73 @@
1SOURCE_FILE@[0; 112)
2 FN_DEF@[0; 111)
3 FN_KW@[0; 2)
4 WHITESPACE@[2; 3)
5 NAME@[3; 7)
6 IDENT@[3; 7) "main"
7 PARAM_LIST@[7; 9)
8 L_PAREN@[7; 8)
9 R_PAREN@[8; 9)
10 WHITESPACE@[9; 10)
11 BLOCK@[10; 111)
12 L_CURLY@[10; 11)
13 WHITESPACE@[11; 16)
14 LET_STMT@[16; 27)
15 LET_KW@[16; 19)
16 WHITESPACE@[19; 20)
17 PLACEHOLDER_PAT@[20; 21)
18 UNDERSCORE@[20; 21)
19 WHITESPACE@[21; 22)
20 EQ@[22; 23)
21 WHITESPACE@[23; 24)
22 LITERAL@[24; 27)
23 CHAR@[24; 27)
24 err: `expected SEMI`
25 EXPR_STMT@[27; 31)
26 PATH_EXPR@[27; 30)
27 PATH@[27; 30)
28 PATH_SEGMENT@[27; 30)
29 NAME_REF@[27; 30)
30 IDENT@[27; 30) "u32"
31 SEMI@[30; 31)
32 WHITESPACE@[31; 36)
33 LET_STMT@[36; 60)
34 LET_KW@[36; 39)
35 WHITESPACE@[39; 40)
36 PLACEHOLDER_PAT@[40; 41)
37 UNDERSCORE@[40; 41)
38 WHITESPACE@[41; 42)
39 EQ@[42; 43)
40 WHITESPACE@[43; 44)
41 LITERAL@[44; 59)
42 STRING@[44; 59)
43 err: `Invalid literal suffix`
44 SEMI@[59; 60)
45 WHITESPACE@[60; 65)
46 LET_STMT@[65; 83)
47 LET_KW@[65; 68)
48 WHITESPACE@[68; 69)
49 PLACEHOLDER_PAT@[69; 70)
50 UNDERSCORE@[69; 70)
51 WHITESPACE@[70; 71)
52 EQ@[71; 72)
53 WHITESPACE@[72; 73)
54 LITERAL@[73; 82)
55 BYTE@[73; 82)
56 err: `Invalid literal suffix`
57 SEMI@[82; 83)
58 WHITESPACE@[83; 88)
59 LET_STMT@[88; 109)
60 LET_KW@[88; 91)
61 WHITESPACE@[91; 92)
62 PLACEHOLDER_PAT@[92; 93)
63 UNDERSCORE@[92; 93)
64 WHITESPACE@[93; 94)
65 EQ@[94; 95)
66 WHITESPACE@[95; 96)
67 LITERAL@[96; 108)
68 BYTE_STRING@[96; 108)
69 err: `Invalid literal suffix`
70 SEMI@[108; 109)
71 WHITESPACE@[109; 110)
72 R_CURLY@[110; 111)
73 WHITESPACE@[111; 112)