aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/parsing
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_syntax/src/parsing')
-rw-r--r--crates/ra_syntax/src/parsing/lexer.rs92
-rw-r--r--crates/ra_syntax/src/parsing/reparsing.rs84
2 files changed, 93 insertions, 83 deletions
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index f889e6a1d..f2684c852 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -2,7 +2,7 @@
2//! It is just a bridge to `rustc_lexer`. 2//! It is just a bridge to `rustc_lexer`.
3 3
4use crate::{ 4use crate::{
5 SyntaxError, SyntaxErrorKind, 5 SyntaxError,
6 SyntaxKind::{self, *}, 6 SyntaxKind::{self, *},
7 TextRange, TextUnit, 7 TextRange, TextUnit,
8}; 8};
@@ -41,13 +41,13 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
41 let token_len = TextUnit::from_usize(rustc_token.len); 41 let token_len = TextUnit::from_usize(rustc_token.len);
42 let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len); 42 let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len);
43 43
44 let (syntax_kind, error) = 44 let (syntax_kind, err_message) =
45 rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]); 45 rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
46 46
47 tokens.push(Token { kind: syntax_kind, len: token_len }); 47 tokens.push(Token { kind: syntax_kind, len: token_len });
48 48
49 if let Some(error) = error { 49 if let Some(err_message) = err_message {
50 errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range)); 50 errors.push(SyntaxError::new(err_message, token_range));
51 } 51 }
52 52
53 offset += rustc_token.len; 53 offset += rustc_token.len;
@@ -94,61 +94,21 @@ fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
94 } 94 }
95 95
96 let rustc_token = rustc_lexer::first_token(text); 96 let rustc_token = rustc_lexer::first_token(text);
97 let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text); 97 let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
98 98
99 let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) }; 99 let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) };
100 let error = error.map(|error| { 100 let optional_error = err_message.map(|err_message| {
101 SyntaxError::new( 101 SyntaxError::new(err_message, TextRange::from_to(0.into(), TextUnit::of_str(text)))
102 SyntaxErrorKind::TokenizeError(error),
103 TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)),
104 )
105 }); 102 });
106 103
107 Some((token, error)) 104 Some((token, optional_error))
108}
109
110// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice:
111// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067
112
113/// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant.
114/// It describes all the types of errors that may happen during the tokenization
115/// of Rust source.
116#[derive(Debug, Clone, PartialEq, Eq, Hash)]
117pub enum TokenizeError {
118 /// Base prefix was provided, but there were no digits
119 /// after it, e.g. `0x`, `0b`.
120 EmptyInt,
121 /// Float exponent lacks digits e.g. `12.34e+`, `12.3E+`, `12e-`, `1_E-`,
122 EmptyExponent,
123
124 /// Block comment lacks trailing delimiter `*/`
125 UnterminatedBlockComment,
126 /// Character literal lacks trailing delimiter `'`
127 UnterminatedChar,
128 /// Characterish byte literal lacks trailing delimiter `'`
129 UnterminatedByte,
130 /// String literal lacks trailing delimiter `"`
131 UnterminatedString,
132 /// Byte string literal lacks trailing delimiter `"`
133 UnterminatedByteString,
134 /// Raw literal lacks trailing delimiter e.g. `"##`
135 UnterminatedRawString,
136 /// Raw byte string literal lacks trailing delimiter e.g. `"##`
137 UnterminatedRawByteString,
138
139 /// Raw string lacks a quote after the pound characters e.g. `r###`
140 UnstartedRawString,
141 /// Raw byte string lacks a quote after the pound characters e.g. `br###`
142 UnstartedRawByteString,
143
144 /// Lifetime starts with a number e.g. `'4ever`
145 LifetimeStartsWithNumber,
146} 105}
147 106
107/// Returns `SyntaxKind` and an optional tokenize error message.
148fn rustc_token_kind_to_syntax_kind( 108fn rustc_token_kind_to_syntax_kind(
149 rustc_token_kind: &rustc_lexer::TokenKind, 109 rustc_token_kind: &rustc_lexer::TokenKind,
150 token_text: &str, 110 token_text: &str,
151) -> (SyntaxKind, Option<TokenizeError>) { 111) -> (SyntaxKind, Option<&'static str>) {
152 // A note on an intended tradeoff: 112 // A note on an intended tradeoff:
153 // We drop some useful infromation here (see patterns with double dots `..`) 113 // We drop some useful infromation here (see patterns with double dots `..`)
154 // Storing that info in `SyntaxKind` is not possible due to its layout requirements of 114 // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
@@ -156,14 +116,15 @@ fn rustc_token_kind_to_syntax_kind(
156 116
157 let syntax_kind = { 117 let syntax_kind = {
158 use rustc_lexer::TokenKind as TK; 118 use rustc_lexer::TokenKind as TK;
159 use TokenizeError as TE;
160
161 match rustc_token_kind { 119 match rustc_token_kind {
162 TK::LineComment => COMMENT, 120 TK::LineComment => COMMENT,
163 121
164 TK::BlockComment { terminated: true } => COMMENT, 122 TK::BlockComment { terminated: true } => COMMENT,
165 TK::BlockComment { terminated: false } => { 123 TK::BlockComment { terminated: false } => {
166 return (COMMENT, Some(TE::UnterminatedBlockComment)); 124 return (
125 COMMENT,
126 Some("Missing trailing `*/` symbols to terminate the block comment"),
127 );
167 } 128 }
168 129
169 TK::Whitespace => WHITESPACE, 130 TK::Whitespace => WHITESPACE,
@@ -181,7 +142,7 @@ fn rustc_token_kind_to_syntax_kind(
181 142
182 TK::Lifetime { starts_with_number: false } => LIFETIME, 143 TK::Lifetime { starts_with_number: false } => LIFETIME,
183 TK::Lifetime { starts_with_number: true } => { 144 TK::Lifetime { starts_with_number: true } => {
184 return (LIFETIME, Some(TE::LifetimeStartsWithNumber)) 145 return (LIFETIME, Some("Lifetime name cannot start with a number"))
185 } 146 }
186 147
187 TK::Semi => SEMI, 148 TK::Semi => SEMI,
@@ -217,57 +178,56 @@ fn rustc_token_kind_to_syntax_kind(
217 178
218 return (syntax_kind, None); 179 return (syntax_kind, None);
219 180
220 fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<TokenizeError>) { 181 fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
221 use rustc_lexer::LiteralKind as LK; 182 use rustc_lexer::LiteralKind as LK;
222 use TokenizeError as TE;
223 183
224 #[rustfmt::skip] 184 #[rustfmt::skip]
225 let syntax_kind = match *kind { 185 let syntax_kind = match *kind {
226 LK::Int { empty_int: false, .. } => INT_NUMBER, 186 LK::Int { empty_int: false, .. } => INT_NUMBER,
227 LK::Int { empty_int: true, .. } => { 187 LK::Int { empty_int: true, .. } => {
228 return (INT_NUMBER, Some(TE::EmptyInt)) 188 return (INT_NUMBER, Some("Missing digits after the integer base prefix"))
229 } 189 }
230 190
231 LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER, 191 LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
232 LK::Float { empty_exponent: true, .. } => { 192 LK::Float { empty_exponent: true, .. } => {
233 return (FLOAT_NUMBER, Some(TE::EmptyExponent)) 193 return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol"))
234 } 194 }
235 195
236 LK::Char { terminated: true } => CHAR, 196 LK::Char { terminated: true } => CHAR,
237 LK::Char { terminated: false } => { 197 LK::Char { terminated: false } => {
238 return (CHAR, Some(TE::UnterminatedChar)) 198 return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal"))
239 } 199 }
240 200
241 LK::Byte { terminated: true } => BYTE, 201 LK::Byte { terminated: true } => BYTE,
242 LK::Byte { terminated: false } => { 202 LK::Byte { terminated: false } => {
243 return (BYTE, Some(TE::UnterminatedByte)) 203 return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal"))
244 } 204 }
245 205
246 LK::Str { terminated: true } => STRING, 206 LK::Str { terminated: true } => STRING,
247 LK::Str { terminated: false } => { 207 LK::Str { terminated: false } => {
248 return (STRING, Some(TE::UnterminatedString)) 208 return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal"))
249 } 209 }
250 210
251 211
252 LK::ByteStr { terminated: true } => BYTE_STRING, 212 LK::ByteStr { terminated: true } => BYTE_STRING,
253 LK::ByteStr { terminated: false } => { 213 LK::ByteStr { terminated: false } => {
254 return (BYTE_STRING, Some(TE::UnterminatedByteString)) 214 return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal"))
255 } 215 }
256 216
257 LK::RawStr { started: true, terminated: true, .. } => RAW_STRING, 217 LK::RawStr { started: true, terminated: true, .. } => RAW_STRING,
258 LK::RawStr { started: true, terminated: false, .. } => { 218 LK::RawStr { started: true, terminated: false, .. } => {
259 return (RAW_STRING, Some(TE::UnterminatedRawString)) 219 return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal"))
260 } 220 }
261 LK::RawStr { started: false, .. } => { 221 LK::RawStr { started: false, .. } => {
262 return (RAW_STRING, Some(TE::UnstartedRawString)) 222 return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal"))
263 } 223 }
264 224
265 LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING, 225 LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING,
266 LK::RawByteStr { started: true, terminated: false, .. } => { 226 LK::RawByteStr { started: true, terminated: false, .. } => {
267 return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString)) 227 return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"))
268 } 228 }
269 LK::RawByteStr { started: false, .. } => { 229 LK::RawByteStr { started: false, .. } => {
270 return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString)) 230 return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal"))
271 } 231 }
272 }; 232 };
273 233
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index a86da0675..aad70d015 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -27,8 +27,8 @@ pub(crate) fn incremental_reparse(
27 edit: &AtomTextEdit, 27 edit: &AtomTextEdit,
28 errors: Vec<SyntaxError>, 28 errors: Vec<SyntaxError>,
29) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> { 29) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
30 if let Some((green, old_range)) = reparse_token(node, &edit) { 30 if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
31 return Some((green, merge_errors(errors, Vec::new(), old_range, edit), old_range)); 31 return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
32 } 32 }
33 33
34 if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) { 34 if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
@@ -40,7 +40,7 @@ pub(crate) fn incremental_reparse(
40fn reparse_token<'node>( 40fn reparse_token<'node>(
41 root: &'node SyntaxNode, 41 root: &'node SyntaxNode,
42 edit: &AtomTextEdit, 42 edit: &AtomTextEdit,
43) -> Option<(GreenNode, TextRange)> { 43) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
44 let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone(); 44 let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
45 let prev_token_kind = prev_token.kind(); 45 let prev_token_kind = prev_token.kind();
46 match prev_token_kind { 46 match prev_token_kind {
@@ -54,7 +54,7 @@ fn reparse_token<'node>(
54 } 54 }
55 55
56 let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit); 56 let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
57 let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?; 57 let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;
58 58
59 if new_token_kind != prev_token_kind 59 if new_token_kind != prev_token_kind
60 || (new_token_kind == IDENT && is_contextual_kw(&new_text)) 60 || (new_token_kind == IDENT && is_contextual_kw(&new_text))
@@ -76,7 +76,11 @@ fn reparse_token<'node>(
76 76
77 let new_token = 77 let new_token =
78 GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into()); 78 GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
79 Some((prev_token.replace_with(new_token), prev_token.text_range())) 79 Some((
80 prev_token.replace_with(new_token),
81 new_err.into_iter().collect(),
82 prev_token.text_range(),
83 ))
80 } 84 }
81 _ => None, 85 _ => None,
82 } 86 }
@@ -87,7 +91,7 @@ fn reparse_block<'node>(
87 edit: &AtomTextEdit, 91 edit: &AtomTextEdit,
88) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> { 92) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
89 let (node, reparser) = find_reparsable_node(root, edit.delete)?; 93 let (node, reparser) = find_reparsable_node(root, edit.delete)?;
90 let text = get_text_after_edit(node.clone().into(), &edit); 94 let text = get_text_after_edit(node.clone().into(), edit);
91 95
92 let (tokens, new_lexer_errors) = tokenize(&text); 96 let (tokens, new_lexer_errors) = tokenize(&text);
93 if !is_balanced(&tokens) { 97 if !is_balanced(&tokens) {
@@ -162,20 +166,27 @@ fn is_balanced(tokens: &[Token]) -> bool {
162fn merge_errors( 166fn merge_errors(
163 old_errors: Vec<SyntaxError>, 167 old_errors: Vec<SyntaxError>,
164 new_errors: Vec<SyntaxError>, 168 new_errors: Vec<SyntaxError>,
165 old_range: TextRange, 169 range_before_reparse: TextRange,
166 edit: &AtomTextEdit, 170 edit: &AtomTextEdit,
167) -> Vec<SyntaxError> { 171) -> Vec<SyntaxError> {
168 let mut res = Vec::new(); 172 let mut res = Vec::new();
169 for e in old_errors { 173
170 if e.offset() <= old_range.start() { 174 for old_err in old_errors {
171 res.push(e) 175 let old_err_range = old_err.range();
172 } else if e.offset() >= old_range.end() { 176 // FIXME: make sure that .start() was here previously by a mistake
173 res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len())); 177 if old_err_range.end() <= range_before_reparse.start() {
178 res.push(old_err);
179 } else if old_err_range.start() >= range_before_reparse.end() {
180 let inserted_len = TextUnit::of_str(&edit.insert);
181 res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
182 // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
174 } 183 }
175 } 184 }
176 for e in new_errors { 185 res.extend(new_errors.into_iter().map(|new_err| {
177 res.push(e.add_offset(old_range.start(), 0.into())); 186 // fighting borrow checker with a variable ;)
178 } 187 let offseted_range = new_err.range() + range_before_reparse.start();
188 new_err.with_range(offseted_range)
189 }));
179 res 190 res
180} 191}
181 192
@@ -193,9 +204,9 @@ mod tests {
193 204
194 let fully_reparsed = SourceFile::parse(&after); 205 let fully_reparsed = SourceFile::parse(&after);
195 let incrementally_reparsed: Parse<SourceFile> = { 206 let incrementally_reparsed: Parse<SourceFile> = {
196 let f = SourceFile::parse(&before); 207 let before = SourceFile::parse(&before);
197 let (green, new_errors, range) = 208 let (green, new_errors, range) =
198 incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap(); 209 incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
199 assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length"); 210 assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
200 Parse::new(green, new_errors) 211 Parse::new(green, new_errors)
201 }; 212 };
@@ -204,6 +215,7 @@ mod tests {
204 &format!("{:#?}", fully_reparsed.tree().syntax()), 215 &format!("{:#?}", fully_reparsed.tree().syntax()),
205 &format!("{:#?}", incrementally_reparsed.tree().syntax()), 216 &format!("{:#?}", incrementally_reparsed.tree().syntax()),
206 ); 217 );
218 assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
207 } 219 }
208 220
209 #[test] // FIXME: some test here actually test token reparsing 221 #[test] // FIXME: some test here actually test token reparsing
@@ -402,4 +414,42 @@ enum Foo {
402 4, 414 4,
403 ); 415 );
404 } 416 }
417
418 #[test]
419 fn reparse_str_token_with_error_unchanged() {
420 do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24);
421 }
422
423 #[test]
424 fn reparse_str_token_with_error_fixed() {
425 do_check(r#""unterinated<|><|>"#, "\"", 12);
426 }
427
428 #[test]
429 fn reparse_block_with_error_in_middle_unchanged() {
430 do_check(
431 r#"fn main() {
432 if {}
433 32 + 4<|><|>
434 return
435 if {}
436 }"#,
437 "23",
438 105,
439 )
440 }
441
442 #[test]
443 fn reparse_block_with_error_in_middle_fixed() {
444 do_check(
445 r#"fn main() {
446 if {}
447 32 + 4<|><|>
448 return
449 if {}
450 }"#,
451 ";",
452 105,
453 )
454 }
405} 455}