diff options
Diffstat (limited to 'crates/ra_syntax/src/parsing')
-rw-r--r-- | crates/ra_syntax/src/parsing/lexer.rs | 92 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/reparsing.rs | 83 |
2 files changed, 92 insertions, 83 deletions
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index f889e6a1d..f2684c852 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs | |||
@@ -2,7 +2,7 @@ | |||
2 | //! It is just a bridge to `rustc_lexer`. | 2 | //! It is just a bridge to `rustc_lexer`. |
3 | 3 | ||
4 | use crate::{ | 4 | use crate::{ |
5 | SyntaxError, SyntaxErrorKind, | 5 | SyntaxError, |
6 | SyntaxKind::{self, *}, | 6 | SyntaxKind::{self, *}, |
7 | TextRange, TextUnit, | 7 | TextRange, TextUnit, |
8 | }; | 8 | }; |
@@ -41,13 +41,13 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) { | |||
41 | let token_len = TextUnit::from_usize(rustc_token.len); | 41 | let token_len = TextUnit::from_usize(rustc_token.len); |
42 | let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len); | 42 | let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len); |
43 | 43 | ||
44 | let (syntax_kind, error) = | 44 | let (syntax_kind, err_message) = |
45 | rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]); | 45 | rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]); |
46 | 46 | ||
47 | tokens.push(Token { kind: syntax_kind, len: token_len }); | 47 | tokens.push(Token { kind: syntax_kind, len: token_len }); |
48 | 48 | ||
49 | if let Some(error) = error { | 49 | if let Some(err_message) = err_message { |
50 | errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range)); | 50 | errors.push(SyntaxError::new(err_message, token_range)); |
51 | } | 51 | } |
52 | 52 | ||
53 | offset += rustc_token.len; | 53 | offset += rustc_token.len; |
@@ -94,61 +94,21 @@ fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> { | |||
94 | } | 94 | } |
95 | 95 | ||
96 | let rustc_token = rustc_lexer::first_token(text); | 96 | let rustc_token = rustc_lexer::first_token(text); |
97 | let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text); | 97 | let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text); |
98 | 98 | ||
99 | let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) }; | 99 | let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) }; |
100 | let error = error.map(|error| { | 100 | let optional_error = err_message.map(|err_message| { |
101 | SyntaxError::new( | 101 | SyntaxError::new(err_message, TextRange::from_to(0.into(), TextUnit::of_str(text))) |
102 | SyntaxErrorKind::TokenizeError(error), | ||
103 | TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)), | ||
104 | ) | ||
105 | }); | 102 | }); |
106 | 103 | ||
107 | Some((token, error)) | 104 | Some((token, optional_error)) |
108 | } | ||
109 | |||
110 | // FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice: | ||
111 | // https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067 | ||
112 | |||
113 | /// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant. | ||
114 | /// It describes all the types of errors that may happen during the tokenization | ||
115 | /// of Rust source. | ||
116 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
117 | pub enum TokenizeError { | ||
118 | /// Base prefix was provided, but there were no digits | ||
119 | /// after it, e.g. `0x`, `0b`. | ||
120 | EmptyInt, | ||
121 | /// Float exponent lacks digits e.g. `12.34e+`, `12.3E+`, `12e-`, `1_E-`, | ||
122 | EmptyExponent, | ||
123 | |||
124 | /// Block comment lacks trailing delimiter `*/` | ||
125 | UnterminatedBlockComment, | ||
126 | /// Character literal lacks trailing delimiter `'` | ||
127 | UnterminatedChar, | ||
128 | /// Characterish byte literal lacks trailing delimiter `'` | ||
129 | UnterminatedByte, | ||
130 | /// String literal lacks trailing delimiter `"` | ||
131 | UnterminatedString, | ||
132 | /// Byte string literal lacks trailing delimiter `"` | ||
133 | UnterminatedByteString, | ||
134 | /// Raw literal lacks trailing delimiter e.g. `"##` | ||
135 | UnterminatedRawString, | ||
136 | /// Raw byte string literal lacks trailing delimiter e.g. `"##` | ||
137 | UnterminatedRawByteString, | ||
138 | |||
139 | /// Raw string lacks a quote after the pound characters e.g. `r###` | ||
140 | UnstartedRawString, | ||
141 | /// Raw byte string lacks a quote after the pound characters e.g. `br###` | ||
142 | UnstartedRawByteString, | ||
143 | |||
144 | /// Lifetime starts with a number e.g. `'4ever` | ||
145 | LifetimeStartsWithNumber, | ||
146 | } | 105 | } |
147 | 106 | ||
107 | /// Returns `SyntaxKind` and an optional tokenize error message. | ||
148 | fn rustc_token_kind_to_syntax_kind( | 108 | fn rustc_token_kind_to_syntax_kind( |
149 | rustc_token_kind: &rustc_lexer::TokenKind, | 109 | rustc_token_kind: &rustc_lexer::TokenKind, |
150 | token_text: &str, | 110 | token_text: &str, |
151 | ) -> (SyntaxKind, Option<TokenizeError>) { | 111 | ) -> (SyntaxKind, Option<&'static str>) { |
152 | // A note on an intended tradeoff: | 112 | // A note on an intended tradeoff: |
153 | // We drop some useful infromation here (see patterns with double dots `..`) | 113 | // We drop some useful infromation here (see patterns with double dots `..`) |
154 | // Storing that info in `SyntaxKind` is not possible due to its layout requirements of | 114 | // Storing that info in `SyntaxKind` is not possible due to its layout requirements of |
@@ -156,14 +116,15 @@ fn rustc_token_kind_to_syntax_kind( | |||
156 | 116 | ||
157 | let syntax_kind = { | 117 | let syntax_kind = { |
158 | use rustc_lexer::TokenKind as TK; | 118 | use rustc_lexer::TokenKind as TK; |
159 | use TokenizeError as TE; | ||
160 | |||
161 | match rustc_token_kind { | 119 | match rustc_token_kind { |
162 | TK::LineComment => COMMENT, | 120 | TK::LineComment => COMMENT, |
163 | 121 | ||
164 | TK::BlockComment { terminated: true } => COMMENT, | 122 | TK::BlockComment { terminated: true } => COMMENT, |
165 | TK::BlockComment { terminated: false } => { | 123 | TK::BlockComment { terminated: false } => { |
166 | return (COMMENT, Some(TE::UnterminatedBlockComment)); | 124 | return ( |
125 | COMMENT, | ||
126 | Some("Missing trailing `*/` symbols to terminate the block comment"), | ||
127 | ); | ||
167 | } | 128 | } |
168 | 129 | ||
169 | TK::Whitespace => WHITESPACE, | 130 | TK::Whitespace => WHITESPACE, |
@@ -181,7 +142,7 @@ fn rustc_token_kind_to_syntax_kind( | |||
181 | 142 | ||
182 | TK::Lifetime { starts_with_number: false } => LIFETIME, | 143 | TK::Lifetime { starts_with_number: false } => LIFETIME, |
183 | TK::Lifetime { starts_with_number: true } => { | 144 | TK::Lifetime { starts_with_number: true } => { |
184 | return (LIFETIME, Some(TE::LifetimeStartsWithNumber)) | 145 | return (LIFETIME, Some("Lifetime name cannot start with a number")) |
185 | } | 146 | } |
186 | 147 | ||
187 | TK::Semi => SEMI, | 148 | TK::Semi => SEMI, |
@@ -217,57 +178,56 @@ fn rustc_token_kind_to_syntax_kind( | |||
217 | 178 | ||
218 | return (syntax_kind, None); | 179 | return (syntax_kind, None); |
219 | 180 | ||
220 | fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<TokenizeError>) { | 181 | fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) { |
221 | use rustc_lexer::LiteralKind as LK; | 182 | use rustc_lexer::LiteralKind as LK; |
222 | use TokenizeError as TE; | ||
223 | 183 | ||
224 | #[rustfmt::skip] | 184 | #[rustfmt::skip] |
225 | let syntax_kind = match *kind { | 185 | let syntax_kind = match *kind { |
226 | LK::Int { empty_int: false, .. } => INT_NUMBER, | 186 | LK::Int { empty_int: false, .. } => INT_NUMBER, |
227 | LK::Int { empty_int: true, .. } => { | 187 | LK::Int { empty_int: true, .. } => { |
228 | return (INT_NUMBER, Some(TE::EmptyInt)) | 188 | return (INT_NUMBER, Some("Missing digits after the integer base prefix")) |
229 | } | 189 | } |
230 | 190 | ||
231 | LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER, | 191 | LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER, |
232 | LK::Float { empty_exponent: true, .. } => { | 192 | LK::Float { empty_exponent: true, .. } => { |
233 | return (FLOAT_NUMBER, Some(TE::EmptyExponent)) | 193 | return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol")) |
234 | } | 194 | } |
235 | 195 | ||
236 | LK::Char { terminated: true } => CHAR, | 196 | LK::Char { terminated: true } => CHAR, |
237 | LK::Char { terminated: false } => { | 197 | LK::Char { terminated: false } => { |
238 | return (CHAR, Some(TE::UnterminatedChar)) | 198 | return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal")) |
239 | } | 199 | } |
240 | 200 | ||
241 | LK::Byte { terminated: true } => BYTE, | 201 | LK::Byte { terminated: true } => BYTE, |
242 | LK::Byte { terminated: false } => { | 202 | LK::Byte { terminated: false } => { |
243 | return (BYTE, Some(TE::UnterminatedByte)) | 203 | return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal")) |
244 | } | 204 | } |
245 | 205 | ||
246 | LK::Str { terminated: true } => STRING, | 206 | LK::Str { terminated: true } => STRING, |
247 | LK::Str { terminated: false } => { | 207 | LK::Str { terminated: false } => { |
248 | return (STRING, Some(TE::UnterminatedString)) | 208 | return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal")) |
249 | } | 209 | } |
250 | 210 | ||
251 | 211 | ||
252 | LK::ByteStr { terminated: true } => BYTE_STRING, | 212 | LK::ByteStr { terminated: true } => BYTE_STRING, |
253 | LK::ByteStr { terminated: false } => { | 213 | LK::ByteStr { terminated: false } => { |
254 | return (BYTE_STRING, Some(TE::UnterminatedByteString)) | 214 | return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal")) |
255 | } | 215 | } |
256 | 216 | ||
257 | LK::RawStr { started: true, terminated: true, .. } => RAW_STRING, | 217 | LK::RawStr { started: true, terminated: true, .. } => RAW_STRING, |
258 | LK::RawStr { started: true, terminated: false, .. } => { | 218 | LK::RawStr { started: true, terminated: false, .. } => { |
259 | return (RAW_STRING, Some(TE::UnterminatedRawString)) | 219 | return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal")) |
260 | } | 220 | } |
261 | LK::RawStr { started: false, .. } => { | 221 | LK::RawStr { started: false, .. } => { |
262 | return (RAW_STRING, Some(TE::UnstartedRawString)) | 222 | return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal")) |
263 | } | 223 | } |
264 | 224 | ||
265 | LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING, | 225 | LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING, |
266 | LK::RawByteStr { started: true, terminated: false, .. } => { | 226 | LK::RawByteStr { started: true, terminated: false, .. } => { |
267 | return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString)) | 227 | return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal")) |
268 | } | 228 | } |
269 | LK::RawByteStr { started: false, .. } => { | 229 | LK::RawByteStr { started: false, .. } => { |
270 | return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString)) | 230 | return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal")) |
271 | } | 231 | } |
272 | }; | 232 | }; |
273 | 233 | ||
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index a86da0675..2d65b91f1 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs | |||
@@ -27,8 +27,8 @@ pub(crate) fn incremental_reparse( | |||
27 | edit: &AtomTextEdit, | 27 | edit: &AtomTextEdit, |
28 | errors: Vec<SyntaxError>, | 28 | errors: Vec<SyntaxError>, |
29 | ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> { | 29 | ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> { |
30 | if let Some((green, old_range)) = reparse_token(node, &edit) { | 30 | if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) { |
31 | return Some((green, merge_errors(errors, Vec::new(), old_range, edit), old_range)); | 31 | return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range)); |
32 | } | 32 | } |
33 | 33 | ||
34 | if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) { | 34 | if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) { |
@@ -40,7 +40,7 @@ pub(crate) fn incremental_reparse( | |||
40 | fn reparse_token<'node>( | 40 | fn reparse_token<'node>( |
41 | root: &'node SyntaxNode, | 41 | root: &'node SyntaxNode, |
42 | edit: &AtomTextEdit, | 42 | edit: &AtomTextEdit, |
43 | ) -> Option<(GreenNode, TextRange)> { | 43 | ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> { |
44 | let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone(); | 44 | let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone(); |
45 | let prev_token_kind = prev_token.kind(); | 45 | let prev_token_kind = prev_token.kind(); |
46 | match prev_token_kind { | 46 | match prev_token_kind { |
@@ -54,7 +54,7 @@ fn reparse_token<'node>( | |||
54 | } | 54 | } |
55 | 55 | ||
56 | let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit); | 56 | let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit); |
57 | let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?; | 57 | let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?; |
58 | 58 | ||
59 | if new_token_kind != prev_token_kind | 59 | if new_token_kind != prev_token_kind |
60 | || (new_token_kind == IDENT && is_contextual_kw(&new_text)) | 60 | || (new_token_kind == IDENT && is_contextual_kw(&new_text)) |
@@ -76,7 +76,11 @@ fn reparse_token<'node>( | |||
76 | 76 | ||
77 | let new_token = | 77 | let new_token = |
78 | GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into()); | 78 | GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into()); |
79 | Some((prev_token.replace_with(new_token), prev_token.text_range())) | 79 | Some(( |
80 | prev_token.replace_with(new_token), | ||
81 | new_err.into_iter().collect(), | ||
82 | prev_token.text_range(), | ||
83 | )) | ||
80 | } | 84 | } |
81 | _ => None, | 85 | _ => None, |
82 | } | 86 | } |
@@ -87,7 +91,7 @@ fn reparse_block<'node>( | |||
87 | edit: &AtomTextEdit, | 91 | edit: &AtomTextEdit, |
88 | ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> { | 92 | ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> { |
89 | let (node, reparser) = find_reparsable_node(root, edit.delete)?; | 93 | let (node, reparser) = find_reparsable_node(root, edit.delete)?; |
90 | let text = get_text_after_edit(node.clone().into(), &edit); | 94 | let text = get_text_after_edit(node.clone().into(), edit); |
91 | 95 | ||
92 | let (tokens, new_lexer_errors) = tokenize(&text); | 96 | let (tokens, new_lexer_errors) = tokenize(&text); |
93 | if !is_balanced(&tokens) { | 97 | if !is_balanced(&tokens) { |
@@ -162,20 +166,26 @@ fn is_balanced(tokens: &[Token]) -> bool { | |||
162 | fn merge_errors( | 166 | fn merge_errors( |
163 | old_errors: Vec<SyntaxError>, | 167 | old_errors: Vec<SyntaxError>, |
164 | new_errors: Vec<SyntaxError>, | 168 | new_errors: Vec<SyntaxError>, |
165 | old_range: TextRange, | 169 | range_before_reparse: TextRange, |
166 | edit: &AtomTextEdit, | 170 | edit: &AtomTextEdit, |
167 | ) -> Vec<SyntaxError> { | 171 | ) -> Vec<SyntaxError> { |
168 | let mut res = Vec::new(); | 172 | let mut res = Vec::new(); |
169 | for e in old_errors { | 173 | |
170 | if e.offset() <= old_range.start() { | 174 | for old_err in old_errors { |
171 | res.push(e) | 175 | let old_err_range = old_err.range(); |
172 | } else if e.offset() >= old_range.end() { | 176 | if old_err_range.end() <= range_before_reparse.start() { |
173 | res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len())); | 177 | res.push(old_err); |
178 | } else if old_err_range.start() >= range_before_reparse.end() { | ||
179 | let inserted_len = TextUnit::of_str(&edit.insert); | ||
180 | res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len())); | ||
181 | // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug) | ||
174 | } | 182 | } |
175 | } | 183 | } |
176 | for e in new_errors { | 184 | res.extend(new_errors.into_iter().map(|new_err| { |
177 | res.push(e.add_offset(old_range.start(), 0.into())); | 185 | // fighting borrow checker with a variable ;) |
178 | } | 186 | let offseted_range = new_err.range() + range_before_reparse.start(); |
187 | new_err.with_range(offseted_range) | ||
188 | })); | ||
179 | res | 189 | res |
180 | } | 190 | } |
181 | 191 | ||
@@ -193,9 +203,9 @@ mod tests { | |||
193 | 203 | ||
194 | let fully_reparsed = SourceFile::parse(&after); | 204 | let fully_reparsed = SourceFile::parse(&after); |
195 | let incrementally_reparsed: Parse<SourceFile> = { | 205 | let incrementally_reparsed: Parse<SourceFile> = { |
196 | let f = SourceFile::parse(&before); | 206 | let before = SourceFile::parse(&before); |
197 | let (green, new_errors, range) = | 207 | let (green, new_errors, range) = |
198 | incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap(); | 208 | incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap(); |
199 | assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length"); | 209 | assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length"); |
200 | Parse::new(green, new_errors) | 210 | Parse::new(green, new_errors) |
201 | }; | 211 | }; |
@@ -204,6 +214,7 @@ mod tests { | |||
204 | &format!("{:#?}", fully_reparsed.tree().syntax()), | 214 | &format!("{:#?}", fully_reparsed.tree().syntax()), |
205 | &format!("{:#?}", incrementally_reparsed.tree().syntax()), | 215 | &format!("{:#?}", incrementally_reparsed.tree().syntax()), |
206 | ); | 216 | ); |
217 | assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors()); | ||
207 | } | 218 | } |
208 | 219 | ||
209 | #[test] // FIXME: some test here actually test token reparsing | 220 | #[test] // FIXME: some test here actually test token reparsing |
@@ -402,4 +413,42 @@ enum Foo { | |||
402 | 4, | 413 | 4, |
403 | ); | 414 | ); |
404 | } | 415 | } |
416 | |||
417 | #[test] | ||
418 | fn reparse_str_token_with_error_unchanged() { | ||
419 | do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24); | ||
420 | } | ||
421 | |||
422 | #[test] | ||
423 | fn reparse_str_token_with_error_fixed() { | ||
424 | do_check(r#""unterinated<|><|>"#, "\"", 12); | ||
425 | } | ||
426 | |||
427 | #[test] | ||
428 | fn reparse_block_with_error_in_middle_unchanged() { | ||
429 | do_check( | ||
430 | r#"fn main() { | ||
431 | if {} | ||
432 | 32 + 4<|><|> | ||
433 | return | ||
434 | if {} | ||
435 | }"#, | ||
436 | "23", | ||
437 | 105, | ||
438 | ) | ||
439 | } | ||
440 | |||
441 | #[test] | ||
442 | fn reparse_block_with_error_in_middle_fixed() { | ||
443 | do_check( | ||
444 | r#"fn main() { | ||
445 | if {} | ||
446 | 32 + 4<|><|> | ||
447 | return | ||
448 | if {} | ||
449 | }"#, | ||
450 | ";", | ||
451 | 105, | ||
452 | ) | ||
453 | } | ||
405 | } | 454 | } |