author    Aleksey Kladov <[email protected]>  2020-08-12 17:26:51 +0100
committer Aleksey Kladov <[email protected]>  2020-08-12 17:30:53 +0100
commit    a1c187eef3ba08076aedb5154929f7eda8d1b424 (patch)
tree      9d898eb9600b0c36a74e4f95238f679c683fa566 /crates/ra_syntax/src/parsing/reparsing.rs
parent    3d6889cba72a9d02199f7adaa2ecc69bc30af834 (diff)
Rename ra_syntax -> syntax
Diffstat (limited to 'crates/ra_syntax/src/parsing/reparsing.rs')

-rw-r--r--  crates/ra_syntax/src/parsing/reparsing.rs  455
1 file changed, 0 insertions, 455 deletions
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
deleted file mode 100644
index 4149f856a..000000000
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ /dev/null
@@ -1,455 +0,0 @@
//! Implementation of incremental re-parsing.
//!
//! We use two simple strategies for this:
//! - if the edit modifies only a single token (like changing an identifier's
//!   letter), we replace only this token.
//! - otherwise, we search for the nearest `{}` block which contains the edit
//!   and try to parse only this block.
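//!
//! A minimal usage sketch (illustrative; it assumes this crate's
//! `SourceFile` parse entry point and the `Indel` edit type, mirroring how
//! the tests below drive the function):
//!
//! ```ignore
//! let before = SourceFile::parse("fn foo() { 1 }");
//! // Replace the `1` literal (offsets 11..12) with `2`: a single-token edit.
//! let edit = Indel::replace(TextRange::new(11.into(), 12.into()), "2".to_string());
//! // `Some` iff one of the two strategies above applies; the caller falls
//! // back to a full reparse on `None`.
//! let reparsed = incremental_reparse(before.tree().syntax(), &edit, Vec::new());
//! ```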

use parser::Reparser;
use text_edit::Indel;

use crate::{
    algo,
    parsing::{
        lexer::{lex_single_syntax_kind, tokenize, Token},
        text_token_source::TextTokenSource,
        text_tree_sink::TextTreeSink,
    },
    syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode},
    SyntaxError,
    SyntaxKind::*,
    TextRange, TextSize, T,
};

pub(crate) fn incremental_reparse(
    node: &SyntaxNode,
    edit: &Indel,
    errors: Vec<SyntaxError>,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
    if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
    }

    if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
    }
    None
}

fn reparse_token<'node>(
    root: &'node SyntaxNode,
    edit: &Indel,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
    let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
    let prev_token_kind = prev_token.kind();
    match prev_token_kind {
        WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
            if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
                // removing a newline may extend the previous token
                let deleted_range = edit.delete - prev_token.text_range().start();
                if prev_token.text()[deleted_range].contains('\n') {
                    return None;
                }
            }

            let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
            let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;

            if new_token_kind != prev_token_kind
                || (new_token_kind == IDENT && is_contextual_kw(&new_text))
            {
                return None;
            }

            // Check that the edited token is not a part of a bigger token.
            // E.g. if for the source code `bruh"str"` the user removes `ruh`,
            // then `b` no longer remains an identifier, but becomes a part of
            // a byte string literal.
            if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
                new_text.push(next_char);
                let token_with_next_char = lex_single_syntax_kind(&new_text);
                if let Some((_kind, _error)) = token_with_next_char {
                    return None;
                }
                new_text.pop();
            }

            let new_token =
                GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
            Some((
                prev_token.replace_with(new_token),
                new_err.into_iter().collect(),
                prev_token.text_range(),
            ))
        }
        _ => None,
    }
}
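
// Illustrative examples for `reparse_token` (hypothetical edits, not from
// the original comments):
// - editing `foo` to `food` inside an identifier relexes to the same IDENT
//   kind, so only that green token is swapped;
// - deleting the `\n` inside a WHITESPACE token returns `None`, since joining
//   the two lines may glue neighbouring tokens together.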

fn reparse_block<'node>(
    root: &'node SyntaxNode,
    edit: &Indel,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
    let (node, reparser) = find_reparsable_node(root, edit.delete)?;
    let text = get_text_after_edit(node.clone().into(), edit);

    let (tokens, new_lexer_errors) = tokenize(&text);
    if !is_balanced(&tokens) {
        return None;
    }

    let mut token_source = TextTokenSource::new(&text, &tokens);
    let mut tree_sink = TextTreeSink::new(&text, &tokens);
    reparser.parse(&mut token_source, &mut tree_sink);

    let (green, mut new_parser_errors) = tree_sink.finish();
    new_parser_errors.extend(new_lexer_errors);

    Some((node.replace_with(green), new_parser_errors, node.text_range()))
}
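
// Illustrative walk-through: in `fn foo() { let x = 1; }`, editing `1` to
// `92` finds the `{ ... }` block as the nearest reparsable ancestor; its
// edited text is relexed, checked for brace balance, reparsed with the
// block's `Reparser`, and the resulting green node replaces the old one.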

fn get_text_after_edit(element: SyntaxElement, edit: &Indel) -> String {
    let edit = Indel::replace(edit.delete - element.text_range().start(), edit.insert.clone());

    let mut text = match element {
        NodeOrToken::Token(token) => token.text().to_string(),
        NodeOrToken::Node(node) => node.text().to_string(),
    };
    edit.apply(&mut text);
    text
}

fn is_contextual_kw(text: &str) -> bool {
    matches!(text, "auto" | "default" | "union")
}
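
// Why token-level reparse bails on contextual keywords: relexing `unio` ->
// `union` still yields IDENT, but `union` can change how the enclosing node
// parses (it may now start a union item), so swapping just the token could
// disagree with a full reparse.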

fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
    let node = algo::find_covering_element(node, range);

    let mut ancestors = match node {
        NodeOrToken::Token(it) => it.parent().ancestors(),
        NodeOrToken::Node(it) => it.ancestors(),
    };
    ancestors.find_map(|node| {
        let first_child = node.first_child_or_token().map(|it| it.kind());
        let parent = node.parent().map(|it| it.kind());
        Reparser::for_node(node.kind(), first_child, parent).map(|r| (node, r))
    })
}

fn is_balanced(tokens: &[Token]) -> bool {
    if tokens.is_empty()
        || tokens.first().unwrap().kind != T!['{']
        || tokens.last().unwrap().kind != T!['}']
    {
        return false;
    }
    let mut balance = 0usize;
    for t in &tokens[1..tokens.len() - 1] {
        match t.kind {
            T!['{'] => balance += 1,
            T!['}'] => {
                balance = match balance.checked_sub(1) {
                    Some(b) => b,
                    None => return false,
                }
            }
            _ => (),
        }
    }
    balance == 0
}
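
// Illustrative: `{ { } }` is balanced; `{ } }` is not (the scan over the
// inner tokens underflows on the extra `}`); `{ { }` is not (the inner
// balance ends at 1). The first and last tokens must be the block's own
// braces.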

fn merge_errors(
    old_errors: Vec<SyntaxError>,
    new_errors: Vec<SyntaxError>,
    range_before_reparse: TextRange,
    edit: &Indel,
) -> Vec<SyntaxError> {
    let mut res = Vec::new();

    for old_err in old_errors {
        let old_err_range = old_err.range();
        if old_err_range.end() <= range_before_reparse.start() {
            res.push(old_err);
        } else if old_err_range.start() >= range_before_reparse.end() {
            let inserted_len = TextSize::of(&edit.insert);
            res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
            // Note: the extra parens are intentional: adding the inserted
            // length before subtracting the deleted length prevents uint
            // underflow (there used to be a bug here).
        }
    }
    res.extend(new_errors.into_iter().map(|new_err| {
        // fighting the borrow checker with a variable ;)
        let offseted_range = new_err.range() + range_before_reparse.start();
        new_err.with_range(offseted_range)
    }));
    res
}
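
// Worked example (illustrative numbers): with `range_before_reparse` = 10..20
// and an edit deleting 10..20 while inserting 5 characters, an old error at
// 25..27 lies entirely after the reparsed range and is shifted by +5 - 10 to
// 20..22. New errors have ranges relative to the reparsed fragment, so they
// are shifted forward by 10, the fragment's start offset.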

#[cfg(test)]
mod tests {
    use test_utils::{assert_eq_text, extract_range};

    use super::*;
    use crate::{AstNode, Parse, SourceFile};

    fn do_check(before: &str, replace_with: &str, reparsed_len: u32) {
        let (range, before) = extract_range(before);
        let edit = Indel::replace(range, replace_with.to_owned());
        let after = {
            let mut after = before.clone();
            edit.apply(&mut after);
            after
        };

        let fully_reparsed = SourceFile::parse(&after);
        let incrementally_reparsed: Parse<SourceFile> = {
            let before = SourceFile::parse(&before);
            let (green, new_errors, range) =
                incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
            assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
            Parse::new(green, new_errors)
        };

        assert_eq_text!(
            &format!("{:#?}", fully_reparsed.tree().syntax()),
            &format!("{:#?}", incrementally_reparsed.tree().syntax()),
        );
        assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
    }
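
    // In the fixtures below, `<|>...<|>` marks the range replaced by the
    // edit (extracted by `extract_range`), and `reparsed_len` is the expected
    // length of the reparsed fragment.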

    #[test] // FIXME: some tests here actually test token reparsing
    fn reparse_block_tests() {
        do_check(
            r"
fn foo() {
    let x = foo + <|>bar<|>
}
",
            "baz",
            3,
        );
        do_check(
            r"
fn foo() {
    let x = foo<|> + bar<|>
}
",
            "baz",
            25,
        );
        do_check(
            r"
struct Foo {
    f: foo<|><|>
}
",
            ",\n g: (),",
            14,
        );
        do_check(
            r"
fn foo {
    let;
    1 + 1;
    <|>92<|>;
}
",
            "62",
            31, // FIXME: reparse only int literal here
        );
        do_check(
            r"
mod foo {
    fn <|><|>
}
",
            "bar",
            11,
        );

        do_check(
            r"
trait Foo {
    type <|>Foo<|>;
}
",
            "Output",
            3,
        );
        do_check(
            r"
impl IntoIterator<Item=i32> for Foo {
    f<|><|>
}
",
            "n next(",
            9,
        );
        do_check(r"use a::b::{foo,<|>,bar<|>};", "baz", 10);
        do_check(
            r"
pub enum A {
    Foo<|><|>
}
",
            "\nBar;\n",
            11,
        );
        do_check(
            r"
foo!{a, b<|><|> d}
",
            ", c[3]",
            8,
        );
        do_check(
            r"
fn foo() {
    vec![<|><|>]
}
",
            "123",
            14,
        );
        do_check(
            r"
extern {
    fn<|>;<|>
}
",
            " exit(code: c_int)",
            11,
        );
    }

    #[test]
    fn reparse_token_tests() {
        do_check(
            r"<|><|>
fn foo() -> i32 { 1 }
",
            "\n\n\n \n",
            1,
        );
        do_check(
            r"
fn foo() -> <|><|> {}
",
            " \n",
            2,
        );
        do_check(
            r"
fn <|>foo<|>() -> i32 { 1 }
",
            "bar",
            3,
        );
        do_check(
            r"
fn foo<|><|>foo() { }
",
            "bar",
            6,
        );
        do_check(
            r"
fn foo /* <|><|> */ () {}
",
            "some comment",
            6,
        );
        do_check(
            r"
fn baz <|><|> () {}
",
            " \t\t\n\n",
            2,
        );
        do_check(
            r"
fn baz <|><|> () {}
",
            " \t\t\n\n",
            2,
        );
        do_check(
            r"
/// foo <|><|>omment
mod { }
",
            "c",
            14,
        );
        do_check(
            r#"
fn -> &str { "Hello<|><|>" }
"#,
            ", world",
            7,
        );
        do_check(
            r#"
fn -> &str { // "Hello<|><|>"
"#,
            ", world",
            10,
        );
        do_check(
            r##"
fn -> &str { r#"Hello<|><|>"#
"##,
            ", world",
            10,
        );
        do_check(
            r"
#[derive(<|>Copy<|>)]
enum Foo {

}
",
            "Clone",
            4,
        );
    }

    #[test]
    fn reparse_str_token_with_error_unchanged() {
        do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24);
    }

    #[test]
    fn reparse_str_token_with_error_fixed() {
        do_check(r#""unterinated<|><|>"#, "\"", 12);
    }

    #[test]
    fn reparse_block_with_error_in_middle_unchanged() {
        do_check(
            r#"fn main() {
                if {}
                32 + 4<|><|>
                return
                if {}
            }"#,
            "23",
            105,
        )
    }

    #[test]
    fn reparse_block_with_error_in_middle_fixed() {
        do_check(
            r#"fn main() {
                if {}
                32 + 4<|><|>
                return
                if {}
            }"#,
            ";",
            105,
        )
    }
}
455 | } | ||