//! Implementation of incremental re-parsing.
//!
//! We use two simple strategies for this:
//!   - if the edit modifies only a single token (like changing an identifier's
//!     letter), we replace only this token.
//!   - otherwise, we search for the nearest `{}` block which contains the edit
//!     and try to parse only this block.
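//!
//! For illustration (mirroring the tests at the bottom of this file):
//! replacing `foo` with `bar` in `fn foo() -> i32 { 1 }` touches a single
//! `IDENT` token, so only that token is re-lexed, while an edit that replaces
//! ` + bar` in `let x = foo + bar` spans several tokens and falls back to
//! re-parsing the innermost enclosing `{}` block.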

use parser::Reparser;
use text_edit::Indel;

use crate::{
    parsing::{
        lexer::{lex_single_syntax_kind, tokenize, Token},
        text_token_source::TextTokenSource,
        text_tree_sink::TextTreeSink,
    },
    syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode},
    SyntaxError,
    SyntaxKind::*,
    TextRange, TextSize, T,
};

pub(crate) fn incremental_reparse(
    node: &SyntaxNode,
    edit: &Indel,
    errors: Vec<SyntaxError>,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
    if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
    }

    if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
    }
    None
}

/// Strategy one: replace only the single token covering the edit.
fn reparse_token<'node>(
    root: &'node SyntaxNode,
    edit: &Indel,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
    let prev_token = root.covering_element(edit.delete).as_token()?.clone();
    let prev_token_kind = prev_token.kind();
    match prev_token_kind {
        WHITESPACE | COMMENT | IDENT | STRING => {
            if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
                // removing a newline may extend the previous token
                let deleted_range = edit.delete - prev_token.text_range().start();
                if prev_token.text()[deleted_range].contains('\n') {
                    return None;
                }
            }

            let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
            let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;

            if new_token_kind != prev_token_kind
                || (new_token_kind == IDENT && is_contextual_kw(&new_text))
            {
                return None;
            }

            // Check that the edited token is not a part of a bigger token.
            // E.g. if for source code `bruh"str"` the user removed `ruh`, then
            // `b` no longer remains an identifier, but becomes a part of a byte string literal.
            if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
                new_text.push(next_char);
                let token_with_next_char = lex_single_syntax_kind(&new_text);
                if let Some((_kind, _error)) = token_with_next_char {
                    return None;
                }
                new_text.pop();
            }

            let new_token =
                GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), &new_text);
            Some((
                prev_token.replace_with(new_token),
                new_err.into_iter().collect(),
                prev_token.text_range(),
            ))
        }
        _ => None,
    }
}

/// Strategy two: re-parse the nearest reparsable block containing the edit.
fn reparse_block<'node>(
    root: &'node SyntaxNode,
    edit: &Indel,
) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
    let (node, reparser) = find_reparsable_node(root, edit.delete)?;
    let text = get_text_after_edit(node.clone().into(), edit);

    let (tokens, new_lexer_errors) = tokenize(&text);
    if !is_balanced(&tokens) {
        return None;
    }

    let mut token_source = TextTokenSource::new(&text, &tokens);
    let mut tree_sink = TextTreeSink::new(&text, &tokens);
    reparser.parse(&mut token_source, &mut tree_sink);

    let (green, mut new_parser_errors) = tree_sink.finish();
    new_parser_errors.extend(new_lexer_errors);

    Some((node.replace_with(green), new_parser_errors, node.text_range()))
}

fn get_text_after_edit(element: SyntaxElement, edit: &Indel) -> String {
    let edit = Indel::replace(edit.delete - element.text_range().start(), edit.insert.clone());

    let mut text = match element {
        NodeOrToken::Token(token) => token.text().to_string(),
        NodeOrToken::Node(node) => node.text().to_string(),
    };
    edit.apply(&mut text);
    text
}

fn is_contextual_kw(text: &str) -> bool {
    matches!(text, "auto" | "default" | "union")
}

fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
    let node = node.covering_element(range);
    node.ancestors().find_map(|node| {
        let first_child = node.first_child_or_token().map(|it| it.kind());
        let parent = node.parent().map(|it| it.kind());
        Reparser::for_node(node.kind(), first_child, parent).map(|r| (node, r))
    })
}

/// Checks that the token sequence is a single balanced `{ ... }` block.
fn is_balanced(tokens: &[Token]) -> bool {
    if tokens.is_empty()
        || tokens.first().unwrap().kind != T!['{']
        || tokens.last().unwrap().kind != T!['}']
    {
        return false;
    }
    let mut balance = 0usize;
    for t in &tokens[1..tokens.len() - 1] {
        match t.kind {
            T!['{'] => balance += 1,
            T!['}'] => {
                balance = match balance.checked_sub(1) {
                    Some(b) => b,
                    None => return false,
                }
            }
            _ => (),
        }
    }
    balance == 0
}

/// Keeps old errors that lie outside the reparsed range (shifting those after
/// it by the change in length) and appends the new errors, offset from the
/// reparsed node to absolute positions.
fn merge_errors(
    old_errors: Vec<SyntaxError>,
    new_errors: Vec<SyntaxError>,
    range_before_reparse: TextRange,
    edit: &Indel,
) -> Vec<SyntaxError> {
    let mut res = Vec::new();

    for old_err in old_errors {
        let old_err_range = old_err.range();
        if old_err_range.end() <= range_before_reparse.start() {
            res.push(old_err);
        } else if old_err_range.start() >= range_before_reparse.end() {
            let inserted_len = TextSize::of(&edit.insert);
            res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
            // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
        }
    }
    res.extend(new_errors.into_iter().map(|new_err| {
        // fighting borrow checker with a variable ;)
        let offseted_range = new_err.range() + range_before_reparse.start();
        new_err.with_range(offseted_range)
    }));
    res
}

#[cfg(test)]
mod tests {
    use test_utils::{assert_eq_text, extract_range};

    use super::*;
    use crate::{AstNode, Parse, SourceFile};

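    // `$0` markers in `before` delimit the range that the edit replaces (see
    // `extract_range`); `reparsed_len` is the expected length of the reparsed
    // fragment.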
    fn do_check(before: &str, replace_with: &str, reparsed_len: u32) {
        let (range, before) = extract_range(before);
        let edit = Indel::replace(range, replace_with.to_owned());
        let after = {
            let mut after = before.clone();
            edit.apply(&mut after);
            after
        };

        let fully_reparsed = SourceFile::parse(&after);
        let incrementally_reparsed: Parse<SourceFile> = {
            let before = SourceFile::parse(&before);
            let (green, new_errors, range) =
                incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec())
                    .unwrap();
            assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
            Parse::new(green, new_errors)
        };

        assert_eq_text!(
            &format!("{:#?}", fully_reparsed.tree().syntax()),
            &format!("{:#?}", incrementally_reparsed.tree().syntax()),
        );
        assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
    }

    #[test] // FIXME: some tests here actually test token reparsing
    fn reparse_block_tests() {
        do_check(
            r"
fn foo() {
    let x = foo + $0bar$0
}
",
            "baz",
            3,
        );
        do_check(
            r"
fn foo() {
    let x = foo$0 + bar$0
}
",
            "baz",
            25,
        );
        do_check(
            r"
struct Foo {
    f: foo$0$0
}
",
            ",\n g: (),",
            14,
        );
        do_check(
            r"
fn foo {
    let;
    1 + 1;
    $092$0;
}
",
            "62",
            31, // FIXME: reparse only int literal here
        );
        do_check(
            r"
mod foo {
    fn $0$0
}
",
            "bar",
            11,
        );
        do_check(
            r"
trait Foo {
    type $0Foo$0;
}
",
            "Output",
            3,
        );
        do_check(
            r"
impl IntoIterator for Foo {
    f$0$0
}
",
            "n next(",
            9,
        );
        do_check(r"use a::b::{foo,$0,bar$0};", "baz", 10);
        do_check(
            r"
pub enum A {
    Foo$0$0
}
",
            "\nBar;\n",
            11,
        );
        do_check(
            r"
foo!{a, b$0$0 d}
",
            ", c[3]",
            8,
        );
        do_check(
            r"
fn foo() {
    vec![$0$0]
}
",
            "123",
            14,
        );
        do_check(
            r"
extern {
    fn$0;$0
}
",
            " exit(code: c_int)",
            11,
        );
    }

    #[test]
    fn reparse_token_tests() {
        do_check(
            r"$0$0
fn foo() -> i32 { 1 }
",
            "\n\n\n \n",
            1,
        );
        do_check(
            r"
fn foo() -> $0$0 {}
",
            " \n",
            2,
        );
        do_check(
            r"
fn $0foo$0() -> i32 { 1 }
",
            "bar",
            3,
        );
        do_check(
            r"
fn foo$0$0foo() { }
",
            "bar",
            6,
        );
        do_check(
            r"
fn foo /* $0$0 */ () {}
",
            "some comment",
            6,
        );
        do_check(
            r"
fn baz $0$0 () {}
",
            " \t\t\n\n",
            2,
        );
        do_check(
            r"
fn baz $0$0 () {}
",
            " \t\t\n\n",
            2,
        );
        do_check(
            r"
/// foo $0$0omment
mod { }
",
            "c",
            14,
        );
        do_check(
            r#"
fn -> &str { "Hello$0$0" }
"#,
            ", world",
            7,
        );
        do_check(
            r#"
fn -> &str { // "Hello$0$0"
"#,
            ", world",
            10,
        );
        do_check(
            r##"
fn -> &str { r#"Hello$0$0"#
"##,
            ", world",
            10,
        );
        do_check(
            r"
#[derive($0Copy$0)]
enum Foo {
}
",
            "Clone",
            4,
        );
    }

    #[test]
    fn reparse_str_token_with_error_unchanged() {
        do_check(r#""$0Unclosed$0 string literal"#, "Still unclosed", 24);
    }

    #[test]
    fn reparse_str_token_with_error_fixed() {
        do_check(r#""unterinated$0$0"#, "\"", 12);
    }

    #[test]
    fn reparse_block_with_error_in_middle_unchanged() {
        do_check(
            r#"fn main() {
                if {}
                32 + 4$0$0
                return
                if {}
            }"#,
            "23",
            105,
        )
    }

    #[test]
    fn reparse_block_with_error_in_middle_fixed() {
        do_check(
            r#"fn main() {
                if {}
                32 + 4$0$0
                return
                if {}
            }"#,
            ";",
            105,
        )
    }
}