From a1c187eef3ba08076aedb5154929f7eda8d1b424 Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Wed, 12 Aug 2020 18:26:51 +0200
Subject: Rename ra_syntax -> syntax

---
 crates/syntax/src/parsing/reparsing.rs | 455 +++++++++++++++++++++++++++++++++
 1 file changed, 455 insertions(+)
 create mode 100644 crates/syntax/src/parsing/reparsing.rs

diff --git a/crates/syntax/src/parsing/reparsing.rs b/crates/syntax/src/parsing/reparsing.rs
new file mode 100644
index 000000000..4149f856a
--- /dev/null
+++ b/crates/syntax/src/parsing/reparsing.rs
@@ -0,0 +1,455 @@
+//! Implementation of incremental re-parsing.
+//!
+//! We use two simple strategies for this:
+//! - if the edit modifies only a single token (like changing an identifier's
+//!   letter), we replace only this token.
+//! - otherwise, we search for the nearest `{}` block which contains the edit
+//!   and try to parse only this block.
+
+use parser::Reparser;
+use text_edit::Indel;
+
+use crate::{
+    algo,
+    parsing::{
+        lexer::{lex_single_syntax_kind, tokenize, Token},
+        text_token_source::TextTokenSource,
+        text_tree_sink::TextTreeSink,
+    },
+    syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode},
+    SyntaxError,
+    SyntaxKind::*,
+    TextRange, TextSize, T,
+};
+
+pub(crate) fn incremental_reparse(
+    node: &SyntaxNode,
+    edit: &Indel,
+    errors: Vec<SyntaxError>,
+) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
+    if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
+        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
+    }
+
+    if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
+        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
+    }
+    None
+}
+
+fn reparse_token<'node>(
+    root: &'node SyntaxNode,
+    edit: &Indel,
+) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
+    let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
+    let prev_token_kind = prev_token.kind();
+    match prev_token_kind {
+        WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
+            if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
+                // removing a newline may extend the previous token
+                let deleted_range = edit.delete - prev_token.text_range().start();
+                if prev_token.text()[deleted_range].contains('\n') {
+                    return None;
+                }
+            }
+
+            let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
+            let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;
+
+            if new_token_kind != prev_token_kind
+                || (new_token_kind == IDENT && is_contextual_kw(&new_text))
+            {
+                return None;
+            }
+
+            // Check that the edited token is not part of a bigger token.
+            // E.g. if for the source code `bruh"str"` the user removed `ruh`,
+            // then `b` no longer remains an identifier, but becomes part of a
+            // byte string literal.
+            if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
+                new_text.push(next_char);
+                let token_with_next_char = lex_single_syntax_kind(&new_text);
+                if let Some((_kind, _error)) = token_with_next_char {
+                    return None;
+                }
+                new_text.pop();
+            }
+
+            let new_token =
+                GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
+            Some((
+                prev_token.replace_with(new_token),
+                new_err.into_iter().collect(),
+                prev_token.text_range(),
+            ))
+        }
+        _ => None,
+    }
+}
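A minimal sketch of the next-char check above (assuming, as the code does, that lex_single_syntax_kind returns Some only when its entire input lexes as exactly one token):

    // Deleting `ruh` from `bruh"str"` leaves `b`, which still lexes as a
    // single identifier on its own...
    assert!(matches!(lex_single_syntax_kind("b"), Some((IDENT, None))));
    // ...but `b` plus the next character of the file (`"`) lexes as one
    // unterminated byte-string token, i.e. the edited token would fuse with
    // the literal after it. The Some(..) result makes reparse_token bail.
    assert!(lex_single_syntax_kind("b\"").is_some());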
+
+fn reparse_block<'node>(
+    root: &'node SyntaxNode,
+    edit: &Indel,
+) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
+    let (node, reparser) = find_reparsable_node(root, edit.delete)?;
+    let text = get_text_after_edit(node.clone().into(), edit);
+
+    let (tokens, new_lexer_errors) = tokenize(&text);
+    if !is_balanced(&tokens) {
+        return None;
+    }
+
+    let mut token_source = TextTokenSource::new(&text, &tokens);
+    let mut tree_sink = TextTreeSink::new(&text, &tokens);
+    reparser.parse(&mut token_source, &mut tree_sink);
+
+    let (green, mut new_parser_errors) = tree_sink.finish();
+    new_parser_errors.extend(new_lexer_errors);
+
+    Some((node.replace_with(green), new_parser_errors, node.text_range()))
+}
+
+fn get_text_after_edit(element: SyntaxElement, edit: &Indel) -> String {
+    let edit = Indel::replace(edit.delete - element.text_range().start(), edit.insert.clone());
+
+    let mut text = match element {
+        NodeOrToken::Token(token) => token.text().to_string(),
+        NodeOrToken::Node(node) => node.text().to_string(),
+    };
+    edit.apply(&mut text);
+    text
+}
+
+fn is_contextual_kw(text: &str) -> bool {
+    matches!(text, "auto" | "default" | "union")
+}
+
+fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
+    let node = algo::find_covering_element(node, range);
+
+    let mut ancestors = match node {
+        NodeOrToken::Token(it) => it.parent().ancestors(),
+        NodeOrToken::Node(it) => it.ancestors(),
+    };
+    ancestors.find_map(|node| {
+        let first_child = node.first_child_or_token().map(|it| it.kind());
+        let parent = node.parent().map(|it| it.kind());
+        Reparser::for_node(node.kind(), first_child, parent).map(|r| (node, r))
+    })
+}
+
+fn is_balanced(tokens: &[Token]) -> bool {
+    if tokens.is_empty()
+        || tokens.first().unwrap().kind != T!['{']
+        || tokens.last().unwrap().kind != T!['}']
+    {
+        return false;
+    }
+    let mut balance = 0usize;
+    for t in &tokens[1..tokens.len() - 1] {
+        match t.kind {
+            T!['{'] => balance += 1,
+            T!['}'] => {
+                balance = match balance.checked_sub(1) {
+                    Some(b) => b,
+                    None => return false,
+                }
+            }
+            _ => (),
+        }
+    }
+    balance == 0
+}
+
+fn merge_errors(
+    old_errors: Vec<SyntaxError>,
+    new_errors: Vec<SyntaxError>,
+    range_before_reparse: TextRange,
+    edit: &Indel,
+) -> Vec<SyntaxError> {
+    let mut res = Vec::new();
+
+    for old_err in old_errors {
+        let old_err_range = old_err.range();
+        if old_err_range.end() <= range_before_reparse.start() {
+            res.push(old_err);
+        } else if old_err_range.start() >= range_before_reparse.end() {
+            let inserted_len = TextSize::of(&edit.insert);
+            res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
+            // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
+        }
+    }
+    res.extend(new_errors.into_iter().map(|new_err| {
+        // fighting the borrow checker with a variable ;)
+        let offseted_range = new_err.range() + range_before_reparse.start();
+        new_err.with_range(offseted_range)
+    }));
+    res
+}
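Concretely, the range arithmetic in merge_errors works out like this (a worked example with made-up offsets: say the reparsed node spanned 20..40 before an edit that deleted 25..28 and inserted five characters, a net growth of +2):

    // old error at 10..15 -> kept as-is   (ends before the reparsed range)
    // old error at 50..55 -> 52..57       ((50..55 + 5) - 3)
    // old error at 22..24 -> dropped      (inside the reparsed range; superseded
    //                                      by errors from the fresh parse)
    // new error at  3..7  -> 23..27       (rebased by the node's old start, 20)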
+
+#[cfg(test)]
+mod tests {
+    use test_utils::{assert_eq_text, extract_range};
+
+    use super::*;
+    use crate::{AstNode, Parse, SourceFile};
+
+    fn do_check(before: &str, replace_with: &str, reparsed_len: u32) {
+        let (range, before) = extract_range(before);
+        let edit = Indel::replace(range, replace_with.to_owned());
+        let after = {
+            let mut after = before.clone();
+            edit.apply(&mut after);
+            after
+        };
+
+        let fully_reparsed = SourceFile::parse(&after);
+        let incrementally_reparsed: Parse<SourceFile> = {
+            let before = SourceFile::parse(&before);
+            let (green, new_errors, range) =
+                incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
+            assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
+            Parse::new(green, new_errors)
+        };
+
+        assert_eq_text!(
+            &format!("{:#?}", fully_reparsed.tree().syntax()),
+            &format!("{:#?}", incrementally_reparsed.tree().syntax()),
+        );
+        assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
+    }
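The `<|>` markers in the fixtures below never reach the parser: extract_range strips them and returns the TextRange they delimited, from which do_check builds its Indel. Roughly (a sketch; the exact offsets are illustrative):

    let (range, text) = extract_range("fn <|>foo<|>() {}");
    assert_eq!(text, "fn foo() {}");
    assert_eq!(range, TextRange::new(3.into(), 6.into())); // covers `foo`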
";", + 105, + ) + } +} -- cgit v1.2.3