From a1c187eef3ba08076aedb5154929f7eda8d1b424 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 12 Aug 2020 18:26:51 +0200 Subject: Rename ra_syntax -> syntax --- crates/ra_syntax/src/parsing/reparsing.rs | 455 ------------------------------ 1 file changed, 455 deletions(-) delete mode 100644 crates/ra_syntax/src/parsing/reparsing.rs (limited to 'crates/ra_syntax/src/parsing/reparsing.rs') diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs deleted file mode 100644 index 4149f856a..000000000 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ /dev/null @@ -1,455 +0,0 @@ -//! Implementation of incremental re-parsing. -//! -//! We use two simple strategies for this: -//! - if the edit modifies only a single token (like changing an identifier's -//! letter), we replace only this token. -//! - otherwise, we search for the nearest `{}` block which contains the edit -//! and try to parse only this block. - -use parser::Reparser; -use text_edit::Indel; - -use crate::{ - algo, - parsing::{ - lexer::{lex_single_syntax_kind, tokenize, Token}, - text_token_source::TextTokenSource, - text_tree_sink::TextTreeSink, - }, - syntax_node::{GreenNode, GreenToken, NodeOrToken, SyntaxElement, SyntaxNode}, - SyntaxError, - SyntaxKind::*, - TextRange, TextSize, T, -}; - -pub(crate) fn incremental_reparse( - node: &SyntaxNode, - edit: &Indel, - errors: Vec, -) -> Option<(GreenNode, Vec, TextRange)> { - if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) { - return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range)); - } - - if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) { - return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range)); - } - None -} - -fn reparse_token<'node>( - root: &'node SyntaxNode, - edit: &Indel, -) -> Option<(GreenNode, Vec, TextRange)> { - let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone(); - let prev_token_kind = prev_token.kind(); - match prev_token_kind { - WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => { - if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT { - // removing a new line may extends previous token - let deleted_range = edit.delete - prev_token.text_range().start(); - if prev_token.text()[deleted_range].contains('\n') { - return None; - } - } - - let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit); - let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?; - - if new_token_kind != prev_token_kind - || (new_token_kind == IDENT && is_contextual_kw(&new_text)) - { - return None; - } - - // Check that edited token is not a part of the bigger token. - // E.g. if for source code `bruh"str"` the user removed `ruh`, then - // `b` no longer remains an identifier, but becomes a part of byte string literal - if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) { - new_text.push(next_char); - let token_with_next_char = lex_single_syntax_kind(&new_text); - if let Some((_kind, _error)) = token_with_next_char { - return None; - } - new_text.pop(); - } - - let new_token = - GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into()); - Some(( - prev_token.replace_with(new_token), - new_err.into_iter().collect(), - prev_token.text_range(), - )) - } - _ => None, - } -} - -fn reparse_block<'node>( - root: &'node SyntaxNode, - edit: &Indel, -) -> Option<(GreenNode, Vec, TextRange)> { - let (node, reparser) = find_reparsable_node(root, edit.delete)?; - let text = get_text_after_edit(node.clone().into(), edit); - - let (tokens, new_lexer_errors) = tokenize(&text); - if !is_balanced(&tokens) { - return None; - } - - let mut token_source = TextTokenSource::new(&text, &tokens); - let mut tree_sink = TextTreeSink::new(&text, &tokens); - reparser.parse(&mut token_source, &mut tree_sink); - - let (green, mut new_parser_errors) = tree_sink.finish(); - new_parser_errors.extend(new_lexer_errors); - - Some((node.replace_with(green), new_parser_errors, node.text_range())) -} - -fn get_text_after_edit(element: SyntaxElement, edit: &Indel) -> String { - let edit = Indel::replace(edit.delete - element.text_range().start(), edit.insert.clone()); - - let mut text = match element { - NodeOrToken::Token(token) => token.text().to_string(), - NodeOrToken::Node(node) => node.text().to_string(), - }; - edit.apply(&mut text); - text -} - -fn is_contextual_kw(text: &str) -> bool { - matches!(text, "auto" | "default" | "union") -} - -fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> { - let node = algo::find_covering_element(node, range); - - let mut ancestors = match node { - NodeOrToken::Token(it) => it.parent().ancestors(), - NodeOrToken::Node(it) => it.ancestors(), - }; - ancestors.find_map(|node| { - let first_child = node.first_child_or_token().map(|it| it.kind()); - let parent = node.parent().map(|it| it.kind()); - Reparser::for_node(node.kind(), first_child, parent).map(|r| (node, r)) - }) -} - -fn is_balanced(tokens: &[Token]) -> bool { - if tokens.is_empty() - || tokens.first().unwrap().kind != T!['{'] - || tokens.last().unwrap().kind != T!['}'] - { - return false; - } - let mut balance = 0usize; - for t in &tokens[1..tokens.len() - 1] { - match t.kind { - T!['{'] => balance += 1, - T!['}'] => { - balance = match balance.checked_sub(1) { - Some(b) => b, - None => return false, - } - } - _ => (), - } - } - balance == 0 -} - -fn merge_errors( - old_errors: Vec, - new_errors: Vec, - range_before_reparse: TextRange, - edit: &Indel, -) -> Vec { - let mut res = Vec::new(); - - for old_err in old_errors { - let old_err_range = old_err.range(); - if old_err_range.end() <= range_before_reparse.start() { - res.push(old_err); - } else if old_err_range.start() >= range_before_reparse.end() { - let inserted_len = TextSize::of(&edit.insert); - res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len())); - // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug) - } - } - res.extend(new_errors.into_iter().map(|new_err| { - // fighting borrow checker with a variable ;) - let offseted_range = new_err.range() + range_before_reparse.start(); - new_err.with_range(offseted_range) - })); - res -} - -#[cfg(test)] -mod tests { - use test_utils::{assert_eq_text, extract_range}; - - use super::*; - use crate::{AstNode, Parse, SourceFile}; - - fn do_check(before: &str, replace_with: &str, reparsed_len: u32) { - let (range, before) = extract_range(before); - let edit = Indel::replace(range, replace_with.to_owned()); - let after = { - let mut after = before.clone(); - edit.apply(&mut after); - after - }; - - let fully_reparsed = SourceFile::parse(&after); - let incrementally_reparsed: Parse = { - let before = SourceFile::parse(&before); - let (green, new_errors, range) = - incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap(); - assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length"); - Parse::new(green, new_errors) - }; - - assert_eq_text!( - &format!("{:#?}", fully_reparsed.tree().syntax()), - &format!("{:#?}", incrementally_reparsed.tree().syntax()), - ); - assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors()); - } - - #[test] // FIXME: some test here actually test token reparsing - fn reparse_block_tests() { - do_check( - r" -fn foo() { - let x = foo + <|>bar<|> -} -", - "baz", - 3, - ); - do_check( - r" -fn foo() { - let x = foo<|> + bar<|> -} -", - "baz", - 25, - ); - do_check( - r" -struct Foo { - f: foo<|><|> -} -", - ",\n g: (),", - 14, - ); - do_check( - r" -fn foo { - let; - 1 + 1; - <|>92<|>; -} -", - "62", - 31, // FIXME: reparse only int literal here - ); - do_check( - r" -mod foo { - fn <|><|> -} -", - "bar", - 11, - ); - - do_check( - r" -trait Foo { - type <|>Foo<|>; -} -", - "Output", - 3, - ); - do_check( - r" -impl IntoIterator for Foo { - f<|><|> -} -", - "n next(", - 9, - ); - do_check(r"use a::b::{foo,<|>,bar<|>};", "baz", 10); - do_check( - r" -pub enum A { - Foo<|><|> -} -", - "\nBar;\n", - 11, - ); - do_check( - r" -foo!{a, b<|><|> d} -", - ", c[3]", - 8, - ); - do_check( - r" -fn foo() { - vec![<|><|>] -} -", - "123", - 14, - ); - do_check( - r" -extern { - fn<|>;<|> -} -", - " exit(code: c_int)", - 11, - ); - } - - #[test] - fn reparse_token_tests() { - do_check( - r"<|><|> -fn foo() -> i32 { 1 } -", - "\n\n\n \n", - 1, - ); - do_check( - r" -fn foo() -> <|><|> {} -", - " \n", - 2, - ); - do_check( - r" -fn <|>foo<|>() -> i32 { 1 } -", - "bar", - 3, - ); - do_check( - r" -fn foo<|><|>foo() { } -", - "bar", - 6, - ); - do_check( - r" -fn foo /* <|><|> */ () {} -", - "some comment", - 6, - ); - do_check( - r" -fn baz <|><|> () {} -", - " \t\t\n\n", - 2, - ); - do_check( - r" -fn baz <|><|> () {} -", - " \t\t\n\n", - 2, - ); - do_check( - r" -/// foo <|><|>omment -mod { } -", - "c", - 14, - ); - do_check( - r#" -fn -> &str { "Hello<|><|>" } -"#, - ", world", - 7, - ); - do_check( - r#" -fn -> &str { // "Hello<|><|>" -"#, - ", world", - 10, - ); - do_check( - r##" -fn -> &str { r#"Hello<|><|>"# -"##, - ", world", - 10, - ); - do_check( - r" -#[derive(<|>Copy<|>)] -enum Foo { - -} -", - "Clone", - 4, - ); - } - - #[test] - fn reparse_str_token_with_error_unchanged() { - do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24); - } - - #[test] - fn reparse_str_token_with_error_fixed() { - do_check(r#""unterinated<|><|>"#, "\"", 12); - } - - #[test] - fn reparse_block_with_error_in_middle_unchanged() { - do_check( - r#"fn main() { - if {} - 32 + 4<|><|> - return - if {} - }"#, - "23", - 105, - ) - } - - #[test] - fn reparse_block_with_error_in_middle_fixed() { - do_check( - r#"fn main() { - if {} - 32 + 4<|><|> - return - if {} - }"#, - ";", - 105, - ) - } -} -- cgit v1.2.3