From ad24976da38482948c586bdbc16004273662ff7e Mon Sep 17 00:00:00 2001
From: Veetaha
Date: Fri, 24 Jan 2020 03:39:23 +0200
Subject: ra_syntax: changed added diagnostics information returned from
 tokenize() (implemented with iterators)

---
 crates/ra_syntax/src/parsing/reparsing.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'crates/ra_syntax/src/parsing/reparsing.rs')

diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index 06bdda11d..3abc09877 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -46,8 +46,7 @@ fn reparse_token<'node>(
         WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
             if token.kind() == WHITESPACE || token.kind() == COMMENT {
                 // removing a new line may extends previous token
-                if token.text().to_string()[edit.delete - token.text_range().start()].contains('\n')
-                {
+                if token.text()[edit.delete - token.text_range().start()].contains('\n') {
                     return None;
                 }
             }
--
cgit v1.2.3
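The hunk above drops a `to_string()` call, indexing the token's text directly with the edit range rebased into token-local coordinates; this avoids allocating a temporary `String` on every reparse. Below is a minimal standalone sketch of that range-rebasing idea, using plain `std` types (`Range<usize>` and `&str`) as stand-ins for the real `TextRange`/`SyntaxText` API:

    use std::ops::Range;

    // Rebase a file-coordinate deleted range into token-local coordinates,
    // mirroring `edit.delete - token.text_range().start()` in the patch,
    // then check whether the deletion removes a newline.
    fn deletion_spans_newline(token_text: &str, token_start: usize, deleted: Range<usize>) -> bool {
        let local = deleted.start - token_start..deleted.end - token_start;
        token_text[local].contains('\n')
    }

    fn main() {
        // A whitespace token "  \n  " starting at file offset 10: deleting
        // file range 12..13 removes the newline, so single-token reparse
        // must bail out (the whitespace could merge with its neighbours).
        assert!(deletion_spans_newline("  \n  ", 10, 12..13));
        assert!(!deletion_spans_newline("  \n  ", 10, 10..12));
    }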
From ac37a11f04b31f792068a1cb50dbbf5ccd4d982d Mon Sep 17 00:00:00 2001
From: Veetaha
Date: Sun, 26 Jan 2020 20:44:49 +0200
Subject: Reimplemented lexer with vectors instead of iterators

---
 crates/ra_syntax/src/parsing/reparsing.rs | 52 ++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 19 deletions(-)

(limited to 'crates/ra_syntax/src/parsing/reparsing.rs')

diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index 3abc09877..ad1a7c855 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -12,7 +12,7 @@ use ra_text_edit::AtomTextEdit;
 use crate::{
     algo,
     parsing::{
-        lexer::{tokenize, Token},
+        lexer::{single_token, tokenize, ParsedTokens, Token},
         text_token_source::TextTokenSource,
         text_tree_sink::TextTreeSink,
     },
@@ -41,36 +41,42 @@ fn reparse_token<'node>(
     root: &'node SyntaxNode,
     edit: &AtomTextEdit,
 ) -> Option<(GreenNode, TextRange)> {
-    let token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
-    match token.kind() {
+    let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
+    let prev_token_kind = prev_token.kind();
+    match prev_token_kind {
         WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
-            if token.kind() == WHITESPACE || token.kind() == COMMENT {
+            if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
                 // removing a new line may extends previous token
-                if token.text()[edit.delete - token.text_range().start()].contains('\n') {
+                let deleted_range = edit.delete - prev_token.text_range().start();
+                if prev_token.text()[deleted_range].contains('\n') {
                     return None;
                 }
             }
 
-            let text = get_text_after_edit(token.clone().into(), &edit);
-            let lex_tokens = tokenize(&text);
-            let lex_token = match lex_tokens[..] {
-                [lex_token] if lex_token.kind == token.kind() => lex_token,
-                _ => return None,
-            };
+            let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
+            let new_token_kind = single_token(&new_text)?.token.kind;
 
-            if lex_token.kind == IDENT && is_contextual_kw(&text) {
+            if new_token_kind != prev_token_kind
+                || (new_token_kind == IDENT && is_contextual_kw(&new_text))
+            {
                 return None;
             }
 
-            if let Some(next_char) = root.text().char_at(token.text_range().end()) {
-                let tokens_with_next_char = tokenize(&format!("{}{}", text, next_char));
-                if tokens_with_next_char.len() == 1 {
+            // Check that edited token is not a part of the bigger token.
+            // E.g. if for source code `bruh"str"` the user removed `ruh`, then
+            // `b` no longer remains an identifier, but becomes a part of byte string literal
+            if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
+                new_text.push(next_char);
+                let token_with_next_char = single_token(&new_text);
+                if token_with_next_char.is_some() {
                     return None;
                 }
+                new_text.pop();
             }
 
-            let new_token = GreenToken::new(rowan::SyntaxKind(token.kind().into()), text.into());
-            Some((token.replace_with(new_token), token.text_range()))
+            let new_token =
+                GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
+            Some((prev_token.replace_with(new_token), prev_token.text_range()))
         }
         _ => None,
     }
@@ -82,12 +88,12 @@ fn reparse_block<'node>(
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
     let (node, reparser) = find_reparsable_node(root, edit.delete)?;
     let text = get_text_after_edit(node.clone().into(), &edit);
-    let tokens = tokenize(&text);
+    let ParsedTokens { tokens, errors } = tokenize(&text);
     if !is_balanced(&tokens) {
         return None;
     }
     let mut token_source = TextTokenSource::new(&text, &tokens);
-    let mut tree_sink = TextTreeSink::new(&text, &tokens);
+    let mut tree_sink = TextTreeSink::new(&text, &tokens, errors);
     reparser.parse(&mut token_source, &mut tree_sink);
     let (green, new_errors) = tree_sink.finish();
     Some((node.replace_with(green), new_errors, node.text_range()))
@@ -96,6 +102,9 @@
 fn get_text_after_edit(element: SyntaxElement, edit: &AtomTextEdit) -> String {
     let edit =
         AtomTextEdit::replace(edit.delete - element.text_range().start(), edit.insert.clone());
+
+    // Note: we could move this match to a method or even further: use enum_dispatch crate
+    // https://crates.io/crates/enum_dispatch
     let text = match element {
         NodeOrToken::Token(token) => token.text().to_string(),
         NodeOrToken::Node(node) => node.text().to_string(),
@@ -112,6 +121,9 @@ fn is_contextual_kw(text: &str) -> bool {
 
 fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
     let node = algo::find_covering_element(node, range);
+
+    // Note: we could move this match to a method or even further: use enum_dispatch crate
+    // https://crates.io/crates/enum_dispatch
     let mut ancestors = match node {
         NodeOrToken::Token(it) => it.parent().ancestors(),
         NodeOrToken::Node(it) => it.ancestors(),
@@ -181,6 +193,8 @@ mod tests {
         let fully_reparsed = SourceFile::parse(&after);
         let incrementally_reparsed: Parse<SourceFile> = {
             let f = SourceFile::parse(&before);
+            // FIXME: it seems this initialization statement is unnecessary (see edit in outer scope)
+            // Investigate whether it should really be removed.
             let edit = AtomTextEdit { delete: range, insert: replace_with.to_string() };
             let (green, new_errors, range) =
                 incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap();
--
cgit v1.2.3
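Besides switching the lexer to vectors via `ParsedTokens`, the patch above adds the push/relex/pop probe documented in the new comment: after relexing the edited token on its own, it glues the next source character back on and relexes again; if the result still lexes as a single token, the edit changed token boundaries and single-token reparse must give up. A self-contained sketch of that probe follows, with a deliberately tiny stand-in lexer (`lex_first_token` and `TokenKind` are assumptions for the example, not the real ra_syntax API):

    // Toy token kinds and lexer: just enough to reproduce the `bruh"str"`
    // example from the comment in the patch.
    #[derive(Clone, Copy, Debug, PartialEq)]
    enum TokenKind {
        Ident,
        ByteString,
        Other,
    }

    // Returns the kind and length of the first token of `text`.
    fn lex_first_token(text: &str) -> (TokenKind, usize) {
        if text.starts_with("b\"") {
            // An (unterminated) byte-string literal eats the rest of the input.
            (TokenKind::ByteString, text.len())
        } else if text.chars().next().map_or(false, |c| c.is_alphabetic()) {
            (TokenKind::Ident, text.chars().take_while(|c| c.is_alphanumeric()).count())
        } else {
            (TokenKind::Other, 1)
        }
    }

    fn main() {
        // In `bruh"str"`, deleting `ruh` leaves `b` followed by `"str"`.
        let mut new_text = String::from("b");
        // Relexing the edited token alone looks fine: `b` is an identifier.
        assert_eq!(lex_first_token(&new_text), (TokenKind::Ident, 1));

        // Glue the next source char back on, as the patch does with
        // `new_text.push(next_char)`: `b"` starts a byte-string literal,
        // i.e. the old token and its neighbour would fuse into one.
        new_text.push('"');
        let (kind, len) = lex_first_token(&new_text);
        assert_eq!(kind, TokenKind::ByteString);
        assert_eq!(len, new_text.len()); // one token covers everything: bail out
        new_text.pop();
    }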
From 9e7eaa959f9dc368a55f1a80b35651b78b3d0883 Mon Sep 17 00:00:00 2001
From: Veetaha
Date: Tue, 28 Jan 2020 07:09:13 +0200
Subject: ra_syntax: refactored the lexer design as per @matklad and @kiljacken
 PR review

---
 crates/ra_syntax/src/parsing/reparsing.rs | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'crates/ra_syntax/src/parsing/reparsing.rs')

diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index ad1a7c855..1f351e9fc 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -12,7 +12,7 @@ use ra_text_edit::AtomTextEdit;
 use crate::{
     algo,
     parsing::{
-        lexer::{single_token, tokenize, ParsedTokens, Token},
+        lexer::{lex_single_syntax_kind, tokenize, Token},
        text_token_source::TextTokenSource,
         text_tree_sink::TextTreeSink,
     },
@@ -54,7 +54,7 @@ fn reparse_token<'node>(
             }
 
             let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
-            let new_token_kind = single_token(&new_text)?.token.kind;
+            let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?;
 
             if new_token_kind != prev_token_kind
                 || (new_token_kind == IDENT && is_contextual_kw(&new_text))
@@ -67,8 +67,8 @@
             // `b` no longer remains an identifier, but becomes a part of byte string literal
             if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
                 new_text.push(next_char);
-                let token_with_next_char = single_token(&new_text);
-                if token_with_next_char.is_some() {
+                let token_with_next_char = lex_single_syntax_kind(&new_text);
+                if let Some((_kind, _error)) = token_with_next_char {
                     return None;
                 }
                 new_text.pop();
@@ -88,23 +88,26 @@ fn reparse_block<'node>(
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
     let (node, reparser) = find_reparsable_node(root, edit.delete)?;
     let text = get_text_after_edit(node.clone().into(), &edit);
-    let ParsedTokens { tokens, errors } = tokenize(&text);
+
+    let (tokens, new_lexer_errors) = tokenize(&text);
     if !is_balanced(&tokens) {
         return None;
     }
+
     let mut token_source = TextTokenSource::new(&text, &tokens);
-    let mut tree_sink = TextTreeSink::new(&text, &tokens, errors);
+    let mut tree_sink = TextTreeSink::new(&text, &tokens);
     reparser.parse(&mut token_source, &mut tree_sink);
-    let (green, new_errors) = tree_sink.finish();
-    Some((node.replace_with(green), new_errors, node.text_range()))
+
+    let (green, mut new_parser_errors) = tree_sink.finish();
+    new_parser_errors.extend(new_lexer_errors);
+
+    Some((node.replace_with(green), new_parser_errors, node.text_range()))
 }
 
 fn get_text_after_edit(element: SyntaxElement, edit: &AtomTextEdit) -> String {
     let edit =
         AtomTextEdit::replace(edit.delete - element.text_range().start(), edit.insert.clone());
 
-    // Note: we could move this match to a method or even further: use enum_dispatch crate
-    // https://crates.io/crates/enum_dispatch
     let text = match element {
         NodeOrToken::Token(token) => token.text().to_string(),
         NodeOrToken::Node(node) => node.text().to_string(),
@@ -122,8 +125,6 @@ fn is_contextual_kw(text: &str) -> bool {
 fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
     let node = algo::find_covering_element(node, range);
 
-    // Note: we could move this match to a method or even further: use enum_dispatch crate
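The review round above also changes how reparse_block reports problems: `tokenize` now returns the token vector together with the lexer's errors, and those are appended to the parser's errors before the result is returned, so callers see one combined diagnostics list. Below is a sketch of that merging pattern with toy stand-ins (`SyntaxError`, `tokenize`, and `parse` here are placeholders, not the real ra_syntax types):

    // Placeholder diagnostic type; the real SyntaxError carries a text range.
    #[derive(Debug)]
    struct SyntaxError {
        message: String,
    }

    // Stand-in for `tokenize`: returns tokens plus any lexer errors.
    fn tokenize(text: &str) -> (Vec<String>, Vec<SyntaxError>) {
        let tokens: Vec<String> = text.split_whitespace().map(str::to_owned).collect();
        let errors = tokens
            .iter()
            .filter(|t| t.contains('#'))
            .map(|t| SyntaxError { message: format!("bad character in `{}`", t) })
            .collect();
        (tokens, errors)
    }

    // Stand-in for parsing via TextTreeSink: returns a "tree" plus parser errors.
    fn parse(tokens: &[String]) -> (String, Vec<SyntaxError>) {
        let errors = if tokens.is_empty() {
            vec![SyntaxError { message: "empty block".to_owned() }]
        } else {
            Vec::new()
        };
        (tokens.join(" "), errors)
    }

    fn main() {
        let (tokens, new_lexer_errors) = tokenize("let x #broken");
        let (_green, mut new_parser_errors) = parse(&tokens);
        // Same move as the patch: parser errors absorb the lexer errors,
        // so the reparse result reports both through one Vec.
        new_parser_errors.extend(new_lexer_errors);
        assert_eq!(new_parser_errors.len(), 1);
    }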
-    // https://crates.io/crates/enum_dispatch
     let mut ancestors = match node {
         NodeOrToken::Token(it) => it.parent().ancestors(),
         NodeOrToken::Node(it) => it.ancestors(),
--
cgit v1.2.3
From c3117eea31392f8f63f1352a127f6b77139c375b Mon Sep 17 00:00:00 2001
From: Veetaha
Date: Tue, 28 Jan 2020 07:23:26 +0200
Subject: ra_syntax: removed unnecessary init statement from reparsing tests

---
 crates/ra_syntax/src/parsing/reparsing.rs | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'crates/ra_syntax/src/parsing/reparsing.rs')

diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index 1f351e9fc..a86da0675 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -194,9 +194,6 @@ mod tests {
         let fully_reparsed = SourceFile::parse(&after);
         let incrementally_reparsed: Parse<SourceFile> = {
             let f = SourceFile::parse(&before);
-            // FIXME: it seems this initialization statement is unnecessary (see edit in outer scope)
-            // Investigate whether it should really be removed.
-            let edit = AtomTextEdit { delete: range, insert: replace_with.to_string() };
             let (green, new_errors, range) =
                 incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap();
             assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
--
cgit v1.2.3
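The final cleanup resolves the FIXME introduced two commits earlier: the test block re-created an `edit` value that, per the FIXME, already existed in the enclosing scope, so the inner `let` merely shadowed the outer binding with an equal value. A toy illustration of why deleting such a shadowing initialization preserves behaviour follows (the struct and the claim that both values were built from the same inputs are assumptions for the example, not taken from the patch):

    use std::ops::Range;

    #[derive(Clone, Debug, PartialEq)]
    struct AtomTextEdit {
        delete: Range<usize>,
        insert: String,
    }

    fn main() {
        let range = 3..5;
        let replace_with = "x";
        // Outer binding, analogous to the `edit` the FIXME points at.
        let edit = AtomTextEdit { delete: range.clone(), insert: replace_with.to_string() };

        let result = {
            // Before the commit the block opened with its own
            // `let edit = AtomTextEdit { .. };` built from the same inputs:
            // it only shadowed the outer `edit` with an equal value.
            let shadowing = AtomTextEdit { delete: range.clone(), insert: replace_with.to_string() };
            assert_eq!(shadowing, edit);
            // Using the outer `edit` after deleting the inner `let` is
            // therefore observationally identical.
            format!("apply {:?}", edit)
        };
        println!("{}", result);
    }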