From 9fdf984958901a6bf16772c2c88b3163f044b390 Mon Sep 17 00:00:00 2001 From: Veetaha Date: Thu, 6 Feb 2020 02:33:18 +0200 Subject: ra_syntax: reshape SyntaxError for the sake of removing redundancy --- crates/ra_syntax/src/parsing/lexer.rs | 92 +++++++++---------------------- crates/ra_syntax/src/parsing/reparsing.rs | 27 +++++---- 2 files changed, 43 insertions(+), 76 deletions(-) (limited to 'crates/ra_syntax/src/parsing') diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index f889e6a1d..f2684c852 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -2,7 +2,7 @@ //! It is just a bridge to `rustc_lexer`. use crate::{ - SyntaxError, SyntaxErrorKind, + SyntaxError, SyntaxKind::{self, *}, TextRange, TextUnit, }; @@ -41,13 +41,13 @@ pub fn tokenize(text: &str) -> (Vec, Vec) { let token_len = TextUnit::from_usize(rustc_token.len); let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len); - let (syntax_kind, error) = + let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]); tokens.push(Token { kind: syntax_kind, len: token_len }); - if let Some(error) = error { - errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range)); + if let Some(err_message) = err_message { + errors.push(SyntaxError::new(err_message, token_range)); } offset += rustc_token.len; @@ -94,61 +94,21 @@ fn lex_first_token(text: &str) -> Option<(Token, Option)> { } let rustc_token = rustc_lexer::first_token(text); - let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text); + let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text); let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) }; - let error = error.map(|error| { - SyntaxError::new( - SyntaxErrorKind::TokenizeError(error), - TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)), - ) + let optional_error = err_message.map(|err_message| { + SyntaxError::new(err_message, TextRange::from_to(0.into(), TextUnit::of_str(text))) }); - Some((token, error)) -} - -// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice: -// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067 - -/// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant. -/// It describes all the types of errors that may happen during the tokenization -/// of Rust source. -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub enum TokenizeError { - /// Base prefix was provided, but there were no digits - /// after it, e.g. `0x`, `0b`. - EmptyInt, - /// Float exponent lacks digits e.g. `12.34e+`, `12.3E+`, `12e-`, `1_E-`, - EmptyExponent, - - /// Block comment lacks trailing delimiter `*/` - UnterminatedBlockComment, - /// Character literal lacks trailing delimiter `'` - UnterminatedChar, - /// Characterish byte literal lacks trailing delimiter `'` - UnterminatedByte, - /// String literal lacks trailing delimiter `"` - UnterminatedString, - /// Byte string literal lacks trailing delimiter `"` - UnterminatedByteString, - /// Raw literal lacks trailing delimiter e.g. `"##` - UnterminatedRawString, - /// Raw byte string literal lacks trailing delimiter e.g. `"##` - UnterminatedRawByteString, - - /// Raw string lacks a quote after the pound characters e.g. `r###` - UnstartedRawString, - /// Raw byte string lacks a quote after the pound characters e.g. `br###` - UnstartedRawByteString, - - /// Lifetime starts with a number e.g. `'4ever` - LifetimeStartsWithNumber, + Some((token, optional_error)) } +/// Returns `SyntaxKind` and an optional tokenize error message. fn rustc_token_kind_to_syntax_kind( rustc_token_kind: &rustc_lexer::TokenKind, token_text: &str, -) -> (SyntaxKind, Option) { +) -> (SyntaxKind, Option<&'static str>) { // A note on an intended tradeoff: // We drop some useful infromation here (see patterns with double dots `..`) // Storing that info in `SyntaxKind` is not possible due to its layout requirements of @@ -156,14 +116,15 @@ fn rustc_token_kind_to_syntax_kind( let syntax_kind = { use rustc_lexer::TokenKind as TK; - use TokenizeError as TE; - match rustc_token_kind { TK::LineComment => COMMENT, TK::BlockComment { terminated: true } => COMMENT, TK::BlockComment { terminated: false } => { - return (COMMENT, Some(TE::UnterminatedBlockComment)); + return ( + COMMENT, + Some("Missing trailing `*/` symbols to terminate the block comment"), + ); } TK::Whitespace => WHITESPACE, @@ -181,7 +142,7 @@ fn rustc_token_kind_to_syntax_kind( TK::Lifetime { starts_with_number: false } => LIFETIME, TK::Lifetime { starts_with_number: true } => { - return (LIFETIME, Some(TE::LifetimeStartsWithNumber)) + return (LIFETIME, Some("Lifetime name cannot start with a number")) } TK::Semi => SEMI, @@ -217,57 +178,56 @@ fn rustc_token_kind_to_syntax_kind( return (syntax_kind, None); - fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option) { + fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) { use rustc_lexer::LiteralKind as LK; - use TokenizeError as TE; #[rustfmt::skip] let syntax_kind = match *kind { LK::Int { empty_int: false, .. } => INT_NUMBER, LK::Int { empty_int: true, .. } => { - return (INT_NUMBER, Some(TE::EmptyInt)) + return (INT_NUMBER, Some("Missing digits after the integer base prefix")) } LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER, LK::Float { empty_exponent: true, .. } => { - return (FLOAT_NUMBER, Some(TE::EmptyExponent)) + return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol")) } LK::Char { terminated: true } => CHAR, LK::Char { terminated: false } => { - return (CHAR, Some(TE::UnterminatedChar)) + return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal")) } LK::Byte { terminated: true } => BYTE, LK::Byte { terminated: false } => { - return (BYTE, Some(TE::UnterminatedByte)) + return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal")) } LK::Str { terminated: true } => STRING, LK::Str { terminated: false } => { - return (STRING, Some(TE::UnterminatedString)) + return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal")) } LK::ByteStr { terminated: true } => BYTE_STRING, LK::ByteStr { terminated: false } => { - return (BYTE_STRING, Some(TE::UnterminatedByteString)) + return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal")) } LK::RawStr { started: true, terminated: true, .. } => RAW_STRING, LK::RawStr { started: true, terminated: false, .. } => { - return (RAW_STRING, Some(TE::UnterminatedRawString)) + return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal")) } LK::RawStr { started: false, .. } => { - return (RAW_STRING, Some(TE::UnstartedRawString)) + return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal")) } LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING, LK::RawByteStr { started: true, terminated: false, .. } => { - return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString)) + return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal")) } LK::RawByteStr { started: false, .. } => { - return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString)) + return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal")) } }; diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index a86da0675..41a355ec7 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -87,7 +87,7 @@ fn reparse_block<'node>( edit: &AtomTextEdit, ) -> Option<(GreenNode, Vec, TextRange)> { let (node, reparser) = find_reparsable_node(root, edit.delete)?; - let text = get_text_after_edit(node.clone().into(), &edit); + let text = get_text_after_edit(node.clone().into(), edit); let (tokens, new_lexer_errors) = tokenize(&text); if !is_balanced(&tokens) { @@ -162,20 +162,27 @@ fn is_balanced(tokens: &[Token]) -> bool { fn merge_errors( old_errors: Vec, new_errors: Vec, - old_range: TextRange, + range_before_reparse: TextRange, edit: &AtomTextEdit, ) -> Vec { let mut res = Vec::new(); - for e in old_errors { - if e.offset() <= old_range.start() { - res.push(e) - } else if e.offset() >= old_range.end() { - res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len())); + + for old_err in old_errors { + let old_err_range = *old_err.range(); + // FIXME: make sure that .start() was here previously by a mistake + if old_err_range.end() <= range_before_reparse.start() { + res.push(old_err); + } else if old_err_range.start() >= range_before_reparse.end() { + let inserted_len = TextUnit::of_str(&edit.insert); + res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len())); + // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug) } } - for e in new_errors { - res.push(e.add_offset(old_range.start(), 0.into())); - } + res.extend(new_errors.into_iter().map(|new_err| { + // fighting borrow checker with a variable ;) + let offseted_range = *new_err.range() + range_before_reparse.start(); + new_err.with_range(offseted_range) + })); res } -- cgit v1.2.3 From e00922d113e5f998893419dedae511043890f9fa Mon Sep 17 00:00:00 2001 From: Veetaha Date: Mon, 10 Feb 2020 02:08:49 +0200 Subject: ra_syntax: SyntaxError::range() now returns by value --- crates/ra_syntax/src/parsing/reparsing.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'crates/ra_syntax/src/parsing') diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index 41a355ec7..4faeeab45 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -180,7 +180,7 @@ fn merge_errors( } res.extend(new_errors.into_iter().map(|new_err| { // fighting borrow checker with a variable ;) - let offseted_range = *new_err.range() + range_before_reparse.start(); + let offseted_range = new_err.range() + range_before_reparse.start(); new_err.with_range(offseted_range) })); res -- cgit v1.2.3 From b510e77fbeca7d4691ddfdbb540d0975bdeef4b3 Mon Sep 17 00:00:00 2001 From: Veetaha Date: Mon, 10 Feb 2020 02:10:56 +0200 Subject: ra_syntax: followup fix for making SyntaxError::range() to return by value --- crates/ra_syntax/src/parsing/reparsing.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'crates/ra_syntax/src/parsing') diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index 4faeeab45..57453e220 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -168,7 +168,7 @@ fn merge_errors( let mut res = Vec::new(); for old_err in old_errors { - let old_err_range = *old_err.range(); + let old_err_range = old_err.range(); // FIXME: make sure that .start() was here previously by a mistake if old_err_range.end() <= range_before_reparse.start() { res.push(old_err); -- cgit v1.2.3 From 053ccf4121797e4e559e3225d46d3f23cb1ad70b Mon Sep 17 00:00:00 2001 From: Veetaha Date: Tue, 18 Feb 2020 02:11:16 +0200 Subject: ra_syntax: fix reparsing merging errors, also now reparse_token() reports errors --- crates/ra_syntax/src/parsing/reparsing.rs | 57 +++++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 7 deletions(-) (limited to 'crates/ra_syntax/src/parsing') diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index 57453e220..aad70d015 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -27,8 +27,8 @@ pub(crate) fn incremental_reparse( edit: &AtomTextEdit, errors: Vec, ) -> Option<(GreenNode, Vec, TextRange)> { - if let Some((green, old_range)) = reparse_token(node, &edit) { - return Some((green, merge_errors(errors, Vec::new(), old_range, edit), old_range)); + if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) { + return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range)); } if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) { @@ -40,7 +40,7 @@ pub(crate) fn incremental_reparse( fn reparse_token<'node>( root: &'node SyntaxNode, edit: &AtomTextEdit, -) -> Option<(GreenNode, TextRange)> { +) -> Option<(GreenNode, Vec, TextRange)> { let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone(); let prev_token_kind = prev_token.kind(); match prev_token_kind { @@ -54,7 +54,7 @@ fn reparse_token<'node>( } let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit); - let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?; + let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?; if new_token_kind != prev_token_kind || (new_token_kind == IDENT && is_contextual_kw(&new_text)) @@ -76,7 +76,11 @@ fn reparse_token<'node>( let new_token = GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into()); - Some((prev_token.replace_with(new_token), prev_token.text_range())) + Some(( + prev_token.replace_with(new_token), + new_err.into_iter().collect(), + prev_token.text_range(), + )) } _ => None, } @@ -200,9 +204,9 @@ mod tests { let fully_reparsed = SourceFile::parse(&after); let incrementally_reparsed: Parse = { - let f = SourceFile::parse(&before); + let before = SourceFile::parse(&before); let (green, new_errors, range) = - incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap(); + incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap(); assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length"); Parse::new(green, new_errors) }; @@ -211,6 +215,7 @@ mod tests { &format!("{:#?}", fully_reparsed.tree().syntax()), &format!("{:#?}", incrementally_reparsed.tree().syntax()), ); + assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors()); } #[test] // FIXME: some test here actually test token reparsing @@ -409,4 +414,42 @@ enum Foo { 4, ); } + + #[test] + fn reparse_str_token_with_error_unchanged() { + do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24); + } + + #[test] + fn reparse_str_token_with_error_fixed() { + do_check(r#""unterinated<|><|>"#, "\"", 12); + } + + #[test] + fn reparse_block_with_error_in_middle_unchanged() { + do_check( + r#"fn main() { + if {} + 32 + 4<|><|> + return + if {} + }"#, + "23", + 105, + ) + } + + #[test] + fn reparse_block_with_error_in_middle_fixed() { + do_check( + r#"fn main() { + if {} + 32 + 4<|><|> + return + if {} + }"#, + ";", + 105, + ) + } } -- cgit v1.2.3