From 9fdf984958901a6bf16772c2c88b3163f044b390 Mon Sep 17 00:00:00 2001
From: Veetaha <gerzoh1@gmail.com>
Date: Thu, 6 Feb 2020 02:33:18 +0200
Subject: ra_syntax: reshape SyntaxError for the sake of removing redundancy

---
 crates/ra_syntax/src/parsing/lexer.rs     | 92 +++++++++----------------------
 crates/ra_syntax/src/parsing/reparsing.rs | 27 +++++----
 2 files changed, 43 insertions(+), 76 deletions(-)

(limited to 'crates/ra_syntax/src/parsing')
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index f889e6a1d..f2684c852 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -2,7 +2,7 @@
 //! It is just a bridge to `rustc_lexer`.
 
 use crate::{
-    SyntaxError, SyntaxErrorKind,
+    SyntaxError,
     SyntaxKind::{self, *},
     TextRange, TextUnit,
 };
@@ -41,13 +41,13 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
         let token_len = TextUnit::from_usize(rustc_token.len);
         let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len);
 
-        let (syntax_kind, error) =
+        let (syntax_kind, err_message) =
             rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
 
         tokens.push(Token { kind: syntax_kind, len: token_len });
 
-        if let Some(error) = error {
-            errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range));
+        if let Some(err_message) = err_message {
+            errors.push(SyntaxError::new(err_message, token_range));
         }
 
         offset += rustc_token.len;
@@ -94,61 +94,21 @@ fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
     }
 
     let rustc_token = rustc_lexer::first_token(text);
-    let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
+    let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
 
     let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) };
-    let error = error.map(|error| {
-        SyntaxError::new(
-            SyntaxErrorKind::TokenizeError(error),
-            TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)),
-        )
+    let optional_error = err_message.map(|err_message| {
+        SyntaxError::new(err_message, TextRange::from_to(0.into(), TextUnit::of_str(text)))
     });
 
-    Some((token, error))
-}
-
-// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice:
-// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067
-
-/// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant.
-/// It describes all the types of errors that may happen during the tokenization
-/// of Rust source.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum TokenizeError {
-    /// Base prefix was provided, but there were no digits
-    /// after it, e.g. `0x`, `0b`.
-    EmptyInt,
-    /// Float exponent lacks digits e.g. `12.34e+`, `12.3E+`, `12e-`, `1_E-`,
-    EmptyExponent,
-
-    /// Block comment lacks trailing delimiter `*/`
-    UnterminatedBlockComment,
-    /// Character literal lacks trailing delimiter `'`
-    UnterminatedChar,
-    /// Characterish byte literal lacks trailing delimiter `'`
-    UnterminatedByte,
-    /// String literal lacks trailing delimiter `"`
-    UnterminatedString,
-    /// Byte string literal lacks trailing delimiter `"`
-    UnterminatedByteString,
-    /// Raw literal lacks trailing delimiter e.g. `"##`
-    UnterminatedRawString,
-    /// Raw byte string literal lacks trailing delimiter e.g. `"##`
-    UnterminatedRawByteString,
-
-    /// Raw string lacks a quote after the pound characters e.g. `r###`
-    UnstartedRawString,
-    /// Raw byte string lacks a quote after the pound characters e.g. `br###`
-    UnstartedRawByteString,
-
-    /// Lifetime starts with a number e.g. `'4ever`
-    LifetimeStartsWithNumber,
+    Some((token, optional_error))
 }
 
+/// Returns `SyntaxKind` and an optional tokenize error message.
 fn rustc_token_kind_to_syntax_kind(
     rustc_token_kind: &rustc_lexer::TokenKind,
     token_text: &str,
-) -> (SyntaxKind, Option<TokenizeError>) {
+) -> (SyntaxKind, Option<&'static str>) {
     // A note on an intended tradeoff:
     // We drop some useful infromation here (see patterns with double dots `..`)
     // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
@@ -156,14 +116,15 @@ fn rustc_token_kind_to_syntax_kind(
 
     let syntax_kind = {
         use rustc_lexer::TokenKind as TK;
-        use TokenizeError as TE;
-
         match rustc_token_kind {
             TK::LineComment => COMMENT,
 
             TK::BlockComment { terminated: true } => COMMENT,
             TK::BlockComment { terminated: false } => {
-                return (COMMENT, Some(TE::UnterminatedBlockComment));
+                return (
+                    COMMENT,
+                    Some("Missing trailing `*/` symbols to terminate the block comment"),
+                );
             }
 
             TK::Whitespace => WHITESPACE,
@@ -181,7 +142,7 @@ fn rustc_token_kind_to_syntax_kind(
 
             TK::Lifetime { starts_with_number: false } => LIFETIME,
             TK::Lifetime { starts_with_number: true } => {
-                return (LIFETIME, Some(TE::LifetimeStartsWithNumber))
+                return (LIFETIME, Some("Lifetime name cannot start with a number"))
             }
 
             TK::Semi => SEMI,
@@ -217,57 +178,56 @@ fn rustc_token_kind_to_syntax_kind(
 
     return (syntax_kind, None);
 
-    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<TokenizeError>) {
+    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
         use rustc_lexer::LiteralKind as LK;
-        use TokenizeError as TE;
 
         #[rustfmt::skip]
         let syntax_kind = match *kind {
             LK::Int { empty_int: false, .. } => INT_NUMBER,
             LK::Int { empty_int: true, .. } => {
-                return (INT_NUMBER, Some(TE::EmptyInt))
+                return (INT_NUMBER, Some("Missing digits after the integer base prefix"))
             }
 
             LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
             LK::Float { empty_exponent: true, .. } => {
-                return (FLOAT_NUMBER, Some(TE::EmptyExponent))
+                return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol"))
             }
 
             LK::Char { terminated: true } => CHAR,
             LK::Char { terminated: false } => {
-                return (CHAR, Some(TE::UnterminatedChar))
+                return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal"))
             }
 
             LK::Byte { terminated: true } => BYTE,
             LK::Byte { terminated: false } => {
-                return (BYTE, Some(TE::UnterminatedByte))
+                return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal"))
             }
 
             LK::Str { terminated: true } => STRING,
             LK::Str { terminated: false } => {
-                return (STRING, Some(TE::UnterminatedString))
+                return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal"))
             }
 
 
             LK::ByteStr { terminated: true } => BYTE_STRING,
             LK::ByteStr { terminated: false } => {
-                return (BYTE_STRING, Some(TE::UnterminatedByteString))
+                return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal"))
             }
 
             LK::RawStr { started: true, terminated: true, .. } => RAW_STRING,
             LK::RawStr { started: true, terminated: false, .. } => {
-                return (RAW_STRING, Some(TE::UnterminatedRawString))
+                return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal"))
             }
             LK::RawStr { started: false, .. } => {
-                return (RAW_STRING, Some(TE::UnstartedRawString))
+                return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal"))
             }
 
             LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING,
             LK::RawByteStr { started: true, terminated: false, .. } => {
-                return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString))
+                return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"))
             }
             LK::RawByteStr { started: false, .. } => {
-                return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString))
+                return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal"))
             }
         };
 
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index a86da0675..41a355ec7 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -87,7 +87,7 @@ fn reparse_block<'node>(
     edit: &AtomTextEdit,
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
     let (node, reparser) = find_reparsable_node(root, edit.delete)?;
-    let text = get_text_after_edit(node.clone().into(), &edit);
+    let text = get_text_after_edit(node.clone().into(), edit);
 
     let (tokens, new_lexer_errors) = tokenize(&text);
     if !is_balanced(&tokens) {
@@ -162,20 +162,27 @@ fn is_balanced(tokens: &[Token]) -> bool {
 fn merge_errors(
     old_errors: Vec<SyntaxError>,
     new_errors: Vec<SyntaxError>,
-    old_range: TextRange,
+    range_before_reparse: TextRange,
     edit: &AtomTextEdit,
 ) -> Vec<SyntaxError> {
     let mut res = Vec::new();
-    for e in old_errors {
-        if e.offset() <= old_range.start() {
-            res.push(e)
-        } else if e.offset() >= old_range.end() {
-            res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len()));
+
+    for old_err in old_errors {
+        let old_err_range = *old_err.range();
+        // FIXME: make sure that .start() was here previously by a mistake
+        if old_err_range.end() <= range_before_reparse.start() {
+            res.push(old_err);
+        } else if old_err_range.start() >= range_before_reparse.end() {
+            let inserted_len = TextUnit::of_str(&edit.insert);
+            res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
+            // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
         }
     }
-    for e in new_errors {
-        res.push(e.add_offset(old_range.start(), 0.into()));
-    }
+    res.extend(new_errors.into_iter().map(|new_err| {
+        // fighting borrow checker with a variable ;)
+        let offseted_range = *new_err.range() + range_before_reparse.start();
+        new_err.with_range(offseted_range)
+    }));
     res
 }
 
-- 
cgit v1.2.3


From e00922d113e5f998893419dedae511043890f9fa Mon Sep 17 00:00:00 2001
From: Veetaha <gerzoh1@gmail.com>
Date: Mon, 10 Feb 2020 02:08:49 +0200
Subject: ra_syntax: SyntaxError::range() now returns by value

---
 crates/ra_syntax/src/parsing/reparsing.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'crates/ra_syntax/src/parsing')

diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index 41a355ec7..4faeeab45 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -180,7 +180,7 @@ fn merge_errors(
     }
     res.extend(new_errors.into_iter().map(|new_err| {
         // fighting borrow checker with a variable ;)
-        let offseted_range = *new_err.range() + range_before_reparse.start();
+        let offseted_range = new_err.range() + range_before_reparse.start();
         new_err.with_range(offseted_range)
     }));
     res
-- 
cgit v1.2.3


From b510e77fbeca7d4691ddfdbb540d0975bdeef4b3 Mon Sep 17 00:00:00 2001
From: Veetaha <gerzoh1@gmail.com>
Date: Mon, 10 Feb 2020 02:10:56 +0200
Subject: ra_syntax: followup fix for making SyntaxError::range() to return by
 value

---
 crates/ra_syntax/src/parsing/reparsing.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'crates/ra_syntax/src/parsing')

diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index 4faeeab45..57453e220 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -168,7 +168,7 @@ fn merge_errors(
     let mut res = Vec::new();
 
     for old_err in old_errors {
-        let old_err_range = *old_err.range();
+        let old_err_range = old_err.range();
         // FIXME: make sure that .start() was here previously by a mistake
         if old_err_range.end() <= range_before_reparse.start() {
             res.push(old_err);
-- 
cgit v1.2.3


From 053ccf4121797e4e559e3225d46d3f23cb1ad70b Mon Sep 17 00:00:00 2001
From: Veetaha <gerzoh1@gmail.com>
Date: Tue, 18 Feb 2020 02:11:16 +0200
Subject: ra_syntax: fix reparsing merging errors, also now reparse_token()
 reports errors

---
 crates/ra_syntax/src/parsing/reparsing.rs | 57 +++++++++++++++++++++++++++----
 1 file changed, 50 insertions(+), 7 deletions(-)

(limited to 'crates/ra_syntax/src/parsing')

diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index 57453e220..aad70d015 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -27,8 +27,8 @@ pub(crate) fn incremental_reparse(
     edit: &AtomTextEdit,
     errors: Vec<SyntaxError>,
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
-    if let Some((green, old_range)) = reparse_token(node, &edit) {
-        return Some((green, merge_errors(errors, Vec::new(), old_range, edit), old_range));
+    if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
+        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
     }
 
     if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
@@ -40,7 +40,7 @@ pub(crate) fn incremental_reparse(
 fn reparse_token<'node>(
     root: &'node SyntaxNode,
     edit: &AtomTextEdit,
-) -> Option<(GreenNode, TextRange)> {
+) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
     let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
     let prev_token_kind = prev_token.kind();
     match prev_token_kind {
@@ -54,7 +54,7 @@ fn reparse_token<'node>(
             }
 
             let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
-            let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?;
+            let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;
 
             if new_token_kind != prev_token_kind
                 || (new_token_kind == IDENT && is_contextual_kw(&new_text))
@@ -76,7 +76,11 @@ fn reparse_token<'node>(
 
             let new_token =
                 GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
-            Some((prev_token.replace_with(new_token), prev_token.text_range()))
+            Some((
+                prev_token.replace_with(new_token),
+                new_err.into_iter().collect(),
+                prev_token.text_range(),
+            ))
         }
         _ => None,
     }
@@ -200,9 +204,9 @@ mod tests {
 
         let fully_reparsed = SourceFile::parse(&after);
         let incrementally_reparsed: Parse<SourceFile> = {
-            let f = SourceFile::parse(&before);
+            let before = SourceFile::parse(&before);
             let (green, new_errors, range) =
-                incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap();
+                incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
             assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
             Parse::new(green, new_errors)
         };
@@ -211,6 +215,7 @@ mod tests {
             &format!("{:#?}", fully_reparsed.tree().syntax()),
             &format!("{:#?}", incrementally_reparsed.tree().syntax()),
         );
+        assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
     }
 
     #[test] // FIXME: some test here actually test token reparsing
@@ -409,4 +414,42 @@ enum Foo {
             4,
         );
     }
+
+    #[test]
+    fn reparse_str_token_with_error_unchanged() {
+        do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24);
+    }
+
+    #[test]
+    fn reparse_str_token_with_error_fixed() {
+        do_check(r#""unterinated<|><|>"#, "\"", 12);
+    }
+
+    #[test]
+    fn reparse_block_with_error_in_middle_unchanged() {
+        do_check(
+            r#"fn main() {
+                if {}
+                32 + 4<|><|>
+                return
+                if {}
+            }"#,
+            "23",
+            105,
+        )
+    }
+
+    #[test]
+    fn reparse_block_with_error_in_middle_fixed() {
+        do_check(
+            r#"fn main() {
+                if {}
+                32 + 4<|><|>
+                return
+                if {}
+            }"#,
+            ";",
+            105,
+        )
+    }
 }
-- 
cgit v1.2.3