author     bors[bot] <26634292+bors[bot]@users.noreply.github.com>  2020-02-18 12:57:26 +0000
committer  GitHub <[email protected]>  2020-02-18 12:57:26 +0000
commit     c447fe9bc06006a7080da782cf67d739c91b534c (patch)
tree       45cbc9578b24437da3eedc6a234784be22b1f38c /crates/ra_syntax/src/parsing
parent     742459c8fe08e359ae380e3e1dc0d059c0b4f871 (diff)
parent     053ccf4121797e4e559e3225d46d3f23cb1ad70b (diff)
Merge #3026
3026: ra_syntax: reshape SyntaxError for the sake of removing redundancy r=matklad a=Veetaha

Followup of #2911; also ticks some boxes on the todo list of #223.

**ATTENTION!** A big part of the diff of this PR consists of test data file changes.

Simplified `SyntaxError`, which was `SyntaxError { kind: { /* big enum */ }, location: Location }`, to `SyntaxError(String, TextRange)`. I am not sure whether the tuple struct is the best fit here; I am inclined to add names to the fields, since I already provide the getters `SyntaxError::message()` and `SyntaxError::range()`. I also removed `Location` altogether ...

This is currently WIP, because the following is not done:

- [ ] ~~Add tests to the `test_data` dir for unescape errors~~ *// I don't know where to put these errors in particular, because they are out of the scope of the lexer and parser. However, I have an idea in mind that we move all the validators we have right now to the parsing stage, but this is up to discussion...* **[UPD]** I came to the conclusion that the tree validation logic, which unescape errors are a part of, should be rethought: we currently have no tests and no place to put tests for tree validations. So I'd like to extract the potential redesign (maybe a move of tree validation to `ra_parser`) and the addition of tests into a separate task.

Co-authored-by: Veetaha <[email protected]>
Co-authored-by: Veetaha <[email protected]>
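For orientation, the reshaped type described above boils down to roughly the following. This is a sketch reconstructed from the PR description and the getters used in the diff below, not the crate's actual definition; the `TextRange` stand-in is a placeholder for `text_unit::TextRange`, which `ra_syntax` really uses.

```rust
/// Stand-in for `text_unit::TextRange` so the sketch is self-contained.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TextRange {
    start: u32,
    end: u32,
}

/// The reshaped error: a plain message plus a range, replacing the old
/// `SyntaxError { kind: SyntaxErrorKind, location: Location }`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SyntaxError(String, TextRange);

impl SyntaxError {
    pub fn new(message: impl Into<String>, range: TextRange) -> SyntaxError {
        SyntaxError(message.into(), range)
    }

    pub fn message(&self) -> &str {
        &self.0
    }

    pub fn range(&self) -> TextRange {
        self.1
    }

    /// Reparsing uses this to relocate an error after an edit (see
    /// `merge_errors` in the reparsing.rs diff below).
    pub fn with_range(mut self, range: TextRange) -> SyntaxError {
        self.1 = range;
        self
    }
}
```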
Diffstat (limited to 'crates/ra_syntax/src/parsing')
-rw-r--r--  crates/ra_syntax/src/parsing/lexer.rs      | 92
-rw-r--r--  crates/ra_syntax/src/parsing/reparsing.rs  | 84
2 files changed, 93 insertions, 83 deletions
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index f889e6a1d..f2684c852 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -2,7 +2,7 @@
 //! It is just a bridge to `rustc_lexer`.
 
 use crate::{
-    SyntaxError, SyntaxErrorKind,
+    SyntaxError,
     SyntaxKind::{self, *},
     TextRange, TextUnit,
 };
@@ -41,13 +41,13 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
         let token_len = TextUnit::from_usize(rustc_token.len);
         let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len);
 
-        let (syntax_kind, error) =
+        let (syntax_kind, err_message) =
             rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
 
         tokens.push(Token { kind: syntax_kind, len: token_len });
 
-        if let Some(error) = error {
-            errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range));
+        if let Some(err_message) = err_message {
+            errors.push(SyntaxError::new(err_message, token_range));
         }
 
         offset += rustc_token.len;
@@ -94,61 +94,21 @@ fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
     }
 
     let rustc_token = rustc_lexer::first_token(text);
-    let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
+    let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
 
     let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) };
-    let error = error.map(|error| {
-        SyntaxError::new(
-            SyntaxErrorKind::TokenizeError(error),
-            TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)),
-        )
-    });
+    let optional_error = err_message.map(|err_message| {
+        SyntaxError::new(err_message, TextRange::from_to(0.into(), TextUnit::of_str(text)))
+    });
 
-    Some((token, error))
+    Some((token, optional_error))
 }
 
-// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice:
-// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067
-
-/// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant.
-/// It describes all the types of errors that may happen during the tokenization
-/// of Rust source.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum TokenizeError {
-    /// Base prefix was provided, but there were no digits
-    /// after it, e.g. `0x`, `0b`.
-    EmptyInt,
-    /// Float exponent lacks digits e.g. `12.34e+`, `12.3E+`, `12e-`, `1_E-`,
-    EmptyExponent,
-
-    /// Block comment lacks trailing delimiter `*/`
-    UnterminatedBlockComment,
-    /// Character literal lacks trailing delimiter `'`
-    UnterminatedChar,
-    /// Characterish byte literal lacks trailing delimiter `'`
-    UnterminatedByte,
-    /// String literal lacks trailing delimiter `"`
-    UnterminatedString,
-    /// Byte string literal lacks trailing delimiter `"`
-    UnterminatedByteString,
-    /// Raw literal lacks trailing delimiter e.g. `"##`
-    UnterminatedRawString,
-    /// Raw byte string literal lacks trailing delimiter e.g. `"##`
-    UnterminatedRawByteString,
-
-    /// Raw string lacks a quote after the pound characters e.g. `r###`
-    UnstartedRawString,
-    /// Raw byte string lacks a quote after the pound characters e.g. `br###`
-    UnstartedRawByteString,
-
-    /// Lifetime starts with a number e.g. `'4ever`
-    LifetimeStartsWithNumber,
-}
-
+/// Returns `SyntaxKind` and an optional tokenize error message.
 fn rustc_token_kind_to_syntax_kind(
     rustc_token_kind: &rustc_lexer::TokenKind,
     token_text: &str,
-) -> (SyntaxKind, Option<TokenizeError>) {
+) -> (SyntaxKind, Option<&'static str>) {
     // A note on an intended tradeoff:
     // We drop some useful infromation here (see patterns with double dots `..`)
     // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
@@ -156,14 +116,15 @@ fn rustc_token_kind_to_syntax_kind(
 
     let syntax_kind = {
         use rustc_lexer::TokenKind as TK;
-        use TokenizeError as TE;
-
         match rustc_token_kind {
             TK::LineComment => COMMENT,
 
             TK::BlockComment { terminated: true } => COMMENT,
             TK::BlockComment { terminated: false } => {
-                return (COMMENT, Some(TE::UnterminatedBlockComment));
+                return (
+                    COMMENT,
+                    Some("Missing trailing `*/` symbols to terminate the block comment"),
+                );
             }
 
             TK::Whitespace => WHITESPACE,
@@ -181,7 +142,7 @@ fn rustc_token_kind_to_syntax_kind(
 
             TK::Lifetime { starts_with_number: false } => LIFETIME,
             TK::Lifetime { starts_with_number: true } => {
-                return (LIFETIME, Some(TE::LifetimeStartsWithNumber))
+                return (LIFETIME, Some("Lifetime name cannot start with a number"))
             }
 
             TK::Semi => SEMI,
@@ -217,57 +178,56 @@ fn rustc_token_kind_to_syntax_kind(
 
     return (syntax_kind, None);
 
-    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<TokenizeError>) {
+    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
         use rustc_lexer::LiteralKind as LK;
-        use TokenizeError as TE;
 
         #[rustfmt::skip]
         let syntax_kind = match *kind {
             LK::Int { empty_int: false, .. } => INT_NUMBER,
             LK::Int { empty_int: true, .. } => {
-                return (INT_NUMBER, Some(TE::EmptyInt))
+                return (INT_NUMBER, Some("Missing digits after the integer base prefix"))
             }
 
             LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
             LK::Float { empty_exponent: true, .. } => {
-                return (FLOAT_NUMBER, Some(TE::EmptyExponent))
+                return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol"))
            }
 
             LK::Char { terminated: true } => CHAR,
             LK::Char { terminated: false } => {
-                return (CHAR, Some(TE::UnterminatedChar))
+                return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal"))
             }
 
             LK::Byte { terminated: true } => BYTE,
             LK::Byte { terminated: false } => {
-                return (BYTE, Some(TE::UnterminatedByte))
+                return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal"))
             }
 
             LK::Str { terminated: true } => STRING,
             LK::Str { terminated: false } => {
-                return (STRING, Some(TE::UnterminatedString))
+                return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal"))
             }
 
 
             LK::ByteStr { terminated: true } => BYTE_STRING,
             LK::ByteStr { terminated: false } => {
-                return (BYTE_STRING, Some(TE::UnterminatedByteString))
+                return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal"))
             }
 
             LK::RawStr { started: true, terminated: true, .. } => RAW_STRING,
             LK::RawStr { started: true, terminated: false, .. } => {
-                return (RAW_STRING, Some(TE::UnterminatedRawString))
+                return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal"))
             }
             LK::RawStr { started: false, .. } => {
-                return (RAW_STRING, Some(TE::UnstartedRawString))
+                return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal"))
             }
 
             LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING,
             LK::RawByteStr { started: true, terminated: false, .. } => {
-                return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString))
+                return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"))
             }
             LK::RawByteStr { started: false, .. } => {
-                return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString))
+                return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal"))
             }
         };
 
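With `TokenizeError` gone, a lexer error now surfaces directly as a `&'static str` message, wrapped into a `SyntaxError` at the call site. A hedged usage sketch of the `tokenize` entry point from the first hunk above; `report_lex_errors` is a hypothetical caller, and since the `parsing` module is internal to `ra_syntax`, external code would not call this directly:

```rust
// Hypothetical caller of the `tokenize` function shown in the diff above.
fn report_lex_errors(text: &str) {
    let (tokens, errors) = tokenize(text);
    println!("lexed {} tokens", tokens.len());
    for err in errors {
        // For input like `"abc` this prints the message literal from the diff:
        //   Missing trailing `"` symbol to terminate the string literal
        println!("error at {:?}: {}", err.range(), err.message());
    }
}
```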
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index a86da0675..aad70d015 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -27,8 +27,8 @@ pub(crate) fn incremental_reparse(
     edit: &AtomTextEdit,
     errors: Vec<SyntaxError>,
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
-    if let Some((green, old_range)) = reparse_token(node, &edit) {
-        return Some((green, merge_errors(errors, Vec::new(), old_range, edit), old_range));
+    if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
+        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
     }
 
     if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
@@ -40,7 +40,7 @@ pub(crate) fn incremental_reparse(
 fn reparse_token<'node>(
     root: &'node SyntaxNode,
     edit: &AtomTextEdit,
-) -> Option<(GreenNode, TextRange)> {
+) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
     let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
     let prev_token_kind = prev_token.kind();
     match prev_token_kind {
@@ -54,7 +54,7 @@ fn reparse_token<'node>(
             }
 
             let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
-            let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?;
+            let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;
 
             if new_token_kind != prev_token_kind
                 || (new_token_kind == IDENT && is_contextual_kw(&new_text))
@@ -76,7 +76,11 @@ fn reparse_token<'node>(
 
             let new_token =
                 GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
-            Some((prev_token.replace_with(new_token), prev_token.text_range()))
+            Some((
+                prev_token.replace_with(new_token),
+                new_err.into_iter().collect(),
+                prev_token.text_range(),
+            ))
         }
         _ => None,
     }
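A note on the `new_err.into_iter().collect()` above: `lex_single_syntax_kind` yields at most one error as an `Option`, and `Option<T>`'s `IntoIterator` produces zero or one items, which collects straight into the `Vec<SyntaxError>` slot of the new return type. A standalone illustration of the idiom, with a plain `&str` standing in for `SyntaxError`:

```rust
fn main() {
    // `Option::into_iter` yields zero or one items, so `collect` builds
    // an empty or single-element Vec: exactly what `reparse_token` needs
    // to fill the `Vec<SyntaxError>` slot in its return type.
    let err: Option<&str> = Some("Missing trailing `'` symbol to terminate the character literal");
    let errs: Vec<&str> = err.into_iter().collect();
    assert_eq!(errs.len(), 1);

    let no_err: Option<&str> = None;
    let errs: Vec<&str> = no_err.into_iter().collect();
    assert!(errs.is_empty());
}
```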
@@ -87,7 +91,7 @@ fn reparse_block<'node>(
     edit: &AtomTextEdit,
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
     let (node, reparser) = find_reparsable_node(root, edit.delete)?;
-    let text = get_text_after_edit(node.clone().into(), &edit);
+    let text = get_text_after_edit(node.clone().into(), edit);
 
     let (tokens, new_lexer_errors) = tokenize(&text);
     if !is_balanced(&tokens) {
@@ -162,20 +166,27 @@ fn is_balanced(tokens: &[Token]) -> bool {
 fn merge_errors(
     old_errors: Vec<SyntaxError>,
     new_errors: Vec<SyntaxError>,
-    old_range: TextRange,
+    range_before_reparse: TextRange,
     edit: &AtomTextEdit,
 ) -> Vec<SyntaxError> {
     let mut res = Vec::new();
-    for e in old_errors {
-        if e.offset() <= old_range.start() {
-            res.push(e)
-        } else if e.offset() >= old_range.end() {
-            res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len()));
+
+    for old_err in old_errors {
+        let old_err_range = old_err.range();
+        // FIXME: make sure that .start() was here previously by a mistake
+        if old_err_range.end() <= range_before_reparse.start() {
+            res.push(old_err);
+        } else if old_err_range.start() >= range_before_reparse.end() {
+            let inserted_len = TextUnit::of_str(&edit.insert);
+            res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
+            // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
         }
     }
-    for e in new_errors {
-        res.push(e.add_offset(old_range.start(), 0.into()));
-    }
+    res.extend(new_errors.into_iter().map(|new_err| {
+        // fighting borrow checker with a variable ;)
+        let offseted_range = new_err.range() + range_before_reparse.start();
+        new_err.with_range(offseted_range)
+    }));
     res
 }
 
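The `// Note: extra parens` comment in this hunk is about unsigned `TextUnit`/`TextRange` arithmetic: the subtraction has to come last so that no intermediate value dips below zero. A standalone illustration with plain `u32` values; the numbers are hypothetical and just show why the grouping matters:

```rust
fn main() {
    // Hypothetical offsets: an old error start of 3, after an edit that
    // inserted 10 characters and deleted 7.
    let old_err_start: u32 = 3;
    let inserted_len: u32 = 10;
    let deleted_len: u32 = 7;

    // Grouping as in the diff: add first, then subtract. 3 + 10 - 7 = 6.
    let shifted = (old_err_start + inserted_len) - deleted_len;
    assert_eq!(shifted, 6);

    // The other grouping would compute 3 - 7 first and underflow
    // (a panic in debug builds), which is presumably the bug the
    // "HWAB (here was a bug)" comment alludes to:
    // let shifted = (old_err_start - deleted_len) + inserted_len;
}
```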
@@ -193,9 +204,9 @@ mod tests {
 
         let fully_reparsed = SourceFile::parse(&after);
         let incrementally_reparsed: Parse<SourceFile> = {
-            let f = SourceFile::parse(&before);
+            let before = SourceFile::parse(&before);
             let (green, new_errors, range) =
-                incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap();
+                incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
             assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
             Parse::new(green, new_errors)
         };
@@ -204,6 +215,7 @@ mod tests {
             &format!("{:#?}", fully_reparsed.tree().syntax()),
             &format!("{:#?}", incrementally_reparsed.tree().syntax()),
         );
+        assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
     }
 
     #[test] // FIXME: some test here actually test token reparsing
@@ -402,4 +414,42 @@ enum Foo {
             4,
         );
     }
+
+    #[test]
+    fn reparse_str_token_with_error_unchanged() {
+        do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24);
+    }
+
+    #[test]
+    fn reparse_str_token_with_error_fixed() {
+        do_check(r#""unterinated<|><|>"#, "\"", 12);
+    }
+
+    #[test]
+    fn reparse_block_with_error_in_middle_unchanged() {
+        do_check(
+            r#"fn main() {
+                if {}
+                32 + 4<|><|>
+                return
+                if {}
+            }"#,
+            "23",
+            105,
+        )
+    }
+
+    #[test]
+    fn reparse_block_with_error_in_middle_fixed() {
+        do_check(
+            r#"fn main() {
+                if {}
+                32 + 4<|><|>
+                return
+                if {}
+            }"#,
+            ";",
+            105,
+        )
+    }
 }
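In the new tests, the `<|>` marker pair delimits the text range the edit deletes, the second `do_check` argument is the replacement text, and the third is the expected length of the reparsed fragment (per the `assert_eq!` on `range.len()` earlier in this file). A hedged sketch of how such markers can be extracted; `extract_edit_range` is illustrative only, and the project's real `do_check` helper (defined earlier in this test module, outside the diff) works differently in detail:

```rust
// Illustrative marker extraction: find the two `<|>` markers, strip them,
// and return the deleted range as offsets into the marker-free text.
fn extract_edit_range(before_with_markers: &str) -> (String, (usize, usize)) {
    const MARKER: &str = "<|>";
    let start = before_with_markers.find(MARKER).expect("first `<|>` marker");
    let rest = &before_with_markers[start + MARKER.len()..];
    let end = start + rest.find(MARKER).expect("second `<|>` marker");
    let text = before_with_markers.replace(MARKER, "");
    (text, (start, end))
}

fn main() {
    let (text, (start, end)) = extract_edit_range(r#""<|>Unclosed<|> string literal"#);
    assert_eq!(&text[start..end], "Unclosed");
    assert_eq!(text, r#""Unclosed string literal"#);
}
```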