Merge #3026

3026: ra_syntax: reshape SyntaxError for the sake of removing redundancy r=matklad a=Veetaha Follow-up to #2911; also checks off some items on the todo list of #223. **ACHTUNG!** A big part of this PR's diff is changes to test data files. Simplified `SyntaxError`, which was `SyntaxError { kind: { /* big enum */ }, location: Location }`, to `SyntaxError(String, TextRange)`. I am not sure whether the tuple struct is the best fit here; I am inclined to add names to the fields, because I already provide the getters `SyntaxError::message()` and `SyntaxError::range()`. I also removed `Location` altogether ... This is currently WIP, because the following is not done: - [ ] ~~Add tests to the `test_data` dir for unescape errors *// I don't know where to put these errors in particular, because they are out of the scope of the lexer and parser. However, I have an idea in mind that we move all the validators we have right now to the parsing stage, but this is up for discussion...*~~ **[UPD]** I came to the conclusion that the tree validation logic, which unescape errors are a part of, should be rethought: we currently have no tests and no place to put tests for tree validations. So I'd like to split a potential redesign (maybe moving tree validation to ra_parser) and the addition of tests for it into a separate task. Co-authored-by: Veetaha <[email protected]> Co-authored-by: Veetaha <[email protected]>
author: bors[bot] <26634292+bors[bot]@users.noreply.github.com> 2020-02-18 12:57:26 +0000
committer: GitHub <[email protected]> 2020-02-18 12:57:26 +0000
commit: c447fe9bc06006a7080da782cf67d739c91b534c (patch)
tree: 45cbc9578b24437da3eedc6a234784be22b1f38c /crates/ra_syntax/src
parent: 742459c8fe08e359ae380e3e1dc0d059c0b4f871 (diff)
parent: 053ccf4121797e4e559e3225d46d3f23cb1ad70b (diff)
8 files changed, 205 insertions, 397 deletions
diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs
index f8f4b64c1..e3f74da6d 100644
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@@ -41,11 +41,9 @@ use crate::syntax_node::GreenNode;
 pub use crate::{
    algo::InsertPosition,
    ast::{AstNode, AstToken},
-    parsing::{
+    parsing::{lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token},
-        lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token, TokenizeError,
-    },
    ptr::{AstPtr, SyntaxNodePtr},
-    syntax_error::{Location, SyntaxError, SyntaxErrorKind},
+    syntax_error::SyntaxError,
    syntax_node::{
        Direction, NodeOrToken, SyntaxElement, SyntaxNode, SyntaxToken, SyntaxTreeBuilder,
    },
@@ -117,7 +115,7 @@ impl Parse<SourceFile> {
    pub fn debug_dump(&self) -> String {
        let mut buf = format!("{:#?}", self.tree().syntax());
        for err in self.errors.iter() {
-            writeln!(buf, "error {:?}: {}", err.location(), err.kind()).unwrap();
+            writeln!(buf, "error {:?}: {}", err.range(), err).unwrap();
        }
        buf
    }
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index f889e6a1d..f2684c852 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -2,7 +2,7 @@
 //! It is just a bridge to `rustc_lexer`.
 use crate::{
-    SyntaxError, SyntaxErrorKind,
+    SyntaxError,
    SyntaxKind::{self, *},
    TextRange, TextUnit,
 };
@@ -41,13 +41,13 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
        let token_len = TextUnit::from_usize(rustc_token.len);
        let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len);
-        let (syntax_kind, error) =
+        let (syntax_kind, err_message) =
            rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
        tokens.push(Token { kind: syntax_kind, len: token_len });
-        if let Some(error) = error {
+        if let Some(err_message) = err_message {
-            errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range));
+            errors.push(SyntaxError::new(err_message, token_range));
        }
        offset += rustc_token.len;
@@ -94,61 +94,21 @@ fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
    }
    let rustc_token = rustc_lexer::first_token(text);
-    let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
+    let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
    let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) };
-    let error = error.map(|error| {
+    let optional_error = err_message.map(|err_message| {
-        SyntaxError::new(
+        SyntaxError::new(err_message, TextRange::from_to(0.into(), TextUnit::of_str(text)))
-            SyntaxErrorKind::TokenizeError(error),
-            TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)),
-        )
    });
-    Some((token, error))
+    Some((token, optional_error))
-}
-// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice:
-// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067
-/// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant.
-/// It describes all the types of errors that may happen during the tokenization
-/// of Rust source.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum TokenizeError {
-    /// Base prefix was provided, but there were no digits
-    /// after it, e.g. `0x`, `0b`.
-    EmptyInt,
-    /// Float exponent lacks digits e.g. `12.34e+`, `12.3E+`, `12e-`, `1_E-`,
-    EmptyExponent,
-    /// Block comment lacks trailing delimiter `*/`
-    UnterminatedBlockComment,
-    /// Character literal lacks trailing delimiter `'`
-    UnterminatedChar,
-    /// Characterish byte literal lacks trailing delimiter `'`
-    UnterminatedByte,
-    /// String literal lacks trailing delimiter `"`
-    UnterminatedString,
-    /// Byte string literal lacks trailing delimiter `"`
-    UnterminatedByteString,
-    /// Raw literal lacks trailing delimiter e.g. `"##`
-    UnterminatedRawString,
-    /// Raw byte string literal lacks trailing delimiter e.g. `"##`
-    UnterminatedRawByteString,
-    /// Raw string lacks a quote after the pound characters e.g. `r###`
-    UnstartedRawString,
-    /// Raw byte string lacks a quote after the pound characters e.g. `br###`
-    UnstartedRawByteString,
-    /// Lifetime starts with a number e.g. `'4ever`
-    LifetimeStartsWithNumber,
 }
+/// Returns `SyntaxKind` and an optional tokenize error message.
 fn rustc_token_kind_to_syntax_kind(
    rustc_token_kind: &rustc_lexer::TokenKind,
    token_text: &str,
-) -> (SyntaxKind, Option<TokenizeError>) {
+) -> (SyntaxKind, Option<&'static str>) {
    // A note on an intended tradeoff:
    // We drop some useful infromation here (see patterns with double dots `..`)
    // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
@@ -156,14 +116,15 @@ fn rustc_token_kind_to_syntax_kind(
    let syntax_kind = {
        use rustc_lexer::TokenKind as TK;
-        use TokenizeError as TE;
        match rustc_token_kind {
            TK::LineComment => COMMENT,
            TK::BlockComment { terminated: true } => COMMENT,
            TK::BlockComment { terminated: false } => {
-                return (COMMENT, Some(TE::UnterminatedBlockComment));
+                return (
+                    COMMENT,
+                    Some("Missing trailing `*/` symbols to terminate the block comment"),
+                );
            }
            TK::Whitespace => WHITESPACE,
@@ -181,7 +142,7 @@ fn rustc_token_kind_to_syntax_kind(
            TK::Lifetime { starts_with_number: false } => LIFETIME,
            TK::Lifetime { starts_with_number: true } => {
-                return (LIFETIME, Some(TE::LifetimeStartsWithNumber))
+                return (LIFETIME, Some("Lifetime name cannot start with a number"))
            }
            TK::Semi => SEMI,
@@ -217,57 +178,56 @@ fn rustc_token_kind_to_syntax_kind(
    return (syntax_kind, None);
-    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<TokenizeError>) {
+    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
        use rustc_lexer::LiteralKind as LK;
-        use TokenizeError as TE;
        #[rustfmt::skip]
        let syntax_kind = match *kind {
            LK::Int { empty_int: false, .. } => INT_NUMBER,
            LK::Int { empty_int: true, .. } => {
-                return (INT_NUMBER, Some(TE::EmptyInt))
+                return (INT_NUMBER, Some("Missing digits after the integer base prefix"))
            }
            LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
            LK::Float { empty_exponent: true, .. } => {
-                return (FLOAT_NUMBER, Some(TE::EmptyExponent))
+                return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol"))
            }
            LK::Char { terminated: true } => CHAR,
            LK::Char { terminated: false } => {
-                return (CHAR, Some(TE::UnterminatedChar))
+                return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal"))
            }
            LK::Byte { terminated: true } => BYTE,
            LK::Byte { terminated: false } => {
-                return (BYTE, Some(TE::UnterminatedByte))
+                return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal"))
            }
            LK::Str { terminated: true } => STRING,
            LK::Str { terminated: false } => {
-                return (STRING, Some(TE::UnterminatedString))
+                return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal"))
            }
            LK::ByteStr { terminated: true } => BYTE_STRING,
            LK::ByteStr { terminated: false } => {
-                return (BYTE_STRING, Some(TE::UnterminatedByteString))
+                return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal"))
            }
            LK::RawStr { started: true, terminated: true, .. } => RAW_STRING,
            LK::RawStr { started: true, terminated: false, .. } => {
-                return (RAW_STRING, Some(TE::UnterminatedRawString))
+                return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal"))
            }
            LK::RawStr { started: false, .. } => {
-                return (RAW_STRING, Some(TE::UnstartedRawString))
+                return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal"))
            }
            LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING,
            LK::RawByteStr { started: true, terminated: false, .. } => {
-                return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString))
+                return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"))
            }
            LK::RawByteStr { started: false, .. } => {
-                return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString))
+                return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal"))
            }
        };
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index a86da0675..aad70d015 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -27,8 +27,8 @@ pub(crate) fn incremental_reparse(
    edit: &AtomTextEdit,
    errors: Vec<SyntaxError>,
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
-    if let Some((green, old_range)) = reparse_token(node, &edit) {
+    if let Some((green, new_errors, old_range)) = reparse_token(node, &edit) {
-        return Some((green, merge_errors(errors, Vec::new(), old_range, edit), old_range));
+        return Some((green, merge_errors(errors, new_errors, old_range, edit), old_range));
    }
    if let Some((green, new_errors, old_range)) = reparse_block(node, &edit) {
@@ -40,7 +40,7 @@ pub(crate) fn incremental_reparse(
 fn reparse_token<'node>(
    root: &'node SyntaxNode,
    edit: &AtomTextEdit,
-) -> Option<(GreenNode, TextRange)> {
+) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
    let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
    let prev_token_kind = prev_token.kind();
    match prev_token_kind {
@@ -54,7 +54,7 @@ fn reparse_token<'node>(
            }
            let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
-            let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?;
+            let (new_token_kind, new_err) = lex_single_syntax_kind(&new_text)?;
            if new_token_kind != prev_token_kind
                || (new_token_kind == IDENT && is_contextual_kw(&new_text))
@@ -76,7 +76,11 @@ fn reparse_token<'node>(
            let new_token =
                GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
-            Some((prev_token.replace_with(new_token), prev_token.text_range()))
+            Some((
+                prev_token.replace_with(new_token),
+                new_err.into_iter().collect(),
+                prev_token.text_range(),
+            ))
        }
        _ => None,
    }
@@ -87,7 +91,7 @@ fn reparse_block<'node>(
    edit: &AtomTextEdit,
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
    let (node, reparser) = find_reparsable_node(root, edit.delete)?;
-    let text = get_text_after_edit(node.clone().into(), &edit);
+    let text = get_text_after_edit(node.clone().into(), edit);
    let (tokens, new_lexer_errors) = tokenize(&text);
    if !is_balanced(&tokens) {
@@ -162,20 +166,27 @@ fn is_balanced(tokens: &[Token]) -> bool {
 fn merge_errors(
    old_errors: Vec<SyntaxError>,
    new_errors: Vec<SyntaxError>,
-    old_range: TextRange,
+    range_before_reparse: TextRange,
    edit: &AtomTextEdit,
 ) -> Vec<SyntaxError> {
    let mut res = Vec::new();
-    for e in old_errors {
-        if e.offset() <= old_range.start() {
+    for old_err in old_errors {
-            res.push(e)
+        let old_err_range = old_err.range();
-        } else if e.offset() >= old_range.end() {
+        // FIXME: make sure that .start() was here previously by a mistake
-            res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len()));
+        if old_err_range.end() <= range_before_reparse.start() {
+            res.push(old_err);
+        } else if old_err_range.start() >= range_before_reparse.end() {
+            let inserted_len = TextUnit::of_str(&edit.insert);
+            res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
+            // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
        }
    }
-    for e in new_errors {
+    res.extend(new_errors.into_iter().map(|new_err| {
-        res.push(e.add_offset(old_range.start(), 0.into()));
+        // fighting borrow checker with a variable ;)
-    }
+        let offseted_range = new_err.range() + range_before_reparse.start();
+        new_err.with_range(offseted_range)
+    }));
    res
 }
@@ -193,9 +204,9 @@ mod tests {
        let fully_reparsed = SourceFile::parse(&after);
        let incrementally_reparsed: Parse<SourceFile> = {
-            let f = SourceFile::parse(&before);
+            let before = SourceFile::parse(&before);
            let (green, new_errors, range) =
-                incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap();
+                incremental_reparse(before.tree().syntax(), &edit, before.errors.to_vec()).unwrap();
            assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length");
            Parse::new(green, new_errors)
        };
@@ -204,6 +215,7 @@ mod tests {
            &format!("{:#?}", fully_reparsed.tree().syntax()),
            &format!("{:#?}", incrementally_reparsed.tree().syntax()),
        );
+        assert_eq!(fully_reparsed.errors(), incrementally_reparsed.errors());
    }
    #[test] // FIXME: some test here actually test token reparsing
@@ -402,4 +414,42 @@ enum Foo {
            4,
        );
    }
+    #[test]
+    fn reparse_str_token_with_error_unchanged() {
+        do_check(r#""<|>Unclosed<|> string literal"#, "Still unclosed", 24);
+    }
+    #[test]
+    fn reparse_str_token_with_error_fixed() {
+        do_check(r#""unterinated<|><|>"#, "\"", 12);
+    }
+    #[test]
+    fn reparse_block_with_error_in_middle_unchanged() {
+        do_check(
+            r#"fn main() {
+                if {}
+                32 + 4<|><|>
+                return
+                if {}
+            }"#,
+            "23",
+            105,
+        )
+    }
+    #[test]
+    fn reparse_block_with_error_in_middle_fixed() {
+        do_check(
+            r#"fn main() {
+                if {}
+                32 + 4<|><|>
+                return
+                if {}
+            }"#,
+            ";",
+            105,
+        )
+    }
 }
diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs
index 7f9d36618..54acf7847 100644
--- a/crates/ra_syntax/src/syntax_error.rs
+++ b/crates/ra_syntax/src/syntax_error.rs
@@ -1,209 +1,44 @@
-//! FIXME: write short doc here
+//! See docs for `SyntaxError`.
 use std::fmt;
-use ra_parser::ParseError;
+use crate::{TextRange, TextUnit};
-use crate::{validation::EscapeError, TextRange, TextUnit, TokenizeError};
+/// Represents the result of unsuccessful tokenization, parsing
+/// or tree validation.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub struct SyntaxError {
+pub struct SyntaxError(String, TextRange);
-    kind: SyntaxErrorKind,
-    location: Location,
+// FIXME: there was an unused SyntaxErrorKind previously (before this enum was removed)
-}
+// It was introduced in this PR: https://github.com/rust-analyzer/rust-analyzer/pull/846/files#diff-827da9b03b8f9faa1bade5cdd44d5dafR95
+// but it was not removed by a mistake.
-// FIXME: Location should be just `Location(TextRange)`
+//
-// TextUnit enum member just unnecessarily compicates things,
+// So, we need to find a place where to stick validation for attributes in match clauses.
-// we should'n treat it specially, it just as a `TextRange { start: x, end: x + 1 }`
+// Code before refactor:
-// see `location_to_range()` in ra_ide/src/diagnostics
+// InvalidMatchInnerAttr => {
-#[derive(Clone, PartialEq, Eq, Hash)]
+//    write!(f, "Inner attributes are only allowed directly after the opening brace of the match expression")
-pub enum Location {
+// }
-    Offset(TextUnit),
-    Range(TextRange),
-}
-impl From<TextUnit> for Location {
-    fn from(offset: TextUnit) -> Location {
-        Location::Offset(offset)
-    }
-}
-impl From<TextRange> for Location {
-    fn from(range: TextRange) -> Location {
-        Location::Range(range)
-    }
-}
-impl fmt::Debug for Location {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self {
-            Location::Offset(it) => fmt::Debug::fmt(it, f),
-            Location::Range(it) => fmt::Debug::fmt(it, f),
-        }
-    }
-}
 impl SyntaxError {
-    pub fn new<L: Into<Location>>(kind: SyntaxErrorKind, loc: L) -> SyntaxError {
+    pub fn new(message: impl Into<String>, range: TextRange) -> Self {
-        SyntaxError { kind, location: loc.into() }
+        Self(message.into(), range)
    }
+    pub fn new_at_offset(message: impl Into<String>, offset: TextUnit) -> Self {
-    pub fn kind(&self) -> SyntaxErrorKind {
+        Self(message.into(), TextRange::offset_len(offset, 0.into()))
-        self.kind.clone()
    }
-    pub fn location(&self) -> Location {
+    pub fn range(&self) -> TextRange {
-        self.location.clone()
+        self.1
    }
-    pub fn offset(&self) -> TextUnit {
+    pub fn with_range(mut self, range: TextRange) -> Self {
-        match self.location {
+        self.1 = range;
-            Location::Offset(offset) => offset,
-            Location::Range(range) => range.start(),
-        }
-    }
-    pub fn add_offset(mut self, plus_offset: TextUnit, minus_offset: TextUnit) -> SyntaxError {
-        self.location = match self.location {
-            Location::Range(range) => Location::Range(range + plus_offset - minus_offset),
-            Location::Offset(offset) => Location::Offset(offset + plus_offset - minus_offset),
-        };
        self
    }
-    pub fn debug_dump(&self, acc: &mut impl fmt::Write) {
-        writeln!(acc, "error {:?}: {}", self.location(), self.kind()).unwrap();
-    }
 }
 impl fmt::Display for SyntaxError {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        self.kind.fmt(f)
+        self.0.fmt(f)
-    }
-}
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum SyntaxErrorKind {
-    ParseError(ParseError),
-    EscapeError(EscapeError),
-    TokenizeError(TokenizeError),
-    // FIXME: the obvious pattern of this enum dictates that the following enum variants
-    // should be wrapped into something like `SemmanticError(SemmanticError)`
-    // or `ValidateError(ValidateError)` or `SemmanticValidateError(...)`
-    InvalidBlockAttr,
-    InvalidMatchInnerAttr,
-    InvalidTupleIndexFormat,
-    VisibilityNotAllowed,
-    InclusiveRangeMissingEnd,
-}
-impl fmt::Display for SyntaxErrorKind {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        use self::SyntaxErrorKind::*;
-        match self {
-            InvalidBlockAttr => {
-                write!(f, "A block in this position cannot accept inner attributes")
-            }
-            InvalidMatchInnerAttr => {
-                write!(f, "Inner attributes are only allowed directly after the opening brace of the match expression")
-            }
-            InvalidTupleIndexFormat => {
-                write!(f, "Tuple (struct) field access is only allowed through decimal integers with no underscores or suffix")
-            }
-            ParseError(msg) => write!(f, "{}", msg.0),
-            EscapeError(err) => write!(f, "{}", err),
-            TokenizeError(err) => write!(f, "{}", err),
-            VisibilityNotAllowed => {
-                write!(f, "unnecessary visibility qualifier")
-            }
-            InclusiveRangeMissingEnd => {
-                write!(f, "An inclusive range must have an end expression")
-            }
-        }
-    }
-}
-impl fmt::Display for TokenizeError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        #[rustfmt::skip]
-        let msg = match self {
-            TokenizeError::EmptyInt => {
-                "Missing digits after the integer base prefix"
-            }
-            TokenizeError::EmptyExponent => {
-                "Missing digits after the exponent symbol"
-            }
-            TokenizeError::UnterminatedBlockComment => {
-                "Missing trailing `*/` symbols to terminate the block comment"
-            }
-            TokenizeError::UnterminatedChar => {
-                "Missing trailing `'` symbol to terminate the character literal"
-            }
-            TokenizeError::UnterminatedByte => {
-                "Missing trailing `'` symbol to terminate the byte literal"
-            }
-            TokenizeError::UnterminatedString => {
-                "Missing trailing `\"` symbol to terminate the string literal"
-            }
-            TokenizeError::UnterminatedByteString => {
-                "Missing trailing `\"` symbol to terminate the byte string literal"
-            }
-            TokenizeError::UnterminatedRawString => {
-                "Missing trailing `\"` with `#` symbols to terminate the raw string literal"
-            }
-            TokenizeError::UnterminatedRawByteString => {
-                "Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"
-            }
-            TokenizeError::UnstartedRawString => {
-                "Missing `\"` symbol after `#` symbols to begin the raw string literal"
-            }
-            TokenizeError::UnstartedRawByteString => {
-                "Missing `\"` symbol after `#` symbols to begin the raw byte string literal"
-            }
-            TokenizeError::LifetimeStartsWithNumber => {
-                "Lifetime name cannot start with a number"
-            }
-        };
-        write!(f, "{}", msg)
-    }
-}
-impl fmt::Display for EscapeError {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        let msg = match self {
-            EscapeError::ZeroChars => "Empty literal",
-            EscapeError::MoreThanOneChar => "Literal should be one character long",
-            EscapeError::LoneSlash => "Character must be escaped: '\\'",
-            EscapeError::InvalidEscape => "Invalid escape sequence",
-            EscapeError::BareCarriageReturn => "Character must be escaped: '\r'",
-            EscapeError::EscapeOnlyChar => "Character must be escaped",
-            EscapeError::TooShortHexEscape => "Escape sequence should have two digits",
-            EscapeError::InvalidCharInHexEscape => "Escape sequence should be a hexadecimal number",
-            EscapeError::OutOfRangeHexEscape => "Escape sequence should be ASCII",
-            EscapeError::NoBraceInUnicodeEscape => "Invalid escape sequence",
-            EscapeError::InvalidCharInUnicodeEscape => "Invalid escape sequence",
-            EscapeError::EmptyUnicodeEscape => "Invalid escape sequence",
-            EscapeError::UnclosedUnicodeEscape => "Missing '}'",
-            EscapeError::LeadingUnderscoreUnicodeEscape => "Invalid escape sequence",
-            EscapeError::OverlongUnicodeEscape => {
-                "Unicode escape sequence should have at most 6 digits"
-            }
-            EscapeError::LoneSurrogateUnicodeEscape => {
-                "Unicode escape code should not be a surrogate"
-            }
-            EscapeError::OutOfRangeUnicodeEscape => {
-                "Unicode escape code should be at most 0x10FFFF"
-            }
-            EscapeError::UnicodeEscapeInByte => "Unicode escapes are not allowed in bytes",
-            EscapeError::NonAsciiCharInByte => "Non ASCII characters are not allowed in bytes",
-        };
-        write!(f, "{}", msg)
-    }
-}
-impl From<EscapeError> for SyntaxErrorKind {
-    fn from(err: EscapeError) -> Self {
-        SyntaxErrorKind::EscapeError(err)
    }
 }
diff --git a/crates/ra_syntax/src/syntax_node.rs b/crates/ra_syntax/src/syntax_node.rs
index 7c2b18af3..4e3a1460d 100644
--- a/crates/ra_syntax/src/syntax_node.rs
+++ b/crates/ra_syntax/src/syntax_node.rs
@@ -6,13 +6,9 @@
 //! The *real* implementation is in the (language-agnostic) `rowan` crate, this
 //! module just wraps its API.
-use ra_parser::ParseError;
 use rowan::{GreenNodeBuilder, Language};
-use crate::{
+use crate::{Parse, SmolStr, SyntaxError, SyntaxKind, TextUnit};
-    syntax_error::{SyntaxError, SyntaxErrorKind},
-    Parse, SmolStr, SyntaxKind, TextUnit,
-};
 pub(crate) use rowan::{GreenNode, GreenToken};
@@ -73,8 +69,7 @@ impl SyntaxTreeBuilder {
        self.inner.finish_node()
    }
-    pub fn error(&mut self, error: ParseError, text_pos: TextUnit) {
+    pub fn error(&mut self, error: ra_parser::ParseError, text_pos: TextUnit) {
-        let error = SyntaxError::new(SyntaxErrorKind::ParseError(error), text_pos);
+        self.errors.push(SyntaxError::new_at_offset(error.0, text_pos))
-        self.errors.push(error)
    }
 }
diff --git a/crates/ra_syntax/src/tests.rs b/crates/ra_syntax/src/tests.rs
index fb22b9e54..912e6aec0 100644
--- a/crates/ra_syntax/src/tests.rs
+++ b/crates/ra_syntax/src/tests.rs
@@ -5,7 +5,7 @@ use std::{
 use test_utils::{collect_tests, dir_tests, project_dir, read_text};
-use crate::{fuzz, tokenize, Location, SourceFile, SyntaxError, TextRange, Token};
+use crate::{fuzz, tokenize, SourceFile, SyntaxError, Token};
 #[test]
 fn lexer_tests() {
@@ -128,25 +128,8 @@ fn dump_tokens_and_errors(tokens: &[Token], errors: &[SyntaxError], text: &str)
        writeln!(acc, "{:?} {} {:?}", token.kind, token_len, token_text).unwrap();
    }
    for err in errors {
-        let err_range = location_to_range(err.location());
+        writeln!(acc, "> error{:?} token({:?}) msg({})", err.range(), &text[err.range()], err)
-        writeln!(
+            .unwrap();
-            acc,
-            "> error{:?} token({:?}) msg({})",
-            err.location(),
-            &text[err_range],
-            err.kind()
-        )
-        .unwrap();
-    }
-    return acc;
-    // FIXME: copy-pasted this from `ra_ide/src/diagnostics.rs`
-    // `Location` will be refactored soon in new PR, see todos here:
-    // https://github.com/rust-analyzer/rust-analyzer/issues/223
-    fn location_to_range(location: Location) -> TextRange {
-        match location {
-            Location::Offset(offset) => TextRange::offset_len(offset, 1.into()),
-            Location::Range(range) => range,
-        }
    }
+    acc
 }
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs
index 8a5f0e4b7..863859dca 100644
--- a/crates/ra_syntax/src/validation.rs
+++ b/crates/ra_syntax/src/validation.rs
@@ -5,92 +5,76 @@ mod block;
 use rustc_lexer::unescape;
 use crate::{
-    ast, match_ast, AstNode, SyntaxError, SyntaxErrorKind,
+    ast, match_ast, AstNode, SyntaxError,
    SyntaxKind::{BYTE, BYTE_STRING, CHAR, CONST_DEF, FN_DEF, INT_NUMBER, STRING, TYPE_ALIAS_DEF},
    SyntaxNode, SyntaxToken, TextUnit, T,
 };
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+fn rustc_unescape_error_to_string(err: unescape::EscapeError) -> &'static str {
-pub enum EscapeError {
+    use unescape::EscapeError as EE;
-    ZeroChars,
-    MoreThanOneChar,
-    LoneSlash,
-    InvalidEscape,
-    BareCarriageReturn,
-    EscapeOnlyChar,
-    TooShortHexEscape,
-    InvalidCharInHexEscape,
-    OutOfRangeHexEscape,
-    NoBraceInUnicodeEscape,
-    InvalidCharInUnicodeEscape,
-    EmptyUnicodeEscape,
-    UnclosedUnicodeEscape,
-    LeadingUnderscoreUnicodeEscape,
-    OverlongUnicodeEscape,
-    LoneSurrogateUnicodeEscape,
-    OutOfRangeUnicodeEscape,
-    UnicodeEscapeInByte,
-    NonAsciiCharInByte,
-}
-impl From<rustc_lexer::unescape::EscapeError> for EscapeError {
+    #[rustfmt::skip]
-    fn from(err: rustc_lexer::unescape::EscapeError) -> Self {
+    let err_message = match err {
-        match err {
+        EE::ZeroChars => {
-            rustc_lexer::unescape::EscapeError::ZeroChars => EscapeError::ZeroChars,
+            "Literal must not be empty"
-            rustc_lexer::unescape::EscapeError::MoreThanOneChar => EscapeError::MoreThanOneChar,
-            rustc_lexer::unescape::EscapeError::LoneSlash => EscapeError::LoneSlash,
-            rustc_lexer::unescape::EscapeError::InvalidEscape => EscapeError::InvalidEscape,
-            rustc_lexer::unescape::EscapeError::BareCarriageReturn
-            | rustc_lexer::unescape::EscapeError::BareCarriageReturnInRawString => {
-                EscapeError::BareCarriageReturn
-            }
-            rustc_lexer::unescape::EscapeError::EscapeOnlyChar => EscapeError::EscapeOnlyChar,
-            rustc_lexer::unescape::EscapeError::TooShortHexEscape => EscapeError::TooShortHexEscape,
-            rustc_lexer::unescape::EscapeError::InvalidCharInHexEscape => {
-                EscapeError::InvalidCharInHexEscape
-            }
-            rustc_lexer::unescape::EscapeError::OutOfRangeHexEscape => {
-                EscapeError::OutOfRangeHexEscape
-            }
-            rustc_lexer::unescape::EscapeError::NoBraceInUnicodeEscape => {
-                EscapeError::NoBraceInUnicodeEscape
-            }
-            rustc_lexer::unescape::EscapeError::InvalidCharInUnicodeEscape => {
-                EscapeError::InvalidCharInUnicodeEscape
-            }
-            rustc_lexer::unescape::EscapeError::EmptyUnicodeEscape => {
-                EscapeError::EmptyUnicodeEscape
-            }
-            rustc_lexer::unescape::EscapeError::UnclosedUnicodeEscape => {
-                EscapeError::UnclosedUnicodeEscape
-            }
-            rustc_lexer::unescape::EscapeError::LeadingUnderscoreUnicodeEscape => {
-                EscapeError::LeadingUnderscoreUnicodeEscape
-            }
-            rustc_lexer::unescape::EscapeError::OverlongUnicodeEscape => {
-                EscapeError::OverlongUnicodeEscape
-            }
-            rustc_lexer::unescape::EscapeError::LoneSurrogateUnicodeEscape => {
-                EscapeError::LoneSurrogateUnicodeEscape
-            }
-            rustc_lexer::unescape::EscapeError::OutOfRangeUnicodeEscape => {
-                EscapeError::OutOfRangeUnicodeEscape
-            }
-            rustc_lexer::unescape::EscapeError::UnicodeEscapeInByte => {
-                EscapeError::UnicodeEscapeInByte
-            }
-            rustc_lexer::unescape::EscapeError::NonAsciiCharInByte
-            | rustc_lexer::unescape::EscapeError::NonAsciiCharInByteString => {
-                EscapeError::NonAsciiCharInByte
-            }
        }
-    }
+        EE::MoreThanOneChar => {
-}
+            "Literal must be one character long"
+        }
+        EE::LoneSlash => {
+            "Character must be escaped: `\\`"
+        }
+        EE::InvalidEscape => {
+            "Invalid escape"
+        }
+        EE::BareCarriageReturn | EE::BareCarriageReturnInRawString => {
+            "Character must be escaped: `\r`"
+        }
+        EE::EscapeOnlyChar => {
+            "Escape character `\\` must be escaped itself"
+        }
+        EE::TooShortHexEscape => {
+            "ASCII hex escape code must have exactly two digits"
+        }
+        EE::InvalidCharInHexEscape => {
+            "ASCII hex escape code must contain only hex characters"
+        }
+        EE::OutOfRangeHexEscape => {
+            "ASCII hex escape code must be at most 0x7F"
+        }
+        EE::NoBraceInUnicodeEscape => {
+            "Missing `{` to begin the unicode escape"
+        }
+        EE::InvalidCharInUnicodeEscape => {
+            "Unicode escape must contain only hex characters and underscores"
+        }
+        EE::EmptyUnicodeEscape => {
+            "Unicode escape must not be empty"
+        }
+        EE::UnclosedUnicodeEscape => {
+            "Missing '}' to terminate the unicode escape"
+        }
+        EE::LeadingUnderscoreUnicodeEscape => {
+            "Unicode escape code must not begin with an underscore"
+        }
+        EE::OverlongUnicodeEscape => {
+            "Unicode escape code must have at most 6 digits"
+        }
+        EE::LoneSurrogateUnicodeEscape => {
+            "Unicode escape code must not be a surrogate"
+        }
+        EE::OutOfRangeUnicodeEscape => {
+            "Unicode escape code must be at most 0x10FFFF"
+        }
+        EE::UnicodeEscapeInByte => {
+            "Byte literals must not contain unicode escapes"
+        }
+        EE::NonAsciiCharInByte | EE::NonAsciiCharInByteString => {
+            "Byte literals must not contain non-ASCII characters"
+        }
+    };
-impl From<rustc_lexer::unescape::EscapeError> for SyntaxErrorKind {
+    err_message
-    fn from(err: rustc_lexer::unescape::EscapeError) -> Self {
-        SyntaxErrorKind::EscapeError(err.into())
-    }
 }
 pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
@@ -118,6 +102,7 @@ pub(crate) fn validate(root: &SyntaxNode) -> Vec<SyntaxError> {
 }
 fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
+    // FIXME: move this function to outer scope (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366196658)
    fn unquote(text: &str, prefix_len: usize, end_delimiter: char) -> Option<&str> {
        text.rfind(end_delimiter).and_then(|end| text.get(prefix_len..end))
    }
@@ -125,9 +110,10 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) {
    let token = literal.token();
    let text = token.text().as_str();
+    // FIXME: lift this lambda refactor to `fn` (https://github.com/rust-analyzer/rust-analyzer/pull/2834#discussion_r366199205)
    let mut push_err = |prefix_len, (off, err): (usize, unescape::EscapeError)| {
        let off = token.text_range().start() + TextUnit::from_usize(off + prefix_len);
-        acc.push(SyntaxError::new(err.into(), off));
+        acc.push(SyntaxError::new_at_offset(rustc_unescape_error_to_string(err), off));
    };
    match token.kind() {
@@ -195,7 +181,8 @@ fn validate_numeric_name(name_ref: Option<ast::NameRef>, errors: &mut Vec<Syntax
    if let Some(int_token) = int_token(name_ref) {
        if int_token.text().chars().any(|c| !c.is_digit(10)) {
            errors.push(SyntaxError::new(
-                SyntaxErrorKind::InvalidTupleIndexFormat,
+                "Tuple (struct) field access is only allowed through \
+                decimal integers with no underscores or suffix",
                int_token.text_range(),
            ));
        }
@@ -215,21 +202,21 @@ fn validate_visibility(vis: ast::Visibility, errors: &mut Vec<SyntaxError>) {
        FN_DEF | CONST_DEF | TYPE_ALIAS_DEF => (),
        _ => return,
    }
    let impl_block = match parent.parent().and_then(|it| it.parent()).and_then(ast::ImplBlock::cast)
    {
        Some(it) => it,
        None => return,
    };
    if impl_block.target_trait().is_some() {
-        errors
+        errors.push(SyntaxError::new("Unnecessary visibility qualifier", vis.syntax.text_range()));
-            .push(SyntaxError::new(SyntaxErrorKind::VisibilityNotAllowed, vis.syntax.text_range()))
    }
 }
 fn validate_range_expr(expr: ast::RangeExpr, errors: &mut Vec<SyntaxError>) {
    if expr.op_kind() == Some(ast::RangeOp::Inclusive) && expr.end().is_none() {
        errors.push(SyntaxError::new(
-            SyntaxErrorKind::InclusiveRangeMissingEnd,
+            "An inclusive range must have an end expression",
            expr.syntax().text_range(),
        ));
    }
diff --git a/crates/ra_syntax/src/validation/block.rs b/crates/ra_syntax/src/validation/block.rs
index c85bbc1f4..8e962ab5b 100644
--- a/crates/ra_syntax/src/validation/block.rs
+++ b/crates/ra_syntax/src/validation/block.rs
@@ -1,9 +1,8 @@
-//! FIXME: write short doc here
+//! Logic for validating block expressions i.e. `ast::BlockExpr`.
 use crate::{
    ast::{self, AstNode, AttrsOwner},
    SyntaxError,
-    SyntaxErrorKind::*,
    SyntaxKind::*,
 };
@@ -15,10 +14,11 @@ pub(crate) fn validate_block_expr(expr: ast::BlockExpr, errors: &mut Vec<SyntaxE
        }
    }
    if let Some(block) = expr.block() {
-        errors.extend(
+        errors.extend(block.attrs().map(|attr| {
-            block
+            SyntaxError::new(
-                .attrs()
+                "A block in this position cannot accept inner attributes",
-                .map(|attr| SyntaxError::new(InvalidBlockAttr, attr.syntax().text_range())),
+                attr.syntax().text_range(),
-        )
+            )
+        }))
    }
 }
author	bors[bot] <26634292+bors[bot]@users.noreply.github.com>	2020-02-18 12:57:26 +0000
committer	GitHub <[email protected]>	2020-02-18 12:57:26 +0000
commit	c447fe9bc06006a7080da782cf67d739c91b534c (patch)
tree	45cbc9578b24437da3eedc6a234784be22b1f38c /crates/ra_syntax/src
parent	742459c8fe08e359ae380e3e1dc0d059c0b4f871 (diff)
parent	053ccf4121797e4e559e3225d46d3f23cb1ad70b (diff)