path: root/crates/ra_syntax/src/parsing/lexer.rs
Diffstat (limited to 'crates/ra_syntax/src/parsing/lexer.rs')
-rw-r--r--  crates/ra_syntax/src/parsing/lexer.rs | 313
1 file changed, 165 insertions(+), 148 deletions(-)
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index bf6b4d637..55755be18 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -16,55 +16,21 @@ pub struct Token {
     pub len: TextUnit,
 }
 
-/// Represents the result of parsing one token. Beware that the token may be malformed.
-#[derive(Debug)]
-pub struct ParsedToken {
-    /// Parsed token.
-    pub token: Token,
-    /// If error is present then parsed token is malformed.
-    pub error: Option<SyntaxError>,
-}
-
-#[derive(Debug, Default)]
-/// Represents the result of parsing source code of Rust language.
-pub struct ParsedTokens {
-    /// Parsed tokens in order they appear in source code.
-    pub tokens: Vec<Token>,
-    /// Collection of all occured tokenization errors.
-    /// In general `self.errors.len() <= self.tokens.len()`
-    pub errors: Vec<SyntaxError>,
-}
-impl ParsedTokens {
-    /// Append `token` and `error` (if pressent) to the result.
-    pub fn push(&mut self, ParsedToken { token, error }: ParsedToken) {
-        self.tokens.push(token);
-        if let Some(error) = error {
-            self.errors.push(error)
-        }
-    }
-}
-
-/// Same as `tokenize_append()`, just a shortcut for creating `ParsedTokens`
-/// and returning the result the usual way.
-pub fn tokenize(text: &str) -> ParsedTokens {
-    let mut parsed = ParsedTokens::default();
-    tokenize_append(text, &mut parsed);
-    parsed
-}
-
 /// Break a string up into its component tokens.
-/// Writes to `ParsedTokens` which are basically a pair `(Vec<Token>, Vec<SyntaxError>)`.
 /// Beware that it checks for shebang first and its length contributes to resulting
 /// tokens offsets.
-pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) {
+pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
     // non-empty string is a precondition of `rustc_lexer::strip_shebang()`.
     if text.is_empty() {
-        return;
+        return Default::default();
     }
 
+    let mut tokens = Vec::new();
+    let mut errors = Vec::new();
+
     let mut offset: usize = rustc_lexer::strip_shebang(text)
         .map(|shebang_len| {
-            parsed.tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) });
+            tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) });
             shebang_len
         })
         .unwrap_or(0);
@@ -72,35 +38,76 @@ pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) {
     let text_without_shebang = &text[offset..];
 
     for rustc_token in rustc_lexer::tokenize(text_without_shebang) {
-        parsed.push(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from_usize(offset)));
+        let token_len = TextUnit::from_usize(rustc_token.len);
+        let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len);
+
+        let (syntax_kind, error) =
+            rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
+
+        tokens.push(Token { kind: syntax_kind, len: token_len });
+
+        if let Some(error) = error {
+            errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range));
+        }
+
         offset += rustc_token.len;
     }
+
+    (tokens, errors)
 }
 
-/// Returns the first encountered token at the beginning of the string.
-/// If the string contains zero or *two or more tokens* returns `None`.
+/// Returns `SyntaxKind` and `Option<SyntaxError>` of the first token
+/// encountered at the beginning of the string.
+///
+/// Returns `None` if the string contains zero *or two or more* tokens.
+/// The token is malformed if the returned error is not `None`.
+///
+/// Beware that unescape errors are not checked at tokenization time.
+pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)> {
+    first_token(text)
+        .filter(|(token, _)| token.len.to_usize() == text.len())
+        .map(|(token, error)| (token.kind, error))
+}
+
+/// The same as `lex_single_syntax_kind()`, but returns only `SyntaxKind` and
+/// returns `None` if any tokenization error occurred.
 ///
-/// The main difference between `first_token()` and `single_token()` is that
-/// the latter returns `None` if the string contains more than one token.
-pub fn single_token(text: &str) -> Option<ParsedToken> {
-    first_token(text).filter(|parsed| parsed.token.len.to_usize() == text.len())
+/// Beware that unescape errors are not checked at tokenization time.
+pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind> {
+    first_token(text)
+        .filter(|(token, error)| error.is_none() && token.len.to_usize() == text.len())
+        .map(|(token, _error)| token.kind)
 }
 
 /// Returns the first encountered token at the beginning of the string.
-/// If the string contains zero tokens returns `None`.
 ///
-/// The main difference between `first_token() and single_token()` is that
-/// the latter returns `None` if the string contains more than one token.
-pub fn first_token(text: &str) -> Option<ParsedToken> {
+/// Returns `None` if the string contains zero tokens.
+/// The token is malformed if the returned error is not `None`.
+///
+/// Beware that unescape errors are not checked at tokenization time.
+fn first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
     // non-empty string is a precondition of `rustc_lexer::first_token()`.
     if text.is_empty() {
-        None
-    } else {
-        let rustc_token = rustc_lexer::first_token(text);
-        Some(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from(0)))
+        return None;
     }
+
+    let rustc_token = rustc_lexer::first_token(text);
+    let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
+
+    let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) };
+    let error = error.map(|error| {
+        SyntaxError::new(
+            SyntaxErrorKind::TokenizeError(error),
+            TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)),
+        )
+    });
+
+    Some((token, error))
 }
 
+// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice:
+// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067
+
 /// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant.
 /// It describes all the types of errors that may happen during the tokenization
 /// of Rust source.
@@ -136,122 +143,132 @@ pub enum TokenizeError {
     LifetimeStartsWithNumber,
 }
 
-/// Mapper function that converts `rustc_lexer::Token` with some additional context
-/// to `ParsedToken`
-fn rustc_token_to_parsed_token(
-    rustc_token: &rustc_lexer::Token,
-    text: &str,
-    token_start_offset: TextUnit,
-) -> ParsedToken {
+fn rustc_token_kind_to_syntax_kind(
+    rustc_token_kind: &rustc_lexer::TokenKind,
+    token_text: &str,
+) -> (SyntaxKind, Option<TokenizeError>) {
+    // A note on an intended tradeoff:
     // We drop some useful information here (see patterns with double dots `..`)
     // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
-    // being `u16` that come from `rowan::SyntaxKind` type and changes to `rowan::SyntaxKind`
-    // would mean hell of a rewrite
+    // being `u16`, which comes from `rowan::SyntaxKind`.
 
-    let token_range =
-        TextRange::offset_len(token_start_offset, TextUnit::from_usize(rustc_token.len));
-
-    let token_text = &text[token_range];
-
-    let (syntax_kind, error) = {
+    let syntax_kind = {
         use rustc_lexer::TokenKind as TK;
         use TokenizeError as TE;
 
-        match rustc_token.kind {
-            TK::LineComment => ok(COMMENT),
-            TK::BlockComment { terminated } => {
-                ok_if(terminated, COMMENT, TE::UnterminatedBlockComment)
+        match rustc_token_kind {
+            TK::LineComment => COMMENT,
+
+            TK::BlockComment { terminated: true } => COMMENT,
+            TK::BlockComment { terminated: false } => {
+                return (COMMENT, Some(TE::UnterminatedBlockComment));
             }
-            TK::Whitespace => ok(WHITESPACE),
-            TK::Ident => ok(if token_text == "_" {
-                UNDERSCORE
-            } else {
-                SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
-            }),
-            TK::RawIdent => ok(IDENT),
-            TK::Literal { kind, .. } => match_literal_kind(&kind),
-            TK::Lifetime { starts_with_number } => {
-                ok_if(!starts_with_number, LIFETIME, TE::LifetimeStartsWithNumber)
+
+            TK::Whitespace => WHITESPACE,
+
+            TK::Ident => {
+                if token_text == "_" {
+                    UNDERSCORE
+                } else {
+                    SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
+                }
             }
-            TK::Semi => ok(SEMI),
-            TK::Comma => ok(COMMA),
-            TK::Dot => ok(DOT),
-            TK::OpenParen => ok(L_PAREN),
-            TK::CloseParen => ok(R_PAREN),
-            TK::OpenBrace => ok(L_CURLY),
-            TK::CloseBrace => ok(R_CURLY),
-            TK::OpenBracket => ok(L_BRACK),
-            TK::CloseBracket => ok(R_BRACK),
-            TK::At => ok(AT),
-            TK::Pound => ok(POUND),
-            TK::Tilde => ok(TILDE),
-            TK::Question => ok(QUESTION),
-            TK::Colon => ok(COLON),
-            TK::Dollar => ok(DOLLAR),
-            TK::Eq => ok(EQ),
-            TK::Not => ok(EXCL),
-            TK::Lt => ok(L_ANGLE),
-            TK::Gt => ok(R_ANGLE),
-            TK::Minus => ok(MINUS),
-            TK::And => ok(AMP),
-            TK::Or => ok(PIPE),
-            TK::Plus => ok(PLUS),
-            TK::Star => ok(STAR),
-            TK::Slash => ok(SLASH),
-            TK::Caret => ok(CARET),
-            TK::Percent => ok(PERCENT),
-            TK::Unknown => ok(ERROR),
-        }
-    };
 
-    return ParsedToken {
-        token: Token { kind: syntax_kind, len: token_range.len() },
-        error: error
-            .map(|error| SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range)),
+            TK::RawIdent => IDENT,
+            TK::Literal { kind, .. } => return match_literal_kind(&kind),
+
+            TK::Lifetime { starts_with_number: false } => LIFETIME,
+            TK::Lifetime { starts_with_number: true } => {
+                return (LIFETIME, Some(TE::LifetimeStartsWithNumber))
+            }
+
+            TK::Semi => SEMI,
+            TK::Comma => COMMA,
+            TK::Dot => DOT,
+            TK::OpenParen => L_PAREN,
+            TK::CloseParen => R_PAREN,
+            TK::OpenBrace => L_CURLY,
+            TK::CloseBrace => R_CURLY,
+            TK::OpenBracket => L_BRACK,
+            TK::CloseBracket => R_BRACK,
+            TK::At => AT,
+            TK::Pound => POUND,
+            TK::Tilde => TILDE,
+            TK::Question => QUESTION,
+            TK::Colon => COLON,
+            TK::Dollar => DOLLAR,
+            TK::Eq => EQ,
+            TK::Not => EXCL,
+            TK::Lt => L_ANGLE,
+            TK::Gt => R_ANGLE,
+            TK::Minus => MINUS,
+            TK::And => AMP,
+            TK::Or => PIPE,
+            TK::Plus => PLUS,
+            TK::Star => STAR,
+            TK::Slash => SLASH,
+            TK::Caret => CARET,
+            TK::Percent => PERCENT,
+            TK::Unknown => ERROR,
+        }
     };
 
-    type ParsedSyntaxKind = (SyntaxKind, Option<TokenizeError>);
+    return (syntax_kind, None);
 
-    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> ParsedSyntaxKind {
+    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<TokenizeError>) {
         use rustc_lexer::LiteralKind as LK;
         use TokenizeError as TE;
 
-        match *kind {
-            LK::Int { empty_int, .. } => ok_if(!empty_int, INT_NUMBER, TE::EmptyInt),
-            LK::Float { empty_exponent, .. } => {
-                ok_if(!empty_exponent, FLOAT_NUMBER, TE::EmptyExponent)
+        #[rustfmt::skip]
+        let syntax_kind = match *kind {
+            LK::Int { empty_int: false, .. } => INT_NUMBER,
+            LK::Int { empty_int: true, .. } => {
+                return (INT_NUMBER, Some(TE::EmptyInt))
+            }
+
+            LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
+            LK::Float { empty_exponent: true, .. } => {
+                return (FLOAT_NUMBER, Some(TE::EmptyExponent))
+            }
+
+            LK::Char { terminated: true } => CHAR,
+            LK::Char { terminated: false } => {
+                return (CHAR, Some(TE::UnterminatedChar))
+            }
+
+            LK::Byte { terminated: true } => BYTE,
+            LK::Byte { terminated: false } => {
+                return (BYTE, Some(TE::UnterminatedByte))
             }
-            LK::Char { terminated } => ok_if(terminated, CHAR, TE::UnterminatedChar),
-            LK::Byte { terminated } => ok_if(terminated, BYTE, TE::UnterminatedByte),
-            LK::Str { terminated } => ok_if(terminated, STRING, TE::UnterminatedString),
-            LK::ByteStr { terminated } => {
-                ok_if(terminated, BYTE_STRING, TE::UnterminatedByteString)
+
+            LK::Str { terminated: true } => STRING,
+            LK::Str { terminated: false } => {
+                return (STRING, Some(TE::UnterminatedString))
+            }
+
+
+            LK::ByteStr { terminated: true } => BYTE_STRING,
+            LK::ByteStr { terminated: false } => {
+                return (BYTE_STRING, Some(TE::UnterminatedByteString))
             }
 
-            LK::RawStr { started: true, terminated, .. } => {
-                ok_if(terminated, RAW_STRING, TE::UnterminatedRawString)
+            LK::RawStr { started: true, terminated: true, .. } => RAW_STRING,
+            LK::RawStr { started: true, terminated: false, .. } => {
+                return (RAW_STRING, Some(TE::UnterminatedRawString))
+            }
+            LK::RawStr { started: false, .. } => {
+                return (RAW_STRING, Some(TE::UnstartedRawString))
             }
-            LK::RawStr { started: false, .. } => err(RAW_STRING, TE::UnstartedRawString),
 
-            LK::RawByteStr { started: true, terminated, .. } => {
-                ok_if(terminated, RAW_BYTE_STRING, TE::UnterminatedRawByteString)
+            LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING,
+            LK::RawByteStr { started: true, terminated: false, .. } => {
+                return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString))
             }
             LK::RawByteStr { started: false, .. } => {
-                err(RAW_BYTE_STRING, TE::UnstartedRawByteString)
+                return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString))
             }
-        }
-    }
-    const fn ok(syntax_kind: SyntaxKind) -> ParsedSyntaxKind {
+        };
+
         (syntax_kind, None)
     }
-    const fn err(syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind {
-        (syntax_kind, Some(error))
-    }
-    fn ok_if(cond: bool, syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind {
-        if cond {
-            ok(syntax_kind)
-        } else {
-            err(syntax_kind, error)
-        }
-    }
 }
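
The new `tokenize` entry point returns the token stream and the accumulated errors as a plain `(Vec<Token>, Vec<SyntaxError>)` pair instead of the removed `ParsedTokens` struct. A minimal usage sketch follows; it assumes `tokenize` and `Token` are re-exported from the `ra_syntax` crate root and that `SyntaxError` implements `Debug`, neither of which this diff shows:

    use ra_syntax::{tokenize, Token};

    fn dump_tokens(text: &str) {
        let (tokens, errors) = tokenize(text);

        // Tokens carry only lengths; absolute offsets are recovered by summing.
        let mut offset = 0;
        for Token { kind, len } in tokens {
            let end = offset + len.to_usize();
            println!("{:?} at {}..{}: {:?}", kind, offset, end, &text[offset..end]);
            offset = end;
        }

        for error in errors {
            eprintln!("lexer error: {:?}", error);
        }
    }

As the doc comment on `tokenize` warns, a leading shebang becomes a SHEBANG token whose length shifts the offsets of everything after it.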
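
Because `rustc_token_kind_to_syntax_kind` always produces some `SyntaxKind`, malformed input still yields a token; the error is reported alongside it rather than replacing it. A sketch of that invariant, under the same re-export assumption:

    use ra_syntax::{tokenize, SyntaxKind};

    // An unterminated block comment still lexes to a single COMMENT token,
    // paired with a SyntaxError backed by TokenizeError::UnterminatedBlockComment.
    #[test]
    fn unterminated_block_comment_is_still_a_token() {
        let (tokens, errors) = tokenize("/* never closed");
        assert_eq!(tokens.len(), 1);
        assert_eq!(tokens[0].kind, SyntaxKind::COMMENT);
        assert_eq!(errors.len(), 1);
    }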
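
The two single-token helpers encode the "exactly one token" check that callers previously built from `single_token`. A sketch of the intended call sites, assuming the helpers are re-exported like `tokenize`; `is_valid_identifier` is a hypothetical caller, not part of this commit:

    use ra_syntax::{lex_single_syntax_kind, lex_single_valid_syntax_kind, SyntaxKind};

    // `None` means zero or several tokens; the inner Option carries the error.
    fn describe(text: &str) {
        match lex_single_syntax_kind(text) {
            Some((kind, None)) => println!("one well-formed token: {:?}", kind),
            Some((kind, Some(err))) => println!("one malformed {:?} token: {:?}", kind, err),
            None => println!("zero tokens, or more than one"),
        }
    }

    // The `_valid_` variant folds both failure modes into `None`.
    fn is_valid_identifier(text: &str) -> bool {
        lex_single_valid_syntax_kind(text) == Some(SyntaxKind::IDENT)
    }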