diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2020-04-25 11:16:02 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2020-04-25 11:16:02 +0100 |
commit | 29fc409e7fe5b12dcf6bfbcca622d79c4c8fcb72 (patch) | |
tree | e42f7bd1490bca66e0786d5bf2b3194aeaa57a93 /crates/ra_syntax/src/parsing/lexer.rs | |
parent | 27a7718880d93f55f905da606d108d3b3c682ab4 (diff) | |
parent | e87346950039a54c3f0b02d6056cbb92ca38eb28 (diff) |
Merge #4131
4131: Switch to text-size r=matklad a=matklad
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_syntax/src/parsing/lexer.rs')
-rw-r--r-- | crates/ra_syntax/src/parsing/lexer.rs | 32 |
1 file changed, 17 insertions, 15 deletions
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 67c1f1b48..f450ef4a2 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs | |||
@@ -1,10 +1,12 @@ | |||
1 | //! Lexer analyzes raw input string and produces lexemes (tokens). | 1 | //! Lexer analyzes raw input string and produces lexemes (tokens). |
2 | //! It is just a bridge to `rustc_lexer`. | 2 | //! It is just a bridge to `rustc_lexer`. |
3 | 3 | ||
4 | use std::convert::TryInto; | ||
5 | |||
4 | use crate::{ | 6 | use crate::{ |
5 | SyntaxError, | 7 | SyntaxError, |
6 | SyntaxKind::{self, *}, | 8 | SyntaxKind::{self, *}, |
7 | TextRange, TextUnit, T, | 9 | TextRange, TextSize, T, |
8 | }; | 10 | }; |
9 | 11 | ||
10 | /// A token of Rust source. | 12 | /// A token of Rust source. |
@@ -13,7 +15,7 @@ pub struct Token { | |||
13 | /// The kind of token. | 15 | /// The kind of token. |
14 | pub kind: SyntaxKind, | 16 | pub kind: SyntaxKind, |
15 | /// The length of the token. | 17 | /// The length of the token. |
16 | pub len: TextUnit, | 18 | pub len: TextSize, |
17 | } | 19 | } |
18 | 20 | ||
19 | /// Break a string up into its component tokens. | 21 | /// Break a string up into its component tokens. |
@@ -28,18 +30,19 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) { | |||
28 | let mut tokens = Vec::new(); | 30 | let mut tokens = Vec::new(); |
29 | let mut errors = Vec::new(); | 31 | let mut errors = Vec::new(); |
30 | 32 | ||
31 | let mut offset: usize = rustc_lexer::strip_shebang(text) | 33 | let mut offset = match rustc_lexer::strip_shebang(text) { |
32 | .map(|shebang_len| { | 34 | Some(shebang_len) => { |
33 | tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) }); | 35 | tokens.push(Token { kind: SHEBANG, len: shebang_len.try_into().unwrap() }); |
34 | shebang_len | 36 | shebang_len |
35 | }) | 37 | } |
36 | .unwrap_or(0); | 38 | None => 0, |
39 | }; | ||
37 | 40 | ||
38 | let text_without_shebang = &text[offset..]; | 41 | let text_without_shebang = &text[offset..]; |
39 | 42 | ||
40 | for rustc_token in rustc_lexer::tokenize(text_without_shebang) { | 43 | for rustc_token in rustc_lexer::tokenize(text_without_shebang) { |
41 | let token_len = TextUnit::from_usize(rustc_token.len); | 44 | let token_len: TextSize = rustc_token.len.try_into().unwrap(); |
42 | let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len); | 45 | let token_range = TextRange::at(offset.try_into().unwrap(), token_len); |
43 | 46 | ||
44 | let (syntax_kind, err_message) = | 47 | let (syntax_kind, err_message) = |
45 | rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]); | 48 | rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]); |
@@ -65,7 +68,7 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) { | |||
65 | /// Beware that unescape errors are not checked at tokenization time. | 68 | /// Beware that unescape errors are not checked at tokenization time. |
66 | pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)> { | 69 | pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)> { |
67 | lex_first_token(text) | 70 | lex_first_token(text) |
68 | .filter(|(token, _)| token.len == TextUnit::of_str(text)) | 71 | .filter(|(token, _)| token.len == TextSize::of(text)) |
69 | .map(|(token, error)| (token.kind, error)) | 72 | .map(|(token, error)| (token.kind, error)) |
70 | } | 73 | } |
71 | 74 | ||
@@ -75,7 +78,7 @@ pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxEr | |||
75 | /// Beware that unescape errors are not checked at tokenization time. | 78 | /// Beware that unescape errors are not checked at tokenization time. |
76 | pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind> { | 79 | pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind> { |
77 | lex_first_token(text) | 80 | lex_first_token(text) |
78 | .filter(|(token, error)| !error.is_some() && token.len == TextUnit::of_str(text)) | 81 | .filter(|(token, error)| !error.is_some() && token.len == TextSize::of(text)) |
79 | .map(|(token, _error)| token.kind) | 82 | .map(|(token, _error)| token.kind) |
80 | } | 83 | } |
81 | 84 | ||
@@ -96,10 +99,9 @@ fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> { | |||
96 | let rustc_token = rustc_lexer::first_token(text); | 99 | let rustc_token = rustc_lexer::first_token(text); |
97 | let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text); | 100 | let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text); |
98 | 101 | ||
99 | let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) }; | 102 | let token = Token { kind: syntax_kind, len: rustc_token.len.try_into().unwrap() }; |
100 | let optional_error = err_message.map(|err_message| { | 103 | let optional_error = err_message |
101 | SyntaxError::new(err_message, TextRange::from_to(0.into(), TextUnit::of_str(text))) | 104 | .map(|err_message| SyntaxError::new(err_message, TextRange::up_to(TextSize::of(text)))); |
102 | }); | ||
103 | 105 | ||
104 | Some((token, optional_error)) | 106 | Some((token, optional_error)) |
105 | } | 107 | } |