about | summary | refs | log | tree | commit | diff
path: root/crates/ra_syntax/src/parsing/lexer.rs
diff options
context:
space:
mode:
author: bors[bot] <26634292+bors[bot]@users.noreply.github.com> 2020-04-25 11:16:02 +0100
committer: GitHub <[email protected]> 2020-04-25 11:16:02 +0100
commit: 29fc409e7fe5b12dcf6bfbcca622d79c4c8fcb72 (patch)
tree: e42f7bd1490bca66e0786d5bf2b3194aeaa57a93 /crates/ra_syntax/src/parsing/lexer.rs
parent: 27a7718880d93f55f905da606d108d3b3c682ab4 (diff)
parent: e87346950039a54c3f0b02d6056cbb92ca38eb28 (diff)
Merge #4131
4131: Switch to text-size r=matklad a=matklad
Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_syntax/src/parsing/lexer.rs')
-rw-r--r-- crates/ra_syntax/src/parsing/lexer.rs | 32
1 files changed, 17 insertions, 15 deletions
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index 67c1f1b48..f450ef4a2 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -1,10 +1,12 @@
1//! Lexer analyzes raw input string and produces lexemes (tokens). 1//! Lexer analyzes raw input string and produces lexemes (tokens).
2//! It is just a bridge to `rustc_lexer`. 2//! It is just a bridge to `rustc_lexer`.
3 3
4use std::convert::TryInto;
5
4use crate::{ 6use crate::{
5 SyntaxError, 7 SyntaxError,
6 SyntaxKind::{self, *}, 8 SyntaxKind::{self, *},
7 TextRange, TextUnit, T, 9 TextRange, TextSize, T,
8}; 10};
9 11
10/// A token of Rust source. 12/// A token of Rust source.
@@ -13,7 +15,7 @@ pub struct Token {
13 /// The kind of token. 15 /// The kind of token.
14 pub kind: SyntaxKind, 16 pub kind: SyntaxKind,
15 /// The length of the token. 17 /// The length of the token.
16 pub len: TextUnit, 18 pub len: TextSize,
17} 19}
18 20
19/// Break a string up into its component tokens. 21/// Break a string up into its component tokens.
@@ -28,18 +30,19 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
28 let mut tokens = Vec::new(); 30 let mut tokens = Vec::new();
29 let mut errors = Vec::new(); 31 let mut errors = Vec::new();
30 32
31 let mut offset: usize = rustc_lexer::strip_shebang(text) 33 let mut offset = match rustc_lexer::strip_shebang(text) {
32 .map(|shebang_len| { 34 Some(shebang_len) => {
33 tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) }); 35 tokens.push(Token { kind: SHEBANG, len: shebang_len.try_into().unwrap() });
34 shebang_len 36 shebang_len
35 }) 37 }
36 .unwrap_or(0); 38 None => 0,
39 };
37 40
38 let text_without_shebang = &text[offset..]; 41 let text_without_shebang = &text[offset..];
39 42
40 for rustc_token in rustc_lexer::tokenize(text_without_shebang) { 43 for rustc_token in rustc_lexer::tokenize(text_without_shebang) {
41 let token_len = TextUnit::from_usize(rustc_token.len); 44 let token_len: TextSize = rustc_token.len.try_into().unwrap();
42 let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len); 45 let token_range = TextRange::at(offset.try_into().unwrap(), token_len);
43 46
44 let (syntax_kind, err_message) = 47 let (syntax_kind, err_message) =
45 rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]); 48 rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
@@ -65,7 +68,7 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
65/// Beware that unescape errors are not checked at tokenization time. 68/// Beware that unescape errors are not checked at tokenization time.
66pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)> { 69pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)> {
67 lex_first_token(text) 70 lex_first_token(text)
68 .filter(|(token, _)| token.len == TextUnit::of_str(text)) 71 .filter(|(token, _)| token.len == TextSize::of(text))
69 .map(|(token, error)| (token.kind, error)) 72 .map(|(token, error)| (token.kind, error))
70} 73}
71 74
@@ -75,7 +78,7 @@ pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxEr
75/// Beware that unescape errors are not checked at tokenization time. 78/// Beware that unescape errors are not checked at tokenization time.
76pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind> { 79pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind> {
77 lex_first_token(text) 80 lex_first_token(text)
78 .filter(|(token, error)| !error.is_some() && token.len == TextUnit::of_str(text)) 81 .filter(|(token, error)| !error.is_some() && token.len == TextSize::of(text))
79 .map(|(token, _error)| token.kind) 82 .map(|(token, _error)| token.kind)
80} 83}
81 84
@@ -96,10 +99,9 @@ fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
96 let rustc_token = rustc_lexer::first_token(text); 99 let rustc_token = rustc_lexer::first_token(text);
97 let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text); 100 let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
98 101
99 let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) }; 102 let token = Token { kind: syntax_kind, len: rustc_token.len.try_into().unwrap() };
100 let optional_error = err_message.map(|err_message| { 103 let optional_error = err_message
101 SyntaxError::new(err_message, TextRange::from_to(0.into(), TextUnit::of_str(text))) 104 .map(|err_message| SyntaxError::new(err_message, TextRange::up_to(TextSize::of(text))));
102 });
103 105
104 Some((token, optional_error)) 106 Some((token, optional_error))
105} 107}