aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax
diff options
context:
space:
mode:
authorVeetaha <[email protected]>2020-01-26 18:44:49 +0000
committerVeetaha <[email protected]>2020-02-03 22:00:55 +0000
commitac37a11f04b31f792068a1cb50dbbf5ccd4d982d (patch)
tree52542f3e7b7ec9f4cfbedf2245c4fd4bb8cdffcb /crates/ra_syntax
parentad24976da38482948c586bdbc16004273662ff7e (diff)
Reimplemented lexer with vectors instead of iterators
Diffstat (limited to 'crates/ra_syntax')
-rw-r--r--crates/ra_syntax/src/lib.rs2
-rw-r--r--crates/ra_syntax/src/parsing.rs10
-rw-r--r--crates/ra_syntax/src/parsing/lexer.rs304
-rw-r--r--crates/ra_syntax/src/parsing/reparsing.rs52
-rw-r--r--crates/ra_syntax/src/parsing/text_tree_sink.rs6
-rw-r--r--crates/ra_syntax/src/syntax_error.rs42
-rw-r--r--crates/ra_syntax/src/syntax_node.rs9
-rw-r--r--crates/ra_syntax/src/tests.rs3
8 files changed, 241 insertions, 187 deletions
diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs
index 9931fec84..80b3a0b22 100644
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@@ -41,7 +41,7 @@ use crate::syntax_node::GreenNode;
41pub use crate::{ 41pub use crate::{
42 algo::InsertPosition, 42 algo::InsertPosition,
43 ast::{AstNode, AstToken}, 43 ast::{AstNode, AstToken},
44 parsing::{classify_literal, tokenize, Token}, 44 parsing::{first_token, single_token, tokenize, tokenize_append, Token, TokenizeError},
45 ptr::{AstPtr, SyntaxNodePtr}, 45 ptr::{AstPtr, SyntaxNodePtr},
46 syntax_error::{Location, SyntaxError, SyntaxErrorKind}, 46 syntax_error::{Location, SyntaxError, SyntaxErrorKind},
47 syntax_node::{ 47 syntax_node::{
diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs
index 0387f0378..4e51f920b 100644
--- a/crates/ra_syntax/src/parsing.rs
+++ b/crates/ra_syntax/src/parsing.rs
@@ -7,15 +7,17 @@ mod text_tree_sink;
7mod reparsing; 7mod reparsing;
8 8
9use crate::{syntax_node::GreenNode, SyntaxError}; 9use crate::{syntax_node::GreenNode, SyntaxError};
10use text_token_source::TextTokenSource;
11use text_tree_sink::TextTreeSink;
10 12
11pub use self::lexer::{classify_literal, tokenize, Token}; 13pub use lexer::*;
12 14
13pub(crate) use self::reparsing::incremental_reparse; 15pub(crate) use self::reparsing::incremental_reparse;
14 16
15pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { 17pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
16 let tokens = tokenize(&text); 18 let ParsedTokens { tokens, errors } = tokenize(&text);
17 let mut token_source = text_token_source::TextTokenSource::new(text, &tokens); 19 let mut token_source = TextTokenSource::new(text, &tokens);
18 let mut tree_sink = text_tree_sink::TextTreeSink::new(text, &tokens); 20 let mut tree_sink = TextTreeSink::new(text, &tokens, errors);
19 ra_parser::parse(&mut token_source, &mut tree_sink); 21 ra_parser::parse(&mut token_source, &mut tree_sink);
20 tree_sink.finish() 22 tree_sink.finish()
21} 23}
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index 9dca7d747..6d96f8400 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -1,10 +1,10 @@
1//! Lexer analyzes raw input string and produces lexemes (tokens). 1//! Lexer analyzes raw input string and produces lexemes (tokens).
2 2//! It is just a bridge to `rustc_lexer`.
3use std::iter::{FromIterator, IntoIterator};
4 3
5use crate::{ 4use crate::{
5 SyntaxError, SyntaxErrorKind,
6 SyntaxKind::{self, *}, 6 SyntaxKind::{self, *},
7 TextUnit, 7 TextRange, TextUnit,
8}; 8};
9 9
10/// A token of Rust source. 10/// A token of Rust source.
@@ -15,93 +15,96 @@ pub struct Token {
15 /// The length of the token. 15 /// The length of the token.
16 pub len: TextUnit, 16 pub len: TextUnit,
17} 17}
18impl Token {
19 pub const fn new(kind: SyntaxKind, len: TextUnit) -> Self {
20 Self { kind, len }
21 }
22}
23 18
24#[derive(Debug)] 19#[derive(Debug)]
25/// Represents the result of parsing one token. 20/// Represents the result of parsing one token. Beware that the token may be malformed.
26pub struct ParsedToken { 21pub struct ParsedToken {
27 /// Parsed token. 22 /// Parsed token.
28 pub token: Token, 23 pub token: Token,
29 /// If error is present then parsed token is malformed. 24 /// If error is present then parsed token is malformed.
30 pub error: Option<TokenizeError>, 25 pub error: Option<SyntaxError>,
31}
32impl ParsedToken {
33 pub const fn new(token: Token, error: Option<TokenizeError>) -> Self {
34 Self { token, error }
35 }
36} 26}
37 27
38#[derive(Debug, Default)] 28#[derive(Debug, Default)]
39/// Represents the result of parsing one token. 29/// Represents the result of parsing source code of Rust language.
40pub struct ParsedTokens { 30pub struct ParsedTokens {
41 /// Parsed token. 31 /// Parsed tokens in order they appear in source code.
42 pub tokens: Vec<Token>, 32 pub tokens: Vec<Token>,
43 /// If error is present then parsed token is malformed. 33 /// Collection of all occurred tokenization errors.
44 pub errors: Vec<TokenizeError>, 34 /// In general `self.errors.len() <= self.tokens.len()`
35 pub errors: Vec<SyntaxError>,
45} 36}
46 37impl ParsedTokens {
47impl FromIterator<ParsedToken> for ParsedTokens { 38 /// Append `token` and `error` (if present) to the result.
48 fn from_iter<I: IntoIterator<Item = ParsedToken>>(iter: I) -> Self { 39 pub fn push(&mut self, ParsedToken { token, error }: ParsedToken) {
49 let res = Self::default(); 40 self.tokens.push(token);
50 for entry in iter { 41 if let Some(error) = error {
51 res.tokens.push(entry.token); 42 self.errors.push(error)
52 if let Some(error) = entry.error {
53 res.errors.push(error);
54 }
55 } 43 }
56 res
57 } 44 }
58} 45}
59 46
60/// Returns the first encountered token from the string. 47/// Same as `tokenize_append()`, just a shortcut for creating `ParsedTokens`
61/// If the string contains zero or two or more tokens returns `None`. 48/// and returning the result the usual way.
62pub fn single_token(text: &str) -> Option<ParsedToken> { 49pub fn tokenize(text: &str) -> ParsedTokens {
63 // TODO: test whether this condition indeed checks for a single token 50 let mut parsed = ParsedTokens::default();
64 first_token(text).filter(|parsed| parsed.token.len.to_usize() == text.len()) 51 tokenize_append(text, &mut parsed);
52 parsed
65} 53}
66 54
67/* 55/// Break a string up into its component tokens.
68/// Returns `ParsedTokens` which are basically a pair `(Vec<Token>, Vec<TokenizeError>)` 56/// Returns `ParsedTokens` which are basically a pair `(Vec<Token>, Vec<SyntaxError>)`.
69/// This is just a shorthand for `tokenize(text).collect()` 57/// Beware that it checks for shebang first and its length contributes to resulting
70pub fn tokenize_to_vec_with_errors(text: &str) -> ParsedTokens { 58/// tokens offsets.
71 tokenize(text).collect() 59pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) {
72} 60 // non-empty string is a precondition of `rustc_lexer::strip_shebang()`.
61 if text.is_empty() {
62 return;
63 }
73 64
74/// The simplest version of tokenize, it just retunst a ready-made `Vec<Token>`. 65 let mut offset: usize = rustc_lexer::strip_shebang(text)
75/// It discards all tokenization errors while parsing. If you need that infromation 66 .map(|shebang_len| {
76/// consider using `tokenize()` or `tokenize_to_vec_with_errors()`. 67 parsed.tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) });
77pub fn tokenize_to_vec(text: &str) -> Vec<Token> { 68 shebang_len
78 tokenize(text).map(|parsed_token| parsed_token.token).collect() 69 })
79} 70 .unwrap_or(0);
80*/
81 71
82/// Break a string up into its component tokens 72 let text_without_shebang = &text[offset..];
83/// This is the core function, all other `tokenize*()` functions are simply
84/// handy shortcuts for this one.
85pub fn tokenize(text: &str) -> impl Iterator<Item = ParsedToken> + '_ {
86 let shebang = rustc_lexer::strip_shebang(text).map(|shebang_len| {
87 text = &text[shebang_len..];
88 ParsedToken::new(Token::new(SHEBANG, TextUnit::from_usize(shebang_len)), None)
89 });
90 73
91 // Notice that we eagerly evaluate shebang since it may change text slice 74 for rustc_token in rustc_lexer::tokenize(text_without_shebang) {
92 // and we cannot simplify this into a single method call chain 75 parsed.push(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from_usize(offset)));
93 shebang.into_iter().chain(tokenize_without_shebang(text)) 76 offset += rustc_token.len;
77 }
94} 78}
95 79
96pub fn tokenize_without_shebang(text: &str) -> impl Iterator<Item = ParsedToken> + '_ { 80/// Returns the first encountered token at the beginning of the string.
97 rustc_lexer::tokenize(text).map(|rustc_token| { 81/// If the string contains zero or *two or more tokens* returns `None`.
98 let token_text = &text[..rustc_token.len]; 82///
99 text = &text[rustc_token.len..]; 83/// The main difference between `first_token()` and `single_token()` is that
100 rustc_token_kind_to_parsed_token(&rustc_token.kind, token_text) 84/// the latter returns `None` if the string contains more than one token.
101 }) 85pub fn single_token(text: &str) -> Option<ParsedToken> {
86 first_token(text).filter(|parsed| parsed.token.len.to_usize() == text.len())
102} 87}
103 88
104#[derive(Debug)] 89/// Returns the first encountered token at the beginning of the string.
90/// If the string contains zero tokens returns `None`.
91///
92/// The main difference between `first_token()` and `single_token()` is that
93/// the latter returns `None` if the string contains more than one token.
94pub fn first_token(text: &str) -> Option<ParsedToken> {
95 // non-empty string is a precondition of `rustc_lexer::first_token()`.
96 if text.is_empty() {
97 None
98 } else {
99 let rustc_token = rustc_lexer::first_token(text);
100 Some(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from(0)))
101 }
102}
103
104/// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant.
105/// It describes all the types of errors that may happen during the tokenization
106/// of Rust source.
107#[derive(Debug, Clone, PartialEq, Eq, Hash)]
105pub enum TokenizeError { 108pub enum TokenizeError {
106 /// Base prefix was provided, but there were no digits 109 /// Base prefix was provided, but there were no digits
107 /// after it, e.g. `0x`. 110 /// after it, e.g. `0x`.
@@ -124,94 +127,95 @@ pub enum TokenizeError {
124 /// Raw byte string literal lacks trailing delimiter e.g. `"##` 127 /// Raw byte string literal lacks trailing delimiter e.g. `"##`
125 UnterminatedRawByteString, 128 UnterminatedRawByteString,
126 129
127 /// Raw string lacks a quote after pound characters e.g. `r###` 130 /// Raw string lacks a quote after the pound characters e.g. `r###`
128 UnstartedRawString, 131 UnstartedRawString,
129 /// Raw byte string lacks a quote after pound characters e.g. `br###` 132 /// Raw byte string lacks a quote after the pound characters e.g. `br###`
130 UnstartedRawByteString, 133 UnstartedRawByteString,
131 134
132 /// Lifetime starts with a number e.g. `'4ever` 135 /// Lifetime starts with a number e.g. `'4ever`
133 LifetimeStartsWithNumber, 136 LifetimeStartsWithNumber,
134} 137}
135 138
136fn rustc_token_kind_to_parsed_token( 139/// Mapper function that converts `rustc_lexer::Token` with some additional context
137 rustc_token_kind: &rustc_lexer::TokenKind, 140/// to `ParsedToken`
138 token_text: &str, 141fn rustc_token_to_parsed_token(
142 rustc_token: &rustc_lexer::Token,
143 text: &str,
144 token_start_offset: TextUnit,
139) -> ParsedToken { 145) -> ParsedToken {
140 use rustc_lexer::TokenKind as TK;
141 use TokenizeError as TE;
142
143 // We drop some useful information here (see patterns with double dots `..`) 146 // We drop some useful information here (see patterns with double dots `..`)
144 // Storing that info in `SyntaxKind` is not possible due to its layout requirements of 147 // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
145 // being `u16` that come from `rowan::SyntaxKind` type and changes to `rowan::SyntaxKind` 148 // being `u16` that come from `rowan::SyntaxKind` type and changes to `rowan::SyntaxKind`
146 // would mean hell of a rewrite. 149 // would mean hell of a rewrite
147 150
148 let (syntax_kind, error) = match *rustc_token_kind { 151 let token_range =
149 TK::LineComment => ok(COMMENT), 152 TextRange::offset_len(token_start_offset, TextUnit::from_usize(rustc_token.len));
150 TK::BlockComment { terminated } => ok_if(terminated, COMMENT, TE::UnterminatedBlockComment), 153
151 TK::Whitespace => ok(WHITESPACE), 154 let token_text = &text[token_range];
152 TK::Ident => ok(if token_text == "_" { 155
153 UNDERSCORE 156 let (syntax_kind, error) = {
154 } else { 157 use rustc_lexer::TokenKind as TK;
155 SyntaxKind::from_keyword(token_text).unwrap_or(IDENT) 158 use TokenizeError as TE;
156 }), 159
157 TK::RawIdent => ok(IDENT), 160 match rustc_token.kind {
158 TK::Literal { kind, .. } => match_literal_kind(&kind), 161 TK::LineComment => ok(COMMENT),
159 TK::Lifetime { starts_with_number } => { 162 TK::BlockComment { terminated } => {
160 ok_if(!starts_with_number, LIFETIME, TE::LifetimeStartsWithNumber) 163 ok_if(terminated, COMMENT, TE::UnterminatedBlockComment)
164 }
165 TK::Whitespace => ok(WHITESPACE),
166 TK::Ident => ok(if token_text == "_" {
167 UNDERSCORE
168 } else {
169 SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
170 }),
171 TK::RawIdent => ok(IDENT),
172 TK::Literal { kind, .. } => match_literal_kind(&kind),
173 TK::Lifetime { starts_with_number } => {
174 ok_if(!starts_with_number, LIFETIME, TE::LifetimeStartsWithNumber)
175 }
176 TK::Semi => ok(SEMI),
177 TK::Comma => ok(COMMA),
178 TK::Dot => ok(DOT),
179 TK::OpenParen => ok(L_PAREN),
180 TK::CloseParen => ok(R_PAREN),
181 TK::OpenBrace => ok(L_CURLY),
182 TK::CloseBrace => ok(R_CURLY),
183 TK::OpenBracket => ok(L_BRACK),
184 TK::CloseBracket => ok(R_BRACK),
185 TK::At => ok(AT),
186 TK::Pound => ok(POUND),
187 TK::Tilde => ok(TILDE),
188 TK::Question => ok(QUESTION),
189 TK::Colon => ok(COLON),
190 TK::Dollar => ok(DOLLAR),
191 TK::Eq => ok(EQ),
192 TK::Not => ok(EXCL),
193 TK::Lt => ok(L_ANGLE),
194 TK::Gt => ok(R_ANGLE),
195 TK::Minus => ok(MINUS),
196 TK::And => ok(AMP),
197 TK::Or => ok(PIPE),
198 TK::Plus => ok(PLUS),
199 TK::Star => ok(STAR),
200 TK::Slash => ok(SLASH),
201 TK::Caret => ok(CARET),
202 TK::Percent => ok(PERCENT),
203 TK::Unknown => ok(ERROR),
161 } 204 }
162 TK::Semi => ok(SEMI),
163 TK::Comma => ok(COMMA),
164 TK::Dot => ok(DOT),
165 TK::OpenParen => ok(L_PAREN),
166 TK::CloseParen => ok(R_PAREN),
167 TK::OpenBrace => ok(L_CURLY),
168 TK::CloseBrace => ok(R_CURLY),
169 TK::OpenBracket => ok(L_BRACK),
170 TK::CloseBracket => ok(R_BRACK),
171 TK::At => ok(AT),
172 TK::Pound => ok(POUND),
173 TK::Tilde => ok(TILDE),
174 TK::Question => ok(QUESTION),
175 TK::Colon => ok(COLON),
176 TK::Dollar => ok(DOLLAR),
177 TK::Eq => ok(EQ),
178 TK::Not => ok(EXCL),
179 TK::Lt => ok(L_ANGLE),
180 TK::Gt => ok(R_ANGLE),
181 TK::Minus => ok(MINUS),
182 TK::And => ok(AMP),
183 TK::Or => ok(PIPE),
184 TK::Plus => ok(PLUS),
185 TK::Star => ok(STAR),
186 TK::Slash => ok(SLASH),
187 TK::Caret => ok(CARET),
188 TK::Percent => ok(PERCENT),
189 TK::Unknown => ok(ERROR),
190 }; 205 };
191 206
192 return ParsedToken::new( 207 return ParsedToken {
193 Token::new(syntax_kind, TextUnit::from_usize(token_text.len())), 208 token: Token { kind: syntax_kind, len: token_range.len() },
194 error, 209 error: error
195 ); 210 .map(|error| SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range)),
211 };
196 212
197 type ParsedSyntaxKind = (SyntaxKind, Option<TokenizeError>); 213 type ParsedSyntaxKind = (SyntaxKind, Option<TokenizeError>);
198 214
199 const fn ok(syntax_kind: SyntaxKind) -> ParsedSyntaxKind { 215 fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> ParsedSyntaxKind {
200 (syntax_kind, None)
201 }
202 const fn ok_if(cond: bool, syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind {
203 if cond {
204 ok(syntax_kind)
205 } else {
206 err(syntax_kind, error)
207 }
208 }
209 const fn err(syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind {
210 (syntax_kind, Some(error))
211 }
212
213 const fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> ParsedSyntaxKind {
214 use rustc_lexer::LiteralKind as LK; 216 use rustc_lexer::LiteralKind as LK;
217 use TokenizeError as TE;
218
215 match *kind { 219 match *kind {
216 LK::Int { empty_int, .. } => ok_if(!empty_int, INT_NUMBER, TE::EmptyInt), 220 LK::Int { empty_int, .. } => ok_if(!empty_int, INT_NUMBER, TE::EmptyInt),
217 LK::Float { empty_exponent, .. } => { 221 LK::Float { empty_exponent, .. } => {
@@ -237,27 +241,17 @@ fn rustc_token_kind_to_parsed_token(
237 } 241 }
238 } 242 }
239 } 243 }
240} 244 const fn ok(syntax_kind: SyntaxKind) -> ParsedSyntaxKind {
241 245 (syntax_kind, None)
242pub fn first_token(text: &str) -> Option<ParsedToken> {
243 // Checking for emptyness because of `rustc_lexer::first_token()` invariant (see its body)
244 if text.is_empty() {
245 None
246 } else {
247 let rustc_token = rustc_lexer::first_token(text);
248 Some(rustc_token_kind_to_parsed_token(&rustc_token.kind, &text[..rustc_token.len]))
249 } 246 }
250} 247 const fn err(syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind {
251 248 (syntax_kind, Some(error))
252// TODO: think what to do with this ad hoc function 249 }
253pub fn classify_literal(text: &str) -> Option<ParsedToken> { 250 fn ok_if(cond: bool, syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind {
254 let t = rustc_lexer::first_token(text); 251 if cond {
255 if t.len != text.len() { 252 ok(syntax_kind)
256 return None; 253 } else {
254 err(syntax_kind, error)
255 }
257 } 256 }
258 let kind = match t.kind {
259 rustc_lexer::TokenKind::Literal { kind, .. } => match_literal_kind(kind),
260 _ => return None,
261 };
262 Some(ParsedToken::new(Token::new(kind, TextUnit::from_usize(t.len))))
263} 257}
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index 3abc09877..ad1a7c855 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -12,7 +12,7 @@ use ra_text_edit::AtomTextEdit;
12use crate::{ 12use crate::{
13 algo, 13 algo,
14 parsing::{ 14 parsing::{
15 lexer::{tokenize, Token}, 15 lexer::{single_token, tokenize, ParsedTokens, Token},
16 text_token_source::TextTokenSource, 16 text_token_source::TextTokenSource,
17 text_tree_sink::TextTreeSink, 17 text_tree_sink::TextTreeSink,
18 }, 18 },
@@ -41,36 +41,42 @@ fn reparse_token<'node>(
41 root: &'node SyntaxNode, 41 root: &'node SyntaxNode,
42 edit: &AtomTextEdit, 42 edit: &AtomTextEdit,
43) -> Option<(GreenNode, TextRange)> { 43) -> Option<(GreenNode, TextRange)> {
44 let token = algo::find_covering_element(root, edit.delete).as_token()?.clone(); 44 let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone();
45 match token.kind() { 45 let prev_token_kind = prev_token.kind();
46 match prev_token_kind {
46 WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => { 47 WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => {
47 if token.kind() == WHITESPACE || token.kind() == COMMENT { 48 if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT {
48 // removing a new line may extend the previous token 49 // removing a new line may extend the previous token
49 if token.text()[edit.delete - token.text_range().start()].contains('\n') { 50 let deleted_range = edit.delete - prev_token.text_range().start();
51 if prev_token.text()[deleted_range].contains('\n') {
50 return None; 52 return None;
51 } 53 }
52 } 54 }
53 55
54 let text = get_text_after_edit(token.clone().into(), &edit); 56 let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
55 let lex_tokens = tokenize(&text); 57 let new_token_kind = single_token(&new_text)?.token.kind;
56 let lex_token = match lex_tokens[..] {
57 [lex_token] if lex_token.kind == token.kind() => lex_token,
58 _ => return None,
59 };
60 58
61 if lex_token.kind == IDENT && is_contextual_kw(&text) { 59 if new_token_kind != prev_token_kind
60 || (new_token_kind == IDENT && is_contextual_kw(&new_text))
61 {
62 return None; 62 return None;
63 } 63 }
64 64
65 if let Some(next_char) = root.text().char_at(token.text_range().end()) { 65 // Check that edited token is not a part of the bigger token.
66 let tokens_with_next_char = tokenize(&format!("{}{}", text, next_char)); 66 // E.g. if for source code `bruh"str"` the user removed `ruh`, then
67 if tokens_with_next_char.len() == 1 { 67 // `b` no longer remains an identifier, but becomes a part of byte string literal
68 if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
69 new_text.push(next_char);
70 let token_with_next_char = single_token(&new_text);
71 if token_with_next_char.is_some() {
68 return None; 72 return None;
69 } 73 }
74 new_text.pop();
70 } 75 }
71 76
72 let new_token = GreenToken::new(rowan::SyntaxKind(token.kind().into()), text.into()); 77 let new_token =
73 Some((token.replace_with(new_token), token.text_range())) 78 GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into());
79 Some((prev_token.replace_with(new_token), prev_token.text_range()))
74 } 80 }
75 _ => None, 81 _ => None,
76 } 82 }
@@ -82,12 +88,12 @@ fn reparse_block<'node>(
82) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> { 88) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
83 let (node, reparser) = find_reparsable_node(root, edit.delete)?; 89 let (node, reparser) = find_reparsable_node(root, edit.delete)?;
84 let text = get_text_after_edit(node.clone().into(), &edit); 90 let text = get_text_after_edit(node.clone().into(), &edit);
85 let tokens = tokenize(&text); 91 let ParsedTokens { tokens, errors } = tokenize(&text);
86 if !is_balanced(&tokens) { 92 if !is_balanced(&tokens) {
87 return None; 93 return None;
88 } 94 }
89 let mut token_source = TextTokenSource::new(&text, &tokens); 95 let mut token_source = TextTokenSource::new(&text, &tokens);
90 let mut tree_sink = TextTreeSink::new(&text, &tokens); 96 let mut tree_sink = TextTreeSink::new(&text, &tokens, errors);
91 reparser.parse(&mut token_source, &mut tree_sink); 97 reparser.parse(&mut token_source, &mut tree_sink);
92 let (green, new_errors) = tree_sink.finish(); 98 let (green, new_errors) = tree_sink.finish();
93 Some((node.replace_with(green), new_errors, node.text_range())) 99 Some((node.replace_with(green), new_errors, node.text_range()))
@@ -96,6 +102,9 @@ fn reparse_block<'node>(
96fn get_text_after_edit(element: SyntaxElement, edit: &AtomTextEdit) -> String { 102fn get_text_after_edit(element: SyntaxElement, edit: &AtomTextEdit) -> String {
97 let edit = 103 let edit =
98 AtomTextEdit::replace(edit.delete - element.text_range().start(), edit.insert.clone()); 104 AtomTextEdit::replace(edit.delete - element.text_range().start(), edit.insert.clone());
105
106 // Note: we could move this match to a method or even further: use enum_dispatch crate
107 // https://crates.io/crates/enum_dispatch
99 let text = match element { 108 let text = match element {
100 NodeOrToken::Token(token) => token.text().to_string(), 109 NodeOrToken::Token(token) => token.text().to_string(),
101 NodeOrToken::Node(node) => node.text().to_string(), 110 NodeOrToken::Node(node) => node.text().to_string(),
@@ -112,6 +121,9 @@ fn is_contextual_kw(text: &str) -> bool {
112 121
113fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> { 122fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
114 let node = algo::find_covering_element(node, range); 123 let node = algo::find_covering_element(node, range);
124
125 // Note: we could move this match to a method or even further: use enum_dispatch crate
126 // https://crates.io/crates/enum_dispatch
115 let mut ancestors = match node { 127 let mut ancestors = match node {
116 NodeOrToken::Token(it) => it.parent().ancestors(), 128 NodeOrToken::Token(it) => it.parent().ancestors(),
117 NodeOrToken::Node(it) => it.ancestors(), 129 NodeOrToken::Node(it) => it.ancestors(),
@@ -181,6 +193,8 @@ mod tests {
181 let fully_reparsed = SourceFile::parse(&after); 193 let fully_reparsed = SourceFile::parse(&after);
182 let incrementally_reparsed: Parse<SourceFile> = { 194 let incrementally_reparsed: Parse<SourceFile> = {
183 let f = SourceFile::parse(&before); 195 let f = SourceFile::parse(&before);
196 // FIXME: it seems this initialization statement is unnecessary (see edit in outer scope)
197 // Investigate whether it should really be removed.
184 let edit = AtomTextEdit { delete: range, insert: replace_with.to_string() }; 198 let edit = AtomTextEdit { delete: range, insert: replace_with.to_string() };
185 let (green, new_errors, range) = 199 let (green, new_errors, range) =
186 incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap(); 200 incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap();
diff --git a/crates/ra_syntax/src/parsing/text_tree_sink.rs b/crates/ra_syntax/src/parsing/text_tree_sink.rs
index c36756d6c..5faac588b 100644
--- a/crates/ra_syntax/src/parsing/text_tree_sink.rs
+++ b/crates/ra_syntax/src/parsing/text_tree_sink.rs
@@ -92,14 +92,14 @@ impl<'a> TreeSink for TextTreeSink<'a> {
92} 92}
93 93
94impl<'a> TextTreeSink<'a> { 94impl<'a> TextTreeSink<'a> {
95 pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> TextTreeSink<'a> { 95 pub(super) fn new(text: &'a str, tokens: &'a [Token], errors: Vec<SyntaxError>) -> Self {
96 TextTreeSink { 96 Self {
97 text, 97 text,
98 tokens, 98 tokens,
99 text_pos: 0.into(), 99 text_pos: 0.into(),
100 token_pos: 0, 100 token_pos: 0,
101 state: State::PendingStart, 101 state: State::PendingStart,
102 inner: SyntaxTreeBuilder::default(), 102 inner: SyntaxTreeBuilder::new(errors),
103 } 103 }
104 } 104 }
105 105
diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs
index 9122dda29..af18a30f2 100644
--- a/crates/ra_syntax/src/syntax_error.rs
+++ b/crates/ra_syntax/src/syntax_error.rs
@@ -84,6 +84,9 @@ pub enum SyntaxErrorKind {
84 ParseError(ParseError), 84 ParseError(ParseError),
85 EscapeError(EscapeError), 85 EscapeError(EscapeError),
86 TokenizeError(TokenizeError), 86 TokenizeError(TokenizeError),
87 // FIXME: the obvious pattern of this enum dictates that the following enum variants
88 // should be wrapped into something like `SemanticError(SemanticError)`
89 // or `ValidateError(ValidateError)` or `SemanticValidateError(...)`
87 InvalidBlockAttr, 90 InvalidBlockAttr,
88 InvalidMatchInnerAttr, 91 InvalidMatchInnerAttr,
89 InvalidTupleIndexFormat, 92 InvalidTupleIndexFormat,
@@ -106,6 +109,7 @@ impl fmt::Display for SyntaxErrorKind {
106 } 109 }
107 ParseError(msg) => write!(f, "{}", msg.0), 110 ParseError(msg) => write!(f, "{}", msg.0),
108 EscapeError(err) => write!(f, "{}", err), 111 EscapeError(err) => write!(f, "{}", err),
112 TokenizeError(err) => write!(f, "{}", err),
109 VisibilityNotAllowed => { 113 VisibilityNotAllowed => {
110 write!(f, "unnecessary visibility qualifier") 114 write!(f, "unnecessary visibility qualifier")
111 } 115 }
@@ -116,6 +120,44 @@ impl fmt::Display for SyntaxErrorKind {
116 } 120 }
117} 121}
118 122
123impl fmt::Display for TokenizeError {
124 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
125 let msg = match self {
126 TokenizeError::EmptyInt => "Missing digits after integer base prefix",
127 TokenizeError::EmptyExponent => "Missing digits after the exponent symbol",
128 TokenizeError::UnterminatedBlockComment => {
129 "Missing trailing `*/` symbols to terminate the block comment"
130 }
131 TokenizeError::UnterminatedChar => {
132 "Missing trailing `'` symbol to terminate the character literal"
133 }
134 TokenizeError::UnterminatedByte => {
135 "Missing trailing `'` symbol to terminate the byte literal"
136 }
137 TokenizeError::UnterminatedString => {
138 "Missing trailing `\"` symbol to terminate the string literal"
139 }
140 TokenizeError::UnterminatedByteString => {
141 "Missing trailing `\"` symbol to terminate the byte string literal"
142 }
143 TokenizeError::UnterminatedRawString => {
144 "Missing trailing `\"` with `#` symbols to terminate the raw string literal"
145 }
146 TokenizeError::UnterminatedRawByteString => {
147 "Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"
148 }
149 TokenizeError::UnstartedRawString => {
150 "Missing `\"` symbol after `#` symbols to begin the raw string literal"
151 }
152 TokenizeError::UnstartedRawByteString => {
153 "Missing `\"` symbol after `#` symbols to begin the raw byte string literal"
154 }
155 TokenizeError::LifetimeStartsWithNumber => "Lifetime name cannot start with a number",
156 };
157 write!(f, "{}", msg)
158 }
159}
160
119impl fmt::Display for EscapeError { 161impl fmt::Display for EscapeError {
120 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 162 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
121 let msg = match self { 163 let msg = match self {
diff --git a/crates/ra_syntax/src/syntax_node.rs b/crates/ra_syntax/src/syntax_node.rs
index b3eb5da63..591855302 100644
--- a/crates/ra_syntax/src/syntax_node.rs
+++ b/crates/ra_syntax/src/syntax_node.rs
@@ -4,7 +4,7 @@
4//! `SyntaxNode`, and a basic traversal API (parent, children, siblings). 4//! `SyntaxNode`, and a basic traversal API (parent, children, siblings).
5//! 5//!
6//! The *real* implementation is in the (language-agnostic) `rowan` crate, this 6//! The *real* implementation is in the (language-agnostic) `rowan` crate, this
7//! modules just wraps its API. 7//! module just wraps its API.
8 8
9use ra_parser::ParseError; 9use ra_parser::ParseError;
10use rowan::{GreenNodeBuilder, Language}; 10use rowan::{GreenNodeBuilder, Language};
@@ -38,14 +38,15 @@ pub type SyntaxElementChildren = rowan::SyntaxElementChildren<RustLanguage>;
38 38
39pub use rowan::{Direction, NodeOrToken}; 39pub use rowan::{Direction, NodeOrToken};
40 40
41#[derive(Default)]
41pub struct SyntaxTreeBuilder { 42pub struct SyntaxTreeBuilder {
42 errors: Vec<SyntaxError>, 43 errors: Vec<SyntaxError>,
43 inner: GreenNodeBuilder<'static>, 44 inner: GreenNodeBuilder<'static>,
44} 45}
45 46
46impl Default for SyntaxTreeBuilder { 47impl SyntaxTreeBuilder {
47 fn default() -> SyntaxTreeBuilder { 48 pub fn new(errors: Vec<SyntaxError>) -> Self {
48 SyntaxTreeBuilder { errors: Vec::new(), inner: GreenNodeBuilder::new() } 49 Self { errors, inner: GreenNodeBuilder::default() }
49 } 50 }
50} 51}
51 52
diff --git a/crates/ra_syntax/src/tests.rs b/crates/ra_syntax/src/tests.rs
index 458920607..df21c957c 100644
--- a/crates/ra_syntax/src/tests.rs
+++ b/crates/ra_syntax/src/tests.rs
@@ -10,7 +10,8 @@ use crate::{fuzz, SourceFile};
10#[test] 10#[test]
11fn lexer_tests() { 11fn lexer_tests() {
12 dir_tests(&test_data_dir(), &["lexer"], |text, _| { 12 dir_tests(&test_data_dir(), &["lexer"], |text, _| {
13 let tokens = crate::tokenize(text); 13 // FIXME: add tests for errors (their format is up to discussion)
14 let tokens = crate::tokenize(text).tokens;
14 dump_tokens(&tokens, text) 15 dump_tokens(&tokens, text)
15 }) 16 })
16} 17}