author    Veetaha <[email protected]>  2020-01-28 05:09:13 +0000
committer Veetaha <[email protected]>  2020-02-03 22:00:55 +0000
commit    9e7eaa959f9dc368a55f1a80b35651b78b3d0883 (patch)
tree      4b1f4af14d9898301949fa937219006d671a72ef /crates
parent    bf60661aa3e2a77fedb3e1627675842d05538860 (diff)
ra_syntax: refactored the lexer design as per @matklad and @kiljacken PR review
Diffstat (limited to 'crates')
-rw-r--r--  crates/ra_ide/src/references/rename.rs          |   6
-rw-r--r--  crates/ra_mbe/src/subtree_source.rs             |   5
-rw-r--r--  crates/ra_syntax/src/lib.rs                     |   4
-rw-r--r--  crates/ra_syntax/src/parsing.rs                 |  12
-rw-r--r--  crates/ra_syntax/src/parsing/lexer.rs           | 313
-rw-r--r--  crates/ra_syntax/src/parsing/reparsing.rs       |  25
-rw-r--r--  crates/ra_syntax/src/parsing/text_tree_sink.rs  |   4
-rw-r--r--  crates/ra_syntax/src/syntax_node.rs             |   6
-rw-r--r--  crates/ra_syntax/src/tests.rs                   |   2
9 files changed, 199 insertions(+), 178 deletions(-)
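
At a glance, the refactor collapses the `ParsedToken`/`ParsedTokens` wrapper types into plain tuples. Below is a minimal sketch of the resulting public surface, using only the signatures visible in the diffs that follow (not an exhaustive API listing):

    use ra_syntax::{lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize};
    use ra_syntax::{SyntaxError, SyntaxKind, Token};

    fn api_shapes(text: &str) {
        // Whole-input lexing: tokens and errors now travel as a pair.
        let (_tokens, _errors): (Vec<Token>, Vec<SyntaxError>) = tokenize(text);

        // Exactly-one-token lexing, with the (possible) error surfaced:
        let _: Option<(SyntaxKind, Option<SyntaxError>)> = lex_single_syntax_kind(text);

        // Exactly-one-token lexing that rejects malformed tokens outright:
        let _: Option<SyntaxKind> = lex_single_valid_syntax_kind(text);
    }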
diff --git a/crates/ra_ide/src/references/rename.rs b/crates/ra_ide/src/references/rename.rs
index ad3e86f7c..9a84c1c88 100644
--- a/crates/ra_ide/src/references/rename.rs
+++ b/crates/ra_ide/src/references/rename.rs
@@ -2,7 +2,9 @@
 
 use hir::ModuleSource;
 use ra_db::{RelativePath, RelativePathBuf, SourceDatabase, SourceDatabaseExt};
-use ra_syntax::{algo::find_node_at_offset, ast, single_token, AstNode, SyntaxKind, SyntaxNode};
+use ra_syntax::{
+    algo::find_node_at_offset, ast, lex_single_valid_syntax_kind, AstNode, SyntaxKind, SyntaxNode,
+};
 use ra_text_edit::TextEdit;
 
 use crate::{
@@ -17,7 +19,7 @@ pub(crate) fn rename(
     position: FilePosition,
     new_name: &str,
 ) -> Option<RangeInfo<SourceChange>> {
-    match single_token(new_name)?.token.kind {
+    match lex_single_valid_syntax_kind(new_name)? {
         SyntaxKind::IDENT | SyntaxKind::UNDERSCORE => (),
         _ => return None,
     }
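
The rename path only needs to know whether the new name lexes as a single well-formed identifier, which is exactly what the valid-only helper answers. A hedged sketch of that check in isolation (the `is_valid_rename_target` helper is hypothetical, not part of this commit):

    use ra_syntax::{lex_single_valid_syntax_kind, SyntaxKind};

    // Hypothetical helper for illustration only.
    fn is_valid_rename_target(new_name: &str) -> bool {
        // `lex_single_valid_syntax_kind` returns None for zero tokens, for
        // more than one token, and for a token that lexed with an error.
        match lex_single_valid_syntax_kind(new_name) {
            Some(SyntaxKind::IDENT) | Some(SyntaxKind::UNDERSCORE) => true,
            _ => false,
        }
    }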
diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs
index 72ac8df03..c9f42b3dd 100644
--- a/crates/ra_mbe/src/subtree_source.rs
+++ b/crates/ra_mbe/src/subtree_source.rs
@@ -1,7 +1,7 @@
 //! FIXME: write short doc here
 
 use ra_parser::{Token, TokenSource};
-use ra_syntax::{single_token, SmolStr, SyntaxKind, SyntaxKind::*, T};
+use ra_syntax::{lex_single_valid_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T};
 use std::cell::{Cell, Ref, RefCell};
 use tt::buffer::{Cursor, TokenBuffer};
 
@@ -129,8 +129,7 @@ fn convert_delim(d: Option<tt::DelimiterKind>, closing: bool) -> TtToken {
 }
 
 fn convert_literal(l: &tt::Literal) -> TtToken {
-    let kind = single_token(&l.text)
-        .map(|parsed| parsed.token.kind)
+    let kind = lex_single_valid_syntax_kind(&l.text)
         .filter(|kind| kind.is_literal())
         .unwrap_or_else(|| match l.text.as_ref() {
             "true" => T![true],
diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs
index 80b3a0b22..f8f4b64c1 100644
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@@ -41,7 +41,9 @@ use crate::syntax_node::GreenNode;
 pub use crate::{
     algo::InsertPosition,
     ast::{AstNode, AstToken},
-    parsing::{first_token, single_token, tokenize, tokenize_append, Token, TokenizeError},
+    parsing::{
+        lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token, TokenizeError,
+    },
     ptr::{AstPtr, SyntaxNodePtr},
     syntax_error::{Location, SyntaxError, SyntaxErrorKind},
     syntax_node::{
diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs
index 4e51f920b..e5eb80850 100644
--- a/crates/ra_syntax/src/parsing.rs
+++ b/crates/ra_syntax/src/parsing.rs
@@ -15,9 +15,15 @@ pub use lexer::*;
 pub(crate) use self::reparsing::incremental_reparse;
 
 pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
-    let ParsedTokens { tokens, errors } = tokenize(&text);
+    let (tokens, lexer_errors) = tokenize(&text);
+
     let mut token_source = TextTokenSource::new(text, &tokens);
-    let mut tree_sink = TextTreeSink::new(text, &tokens, errors);
+    let mut tree_sink = TextTreeSink::new(text, &tokens);
+
     ra_parser::parse(&mut token_source, &mut tree_sink);
-    tree_sink.finish()
+
+    let (tree, mut parser_errors) = tree_sink.finish();
+    parser_errors.extend(lexer_errors);
+
+    (tree, parser_errors)
 }
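
The reworked `parse_text` threads lexer errors past the parser and appends them to the parser's own diagnostics. A self-contained sketch of that merging pattern, using toy stand-ins for `tokenize` and `ra_parser::parse` (the real code returns `(Vec<Token>, Vec<SyntaxError>)` and builds a `GreenNode`):

    fn lex(text: &str) -> (Vec<char>, Vec<String>) {
        let errors =
            if text.contains('\0') { vec!["lexer error".to_string()] } else { Vec::new() };
        (text.chars().collect(), errors)
    }

    fn parse(tokens: &[char]) -> (String, Vec<String>) {
        let errors =
            if tokens.is_empty() { vec!["parser error".to_string()] } else { Vec::new() };
        (tokens.iter().collect(), errors)
    }

    fn parse_text(text: &str) -> (String, Vec<String>) {
        let (tokens, lexer_errors) = lex(text);
        let (tree, mut errors) = parse(&tokens);
        // Same shape as the diff: parser errors first, lexer errors appended.
        errors.extend(lexer_errors);
        (tree, errors)
    }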
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index bf6b4d637..55755be18 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -16,55 +16,21 @@ pub struct Token {
     pub len: TextUnit,
 }
 
-/// Represents the result of parsing one token. Beware that the token may be malformed.
-#[derive(Debug)]
-pub struct ParsedToken {
-    /// Parsed token.
-    pub token: Token,
-    /// If error is present then parsed token is malformed.
-    pub error: Option<SyntaxError>,
-}
-
-#[derive(Debug, Default)]
-/// Represents the result of parsing source code of Rust language.
-pub struct ParsedTokens {
-    /// Parsed tokens in order they appear in source code.
-    pub tokens: Vec<Token>,
-    /// Collection of all occurred tokenization errors.
-    /// In general `self.errors.len() <= self.tokens.len()`
-    pub errors: Vec<SyntaxError>,
-}
-impl ParsedTokens {
-    /// Append `token` and `error` (if present) to the result.
-    pub fn push(&mut self, ParsedToken { token, error }: ParsedToken) {
-        self.tokens.push(token);
-        if let Some(error) = error {
-            self.errors.push(error)
-        }
-    }
-}
-
-/// Same as `tokenize_append()`, just a shortcut for creating `ParsedTokens`
-/// and returning the result the usual way.
-pub fn tokenize(text: &str) -> ParsedTokens {
-    let mut parsed = ParsedTokens::default();
-    tokenize_append(text, &mut parsed);
-    parsed
-}
-
 /// Break a string up into its component tokens.
-/// Writes to `ParsedTokens` which are basically a pair `(Vec<Token>, Vec<SyntaxError>)`.
 /// Beware that it checks for shebang first and its length contributes to resulting
 /// tokens offsets.
-pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) {
+pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
     // non-empty string is a precondition of `rustc_lexer::strip_shebang()`.
     if text.is_empty() {
-        return;
+        return Default::default();
     }
 
+    let mut tokens = Vec::new();
+    let mut errors = Vec::new();
+
     let mut offset: usize = rustc_lexer::strip_shebang(text)
         .map(|shebang_len| {
-            parsed.tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) });
+            tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) });
             shebang_len
         })
         .unwrap_or(0);
@@ -72,35 +38,76 @@ pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) {
     let text_without_shebang = &text[offset..];
 
     for rustc_token in rustc_lexer::tokenize(text_without_shebang) {
-        parsed.push(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from_usize(offset)));
+        let token_len = TextUnit::from_usize(rustc_token.len);
+        let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len);
+
+        let (syntax_kind, error) =
+            rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
+
+        tokens.push(Token { kind: syntax_kind, len: token_len });
+
+        if let Some(error) = error {
+            errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range));
+        }
+
         offset += rustc_token.len;
     }
+
+    (tokens, errors)
 }
 
-/// Returns the first encountered token at the beginning of the string.
-/// If the string contains zero or *two or more tokens* returns `None`.
+/// Returns `SyntaxKind` and `Option<SyntaxError>` of the first token
+/// encountered at the beginning of the string.
+///
+/// Returns `None` if the string contains zero *or two or more* tokens.
+/// The token is malformed if the returned error is not `None`.
+///
+/// Beware that unescape errors are not checked at tokenization time.
+pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)> {
+    first_token(text)
+        .filter(|(token, _)| token.len.to_usize() == text.len())
+        .map(|(token, error)| (token.kind, error))
+}
+
+/// The same as `lex_single_syntax_kind()` but returns only `SyntaxKind` and
+/// returns `None` if any tokenization error occurred.
 ///
-/// The main difference between `first_token()` and `single_token()` is that
-/// the latter returns `None` if the string contains more than one token.
-pub fn single_token(text: &str) -> Option<ParsedToken> {
-    first_token(text).filter(|parsed| parsed.token.len.to_usize() == text.len())
+/// Beware that unescape errors are not checked at tokenization time.
+pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind> {
+    first_token(text)
+        .filter(|(token, error)| error.is_none() && token.len.to_usize() == text.len())
+        .map(|(token, _error)| token.kind)
 }
 
 /// Returns the first encountered token at the beginning of the string.
-/// If the string contains zero tokens returns `None`.
 ///
-/// The main difference between `first_token() and single_token()` is that
-/// the latter returns `None` if the string contains more than one token.
-pub fn first_token(text: &str) -> Option<ParsedToken> {
+/// Returns `None` if the string contains zero tokens or if the token was parsed
+/// with an error.
+///
+/// Beware that unescape errors are not checked at tokenization time.
+fn first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
     // non-empty string is a precondition of `rustc_lexer::first_token()`.
     if text.is_empty() {
-        None
-    } else {
-        let rustc_token = rustc_lexer::first_token(text);
-        Some(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from(0)))
+        return None;
     }
+
+    let rustc_token = rustc_lexer::first_token(text);
+    let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
+
+    let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) };
+    let error = error.map(|error| {
+        SyntaxError::new(
+            SyntaxErrorKind::TokenizeError(error),
+            TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)),
+        )
+    });
+
+    Some((token, error))
 }
 
+// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice:
+// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067
+
 /// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant.
 /// It describes all the types of errors that may happen during the tokenization
 /// of Rust source.
@@ -136,122 +143,132 @@ pub enum TokenizeError {
     LifetimeStartsWithNumber,
 }
 
-/// Mapper function that converts `rustc_lexer::Token` with some additional context
-/// to `ParsedToken`
-fn rustc_token_to_parsed_token(
-    rustc_token: &rustc_lexer::Token,
-    text: &str,
-    token_start_offset: TextUnit,
-) -> ParsedToken {
+fn rustc_token_kind_to_syntax_kind(
+    rustc_token_kind: &rustc_lexer::TokenKind,
+    token_text: &str,
+) -> (SyntaxKind, Option<TokenizeError>) {
+    // A note on an intended tradeoff:
     // We drop some useful information here (see patterns with double dots `..`)
     // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
-    // being `u16` that come from `rowan::SyntaxKind` type and changes to `rowan::SyntaxKind`
-    // would mean hell of a rewrite
+    // being `u16` that come from `rowan::SyntaxKind`.
 
-    let token_range =
-        TextRange::offset_len(token_start_offset, TextUnit::from_usize(rustc_token.len));
-
-    let token_text = &text[token_range];
-
-    let (syntax_kind, error) = {
+    let syntax_kind = {
         use rustc_lexer::TokenKind as TK;
         use TokenizeError as TE;
 
-        match rustc_token.kind {
-            TK::LineComment => ok(COMMENT),
-            TK::BlockComment { terminated } => {
-                ok_if(terminated, COMMENT, TE::UnterminatedBlockComment)
+        match rustc_token_kind {
+            TK::LineComment => COMMENT,
+
+            TK::BlockComment { terminated: true } => COMMENT,
+            TK::BlockComment { terminated: false } => {
+                return (COMMENT, Some(TE::UnterminatedBlockComment));
             }
-            TK::Whitespace => ok(WHITESPACE),
-            TK::Ident => ok(if token_text == "_" {
-                UNDERSCORE
-            } else {
-                SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
-            }),
-            TK::RawIdent => ok(IDENT),
-            TK::Literal { kind, .. } => match_literal_kind(&kind),
-            TK::Lifetime { starts_with_number } => {
-                ok_if(!starts_with_number, LIFETIME, TE::LifetimeStartsWithNumber)
+
+            TK::Whitespace => WHITESPACE,
+
+            TK::Ident => {
+                if token_text == "_" {
+                    UNDERSCORE
+                } else {
+                    SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
+                }
             }
-            TK::Semi => ok(SEMI),
-            TK::Comma => ok(COMMA),
-            TK::Dot => ok(DOT),
-            TK::OpenParen => ok(L_PAREN),
-            TK::CloseParen => ok(R_PAREN),
-            TK::OpenBrace => ok(L_CURLY),
-            TK::CloseBrace => ok(R_CURLY),
-            TK::OpenBracket => ok(L_BRACK),
-            TK::CloseBracket => ok(R_BRACK),
-            TK::At => ok(AT),
-            TK::Pound => ok(POUND),
-            TK::Tilde => ok(TILDE),
-            TK::Question => ok(QUESTION),
-            TK::Colon => ok(COLON),
-            TK::Dollar => ok(DOLLAR),
-            TK::Eq => ok(EQ),
-            TK::Not => ok(EXCL),
-            TK::Lt => ok(L_ANGLE),
-            TK::Gt => ok(R_ANGLE),
-            TK::Minus => ok(MINUS),
-            TK::And => ok(AMP),
-            TK::Or => ok(PIPE),
-            TK::Plus => ok(PLUS),
-            TK::Star => ok(STAR),
-            TK::Slash => ok(SLASH),
-            TK::Caret => ok(CARET),
-            TK::Percent => ok(PERCENT),
-            TK::Unknown => ok(ERROR),
-        }
-    };
 
-    return ParsedToken {
-        token: Token { kind: syntax_kind, len: token_range.len() },
-        error: error
-            .map(|error| SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range)),
+            TK::RawIdent => IDENT,
+            TK::Literal { kind, .. } => return match_literal_kind(&kind),
+
+            TK::Lifetime { starts_with_number: false } => LIFETIME,
+            TK::Lifetime { starts_with_number: true } => {
+                return (LIFETIME, Some(TE::LifetimeStartsWithNumber))
+            }
+
+            TK::Semi => SEMI,
+            TK::Comma => COMMA,
+            TK::Dot => DOT,
+            TK::OpenParen => L_PAREN,
+            TK::CloseParen => R_PAREN,
+            TK::OpenBrace => L_CURLY,
+            TK::CloseBrace => R_CURLY,
+            TK::OpenBracket => L_BRACK,
+            TK::CloseBracket => R_BRACK,
+            TK::At => AT,
+            TK::Pound => POUND,
+            TK::Tilde => TILDE,
+            TK::Question => QUESTION,
+            TK::Colon => COLON,
+            TK::Dollar => DOLLAR,
+            TK::Eq => EQ,
+            TK::Not => EXCL,
+            TK::Lt => L_ANGLE,
+            TK::Gt => R_ANGLE,
+            TK::Minus => MINUS,
+            TK::And => AMP,
+            TK::Or => PIPE,
+            TK::Plus => PLUS,
+            TK::Star => STAR,
+            TK::Slash => SLASH,
+            TK::Caret => CARET,
+            TK::Percent => PERCENT,
+            TK::Unknown => ERROR,
+        }
     };
 
-    type ParsedSyntaxKind = (SyntaxKind, Option<TokenizeError>);
+    return (syntax_kind, None);
 
-    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> ParsedSyntaxKind {
+    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<TokenizeError>) {
         use rustc_lexer::LiteralKind as LK;
         use TokenizeError as TE;
 
-        match *kind {
-            LK::Int { empty_int, .. } => ok_if(!empty_int, INT_NUMBER, TE::EmptyInt),
-            LK::Float { empty_exponent, .. } => {
-                ok_if(!empty_exponent, FLOAT_NUMBER, TE::EmptyExponent)
+        #[rustfmt::skip]
+        let syntax_kind = match *kind {
+            LK::Int { empty_int: false, .. } => INT_NUMBER,
+            LK::Int { empty_int: true, .. } => {
+                return (INT_NUMBER, Some(TE::EmptyInt))
+            }
+
+            LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
+            LK::Float { empty_exponent: true, .. } => {
+                return (FLOAT_NUMBER, Some(TE::EmptyExponent))
+            }
+
+            LK::Char { terminated: true } => CHAR,
+            LK::Char { terminated: false } => {
+                return (CHAR, Some(TE::UnterminatedChar))
+            }
+
+            LK::Byte { terminated: true } => BYTE,
+            LK::Byte { terminated: false } => {
+                return (BYTE, Some(TE::UnterminatedByte))
             }
-            LK::Char { terminated } => ok_if(terminated, CHAR, TE::UnterminatedChar),
-            LK::Byte { terminated } => ok_if(terminated, BYTE, TE::UnterminatedByte),
-            LK::Str { terminated } => ok_if(terminated, STRING, TE::UnterminatedString),
-            LK::ByteStr { terminated } => {
-                ok_if(terminated, BYTE_STRING, TE::UnterminatedByteString)
+
+            LK::Str { terminated: true } => STRING,
+            LK::Str { terminated: false } => {
+                return (STRING, Some(TE::UnterminatedString))
+            }
+
+            LK::ByteStr { terminated: true } => BYTE_STRING,
+            LK::ByteStr { terminated: false } => {
+                return (BYTE_STRING, Some(TE::UnterminatedByteString))
             }
 
-            LK::RawStr { started: true, terminated, .. } => {
-                ok_if(terminated, RAW_STRING, TE::UnterminatedRawString)
+            LK::RawStr { started: true, terminated: true, .. } => RAW_STRING,
+            LK::RawStr { started: true, terminated: false, .. } => {
+                return (RAW_STRING, Some(TE::UnterminatedRawString))
+            }
+            LK::RawStr { started: false, .. } => {
+                return (RAW_STRING, Some(TE::UnstartedRawString))
             }
-            LK::RawStr { started: false, .. } => err(RAW_STRING, TE::UnstartedRawString),
 
-            LK::RawByteStr { started: true, terminated, .. } => {
-                ok_if(terminated, RAW_BYTE_STRING, TE::UnterminatedRawByteString)
+            LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING,
+            LK::RawByteStr { started: true, terminated: false, .. } => {
+                return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString))
             }
             LK::RawByteStr { started: false, .. } => {
-                err(RAW_BYTE_STRING, TE::UnstartedRawByteString)
+                return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString))
             }
-        }
-    }
-    const fn ok(syntax_kind: SyntaxKind) -> ParsedSyntaxKind {
-        (syntax_kind, None)
-    }
-    const fn err(syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind {
-        (syntax_kind, Some(error))
-    }
-    fn ok_if(cond: bool, syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind {
-        if cond {
-            ok(syntax_kind)
-        } else {
-            err(syntax_kind, error)
-        }
-    }
+        };
+
+        (syntax_kind, None)
+    }
 }
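
Putting the three entry points together, the intended behavior on malformed input looks roughly like this. A usage sketch, with the specific input and outcomes inferred from the error variants above rather than from tests in this commit:

    use ra_syntax::{lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize};

    fn lexer_behavior() {
        // An unterminated string still produces a STRING token; the problem
        // is reported separately as TokenizeError::UnterminatedString.
        let (tokens, errors) = tokenize("let s = \"unterminated");
        assert!(!tokens.is_empty());
        assert!(!errors.is_empty());

        // Single-token lexing keeps the malformed token, error alongside:
        assert!(lex_single_syntax_kind("\"unterminated").is_some());

        // The valid-only variant filters it out:
        assert!(lex_single_valid_syntax_kind("\"unterminated").is_none());
        assert!(lex_single_valid_syntax_kind("foo").is_some());
    }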
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index ad1a7c855..1f351e9fc 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -12,7 +12,7 @@ use ra_text_edit::AtomTextEdit;
 use crate::{
     algo,
     parsing::{
-        lexer::{single_token, tokenize, ParsedTokens, Token},
+        lexer::{lex_single_syntax_kind, tokenize, Token},
         text_token_source::TextTokenSource,
         text_tree_sink::TextTreeSink,
     },
@@ -54,7 +54,7 @@ fn reparse_token<'node>(
     }
 
     let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
-    let new_token_kind = single_token(&new_text)?.token.kind;
+    let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?;
 
     if new_token_kind != prev_token_kind
         || (new_token_kind == IDENT && is_contextual_kw(&new_text))
@@ -67,8 +67,8 @@ fn reparse_token<'node>(
     // `b` no longer remains an identifier, but becomes a part of byte string literal
     if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
         new_text.push(next_char);
-        let token_with_next_char = single_token(&new_text);
-        if token_with_next_char.is_some() {
+        let token_with_next_char = lex_single_syntax_kind(&new_text);
+        if let Some((_kind, _error)) = token_with_next_char {
             return None;
         }
         new_text.pop();
@@ -88,23 +88,26 @@ fn reparse_block<'node>(
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
     let (node, reparser) = find_reparsable_node(root, edit.delete)?;
     let text = get_text_after_edit(node.clone().into(), &edit);
-    let ParsedTokens { tokens, errors } = tokenize(&text);
+
+    let (tokens, new_lexer_errors) = tokenize(&text);
     if !is_balanced(&tokens) {
         return None;
     }
+
     let mut token_source = TextTokenSource::new(&text, &tokens);
-    let mut tree_sink = TextTreeSink::new(&text, &tokens, errors);
+    let mut tree_sink = TextTreeSink::new(&text, &tokens);
     reparser.parse(&mut token_source, &mut tree_sink);
-    let (green, new_errors) = tree_sink.finish();
-    Some((node.replace_with(green), new_errors, node.text_range()))
+
+    let (green, mut new_parser_errors) = tree_sink.finish();
+    new_parser_errors.extend(new_lexer_errors);
+
+    Some((node.replace_with(green), new_parser_errors, node.text_range()))
 }
 
 fn get_text_after_edit(element: SyntaxElement, edit: &AtomTextEdit) -> String {
     let edit =
         AtomTextEdit::replace(edit.delete - element.text_range().start(), edit.insert.clone());
 
-    // Note: we could move this match to a method or even further: use enum_dispatch crate
-    // https://crates.io/crates/enum_dispatch
     let text = match element {
         NodeOrToken::Token(token) => token.text().to_string(),
         NodeOrToken::Node(node) => node.text().to_string(),
@@ -122,8 +125,6 @@ fn is_contextual_kw(text: &str) -> bool {
 fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
     let node = algo::find_covering_element(node, range);
 
-    // Note: we could move this match to a method or even further: use enum_dispatch crate
-    // https://crates.io/crates/enum_dispatch
     let mut ancestors = match node {
         NodeOrToken::Token(it) => it.parent().ancestors(),
         NodeOrToken::Node(it) => it.ancestors(),
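
For context, the guard in `reparse_token` relexes the edited token together with the next source character: if the pair still lexes as one token, the edit has leaked across the token boundary and the cheap single-token reparse must be abandoned. A hedged sketch of that check (the `merges_with_next_char` helper is hypothetical, not part of this commit):

    use ra_syntax::lex_single_syntax_kind;

    // Hypothetical helper mirroring the logic in reparse_token.
    fn merges_with_next_char(new_text: &mut String, next_char: char) -> bool {
        new_text.push(next_char);
        // Some(..) means `new_text + next_char` lexes as exactly ONE token,
        // e.g. `b` followed by `"..."` fusing into a byte-string literal.
        let merges = lex_single_syntax_kind(new_text).is_some();
        new_text.pop();
        merges
    }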
diff --git a/crates/ra_syntax/src/parsing/text_tree_sink.rs b/crates/ra_syntax/src/parsing/text_tree_sink.rs
index 5faac588b..dd202601d 100644
--- a/crates/ra_syntax/src/parsing/text_tree_sink.rs
+++ b/crates/ra_syntax/src/parsing/text_tree_sink.rs
@@ -92,14 +92,14 @@ impl<'a> TreeSink for TextTreeSink<'a> {
 }
 
 impl<'a> TextTreeSink<'a> {
-    pub(super) fn new(text: &'a str, tokens: &'a [Token], errors: Vec<SyntaxError>) -> Self {
+    pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> Self {
         Self {
             text,
             tokens,
             text_pos: 0.into(),
             token_pos: 0,
             state: State::PendingStart,
-            inner: SyntaxTreeBuilder::new(errors),
+            inner: SyntaxTreeBuilder::default(),
         }
     }
 
diff --git a/crates/ra_syntax/src/syntax_node.rs b/crates/ra_syntax/src/syntax_node.rs
index 591855302..7c2b18af3 100644
--- a/crates/ra_syntax/src/syntax_node.rs
+++ b/crates/ra_syntax/src/syntax_node.rs
@@ -45,12 +45,6 @@ pub struct SyntaxTreeBuilder {
 }
 
 impl SyntaxTreeBuilder {
-    pub fn new(errors: Vec<SyntaxError>) -> Self {
-        Self { errors, inner: GreenNodeBuilder::default() }
-    }
-}
-
-impl SyntaxTreeBuilder {
     pub(crate) fn finish_raw(self) -> (GreenNode, Vec<SyntaxError>) {
         let green = self.inner.finish();
         (green, self.errors)
diff --git a/crates/ra_syntax/src/tests.rs b/crates/ra_syntax/src/tests.rs
index df21c957c..f79dc4f93 100644
--- a/crates/ra_syntax/src/tests.rs
+++ b/crates/ra_syntax/src/tests.rs
@@ -11,7 +11,7 @@ use crate::{fuzz, SourceFile};
 fn lexer_tests() {
     dir_tests(&test_data_dir(), &["lexer"], |text, _| {
         // FIXME: add tests for errors (their format is up to discussion)
-        let tokens = crate::tokenize(text).tokens;
+        let (tokens, _errors) = crate::tokenize(text);
         dump_tokens(&tokens, text)
     })
 }