author     Veetaha <[email protected]>    2020-01-28 05:09:13 +0000
committer  Veetaha <[email protected]>    2020-02-03 22:00:55 +0000
commit     9e7eaa959f9dc368a55f1a80b35651b78b3d0883 (patch)
tree       4b1f4af14d9898301949fa937219006d671a72ef /crates
parent     bf60661aa3e2a77fedb3e1627675842d05538860 (diff)
ra_syntax: refactored the lexer design as per @matklad and @kiljacken PR review
Diffstat (limited to 'crates')
-rw-r--r--  crates/ra_ide/src/references/rename.rs          |   6
-rw-r--r--  crates/ra_mbe/src/subtree_source.rs             |   5
-rw-r--r--  crates/ra_syntax/src/lib.rs                     |   4
-rw-r--r--  crates/ra_syntax/src/parsing.rs                 |  12
-rw-r--r--  crates/ra_syntax/src/parsing/lexer.rs           | 313
-rw-r--r--  crates/ra_syntax/src/parsing/reparsing.rs       |  25
-rw-r--r--  crates/ra_syntax/src/parsing/text_tree_sink.rs  |   4
-rw-r--r--  crates/ra_syntax/src/syntax_node.rs             |   6
-rw-r--r--  crates/ra_syntax/src/tests.rs                   |   2

9 files changed, 199 insertions, 178 deletions
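
The headline change is the shape of the lexer's public API: `tokenize` now returns a plain `(Vec<Token>, Vec<SyntaxError>)` pair instead of the old `ParsedTokens` struct. Not part of the commit itself, but as a minimal sketch of how a caller reads after this revision (assuming a dependency on the `ra_syntax` crate at this commit; the sample text is made up):

```rust
// Illustrative sketch only: consuming the new lexer API after this commit.
use ra_syntax::{tokenize, SyntaxError, Token};

fn main() {
    let text = "fn main() { let _ = \"unterminated";

    // Before this commit `tokenize` returned a `ParsedTokens` struct;
    // after it, it returns a plain pair of tokens and lexer errors.
    let (tokens, errors): (Vec<Token>, Vec<SyntaxError>) = tokenize(text);

    println!("lexed {} tokens, {} lexer errors", tokens.len(), errors.len());
}
```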
diff --git a/crates/ra_ide/src/references/rename.rs b/crates/ra_ide/src/references/rename.rs
index ad3e86f7c..9a84c1c88 100644
--- a/crates/ra_ide/src/references/rename.rs
+++ b/crates/ra_ide/src/references/rename.rs
@@ -2,7 +2,9 @@
 
 use hir::ModuleSource;
 use ra_db::{RelativePath, RelativePathBuf, SourceDatabase, SourceDatabaseExt};
-use ra_syntax::{algo::find_node_at_offset, ast, single_token, AstNode, SyntaxKind, SyntaxNode};
+use ra_syntax::{
+    algo::find_node_at_offset, ast, lex_single_valid_syntax_kind, AstNode, SyntaxKind, SyntaxNode,
+};
 use ra_text_edit::TextEdit;
 
 use crate::{
@@ -17,7 +19,7 @@ pub(crate) fn rename(
     position: FilePosition,
     new_name: &str,
 ) -> Option<RangeInfo<SourceChange>> {
-    match single_token(new_name)?.token.kind {
+    match lex_single_valid_syntax_kind(new_name)? {
         SyntaxKind::IDENT | SyntaxKind::UNDERSCORE => (),
         _ => return None,
    }
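
The rename path above now relies on `lex_single_valid_syntax_kind` to accept only inputs that lex as exactly one valid token. A standalone sketch of that check (illustrative only; `is_valid_name` is a hypothetical helper, not part of the commit):

```rust
// Sketch of the identifier-validation pattern used in rename.rs, written
// against the public `ra_syntax` API at this revision.
use ra_syntax::{lex_single_valid_syntax_kind, SyntaxKind};

fn is_valid_name(new_name: &str) -> bool {
    // `lex_single_valid_syntax_kind` returns `Some(kind)` only when the whole
    // string lexes as exactly one token with no tokenization error.
    match lex_single_valid_syntax_kind(new_name) {
        Some(SyntaxKind::IDENT) | Some(SyntaxKind::UNDERSCORE) => true,
        _ => false,
    }
}

fn main() {
    assert!(is_valid_name("foo"));
    assert!(is_valid_name("_"));
    assert!(!is_valid_name("foo bar")); // two tokens, not a single identifier
    assert!(!is_valid_name("42"));      // one token, but INT_NUMBER rather than IDENT
}
```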
diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs
index 72ac8df03..c9f42b3dd 100644
--- a/crates/ra_mbe/src/subtree_source.rs
+++ b/crates/ra_mbe/src/subtree_source.rs
@@ -1,7 +1,7 @@
 //! FIXME: write short doc here
 
 use ra_parser::{Token, TokenSource};
-use ra_syntax::{single_token, SmolStr, SyntaxKind, SyntaxKind::*, T};
+use ra_syntax::{lex_single_valid_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T};
 use std::cell::{Cell, Ref, RefCell};
 use tt::buffer::{Cursor, TokenBuffer};
 
@@ -129,8 +129,7 @@ fn convert_delim(d: Option<tt::DelimiterKind>, closing: bool) -> TtToken {
 }
 
 fn convert_literal(l: &tt::Literal) -> TtToken {
-    let kind = single_token(&l.text)
-        .map(|parsed| parsed.token.kind)
+    let kind = lex_single_valid_syntax_kind(&l.text)
         .filter(|kind| kind.is_literal())
         .unwrap_or_else(|| match l.text.as_ref() {
             "true" => T![true],
diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs
index 80b3a0b22..f8f4b64c1 100644
--- a/crates/ra_syntax/src/lib.rs
+++ b/crates/ra_syntax/src/lib.rs
@@ -41,7 +41,9 @@ use crate::syntax_node::GreenNode;
 pub use crate::{
     algo::InsertPosition,
     ast::{AstNode, AstToken},
-    parsing::{first_token, single_token, tokenize, tokenize_append, Token, TokenizeError},
+    parsing::{
+        lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token, TokenizeError,
+    },
     ptr::{AstPtr, SyntaxNodePtr},
     syntax_error::{Location, SyntaxError, SyntaxErrorKind},
     syntax_node::{
diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs
index 4e51f920b..e5eb80850 100644
--- a/crates/ra_syntax/src/parsing.rs
+++ b/crates/ra_syntax/src/parsing.rs
@@ -15,9 +15,15 @@ pub use lexer::*;
 pub(crate) use self::reparsing::incremental_reparse;
 
 pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) {
-    let ParsedTokens { tokens, errors } = tokenize(&text);
+    let (tokens, lexer_errors) = tokenize(&text);
+
     let mut token_source = TextTokenSource::new(text, &tokens);
-    let mut tree_sink = TextTreeSink::new(text, &tokens, errors);
+    let mut tree_sink = TextTreeSink::new(text, &tokens);
+
     ra_parser::parse(&mut token_source, &mut tree_sink);
-    tree_sink.finish()
+
+    let (tree, mut parser_errors) = tree_sink.finish();
+    parser_errors.extend(lexer_errors);
+
+    (tree, parser_errors)
 }
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index bf6b4d637..55755be18 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -16,55 +16,21 @@ pub struct Token {
     pub len: TextUnit,
 }
 
-/// Represents the result of parsing one token. Beware that the token may be malformed.
-#[derive(Debug)]
-pub struct ParsedToken {
-    /// Parsed token.
-    pub token: Token,
-    /// If error is present then parsed token is malformed.
-    pub error: Option<SyntaxError>,
-}
-
-#[derive(Debug, Default)]
-/// Represents the result of parsing source code of Rust language.
-pub struct ParsedTokens {
-    /// Parsed tokens in order they appear in source code.
-    pub tokens: Vec<Token>,
-    /// Collection of all occured tokenization errors.
-    /// In general `self.errors.len() <= self.tokens.len()`
-    pub errors: Vec<SyntaxError>,
-}
-impl ParsedTokens {
-    /// Append `token` and `error` (if pressent) to the result.
-    pub fn push(&mut self, ParsedToken { token, error }: ParsedToken) {
-        self.tokens.push(token);
-        if let Some(error) = error {
-            self.errors.push(error)
-        }
-    }
-}
-
-/// Same as `tokenize_append()`, just a shortcut for creating `ParsedTokens`
-/// and returning the result the usual way.
-pub fn tokenize(text: &str) -> ParsedTokens {
-    let mut parsed = ParsedTokens::default();
-    tokenize_append(text, &mut parsed);
-    parsed
-}
-
 /// Break a string up into its component tokens.
-/// Writes to `ParsedTokens` which are basically a pair `(Vec<Token>, Vec<SyntaxError>)`.
 /// Beware that it checks for shebang first and its length contributes to resulting
 /// tokens offsets.
-pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) {
+pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
     // non-empty string is a precondtion of `rustc_lexer::strip_shebang()`.
     if text.is_empty() {
-        return;
+        return Default::default();
     }
 
+    let mut tokens = Vec::new();
+    let mut errors = Vec::new();
+
     let mut offset: usize = rustc_lexer::strip_shebang(text)
         .map(|shebang_len| {
-            parsed.tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) });
+            tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) });
             shebang_len
         })
         .unwrap_or(0);
@@ -72,35 +38,76 @@ pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) {
     let text_without_shebang = &text[offset..];
 
     for rustc_token in rustc_lexer::tokenize(text_without_shebang) {
-        parsed.push(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from_usize(offset)));
+        let token_len = TextUnit::from_usize(rustc_token.len);
+        let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len);
+
+        let (syntax_kind, error) =
+            rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
+
+        tokens.push(Token { kind: syntax_kind, len: token_len });
+
+        if let Some(error) = error {
+            errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range));
+        }
+
         offset += rustc_token.len;
     }
+
+    (tokens, errors)
 }
 
-/// Returns the first encountered token at the beginning of the string.
-/// If the string contains zero or *two or more tokens* returns `None`.
+/// Returns `SyntaxKind` and `Option<SyntaxError>` of the first token
+/// encountered at the beginning of the string.
+///
+/// Returns `None` if the string contains zero *or two or more* tokens.
+/// The token is malformed if the returned error is not `None`.
+///
+/// Beware that unescape errors are not checked at tokenization time.
+pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)> {
+    first_token(text)
+        .filter(|(token, _)| token.len.to_usize() == text.len())
+        .map(|(token, error)| (token.kind, error))
+}
+
+/// The same as `single_syntax_kind()` but returns only `SyntaxKind` and
+/// returns `None` if any tokenization error occured.
 ///
-/// The main difference between `first_token()` and `single_token()` is that
-/// the latter returns `None` if the string contains more than one token.
-pub fn single_token(text: &str) -> Option<ParsedToken> {
-    first_token(text).filter(|parsed| parsed.token.len.to_usize() == text.len())
+/// Beware that unescape errors are not checked at tokenization time.
+pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind> {
+    first_token(text)
+        .filter(|(token, error)| !error.is_some() && token.len.to_usize() == text.len())
+        .map(|(token, _error)| token.kind)
 }
 
 /// Returns the first encountered token at the beginning of the string.
-/// If the string contains zero tokens returns `None`.
 ///
-/// The main difference between `first_token() and single_token()` is that
-/// the latter returns `None` if the string contains more than one token.
-pub fn first_token(text: &str) -> Option<ParsedToken> {
+/// Returns `None` if the string contains zero tokens or if the token was parsed
+/// with an error.
+///
+/// Beware that unescape errors are not checked at tokenization time.
+fn first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
     // non-empty string is a precondtion of `rustc_lexer::first_token()`.
     if text.is_empty() {
-        None
-    } else {
-        let rustc_token = rustc_lexer::first_token(text);
-        Some(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from(0)))
+        return None;
     }
+
+    let rustc_token = rustc_lexer::first_token(text);
+    let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
+
+    let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) };
+    let error = error.map(|error| {
+        SyntaxError::new(
+            SyntaxErrorKind::TokenizeError(error),
+            TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)),
+        )
+    });
+
+    Some((token, error))
 }
 
+// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice:
+// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067
+
 /// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant.
 /// It describes all the types of errors that may happen during the tokenization
 /// of Rust source.
@@ -136,122 +143,132 @@ pub enum TokenizeError {
     LifetimeStartsWithNumber,
 }
 
-/// Mapper function that converts `rustc_lexer::Token` with some additional context
-/// to `ParsedToken`
-fn rustc_token_to_parsed_token(
-    rustc_token: &rustc_lexer::Token,
-    text: &str,
-    token_start_offset: TextUnit,
-) -> ParsedToken {
+fn rustc_token_kind_to_syntax_kind(
+    rustc_token_kind: &rustc_lexer::TokenKind,
+    token_text: &str,
+) -> (SyntaxKind, Option<TokenizeError>) {
+    // A note on an intended tradeoff:
     // We drop some useful infromation here (see patterns with double dots `..`)
     // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
-    // being `u16` that come from `rowan::SyntaxKind` type and changes to `rowan::SyntaxKind`
-    // would mean hell of a rewrite
+    // being `u16` that come from `rowan::SyntaxKind`.
 
-    let token_range =
-        TextRange::offset_len(token_start_offset, TextUnit::from_usize(rustc_token.len));
-
-    let token_text = &text[token_range];
-
-    let (syntax_kind, error) = {
+    let syntax_kind = {
         use rustc_lexer::TokenKind as TK;
         use TokenizeError as TE;
 
-        match rustc_token.kind {
-            TK::LineComment => ok(COMMENT),
-            TK::BlockComment { terminated } => {
-                ok_if(terminated, COMMENT, TE::UnterminatedBlockComment)
+        match rustc_token_kind {
+            TK::LineComment => COMMENT,
+
+            TK::BlockComment { terminated: true } => COMMENT,
+            TK::BlockComment { terminated: false } => {
+                return (COMMENT, Some(TE::UnterminatedBlockComment));
            }
-            TK::Whitespace => ok(WHITESPACE),
-            TK::Ident => ok(if token_text == "_" {
-                UNDERSCORE
-            } else {
-                SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
-            }),
-            TK::RawIdent => ok(IDENT),
-            TK::Literal { kind, .. } => match_literal_kind(&kind),
-            TK::Lifetime { starts_with_number } => {
-                ok_if(!starts_with_number, LIFETIME, TE::LifetimeStartsWithNumber)
+
+            TK::Whitespace => WHITESPACE,
+
+            TK::Ident => {
+                if token_text == "_" {
+                    UNDERSCORE
+                } else {
+                    SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
+                }
            }
-            TK::Semi => ok(SEMI),
-            TK::Comma => ok(COMMA),
-            TK::Dot => ok(DOT),
-            TK::OpenParen => ok(L_PAREN),
-            TK::CloseParen => ok(R_PAREN),
-            TK::OpenBrace => ok(L_CURLY),
-            TK::CloseBrace => ok(R_CURLY),
-            TK::OpenBracket => ok(L_BRACK),
-            TK::CloseBracket => ok(R_BRACK),
-            TK::At => ok(AT),
-            TK::Pound => ok(POUND),
-            TK::Tilde => ok(TILDE),
-            TK::Question => ok(QUESTION),
-            TK::Colon => ok(COLON),
-            TK::Dollar => ok(DOLLAR),
-            TK::Eq => ok(EQ),
-            TK::Not => ok(EXCL),
-            TK::Lt => ok(L_ANGLE),
-            TK::Gt => ok(R_ANGLE),
-            TK::Minus => ok(MINUS),
-            TK::And => ok(AMP),
-            TK::Or => ok(PIPE),
-            TK::Plus => ok(PLUS),
-            TK::Star => ok(STAR),
-            TK::Slash => ok(SLASH),
-            TK::Caret => ok(CARET),
-            TK::Percent => ok(PERCENT),
-            TK::Unknown => ok(ERROR),
-        }
-    };
 
-    return ParsedToken {
-        token: Token { kind: syntax_kind, len: token_range.len() },
-        error: error
-            .map(|error| SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range)),
+            TK::RawIdent => IDENT,
+            TK::Literal { kind, .. } => return match_literal_kind(&kind),
+
+            TK::Lifetime { starts_with_number: false } => LIFETIME,
+            TK::Lifetime { starts_with_number: true } => {
+                return (LIFETIME, Some(TE::LifetimeStartsWithNumber))
+            }
+
+            TK::Semi => SEMI,
+            TK::Comma => COMMA,
+            TK::Dot => DOT,
+            TK::OpenParen => L_PAREN,
+            TK::CloseParen => R_PAREN,
+            TK::OpenBrace => L_CURLY,
+            TK::CloseBrace => R_CURLY,
+            TK::OpenBracket => L_BRACK,
+            TK::CloseBracket => R_BRACK,
+            TK::At => AT,
+            TK::Pound => POUND,
+            TK::Tilde => TILDE,
+            TK::Question => QUESTION,
+            TK::Colon => COLON,
+            TK::Dollar => DOLLAR,
+            TK::Eq => EQ,
+            TK::Not => EXCL,
+            TK::Lt => L_ANGLE,
+            TK::Gt => R_ANGLE,
+            TK::Minus => MINUS,
+            TK::And => AMP,
+            TK::Or => PIPE,
+            TK::Plus => PLUS,
+            TK::Star => STAR,
+            TK::Slash => SLASH,
+            TK::Caret => CARET,
+            TK::Percent => PERCENT,
+            TK::Unknown => ERROR,
+        }
     };
 
-    type ParsedSyntaxKind = (SyntaxKind, Option<TokenizeError>);
+    return (syntax_kind, None);
 
-    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> ParsedSyntaxKind {
+    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<TokenizeError>) {
         use rustc_lexer::LiteralKind as LK;
         use TokenizeError as TE;
 
-        match *kind {
-            LK::Int { empty_int, .. } => ok_if(!empty_int, INT_NUMBER, TE::EmptyInt),
-            LK::Float { empty_exponent, .. } => {
-                ok_if(!empty_exponent, FLOAT_NUMBER, TE::EmptyExponent)
+        #[rustfmt::skip]
+        let syntax_kind = match *kind {
+            LK::Int { empty_int: false, .. } => INT_NUMBER,
+            LK::Int { empty_int: true, .. } => {
+                return (INT_NUMBER, Some(TE::EmptyInt))
+            }
+
+            LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
+            LK::Float { empty_exponent: true, .. } => {
+                return (FLOAT_NUMBER, Some(TE::EmptyExponent))
+            }
+
+            LK::Char { terminated: true } => CHAR,
+            LK::Char { terminated: false } => {
+                return (CHAR, Some(TE::UnterminatedChar))
+            }
+
+            LK::Byte { terminated: true } => BYTE,
+            LK::Byte { terminated: false } => {
+                return (BYTE, Some(TE::UnterminatedByte))
            }
-            LK::Char { terminated } => ok_if(terminated, CHAR, TE::UnterminatedChar),
-            LK::Byte { terminated } => ok_if(terminated, BYTE, TE::UnterminatedByte),
-            LK::Str { terminated } => ok_if(terminated, STRING, TE::UnterminatedString),
-            LK::ByteStr { terminated } => {
-                ok_if(terminated, BYTE_STRING, TE::UnterminatedByteString)
+
+            LK::Str { terminated: true } => STRING,
+            LK::Str { terminated: false } => {
+                return (STRING, Some(TE::UnterminatedString))
            }
+
+
+            LK::ByteStr { terminated: true } => BYTE_STRING,
+            LK::ByteStr { terminated: false } => {
+                return (BYTE_STRING, Some(TE::UnterminatedByteString))
            }
 
-            LK::RawStr { started: true, terminated, .. } => {
-                ok_if(terminated, RAW_STRING, TE::UnterminatedRawString)
+            LK::RawStr { started: true, terminated: true, .. } => RAW_STRING,
+            LK::RawStr { started: true, terminated: false, .. } => {
+                return (RAW_STRING, Some(TE::UnterminatedRawString))
+            }
+            LK::RawStr { started: false, .. } => {
+                return (RAW_STRING, Some(TE::UnstartedRawString))
            }
-            LK::RawStr { started: false, .. } => err(RAW_STRING, TE::UnstartedRawString),
 
-            LK::RawByteStr { started: true, terminated, .. } => {
-                ok_if(terminated, RAW_BYTE_STRING, TE::UnterminatedRawByteString)
+            LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING,
+            LK::RawByteStr { started: true, terminated: false, .. } => {
+                return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString))
            }
            LK::RawByteStr { started: false, .. } => {
-                err(RAW_BYTE_STRING, TE::UnstartedRawByteString)
+                return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString))
            }
-        }
-    }
-    const fn ok(syntax_kind: SyntaxKind) -> ParsedSyntaxKind {
+        };
+
         (syntax_kind, None)
    }
-    const fn err(syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind {
-        (syntax_kind, Some(error))
-    }
-    fn ok_if(cond: bool, syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind {
-        if cond {
-            ok(syntax_kind)
-        } else {
-            err(syntax_kind, error)
-        }
-    }
 }
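
The lexer now exposes two single-token entry points: `lex_single_syntax_kind`, which reports the kind together with an optional error for malformed tokens, and `lex_single_valid_syntax_kind`, which returns `None` on any error. Not part of the commit; a minimal sketch of the difference, assuming the `ra_syntax` crate at this revision (the sample strings are illustrative):

```rust
// Sketch of the two new single-token entry points.
use ra_syntax::{lex_single_syntax_kind, lex_single_valid_syntax_kind, SyntaxKind};

fn main() {
    // A well-formed single token: both functions agree.
    assert_eq!(lex_single_valid_syntax_kind("'a"), Some(SyntaxKind::LIFETIME));

    // An unterminated string is still *one* token, but it is malformed:
    // `lex_single_syntax_kind` reports the kind together with an error,
    // while `lex_single_valid_syntax_kind` rejects it outright.
    let (kind, error) = lex_single_syntax_kind("\"no closing quote").unwrap();
    assert_eq!(kind, SyntaxKind::STRING);
    assert!(error.is_some());
    assert_eq!(lex_single_valid_syntax_kind("\"no closing quote"), None);

    // More than one token: both return `None`.
    assert!(lex_single_syntax_kind("1 + 1").is_none());
}
```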
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index ad1a7c855..1f351e9fc 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -12,7 +12,7 @@ use ra_text_edit::AtomTextEdit;
 use crate::{
     algo,
     parsing::{
-        lexer::{single_token, tokenize, ParsedTokens, Token},
+        lexer::{lex_single_syntax_kind, tokenize, Token},
         text_token_source::TextTokenSource,
         text_tree_sink::TextTreeSink,
     },
@@ -54,7 +54,7 @@ fn reparse_token<'node>(
            }
 
            let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit);
-            let new_token_kind = single_token(&new_text)?.token.kind;
+            let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?;
 
            if new_token_kind != prev_token_kind
                || (new_token_kind == IDENT && is_contextual_kw(&new_text))
@@ -67,8 +67,8 @@ fn reparse_token<'node>(
            // `b` no longer remains an identifier, but becomes a part of byte string literal
            if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) {
                new_text.push(next_char);
-                let token_with_next_char = single_token(&new_text);
-                if token_with_next_char.is_some() {
+                let token_with_next_char = lex_single_syntax_kind(&new_text);
+                if let Some((_kind, _error)) = token_with_next_char {
                    return None;
                }
                new_text.pop();
@@ -88,23 +88,26 @@ fn reparse_block<'node>(
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
     let (node, reparser) = find_reparsable_node(root, edit.delete)?;
     let text = get_text_after_edit(node.clone().into(), &edit);
-    let ParsedTokens { tokens, errors } = tokenize(&text);
+
+    let (tokens, new_lexer_errors) = tokenize(&text);
     if !is_balanced(&tokens) {
         return None;
     }
+
     let mut token_source = TextTokenSource::new(&text, &tokens);
-    let mut tree_sink = TextTreeSink::new(&text, &tokens, errors);
+    let mut tree_sink = TextTreeSink::new(&text, &tokens);
     reparser.parse(&mut token_source, &mut tree_sink);
-    let (green, new_errors) = tree_sink.finish();
-    Some((node.replace_with(green), new_errors, node.text_range()))
+
+    let (green, mut new_parser_errors) = tree_sink.finish();
+    new_parser_errors.extend(new_lexer_errors);
+
+    Some((node.replace_with(green), new_parser_errors, node.text_range()))
 }
 
 fn get_text_after_edit(element: SyntaxElement, edit: &AtomTextEdit) -> String {
     let edit =
         AtomTextEdit::replace(edit.delete - element.text_range().start(), edit.insert.clone());
 
-    // Note: we could move this match to a method or even further: use enum_dispatch crate
-    // https://crates.io/crates/enum_dispatch
     let text = match element {
         NodeOrToken::Token(token) => token.text().to_string(),
         NodeOrToken::Node(node) => node.text().to_string(),
@@ -122,8 +125,6 @@ fn is_contextual_kw(text: &str) -> bool {
 fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> {
     let node = algo::find_covering_element(node, range);
 
-    // Note: we could move this match to a method or even further: use enum_dispatch crate
-    // https://crates.io/crates/enum_dispatch
     let mut ancestors = match node {
         NodeOrToken::Token(it) => it.parent().ancestors(),
         NodeOrToken::Node(it) => it.ancestors(),
diff --git a/crates/ra_syntax/src/parsing/text_tree_sink.rs b/crates/ra_syntax/src/parsing/text_tree_sink.rs
index 5faac588b..dd202601d 100644
--- a/crates/ra_syntax/src/parsing/text_tree_sink.rs
+++ b/crates/ra_syntax/src/parsing/text_tree_sink.rs
@@ -92,14 +92,14 @@ impl<'a> TreeSink for TextTreeSink<'a> {
 }
 
 impl<'a> TextTreeSink<'a> {
-    pub(super) fn new(text: &'a str, tokens: &'a [Token], errors: Vec<SyntaxError>) -> Self {
+    pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> Self {
         Self {
             text,
             tokens,
             text_pos: 0.into(),
             token_pos: 0,
             state: State::PendingStart,
-            inner: SyntaxTreeBuilder::new(errors),
+            inner: SyntaxTreeBuilder::default(),
         }
     }
 
diff --git a/crates/ra_syntax/src/syntax_node.rs b/crates/ra_syntax/src/syntax_node.rs
index 591855302..7c2b18af3 100644
--- a/crates/ra_syntax/src/syntax_node.rs
+++ b/crates/ra_syntax/src/syntax_node.rs
@@ -45,12 +45,6 @@ pub struct SyntaxTreeBuilder {
 }
 
 impl SyntaxTreeBuilder {
-    pub fn new(errors: Vec<SyntaxError>) -> Self {
-        Self { errors, inner: GreenNodeBuilder::default() }
-    }
-}
-
-impl SyntaxTreeBuilder {
     pub(crate) fn finish_raw(self) -> (GreenNode, Vec<SyntaxError>) {
         let green = self.inner.finish();
         (green, self.errors)
diff --git a/crates/ra_syntax/src/tests.rs b/crates/ra_syntax/src/tests.rs
index df21c957c..f79dc4f93 100644
--- a/crates/ra_syntax/src/tests.rs
+++ b/crates/ra_syntax/src/tests.rs
@@ -11,7 +11,7 @@ use crate::{fuzz, SourceFile};
 fn lexer_tests() {
     dir_tests(&test_data_dir(), &["lexer"], |text, _| {
         // FIXME: add tests for errors (their format is up to discussion)
-        let tokens = crate::tokenize(text).tokens;
+        let (tokens, _errors) = crate::tokenize(text);
         dump_tokens(&tokens, text)
     })
 }
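
For completeness, a small sanity check in the spirit of the updated lexer test above, written against the public `tokenize` API (not part of the commit; assumes the `ra_syntax` crate at this revision, and the input string is made up):

```rust
// Sketch: token lengths returned by `tokenize` cover the input exactly, so
// offsets can be reconstructed by summing `len`s. A shebang, if recognized,
// is emitted as a single SHEBANG token that also contributes to the offsets.
use ra_syntax::tokenize;

fn main() {
    let text = "#!/usr/bin/env rust\nfn main() {}\n";

    let (tokens, errors) = tokenize(text);

    let covered: usize = tokens.iter().map(|t| t.len.to_usize()).sum();
    assert_eq!(covered, text.len());
    assert!(errors.is_empty());
}
```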