Diffstat (limited to 'crates/ra_syntax/src/parsing')
-rw-r--r--  crates/ra_syntax/src/parsing/lexer.rs     | 92
-rw-r--r--  crates/ra_syntax/src/parsing/reparsing.rs | 27
2 files changed, 43 insertions(+), 76 deletions(-)
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index f889e6a1d..f2684c852 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -2,7 +2,7 @@
 //! It is just a bridge to `rustc_lexer`.
 
 use crate::{
-    SyntaxError, SyntaxErrorKind,
+    SyntaxError,
     SyntaxKind::{self, *},
     TextRange, TextUnit,
 };
@@ -41,13 +41,13 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
         let token_len = TextUnit::from_usize(rustc_token.len);
         let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len);
 
-        let (syntax_kind, error) =
+        let (syntax_kind, err_message) =
             rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]);
 
         tokens.push(Token { kind: syntax_kind, len: token_len });
 
-        if let Some(error) = error {
-            errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range));
+        if let Some(err_message) = err_message {
+            errors.push(SyntaxError::new(err_message, token_range));
         }
 
         offset += rustc_token.len;
@@ -94,61 +94,21 @@ fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)> {
     }
 
     let rustc_token = rustc_lexer::first_token(text);
-    let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
+    let (syntax_kind, err_message) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text);
 
     let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) };
-    let error = error.map(|error| {
-        SyntaxError::new(
-            SyntaxErrorKind::TokenizeError(error),
-            TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)),
-        )
+    let optional_error = err_message.map(|err_message| {
+        SyntaxError::new(err_message, TextRange::from_to(0.into(), TextUnit::of_str(text)))
     });
 
-    Some((token, error))
-}
-
-// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice:
-// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067
-
-/// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant.
-/// It describes all the types of errors that may happen during the tokenization
-/// of Rust source.
-#[derive(Debug, Clone, PartialEq, Eq, Hash)]
-pub enum TokenizeError {
-    /// Base prefix was provided, but there were no digits
-    /// after it, e.g. `0x`, `0b`.
-    EmptyInt,
-    /// Float exponent lacks digits e.g. `12.34e+`, `12.3E+`, `12e-`, `1_E-`,
-    EmptyExponent,
-
-    /// Block comment lacks trailing delimiter `*/`
-    UnterminatedBlockComment,
-    /// Character literal lacks trailing delimiter `'`
-    UnterminatedChar,
-    /// Characterish byte literal lacks trailing delimiter `'`
-    UnterminatedByte,
-    /// String literal lacks trailing delimiter `"`
-    UnterminatedString,
-    /// Byte string literal lacks trailing delimiter `"`
-    UnterminatedByteString,
-    /// Raw literal lacks trailing delimiter e.g. `"##`
-    UnterminatedRawString,
-    /// Raw byte string literal lacks trailing delimiter e.g. `"##`
-    UnterminatedRawByteString,
-
-    /// Raw string lacks a quote after the pound characters e.g. `r###`
-    UnstartedRawString,
-    /// Raw byte string lacks a quote after the pound characters e.g. `br###`
-    UnstartedRawByteString,
-
-    /// Lifetime starts with a number e.g. `'4ever`
-    LifetimeStartsWithNumber,
+    Some((token, optional_error))
 }
 
+/// Returns `SyntaxKind` and an optional tokenize error message.
 fn rustc_token_kind_to_syntax_kind(
     rustc_token_kind: &rustc_lexer::TokenKind,
     token_text: &str,
-) -> (SyntaxKind, Option<TokenizeError>) {
+) -> (SyntaxKind, Option<&'static str>) {
     // A note on an intended tradeoff:
     // We drop some useful infromation here (see patterns with double dots `..`)
     // Storing that info in `SyntaxKind` is not possible due to its layout requirements of
@@ -156,14 +116,15 @@ fn rustc_token_kind_to_syntax_kind(
 
     let syntax_kind = {
         use rustc_lexer::TokenKind as TK;
-        use TokenizeError as TE;
-
         match rustc_token_kind {
             TK::LineComment => COMMENT,
 
             TK::BlockComment { terminated: true } => COMMENT,
             TK::BlockComment { terminated: false } => {
-                return (COMMENT, Some(TE::UnterminatedBlockComment));
+                return (
+                    COMMENT,
+                    Some("Missing trailing `*/` symbols to terminate the block comment"),
+                );
             }
 
             TK::Whitespace => WHITESPACE,
@@ -181,7 +142,7 @@ fn rustc_token_kind_to_syntax_kind(
 
             TK::Lifetime { starts_with_number: false } => LIFETIME,
             TK::Lifetime { starts_with_number: true } => {
-                return (LIFETIME, Some(TE::LifetimeStartsWithNumber))
+                return (LIFETIME, Some("Lifetime name cannot start with a number"))
             }
 
             TK::Semi => SEMI,
@@ -217,57 +178,56 @@ fn rustc_token_kind_to_syntax_kind(
 
     return (syntax_kind, None);
 
-    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<TokenizeError>) {
+    fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
         use rustc_lexer::LiteralKind as LK;
-        use TokenizeError as TE;
 
         #[rustfmt::skip]
        let syntax_kind = match *kind {
            LK::Int { empty_int: false, .. } => INT_NUMBER,
            LK::Int { empty_int: true, .. } => {
-                return (INT_NUMBER, Some(TE::EmptyInt))
+                return (INT_NUMBER, Some("Missing digits after the integer base prefix"))
            }
 
            LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER,
            LK::Float { empty_exponent: true, .. } => {
-                return (FLOAT_NUMBER, Some(TE::EmptyExponent))
+                return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol"))
            }
 
            LK::Char { terminated: true } => CHAR,
            LK::Char { terminated: false } => {
-                return (CHAR, Some(TE::UnterminatedChar))
+                return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal"))
            }
 
            LK::Byte { terminated: true } => BYTE,
            LK::Byte { terminated: false } => {
-                return (BYTE, Some(TE::UnterminatedByte))
+                return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal"))
            }
 
            LK::Str { terminated: true } => STRING,
            LK::Str { terminated: false } => {
-                return (STRING, Some(TE::UnterminatedString))
+                return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal"))
            }
 
 
            LK::ByteStr { terminated: true } => BYTE_STRING,
            LK::ByteStr { terminated: false } => {
-                return (BYTE_STRING, Some(TE::UnterminatedByteString))
+                return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal"))
            }
 
            LK::RawStr { started: true, terminated: true, .. } => RAW_STRING,
            LK::RawStr { started: true, terminated: false, .. } => {
-                return (RAW_STRING, Some(TE::UnterminatedRawString))
+                return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal"))
            }
            LK::RawStr { started: false, .. } => {
-                return (RAW_STRING, Some(TE::UnstartedRawString))
+                return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal"))
            }
 
            LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING,
            LK::RawByteStr { started: true, terminated: false, .. } => {
-                return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString))
+                return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"))
            }
            LK::RawByteStr { started: false, .. } => {
-                return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString))
+                return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal"))
            }
        };
 
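Note: the sketch below is a minimal, self-contained illustration (not the ra_syntax API; `Kind`, `classify_int`, and the `SyntaxError` struct are hypothetical stand-ins) of the error-reporting shape this diff moves to: the classifier returns `(SyntaxKind, Option<&'static str>)` and the caller attaches the token range.

    // Minimal sketch of the "message string plus range" pattern adopted above.
    // All names here are illustrative stand-ins, not rust-analyzer code.

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum Kind {
        IntNumber,
    }

    #[derive(Debug)]
    struct SyntaxError {
        message: String,
        range: std::ops::Range<usize>,
    }

    impl SyntaxError {
        fn new(message: impl Into<String>, range: std::ops::Range<usize>) -> Self {
            SyntaxError { message: message.into(), range }
        }
    }

    /// Classify an integer token; report a message if the base prefix has no digits.
    fn classify_int(token_text: &str) -> (Kind, Option<&'static str>) {
        let empty_int = matches!(token_text, "0x" | "0b" | "0o");
        if empty_int {
            (Kind::IntNumber, Some("Missing digits after the integer base prefix"))
        } else {
            (Kind::IntNumber, None)
        }
    }

    fn main() {
        let text = "0x";
        let (kind, err_message) = classify_int(text);
        let mut errors = Vec::new();
        if let Some(err_message) = err_message {
            // The caller owns the range; the classifier only supplies the message.
            errors.push(SyntaxError::new(err_message, 0..text.len()));
        }
        println!("{:?} {:?}", kind, errors);
    }

With this shape, adding a new tokenize diagnostic only requires a new message string at the point where the condition is detected, rather than a new enum variant plus matching conversion code elsewhere.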
diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs
index a86da0675..41a355ec7 100644
--- a/crates/ra_syntax/src/parsing/reparsing.rs
+++ b/crates/ra_syntax/src/parsing/reparsing.rs
@@ -87,7 +87,7 @@ fn reparse_block<'node>(
     edit: &AtomTextEdit,
 ) -> Option<(GreenNode, Vec<SyntaxError>, TextRange)> {
     let (node, reparser) = find_reparsable_node(root, edit.delete)?;
-    let text = get_text_after_edit(node.clone().into(), &edit);
+    let text = get_text_after_edit(node.clone().into(), edit);
 
     let (tokens, new_lexer_errors) = tokenize(&text);
     if !is_balanced(&tokens) {
@@ -162,20 +162,27 @@ fn is_balanced(tokens: &[Token]) -> bool {
 fn merge_errors(
     old_errors: Vec<SyntaxError>,
     new_errors: Vec<SyntaxError>,
-    old_range: TextRange,
+    range_before_reparse: TextRange,
     edit: &AtomTextEdit,
 ) -> Vec<SyntaxError> {
     let mut res = Vec::new();
-    for e in old_errors {
-        if e.offset() <= old_range.start() {
-            res.push(e)
-        } else if e.offset() >= old_range.end() {
-            res.push(e.add_offset(TextUnit::of_str(&edit.insert), edit.delete.len()));
+
+    for old_err in old_errors {
+        let old_err_range = *old_err.range();
+        // FIXME: make sure that .start() was here previously by a mistake
+        if old_err_range.end() <= range_before_reparse.start() {
+            res.push(old_err);
+        } else if old_err_range.start() >= range_before_reparse.end() {
+            let inserted_len = TextUnit::of_str(&edit.insert);
+            res.push(old_err.with_range((old_err_range + inserted_len) - edit.delete.len()));
+            // Note: extra parens are intentional to prevent uint underflow, HWAB (here was a bug)
         }
     }
-    for e in new_errors {
-        res.push(e.add_offset(old_range.start(), 0.into()));
-    }
+    res.extend(new_errors.into_iter().map(|new_err| {
+        // fighting borrow checker with a variable ;)
+        let offseted_range = *new_err.range() + range_before_reparse.start();
+        new_err.with_range(offseted_range)
+    }));
     res
 }
 
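Note: the sketch below illustrates the range bookkeeping in the reworked `merge_errors`, using plain `usize` offsets instead of `TextRange`/`TextUnit`; `shift_old_err` and the sample values are illustrative, not rust-analyzer code. Old errors that lie entirely after the reparsed range are shifted by the edit's net length change (adding the inserted length before subtracting the deleted length, per the "extra parens" note above, so an unsigned subtraction cannot underflow), while errors produced by reparsing the fragment are mapped back to file coordinates by adding the fragment's original start offset.

    /// Shift an old error that lies entirely after the reparsed range.
    /// Add `inserted_len` first, then subtract `deleted_len`, to avoid
    /// unsigned underflow when the edit deletes more text than it inserts.
    fn shift_old_err(range: (usize, usize), inserted_len: usize, deleted_len: usize) -> (usize, usize) {
        ((range.0 + inserted_len) - deleted_len, (range.1 + inserted_len) - deleted_len)
    }

    fn main() {
        // Suppose the reparsed node covered bytes 10..30, the edit deleted 5
        // bytes and inserted 2, and an old error sat at 40..44, after the node.
        let old_err = (40, 44);
        assert_eq!(shift_old_err(old_err, 2, 5), (37, 41));

        // A new error reported at 3..5 inside the reparsed fragment maps back
        // to whole-file coordinates by adding the fragment's start offset (10).
        let new_err = (3, 5);
        let start_before_reparse = 10;
        assert_eq!((new_err.0 + start_before_reparse, new_err.1 + start_before_reparse), (13, 15));
    }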