diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2020-02-03 22:51:17 +0000 |
---|---|---|
committer | GitHub <[email protected]> | 2020-02-03 22:51:17 +0000 |
commit | 918547dbe9a2907401102eba491ac25cebe1404d (patch) | |
tree | e0aa3bdcec597e81f022ac1ce388d42724a92f51 /crates/ra_syntax/src/parsing.rs | |
parent | b090ee5a65f9630146c2842bc51fcfcc8da08da1 (diff) | |
parent | a3e5663ae0206270156fbeb926a174a40abbddb0 (diff) |
Merge #2911
2911: Implement collecting errors while tokenizing r=matklad a=Veetaha
Now we are collecting errors from `rustc_lexer` and returning them in `ParsedToken { token, error }` and `ParsedTokens { tokens, errors }` structures **([UPD]: this is now simplified, see updates bellow)**.
The main changes are introduced in `ra_syntax/parsing/lexer.rs`. It now exposes the following functions and types:
```rust
pub fn tokenize(text: &str) -> ParsedTokens;
pub fn tokenize_append(text: &str, parsed_tokens_to_append_to: &mut ParsedTokens);
pub fn first_token(text: &str) -> Option<ParsedToken>; // allows any number of tokens in text
pub fn single_token(text: &str) -> Option<ParsedToken>; // allows only a single token in text
pub struct ParsedToken { pub token: Token, pub error: Option<SyntaxError> }
pub struct ParsedTokens { pub tokens: Vec<Token>, pub errors: Vec<SyntaxError> }
pub enum TokenizeError { /* Simple enum which reflects rustc_lexer tokenization errors */ }
```
In the first commit I implemented it with iterators, but then decided that since this crate is ad hoc for `rust-analyzer` and we clearly see the places of its usage it would be better to simplify it to vectors.
This is currently WIP, because I want to add tests for error messages generated by the lexer.
I'd like to listen to you thoughts how to define these tests in `ra_syntax/test-data` dir.
Related issues: #223
**[UPD]**
After the PR review the API was simplified:
```rust
pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>);
// Both lex functions do not check for unescape errors
pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)>;
pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind>;
// This will be removed in the next PR in favour of simlifying `SyntaxError` to `(String, TextRange)`
pub enum TokenizeError { /* Simple enum which reflects rustc_lexer tokenization errors */ }
// this is private, but may be made public if such demand would exist in future (least privilege principle)
fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)>;
```
Co-authored-by: Veetaha <[email protected]>
Diffstat (limited to 'crates/ra_syntax/src/parsing.rs')
-rw-r--r-- | crates/ra_syntax/src/parsing.rs | 18 |
1 files changed, 13 insertions, 5 deletions
diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 0387f0378..e5eb80850 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs | |||
@@ -7,15 +7,23 @@ mod text_tree_sink; | |||
7 | mod reparsing; | 7 | mod reparsing; |
8 | 8 | ||
9 | use crate::{syntax_node::GreenNode, SyntaxError}; | 9 | use crate::{syntax_node::GreenNode, SyntaxError}; |
10 | use text_token_source::TextTokenSource; | ||
11 | use text_tree_sink::TextTreeSink; | ||
10 | 12 | ||
11 | pub use self::lexer::{classify_literal, tokenize, Token}; | 13 | pub use lexer::*; |
12 | 14 | ||
13 | pub(crate) use self::reparsing::incremental_reparse; | 15 | pub(crate) use self::reparsing::incremental_reparse; |
14 | 16 | ||
15 | pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { | 17 | pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec<SyntaxError>) { |
16 | let tokens = tokenize(&text); | 18 | let (tokens, lexer_errors) = tokenize(&text); |
17 | let mut token_source = text_token_source::TextTokenSource::new(text, &tokens); | 19 | |
18 | let mut tree_sink = text_tree_sink::TextTreeSink::new(text, &tokens); | 20 | let mut token_source = TextTokenSource::new(text, &tokens); |
21 | let mut tree_sink = TextTreeSink::new(text, &tokens); | ||
22 | |||
19 | ra_parser::parse(&mut token_source, &mut tree_sink); | 23 | ra_parser::parse(&mut token_source, &mut tree_sink); |
20 | tree_sink.finish() | 24 | |
25 | let (tree, mut parser_errors) = tree_sink.finish(); | ||
26 | parser_errors.extend(lexer_errors); | ||
27 | |||
28 | (tree, parser_errors) | ||
21 | } | 29 | } |