Merge #2911

2911: Implement collecting errors while tokenizing r=matklad a=Veetaha Now we are collecting errors from `rustc_lexer` and returning them in `ParsedToken { token, error }` and `ParsedTokens { tokens, errors }` structures **([UPD]: this is now simplified, see updates below)**. The main changes are introduced in `ra_syntax/parsing/lexer.rs`. It now exposes the following functions and types: ```rust pub fn tokenize(text: &str) -> ParsedTokens; pub fn tokenize_append(text: &str, parsed_tokens_to_append_to: &mut ParsedTokens); pub fn first_token(text: &str) -> Option<ParsedToken>; // allows any number of tokens in text pub fn single_token(text: &str) -> Option<ParsedToken>; // allows only a single token in text pub struct ParsedToken { pub token: Token, pub error: Option<SyntaxError> } pub struct ParsedTokens { pub tokens: Vec<Token>, pub errors: Vec<SyntaxError> } pub enum TokenizeError { /* Simple enum which reflects rustc_lexer tokenization errors */ } ``` In the first commit I implemented it with iterators, but then decided that since this crate is ad hoc for `rust-analyzer` and we clearly see the places of its usage it would be better to simplify it to vectors. This is currently WIP, because I want to add tests for error messages generated by the lexer. I'd like to hear your thoughts on how to define these tests in `ra_syntax/test-data` dir. 
Related issues: #223 **[UPD]** After the PR review the API was simplified: ```rust pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>); // Both lex functions do not check for unescape errors pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option<SyntaxError>)>; pub fn lex_single_valid_syntax_kind(text: &str) -> Option<SyntaxKind>; // This will be removed in the next PR in favour of simplifying `SyntaxError` to `(String, TextRange)` pub enum TokenizeError { /* Simple enum which reflects rustc_lexer tokenization errors */ } // this is private, but may be made public if such a demand arises in the future (least privilege principle) fn lex_first_token(text: &str) -> Option<(Token, Option<SyntaxError>)>; ``` Co-authored-by: Veetaha <[email protected]>
author: bors[bot] <26634292+bors[bot]@users.noreply.github.com> 2020-02-03 22:51:17 +0000
committer: GitHub <[email protected]> 2020-02-03 22:51:17 +0000
commit: 918547dbe9a2907401102eba491ac25cebe1404d (patch)
tree: e0aa3bdcec597e81f022ac1ce388d42724a92f51 /crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt
parent: b090ee5a65f9630146c2842bc51fcfcc8da08da1 (diff)
parent: a3e5663ae0206270156fbeb926a174a40abbddb0 (diff)
1 files changed, 62 insertions, 0 deletions
diff --git a/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt b/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt
new file mode 100644
index 000000000..ab35e20a5
--- /dev/null
+++ b/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt
@@ -0,0 +1,62 @@
+FLOAT_NUMBER 2 "0e"
+WHITESPACE 1 "\n"
+FLOAT_NUMBER 2 "0E"
+WHITESPACE 2 "\n\n"
+FLOAT_NUMBER 4 "42e+"
+WHITESPACE 1 "\n"
+FLOAT_NUMBER 4 "42e-"
+WHITESPACE 1 "\n"
+FLOAT_NUMBER 4 "42E+"
+WHITESPACE 1 "\n"
+FLOAT_NUMBER 4 "42E-"
+WHITESPACE 2 "\n\n"
+INT_NUMBER 2 "42"
+DOT 1 "."
+IDENT 1 "e"
+PLUS 1 "+"
+WHITESPACE 1 "\n"
+INT_NUMBER 2 "42"
+DOT 1 "."
+IDENT 1 "e"
+MINUS 1 "-"
+WHITESPACE 1 "\n"
+INT_NUMBER 2 "42"
+DOT 1 "."
+IDENT 1 "E"
+PLUS 1 "+"
+WHITESPACE 1 "\n"
+INT_NUMBER 2 "42"
+DOT 1 "."
+IDENT 1 "E"
+MINUS 1 "-"
+WHITESPACE 2 "\n\n"
+FLOAT_NUMBER 6 "42.2e+"
+WHITESPACE 1 "\n"
+FLOAT_NUMBER 6 "42.2e-"
+WHITESPACE 1 "\n"
+FLOAT_NUMBER 6 "42.2E+"
+WHITESPACE 1 "\n"
+FLOAT_NUMBER 6 "42.2E-"
+WHITESPACE 2 "\n\n"
+FLOAT_NUMBER 9 "42.2e+f32"
+WHITESPACE 1 "\n"
+FLOAT_NUMBER 9 "42.2e-f32"
+WHITESPACE 1 "\n"
+FLOAT_NUMBER 9 "42.2E+f32"
+WHITESPACE 1 "\n"
+FLOAT_NUMBER 9 "42.2E-f32"
+WHITESPACE 1 "\n"
+> error[0; 2) token("0e") msg(Missing digits after the exponent symbol)
+> error[3; 5) token("0E") msg(Missing digits after the exponent symbol)
+> error[7; 11) token("42e+") msg(Missing digits after the exponent symbol)
+> error[12; 16) token("42e-") msg(Missing digits after the exponent symbol)
+> error[17; 21) token("42E+") msg(Missing digits after the exponent symbol)
+> error[22; 26) token("42E-") msg(Missing digits after the exponent symbol)
+> error[53; 59) token("42.2e+") msg(Missing digits after the exponent symbol)
+> error[60; 66) token("42.2e-") msg(Missing digits after the exponent symbol)
+> error[67; 73) token("42.2E+") msg(Missing digits after the exponent symbol)
+> error[74; 80) token("42.2E-") msg(Missing digits after the exponent symbol)
+> error[82; 91) token("42.2e+f32") msg(Missing digits after the exponent symbol)
+> error[92; 101) token("42.2e-f32") msg(Missing digits after the exponent symbol)
+> error[102; 111) token("42.2E+f32") msg(Missing digits after the exponent symbol)
+> error[112; 121) token("42.2E-f32") msg(Missing digits after the exponent symbol)
author	bors[bot] <26634292+bors[bot]@users.noreply.github.com>	2020-02-03 22:51:17 +0000
committer	GitHub <[email protected]>	2020-02-03 22:51:17 +0000
commit	918547dbe9a2907401102eba491ac25cebe1404d (patch)
tree	e0aa3bdcec597e81f022ac1ce388d42724a92f51 /crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt
parent	b090ee5a65f9630146c2842bc51fcfcc8da08da1 (diff)
parent	a3e5663ae0206270156fbeb926a174a40abbddb0 (diff)

diff --git a/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt b/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt new file mode 100644 index 000000000..ab35e20a5 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt
@@ -0,0 +1,62 @@
	1	FLOAT_NUMBER 2 "0e"
	2	WHITESPACE 1 "\n"
	3	FLOAT_NUMBER 2 "0E"
	4	WHITESPACE 2 "\n\n"
	5	FLOAT_NUMBER 4 "42e+"
	6	WHITESPACE 1 "\n"
	7	FLOAT_NUMBER 4 "42e-"
	8	WHITESPACE 1 "\n"
	9	FLOAT_NUMBER 4 "42E+"
	10	WHITESPACE 1 "\n"
	11	FLOAT_NUMBER 4 "42E-"
	12	WHITESPACE 2 "\n\n"
	13	INT_NUMBER 2 "42"
	14	DOT 1 "."
	15	IDENT 1 "e"
	16	PLUS 1 "+"
	17	WHITESPACE 1 "\n"
	18	INT_NUMBER 2 "42"
	19	DOT 1 "."
	20	IDENT 1 "e"
	21	MINUS 1 "-"
	22	WHITESPACE 1 "\n"
	23	INT_NUMBER 2 "42"
	24	DOT 1 "."
	25	IDENT 1 "E"
	26	PLUS 1 "+"
	27	WHITESPACE 1 "\n"
	28	INT_NUMBER 2 "42"
	29	DOT 1 "."
	30	IDENT 1 "E"
	31	MINUS 1 "-"
	32	WHITESPACE 2 "\n\n"
	33	FLOAT_NUMBER 6 "42.2e+"
	34	WHITESPACE 1 "\n"
	35	FLOAT_NUMBER 6 "42.2e-"
	36	WHITESPACE 1 "\n"
	37	FLOAT_NUMBER 6 "42.2E+"
	38	WHITESPACE 1 "\n"
	39	FLOAT_NUMBER 6 "42.2E-"
	40	WHITESPACE 2 "\n\n"
	41	FLOAT_NUMBER 9 "42.2e+f32"
	42	WHITESPACE 1 "\n"
	43	FLOAT_NUMBER 9 "42.2e-f32"
	44	WHITESPACE 1 "\n"
	45	FLOAT_NUMBER 9 "42.2E+f32"
	46	WHITESPACE 1 "\n"
	47	FLOAT_NUMBER 9 "42.2E-f32"
	48	WHITESPACE 1 "\n"
	49	> error[0; 2) token("0e") msg(Missing digits after the exponent symbol)
	50	> error[3; 5) token("0E") msg(Missing digits after the exponent symbol)
	51	> error[7; 11) token("42e+") msg(Missing digits after the exponent symbol)
	52	> error[12; 16) token("42e-") msg(Missing digits after the exponent symbol)
	53	> error[17; 21) token("42E+") msg(Missing digits after the exponent symbol)
	54	> error[22; 26) token("42E-") msg(Missing digits after the exponent symbol)
	55	> error[53; 59) token("42.2e+") msg(Missing digits after the exponent symbol)
	56	> error[60; 66) token("42.2e-") msg(Missing digits after the exponent symbol)
	57	> error[67; 73) token("42.2E+") msg(Missing digits after the exponent symbol)
	58	> error[74; 80) token("42.2E-") msg(Missing digits after the exponent symbol)
	59	> error[82; 91) token("42.2e+f32") msg(Missing digits after the exponent symbol)
	60	> error[92; 101) token("42.2e-f32") msg(Missing digits after the exponent symbol)
	61	> error[102; 111) token("42.2E+f32") msg(Missing digits after the exponent symbol)
	62	> error[112; 121) token("42.2E-f32") msg(Missing digits after the exponent symbol)