diff options
Diffstat (limited to 'crates')
-rw-r--r-- | crates/ra_syntax/Cargo.toml | 1 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/lexer.rs | 108 | ||||
-rw-r--r-- | crates/ra_syntax/tests/data/lexer/0004_numbers.txt | 12 | ||||
-rw-r--r-- | crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt | 2 | ||||
-rw-r--r-- | crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt | 38 | ||||
-rw-r--r-- | crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.rs (renamed from crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs) | 0 | ||||
-rw-r--r-- | crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.txt (renamed from crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt) | 13 |
7 files changed, 149 insertions, 25 deletions
diff --git a/crates/ra_syntax/Cargo.toml b/crates/ra_syntax/Cargo.toml index 97b6b047f..9ef8dee5d 100644 --- a/crates/ra_syntax/Cargo.toml +++ b/crates/ra_syntax/Cargo.toml | |||
@@ -11,6 +11,7 @@ repository = "https://github.com/rust-analyzer/rust-analyzer" | |||
11 | unicode-xid = "0.1.0" | 11 | unicode-xid = "0.1.0" |
12 | itertools = "0.8.0" | 12 | itertools = "0.8.0" |
13 | rowan = "0.6.0-pre.1" | 13 | rowan = "0.6.0-pre.1" |
14 | ra_rustc_lexer = { version = "0.1.0-pre.1", features = [ "unicode-xid" ] } | ||
14 | 15 | ||
15 | # ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here | 16 | # ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here |
16 | # to reduce number of compilations | 17 | # to reduce number of compilations |
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 60cf37047..1c818fdf4 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs | |||
@@ -30,19 +30,119 @@ pub struct Token { | |||
30 | 30 | ||
31 | /// Break a string up into its component tokens | 31 | /// Break a string up into its component tokens |
32 | pub fn tokenize(text: &str) -> Vec<Token> { | 32 | pub fn tokenize(text: &str) -> Vec<Token> { |
33 | if text.is_empty() { | ||
34 | return vec![]; | ||
35 | } | ||
33 | let mut text = text; | 36 | let mut text = text; |
34 | let mut acc = Vec::new(); | 37 | let mut acc = Vec::new(); |
38 | if let Some(len) = ra_rustc_lexer::strip_shebang(text) { | ||
39 | acc.push(Token { kind: SHEBANG, len: TextUnit::from_usize(len) }); | ||
40 | text = &text[len..]; | ||
41 | } | ||
35 | while !text.is_empty() { | 42 | while !text.is_empty() { |
36 | let token = next_token(text); | 43 | let rustc_token = ra_rustc_lexer::first_token(text); |
44 | macro_rules! decompose { | ||
45 | ($t1:expr, $t2:expr) => {{ | ||
46 | acc.push(Token { kind: $t1, len: 1.into() }); | ||
47 | acc.push(Token { kind: $t2, len: 1.into() }); | ||
48 | text = &text[2..]; | ||
49 | continue; | ||
50 | }}; | ||
51 | ($t1:expr, $t2:expr, $t3:expr) => {{ | ||
52 | acc.push(Token { kind: $t1, len: 1.into() }); | ||
53 | acc.push(Token { kind: $t2, len: 1.into() }); | ||
54 | acc.push(Token { kind: $t3, len: 1.into() }); | ||
55 | text = &text[3..]; | ||
56 | continue; | ||
57 | }}; | ||
58 | } | ||
59 | let kind = match rustc_token.kind { | ||
60 | ra_rustc_lexer::TokenKind::LineComment => COMMENT, | ||
61 | ra_rustc_lexer::TokenKind::BlockComment { .. } => COMMENT, | ||
62 | ra_rustc_lexer::TokenKind::Whitespace => WHITESPACE, | ||
63 | ra_rustc_lexer::TokenKind::Ident => { | ||
64 | let token_text = &text[..rustc_token.len]; | ||
65 | if token_text == "_" { | ||
66 | UNDERSCORE | ||
67 | } else { | ||
68 | SyntaxKind::from_keyword(&text[..rustc_token.len]).unwrap_or(IDENT) | ||
69 | } | ||
70 | } | ||
71 | ra_rustc_lexer::TokenKind::RawIdent => IDENT, | ||
72 | ra_rustc_lexer::TokenKind::Literal { kind, .. } => match kind { | ||
73 | ra_rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER, | ||
74 | ra_rustc_lexer::LiteralKind::Float { .. } => FLOAT_NUMBER, | ||
75 | ra_rustc_lexer::LiteralKind::Char { .. } => CHAR, | ||
76 | ra_rustc_lexer::LiteralKind::Byte { .. } => BYTE, | ||
77 | ra_rustc_lexer::LiteralKind::Str { .. } => STRING, | ||
78 | ra_rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING, | ||
79 | ra_rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING, | ||
80 | ra_rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING, | ||
81 | }, | ||
82 | ra_rustc_lexer::TokenKind::Lifetime { .. } => LIFETIME, | ||
83 | ra_rustc_lexer::TokenKind::Semi => SEMI, | ||
84 | ra_rustc_lexer::TokenKind::Comma => COMMA, | ||
85 | ra_rustc_lexer::TokenKind::DotDotDot => decompose!(DOT, DOT, DOT), | ||
86 | ra_rustc_lexer::TokenKind::DotDotEq => decompose!(DOT, DOT, EQ), | ||
87 | ra_rustc_lexer::TokenKind::DotDot => decompose!(DOT, DOT), | ||
88 | ra_rustc_lexer::TokenKind::Dot => DOT, | ||
89 | ra_rustc_lexer::TokenKind::OpenParen => L_PAREN, | ||
90 | ra_rustc_lexer::TokenKind::CloseParen => R_PAREN, | ||
91 | ra_rustc_lexer::TokenKind::OpenBrace => L_CURLY, | ||
92 | ra_rustc_lexer::TokenKind::CloseBrace => R_CURLY, | ||
93 | ra_rustc_lexer::TokenKind::OpenBracket => L_BRACK, | ||
94 | ra_rustc_lexer::TokenKind::CloseBracket => R_BRACK, | ||
95 | ra_rustc_lexer::TokenKind::At => AT, | ||
96 | ra_rustc_lexer::TokenKind::Pound => POUND, | ||
97 | ra_rustc_lexer::TokenKind::Tilde => TILDE, | ||
98 | ra_rustc_lexer::TokenKind::Question => QUESTION, | ||
99 | ra_rustc_lexer::TokenKind::ColonColon => decompose!(COLON, COLON), | ||
100 | ra_rustc_lexer::TokenKind::Colon => COLON, | ||
101 | ra_rustc_lexer::TokenKind::Dollar => DOLLAR, | ||
102 | ra_rustc_lexer::TokenKind::EqEq => decompose!(EQ, EQ), | ||
103 | ra_rustc_lexer::TokenKind::Eq => EQ, | ||
104 | ra_rustc_lexer::TokenKind::FatArrow => decompose!(EQ, R_ANGLE), | ||
105 | ra_rustc_lexer::TokenKind::Ne => decompose!(EXCL, EQ), | ||
106 | ra_rustc_lexer::TokenKind::Not => EXCL, | ||
107 | ra_rustc_lexer::TokenKind::Le => decompose!(L_ANGLE, EQ), | ||
108 | ra_rustc_lexer::TokenKind::LArrow => decompose!(L_ANGLE, MINUS), | ||
109 | ra_rustc_lexer::TokenKind::Lt => L_ANGLE, | ||
110 | ra_rustc_lexer::TokenKind::ShlEq => decompose!(L_ANGLE, L_ANGLE, EQ), | ||
111 | ra_rustc_lexer::TokenKind::Shl => decompose!(L_ANGLE, L_ANGLE), | ||
112 | ra_rustc_lexer::TokenKind::Ge => decompose!(R_ANGLE, EQ), | ||
113 | ra_rustc_lexer::TokenKind::Gt => R_ANGLE, | ||
114 | ra_rustc_lexer::TokenKind::ShrEq => decompose!(R_ANGLE, R_ANGLE, EQ), | ||
115 | ra_rustc_lexer::TokenKind::Shr => decompose!(R_ANGLE, R_ANGLE), | ||
116 | ra_rustc_lexer::TokenKind::RArrow => decompose!(MINUS, R_ANGLE), | ||
117 | ra_rustc_lexer::TokenKind::Minus => MINUS, | ||
118 | ra_rustc_lexer::TokenKind::MinusEq => decompose!(MINUS, EQ), | ||
119 | ra_rustc_lexer::TokenKind::And => AMP, | ||
120 | ra_rustc_lexer::TokenKind::AndAnd => decompose!(AMP, AMP), | ||
121 | ra_rustc_lexer::TokenKind::AndEq => decompose!(AMP, EQ), | ||
122 | ra_rustc_lexer::TokenKind::Or => PIPE, | ||
123 | ra_rustc_lexer::TokenKind::OrOr => decompose!(PIPE, PIPE), | ||
124 | ra_rustc_lexer::TokenKind::OrEq => decompose!(PIPE, EQ), | ||
125 | ra_rustc_lexer::TokenKind::PlusEq => decompose!(PLUS, EQ), | ||
126 | ra_rustc_lexer::TokenKind::Plus => PLUS, | ||
127 | ra_rustc_lexer::TokenKind::StarEq => decompose!(STAR, EQ), | ||
128 | ra_rustc_lexer::TokenKind::Star => STAR, | ||
129 | ra_rustc_lexer::TokenKind::SlashEq => decompose!(SLASH, EQ), | ||
130 | ra_rustc_lexer::TokenKind::Slash => SLASH, | ||
131 | ra_rustc_lexer::TokenKind::CaretEq => decompose!(CARET, EQ), | ||
132 | ra_rustc_lexer::TokenKind::Caret => CARET, | ||
133 | ra_rustc_lexer::TokenKind::PercentEq => decompose!(PERCENT, EQ), | ||
134 | ra_rustc_lexer::TokenKind::Percent => PERCENT, | ||
135 | ra_rustc_lexer::TokenKind::Unknown => ERROR, | ||
136 | }; | ||
137 | let token = Token { kind, len: TextUnit::from_usize(rustc_token.len) }; | ||
37 | acc.push(token); | 138 | acc.push(token); |
38 | let len: u32 = token.len.into(); | 139 | text = &text[rustc_token.len..]; |
39 | text = &text[len as usize..]; | ||
40 | } | 140 | } |
41 | acc | 141 | acc |
42 | } | 142 | } |
43 | 143 | ||
44 | /// Get the next token from a string | 144 | /// Get the next token from a string |
45 | pub fn next_token(text: &str) -> Token { | 145 | fn next_token(text: &str) -> Token { |
46 | assert!(!text.is_empty()); | 146 | assert!(!text.is_empty()); |
47 | let mut ptr = Ptr::new(text); | 147 | let mut ptr = Ptr::new(text); |
48 | let c = ptr.bump().unwrap(); | 148 | let c = ptr.bump().unwrap(); |
diff --git a/crates/ra_syntax/tests/data/lexer/0004_numbers.txt b/crates/ra_syntax/tests/data/lexer/0004_numbers.txt index 39988aedc..7bb89b8ae 100644 --- a/crates/ra_syntax/tests/data/lexer/0004_numbers.txt +++ b/crates/ra_syntax/tests/data/lexer/0004_numbers.txt | |||
@@ -12,9 +12,9 @@ INT_NUMBER 2 "0_" | |||
12 | WHITESPACE 1 " " | 12 | WHITESPACE 1 " " |
13 | FLOAT_NUMBER 2 "0." | 13 | FLOAT_NUMBER 2 "0." |
14 | WHITESPACE 1 " " | 14 | WHITESPACE 1 " " |
15 | INT_NUMBER 2 "0e" | 15 | FLOAT_NUMBER 2 "0e" |
16 | WHITESPACE 1 " " | 16 | WHITESPACE 1 " " |
17 | INT_NUMBER 2 "0E" | 17 | FLOAT_NUMBER 2 "0E" |
18 | WHITESPACE 1 " " | 18 | WHITESPACE 1 " " |
19 | INT_NUMBER 2 "0z" | 19 | INT_NUMBER 2 "0z" |
20 | WHITESPACE 1 "\n" | 20 | WHITESPACE 1 "\n" |
@@ -32,9 +32,9 @@ INT_NUMBER 6 "0_1279" | |||
32 | WHITESPACE 1 " " | 32 | WHITESPACE 1 " " |
33 | FLOAT_NUMBER 6 "0.1279" | 33 | FLOAT_NUMBER 6 "0.1279" |
34 | WHITESPACE 1 " " | 34 | WHITESPACE 1 " " |
35 | INT_NUMBER 6 "0e1279" | 35 | FLOAT_NUMBER 6 "0e1279" |
36 | WHITESPACE 1 " " | 36 | WHITESPACE 1 " " |
37 | INT_NUMBER 6 "0E1279" | 37 | FLOAT_NUMBER 6 "0E1279" |
38 | WHITESPACE 1 "\n" | 38 | WHITESPACE 1 "\n" |
39 | INT_NUMBER 1 "0" | 39 | INT_NUMBER 1 "0" |
40 | DOT 1 "." | 40 | DOT 1 "." |
@@ -47,9 +47,7 @@ IDENT 3 "foo" | |||
47 | L_PAREN 1 "(" | 47 | L_PAREN 1 "(" |
48 | R_PAREN 1 ")" | 48 | R_PAREN 1 ")" |
49 | WHITESPACE 1 "\n" | 49 | WHITESPACE 1 "\n" |
50 | INT_NUMBER 2 "0e" | 50 | FLOAT_NUMBER 4 "0e+1" |
51 | PLUS 1 "+" | ||
52 | INT_NUMBER 1 "1" | ||
53 | WHITESPACE 1 "\n" | 51 | WHITESPACE 1 "\n" |
54 | INT_NUMBER 1 "0" | 52 | INT_NUMBER 1 "0" |
55 | DOT 1 "." | 53 | DOT 1 "." |
diff --git a/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt b/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt index 812dfbc18..737a300ee 100644 --- a/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt +++ b/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt | |||
@@ -1 +1 @@ | |||
CHAR 2 "\'1" | LIFETIME 2 "\'1" | ||
diff --git a/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt b/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt index 76d186a3c..84867026f 100644 --- a/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt +++ b/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt | |||
@@ -1,7 +1,39 @@ | |||
1 | SOURCE_FILE@[0; 42) | 1 | SOURCE_FILE@[0; 42) |
2 | SHEBANG@[0; 20) "#!/use/bin/env rusti" | 2 | SHEBANG@[0; 20) "#!/use/bin/env rusti" |
3 | WHITESPACE@[20; 21) "\n" | 3 | WHITESPACE@[20; 21) "\n" |
4 | ERROR@[21; 41) | 4 | ATTR@[21; 23) |
5 | SHEBANG@[21; 41) "#!/use/bin/env rusti" | 5 | POUND@[21; 22) "#" |
6 | EXCL@[22; 23) "!" | ||
7 | ERROR@[23; 24) | ||
8 | SLASH@[23; 24) "/" | ||
9 | USE_ITEM@[24; 28) | ||
10 | USE_KW@[24; 27) "use" | ||
11 | ERROR@[27; 28) | ||
12 | SLASH@[27; 28) "/" | ||
13 | MACRO_CALL@[28; 31) | ||
14 | PATH@[28; 31) | ||
15 | PATH_SEGMENT@[28; 31) | ||
16 | NAME_REF@[28; 31) | ||
17 | IDENT@[28; 31) "bin" | ||
18 | ERROR@[31; 32) | ||
19 | SLASH@[31; 32) "/" | ||
20 | MACRO_CALL@[32; 41) | ||
21 | PATH@[32; 35) | ||
22 | PATH_SEGMENT@[32; 35) | ||
23 | NAME_REF@[32; 35) | ||
24 | IDENT@[32; 35) "env" | ||
25 | WHITESPACE@[35; 36) " " | ||
26 | NAME@[36; 41) | ||
27 | IDENT@[36; 41) "rusti" | ||
6 | WHITESPACE@[41; 42) "\n" | 28 | WHITESPACE@[41; 42) "\n" |
7 | error 21: expected an item | 29 | error 23: expected `[` |
30 | error 23: expected an item | ||
31 | error 27: expected one of `*`, `::`, `{`, `self`, `super` or an indentifier | ||
32 | error 28: expected SEMI | ||
33 | error 31: expected EXCL | ||
34 | error 31: expected `{`, `[`, `(` | ||
35 | error 31: expected SEMI | ||
36 | error 31: expected an item | ||
37 | error 35: expected EXCL | ||
38 | error 41: expected `{`, `[`, `(` | ||
39 | error 41: expected SEMI | ||
diff --git a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.rs index 261aad1fb..261aad1fb 100644 --- a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs +++ b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.rs | |||
diff --git a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.txt index b0acfa5d2..4f7e809c5 100644 --- a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt +++ b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.txt | |||
@@ -11,7 +11,7 @@ SOURCE_FILE@[0; 112) | |||
11 | BLOCK@[10; 111) | 11 | BLOCK@[10; 111) |
12 | L_CURLY@[10; 11) "{" | 12 | L_CURLY@[10; 11) "{" |
13 | WHITESPACE@[11; 16) "\n " | 13 | WHITESPACE@[11; 16) "\n " |
14 | LET_STMT@[16; 27) | 14 | LET_STMT@[16; 31) |
15 | LET_KW@[16; 19) "let" | 15 | LET_KW@[16; 19) "let" |
16 | WHITESPACE@[19; 20) " " | 16 | WHITESPACE@[19; 20) " " |
17 | PLACEHOLDER_PAT@[20; 21) | 17 | PLACEHOLDER_PAT@[20; 21) |
@@ -19,14 +19,8 @@ SOURCE_FILE@[0; 112) | |||
19 | WHITESPACE@[21; 22) " " | 19 | WHITESPACE@[21; 22) " " |
20 | EQ@[22; 23) "=" | 20 | EQ@[22; 23) "=" |
21 | WHITESPACE@[23; 24) " " | 21 | WHITESPACE@[23; 24) " " |
22 | LITERAL@[24; 27) | 22 | LITERAL@[24; 30) |
23 | CHAR@[24; 27) "\'c\'" | 23 | CHAR@[24; 30) "\'c\'u32" |
24 | EXPR_STMT@[27; 31) | ||
25 | PATH_EXPR@[27; 30) | ||
26 | PATH@[27; 30) | ||
27 | PATH_SEGMENT@[27; 30) | ||
28 | NAME_REF@[27; 30) | ||
29 | IDENT@[27; 30) "u32" | ||
30 | SEMI@[30; 31) ";" | 24 | SEMI@[30; 31) ";" |
31 | WHITESPACE@[31; 36) "\n " | 25 | WHITESPACE@[31; 36) "\n " |
32 | LET_STMT@[36; 60) | 26 | LET_STMT@[36; 60) |
@@ -67,4 +61,3 @@ SOURCE_FILE@[0; 112) | |||
67 | WHITESPACE@[109; 110) "\n" | 61 | WHITESPACE@[109; 110) "\n" |
68 | R_CURLY@[110; 111) "}" | 62 | R_CURLY@[110; 111) "}" |
69 | WHITESPACE@[111; 112) "\n" | 63 | WHITESPACE@[111; 112) "\n" |
70 | error 27: expected SEMI | ||