aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAleksey Kladov <[email protected]>2019-07-22 15:47:33 +0100
committerAleksey Kladov <[email protected]>2019-07-22 15:47:33 +0100
commit75761c0e47d8c20a490a3d61ea64d2413d3c3570 (patch)
tree52f8c89145a952ab25e51b7658ec17441b33607a
parentd690249bc81bc265cb3d1836c2922325f4fdb8af (diff)
add rustc_lexer
-rw-r--r--Cargo.lock10
-rw-r--r--crates/ra_syntax/Cargo.toml1
-rw-r--r--crates/ra_syntax/src/parsing/lexer.rs108
-rw-r--r--crates/ra_syntax/tests/data/lexer/0004_numbers.txt12
-rw-r--r--crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt2
-rw-r--r--crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt38
-rw-r--r--crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.rs (renamed from crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs)0
-rw-r--r--crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.txt (renamed from crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt)13
8 files changed, 159 insertions, 25 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 8feaf27ec..d5474d6e2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1309,11 +1309,20 @@ dependencies = [
1309] 1309]
1310 1310
1311[[package]] 1311[[package]]
1312name = "ra_rustc_lexer"
1313version = "0.1.0-pre.1"
1314source = "registry+https://github.com/rust-lang/crates.io-index"
1315dependencies = [
1316 "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
1317]
1318
1319[[package]]
1312name = "ra_syntax" 1320name = "ra_syntax"
1313version = "0.1.0" 1321version = "0.1.0"
1314dependencies = [ 1322dependencies = [
1315 "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", 1323 "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
1316 "ra_parser 0.1.0", 1324 "ra_parser 0.1.0",
1325 "ra_rustc_lexer 0.1.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)",
1317 "ra_text_edit 0.1.0", 1326 "ra_text_edit 0.1.0",
1318 "rowan 0.6.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)", 1327 "rowan 0.6.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)",
1319 "smol_str 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", 1328 "smol_str 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2250,6 +2259,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
2250"checksum proptest 0.9.4 (registry+https://github.com/rust-lang/crates.io-index)" = "cf147e022eacf0c8a054ab864914a7602618adba841d800a9a9868a5237a529f" 2259"checksum proptest 0.9.4 (registry+https://github.com/rust-lang/crates.io-index)" = "cf147e022eacf0c8a054ab864914a7602618adba841d800a9a9868a5237a529f"
2251"checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" 2260"checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0"
2252"checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" 2261"checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1"
2262"checksum ra_rustc_lexer 0.1.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e8d92772f822978a6c9c4657aa61af439e4e635180628b3354049b283b749f1e"
2253"checksum ra_vfs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fb7cd4e302032c5ab514f1c01c89727cd96fd950dd36f9ebee9252df45d9fb1a" 2263"checksum ra_vfs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fb7cd4e302032c5ab514f1c01c89727cd96fd950dd36f9ebee9252df45d9fb1a"
2254"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" 2264"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
2255"checksum rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" 2265"checksum rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c"
diff --git a/crates/ra_syntax/Cargo.toml b/crates/ra_syntax/Cargo.toml
index 97b6b047f..9ef8dee5d 100644
--- a/crates/ra_syntax/Cargo.toml
+++ b/crates/ra_syntax/Cargo.toml
@@ -11,6 +11,7 @@ repository = "https://github.com/rust-analyzer/rust-analyzer"
11unicode-xid = "0.1.0" 11unicode-xid = "0.1.0"
12itertools = "0.8.0" 12itertools = "0.8.0"
13rowan = "0.6.0-pre.1" 13rowan = "0.6.0-pre.1"
14ra_rustc_lexer = { version = "0.1.0-pre.1", features = [ "unicode-xid" ] }
14 15
15# ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here 16# ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here
16# to reduce number of compilations 17# to reduce number of compilations
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index 60cf37047..1c818fdf4 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -30,19 +30,119 @@ pub struct Token {
30 30
31/// Break a string up into its component tokens 31/// Break a string up into its component tokens
32pub fn tokenize(text: &str) -> Vec<Token> { 32pub fn tokenize(text: &str) -> Vec<Token> {
33 if text.is_empty() {
34 return vec![];
35 }
33 let mut text = text; 36 let mut text = text;
34 let mut acc = Vec::new(); 37 let mut acc = Vec::new();
38 if let Some(len) = ra_rustc_lexer::strip_shebang(text) {
39 acc.push(Token { kind: SHEBANG, len: TextUnit::from_usize(len) });
40 text = &text[len..];
41 }
35 while !text.is_empty() { 42 while !text.is_empty() {
36 let token = next_token(text); 43 let rustc_token = ra_rustc_lexer::first_token(text);
44 macro_rules! decompose {
45 ($t1:expr, $t2:expr) => {{
46 acc.push(Token { kind: $t1, len: 1.into() });
47 acc.push(Token { kind: $t2, len: 1.into() });
48 text = &text[2..];
49 continue;
50 }};
51 ($t1:expr, $t2:expr, $t3:expr) => {{
52 acc.push(Token { kind: $t1, len: 1.into() });
53 acc.push(Token { kind: $t2, len: 1.into() });
54 acc.push(Token { kind: $t3, len: 1.into() });
55 text = &text[3..];
56 continue;
57 }};
58 }
59 let kind = match rustc_token.kind {
60 ra_rustc_lexer::TokenKind::LineComment => COMMENT,
61 ra_rustc_lexer::TokenKind::BlockComment { .. } => COMMENT,
62 ra_rustc_lexer::TokenKind::Whitespace => WHITESPACE,
63 ra_rustc_lexer::TokenKind::Ident => {
64 let token_text = &text[..rustc_token.len];
65 if token_text == "_" {
66 UNDERSCORE
67 } else {
68 SyntaxKind::from_keyword(&text[..rustc_token.len]).unwrap_or(IDENT)
69 }
70 }
71 ra_rustc_lexer::TokenKind::RawIdent => IDENT,
72 ra_rustc_lexer::TokenKind::Literal { kind, .. } => match kind {
73 ra_rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER,
74 ra_rustc_lexer::LiteralKind::Float { .. } => FLOAT_NUMBER,
75 ra_rustc_lexer::LiteralKind::Char { .. } => CHAR,
76 ra_rustc_lexer::LiteralKind::Byte { .. } => BYTE,
77 ra_rustc_lexer::LiteralKind::Str { .. } => STRING,
78 ra_rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING,
79 ra_rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING,
80 ra_rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING,
81 },
82 ra_rustc_lexer::TokenKind::Lifetime { .. } => LIFETIME,
83 ra_rustc_lexer::TokenKind::Semi => SEMI,
84 ra_rustc_lexer::TokenKind::Comma => COMMA,
85 ra_rustc_lexer::TokenKind::DotDotDot => decompose!(DOT, DOT, DOT),
86 ra_rustc_lexer::TokenKind::DotDotEq => decompose!(DOT, DOT, EQ),
87 ra_rustc_lexer::TokenKind::DotDot => decompose!(DOT, DOT),
88 ra_rustc_lexer::TokenKind::Dot => DOT,
89 ra_rustc_lexer::TokenKind::OpenParen => L_PAREN,
90 ra_rustc_lexer::TokenKind::CloseParen => R_PAREN,
91 ra_rustc_lexer::TokenKind::OpenBrace => L_CURLY,
92 ra_rustc_lexer::TokenKind::CloseBrace => R_CURLY,
93 ra_rustc_lexer::TokenKind::OpenBracket => L_BRACK,
94 ra_rustc_lexer::TokenKind::CloseBracket => R_BRACK,
95 ra_rustc_lexer::TokenKind::At => AT,
96 ra_rustc_lexer::TokenKind::Pound => POUND,
97 ra_rustc_lexer::TokenKind::Tilde => TILDE,
98 ra_rustc_lexer::TokenKind::Question => QUESTION,
99 ra_rustc_lexer::TokenKind::ColonColon => decompose!(COLON, COLON),
100 ra_rustc_lexer::TokenKind::Colon => COLON,
101 ra_rustc_lexer::TokenKind::Dollar => DOLLAR,
102 ra_rustc_lexer::TokenKind::EqEq => decompose!(EQ, EQ),
103 ra_rustc_lexer::TokenKind::Eq => EQ,
104 ra_rustc_lexer::TokenKind::FatArrow => decompose!(EQ, R_ANGLE),
105 ra_rustc_lexer::TokenKind::Ne => decompose!(EXCL, EQ),
106 ra_rustc_lexer::TokenKind::Not => EXCL,
107 ra_rustc_lexer::TokenKind::Le => decompose!(L_ANGLE, EQ),
108 ra_rustc_lexer::TokenKind::LArrow => decompose!(COLON, MINUS),
109 ra_rustc_lexer::TokenKind::Lt => L_ANGLE,
110 ra_rustc_lexer::TokenKind::ShlEq => decompose!(L_ANGLE, L_ANGLE, EQ),
111 ra_rustc_lexer::TokenKind::Shl => decompose!(L_ANGLE, L_ANGLE),
112 ra_rustc_lexer::TokenKind::Ge => decompose!(R_ANGLE, EQ),
113 ra_rustc_lexer::TokenKind::Gt => R_ANGLE,
114 ra_rustc_lexer::TokenKind::ShrEq => decompose!(R_ANGLE, R_ANGLE, EQ),
115 ra_rustc_lexer::TokenKind::Shr => decompose!(R_ANGLE, R_ANGLE),
116 ra_rustc_lexer::TokenKind::RArrow => decompose!(MINUS, R_ANGLE),
117 ra_rustc_lexer::TokenKind::Minus => MINUS,
118 ra_rustc_lexer::TokenKind::MinusEq => decompose!(MINUS, EQ),
119 ra_rustc_lexer::TokenKind::And => AMP,
120 ra_rustc_lexer::TokenKind::AndAnd => decompose!(AMP, AMP),
121 ra_rustc_lexer::TokenKind::AndEq => decompose!(AMP, EQ),
122 ra_rustc_lexer::TokenKind::Or => PIPE,
123 ra_rustc_lexer::TokenKind::OrOr => decompose!(PIPE, PIPE),
124 ra_rustc_lexer::TokenKind::OrEq => decompose!(PIPE, EQ),
125 ra_rustc_lexer::TokenKind::PlusEq => decompose!(PLUS, EQ),
126 ra_rustc_lexer::TokenKind::Plus => PLUS,
127 ra_rustc_lexer::TokenKind::StarEq => decompose!(STAR, EQ),
128 ra_rustc_lexer::TokenKind::Star => STAR,
129 ra_rustc_lexer::TokenKind::SlashEq => decompose!(SLASH, EQ),
130 ra_rustc_lexer::TokenKind::Slash => SLASH,
131 ra_rustc_lexer::TokenKind::CaretEq => decompose!(CARET, EQ),
132 ra_rustc_lexer::TokenKind::Caret => CARET,
133 ra_rustc_lexer::TokenKind::PercentEq => decompose!(PERCENT, EQ),
134 ra_rustc_lexer::TokenKind::Percent => PERCENT,
135 ra_rustc_lexer::TokenKind::Unknown => ERROR,
136 };
137 let token = Token { kind, len: TextUnit::from_usize(rustc_token.len) };
37 acc.push(token); 138 acc.push(token);
38 let len: u32 = token.len.into(); 139 text = &text[rustc_token.len..];
39 text = &text[len as usize..];
40 } 140 }
41 acc 141 acc
42} 142}
43 143
44/// Get the next token from a string 144/// Get the next token from a string
45pub fn next_token(text: &str) -> Token { 145fn next_token(text: &str) -> Token {
46 assert!(!text.is_empty()); 146 assert!(!text.is_empty());
47 let mut ptr = Ptr::new(text); 147 let mut ptr = Ptr::new(text);
48 let c = ptr.bump().unwrap(); 148 let c = ptr.bump().unwrap();
diff --git a/crates/ra_syntax/tests/data/lexer/0004_numbers.txt b/crates/ra_syntax/tests/data/lexer/0004_numbers.txt
index 39988aedc..7bb89b8ae 100644
--- a/crates/ra_syntax/tests/data/lexer/0004_numbers.txt
+++ b/crates/ra_syntax/tests/data/lexer/0004_numbers.txt
@@ -12,9 +12,9 @@ INT_NUMBER 2 "0_"
12WHITESPACE 1 " " 12WHITESPACE 1 " "
13FLOAT_NUMBER 2 "0." 13FLOAT_NUMBER 2 "0."
14WHITESPACE 1 " " 14WHITESPACE 1 " "
15INT_NUMBER 2 "0e" 15FLOAT_NUMBER 2 "0e"
16WHITESPACE 1 " " 16WHITESPACE 1 " "
17INT_NUMBER 2 "0E" 17FLOAT_NUMBER 2 "0E"
18WHITESPACE 1 " " 18WHITESPACE 1 " "
19INT_NUMBER 2 "0z" 19INT_NUMBER 2 "0z"
20WHITESPACE 1 "\n" 20WHITESPACE 1 "\n"
@@ -32,9 +32,9 @@ INT_NUMBER 6 "0_1279"
32WHITESPACE 1 " " 32WHITESPACE 1 " "
33FLOAT_NUMBER 6 "0.1279" 33FLOAT_NUMBER 6 "0.1279"
34WHITESPACE 1 " " 34WHITESPACE 1 " "
35INT_NUMBER 6 "0e1279" 35FLOAT_NUMBER 6 "0e1279"
36WHITESPACE 1 " " 36WHITESPACE 1 " "
37INT_NUMBER 6 "0E1279" 37FLOAT_NUMBER 6 "0E1279"
38WHITESPACE 1 "\n" 38WHITESPACE 1 "\n"
39INT_NUMBER 1 "0" 39INT_NUMBER 1 "0"
40DOT 1 "." 40DOT 1 "."
@@ -47,9 +47,7 @@ IDENT 3 "foo"
47L_PAREN 1 "(" 47L_PAREN 1 "("
48R_PAREN 1 ")" 48R_PAREN 1 ")"
49WHITESPACE 1 "\n" 49WHITESPACE 1 "\n"
50INT_NUMBER 2 "0e" 50FLOAT_NUMBER 4 "0e+1"
51PLUS 1 "+"
52INT_NUMBER 1 "1"
53WHITESPACE 1 "\n" 51WHITESPACE 1 "\n"
54INT_NUMBER 1 "0" 52INT_NUMBER 1 "0"
55DOT 1 "." 53DOT 1 "."
diff --git a/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt b/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt
index 812dfbc18..737a300ee 100644
--- a/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt
+++ b/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt
@@ -1 +1 @@
CHAR 2 "\'1" LIFETIME 2 "\'1"
diff --git a/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt b/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt
index 76d186a3c..84867026f 100644
--- a/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt
+++ b/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt
@@ -1,7 +1,39 @@
1SOURCE_FILE@[0; 42) 1SOURCE_FILE@[0; 42)
2 SHEBANG@[0; 20) "#!/use/bin/env rusti" 2 SHEBANG@[0; 20) "#!/use/bin/env rusti"
3 WHITESPACE@[20; 21) "\n" 3 WHITESPACE@[20; 21) "\n"
4 ERROR@[21; 41) 4 ATTR@[21; 23)
5 SHEBANG@[21; 41) "#!/use/bin/env rusti" 5 POUND@[21; 22) "#"
6 EXCL@[22; 23) "!"
7 ERROR@[23; 24)
8 SLASH@[23; 24) "/"
9 USE_ITEM@[24; 28)
10 USE_KW@[24; 27) "use"
11 ERROR@[27; 28)
12 SLASH@[27; 28) "/"
13 MACRO_CALL@[28; 31)
14 PATH@[28; 31)
15 PATH_SEGMENT@[28; 31)
16 NAME_REF@[28; 31)
17 IDENT@[28; 31) "bin"
18 ERROR@[31; 32)
19 SLASH@[31; 32) "/"
20 MACRO_CALL@[32; 41)
21 PATH@[32; 35)
22 PATH_SEGMENT@[32; 35)
23 NAME_REF@[32; 35)
24 IDENT@[32; 35) "env"
25 WHITESPACE@[35; 36) " "
26 NAME@[36; 41)
27 IDENT@[36; 41) "rusti"
6 WHITESPACE@[41; 42) "\n" 28 WHITESPACE@[41; 42) "\n"
7error 21: expected an item 29error 23: expected `[`
30error 23: expected an item
31error 27: expected one of `*`, `::`, `{`, `self`, `super` or an indentifier
32error 28: expected SEMI
33error 31: expected EXCL
34error 31: expected `{`, `[`, `(`
35error 31: expected SEMI
36error 31: expected an item
37error 35: expected EXCL
38error 41: expected `{`, `[`, `(`
39error 41: expected SEMI
diff --git a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.rs
index 261aad1fb..261aad1fb 100644
--- a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs
+++ b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.rs
diff --git a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.txt
index b0acfa5d2..4f7e809c5 100644
--- a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt
+++ b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.txt
@@ -11,7 +11,7 @@ SOURCE_FILE@[0; 112)
11 BLOCK@[10; 111) 11 BLOCK@[10; 111)
12 L_CURLY@[10; 11) "{" 12 L_CURLY@[10; 11) "{"
13 WHITESPACE@[11; 16) "\n " 13 WHITESPACE@[11; 16) "\n "
14 LET_STMT@[16; 27) 14 LET_STMT@[16; 31)
15 LET_KW@[16; 19) "let" 15 LET_KW@[16; 19) "let"
16 WHITESPACE@[19; 20) " " 16 WHITESPACE@[19; 20) " "
17 PLACEHOLDER_PAT@[20; 21) 17 PLACEHOLDER_PAT@[20; 21)
@@ -19,14 +19,8 @@ SOURCE_FILE@[0; 112)
19 WHITESPACE@[21; 22) " " 19 WHITESPACE@[21; 22) " "
20 EQ@[22; 23) "=" 20 EQ@[22; 23) "="
21 WHITESPACE@[23; 24) " " 21 WHITESPACE@[23; 24) " "
22 LITERAL@[24; 27) 22 LITERAL@[24; 30)
23 CHAR@[24; 27) "\'c\'" 23 CHAR@[24; 30) "\'c\'u32"
24 EXPR_STMT@[27; 31)
25 PATH_EXPR@[27; 30)
26 PATH@[27; 30)
27 PATH_SEGMENT@[27; 30)
28 NAME_REF@[27; 30)
29 IDENT@[27; 30) "u32"
30 SEMI@[30; 31) ";" 24 SEMI@[30; 31) ";"
31 WHITESPACE@[31; 36) "\n " 25 WHITESPACE@[31; 36) "\n "
32 LET_STMT@[36; 60) 26 LET_STMT@[36; 60)
@@ -67,4 +61,3 @@ SOURCE_FILE@[0; 112)
67 WHITESPACE@[109; 110) "\n" 61 WHITESPACE@[109; 110) "\n"
68 R_CURLY@[110; 111) "}" 62 R_CURLY@[110; 111) "}"
69 WHITESPACE@[111; 112) "\n" 63 WHITESPACE@[111; 112) "\n"
70error 27: expected SEMI