diff options
Diffstat (limited to 'crates')
-rw-r--r-- | crates/ra_syntax/Cargo.toml | 2 | ||||
-rw-r--r-- | crates/ra_syntax/src/parsing/lexer.rs | 136 | ||||
-rw-r--r-- | crates/ra_syntax/src/validation.rs | 54 |
3 files changed, 75 insertions, 117 deletions
diff --git a/crates/ra_syntax/Cargo.toml b/crates/ra_syntax/Cargo.toml index 5f8585878..0ead277b2 100644 --- a/crates/ra_syntax/Cargo.toml +++ b/crates/ra_syntax/Cargo.toml | |||
@@ -10,7 +10,7 @@ repository = "https://github.com/rust-analyzer/rust-analyzer" | |||
10 | [dependencies] | 10 | [dependencies] |
11 | itertools = "0.8.0" | 11 | itertools = "0.8.0" |
12 | rowan = "0.6.1" | 12 | rowan = "0.6.1" |
13 | ra_rustc_lexer = { version = "0.1.0-pre.2" } | 13 | ra_rustc_lexer = { version = "0.1.0-pre.3", features = ["unicode-xid"] } |
14 | 14 | ||
15 | # ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here | 15 | # ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here |
16 | # to reduce number of compilations | 16 | # to reduce number of compilations |
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 06822ea91..bdb01d40b 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs | |||
@@ -12,16 +12,16 @@ pub struct Token { | |||
12 | pub len: TextUnit, | 12 | pub len: TextUnit, |
13 | } | 13 | } |
14 | 14 | ||
15 | fn match_literal_kind(kind: ra_rustc_lexer::LiteralKind) -> SyntaxKind { | 15 | fn match_literal_kind(kind: rustc_lexer::LiteralKind) -> SyntaxKind { |
16 | match kind { | 16 | match kind { |
17 | ra_rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER, | 17 | rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER, |
18 | ra_rustc_lexer::LiteralKind::Float { .. } => FLOAT_NUMBER, | 18 | rustc_lexer::LiteralKind::Float { .. } => FLOAT_NUMBER, |
19 | ra_rustc_lexer::LiteralKind::Char { .. } => CHAR, | 19 | rustc_lexer::LiteralKind::Char { .. } => CHAR, |
20 | ra_rustc_lexer::LiteralKind::Byte { .. } => BYTE, | 20 | rustc_lexer::LiteralKind::Byte { .. } => BYTE, |
21 | ra_rustc_lexer::LiteralKind::Str { .. } => STRING, | 21 | rustc_lexer::LiteralKind::Str { .. } => STRING, |
22 | ra_rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING, | 22 | rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING, |
23 | ra_rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING, | 23 | rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING, |
24 | ra_rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING, | 24 | rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING, |
25 | } | 25 | } |
26 | } | 26 | } |
27 | 27 | ||
@@ -32,32 +32,17 @@ pub fn tokenize(text: &str) -> Vec<Token> { | |||
32 | } | 32 | } |
33 | let mut text = text; | 33 | let mut text = text; |
34 | let mut acc = Vec::new(); | 34 | let mut acc = Vec::new(); |
35 | if let Some(len) = ra_rustc_lexer::strip_shebang(text) { | 35 | if let Some(len) = rustc_lexer::strip_shebang(text) { |
36 | acc.push(Token { kind: SHEBANG, len: TextUnit::from_usize(len) }); | 36 | acc.push(Token { kind: SHEBANG, len: TextUnit::from_usize(len) }); |
37 | text = &text[len..]; | 37 | text = &text[len..]; |
38 | } | 38 | } |
39 | while !text.is_empty() { | 39 | while !text.is_empty() { |
40 | let rustc_token = ra_rustc_lexer::first_token(text); | 40 | let rustc_token = rustc_lexer::first_token(text); |
41 | macro_rules! decompose { | ||
42 | ($t1:expr, $t2:expr) => {{ | ||
43 | acc.push(Token { kind: $t1, len: 1.into() }); | ||
44 | acc.push(Token { kind: $t2, len: 1.into() }); | ||
45 | text = &text[2..]; | ||
46 | continue; | ||
47 | }}; | ||
48 | ($t1:expr, $t2:expr, $t3:expr) => {{ | ||
49 | acc.push(Token { kind: $t1, len: 1.into() }); | ||
50 | acc.push(Token { kind: $t2, len: 1.into() }); | ||
51 | acc.push(Token { kind: $t3, len: 1.into() }); | ||
52 | text = &text[3..]; | ||
53 | continue; | ||
54 | }}; | ||
55 | } | ||
56 | let kind = match rustc_token.kind { | 41 | let kind = match rustc_token.kind { |
57 | ra_rustc_lexer::TokenKind::LineComment => COMMENT, | 42 | rustc_lexer::TokenKind::LineComment => COMMENT, |
58 | ra_rustc_lexer::TokenKind::BlockComment { .. } => COMMENT, | 43 | rustc_lexer::TokenKind::BlockComment { .. } => COMMENT, |
59 | ra_rustc_lexer::TokenKind::Whitespace => WHITESPACE, | 44 | rustc_lexer::TokenKind::Whitespace => WHITESPACE, |
60 | ra_rustc_lexer::TokenKind::Ident => { | 45 | rustc_lexer::TokenKind::Ident => { |
61 | let token_text = &text[..rustc_token.len]; | 46 | let token_text = &text[..rustc_token.len]; |
62 | if token_text == "_" { | 47 | if token_text == "_" { |
63 | UNDERSCORE | 48 | UNDERSCORE |
@@ -65,62 +50,37 @@ pub fn tokenize(text: &str) -> Vec<Token> { | |||
65 | SyntaxKind::from_keyword(&text[..rustc_token.len]).unwrap_or(IDENT) | 50 | SyntaxKind::from_keyword(&text[..rustc_token.len]).unwrap_or(IDENT) |
66 | } | 51 | } |
67 | } | 52 | } |
68 | ra_rustc_lexer::TokenKind::RawIdent => IDENT, | 53 | rustc_lexer::TokenKind::RawIdent => IDENT, |
69 | ra_rustc_lexer::TokenKind::Literal { kind, .. } => match_literal_kind(kind), | 54 | rustc_lexer::TokenKind::Literal { kind, .. } => match_literal_kind(kind), |
70 | ra_rustc_lexer::TokenKind::Lifetime { .. } => LIFETIME, | 55 | rustc_lexer::TokenKind::Lifetime { .. } => LIFETIME, |
71 | ra_rustc_lexer::TokenKind::Semi => SEMI, | 56 | rustc_lexer::TokenKind::Semi => SEMI, |
72 | ra_rustc_lexer::TokenKind::Comma => COMMA, | 57 | rustc_lexer::TokenKind::Comma => COMMA, |
73 | ra_rustc_lexer::TokenKind::DotDotDot => decompose!(DOT, DOT, DOT), | 58 | rustc_lexer::TokenKind::Dot => DOT, |
74 | ra_rustc_lexer::TokenKind::DotDotEq => decompose!(DOT, DOT, EQ), | 59 | rustc_lexer::TokenKind::OpenParen => L_PAREN, |
75 | ra_rustc_lexer::TokenKind::DotDot => decompose!(DOT, DOT), | 60 | rustc_lexer::TokenKind::CloseParen => R_PAREN, |
76 | ra_rustc_lexer::TokenKind::Dot => DOT, | 61 | rustc_lexer::TokenKind::OpenBrace => L_CURLY, |
77 | ra_rustc_lexer::TokenKind::OpenParen => L_PAREN, | 62 | rustc_lexer::TokenKind::CloseBrace => R_CURLY, |
78 | ra_rustc_lexer::TokenKind::CloseParen => R_PAREN, | 63 | rustc_lexer::TokenKind::OpenBracket => L_BRACK, |
79 | ra_rustc_lexer::TokenKind::OpenBrace => L_CURLY, | 64 | rustc_lexer::TokenKind::CloseBracket => R_BRACK, |
80 | ra_rustc_lexer::TokenKind::CloseBrace => R_CURLY, | 65 | rustc_lexer::TokenKind::At => AT, |
81 | ra_rustc_lexer::TokenKind::OpenBracket => L_BRACK, | 66 | rustc_lexer::TokenKind::Pound => POUND, |
82 | ra_rustc_lexer::TokenKind::CloseBracket => R_BRACK, | 67 | rustc_lexer::TokenKind::Tilde => TILDE, |
83 | ra_rustc_lexer::TokenKind::At => AT, | 68 | rustc_lexer::TokenKind::Question => QUESTION, |
84 | ra_rustc_lexer::TokenKind::Pound => POUND, | 69 | rustc_lexer::TokenKind::Colon => COLON, |
85 | ra_rustc_lexer::TokenKind::Tilde => TILDE, | 70 | rustc_lexer::TokenKind::Dollar => DOLLAR, |
86 | ra_rustc_lexer::TokenKind::Question => QUESTION, | 71 | rustc_lexer::TokenKind::Eq => EQ, |
87 | ra_rustc_lexer::TokenKind::ColonColon => decompose!(COLON, COLON), | 72 | rustc_lexer::TokenKind::Not => EXCL, |
88 | ra_rustc_lexer::TokenKind::Colon => COLON, | 73 | rustc_lexer::TokenKind::Lt => L_ANGLE, |
89 | ra_rustc_lexer::TokenKind::Dollar => DOLLAR, | 74 | rustc_lexer::TokenKind::Gt => R_ANGLE, |
90 | ra_rustc_lexer::TokenKind::EqEq => decompose!(EQ, EQ), | 75 | rustc_lexer::TokenKind::Minus => MINUS, |
91 | ra_rustc_lexer::TokenKind::Eq => EQ, | 76 | rustc_lexer::TokenKind::And => AMP, |
92 | ra_rustc_lexer::TokenKind::FatArrow => decompose!(EQ, R_ANGLE), | 77 | rustc_lexer::TokenKind::Or => PIPE, |
93 | ra_rustc_lexer::TokenKind::Ne => decompose!(EXCL, EQ), | 78 | rustc_lexer::TokenKind::Plus => PLUS, |
94 | ra_rustc_lexer::TokenKind::Not => EXCL, | 79 | rustc_lexer::TokenKind::Star => STAR, |
95 | ra_rustc_lexer::TokenKind::Le => decompose!(L_ANGLE, EQ), | 80 | rustc_lexer::TokenKind::Slash => SLASH, |
96 | ra_rustc_lexer::TokenKind::LArrow => decompose!(COLON, MINUS), | 81 | rustc_lexer::TokenKind::Caret => CARET, |
97 | ra_rustc_lexer::TokenKind::Lt => L_ANGLE, | 82 | rustc_lexer::TokenKind::Percent => PERCENT, |
98 | ra_rustc_lexer::TokenKind::ShlEq => decompose!(L_ANGLE, L_ANGLE, EQ), | 83 | rustc_lexer::TokenKind::Unknown => ERROR, |
99 | ra_rustc_lexer::TokenKind::Shl => decompose!(L_ANGLE, L_ANGLE), | ||
100 | ra_rustc_lexer::TokenKind::Ge => decompose!(R_ANGLE, EQ), | ||
101 | ra_rustc_lexer::TokenKind::Gt => R_ANGLE, | ||
102 | ra_rustc_lexer::TokenKind::ShrEq => decompose!(R_ANGLE, R_ANGLE, EQ), | ||
103 | ra_rustc_lexer::TokenKind::Shr => decompose!(R_ANGLE, R_ANGLE), | ||
104 | ra_rustc_lexer::TokenKind::RArrow => decompose!(MINUS, R_ANGLE), | ||
105 | ra_rustc_lexer::TokenKind::Minus => MINUS, | ||
106 | ra_rustc_lexer::TokenKind::MinusEq => decompose!(MINUS, EQ), | ||
107 | ra_rustc_lexer::TokenKind::And => AMP, | ||
108 | ra_rustc_lexer::TokenKind::AndAnd => decompose!(AMP, AMP), | ||
109 | ra_rustc_lexer::TokenKind::AndEq => decompose!(AMP, EQ), | ||
110 | ra_rustc_lexer::TokenKind::Or => PIPE, | ||
111 | ra_rustc_lexer::TokenKind::OrOr => decompose!(PIPE, PIPE), | ||
112 | ra_rustc_lexer::TokenKind::OrEq => decompose!(PIPE, EQ), | ||
113 | ra_rustc_lexer::TokenKind::PlusEq => decompose!(PLUS, EQ), | ||
114 | ra_rustc_lexer::TokenKind::Plus => PLUS, | ||
115 | ra_rustc_lexer::TokenKind::StarEq => decompose!(STAR, EQ), | ||
116 | ra_rustc_lexer::TokenKind::Star => STAR, | ||
117 | ra_rustc_lexer::TokenKind::SlashEq => decompose!(SLASH, EQ), | ||
118 | ra_rustc_lexer::TokenKind::Slash => SLASH, | ||
119 | ra_rustc_lexer::TokenKind::CaretEq => decompose!(CARET, EQ), | ||
120 | ra_rustc_lexer::TokenKind::Caret => CARET, | ||
121 | ra_rustc_lexer::TokenKind::PercentEq => decompose!(PERCENT, EQ), | ||
122 | ra_rustc_lexer::TokenKind::Percent => PERCENT, | ||
123 | ra_rustc_lexer::TokenKind::Unknown => ERROR, | ||
124 | }; | 84 | }; |
125 | let token = Token { kind, len: TextUnit::from_usize(rustc_token.len) }; | 85 | let token = Token { kind, len: TextUnit::from_usize(rustc_token.len) }; |
126 | acc.push(token); | 86 | acc.push(token); |
@@ -130,12 +90,12 @@ pub fn tokenize(text: &str) -> Vec<Token> { | |||
130 | } | 90 | } |
131 | 91 | ||
132 | pub fn classify_literal(text: &str) -> Option<Token> { | 92 | pub fn classify_literal(text: &str) -> Option<Token> { |
133 | let t = ra_rustc_lexer::first_token(text); | 93 | let t = rustc_lexer::first_token(text); |
134 | if t.len != text.len() { | 94 | if t.len != text.len() { |
135 | return None; | 95 | return None; |
136 | } | 96 | } |
137 | let kind = match t.kind { | 97 | let kind = match t.kind { |
138 | ra_rustc_lexer::TokenKind::Literal { kind, .. } => match_literal_kind(kind), | 98 | rustc_lexer::TokenKind::Literal { kind, .. } => match_literal_kind(kind), |
139 | _ => return None, | 99 | _ => return None, |
140 | }; | 100 | }; |
141 | Some(Token { kind, len: TextUnit::from_usize(t.len) }) | 101 | Some(Token { kind, len: TextUnit::from_usize(t.len) }) |
diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs index 2bb3c0a03..a8c789e0c 100644 --- a/crates/ra_syntax/src/validation.rs +++ b/crates/ra_syntax/src/validation.rs | |||
@@ -1,6 +1,6 @@ | |||
1 | mod block; | 1 | mod block; |
2 | 2 | ||
3 | use ra_rustc_lexer::unescape; | 3 | use rustc_lexer::unescape; |
4 | 4 | ||
5 | use crate::{ | 5 | use crate::{ |
6 | algo::visit::{visitor_ctx, VisitorCtx}, | 6 | algo::visit::{visitor_ctx, VisitorCtx}, |
@@ -32,64 +32,62 @@ pub enum EscapeError { | |||
32 | NonAsciiCharInByte, | 32 | NonAsciiCharInByte, |
33 | } | 33 | } |
34 | 34 | ||
35 | impl From<ra_rustc_lexer::unescape::EscapeError> for EscapeError { | 35 | impl From<rustc_lexer::unescape::EscapeError> for EscapeError { |
36 | fn from(err: ra_rustc_lexer::unescape::EscapeError) -> Self { | 36 | fn from(err: rustc_lexer::unescape::EscapeError) -> Self { |
37 | match err { | 37 | match err { |
38 | ra_rustc_lexer::unescape::EscapeError::ZeroChars => EscapeError::ZeroChars, | 38 | rustc_lexer::unescape::EscapeError::ZeroChars => EscapeError::ZeroChars, |
39 | ra_rustc_lexer::unescape::EscapeError::MoreThanOneChar => EscapeError::MoreThanOneChar, | 39 | rustc_lexer::unescape::EscapeError::MoreThanOneChar => EscapeError::MoreThanOneChar, |
40 | ra_rustc_lexer::unescape::EscapeError::LoneSlash => EscapeError::LoneSlash, | 40 | rustc_lexer::unescape::EscapeError::LoneSlash => EscapeError::LoneSlash, |
41 | ra_rustc_lexer::unescape::EscapeError::InvalidEscape => EscapeError::InvalidEscape, | 41 | rustc_lexer::unescape::EscapeError::InvalidEscape => EscapeError::InvalidEscape, |
42 | ra_rustc_lexer::unescape::EscapeError::BareCarriageReturn | 42 | rustc_lexer::unescape::EscapeError::BareCarriageReturn |
43 | | ra_rustc_lexer::unescape::EscapeError::BareCarriageReturnInRawString => { | 43 | | rustc_lexer::unescape::EscapeError::BareCarriageReturnInRawString => { |
44 | EscapeError::BareCarriageReturn | 44 | EscapeError::BareCarriageReturn |
45 | } | 45 | } |
46 | ra_rustc_lexer::unescape::EscapeError::EscapeOnlyChar => EscapeError::EscapeOnlyChar, | 46 | rustc_lexer::unescape::EscapeError::EscapeOnlyChar => EscapeError::EscapeOnlyChar, |
47 | ra_rustc_lexer::unescape::EscapeError::TooShortHexEscape => { | 47 | rustc_lexer::unescape::EscapeError::TooShortHexEscape => EscapeError::TooShortHexEscape, |
48 | EscapeError::TooShortHexEscape | 48 | rustc_lexer::unescape::EscapeError::InvalidCharInHexEscape => { |
49 | } | ||
50 | ra_rustc_lexer::unescape::EscapeError::InvalidCharInHexEscape => { | ||
51 | EscapeError::InvalidCharInHexEscape | 49 | EscapeError::InvalidCharInHexEscape |
52 | } | 50 | } |
53 | ra_rustc_lexer::unescape::EscapeError::OutOfRangeHexEscape => { | 51 | rustc_lexer::unescape::EscapeError::OutOfRangeHexEscape => { |
54 | EscapeError::OutOfRangeHexEscape | 52 | EscapeError::OutOfRangeHexEscape |
55 | } | 53 | } |
56 | ra_rustc_lexer::unescape::EscapeError::NoBraceInUnicodeEscape => { | 54 | rustc_lexer::unescape::EscapeError::NoBraceInUnicodeEscape => { |
57 | EscapeError::NoBraceInUnicodeEscape | 55 | EscapeError::NoBraceInUnicodeEscape |
58 | } | 56 | } |
59 | ra_rustc_lexer::unescape::EscapeError::InvalidCharInUnicodeEscape => { | 57 | rustc_lexer::unescape::EscapeError::InvalidCharInUnicodeEscape => { |
60 | EscapeError::InvalidCharInUnicodeEscape | 58 | EscapeError::InvalidCharInUnicodeEscape |
61 | } | 59 | } |
62 | ra_rustc_lexer::unescape::EscapeError::EmptyUnicodeEscape => { | 60 | rustc_lexer::unescape::EscapeError::EmptyUnicodeEscape => { |
63 | EscapeError::EmptyUnicodeEscape | 61 | EscapeError::EmptyUnicodeEscape |
64 | } | 62 | } |
65 | ra_rustc_lexer::unescape::EscapeError::UnclosedUnicodeEscape => { | 63 | rustc_lexer::unescape::EscapeError::UnclosedUnicodeEscape => { |
66 | EscapeError::UnclosedUnicodeEscape | 64 | EscapeError::UnclosedUnicodeEscape |
67 | } | 65 | } |
68 | ra_rustc_lexer::unescape::EscapeError::LeadingUnderscoreUnicodeEscape => { | 66 | rustc_lexer::unescape::EscapeError::LeadingUnderscoreUnicodeEscape => { |
69 | EscapeError::LeadingUnderscoreUnicodeEscape | 67 | EscapeError::LeadingUnderscoreUnicodeEscape |
70 | } | 68 | } |
71 | ra_rustc_lexer::unescape::EscapeError::OverlongUnicodeEscape => { | 69 | rustc_lexer::unescape::EscapeError::OverlongUnicodeEscape => { |
72 | EscapeError::OverlongUnicodeEscape | 70 | EscapeError::OverlongUnicodeEscape |
73 | } | 71 | } |
74 | ra_rustc_lexer::unescape::EscapeError::LoneSurrogateUnicodeEscape => { | 72 | rustc_lexer::unescape::EscapeError::LoneSurrogateUnicodeEscape => { |
75 | EscapeError::LoneSurrogateUnicodeEscape | 73 | EscapeError::LoneSurrogateUnicodeEscape |
76 | } | 74 | } |
77 | ra_rustc_lexer::unescape::EscapeError::OutOfRangeUnicodeEscape => { | 75 | rustc_lexer::unescape::EscapeError::OutOfRangeUnicodeEscape => { |
78 | EscapeError::OutOfRangeUnicodeEscape | 76 | EscapeError::OutOfRangeUnicodeEscape |
79 | } | 77 | } |
80 | ra_rustc_lexer::unescape::EscapeError::UnicodeEscapeInByte => { | 78 | rustc_lexer::unescape::EscapeError::UnicodeEscapeInByte => { |
81 | EscapeError::UnicodeEscapeInByte | 79 | EscapeError::UnicodeEscapeInByte |
82 | } | 80 | } |
83 | ra_rustc_lexer::unescape::EscapeError::NonAsciiCharInByte | 81 | rustc_lexer::unescape::EscapeError::NonAsciiCharInByte |
84 | | ra_rustc_lexer::unescape::EscapeError::NonAsciiCharInByteString => { | 82 | | rustc_lexer::unescape::EscapeError::NonAsciiCharInByteString => { |
85 | EscapeError::NonAsciiCharInByte | 83 | EscapeError::NonAsciiCharInByte |
86 | } | 84 | } |
87 | } | 85 | } |
88 | } | 86 | } |
89 | } | 87 | } |
90 | 88 | ||
91 | impl From<ra_rustc_lexer::unescape::EscapeError> for SyntaxErrorKind { | 89 | impl From<rustc_lexer::unescape::EscapeError> for SyntaxErrorKind { |
92 | fn from(err: ra_rustc_lexer::unescape::EscapeError) -> Self { | 90 | fn from(err: rustc_lexer::unescape::EscapeError) -> Self { |
93 | SyntaxErrorKind::EscapeError(err.into()) | 91 | SyntaxErrorKind::EscapeError(err.into()) |
94 | } | 92 | } |
95 | } | 93 | } |