1 files changed, 104 insertions, 4 deletions
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index 60cf37047..1c818fdf4 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -30,19 +30,119 @@ pub struct Token {
 /// Break a string up into its component tokens
 pub fn tokenize(text: &str) -> Vec<Token> {
+    if text.is_empty() {
+        return vec![];
+    }
    let mut text = text;
    let mut acc = Vec::new();
+    if let Some(len) = ra_rustc_lexer::strip_shebang(text) {
+        acc.push(Token { kind: SHEBANG, len: TextUnit::from_usize(len) });
+        text = &text[len..]; // lexing resumes right after the shebang
+    }
    while !text.is_empty() {
-        let token = next_token(text);
+        let rustc_token = ra_rustc_lexer::first_token(text);
+        macro_rules! decompose { // push one 1-byte token per kind, then restart the loop
+            ($t1:expr, $t2:expr) => {{
+                acc.push(Token { kind: $t1, len: 1.into() });
+                acc.push(Token { kind: $t2, len: 1.into() });
+                text = &text[2..];
+                continue; // pieces are already pushed; skip the single-token tail
+            }};
+            ($t1:expr, $t2:expr, $t3:expr) => {{
+                acc.push(Token { kind: $t1, len: 1.into() });
+                acc.push(Token { kind: $t2, len: 1.into() });
+                acc.push(Token { kind: $t3, len: 1.into() });
+                text = &text[3..];
+                continue; // pieces are already pushed; skip the single-token tail
+            }};
+        }
+        let kind = match rustc_token.kind {
+            ra_rustc_lexer::TokenKind::LineComment => COMMENT,
+            ra_rustc_lexer::TokenKind::BlockComment { .. } => COMMENT,
+            ra_rustc_lexer::TokenKind::Whitespace => WHITESPACE,
+            ra_rustc_lexer::TokenKind::Ident => {
+                let token_text = &text[..rustc_token.len];
+                if token_text == "_" { // a lone underscore gets its own kind
+                    UNDERSCORE
+                } else {
+                    SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
+                }
+            }
+            ra_rustc_lexer::TokenKind::RawIdent => IDENT,
+            ra_rustc_lexer::TokenKind::Literal { kind, .. } => match kind {
+                ra_rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER,
+                ra_rustc_lexer::LiteralKind::Float { .. } => FLOAT_NUMBER,
+                ra_rustc_lexer::LiteralKind::Char { .. } => CHAR,
+                ra_rustc_lexer::LiteralKind::Byte { .. } => BYTE,
+                ra_rustc_lexer::LiteralKind::Str { .. } => STRING,
+                ra_rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING,
+                ra_rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING,
+                ra_rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING,
+            },
+            ra_rustc_lexer::TokenKind::Lifetime { .. } => LIFETIME,
+            ra_rustc_lexer::TokenKind::Semi => SEMI,
+            ra_rustc_lexer::TokenKind::Comma => COMMA,
+            ra_rustc_lexer::TokenKind::DotDotDot => decompose!(DOT, DOT, DOT),
+            ra_rustc_lexer::TokenKind::DotDotEq => decompose!(DOT, DOT, EQ),
+            ra_rustc_lexer::TokenKind::DotDot => decompose!(DOT, DOT),
+            ra_rustc_lexer::TokenKind::Dot => DOT,
+            ra_rustc_lexer::TokenKind::OpenParen => L_PAREN,
+            ra_rustc_lexer::TokenKind::CloseParen => R_PAREN,
+            ra_rustc_lexer::TokenKind::OpenBrace => L_CURLY,
+            ra_rustc_lexer::TokenKind::CloseBrace => R_CURLY,
+            ra_rustc_lexer::TokenKind::OpenBracket => L_BRACK,
+            ra_rustc_lexer::TokenKind::CloseBracket => R_BRACK,
+            ra_rustc_lexer::TokenKind::At => AT,
+            ra_rustc_lexer::TokenKind::Pound => POUND,
+            ra_rustc_lexer::TokenKind::Tilde => TILDE,
+            ra_rustc_lexer::TokenKind::Question => QUESTION,
+            ra_rustc_lexer::TokenKind::ColonColon => decompose!(COLON, COLON),
+            ra_rustc_lexer::TokenKind::Colon => COLON,
+            ra_rustc_lexer::TokenKind::Dollar => DOLLAR,
+            ra_rustc_lexer::TokenKind::EqEq => decompose!(EQ, EQ),
+            ra_rustc_lexer::TokenKind::Eq => EQ,
+            ra_rustc_lexer::TokenKind::FatArrow => decompose!(EQ, R_ANGLE),
+            ra_rustc_lexer::TokenKind::Ne => decompose!(EXCL, EQ),
+            ra_rustc_lexer::TokenKind::Not => EXCL,
+            ra_rustc_lexer::TokenKind::Le => decompose!(L_ANGLE, EQ),
+            ra_rustc_lexer::TokenKind::LArrow => decompose!(L_ANGLE, MINUS), // `<-` begins with `<`
+            ra_rustc_lexer::TokenKind::Lt => L_ANGLE,
+            ra_rustc_lexer::TokenKind::ShlEq => decompose!(L_ANGLE, L_ANGLE, EQ),
+            ra_rustc_lexer::TokenKind::Shl => decompose!(L_ANGLE, L_ANGLE),
+            ra_rustc_lexer::TokenKind::Ge => decompose!(R_ANGLE, EQ),
+            ra_rustc_lexer::TokenKind::Gt => R_ANGLE,
+            ra_rustc_lexer::TokenKind::ShrEq => decompose!(R_ANGLE, R_ANGLE, EQ),
+            ra_rustc_lexer::TokenKind::Shr => decompose!(R_ANGLE, R_ANGLE),
+            ra_rustc_lexer::TokenKind::RArrow => decompose!(MINUS, R_ANGLE),
+            ra_rustc_lexer::TokenKind::Minus => MINUS,
+            ra_rustc_lexer::TokenKind::MinusEq => decompose!(MINUS, EQ),
+            ra_rustc_lexer::TokenKind::And => AMP,
+            ra_rustc_lexer::TokenKind::AndAnd => decompose!(AMP, AMP),
+            ra_rustc_lexer::TokenKind::AndEq => decompose!(AMP, EQ),
+            ra_rustc_lexer::TokenKind::Or => PIPE,
+            ra_rustc_lexer::TokenKind::OrOr => decompose!(PIPE, PIPE),
+            ra_rustc_lexer::TokenKind::OrEq => decompose!(PIPE, EQ),
+            ra_rustc_lexer::TokenKind::PlusEq => decompose!(PLUS, EQ),
+            ra_rustc_lexer::TokenKind::Plus => PLUS,
+            ra_rustc_lexer::TokenKind::StarEq => decompose!(STAR, EQ),
+            ra_rustc_lexer::TokenKind::Star => STAR,
+            ra_rustc_lexer::TokenKind::SlashEq => decompose!(SLASH, EQ),
+            ra_rustc_lexer::TokenKind::Slash => SLASH,
+            ra_rustc_lexer::TokenKind::CaretEq => decompose!(CARET, EQ),
+            ra_rustc_lexer::TokenKind::Caret => CARET,
+            ra_rustc_lexer::TokenKind::PercentEq => decompose!(PERCENT, EQ),
+            ra_rustc_lexer::TokenKind::Percent => PERCENT,
+            ra_rustc_lexer::TokenKind::Unknown => ERROR,
+        };
+        let token = Token { kind, len: TextUnit::from_usize(rustc_token.len) };
        acc.push(token);
-        let len: u32 = token.len.into();
+        text = &text[rustc_token.len..];
-        text = &text[len as usize..];
    }
    acc
 }
 /// Get the next token from a string
-pub fn next_token(text: &str) -> Token {
+fn next_token(text: &str) -> Token {
    assert!(!text.is_empty());
    let mut ptr = Ptr::new(text);
    let c = ptr.bump().unwrap();

diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 60cf37047..1c818fdf4 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -30,19 +30,119 @@ pub struct Token {
30		30
31	/// Break a string up into its component tokens	31	/// Break a string up into its component tokens
32	pub fn tokenize(text: &str) -> Vec<Token> {	32	pub fn tokenize(text: &str) -> Vec<Token> {
		33	if text.is_empty() {
		34	return vec![];
		35	}
33	let mut text = text;	36	let mut text = text;
34	let mut acc = Vec::new();	37	let mut acc = Vec::new();
		38	if let Some(len) = ra_rustc_lexer::strip_shebang(text) {
		39	acc.push(Token { kind: SHEBANG, len: TextUnit::from_usize(len) });
		40	text = &text[len..]; // lexing resumes right after the shebang
		41	}
35	while !text.is_empty() {	42	while !text.is_empty() {
36	let token = next_token(text);	43	let rustc_token = ra_rustc_lexer::first_token(text);
		44	macro_rules! decompose { // push one 1-byte token per kind, then restart the loop
		45	($t1:expr, $t2:expr) => {{
		46	acc.push(Token { kind: $t1, len: 1.into() });
		47	acc.push(Token { kind: $t2, len: 1.into() });
		48	text = &text[2..];
		49	continue; // pieces are already pushed; skip the single-token tail
		50	}};
		51	($t1:expr, $t2:expr, $t3:expr) => {{
		52	acc.push(Token { kind: $t1, len: 1.into() });
		53	acc.push(Token { kind: $t2, len: 1.into() });
		54	acc.push(Token { kind: $t3, len: 1.into() });
		55	text = &text[3..];
		56	continue; // pieces are already pushed; skip the single-token tail
		57	}};
		58	}
		59	let kind = match rustc_token.kind {
		60	ra_rustc_lexer::TokenKind::LineComment => COMMENT,
		61	ra_rustc_lexer::TokenKind::BlockComment { .. } => COMMENT,
		62	ra_rustc_lexer::TokenKind::Whitespace => WHITESPACE,
		63	ra_rustc_lexer::TokenKind::Ident => {
		64	let token_text = &text[..rustc_token.len];
		65	if token_text == "_" { // a lone underscore gets its own kind
		66	UNDERSCORE
		67	} else {
		68	SyntaxKind::from_keyword(token_text).unwrap_or(IDENT)
		69	}
		70	}
		71	ra_rustc_lexer::TokenKind::RawIdent => IDENT,
		72	ra_rustc_lexer::TokenKind::Literal { kind, .. } => match kind {
		73	ra_rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER,
		74	ra_rustc_lexer::LiteralKind::Float { .. } => FLOAT_NUMBER,
		75	ra_rustc_lexer::LiteralKind::Char { .. } => CHAR,
		76	ra_rustc_lexer::LiteralKind::Byte { .. } => BYTE,
		77	ra_rustc_lexer::LiteralKind::Str { .. } => STRING,
		78	ra_rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING,
		79	ra_rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING,
		80	ra_rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING,
		81	},
		82	ra_rustc_lexer::TokenKind::Lifetime { .. } => LIFETIME,
		83	ra_rustc_lexer::TokenKind::Semi => SEMI,
		84	ra_rustc_lexer::TokenKind::Comma => COMMA,
		85	ra_rustc_lexer::TokenKind::DotDotDot => decompose!(DOT, DOT, DOT),
		86	ra_rustc_lexer::TokenKind::DotDotEq => decompose!(DOT, DOT, EQ),
		87	ra_rustc_lexer::TokenKind::DotDot => decompose!(DOT, DOT),
		88	ra_rustc_lexer::TokenKind::Dot => DOT,
		89	ra_rustc_lexer::TokenKind::OpenParen => L_PAREN,
		90	ra_rustc_lexer::TokenKind::CloseParen => R_PAREN,
		91	ra_rustc_lexer::TokenKind::OpenBrace => L_CURLY,
		92	ra_rustc_lexer::TokenKind::CloseBrace => R_CURLY,
		93	ra_rustc_lexer::TokenKind::OpenBracket => L_BRACK,
		94	ra_rustc_lexer::TokenKind::CloseBracket => R_BRACK,
		95	ra_rustc_lexer::TokenKind::At => AT,
		96	ra_rustc_lexer::TokenKind::Pound => POUND,
		97	ra_rustc_lexer::TokenKind::Tilde => TILDE,
		98	ra_rustc_lexer::TokenKind::Question => QUESTION,
		99	ra_rustc_lexer::TokenKind::ColonColon => decompose!(COLON, COLON),
		100	ra_rustc_lexer::TokenKind::Colon => COLON,
		101	ra_rustc_lexer::TokenKind::Dollar => DOLLAR,
		102	ra_rustc_lexer::TokenKind::EqEq => decompose!(EQ, EQ),
		103	ra_rustc_lexer::TokenKind::Eq => EQ,
		104	ra_rustc_lexer::TokenKind::FatArrow => decompose!(EQ, R_ANGLE),
		105	ra_rustc_lexer::TokenKind::Ne => decompose!(EXCL, EQ),
		106	ra_rustc_lexer::TokenKind::Not => EXCL,
		107	ra_rustc_lexer::TokenKind::Le => decompose!(L_ANGLE, EQ),
		108	ra_rustc_lexer::TokenKind::LArrow => decompose!(L_ANGLE, MINUS), // `<-` begins with `<`
		109	ra_rustc_lexer::TokenKind::Lt => L_ANGLE,
		110	ra_rustc_lexer::TokenKind::ShlEq => decompose!(L_ANGLE, L_ANGLE, EQ),
		111	ra_rustc_lexer::TokenKind::Shl => decompose!(L_ANGLE, L_ANGLE),
		112	ra_rustc_lexer::TokenKind::Ge => decompose!(R_ANGLE, EQ),
		113	ra_rustc_lexer::TokenKind::Gt => R_ANGLE,
		114	ra_rustc_lexer::TokenKind::ShrEq => decompose!(R_ANGLE, R_ANGLE, EQ),
		115	ra_rustc_lexer::TokenKind::Shr => decompose!(R_ANGLE, R_ANGLE),
		116	ra_rustc_lexer::TokenKind::RArrow => decompose!(MINUS, R_ANGLE),
		117	ra_rustc_lexer::TokenKind::Minus => MINUS,
		118	ra_rustc_lexer::TokenKind::MinusEq => decompose!(MINUS, EQ),
		119	ra_rustc_lexer::TokenKind::And => AMP,
		120	ra_rustc_lexer::TokenKind::AndAnd => decompose!(AMP, AMP),
		121	ra_rustc_lexer::TokenKind::AndEq => decompose!(AMP, EQ),
		122	ra_rustc_lexer::TokenKind::Or => PIPE,
		123	ra_rustc_lexer::TokenKind::OrOr => decompose!(PIPE, PIPE),
		124	ra_rustc_lexer::TokenKind::OrEq => decompose!(PIPE, EQ),
		125	ra_rustc_lexer::TokenKind::PlusEq => decompose!(PLUS, EQ),
		126	ra_rustc_lexer::TokenKind::Plus => PLUS,
		127	ra_rustc_lexer::TokenKind::StarEq => decompose!(STAR, EQ),
		128	ra_rustc_lexer::TokenKind::Star => STAR,
		129	ra_rustc_lexer::TokenKind::SlashEq => decompose!(SLASH, EQ),
		130	ra_rustc_lexer::TokenKind::Slash => SLASH,
		131	ra_rustc_lexer::TokenKind::CaretEq => decompose!(CARET, EQ),
		132	ra_rustc_lexer::TokenKind::Caret => CARET,
		133	ra_rustc_lexer::TokenKind::PercentEq => decompose!(PERCENT, EQ),
		134	ra_rustc_lexer::TokenKind::Percent => PERCENT,
		135	ra_rustc_lexer::TokenKind::Unknown => ERROR,
		136	};
		137	let token = Token { kind, len: TextUnit::from_usize(rustc_token.len) };
37	acc.push(token);	138	acc.push(token);
38	let len: u32 = token.len.into();	139	text = &text[rustc_token.len..];
39	text = &text[len as usize..];
40	}	140	}
41	acc	141	acc
42	}	142	}
43		143
44	/// Get the next token from a string	144	/// Get the next token from a string
45	pub fn next_token(text: &str) -> Token {	145	fn next_token(text: &str) -> Token {
46	assert!(!text.is_empty());	146	assert!(!text.is_empty());
47	let mut ptr = Ptr::new(text);	147	let mut ptr = Ptr::new(text);
48	let c = ptr.bump().unwrap();	148	let c = ptr.bump().unwrap();