aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Cargo.lock10
-rw-r--r--crates/ra_syntax/Cargo.toml1
-rw-r--r--crates/ra_syntax/src/parsing/lexer.rs271
-rw-r--r--crates/ra_syntax/src/parsing/lexer/classes.rs26
-rw-r--r--crates/ra_syntax/src/parsing/lexer/comments.rs57
-rw-r--r--crates/ra_syntax/src/parsing/lexer/numbers.rs66
-rw-r--r--crates/ra_syntax/src/parsing/lexer/ptr.rs162
-rw-r--r--crates/ra_syntax/src/parsing/lexer/strings.rs112
-rw-r--r--crates/ra_syntax/tests/data/lexer/0004_numbers.txt12
-rw-r--r--crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt2
-rw-r--r--crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt38
-rw-r--r--crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.rs (renamed from crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs)0
-rw-r--r--crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.txt (renamed from crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt)13
13 files changed, 175 insertions, 595 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 8feaf27ec..d5474d6e2 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1309,11 +1309,20 @@ dependencies = [
1309] 1309]
1310 1310
1311[[package]] 1311[[package]]
1312name = "ra_rustc_lexer"
1313version = "0.1.0-pre.1"
1314source = "registry+https://github.com/rust-lang/crates.io-index"
1315dependencies = [
1316 "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
1317]
1318
1319[[package]]
1312name = "ra_syntax" 1320name = "ra_syntax"
1313version = "0.1.0" 1321version = "0.1.0"
1314dependencies = [ 1322dependencies = [
1315 "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", 1323 "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
1316 "ra_parser 0.1.0", 1324 "ra_parser 0.1.0",
1325 "ra_rustc_lexer 0.1.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)",
1317 "ra_text_edit 0.1.0", 1326 "ra_text_edit 0.1.0",
1318 "rowan 0.6.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)", 1327 "rowan 0.6.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)",
1319 "smol_str 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", 1328 "smol_str 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -2250,6 +2259,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
2250"checksum proptest 0.9.4 (registry+https://github.com/rust-lang/crates.io-index)" = "cf147e022eacf0c8a054ab864914a7602618adba841d800a9a9868a5237a529f" 2259"checksum proptest 0.9.4 (registry+https://github.com/rust-lang/crates.io-index)" = "cf147e022eacf0c8a054ab864914a7602618adba841d800a9a9868a5237a529f"
2251"checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" 2260"checksum quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0"
2252"checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" 2261"checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1"
2262"checksum ra_rustc_lexer 0.1.0-pre.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e8d92772f822978a6c9c4657aa61af439e4e635180628b3354049b283b749f1e"
2253"checksum ra_vfs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fb7cd4e302032c5ab514f1c01c89727cd96fd950dd36f9ebee9252df45d9fb1a" 2263"checksum ra_vfs 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fb7cd4e302032c5ab514f1c01c89727cd96fd950dd36f9ebee9252df45d9fb1a"
2254"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca" 2264"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
2255"checksum rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c" 2265"checksum rand 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d47eab0e83d9693d40f825f86948aa16eff6750ead4bdffc4ab95b8b3a7f052c"
diff --git a/crates/ra_syntax/Cargo.toml b/crates/ra_syntax/Cargo.toml
index 97b6b047f..9ef8dee5d 100644
--- a/crates/ra_syntax/Cargo.toml
+++ b/crates/ra_syntax/Cargo.toml
@@ -11,6 +11,7 @@ repository = "https://github.com/rust-analyzer/rust-analyzer"
11unicode-xid = "0.1.0" 11unicode-xid = "0.1.0"
12itertools = "0.8.0" 12itertools = "0.8.0"
13rowan = "0.6.0-pre.1" 13rowan = "0.6.0-pre.1"
14ra_rustc_lexer = { version = "0.1.0-pre.1", features = [ "unicode-xid" ] }
14 15
15# ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here 16# ideally, `serde` should be enabled by `ra_lsp_server`, but we enable it here
16# to reduce number of compilations 17# to reduce number of compilations
diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs
index 60cf37047..2a4343b0a 100644
--- a/crates/ra_syntax/src/parsing/lexer.rs
+++ b/crates/ra_syntax/src/parsing/lexer.rs
@@ -1,22 +1,6 @@
1mod classes;
2mod comments;
3mod numbers;
4mod ptr;
5mod strings;
6
7use crate::{ 1use crate::{
8 SyntaxKind::{self, *}, 2 SyntaxKind::{self, *},
9 TextUnit, T, 3 TextUnit,
10};
11
12use self::{
13 classes::*,
14 comments::{scan_comment, scan_shebang},
15 numbers::scan_number,
16 ptr::Ptr,
17 strings::{
18 is_string_literal_start, scan_byte_char_or_string, scan_char, scan_raw_string, scan_string,
19 },
20}; 4};
21 5
22/// A token of Rust source. 6/// A token of Rust source.
@@ -30,149 +14,134 @@ pub struct Token {
30 14
31/// Break a string up into its component tokens 15/// Break a string up into its component tokens
32pub fn tokenize(text: &str) -> Vec<Token> { 16pub fn tokenize(text: &str) -> Vec<Token> {
17 if text.is_empty() {
18 return vec![];
19 }
33 let mut text = text; 20 let mut text = text;
34 let mut acc = Vec::new(); 21 let mut acc = Vec::new();
35 while !text.is_empty() { 22 if let Some(len) = ra_rustc_lexer::strip_shebang(text) {
36 let token = next_token(text); 23 acc.push(Token { kind: SHEBANG, len: TextUnit::from_usize(len) });
37 acc.push(token); 24 text = &text[len..];
38 let len: u32 = token.len.into();
39 text = &text[len as usize..];
40 }
41 acc
42}
43
44/// Get the next token from a string
45pub fn next_token(text: &str) -> Token {
46 assert!(!text.is_empty());
47 let mut ptr = Ptr::new(text);
48 let c = ptr.bump().unwrap();
49 let kind = next_token_inner(c, &mut ptr);
50 let len = ptr.into_len();
51 Token { kind, len }
52}
53
54fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
55 if is_whitespace(c) {
56 ptr.bump_while(is_whitespace);
57 return WHITESPACE;
58 } 25 }
59 26 while !text.is_empty() {
60 match c { 27 let rustc_token = ra_rustc_lexer::first_token(text);
61 '#' => { 28 macro_rules! decompose {
62 if scan_shebang(ptr) { 29 ($t1:expr, $t2:expr) => {{
63 return SHEBANG; 30 acc.push(Token { kind: $t1, len: 1.into() });
64 } 31 acc.push(Token { kind: $t2, len: 1.into() });
65 } 32 text = &text[2..];
66 '/' => { 33 continue;
67 if let Some(kind) = scan_comment(ptr) { 34 }};
68 return kind; 35 ($t1:expr, $t2:expr, $t3:expr) => {{
69 } 36 acc.push(Token { kind: $t1, len: 1.into() });
37 acc.push(Token { kind: $t2, len: 1.into() });
38 acc.push(Token { kind: $t3, len: 1.into() });
39 text = &text[3..];
40 continue;
41 }};
70 } 42 }
71 _ => (), 43 let kind = match rustc_token.kind {
72 } 44 ra_rustc_lexer::TokenKind::LineComment => COMMENT,
73 45 ra_rustc_lexer::TokenKind::BlockComment { .. } => COMMENT,
74 let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.current(), ptr.nth(1)); 46 ra_rustc_lexer::TokenKind::Whitespace => WHITESPACE,
75 if ident_start { 47 ra_rustc_lexer::TokenKind::Ident => {
76 return scan_ident(c, ptr); 48 let token_text = &text[..rustc_token.len];
77 } 49 if token_text == "_" {
78 50 UNDERSCORE
79 if is_dec_digit(c) { 51 } else {
80 let kind = scan_number(c, ptr); 52 SyntaxKind::from_keyword(&text[..rustc_token.len]).unwrap_or(IDENT)
81 scan_literal_suffix(ptr);
82 return kind;
83 }
84
85 // One-byte tokens.
86 if let Some(kind) = SyntaxKind::from_char(c) {
87 return kind;
88 }
89
90 match c {
91 // Possiblily multi-byte tokens,
92 // but we only produce single byte token now
93 // T![...], T![..], T![..=], T![.]
94 '.' => return T![.],
95 // T![::] T![:]
96 ':' => return T![:],
97 // T![==] FATARROW T![=]
98 '=' => return T![=],
99 // T![!=] T![!]
100 '!' => return T![!],
101 // T![->] T![-]
102 '-' => return T![-],
103
104 // If the character is an ident start not followed by another single
105 // quote, then this is a lifetime name:
106 '\'' => {
107 return if ptr.at_p(is_ident_start) && !ptr.at_str("''") {
108 ptr.bump();
109 while ptr.at_p(is_ident_continue) {
110 ptr.bump();
111 } 53 }
112 // lifetimes shouldn't end with a single quote 54 }
113 // if we find one, then this is an invalid character literal 55 ra_rustc_lexer::TokenKind::RawIdent => IDENT,
114 if ptr.at('\'') { 56 ra_rustc_lexer::TokenKind::Literal { kind, .. } => match kind {
115 ptr.bump(); 57 ra_rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER,
116 return CHAR; 58 ra_rustc_lexer::LiteralKind::Float { .. } => FLOAT_NUMBER,
117 } 59 ra_rustc_lexer::LiteralKind::Char { .. } => CHAR,
118 LIFETIME 60 ra_rustc_lexer::LiteralKind::Byte { .. } => BYTE,
119 } else { 61 ra_rustc_lexer::LiteralKind::Str { .. } => STRING,
120 scan_char(ptr); 62 ra_rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING,
121 scan_literal_suffix(ptr); 63 ra_rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING,
122 CHAR 64 ra_rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING,
123 }; 65 },
124 } 66 ra_rustc_lexer::TokenKind::Lifetime { .. } => LIFETIME,
125 'b' => { 67 ra_rustc_lexer::TokenKind::Semi => SEMI,
126 let kind = scan_byte_char_or_string(ptr); 68 ra_rustc_lexer::TokenKind::Comma => COMMA,
127 scan_literal_suffix(ptr); 69 ra_rustc_lexer::TokenKind::DotDotDot => decompose!(DOT, DOT, DOT),
128 return kind; 70 ra_rustc_lexer::TokenKind::DotDotEq => decompose!(DOT, DOT, EQ),
129 } 71 ra_rustc_lexer::TokenKind::DotDot => decompose!(DOT, DOT),
130 '"' => { 72 ra_rustc_lexer::TokenKind::Dot => DOT,
131 scan_string(ptr); 73 ra_rustc_lexer::TokenKind::OpenParen => L_PAREN,
132 scan_literal_suffix(ptr); 74 ra_rustc_lexer::TokenKind::CloseParen => R_PAREN,
133 return STRING; 75 ra_rustc_lexer::TokenKind::OpenBrace => L_CURLY,
134 } 76 ra_rustc_lexer::TokenKind::CloseBrace => R_CURLY,
135 'r' => { 77 ra_rustc_lexer::TokenKind::OpenBracket => L_BRACK,
136 scan_raw_string(ptr); 78 ra_rustc_lexer::TokenKind::CloseBracket => R_BRACK,
137 scan_literal_suffix(ptr); 79 ra_rustc_lexer::TokenKind::At => AT,
138 return RAW_STRING; 80 ra_rustc_lexer::TokenKind::Pound => POUND,
139 } 81 ra_rustc_lexer::TokenKind::Tilde => TILDE,
140 _ => (), 82 ra_rustc_lexer::TokenKind::Question => QUESTION,
141 } 83 ra_rustc_lexer::TokenKind::ColonColon => decompose!(COLON, COLON),
142 ERROR 84 ra_rustc_lexer::TokenKind::Colon => COLON,
143} 85 ra_rustc_lexer::TokenKind::Dollar => DOLLAR,
144 86 ra_rustc_lexer::TokenKind::EqEq => decompose!(EQ, EQ),
145fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { 87 ra_rustc_lexer::TokenKind::Eq => EQ,
146 let is_raw = match (c, ptr.current()) { 88 ra_rustc_lexer::TokenKind::FatArrow => decompose!(EQ, R_ANGLE),
147 ('r', Some('#')) => { 89 ra_rustc_lexer::TokenKind::Ne => decompose!(EXCL, EQ),
148 ptr.bump(); 90 ra_rustc_lexer::TokenKind::Not => EXCL,
149 true 91 ra_rustc_lexer::TokenKind::Le => decompose!(L_ANGLE, EQ),
150 } 92 ra_rustc_lexer::TokenKind::LArrow => decompose!(COLON, MINUS),
151 ('_', None) => return T![_], 93 ra_rustc_lexer::TokenKind::Lt => L_ANGLE,
152 ('_', Some(c)) if !is_ident_continue(c) => return T![_], 94 ra_rustc_lexer::TokenKind::ShlEq => decompose!(L_ANGLE, L_ANGLE, EQ),
153 _ => false, 95 ra_rustc_lexer::TokenKind::Shl => decompose!(L_ANGLE, L_ANGLE),
154 }; 96 ra_rustc_lexer::TokenKind::Ge => decompose!(R_ANGLE, EQ),
155 ptr.bump_while(is_ident_continue); 97 ra_rustc_lexer::TokenKind::Gt => R_ANGLE,
156 if !is_raw { 98 ra_rustc_lexer::TokenKind::ShrEq => decompose!(R_ANGLE, R_ANGLE, EQ),
157 if let Some(kind) = SyntaxKind::from_keyword(ptr.current_token_text()) { 99 ra_rustc_lexer::TokenKind::Shr => decompose!(R_ANGLE, R_ANGLE),
158 return kind; 100 ra_rustc_lexer::TokenKind::RArrow => decompose!(MINUS, R_ANGLE),
159 } 101 ra_rustc_lexer::TokenKind::Minus => MINUS,
160 } 102 ra_rustc_lexer::TokenKind::MinusEq => decompose!(MINUS, EQ),
161 IDENT 103 ra_rustc_lexer::TokenKind::And => AMP,
162} 104 ra_rustc_lexer::TokenKind::AndAnd => decompose!(AMP, AMP),
163 105 ra_rustc_lexer::TokenKind::AndEq => decompose!(AMP, EQ),
164fn scan_literal_suffix(ptr: &mut Ptr) { 106 ra_rustc_lexer::TokenKind::Or => PIPE,
165 if ptr.at_p(is_ident_start) { 107 ra_rustc_lexer::TokenKind::OrOr => decompose!(PIPE, PIPE),
166 ptr.bump(); 108 ra_rustc_lexer::TokenKind::OrEq => decompose!(PIPE, EQ),
109 ra_rustc_lexer::TokenKind::PlusEq => decompose!(PLUS, EQ),
110 ra_rustc_lexer::TokenKind::Plus => PLUS,
111 ra_rustc_lexer::TokenKind::StarEq => decompose!(STAR, EQ),
112 ra_rustc_lexer::TokenKind::Star => STAR,
113 ra_rustc_lexer::TokenKind::SlashEq => decompose!(SLASH, EQ),
114 ra_rustc_lexer::TokenKind::Slash => SLASH,
115 ra_rustc_lexer::TokenKind::CaretEq => decompose!(CARET, EQ),
116 ra_rustc_lexer::TokenKind::Caret => CARET,
117 ra_rustc_lexer::TokenKind::PercentEq => decompose!(PERCENT, EQ),
118 ra_rustc_lexer::TokenKind::Percent => PERCENT,
119 ra_rustc_lexer::TokenKind::Unknown => ERROR,
120 };
121 let token = Token { kind, len: TextUnit::from_usize(rustc_token.len) };
122 acc.push(token);
123 text = &text[rustc_token.len..];
167 } 124 }
168 ptr.bump_while(is_ident_continue); 125 acc
169} 126}
170 127
171pub fn classify_literal(text: &str) -> Option<Token> { 128pub fn classify_literal(text: &str) -> Option<Token> {
172 let tkn = next_token(text); 129 let t = ra_rustc_lexer::first_token(text);
173 if !tkn.kind.is_literal() || tkn.len.to_usize() != text.len() { 130 if t.len != text.len() {
174 return None; 131 return None;
175 } 132 }
176 133 let kind = match t.kind {
177 Some(tkn) 134 ra_rustc_lexer::TokenKind::Literal { kind, .. } => match kind {
135 ra_rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER,
136 ra_rustc_lexer::LiteralKind::Float { .. } => FLOAT_NUMBER,
137 ra_rustc_lexer::LiteralKind::Char { .. } => CHAR,
138 ra_rustc_lexer::LiteralKind::Byte { .. } => BYTE,
139 ra_rustc_lexer::LiteralKind::Str { .. } => STRING,
140 ra_rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING,
141 ra_rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING,
142 ra_rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING,
143 },
144 _ => return None,
145 };
146 Some(Token { kind, len: TextUnit::from_usize(t.len) })
178} 147}
diff --git a/crates/ra_syntax/src/parsing/lexer/classes.rs b/crates/ra_syntax/src/parsing/lexer/classes.rs
deleted file mode 100644
index 4235d2648..000000000
--- a/crates/ra_syntax/src/parsing/lexer/classes.rs
+++ /dev/null
@@ -1,26 +0,0 @@
1use unicode_xid::UnicodeXID;
2
3pub fn is_ident_start(c: char) -> bool {
4 (c >= 'a' && c <= 'z')
5 || (c >= 'A' && c <= 'Z')
6 || c == '_'
7 || (c > '\x7f' && UnicodeXID::is_xid_start(c))
8}
9
10pub fn is_ident_continue(c: char) -> bool {
11 (c >= 'a' && c <= 'z')
12 || (c >= 'A' && c <= 'Z')
13 || (c >= '0' && c <= '9')
14 || c == '_'
15 || (c > '\x7f' && UnicodeXID::is_xid_continue(c))
16}
17
18pub fn is_whitespace(c: char) -> bool {
19 //FIXME: use is_pattern_whitespace
20 //https://github.com/behnam/rust-unic/issues/192
21 c.is_whitespace()
22}
23
24pub fn is_dec_digit(c: char) -> bool {
25 '0' <= c && c <= '9'
26}
diff --git a/crates/ra_syntax/src/parsing/lexer/comments.rs b/crates/ra_syntax/src/parsing/lexer/comments.rs
deleted file mode 100644
index 8bbbe659b..000000000
--- a/crates/ra_syntax/src/parsing/lexer/comments.rs
+++ /dev/null
@@ -1,57 +0,0 @@
1use crate::parsing::lexer::ptr::Ptr;
2
3use crate::SyntaxKind::{self, *};
4
5pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool {
6 if ptr.at_str("!/") {
7 ptr.bump();
8 ptr.bump();
9 bump_until_eol(ptr);
10 true
11 } else {
12 false
13 }
14}
15
16fn scan_block_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
17 if ptr.at('*') {
18 ptr.bump();
19 let mut depth: u32 = 1;
20 while depth > 0 {
21 if ptr.at_str("*/") {
22 depth -= 1;
23 ptr.bump();
24 ptr.bump();
25 } else if ptr.at_str("/*") {
26 depth += 1;
27 ptr.bump();
28 ptr.bump();
29 } else if ptr.bump().is_none() {
30 break;
31 }
32 }
33 Some(COMMENT)
34 } else {
35 None
36 }
37}
38
39pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> {
40 if ptr.at('/') {
41 bump_until_eol(ptr);
42 Some(COMMENT)
43 } else {
44 scan_block_comment(ptr)
45 }
46}
47
48fn bump_until_eol(ptr: &mut Ptr) {
49 loop {
50 if ptr.at('\n') || ptr.at_str("\r\n") {
51 return;
52 }
53 if ptr.bump().is_none() {
54 break;
55 }
56 }
57}
diff --git a/crates/ra_syntax/src/parsing/lexer/numbers.rs b/crates/ra_syntax/src/parsing/lexer/numbers.rs
deleted file mode 100644
index e53ae231b..000000000
--- a/crates/ra_syntax/src/parsing/lexer/numbers.rs
+++ /dev/null
@@ -1,66 +0,0 @@
1use crate::parsing::lexer::{classes::*, ptr::Ptr};
2
3use crate::SyntaxKind::{self, *};
4
5pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
6 if c == '0' {
7 match ptr.current().unwrap_or('\0') {
8 'b' | 'o' => {
9 ptr.bump();
10 scan_digits(ptr, false);
11 }
12 'x' => {
13 ptr.bump();
14 scan_digits(ptr, true);
15 }
16 '0'..='9' | '_' | '.' | 'e' | 'E' => {
17 scan_digits(ptr, true);
18 }
19 _ => return INT_NUMBER,
20 }
21 } else {
22 scan_digits(ptr, false);
23 }
24
25 // might be a float, but don't be greedy if this is actually an
26 // integer literal followed by field/method access or a range pattern
27 // (`0..2` and `12.foo()`)
28 if ptr.at('.') && !(ptr.at_str("..") || ptr.nth_is_p(1, is_ident_start)) {
29 // might have stuff after the ., and if it does, it needs to start
30 // with a number
31 ptr.bump();
32 scan_digits(ptr, false);
33 scan_float_exponent(ptr);
34 return FLOAT_NUMBER;
35 }
36 // it might be a float if it has an exponent
37 if ptr.at('e') || ptr.at('E') {
38 scan_float_exponent(ptr);
39 return FLOAT_NUMBER;
40 }
41 INT_NUMBER
42}
43
44fn scan_digits(ptr: &mut Ptr, allow_hex: bool) {
45 while let Some(c) = ptr.current() {
46 match c {
47 '_' | '0'..='9' => {
48 ptr.bump();
49 }
50 'a'..='f' | 'A'..='F' if allow_hex => {
51 ptr.bump();
52 }
53 _ => return,
54 }
55 }
56}
57
58fn scan_float_exponent(ptr: &mut Ptr) {
59 if ptr.at('e') || ptr.at('E') {
60 ptr.bump();
61 if ptr.at('-') || ptr.at('+') {
62 ptr.bump();
63 }
64 scan_digits(ptr, false);
65 }
66}
diff --git a/crates/ra_syntax/src/parsing/lexer/ptr.rs b/crates/ra_syntax/src/parsing/lexer/ptr.rs
deleted file mode 100644
index c341c4176..000000000
--- a/crates/ra_syntax/src/parsing/lexer/ptr.rs
+++ /dev/null
@@ -1,162 +0,0 @@
1use crate::TextUnit;
2
3use std::str::Chars;
4
5/// A simple view into the characters of a string.
6pub(crate) struct Ptr<'s> {
7 text: &'s str,
8 len: TextUnit,
9}
10
11impl<'s> Ptr<'s> {
12 /// Creates a new `Ptr` from a string.
13 pub fn new(text: &'s str) -> Ptr<'s> {
14 Ptr { text, len: 0.into() }
15 }
16
17 /// Gets the length of the remaining string.
18 pub fn into_len(self) -> TextUnit {
19 self.len
20 }
21
22 /// Gets the current character, if one exists.
23 pub fn current(&self) -> Option<char> {
24 self.chars().next()
25 }
26
27 /// Gets the nth character from the current.
28 /// For example, 0 will return the current character, 1 will return the next, etc.
29 pub fn nth(&self, n: u32) -> Option<char> {
30 self.chars().nth(n as usize)
31 }
32
33 /// Checks whether the current character is `c`.
34 pub fn at(&self, c: char) -> bool {
35 self.current() == Some(c)
36 }
37
38 /// Checks whether the next characters match `s`.
39 pub fn at_str(&self, s: &str) -> bool {
40 let chars = self.chars();
41 chars.as_str().starts_with(s)
42 }
43
44 /// Checks whether the current character satisfies the predicate `p`.
45 pub fn at_p<P: Fn(char) -> bool>(&self, p: P) -> bool {
46 self.current().map(p) == Some(true)
47 }
48
49 /// Checks whether the nth character satisfies the predicate `p`.
50 pub fn nth_is_p<P: Fn(char) -> bool>(&self, n: u32, p: P) -> bool {
51 self.nth(n).map(p) == Some(true)
52 }
53
54 /// Moves to the next character.
55 pub fn bump(&mut self) -> Option<char> {
56 let ch = self.chars().next()?;
57 self.len += TextUnit::of_char(ch);
58 Some(ch)
59 }
60
61 /// Moves to the next character as long as `pred` is satisfied.
62 pub fn bump_while<F: Fn(char) -> bool>(&mut self, pred: F) {
63 loop {
64 match self.current() {
65 Some(c) if pred(c) => {
66 self.bump();
67 }
68 _ => return,
69 }
70 }
71 }
72
73 /// Returns the text up to the current point.
74 pub fn current_token_text(&self) -> &str {
75 let len: u32 = self.len.into();
76 &self.text[..len as usize]
77 }
78
79 /// Returns an iterator over the remaining characters.
80 fn chars(&self) -> Chars {
81 let len: u32 = self.len.into();
82 self.text[len as usize..].chars()
83 }
84}
85
86#[cfg(test)]
87mod tests {
88 use super::*;
89
90 #[test]
91 fn test_current() {
92 let ptr = Ptr::new("test");
93 assert_eq!(ptr.current(), Some('t'));
94 }
95
96 #[test]
97 fn test_nth() {
98 let ptr = Ptr::new("test");
99 assert_eq!(ptr.nth(0), Some('t'));
100 assert_eq!(ptr.nth(1), Some('e'));
101 assert_eq!(ptr.nth(2), Some('s'));
102 assert_eq!(ptr.nth(3), Some('t'));
103 assert_eq!(ptr.nth(4), None);
104 }
105
106 #[test]
107 fn test_at() {
108 let ptr = Ptr::new("test");
109 assert!(ptr.at('t'));
110 assert!(!ptr.at('a'));
111 }
112
113 #[test]
114 fn test_at_str() {
115 let ptr = Ptr::new("test");
116 assert!(ptr.at_str("t"));
117 assert!(ptr.at_str("te"));
118 assert!(ptr.at_str("test"));
119 assert!(!ptr.at_str("tests"));
120 assert!(!ptr.at_str("rust"));
121 }
122
123 #[test]
124 fn test_at_p() {
125 let ptr = Ptr::new("test");
126 assert!(ptr.at_p(|c| c == 't'));
127 assert!(!ptr.at_p(|c| c == 'e'));
128 }
129
130 #[test]
131 fn test_nth_is_p() {
132 let ptr = Ptr::new("test");
133 assert!(ptr.nth_is_p(0, |c| c == 't'));
134 assert!(!ptr.nth_is_p(1, |c| c == 't'));
135 assert!(ptr.nth_is_p(3, |c| c == 't'));
136 assert!(!ptr.nth_is_p(150, |c| c == 't'));
137 }
138
139 #[test]
140 fn test_bump() {
141 let mut ptr = Ptr::new("test");
142 assert_eq!(ptr.current(), Some('t'));
143 ptr.bump();
144 assert_eq!(ptr.current(), Some('e'));
145 ptr.bump();
146 assert_eq!(ptr.current(), Some('s'));
147 ptr.bump();
148 assert_eq!(ptr.current(), Some('t'));
149 ptr.bump();
150 assert_eq!(ptr.current(), None);
151 ptr.bump();
152 assert_eq!(ptr.current(), None);
153 }
154
155 #[test]
156 fn test_bump_while() {
157 let mut ptr = Ptr::new("test");
158 assert_eq!(ptr.current(), Some('t'));
159 ptr.bump_while(|c| c != 's');
160 assert_eq!(ptr.current(), Some('s'));
161 }
162}
diff --git a/crates/ra_syntax/src/parsing/lexer/strings.rs b/crates/ra_syntax/src/parsing/lexer/strings.rs
deleted file mode 100644
index f74acff9e..000000000
--- a/crates/ra_syntax/src/parsing/lexer/strings.rs
+++ /dev/null
@@ -1,112 +0,0 @@
1use crate::{
2 parsing::lexer::ptr::Ptr,
3 SyntaxKind::{self, *},
4};
5
6pub(crate) fn is_string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
7 match (c, c1, c2) {
8 ('r', Some('"'), _)
9 | ('r', Some('#'), Some('"'))
10 | ('r', Some('#'), Some('#'))
11 | ('b', Some('"'), _)
12 | ('b', Some('\''), _)
13 | ('b', Some('r'), Some('"'))
14 | ('b', Some('r'), Some('#')) => true,
15 _ => false,
16 }
17}
18
19pub(crate) fn scan_char(ptr: &mut Ptr) {
20 while let Some(c) = ptr.current() {
21 match c {
22 '\\' => {
23 ptr.bump();
24 if ptr.at('\\') || ptr.at('\'') {
25 ptr.bump();
26 }
27 }
28 '\'' => {
29 ptr.bump();
30 return;
31 }
32 '\n' => return,
33 _ => {
34 ptr.bump();
35 }
36 }
37 }
38}
39
40pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind {
41 // unwrapping and not-exhaustive match are ok
42 // because of string_literal_start
43 let c = ptr.bump().unwrap();
44 match c {
45 '\'' => {
46 scan_byte(ptr);
47 BYTE
48 }
49 '"' => {
50 scan_byte_string(ptr);
51 BYTE_STRING
52 }
53 'r' => {
54 scan_raw_string(ptr);
55 RAW_BYTE_STRING
56 }
57 _ => unreachable!(),
58 }
59}
60
61pub(crate) fn scan_string(ptr: &mut Ptr) {
62 while let Some(c) = ptr.current() {
63 match c {
64 '\\' => {
65 ptr.bump();
66 if ptr.at('\\') || ptr.at('"') {
67 ptr.bump();
68 }
69 }
70 '"' => {
71 ptr.bump();
72 return;
73 }
74 _ => {
75 ptr.bump();
76 }
77 }
78 }
79}
80
81pub(crate) fn scan_raw_string(ptr: &mut Ptr) {
82 let mut hashes = 0;
83 while ptr.at('#') {
84 hashes += 1;
85 ptr.bump();
86 }
87 if !ptr.at('"') {
88 return;
89 }
90 ptr.bump();
91
92 while let Some(c) = ptr.bump() {
93 if c == '"' {
94 let mut hashes_left = hashes;
95 while ptr.at('#') && hashes_left > 0 {
96 hashes_left -= 1;
97 ptr.bump();
98 }
99 if hashes_left == 0 {
100 return;
101 }
102 }
103 }
104}
105
106fn scan_byte(ptr: &mut Ptr) {
107 scan_char(ptr)
108}
109
110fn scan_byte_string(ptr: &mut Ptr) {
111 scan_string(ptr)
112}
diff --git a/crates/ra_syntax/tests/data/lexer/0004_numbers.txt b/crates/ra_syntax/tests/data/lexer/0004_numbers.txt
index 39988aedc..7bb89b8ae 100644
--- a/crates/ra_syntax/tests/data/lexer/0004_numbers.txt
+++ b/crates/ra_syntax/tests/data/lexer/0004_numbers.txt
@@ -12,9 +12,9 @@ INT_NUMBER 2 "0_"
12WHITESPACE 1 " " 12WHITESPACE 1 " "
13FLOAT_NUMBER 2 "0." 13FLOAT_NUMBER 2 "0."
14WHITESPACE 1 " " 14WHITESPACE 1 " "
15INT_NUMBER 2 "0e" 15FLOAT_NUMBER 2 "0e"
16WHITESPACE 1 " " 16WHITESPACE 1 " "
17INT_NUMBER 2 "0E" 17FLOAT_NUMBER 2 "0E"
18WHITESPACE 1 " " 18WHITESPACE 1 " "
19INT_NUMBER 2 "0z" 19INT_NUMBER 2 "0z"
20WHITESPACE 1 "\n" 20WHITESPACE 1 "\n"
@@ -32,9 +32,9 @@ INT_NUMBER 6 "0_1279"
32WHITESPACE 1 " " 32WHITESPACE 1 " "
33FLOAT_NUMBER 6 "0.1279" 33FLOAT_NUMBER 6 "0.1279"
34WHITESPACE 1 " " 34WHITESPACE 1 " "
35INT_NUMBER 6 "0e1279" 35FLOAT_NUMBER 6 "0e1279"
36WHITESPACE 1 " " 36WHITESPACE 1 " "
37INT_NUMBER 6 "0E1279" 37FLOAT_NUMBER 6 "0E1279"
38WHITESPACE 1 "\n" 38WHITESPACE 1 "\n"
39INT_NUMBER 1 "0" 39INT_NUMBER 1 "0"
40DOT 1 "." 40DOT 1 "."
@@ -47,9 +47,7 @@ IDENT 3 "foo"
47L_PAREN 1 "(" 47L_PAREN 1 "("
48R_PAREN 1 ")" 48R_PAREN 1 ")"
49WHITESPACE 1 "\n" 49WHITESPACE 1 "\n"
50INT_NUMBER 2 "0e" 50FLOAT_NUMBER 4 "0e+1"
51PLUS 1 "+"
52INT_NUMBER 1 "1"
53WHITESPACE 1 "\n" 51WHITESPACE 1 "\n"
54INT_NUMBER 1 "0" 52INT_NUMBER 1 "0"
55DOT 1 "." 53DOT 1 "."
diff --git a/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt b/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt
index 812dfbc18..737a300ee 100644
--- a/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt
+++ b/crates/ra_syntax/tests/data/lexer/0014_unclosed_char.txt
@@ -1 +1 @@
CHAR 2 "\'1" LIFETIME 2 "\'1"
diff --git a/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt b/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt
index 76d186a3c..84867026f 100644
--- a/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt
+++ b/crates/ra_syntax/tests/data/parser/err/0002_duplicate_shebang.txt
@@ -1,7 +1,39 @@
1SOURCE_FILE@[0; 42) 1SOURCE_FILE@[0; 42)
2 SHEBANG@[0; 20) "#!/use/bin/env rusti" 2 SHEBANG@[0; 20) "#!/use/bin/env rusti"
3 WHITESPACE@[20; 21) "\n" 3 WHITESPACE@[20; 21) "\n"
4 ERROR@[21; 41) 4 ATTR@[21; 23)
5 SHEBANG@[21; 41) "#!/use/bin/env rusti" 5 POUND@[21; 22) "#"
6 EXCL@[22; 23) "!"
7 ERROR@[23; 24)
8 SLASH@[23; 24) "/"
9 USE_ITEM@[24; 28)
10 USE_KW@[24; 27) "use"
11 ERROR@[27; 28)
12 SLASH@[27; 28) "/"
13 MACRO_CALL@[28; 31)
14 PATH@[28; 31)
15 PATH_SEGMENT@[28; 31)
16 NAME_REF@[28; 31)
17 IDENT@[28; 31) "bin"
18 ERROR@[31; 32)
19 SLASH@[31; 32) "/"
20 MACRO_CALL@[32; 41)
21 PATH@[32; 35)
22 PATH_SEGMENT@[32; 35)
23 NAME_REF@[32; 35)
24 IDENT@[32; 35) "env"
25 WHITESPACE@[35; 36) " "
26 NAME@[36; 41)
27 IDENT@[36; 41) "rusti"
6 WHITESPACE@[41; 42) "\n" 28 WHITESPACE@[41; 42) "\n"
7error 21: expected an item 29error 23: expected `[`
30error 23: expected an item
31error 27: expected one of `*`, `::`, `{`, `self`, `super` or an indentifier
32error 28: expected SEMI
33error 31: expected EXCL
34error 31: expected `{`, `[`, `(`
35error 31: expected SEMI
36error 31: expected an item
37error 35: expected EXCL
38error 41: expected `{`, `[`, `(`
39error 41: expected SEMI
diff --git a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.rs
index 261aad1fb..261aad1fb 100644
--- a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.rs
+++ b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.rs
diff --git a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.txt
index b0acfa5d2..4f7e809c5 100644
--- a/crates/ra_syntax/tests/data/parser/err/0030_string_suffixes.txt
+++ b/crates/ra_syntax/tests/data/parser/ok/0030_string_suffixes.txt
@@ -11,7 +11,7 @@ SOURCE_FILE@[0; 112)
11 BLOCK@[10; 111) 11 BLOCK@[10; 111)
12 L_CURLY@[10; 11) "{" 12 L_CURLY@[10; 11) "{"
13 WHITESPACE@[11; 16) "\n " 13 WHITESPACE@[11; 16) "\n "
14 LET_STMT@[16; 27) 14 LET_STMT@[16; 31)
15 LET_KW@[16; 19) "let" 15 LET_KW@[16; 19) "let"
16 WHITESPACE@[19; 20) " " 16 WHITESPACE@[19; 20) " "
17 PLACEHOLDER_PAT@[20; 21) 17 PLACEHOLDER_PAT@[20; 21)
@@ -19,14 +19,8 @@ SOURCE_FILE@[0; 112)
19 WHITESPACE@[21; 22) " " 19 WHITESPACE@[21; 22) " "
20 EQ@[22; 23) "=" 20 EQ@[22; 23) "="
21 WHITESPACE@[23; 24) " " 21 WHITESPACE@[23; 24) " "
22 LITERAL@[24; 27) 22 LITERAL@[24; 30)
23 CHAR@[24; 27) "\'c\'" 23 CHAR@[24; 30) "\'c\'u32"
24 EXPR_STMT@[27; 31)
25 PATH_EXPR@[27; 30)
26 PATH@[27; 30)
27 PATH_SEGMENT@[27; 30)
28 NAME_REF@[27; 30)
29 IDENT@[27; 30) "u32"
30 SEMI@[30; 31) ";" 24 SEMI@[30; 31) ";"
31 WHITESPACE@[31; 36) "\n " 25 WHITESPACE@[31; 36) "\n "
32 LET_STMT@[36; 60) 26 LET_STMT@[36; 60)
@@ -67,4 +61,3 @@ SOURCE_FILE@[0; 112)
67 WHITESPACE@[109; 110) "\n" 61 WHITESPACE@[109; 110) "\n"
68 R_CURLY@[110; 111) "}" 62 R_CURLY@[110; 111) "}"
69 WHITESPACE@[111; 112) "\n" 63 WHITESPACE@[111; 112) "\n"
70error 27: expected SEMI