aboutsummaryrefslogtreecommitdiff
path: root/crates/syntax/src
diff options
context:
space:
mode:
Diffstat (limited to 'crates/syntax/src')
-rw-r--r--crates/syntax/src/ast.rs2
-rw-r--r--crates/syntax/src/ast/expr_ext.rs87
-rw-r--r--crates/syntax/src/ast/generated/tokens.rs42
-rw-r--r--crates/syntax/src/ast/token_ext.rs97
-rw-r--r--crates/syntax/src/parsing/lexer.rs114
5 files changed, 205 insertions, 137 deletions
diff --git a/crates/syntax/src/ast.rs b/crates/syntax/src/ast.rs
index a16ac6a7c..8a0e3d27b 100644
--- a/crates/syntax/src/ast.rs
+++ b/crates/syntax/src/ast.rs
@@ -16,7 +16,7 @@ use crate::{
16}; 16};
17 17
18pub use self::{ 18pub use self::{
19 expr_ext::{ArrayExprKind, BinOp, Effect, ElseBranch, LiteralKind, PrefixOp, Radix, RangeOp}, 19 expr_ext::{ArrayExprKind, BinOp, Effect, ElseBranch, LiteralKind, PrefixOp, RangeOp},
20 generated::{nodes::*, tokens::*}, 20 generated::{nodes::*, tokens::*},
21 node_ext::{ 21 node_ext::{
22 AttrKind, FieldKind, NameOrNameRef, PathSegmentKind, SelfParamKind, SlicePatComponents, 22 AttrKind, FieldKind, NameOrNameRef, PathSegmentKind, SelfParamKind, SlicePatComponents,
diff --git a/crates/syntax/src/ast/expr_ext.rs b/crates/syntax/src/ast/expr_ext.rs
index 3aff01e83..3d33cd1cf 100644
--- a/crates/syntax/src/ast/expr_ext.rs
+++ b/crates/syntax/src/ast/expr_ext.rs
@@ -2,7 +2,7 @@
2 2
3use crate::{ 3use crate::{
4 ast::{self, support, AstChildren, AstNode}, 4 ast::{self, support, AstChildren, AstNode},
5 SmolStr, 5 AstToken, SmolStr,
6 SyntaxKind::*, 6 SyntaxKind::*,
7 SyntaxToken, T, 7 SyntaxToken, T,
8}; 8};
@@ -316,6 +316,10 @@ impl ast::Literal {
316 .unwrap() 316 .unwrap()
317 } 317 }
318 318
319 pub fn as_int_number(&self) -> Option<ast::IntNumber> {
320 ast::IntNumber::cast(self.token())
321 }
322
319 fn find_suffix(text: &str, possible_suffixes: &[&str]) -> Option<SmolStr> { 323 fn find_suffix(text: &str, possible_suffixes: &[&str]) -> Option<SmolStr> {
320 possible_suffixes 324 possible_suffixes
321 .iter() 325 .iter()
@@ -324,11 +328,6 @@ impl ast::Literal {
324 } 328 }
325 329
326 pub fn kind(&self) -> LiteralKind { 330 pub fn kind(&self) -> LiteralKind {
327 const INT_SUFFIXES: [&str; 12] = [
328 "u64", "u32", "u16", "u8", "usize", "isize", "i64", "i32", "i16", "i8", "u128", "i128",
329 ];
330 const FLOAT_SUFFIXES: [&str; 2] = ["f32", "f64"];
331
332 let token = self.token(); 331 let token = self.token();
333 332
334 match token.kind() { 333 match token.kind() {
@@ -337,17 +336,20 @@ impl ast::Literal {
337 // The lexer treats e.g. `1f64` as an integer literal. See 336 // The lexer treats e.g. `1f64` as an integer literal. See
338 // https://github.com/rust-analyzer/rust-analyzer/issues/1592 337 // https://github.com/rust-analyzer/rust-analyzer/issues/1592
339 // and the comments on the linked PR. 338 // and the comments on the linked PR.
340
341 let text = token.text(); 339 let text = token.text();
342 if let suffix @ Some(_) = Self::find_suffix(&text, &FLOAT_SUFFIXES) { 340 if let suffix @ Some(_) = Self::find_suffix(&text, &ast::FloatNumber::SUFFIXES) {
343 LiteralKind::FloatNumber { suffix } 341 LiteralKind::FloatNumber { suffix }
344 } else { 342 } else {
345 LiteralKind::IntNumber { suffix: Self::find_suffix(&text, &INT_SUFFIXES) } 343 LiteralKind::IntNumber {
344 suffix: Self::find_suffix(&text, &ast::IntNumber::SUFFIXES),
345 }
346 } 346 }
347 } 347 }
348 FLOAT_NUMBER => { 348 FLOAT_NUMBER => {
349 let text = token.text(); 349 let text = token.text();
350 LiteralKind::FloatNumber { suffix: Self::find_suffix(&text, &FLOAT_SUFFIXES) } 350 LiteralKind::FloatNumber {
351 suffix: Self::find_suffix(&text, &ast::FloatNumber::SUFFIXES),
352 }
351 } 353 }
352 STRING | RAW_STRING => LiteralKind::String, 354 STRING | RAW_STRING => LiteralKind::String,
353 T![true] => LiteralKind::Bool(true), 355 T![true] => LiteralKind::Bool(true),
@@ -358,71 +360,6 @@ impl ast::Literal {
358 _ => unreachable!(), 360 _ => unreachable!(),
359 } 361 }
360 } 362 }
361
362 // FIXME: should probably introduce string token type?
363 // https://github.com/rust-analyzer/rust-analyzer/issues/6308
364 pub fn int_value(&self) -> Option<(Radix, u128)> {
365 let suffix = match self.kind() {
366 LiteralKind::IntNumber { suffix } => suffix,
367 _ => return None,
368 };
369
370 let token = self.token();
371 let mut text = token.text().as_str();
372 text = &text[..text.len() - suffix.map_or(0, |it| it.len())];
373
374 let buf;
375 if text.contains("_") {
376 buf = text.replace('_', "");
377 text = buf.as_str();
378 };
379
380 let radix = Radix::identify(text)?;
381 let digits = &text[radix.prefix_len()..];
382 let value = u128::from_str_radix(digits, radix as u32).ok()?;
383 Some((radix, value))
384 }
385}
386
387#[derive(Debug, PartialEq, Eq, Copy, Clone)]
388pub enum Radix {
389 Binary = 2,
390 Octal = 8,
391 Decimal = 10,
392 Hexadecimal = 16,
393}
394
395impl Radix {
396 pub const ALL: &'static [Radix] =
397 &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal];
398
399 fn identify(literal_text: &str) -> Option<Self> {
400 // We cannot express a literal in anything other than decimal in under 3 characters, so we return here if possible.
401 if literal_text.len() < 3 && literal_text.chars().all(|c| c.is_digit(10)) {
402 return Some(Self::Decimal);
403 }
404
405 let res = match &literal_text[..2] {
406 "0b" => Radix::Binary,
407 "0o" => Radix::Octal,
408 "0x" => Radix::Hexadecimal,
409 _ => Radix::Decimal,
410 };
411
412 // Checks that all characters after the base prefix are all valid digits for that base.
413 if literal_text[res.prefix_len()..].chars().all(|c| c.is_digit(res as u32)) {
414 Some(res)
415 } else {
416 None
417 }
418 }
419
420 const fn prefix_len(&self) -> usize {
421 match self {
422 Self::Decimal => 0,
423 _ => 2,
424 }
425 }
426} 363}
427 364
428#[derive(Debug, Clone, PartialEq, Eq)] 365#[derive(Debug, Clone, PartialEq, Eq)]
diff --git a/crates/syntax/src/ast/generated/tokens.rs b/crates/syntax/src/ast/generated/tokens.rs
index abadd0b61..1b8449221 100644
--- a/crates/syntax/src/ast/generated/tokens.rs
+++ b/crates/syntax/src/ast/generated/tokens.rs
@@ -89,3 +89,45 @@ impl AstToken for RawString {
89 } 89 }
90 fn syntax(&self) -> &SyntaxToken { &self.syntax } 90 fn syntax(&self) -> &SyntaxToken { &self.syntax }
91} 91}
92
93#[derive(Debug, Clone, PartialEq, Eq, Hash)]
94pub struct IntNumber {
95 pub(crate) syntax: SyntaxToken,
96}
97impl std::fmt::Display for IntNumber {
98 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
99 std::fmt::Display::fmt(&self.syntax, f)
100 }
101}
102impl AstToken for IntNumber {
103 fn can_cast(kind: SyntaxKind) -> bool { kind == INT_NUMBER }
104 fn cast(syntax: SyntaxToken) -> Option<Self> {
105 if Self::can_cast(syntax.kind()) {
106 Some(Self { syntax })
107 } else {
108 None
109 }
110 }
111 fn syntax(&self) -> &SyntaxToken { &self.syntax }
112}
113
114#[derive(Debug, Clone, PartialEq, Eq, Hash)]
115pub struct FloatNumber {
116 pub(crate) syntax: SyntaxToken,
117}
118impl std::fmt::Display for FloatNumber {
119 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
120 std::fmt::Display::fmt(&self.syntax, f)
121 }
122}
123impl AstToken for FloatNumber {
124 fn can_cast(kind: SyntaxKind) -> bool { kind == FLOAT_NUMBER }
125 fn cast(syntax: SyntaxToken) -> Option<Self> {
126 if Self::can_cast(syntax.kind()) {
127 Some(Self { syntax })
128 } else {
129 None
130 }
131 }
132 fn syntax(&self) -> &SyntaxToken { &self.syntax }
133}
diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs
index c5ef92733..8d3fad5a6 100644
--- a/crates/syntax/src/ast/token_ext.rs
+++ b/crates/syntax/src/ast/token_ext.rs
@@ -8,11 +8,11 @@ use std::{
8use rustc_lexer::unescape::{unescape_literal, Mode}; 8use rustc_lexer::unescape::{unescape_literal, Mode};
9 9
10use crate::{ 10use crate::{
11 ast::{AstToken, Comment, RawString, String, Whitespace}, 11 ast::{self, AstToken},
12 TextRange, TextSize, 12 TextRange, TextSize,
13}; 13};
14 14
15impl Comment { 15impl ast::Comment {
16 pub fn kind(&self) -> CommentKind { 16 pub fn kind(&self) -> CommentKind {
17 kind_by_prefix(self.text()) 17 kind_by_prefix(self.text())
18 } 18 }
@@ -80,7 +80,7 @@ fn kind_by_prefix(text: &str) -> CommentKind {
80 panic!("bad comment text: {:?}", text) 80 panic!("bad comment text: {:?}", text)
81} 81}
82 82
83impl Whitespace { 83impl ast::Whitespace {
84 pub fn spans_multiple_lines(&self) -> bool { 84 pub fn spans_multiple_lines(&self) -> bool {
85 let text = self.text(); 85 let text = self.text();
86 text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n')) 86 text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n'))
@@ -138,19 +138,19 @@ pub trait HasQuotes: AstToken {
138 } 138 }
139} 139}
140 140
141impl HasQuotes for String {} 141impl HasQuotes for ast::String {}
142impl HasQuotes for RawString {} 142impl HasQuotes for ast::RawString {}
143 143
144pub trait HasStringValue: HasQuotes { 144pub trait HasStringValue: HasQuotes {
145 fn value(&self) -> Option<Cow<'_, str>>; 145 fn value(&self) -> Option<Cow<'_, str>>;
146} 146}
147 147
148impl HasStringValue for String { 148impl HasStringValue for ast::String {
149 fn value(&self) -> Option<Cow<'_, str>> { 149 fn value(&self) -> Option<Cow<'_, str>> {
150 let text = self.text().as_str(); 150 let text = self.text().as_str();
151 let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; 151 let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
152 152
153 let mut buf = std::string::String::with_capacity(text.len()); 153 let mut buf = String::with_capacity(text.len());
154 let mut has_error = false; 154 let mut has_error = false;
155 unescape_literal(text, Mode::Str, &mut |_, unescaped_char| match unescaped_char { 155 unescape_literal(text, Mode::Str, &mut |_, unescaped_char| match unescaped_char {
156 Ok(c) => buf.push(c), 156 Ok(c) => buf.push(c),
@@ -166,7 +166,8 @@ impl HasStringValue for String {
166 } 166 }
167} 167}
168 168
169impl HasStringValue for RawString { 169// FIXME: merge `ast::RawString` and `ast::String`.
170impl HasStringValue for ast::RawString {
170 fn value(&self) -> Option<Cow<'_, str>> { 171 fn value(&self) -> Option<Cow<'_, str>> {
171 let text = self.text().as_str(); 172 let text = self.text().as_str();
172 let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; 173 let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
@@ -174,7 +175,7 @@ impl HasStringValue for RawString {
174 } 175 }
175} 176}
176 177
177impl RawString { 178impl ast::RawString {
178 pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> { 179 pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
179 let contents_range = self.text_range_between_quotes()?; 180 let contents_range = self.text_range_between_quotes()?;
180 assert!(TextRange::up_to(contents_range.len()).contains_range(range)); 181 assert!(TextRange::up_to(contents_range.len()).contains_range(range));
@@ -500,7 +501,7 @@ pub trait HasFormatSpecifier: AstToken {
500 } 501 }
501} 502}
502 503
503impl HasFormatSpecifier for String { 504impl HasFormatSpecifier for ast::String {
504 fn char_ranges( 505 fn char_ranges(
505 &self, 506 &self,
506 ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> { 507 ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
@@ -521,7 +522,7 @@ impl HasFormatSpecifier for String {
521 } 522 }
522} 523}
523 524
524impl HasFormatSpecifier for RawString { 525impl HasFormatSpecifier for ast::RawString {
525 fn char_ranges( 526 fn char_ranges(
526 &self, 527 &self,
527 ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> { 528 ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
@@ -536,3 +537,77 @@ impl HasFormatSpecifier for RawString {
536 Some(res) 537 Some(res)
537 } 538 }
538} 539}
540
541impl ast::IntNumber {
542 #[rustfmt::skip]
543 pub(crate) const SUFFIXES: &'static [&'static str] = &[
544 "u8", "u16", "u32", "u64", "u128", "usize",
545 "i8", "i16", "i32", "i64", "i128", "isize",
546 ];
547
548 pub fn radix(&self) -> Radix {
549 match self.text().get(..2).unwrap_or_default() {
550 "0b" => Radix::Binary,
551 "0o" => Radix::Octal,
552 "0x" => Radix::Hexadecimal,
553 _ => Radix::Decimal,
554 }
555 }
556
557 pub fn value(&self) -> Option<u128> {
558 let token = self.syntax();
559
560 let mut text = token.text().as_str();
561 if let Some(suffix) = self.suffix() {
562 text = &text[..text.len() - suffix.len()]
563 }
564
565 let radix = self.radix();
566 text = &text[radix.prefix_len()..];
567
568 let buf;
569 if text.contains("_") {
570 buf = text.replace('_', "");
571 text = buf.as_str();
572 };
573
574 let value = u128::from_str_radix(text, radix as u32).ok()?;
575 Some(value)
576 }
577
578 pub fn suffix(&self) -> Option<&str> {
579 let text = self.text();
580 // FIXME: don't check a fixed set of suffixes, `1_0_1___lol` is valid
581 // syntax, suffix is `lol`.
582 ast::IntNumber::SUFFIXES.iter().find_map(|suffix| {
583 if text.ends_with(suffix) {
584 return Some(&text[text.len() - suffix.len()..]);
585 }
586 None
587 })
588 }
589}
590
591impl ast::FloatNumber {
592 pub(crate) const SUFFIXES: &'static [&'static str] = &["f32", "f64"];
593}
594
595#[derive(Debug, PartialEq, Eq, Copy, Clone)]
596pub enum Radix {
597 Binary = 2,
598 Octal = 8,
599 Decimal = 10,
600 Hexadecimal = 16,
601}
602
603impl Radix {
604 pub const ALL: &'static [Radix] =
605 &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal];
606
607 const fn prefix_len(&self) -> usize {
608 match self {
609 Self::Decimal => 0,
610 _ => 2,
611 }
612 }
613}
diff --git a/crates/syntax/src/parsing/lexer.rs b/crates/syntax/src/parsing/lexer.rs
index 7e38c32cc..5674ecb84 100644
--- a/crates/syntax/src/parsing/lexer.rs
+++ b/crates/syntax/src/parsing/lexer.rs
@@ -3,7 +3,7 @@
3 3
4use std::convert::TryInto; 4use std::convert::TryInto;
5 5
6use rustc_lexer::{LiteralKind as LK, RawStrError}; 6use rustc_lexer::RawStrError;
7 7
8use crate::{ 8use crate::{
9 SyntaxError, 9 SyntaxError,
@@ -185,63 +185,77 @@ fn rustc_token_kind_to_syntax_kind(
185 return (syntax_kind, None); 185 return (syntax_kind, None);
186 186
187 fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) { 187 fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) {
188 #[rustfmt::skip] 188 let mut err = "";
189 let syntax_kind = match *kind { 189 let syntax_kind = match *kind {
190 LK::Int { empty_int: false, .. } => INT_NUMBER, 190 rustc_lexer::LiteralKind::Int { empty_int, base: _ } => {
191 LK::Int { empty_int: true, .. } => { 191 if empty_int {
192 return (INT_NUMBER, Some("Missing digits after the integer base prefix")) 192 err = "Missing digits after the integer base prefix";
193 }
194 INT_NUMBER
193 } 195 }
194 196 rustc_lexer::LiteralKind::Float { empty_exponent, base: _ } => {
195 LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER, 197 if empty_exponent {
196 LK::Float { empty_exponent: true, .. } => { 198 err = "Missing digits after the exponent symbol";
197 return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol")) 199 }
200 FLOAT_NUMBER
198 } 201 }
199 202 rustc_lexer::LiteralKind::Char { terminated } => {
200 LK::Char { terminated: true } => CHAR, 203 if !terminated {
201 LK::Char { terminated: false } => { 204 err = "Missing trailing `'` symbol to terminate the character literal";
202 return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal")) 205 }
206 CHAR
203 } 207 }
204 208 rustc_lexer::LiteralKind::Byte { terminated } => {
205 LK::Byte { terminated: true } => BYTE, 209 if !terminated {
206 LK::Byte { terminated: false } => { 210 err = "Missing trailing `'` symbol to terminate the byte literal";
207 return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal")) 211 }
212 BYTE
208 } 213 }
209 214 rustc_lexer::LiteralKind::Str { terminated } => {
210 LK::Str { terminated: true } => STRING, 215 if !terminated {
211 LK::Str { terminated: false } => { 216 err = "Missing trailing `\"` symbol to terminate the string literal";
212 return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal")) 217 }
218 STRING
213 } 219 }
214 220 rustc_lexer::LiteralKind::ByteStr { terminated } => {
215 221 if !terminated {
216 LK::ByteStr { terminated: true } => BYTE_STRING, 222 err = "Missing trailing `\"` symbol to terminate the byte string literal";
217 LK::ByteStr { terminated: false } => { 223 }
218 return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal")) 224 BYTE_STRING
225 }
226 rustc_lexer::LiteralKind::RawStr { err: raw_str_err, .. } => {
227 if let Some(raw_str_err) = raw_str_err {
228 err = match raw_str_err {
229 RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw string literal",
230 RawStrError::NoTerminator { expected, found, .. } => if expected == found {
231 "Missing trailing `\"` to terminate the raw string literal"
232 } else {
233 "Missing trailing `\"` with `#` symbols to terminate the raw string literal"
234 },
235 RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols",
236 };
237 };
238 RAW_STRING
239 }
240 rustc_lexer::LiteralKind::RawByteStr { err: raw_str_err, .. } => {
241 if let Some(raw_str_err) = raw_str_err {
242 err = match raw_str_err {
243 RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw byte string literal",
244 RawStrError::NoTerminator { expected, found, .. } => if expected == found {
245 "Missing trailing `\"` to terminate the raw byte string literal"
246 } else {
247 "Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"
248 },
249 RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols",
250 };
251 };
252
253 RAW_BYTE_STRING
219 } 254 }
220
221 LK::RawStr { err, .. } => match err {
222 None => RAW_STRING,
223 Some(RawStrError::InvalidStarter { .. }) => return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal")),
224 Some(RawStrError::NoTerminator { expected, found, .. }) => if expected == found {
225 return (RAW_STRING, Some("Missing trailing `\"` to terminate the raw string literal"))
226 } else {
227 return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal"))
228
229 },
230 Some(RawStrError::TooManyDelimiters { .. }) => return (RAW_STRING, Some("Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols")),
231 },
232 LK::RawByteStr { err, .. } => match err {
233 None => RAW_BYTE_STRING,
234 Some(RawStrError::InvalidStarter { .. }) => return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal")),
235 Some(RawStrError::NoTerminator { expected, found, .. }) => if expected == found {
236 return (RAW_BYTE_STRING, Some("Missing trailing `\"` to terminate the raw byte string literal"))
237 } else {
238 return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal"))
239
240 },
241 Some(RawStrError::TooManyDelimiters { .. }) => return (RAW_BYTE_STRING, Some("Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols")),
242 },
243 }; 255 };
244 256
245 (syntax_kind, None) 257 let err = if err.is_empty() { None } else { Some(err) };
258
259 (syntax_kind, err)
246 } 260 }
247} 261}