diff options
author | Aleksey Kladov <[email protected]> | 2020-11-06 17:54:01 +0000 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2020-11-06 17:54:01 +0000 |
commit | 735aaa7b39b4d3d789ad75c167bbf322a65ca257 (patch) | |
tree | 865daba83547042e2aed9cc412382fd646a5a158 /crates/syntax/src | |
parent | 6bcc33e5b7e32a79865be4893fcc33caf8d831d6 (diff) |
Move int parsing to IntNumber token
Diffstat (limited to 'crates/syntax/src')
-rw-r--r-- | crates/syntax/src/ast.rs | 2 | ||||
-rw-r--r-- | crates/syntax/src/ast/expr_ext.rs | 87 | ||||
-rw-r--r-- | crates/syntax/src/ast/token_ext.rs | 78 |
3 files changed, 91 insertions, 76 deletions
diff --git a/crates/syntax/src/ast.rs b/crates/syntax/src/ast.rs index a16ac6a7c..8a0e3d27b 100644 --- a/crates/syntax/src/ast.rs +++ b/crates/syntax/src/ast.rs | |||
@@ -16,7 +16,7 @@ use crate::{ | |||
16 | }; | 16 | }; |
17 | 17 | ||
18 | pub use self::{ | 18 | pub use self::{ |
19 | expr_ext::{ArrayExprKind, BinOp, Effect, ElseBranch, LiteralKind, PrefixOp, Radix, RangeOp}, | 19 | expr_ext::{ArrayExprKind, BinOp, Effect, ElseBranch, LiteralKind, PrefixOp, RangeOp}, |
20 | generated::{nodes::*, tokens::*}, | 20 | generated::{nodes::*, tokens::*}, |
21 | node_ext::{ | 21 | node_ext::{ |
22 | AttrKind, FieldKind, NameOrNameRef, PathSegmentKind, SelfParamKind, SlicePatComponents, | 22 | AttrKind, FieldKind, NameOrNameRef, PathSegmentKind, SelfParamKind, SlicePatComponents, |
diff --git a/crates/syntax/src/ast/expr_ext.rs b/crates/syntax/src/ast/expr_ext.rs index 3aff01e83..3d33cd1cf 100644 --- a/crates/syntax/src/ast/expr_ext.rs +++ b/crates/syntax/src/ast/expr_ext.rs | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | use crate::{ | 3 | use crate::{ |
4 | ast::{self, support, AstChildren, AstNode}, | 4 | ast::{self, support, AstChildren, AstNode}, |
5 | SmolStr, | 5 | AstToken, SmolStr, |
6 | SyntaxKind::*, | 6 | SyntaxKind::*, |
7 | SyntaxToken, T, | 7 | SyntaxToken, T, |
8 | }; | 8 | }; |
@@ -316,6 +316,10 @@ impl ast::Literal { | |||
316 | .unwrap() | 316 | .unwrap() |
317 | } | 317 | } |
318 | 318 | ||
319 | pub fn as_int_number(&self) -> Option<ast::IntNumber> { | ||
320 | ast::IntNumber::cast(self.token()) | ||
321 | } | ||
322 | |||
319 | fn find_suffix(text: &str, possible_suffixes: &[&str]) -> Option<SmolStr> { | 323 | fn find_suffix(text: &str, possible_suffixes: &[&str]) -> Option<SmolStr> { |
320 | possible_suffixes | 324 | possible_suffixes |
321 | .iter() | 325 | .iter() |
@@ -324,11 +328,6 @@ impl ast::Literal { | |||
324 | } | 328 | } |
325 | 329 | ||
326 | pub fn kind(&self) -> LiteralKind { | 330 | pub fn kind(&self) -> LiteralKind { |
327 | const INT_SUFFIXES: [&str; 12] = [ | ||
328 | "u64", "u32", "u16", "u8", "usize", "isize", "i64", "i32", "i16", "i8", "u128", "i128", | ||
329 | ]; | ||
330 | const FLOAT_SUFFIXES: [&str; 2] = ["f32", "f64"]; | ||
331 | |||
332 | let token = self.token(); | 331 | let token = self.token(); |
333 | 332 | ||
334 | match token.kind() { | 333 | match token.kind() { |
@@ -337,17 +336,20 @@ impl ast::Literal { | |||
337 | // The lexer treats e.g. `1f64` as an integer literal. See | 336 | // The lexer treats e.g. `1f64` as an integer literal. See |
338 | // https://github.com/rust-analyzer/rust-analyzer/issues/1592 | 337 | // https://github.com/rust-analyzer/rust-analyzer/issues/1592 |
339 | // and the comments on the linked PR. | 338 | // and the comments on the linked PR. |
340 | |||
341 | let text = token.text(); | 339 | let text = token.text(); |
342 | if let suffix @ Some(_) = Self::find_suffix(&text, &FLOAT_SUFFIXES) { | 340 | if let suffix @ Some(_) = Self::find_suffix(&text, &ast::FloatNumber::SUFFIXES) { |
343 | LiteralKind::FloatNumber { suffix } | 341 | LiteralKind::FloatNumber { suffix } |
344 | } else { | 342 | } else { |
345 | LiteralKind::IntNumber { suffix: Self::find_suffix(&text, &INT_SUFFIXES) } | 343 | LiteralKind::IntNumber { |
344 | suffix: Self::find_suffix(&text, &ast::IntNumber::SUFFIXES), | ||
345 | } | ||
346 | } | 346 | } |
347 | } | 347 | } |
348 | FLOAT_NUMBER => { | 348 | FLOAT_NUMBER => { |
349 | let text = token.text(); | 349 | let text = token.text(); |
350 | LiteralKind::FloatNumber { suffix: Self::find_suffix(&text, &FLOAT_SUFFIXES) } | 350 | LiteralKind::FloatNumber { |
351 | suffix: Self::find_suffix(&text, &ast::FloatNumber::SUFFIXES), | ||
352 | } | ||
351 | } | 353 | } |
352 | STRING | RAW_STRING => LiteralKind::String, | 354 | STRING | RAW_STRING => LiteralKind::String, |
353 | T![true] => LiteralKind::Bool(true), | 355 | T![true] => LiteralKind::Bool(true), |
@@ -358,71 +360,6 @@ impl ast::Literal { | |||
358 | _ => unreachable!(), | 360 | _ => unreachable!(), |
359 | } | 361 | } |
360 | } | 362 | } |
361 | |||
362 | // FIXME: should probably introduce string token type? | ||
363 | // https://github.com/rust-analyzer/rust-analyzer/issues/6308 | ||
364 | pub fn int_value(&self) -> Option<(Radix, u128)> { | ||
365 | let suffix = match self.kind() { | ||
366 | LiteralKind::IntNumber { suffix } => suffix, | ||
367 | _ => return None, | ||
368 | }; | ||
369 | |||
370 | let token = self.token(); | ||
371 | let mut text = token.text().as_str(); | ||
372 | text = &text[..text.len() - suffix.map_or(0, |it| it.len())]; | ||
373 | |||
374 | let buf; | ||
375 | if text.contains("_") { | ||
376 | buf = text.replace('_', ""); | ||
377 | text = buf.as_str(); | ||
378 | }; | ||
379 | |||
380 | let radix = Radix::identify(text)?; | ||
381 | let digits = &text[radix.prefix_len()..]; | ||
382 | let value = u128::from_str_radix(digits, radix as u32).ok()?; | ||
383 | Some((radix, value)) | ||
384 | } | ||
385 | } | ||
386 | |||
387 | #[derive(Debug, PartialEq, Eq, Copy, Clone)] | ||
388 | pub enum Radix { | ||
389 | Binary = 2, | ||
390 | Octal = 8, | ||
391 | Decimal = 10, | ||
392 | Hexadecimal = 16, | ||
393 | } | ||
394 | |||
395 | impl Radix { | ||
396 | pub const ALL: &'static [Radix] = | ||
397 | &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal]; | ||
398 | |||
399 | fn identify(literal_text: &str) -> Option<Self> { | ||
400 | // We cannot express a literal in anything other than decimal in under 3 characters, so we return here if possible. | ||
401 | if literal_text.len() < 3 && literal_text.chars().all(|c| c.is_digit(10)) { | ||
402 | return Some(Self::Decimal); | ||
403 | } | ||
404 | |||
405 | let res = match &literal_text[..2] { | ||
406 | "0b" => Radix::Binary, | ||
407 | "0o" => Radix::Octal, | ||
408 | "0x" => Radix::Hexadecimal, | ||
409 | _ => Radix::Decimal, | ||
410 | }; | ||
411 | |||
412 | // Checks that all characters after the base prefix are all valid digits for that base. | ||
413 | if literal_text[res.prefix_len()..].chars().all(|c| c.is_digit(res as u32)) { | ||
414 | Some(res) | ||
415 | } else { | ||
416 | None | ||
417 | } | ||
418 | } | ||
419 | |||
420 | const fn prefix_len(&self) -> usize { | ||
421 | match self { | ||
422 | Self::Decimal => 0, | ||
423 | _ => 2, | ||
424 | } | ||
425 | } | ||
426 | } | 363 | } |
427 | 364 | ||
428 | #[derive(Debug, Clone, PartialEq, Eq)] | 365 | #[derive(Debug, Clone, PartialEq, Eq)] |
diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index 44f967acb..5623799b4 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs | |||
@@ -536,3 +536,81 @@ impl HasFormatSpecifier for ast::RawString { | |||
536 | Some(res) | 536 | Some(res) |
537 | } | 537 | } |
538 | } | 538 | } |
539 | |||
540 | impl ast::IntNumber { | ||
541 | #[rustfmt::skip] | ||
542 | pub(crate) const SUFFIXES: &'static [&'static str] = &[ | ||
543 | "u8", "u16", "u32", "u64", "u128", "usize", | ||
544 | "i8", "i16", "i32", "i64", "i128", "isize", | ||
545 | ]; | ||
546 | |||
547 | // FIXME: should probably introduce string token type? | ||
548 | // https://github.com/rust-analyzer/rust-analyzer/issues/6308 | ||
549 | pub fn value(&self) -> Option<(Radix, u128)> { | ||
550 | let token = self.syntax(); | ||
551 | |||
552 | let mut text = token.text().as_str(); | ||
553 | for suffix in ast::IntNumber::SUFFIXES { | ||
554 | if let Some(without_suffix) = text.strip_suffix(suffix) { | ||
555 | text = without_suffix; | ||
556 | break; | ||
557 | } | ||
558 | } | ||
559 | |||
560 | let buf; | ||
561 | if text.contains("_") { | ||
562 | buf = text.replace('_', ""); | ||
563 | text = buf.as_str(); | ||
564 | }; | ||
565 | |||
566 | let radix = Radix::identify(text)?; | ||
567 | let digits = &text[radix.prefix_len()..]; | ||
568 | let value = u128::from_str_radix(digits, radix as u32).ok()?; | ||
569 | Some((radix, value)) | ||
570 | } | ||
571 | } | ||
572 | |||
573 | impl ast::FloatNumber { | ||
574 | pub(crate) const SUFFIXES: &'static [&'static str] = &["f32", "f64"]; | ||
575 | } | ||
576 | |||
577 | #[derive(Debug, PartialEq, Eq, Copy, Clone)] | ||
578 | pub enum Radix { | ||
579 | Binary = 2, | ||
580 | Octal = 8, | ||
581 | Decimal = 10, | ||
582 | Hexadecimal = 16, | ||
583 | } | ||
584 | |||
585 | impl Radix { | ||
586 | pub const ALL: &'static [Radix] = | ||
587 | &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal]; | ||
588 | |||
589 | fn identify(literal_text: &str) -> Option<Self> { | ||
590 | // We cannot express a literal in anything other than decimal in under 3 characters, so we return here if possible. | ||
591 | if literal_text.len() < 3 && literal_text.chars().all(|c| c.is_digit(10)) { | ||
592 | return Some(Self::Decimal); | ||
593 | } | ||
594 | |||
595 | let res = match &literal_text[..2] { | ||
596 | "0b" => Radix::Binary, | ||
597 | "0o" => Radix::Octal, | ||
598 | "0x" => Radix::Hexadecimal, | ||
599 | _ => Radix::Decimal, | ||
600 | }; | ||
601 | |||
602 | // Checks that all characters after the base prefix are all valid digits for that base. | ||
603 | if literal_text[res.prefix_len()..].chars().all(|c| c.is_digit(res as u32)) { | ||
604 | Some(res) | ||
605 | } else { | ||
606 | None | ||
607 | } | ||
608 | } | ||
609 | |||
610 | const fn prefix_len(&self) -> usize { | ||
611 | match self { | ||
612 | Self::Decimal => 0, | ||
613 | _ => 2, | ||
614 | } | ||
615 | } | ||
616 | } | ||