From 3820b26a9325b26acd614bcad57d410e7286226e Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 6 Nov 2020 18:19:24 +0100 Subject: Generate token for ints and floats --- crates/syntax/src/ast/generated/tokens.rs | 42 +++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'crates/syntax') diff --git a/crates/syntax/src/ast/generated/tokens.rs b/crates/syntax/src/ast/generated/tokens.rs index abadd0b61..1b8449221 100644 --- a/crates/syntax/src/ast/generated/tokens.rs +++ b/crates/syntax/src/ast/generated/tokens.rs @@ -89,3 +89,45 @@ impl AstToken for RawString { } fn syntax(&self) -> &SyntaxToken { &self.syntax } } + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct IntNumber { + pub(crate) syntax: SyntaxToken, +} +impl std::fmt::Display for IntNumber { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.syntax, f) + } +} +impl AstToken for IntNumber { + fn can_cast(kind: SyntaxKind) -> bool { kind == INT_NUMBER } + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxToken { &self.syntax } +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FloatNumber { + pub(crate) syntax: SyntaxToken, +} +impl std::fmt::Display for FloatNumber { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.syntax, f) + } +} +impl AstToken for FloatNumber { + fn can_cast(kind: SyntaxKind) -> bool { kind == FLOAT_NUMBER } + fn cast(syntax: SyntaxToken) -> Option { + if Self::can_cast(syntax.kind()) { + Some(Self { syntax }) + } else { + None + } + } + fn syntax(&self) -> &SyntaxToken { &self.syntax } +} -- cgit v1.2.3 From 6bcc33e5b7e32a79865be4893fcc33caf8d831d6 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 6 Nov 2020 18:39:09 +0100 Subject: Better imports --- crates/syntax/src/ast/token_ext.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'crates/syntax') diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index c5ef92733..44f967acb 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs @@ -8,11 +8,11 @@ use std::{ use rustc_lexer::unescape::{unescape_literal, Mode}; use crate::{ - ast::{AstToken, Comment, RawString, String, Whitespace}, + ast::{self, AstToken}, TextRange, TextSize, }; -impl Comment { +impl ast::Comment { pub fn kind(&self) -> CommentKind { kind_by_prefix(self.text()) } @@ -80,7 +80,7 @@ fn kind_by_prefix(text: &str) -> CommentKind { panic!("bad comment text: {:?}", text) } -impl Whitespace { +impl ast::Whitespace { pub fn spans_multiple_lines(&self) -> bool { let text = self.text(); text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n')) @@ -138,19 +138,19 @@ pub trait HasQuotes: AstToken { } } -impl HasQuotes for String {} -impl HasQuotes for RawString {} +impl HasQuotes for ast::String {} +impl HasQuotes for ast::RawString {} pub trait HasStringValue: HasQuotes { fn value(&self) -> Option>; } -impl HasStringValue for String { +impl HasStringValue for ast::String { fn value(&self) -> Option> { let text = self.text().as_str(); let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; - let mut buf = std::string::String::with_capacity(text.len()); + let mut buf = String::with_capacity(text.len()); let mut has_error = false; unescape_literal(text, Mode::Str, &mut |_, unescaped_char| match unescaped_char { Ok(c) => buf.push(c), @@ -166,7 +166,7 @@ impl HasStringValue for String { } } -impl HasStringValue for RawString { +impl HasStringValue for ast::RawString { fn value(&self) -> Option> { let text = self.text().as_str(); let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; @@ -174,7 +174,7 @@ impl HasStringValue for RawString { } } -impl RawString { +impl ast::RawString { pub fn map_range_up(&self, range: TextRange) -> Option { let contents_range = self.text_range_between_quotes()?; assert!(TextRange::up_to(contents_range.len()).contains_range(range)); @@ -500,7 +500,7 @@ pub trait HasFormatSpecifier: AstToken { } } -impl HasFormatSpecifier for String { +impl HasFormatSpecifier for ast::String { fn char_ranges( &self, ) -> Option)>> { @@ -521,7 +521,7 @@ impl HasFormatSpecifier for String { } } -impl HasFormatSpecifier for RawString { +impl HasFormatSpecifier for ast::RawString { fn char_ranges( &self, ) -> Option)>> { -- cgit v1.2.3 From 735aaa7b39b4d3d789ad75c167bbf322a65ca257 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 6 Nov 2020 18:54:01 +0100 Subject: Move int parsing to IntNumber token --- crates/syntax/src/ast.rs | 2 +- crates/syntax/src/ast/expr_ext.rs | 87 ++++++-------------------------------- crates/syntax/src/ast/token_ext.rs | 78 ++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 76 deletions(-) (limited to 'crates/syntax') diff --git a/crates/syntax/src/ast.rs b/crates/syntax/src/ast.rs index a16ac6a7c..8a0e3d27b 100644 --- a/crates/syntax/src/ast.rs +++ b/crates/syntax/src/ast.rs @@ -16,7 +16,7 @@ use crate::{ }; pub use self::{ - expr_ext::{ArrayExprKind, BinOp, Effect, ElseBranch, LiteralKind, PrefixOp, Radix, RangeOp}, + expr_ext::{ArrayExprKind, BinOp, Effect, ElseBranch, LiteralKind, PrefixOp, RangeOp}, generated::{nodes::*, tokens::*}, node_ext::{ AttrKind, FieldKind, NameOrNameRef, PathSegmentKind, SelfParamKind, SlicePatComponents, diff --git a/crates/syntax/src/ast/expr_ext.rs b/crates/syntax/src/ast/expr_ext.rs index 3aff01e83..3d33cd1cf 100644 --- a/crates/syntax/src/ast/expr_ext.rs +++ b/crates/syntax/src/ast/expr_ext.rs @@ -2,7 +2,7 @@ use crate::{ ast::{self, support, AstChildren, AstNode}, - SmolStr, + AstToken, SmolStr, SyntaxKind::*, SyntaxToken, T, }; @@ -316,6 +316,10 @@ impl ast::Literal { .unwrap() } + pub fn as_int_number(&self) -> Option { + ast::IntNumber::cast(self.token()) + } + fn find_suffix(text: &str, possible_suffixes: &[&str]) -> Option { possible_suffixes .iter() @@ -324,11 +328,6 @@ impl ast::Literal { } pub fn kind(&self) -> LiteralKind { - const INT_SUFFIXES: [&str; 12] = [ - "u64", "u32", "u16", "u8", "usize", "isize", "i64", "i32", "i16", "i8", "u128", "i128", - ]; - const FLOAT_SUFFIXES: [&str; 2] = ["f32", "f64"]; - let token = self.token(); match token.kind() { @@ -337,17 +336,20 @@ impl ast::Literal { // The lexer treats e.g. `1f64` as an integer literal. See // https://github.com/rust-analyzer/rust-analyzer/issues/1592 // and the comments on the linked PR. - let text = token.text(); - if let suffix @ Some(_) = Self::find_suffix(&text, &FLOAT_SUFFIXES) { + if let suffix @ Some(_) = Self::find_suffix(&text, &ast::FloatNumber::SUFFIXES) { LiteralKind::FloatNumber { suffix } } else { - LiteralKind::IntNumber { suffix: Self::find_suffix(&text, &INT_SUFFIXES) } + LiteralKind::IntNumber { + suffix: Self::find_suffix(&text, &ast::IntNumber::SUFFIXES), + } } } FLOAT_NUMBER => { let text = token.text(); - LiteralKind::FloatNumber { suffix: Self::find_suffix(&text, &FLOAT_SUFFIXES) } + LiteralKind::FloatNumber { + suffix: Self::find_suffix(&text, &ast::FloatNumber::SUFFIXES), + } } STRING | RAW_STRING => LiteralKind::String, T![true] => LiteralKind::Bool(true), @@ -358,71 +360,6 @@ impl ast::Literal { _ => unreachable!(), } } - - // FIXME: should probably introduce string token type? - // https://github.com/rust-analyzer/rust-analyzer/issues/6308 - pub fn int_value(&self) -> Option<(Radix, u128)> { - let suffix = match self.kind() { - LiteralKind::IntNumber { suffix } => suffix, - _ => return None, - }; - - let token = self.token(); - let mut text = token.text().as_str(); - text = &text[..text.len() - suffix.map_or(0, |it| it.len())]; - - let buf; - if text.contains("_") { - buf = text.replace('_', ""); - text = buf.as_str(); - }; - - let radix = Radix::identify(text)?; - let digits = &text[radix.prefix_len()..]; - let value = u128::from_str_radix(digits, radix as u32).ok()?; - Some((radix, value)) - } -} - -#[derive(Debug, PartialEq, Eq, Copy, Clone)] -pub enum Radix { - Binary = 2, - Octal = 8, - Decimal = 10, - Hexadecimal = 16, -} - -impl Radix { - pub const ALL: &'static [Radix] = - &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal]; - - fn identify(literal_text: &str) -> Option { - // We cannot express a literal in anything other than decimal in under 3 characters, so we return here if possible. - if literal_text.len() < 3 && literal_text.chars().all(|c| c.is_digit(10)) { - return Some(Self::Decimal); - } - - let res = match &literal_text[..2] { - "0b" => Radix::Binary, - "0o" => Radix::Octal, - "0x" => Radix::Hexadecimal, - _ => Radix::Decimal, - }; - - // Checks that all characters after the base prefix are all valid digits for that base. - if literal_text[res.prefix_len()..].chars().all(|c| c.is_digit(res as u32)) { - Some(res) - } else { - None - } - } - - const fn prefix_len(&self) -> usize { - match self { - Self::Decimal => 0, - _ => 2, - } - } } #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index 44f967acb..5623799b4 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs @@ -536,3 +536,81 @@ impl HasFormatSpecifier for ast::RawString { Some(res) } } + +impl ast::IntNumber { + #[rustfmt::skip] + pub(crate) const SUFFIXES: &'static [&'static str] = &[ + "u8", "u16", "u32", "u64", "u128", "usize", + "i8", "i16", "i32", "i64", "i128", "isize", + ]; + + // FIXME: should probably introduce string token type? + // https://github.com/rust-analyzer/rust-analyzer/issues/6308 + pub fn value(&self) -> Option<(Radix, u128)> { + let token = self.syntax(); + + let mut text = token.text().as_str(); + for suffix in ast::IntNumber::SUFFIXES { + if let Some(without_suffix) = text.strip_suffix(suffix) { + text = without_suffix; + break; + } + } + + let buf; + if text.contains("_") { + buf = text.replace('_', ""); + text = buf.as_str(); + }; + + let radix = Radix::identify(text)?; + let digits = &text[radix.prefix_len()..]; + let value = u128::from_str_radix(digits, radix as u32).ok()?; + Some((radix, value)) + } +} + +impl ast::FloatNumber { + pub(crate) const SUFFIXES: &'static [&'static str] = &["f32", "f64"]; +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum Radix { + Binary = 2, + Octal = 8, + Decimal = 10, + Hexadecimal = 16, +} + +impl Radix { + pub const ALL: &'static [Radix] = + &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal]; + + fn identify(literal_text: &str) -> Option { + // We cannot express a literal in anything other than decimal in under 3 characters, so we return here if possible. + if literal_text.len() < 3 && literal_text.chars().all(|c| c.is_digit(10)) { + return Some(Self::Decimal); + } + + let res = match &literal_text[..2] { + "0b" => Radix::Binary, + "0o" => Radix::Octal, + "0x" => Radix::Hexadecimal, + _ => Radix::Decimal, + }; + + // Checks that all characters after the base prefix are all valid digits for that base. + if literal_text[res.prefix_len()..].chars().all(|c| c.is_digit(res as u32)) { + Some(res) + } else { + None + } + } + + const fn prefix_len(&self) -> usize { + match self { + Self::Decimal => 0, + _ => 2, + } + } +} -- cgit v1.2.3 From eb460333907a44c37bf7287b31c653877c3358c2 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Fri, 6 Nov 2020 19:01:25 +0100 Subject: More orthogonal API --- crates/syntax/src/ast/token_ext.rs | 63 ++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 33 deletions(-) (limited to 'crates/syntax') diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index 5623799b4..8d3fad5a6 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs @@ -166,6 +166,7 @@ impl HasStringValue for ast::String { } } +// FIXME: merge `ast::RawString` and `ast::String`. impl HasStringValue for ast::RawString { fn value(&self) -> Option> { let text = self.text().as_str(); @@ -544,29 +545,46 @@ impl ast::IntNumber { "i8", "i16", "i32", "i64", "i128", "isize", ]; - // FIXME: should probably introduce string token type? - // https://github.com/rust-analyzer/rust-analyzer/issues/6308 - pub fn value(&self) -> Option<(Radix, u128)> { + pub fn radix(&self) -> Radix { + match self.text().get(..2).unwrap_or_default() { + "0b" => Radix::Binary, + "0o" => Radix::Octal, + "0x" => Radix::Hexadecimal, + _ => Radix::Decimal, + } + } + + pub fn value(&self) -> Option { let token = self.syntax(); let mut text = token.text().as_str(); - for suffix in ast::IntNumber::SUFFIXES { - if let Some(without_suffix) = text.strip_suffix(suffix) { - text = without_suffix; - break; - } + if let Some(suffix) = self.suffix() { + text = &text[..text.len() - suffix.len()] } + let radix = self.radix(); + text = &text[radix.prefix_len()..]; + let buf; if text.contains("_") { buf = text.replace('_', ""); text = buf.as_str(); }; - let radix = Radix::identify(text)?; - let digits = &text[radix.prefix_len()..]; - let value = u128::from_str_radix(digits, radix as u32).ok()?; - Some((radix, value)) + let value = u128::from_str_radix(text, radix as u32).ok()?; + Some(value) + } + + pub fn suffix(&self) -> Option<&str> { + let text = self.text(); + // FIXME: don't check a fixed set of suffixes, `1_0_1___lol` is valid + // syntax, suffix is `lol`. + ast::IntNumber::SUFFIXES.iter().find_map(|suffix| { + if text.ends_with(suffix) { + return Some(&text[text.len() - suffix.len()..]); + } + None + }) } } @@ -586,27 +604,6 @@ impl Radix { pub const ALL: &'static [Radix] = &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal]; - fn identify(literal_text: &str) -> Option { - // We cannot express a literal in anything other than decimal in under 3 characters, so we return here if possible. - if literal_text.len() < 3 && literal_text.chars().all(|c| c.is_digit(10)) { - return Some(Self::Decimal); - } - - let res = match &literal_text[..2] { - "0b" => Radix::Binary, - "0o" => Radix::Octal, - "0x" => Radix::Hexadecimal, - _ => Radix::Decimal, - }; - - // Checks that all characters after the base prefix are all valid digits for that base. - if literal_text[res.prefix_len()..].chars().all(|c| c.is_digit(res as u32)) { - Some(res) - } else { - None - } - } - const fn prefix_len(&self) -> usize { match self { Self::Decimal => 0, -- cgit v1.2.3