diff options
Diffstat (limited to 'crates/syntax/src')
-rw-r--r-- | crates/syntax/src/ast.rs | 2 | ||||
-rw-r--r-- | crates/syntax/src/ast/expr_ext.rs | 120 | ||||
-rw-r--r-- | crates/syntax/src/ast/generated/tokens.rs | 50 | ||||
-rw-r--r-- | crates/syntax/src/ast/make.rs | 31 | ||||
-rw-r--r-- | crates/syntax/src/ast/node_ext.rs | 10 | ||||
-rw-r--r-- | crates/syntax/src/ast/token_ext.rs | 173 | ||||
-rw-r--r-- | crates/syntax/src/parsing/lexer.rs | 114 | ||||
-rw-r--r-- | crates/syntax/src/parsing/reparsing.rs | 2 | ||||
-rw-r--r-- | crates/syntax/src/validation.rs | 52 |
9 files changed, 308 insertions, 246 deletions
diff --git a/crates/syntax/src/ast.rs b/crates/syntax/src/ast.rs index a16ac6a7c..8a0e3d27b 100644 --- a/crates/syntax/src/ast.rs +++ b/crates/syntax/src/ast.rs | |||
@@ -16,7 +16,7 @@ use crate::{ | |||
16 | }; | 16 | }; |
17 | 17 | ||
18 | pub use self::{ | 18 | pub use self::{ |
19 | expr_ext::{ArrayExprKind, BinOp, Effect, ElseBranch, LiteralKind, PrefixOp, Radix, RangeOp}, | 19 | expr_ext::{ArrayExprKind, BinOp, Effect, ElseBranch, LiteralKind, PrefixOp, RangeOp}, |
20 | generated::{nodes::*, tokens::*}, | 20 | generated::{nodes::*, tokens::*}, |
21 | node_ext::{ | 21 | node_ext::{ |
22 | AttrKind, FieldKind, NameOrNameRef, PathSegmentKind, SelfParamKind, SlicePatComponents, | 22 | AttrKind, FieldKind, NameOrNameRef, PathSegmentKind, SelfParamKind, SlicePatComponents, |
diff --git a/crates/syntax/src/ast/expr_ext.rs b/crates/syntax/src/ast/expr_ext.rs index 3aff01e83..9253c97d0 100644 --- a/crates/syntax/src/ast/expr_ext.rs +++ b/crates/syntax/src/ast/expr_ext.rs | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | use crate::{ | 3 | use crate::{ |
4 | ast::{self, support, AstChildren, AstNode}, | 4 | ast::{self, support, AstChildren, AstNode}, |
5 | SmolStr, | 5 | AstToken, |
6 | SyntaxKind::*, | 6 | SyntaxKind::*, |
7 | SyntaxToken, T, | 7 | SyntaxToken, T, |
8 | }; | 8 | }; |
@@ -298,12 +298,12 @@ impl ast::ArrayExpr { | |||
298 | 298 | ||
299 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] | 299 | #[derive(Clone, Debug, PartialEq, Eq, Hash)] |
300 | pub enum LiteralKind { | 300 | pub enum LiteralKind { |
301 | String, | 301 | String(ast::String), |
302 | ByteString, | 302 | ByteString(ast::ByteString), |
303 | IntNumber(ast::IntNumber), | ||
304 | FloatNumber(ast::FloatNumber), | ||
303 | Char, | 305 | Char, |
304 | Byte, | 306 | Byte, |
305 | IntNumber { suffix: Option<SmolStr> }, | ||
306 | FloatNumber { suffix: Option<SmolStr> }, | ||
307 | Bool(bool), | 307 | Bool(bool), |
308 | } | 308 | } |
309 | 309 | ||
@@ -315,114 +315,30 @@ impl ast::Literal { | |||
315 | .and_then(|e| e.into_token()) | 315 | .and_then(|e| e.into_token()) |
316 | .unwrap() | 316 | .unwrap() |
317 | } | 317 | } |
318 | |||
319 | fn find_suffix(text: &str, possible_suffixes: &[&str]) -> Option<SmolStr> { | ||
320 | possible_suffixes | ||
321 | .iter() | ||
322 | .find(|&suffix| text.ends_with(suffix)) | ||
323 | .map(|&suffix| SmolStr::new(suffix)) | ||
324 | } | ||
325 | |||
326 | pub fn kind(&self) -> LiteralKind { | 318 | pub fn kind(&self) -> LiteralKind { |
327 | const INT_SUFFIXES: [&str; 12] = [ | ||
328 | "u64", "u32", "u16", "u8", "usize", "isize", "i64", "i32", "i16", "i8", "u128", "i128", | ||
329 | ]; | ||
330 | const FLOAT_SUFFIXES: [&str; 2] = ["f32", "f64"]; | ||
331 | |||
332 | let token = self.token(); | 319 | let token = self.token(); |
333 | 320 | ||
321 | if let Some(t) = ast::IntNumber::cast(token.clone()) { | ||
322 | return LiteralKind::IntNumber(t); | ||
323 | } | ||
324 | if let Some(t) = ast::FloatNumber::cast(token.clone()) { | ||
325 | return LiteralKind::FloatNumber(t); | ||
326 | } | ||
327 | if let Some(t) = ast::String::cast(token.clone()) { | ||
328 | return LiteralKind::String(t); | ||
329 | } | ||
330 | if let Some(t) = ast::ByteString::cast(token.clone()) { | ||
331 | return LiteralKind::ByteString(t); | ||
332 | } | ||
333 | |||
334 | match token.kind() { | 334 | match token.kind() { |
335 | INT_NUMBER => { | ||
336 | // FYI: there was a bug here previously, thus the if statement below is necessary. | ||
337 | // The lexer treats e.g. `1f64` as an integer literal. See | ||
338 | // https://github.com/rust-analyzer/rust-analyzer/issues/1592 | ||
339 | // and the comments on the linked PR. | ||
340 | |||
341 | let text = token.text(); | ||
342 | if let suffix @ Some(_) = Self::find_suffix(&text, &FLOAT_SUFFIXES) { | ||
343 | LiteralKind::FloatNumber { suffix } | ||
344 | } else { | ||
345 | LiteralKind::IntNumber { suffix: Self::find_suffix(&text, &INT_SUFFIXES) } | ||
346 | } | ||
347 | } | ||
348 | FLOAT_NUMBER => { | ||
349 | let text = token.text(); | ||
350 | LiteralKind::FloatNumber { suffix: Self::find_suffix(&text, &FLOAT_SUFFIXES) } | ||
351 | } | ||
352 | STRING | RAW_STRING => LiteralKind::String, | ||
353 | T![true] => LiteralKind::Bool(true), | 335 | T![true] => LiteralKind::Bool(true), |
354 | T![false] => LiteralKind::Bool(false), | 336 | T![false] => LiteralKind::Bool(false), |
355 | BYTE_STRING | RAW_BYTE_STRING => LiteralKind::ByteString, | ||
356 | CHAR => LiteralKind::Char, | 337 | CHAR => LiteralKind::Char, |
357 | BYTE => LiteralKind::Byte, | 338 | BYTE => LiteralKind::Byte, |
358 | _ => unreachable!(), | 339 | _ => unreachable!(), |
359 | } | 340 | } |
360 | } | 341 | } |
361 | |||
362 | // FIXME: should probably introduce string token type? | ||
363 | // https://github.com/rust-analyzer/rust-analyzer/issues/6308 | ||
364 | pub fn int_value(&self) -> Option<(Radix, u128)> { | ||
365 | let suffix = match self.kind() { | ||
366 | LiteralKind::IntNumber { suffix } => suffix, | ||
367 | _ => return None, | ||
368 | }; | ||
369 | |||
370 | let token = self.token(); | ||
371 | let mut text = token.text().as_str(); | ||
372 | text = &text[..text.len() - suffix.map_or(0, |it| it.len())]; | ||
373 | |||
374 | let buf; | ||
375 | if text.contains("_") { | ||
376 | buf = text.replace('_', ""); | ||
377 | text = buf.as_str(); | ||
378 | }; | ||
379 | |||
380 | let radix = Radix::identify(text)?; | ||
381 | let digits = &text[radix.prefix_len()..]; | ||
382 | let value = u128::from_str_radix(digits, radix as u32).ok()?; | ||
383 | Some((radix, value)) | ||
384 | } | ||
385 | } | ||
386 | |||
387 | #[derive(Debug, PartialEq, Eq, Copy, Clone)] | ||
388 | pub enum Radix { | ||
389 | Binary = 2, | ||
390 | Octal = 8, | ||
391 | Decimal = 10, | ||
392 | Hexadecimal = 16, | ||
393 | } | ||
394 | |||
395 | impl Radix { | ||
396 | pub const ALL: &'static [Radix] = | ||
397 | &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal]; | ||
398 | |||
399 | fn identify(literal_text: &str) -> Option<Self> { | ||
400 | // We cannot express a literal in anything other than decimal in under 3 characters, so we return here if possible. | ||
401 | if literal_text.len() < 3 && literal_text.chars().all(|c| c.is_digit(10)) { | ||
402 | return Some(Self::Decimal); | ||
403 | } | ||
404 | |||
405 | let res = match &literal_text[..2] { | ||
406 | "0b" => Radix::Binary, | ||
407 | "0o" => Radix::Octal, | ||
408 | "0x" => Radix::Hexadecimal, | ||
409 | _ => Radix::Decimal, | ||
410 | }; | ||
411 | |||
412 | // Checks that all characters after the base prefix are all valid digits for that base. | ||
413 | if literal_text[res.prefix_len()..].chars().all(|c| c.is_digit(res as u32)) { | ||
414 | Some(res) | ||
415 | } else { | ||
416 | None | ||
417 | } | ||
418 | } | ||
419 | |||
420 | const fn prefix_len(&self) -> usize { | ||
421 | match self { | ||
422 | Self::Decimal => 0, | ||
423 | _ => 2, | ||
424 | } | ||
425 | } | ||
426 | } | 342 | } |
427 | 343 | ||
428 | #[derive(Debug, Clone, PartialEq, Eq)] | 344 | #[derive(Debug, Clone, PartialEq, Eq)] |
diff --git a/crates/syntax/src/ast/generated/tokens.rs b/crates/syntax/src/ast/generated/tokens.rs index abadd0b61..728b72cd7 100644 --- a/crates/syntax/src/ast/generated/tokens.rs +++ b/crates/syntax/src/ast/generated/tokens.rs | |||
@@ -70,16 +70,58 @@ impl AstToken for String { | |||
70 | } | 70 | } |
71 | 71 | ||
72 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | 72 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] |
73 | pub struct RawString { | 73 | pub struct ByteString { |
74 | pub(crate) syntax: SyntaxToken, | 74 | pub(crate) syntax: SyntaxToken, |
75 | } | 75 | } |
76 | impl std::fmt::Display for RawString { | 76 | impl std::fmt::Display for ByteString { |
77 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | 77 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { |
78 | std::fmt::Display::fmt(&self.syntax, f) | 78 | std::fmt::Display::fmt(&self.syntax, f) |
79 | } | 79 | } |
80 | } | 80 | } |
81 | impl AstToken for RawString { | 81 | impl AstToken for ByteString { |
82 | fn can_cast(kind: SyntaxKind) -> bool { kind == RAW_STRING } | 82 | fn can_cast(kind: SyntaxKind) -> bool { kind == BYTE_STRING } |
83 | fn cast(syntax: SyntaxToken) -> Option<Self> { | ||
84 | if Self::can_cast(syntax.kind()) { | ||
85 | Some(Self { syntax }) | ||
86 | } else { | ||
87 | None | ||
88 | } | ||
89 | } | ||
90 | fn syntax(&self) -> &SyntaxToken { &self.syntax } | ||
91 | } | ||
92 | |||
93 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
94 | pub struct IntNumber { | ||
95 | pub(crate) syntax: SyntaxToken, | ||
96 | } | ||
97 | impl std::fmt::Display for IntNumber { | ||
98 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
99 | std::fmt::Display::fmt(&self.syntax, f) | ||
100 | } | ||
101 | } | ||
102 | impl AstToken for IntNumber { | ||
103 | fn can_cast(kind: SyntaxKind) -> bool { kind == INT_NUMBER } | ||
104 | fn cast(syntax: SyntaxToken) -> Option<Self> { | ||
105 | if Self::can_cast(syntax.kind()) { | ||
106 | Some(Self { syntax }) | ||
107 | } else { | ||
108 | None | ||
109 | } | ||
110 | } | ||
111 | fn syntax(&self) -> &SyntaxToken { &self.syntax } | ||
112 | } | ||
113 | |||
114 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
115 | pub struct FloatNumber { | ||
116 | pub(crate) syntax: SyntaxToken, | ||
117 | } | ||
118 | impl std::fmt::Display for FloatNumber { | ||
119 | fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
120 | std::fmt::Display::fmt(&self.syntax, f) | ||
121 | } | ||
122 | } | ||
123 | impl AstToken for FloatNumber { | ||
124 | fn can_cast(kind: SyntaxKind) -> bool { kind == FLOAT_NUMBER } | ||
83 | fn cast(syntax: SyntaxToken) -> Option<Self> { | 125 | fn cast(syntax: SyntaxToken) -> Option<Self> { |
84 | if Self::can_cast(syntax.kind()) { | 126 | if Self::can_cast(syntax.kind()) { |
85 | Some(Self { syntax }) | 127 | Some(Self { syntax }) |
diff --git a/crates/syntax/src/ast/make.rs b/crates/syntax/src/ast/make.rs index 2cf436e7a..876659a2b 100644 --- a/crates/syntax/src/ast/make.rs +++ b/crates/syntax/src/ast/make.rs | |||
@@ -25,6 +25,10 @@ pub fn assoc_item_list() -> ast::AssocItemList { | |||
25 | ast_from_text("impl C for D {};") | 25 | ast_from_text("impl C for D {};") |
26 | } | 26 | } |
27 | 27 | ||
28 | pub fn impl_trait(trait_: ast::Path, ty: ast::Path) -> ast::Impl { | ||
29 | ast_from_text(&format!("impl {} for {} {{}}", trait_, ty)) | ||
30 | } | ||
31 | |||
28 | pub fn path_segment(name_ref: ast::NameRef) -> ast::PathSegment { | 32 | pub fn path_segment(name_ref: ast::NameRef) -> ast::PathSegment { |
29 | ast_from_text(&format!("use {};", name_ref)) | 33 | ast_from_text(&format!("use {};", name_ref)) |
30 | } | 34 | } |
@@ -110,8 +114,16 @@ pub fn record_expr_field(name: ast::NameRef, expr: Option<ast::Expr>) -> ast::Re | |||
110 | } | 114 | } |
111 | } | 115 | } |
112 | 116 | ||
113 | pub fn record_field(name: ast::NameRef, ty: ast::Type) -> ast::RecordField { | 117 | pub fn record_field( |
114 | ast_from_text(&format!("struct S {{ {}: {}, }}", name, ty)) | 118 | visibility: Option<ast::Visibility>, |
119 | name: ast::Name, | ||
120 | ty: ast::Type, | ||
121 | ) -> ast::RecordField { | ||
122 | let visibility = match visibility { | ||
123 | None => String::new(), | ||
124 | Some(it) => format!("{} ", it), | ||
125 | }; | ||
126 | ast_from_text(&format!("struct S {{ {}{}: {}, }}", visibility, name, ty)) | ||
115 | } | 127 | } |
116 | 128 | ||
117 | pub fn block_expr( | 129 | pub fn block_expr( |
@@ -360,6 +372,13 @@ pub fn tuple_field_list(fields: impl IntoIterator<Item = ast::TupleField>) -> as | |||
360 | ast_from_text(&format!("struct f({});", fields)) | 372 | ast_from_text(&format!("struct f({});", fields)) |
361 | } | 373 | } |
362 | 374 | ||
375 | pub fn record_field_list( | ||
376 | fields: impl IntoIterator<Item = ast::RecordField>, | ||
377 | ) -> ast::RecordFieldList { | ||
378 | let fields = fields.into_iter().join(", "); | ||
379 | ast_from_text(&format!("struct f {{ {} }}", fields)) | ||
380 | } | ||
381 | |||
363 | pub fn tuple_field(visibility: Option<ast::Visibility>, ty: ast::Type) -> ast::TupleField { | 382 | pub fn tuple_field(visibility: Option<ast::Visibility>, ty: ast::Type) -> ast::TupleField { |
364 | let visibility = match visibility { | 383 | let visibility = match visibility { |
365 | None => String::new(), | 384 | None => String::new(), |
@@ -368,6 +387,14 @@ pub fn tuple_field(visibility: Option<ast::Visibility>, ty: ast::Type) -> ast::T | |||
368 | ast_from_text(&format!("struct f({}{});", visibility, ty)) | 387 | ast_from_text(&format!("struct f({}{});", visibility, ty)) |
369 | } | 388 | } |
370 | 389 | ||
390 | pub fn variant(name: ast::Name, field_list: Option<ast::FieldList>) -> ast::Variant { | ||
391 | let field_list = match field_list { | ||
392 | None => String::new(), | ||
393 | Some(it) => format!("{}", it), | ||
394 | }; | ||
395 | ast_from_text(&format!("enum f {{ {}{} }}", name, field_list)) | ||
396 | } | ||
397 | |||
371 | pub fn fn_( | 398 | pub fn fn_( |
372 | visibility: Option<ast::Visibility>, | 399 | visibility: Option<ast::Visibility>, |
373 | fn_name: ast::Name, | 400 | fn_name: ast::Name, |
diff --git a/crates/syntax/src/ast/node_ext.rs b/crates/syntax/src/ast/node_ext.rs index c5cd1c504..ce35ac01a 100644 --- a/crates/syntax/src/ast/node_ext.rs +++ b/crates/syntax/src/ast/node_ext.rs | |||
@@ -7,7 +7,7 @@ use itertools::Itertools; | |||
7 | use parser::SyntaxKind; | 7 | use parser::SyntaxKind; |
8 | 8 | ||
9 | use crate::{ | 9 | use crate::{ |
10 | ast::{self, support, token_ext::HasStringValue, AstNode, AstToken, NameOwner, SyntaxNode}, | 10 | ast::{self, support, AstNode, AstToken, NameOwner, SyntaxNode}, |
11 | SmolStr, SyntaxElement, SyntaxToken, T, | 11 | SmolStr, SyntaxElement, SyntaxToken, T, |
12 | }; | 12 | }; |
13 | 13 | ||
@@ -55,13 +55,7 @@ impl ast::Attr { | |||
55 | let key = self.simple_name()?; | 55 | let key = self.simple_name()?; |
56 | let value_token = lit.syntax().first_token()?; | 56 | let value_token = lit.syntax().first_token()?; |
57 | 57 | ||
58 | let value: SmolStr = if let Some(s) = ast::String::cast(value_token.clone()) { | 58 | let value: SmolStr = ast::String::cast(value_token.clone())?.value()?.into(); |
59 | s.value()?.into() | ||
60 | } else if let Some(s) = ast::RawString::cast(value_token) { | ||
61 | s.value()?.into() | ||
62 | } else { | ||
63 | return None; | ||
64 | }; | ||
65 | 59 | ||
66 | Some((key, value)) | 60 | Some((key, value)) |
67 | } | 61 | } |
diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs index c5ef92733..e4e512f2e 100644 --- a/crates/syntax/src/ast/token_ext.rs +++ b/crates/syntax/src/ast/token_ext.rs | |||
@@ -8,11 +8,11 @@ use std::{ | |||
8 | use rustc_lexer::unescape::{unescape_literal, Mode}; | 8 | use rustc_lexer::unescape::{unescape_literal, Mode}; |
9 | 9 | ||
10 | use crate::{ | 10 | use crate::{ |
11 | ast::{AstToken, Comment, RawString, String, Whitespace}, | 11 | ast::{self, AstToken}, |
12 | TextRange, TextSize, | 12 | TextRange, TextSize, |
13 | }; | 13 | }; |
14 | 14 | ||
15 | impl Comment { | 15 | impl ast::Comment { |
16 | pub fn kind(&self) -> CommentKind { | 16 | pub fn kind(&self) -> CommentKind { |
17 | kind_by_prefix(self.text()) | 17 | kind_by_prefix(self.text()) |
18 | } | 18 | } |
@@ -80,7 +80,7 @@ fn kind_by_prefix(text: &str) -> CommentKind { | |||
80 | panic!("bad comment text: {:?}", text) | 80 | panic!("bad comment text: {:?}", text) |
81 | } | 81 | } |
82 | 82 | ||
83 | impl Whitespace { | 83 | impl ast::Whitespace { |
84 | pub fn spans_multiple_lines(&self) -> bool { | 84 | pub fn spans_multiple_lines(&self) -> bool { |
85 | let text = self.text(); | 85 | let text = self.text(); |
86 | text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n')) | 86 | text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n')) |
@@ -114,43 +114,28 @@ impl QuoteOffsets { | |||
114 | } | 114 | } |
115 | } | 115 | } |
116 | 116 | ||
117 | pub trait HasQuotes: AstToken { | 117 | impl ast::String { |
118 | fn quote_offsets(&self) -> Option<QuoteOffsets> { | 118 | pub fn is_raw(&self) -> bool { |
119 | let text = self.text().as_str(); | 119 | self.text().starts_with('r') |
120 | let offsets = QuoteOffsets::new(text)?; | ||
121 | let o = self.syntax().text_range().start(); | ||
122 | let offsets = QuoteOffsets { | ||
123 | quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o), | ||
124 | contents: offsets.contents + o, | ||
125 | }; | ||
126 | Some(offsets) | ||
127 | } | ||
128 | fn open_quote_text_range(&self) -> Option<TextRange> { | ||
129 | self.quote_offsets().map(|it| it.quotes.0) | ||
130 | } | 120 | } |
131 | 121 | pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> { | |
132 | fn close_quote_text_range(&self) -> Option<TextRange> { | 122 | let contents_range = self.text_range_between_quotes()?; |
133 | self.quote_offsets().map(|it| it.quotes.1) | 123 | assert!(TextRange::up_to(contents_range.len()).contains_range(range)); |
124 | Some(range + contents_range.start()) | ||
134 | } | 125 | } |
135 | 126 | ||
136 | fn text_range_between_quotes(&self) -> Option<TextRange> { | 127 | pub fn value(&self) -> Option<Cow<'_, str>> { |
137 | self.quote_offsets().map(|it| it.contents) | 128 | if self.is_raw() { |
138 | } | 129 | let text = self.text().as_str(); |
139 | } | 130 | let text = |
140 | 131 | &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; | |
141 | impl HasQuotes for String {} | 132 | return Some(Cow::Borrowed(text)); |
142 | impl HasQuotes for RawString {} | 133 | } |
143 | |||
144 | pub trait HasStringValue: HasQuotes { | ||
145 | fn value(&self) -> Option<Cow<'_, str>>; | ||
146 | } | ||
147 | 134 | ||
148 | impl HasStringValue for String { | ||
149 | fn value(&self) -> Option<Cow<'_, str>> { | ||
150 | let text = self.text().as_str(); | 135 | let text = self.text().as_str(); |
151 | let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; | 136 | let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; |
152 | 137 | ||
153 | let mut buf = std::string::String::with_capacity(text.len()); | 138 | let mut buf = String::with_capacity(text.len()); |
154 | let mut has_error = false; | 139 | let mut has_error = false; |
155 | unescape_literal(text, Mode::Str, &mut |_, unescaped_char| match unescaped_char { | 140 | unescape_literal(text, Mode::Str, &mut |_, unescaped_char| match unescaped_char { |
156 | Ok(c) => buf.push(c), | 141 | Ok(c) => buf.push(c), |
@@ -164,21 +149,31 @@ impl HasStringValue for String { | |||
164 | let res = if buf == text { Cow::Borrowed(text) } else { Cow::Owned(buf) }; | 149 | let res = if buf == text { Cow::Borrowed(text) } else { Cow::Owned(buf) }; |
165 | Some(res) | 150 | Some(res) |
166 | } | 151 | } |
167 | } | ||
168 | 152 | ||
169 | impl HasStringValue for RawString { | 153 | pub fn quote_offsets(&self) -> Option<QuoteOffsets> { |
170 | fn value(&self) -> Option<Cow<'_, str>> { | ||
171 | let text = self.text().as_str(); | 154 | let text = self.text().as_str(); |
172 | let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; | 155 | let offsets = QuoteOffsets::new(text)?; |
173 | Some(Cow::Borrowed(text)) | 156 | let o = self.syntax().text_range().start(); |
157 | let offsets = QuoteOffsets { | ||
158 | quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o), | ||
159 | contents: offsets.contents + o, | ||
160 | }; | ||
161 | Some(offsets) | ||
162 | } | ||
163 | pub fn text_range_between_quotes(&self) -> Option<TextRange> { | ||
164 | self.quote_offsets().map(|it| it.contents) | ||
165 | } | ||
166 | pub fn open_quote_text_range(&self) -> Option<TextRange> { | ||
167 | self.quote_offsets().map(|it| it.quotes.0) | ||
168 | } | ||
169 | pub fn close_quote_text_range(&self) -> Option<TextRange> { | ||
170 | self.quote_offsets().map(|it| it.quotes.1) | ||
174 | } | 171 | } |
175 | } | 172 | } |
176 | 173 | ||
177 | impl RawString { | 174 | impl ast::ByteString { |
178 | pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> { | 175 | pub fn is_raw(&self) -> bool { |
179 | let contents_range = self.text_range_between_quotes()?; | 176 | self.text().starts_with("br") |
180 | assert!(TextRange::up_to(contents_range.len()).contains_range(range)); | ||
181 | Some(range + contents_range.start()) | ||
182 | } | 177 | } |
183 | } | 178 | } |
184 | 179 | ||
@@ -500,7 +495,7 @@ pub trait HasFormatSpecifier: AstToken { | |||
500 | } | 495 | } |
501 | } | 496 | } |
502 | 497 | ||
503 | impl HasFormatSpecifier for String { | 498 | impl HasFormatSpecifier for ast::String { |
504 | fn char_ranges( | 499 | fn char_ranges( |
505 | &self, | 500 | &self, |
506 | ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> { | 501 | ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> { |
@@ -521,18 +516,86 @@ impl HasFormatSpecifier for String { | |||
521 | } | 516 | } |
522 | } | 517 | } |
523 | 518 | ||
524 | impl HasFormatSpecifier for RawString { | 519 | impl ast::IntNumber { |
525 | fn char_ranges( | 520 | const SUFFIXES: &'static [&'static str] = &[ |
526 | &self, | 521 | "u8", "u16", "u32", "u64", "u128", "usize", // Unsigned. |
527 | ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> { | 522 | "i8", "i16", "i32", "i64", "i128", "isize", // Signed. |
528 | let text = self.text().as_str(); | 523 | ]; |
529 | let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; | 524 | |
530 | let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start(); | 525 | pub fn radix(&self) -> Radix { |
526 | match self.text().get(..2).unwrap_or_default() { | ||
527 | "0b" => Radix::Binary, | ||
528 | "0o" => Radix::Octal, | ||
529 | "0x" => Radix::Hexadecimal, | ||
530 | _ => Radix::Decimal, | ||
531 | } | ||
532 | } | ||
531 | 533 | ||
532 | let mut res = Vec::with_capacity(text.len()); | 534 | pub fn value(&self) -> Option<u128> { |
533 | for (idx, c) in text.char_indices() { | 535 | let token = self.syntax(); |
534 | res.push((TextRange::at(idx.try_into().unwrap(), TextSize::of(c)) + offset, Ok(c))); | 536 | |
537 | let mut text = token.text().as_str(); | ||
538 | if let Some(suffix) = self.suffix() { | ||
539 | text = &text[..text.len() - suffix.len()] | ||
540 | } | ||
541 | |||
542 | let radix = self.radix(); | ||
543 | text = &text[radix.prefix_len()..]; | ||
544 | |||
545 | let buf; | ||
546 | if text.contains("_") { | ||
547 | buf = text.replace('_', ""); | ||
548 | text = buf.as_str(); | ||
549 | }; | ||
550 | |||
551 | let value = u128::from_str_radix(text, radix as u32).ok()?; | ||
552 | Some(value) | ||
553 | } | ||
554 | |||
555 | pub fn suffix(&self) -> Option<&str> { | ||
556 | let text = self.text(); | ||
557 | // FIXME: don't check a fixed set of suffixes, `1_0_1_l_o_l` is valid | ||
558 | // syntax, suffix is `l_o_l`. | ||
559 | ast::IntNumber::SUFFIXES.iter().chain(ast::FloatNumber::SUFFIXES.iter()).find_map( | ||
560 | |suffix| { | ||
561 | if text.ends_with(suffix) { | ||
562 | return Some(&text[text.len() - suffix.len()..]); | ||
563 | } | ||
564 | None | ||
565 | }, | ||
566 | ) | ||
567 | } | ||
568 | } | ||
569 | |||
570 | impl ast::FloatNumber { | ||
571 | const SUFFIXES: &'static [&'static str] = &["f32", "f64"]; | ||
572 | pub fn suffix(&self) -> Option<&str> { | ||
573 | let text = self.text(); | ||
574 | ast::FloatNumber::SUFFIXES.iter().find_map(|suffix| { | ||
575 | if text.ends_with(suffix) { | ||
576 | return Some(&text[text.len() - suffix.len()..]); | ||
577 | } | ||
578 | None | ||
579 | }) | ||
580 | } | ||
581 | } | ||
582 | |||
583 | #[derive(Debug, PartialEq, Eq, Copy, Clone)] | ||
584 | pub enum Radix { | ||
585 | Binary = 2, | ||
586 | Octal = 8, | ||
587 | Decimal = 10, | ||
588 | Hexadecimal = 16, | ||
589 | } | ||
590 | |||
591 | impl Radix { | ||
592 | pub const ALL: &'static [Radix] = | ||
593 | &[Radix::Binary, Radix::Octal, Radix::Decimal, Radix::Hexadecimal]; | ||
594 | |||
595 | const fn prefix_len(&self) -> usize { | ||
596 | match self { | ||
597 | Self::Decimal => 0, | ||
598 | _ => 2, | ||
535 | } | 599 | } |
536 | Some(res) | ||
537 | } | 600 | } |
538 | } | 601 | } |
diff --git a/crates/syntax/src/parsing/lexer.rs b/crates/syntax/src/parsing/lexer.rs index 7e38c32cc..8afd7e53b 100644 --- a/crates/syntax/src/parsing/lexer.rs +++ b/crates/syntax/src/parsing/lexer.rs | |||
@@ -3,7 +3,7 @@ | |||
3 | 3 | ||
4 | use std::convert::TryInto; | 4 | use std::convert::TryInto; |
5 | 5 | ||
6 | use rustc_lexer::{LiteralKind as LK, RawStrError}; | 6 | use rustc_lexer::RawStrError; |
7 | 7 | ||
8 | use crate::{ | 8 | use crate::{ |
9 | SyntaxError, | 9 | SyntaxError, |
@@ -185,63 +185,77 @@ fn rustc_token_kind_to_syntax_kind( | |||
185 | return (syntax_kind, None); | 185 | return (syntax_kind, None); |
186 | 186 | ||
187 | fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) { | 187 | fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option<&'static str>) { |
188 | #[rustfmt::skip] | 188 | let mut err = ""; |
189 | let syntax_kind = match *kind { | 189 | let syntax_kind = match *kind { |
190 | LK::Int { empty_int: false, .. } => INT_NUMBER, | 190 | rustc_lexer::LiteralKind::Int { empty_int, base: _ } => { |
191 | LK::Int { empty_int: true, .. } => { | 191 | if empty_int { |
192 | return (INT_NUMBER, Some("Missing digits after the integer base prefix")) | 192 | err = "Missing digits after the integer base prefix"; |
193 | } | ||
194 | INT_NUMBER | ||
193 | } | 195 | } |
194 | 196 | rustc_lexer::LiteralKind::Float { empty_exponent, base: _ } => { | |
195 | LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER, | 197 | if empty_exponent { |
196 | LK::Float { empty_exponent: true, .. } => { | 198 | err = "Missing digits after the exponent symbol"; |
197 | return (FLOAT_NUMBER, Some("Missing digits after the exponent symbol")) | 199 | } |
200 | FLOAT_NUMBER | ||
198 | } | 201 | } |
199 | 202 | rustc_lexer::LiteralKind::Char { terminated } => { | |
200 | LK::Char { terminated: true } => CHAR, | 203 | if !terminated { |
201 | LK::Char { terminated: false } => { | 204 | err = "Missing trailing `'` symbol to terminate the character literal"; |
202 | return (CHAR, Some("Missing trailing `'` symbol to terminate the character literal")) | 205 | } |
206 | CHAR | ||
203 | } | 207 | } |
204 | 208 | rustc_lexer::LiteralKind::Byte { terminated } => { | |
205 | LK::Byte { terminated: true } => BYTE, | 209 | if !terminated { |
206 | LK::Byte { terminated: false } => { | 210 | err = "Missing trailing `'` symbol to terminate the byte literal"; |
207 | return (BYTE, Some("Missing trailing `'` symbol to terminate the byte literal")) | 211 | } |
212 | BYTE | ||
208 | } | 213 | } |
209 | 214 | rustc_lexer::LiteralKind::Str { terminated } => { | |
210 | LK::Str { terminated: true } => STRING, | 215 | if !terminated { |
211 | LK::Str { terminated: false } => { | 216 | err = "Missing trailing `\"` symbol to terminate the string literal"; |
212 | return (STRING, Some("Missing trailing `\"` symbol to terminate the string literal")) | 217 | } |
218 | STRING | ||
213 | } | 219 | } |
214 | 220 | rustc_lexer::LiteralKind::ByteStr { terminated } => { | |
215 | 221 | if !terminated { | |
216 | LK::ByteStr { terminated: true } => BYTE_STRING, | 222 | err = "Missing trailing `\"` symbol to terminate the byte string literal"; |
217 | LK::ByteStr { terminated: false } => { | 223 | } |
218 | return (BYTE_STRING, Some("Missing trailing `\"` symbol to terminate the byte string literal")) | 224 | BYTE_STRING |
225 | } | ||
226 | rustc_lexer::LiteralKind::RawStr { err: raw_str_err, .. } => { | ||
227 | if let Some(raw_str_err) = raw_str_err { | ||
228 | err = match raw_str_err { | ||
229 | RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw string literal", | ||
230 | RawStrError::NoTerminator { expected, found, .. } => if expected == found { | ||
231 | "Missing trailing `\"` to terminate the raw string literal" | ||
232 | } else { | ||
233 | "Missing trailing `\"` with `#` symbols to terminate the raw string literal" | ||
234 | }, | ||
235 | RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols", | ||
236 | }; | ||
237 | }; | ||
238 | STRING | ||
239 | } | ||
240 | rustc_lexer::LiteralKind::RawByteStr { err: raw_str_err, .. } => { | ||
241 | if let Some(raw_str_err) = raw_str_err { | ||
242 | err = match raw_str_err { | ||
243 | RawStrError::InvalidStarter { .. } => "Missing `\"` symbol after `#` symbols to begin the raw byte string literal", | ||
244 | RawStrError::NoTerminator { expected, found, .. } => if expected == found { | ||
245 | "Missing trailing `\"` to terminate the raw byte string literal" | ||
246 | } else { | ||
247 | "Missing trailing `\"` with `#` symbols to terminate the raw byte string literal" | ||
248 | }, | ||
249 | RawStrError::TooManyDelimiters { .. } => "Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols", | ||
250 | }; | ||
251 | }; | ||
252 | |||
253 | BYTE_STRING | ||
219 | } | 254 | } |
220 | |||
221 | LK::RawStr { err, .. } => match err { | ||
222 | None => RAW_STRING, | ||
223 | Some(RawStrError::InvalidStarter { .. }) => return (RAW_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw string literal")), | ||
224 | Some(RawStrError::NoTerminator { expected, found, .. }) => if expected == found { | ||
225 | return (RAW_STRING, Some("Missing trailing `\"` to terminate the raw string literal")) | ||
226 | } else { | ||
227 | return (RAW_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw string literal")) | ||
228 | |||
229 | }, | ||
230 | Some(RawStrError::TooManyDelimiters { .. }) => return (RAW_STRING, Some("Too many `#` symbols: raw strings may be delimited by up to 65535 `#` symbols")), | ||
231 | }, | ||
232 | LK::RawByteStr { err, .. } => match err { | ||
233 | None => RAW_BYTE_STRING, | ||
234 | Some(RawStrError::InvalidStarter { .. }) => return (RAW_BYTE_STRING, Some("Missing `\"` symbol after `#` symbols to begin the raw byte string literal")), | ||
235 | Some(RawStrError::NoTerminator { expected, found, .. }) => if expected == found { | ||
236 | return (RAW_BYTE_STRING, Some("Missing trailing `\"` to terminate the raw byte string literal")) | ||
237 | } else { | ||
238 | return (RAW_BYTE_STRING, Some("Missing trailing `\"` with `#` symbols to terminate the raw byte string literal")) | ||
239 | |||
240 | }, | ||
241 | Some(RawStrError::TooManyDelimiters { .. }) => return (RAW_BYTE_STRING, Some("Too many `#` symbols: raw byte strings may be delimited by up to 65535 `#` symbols")), | ||
242 | }, | ||
243 | }; | 255 | }; |
244 | 256 | ||
245 | (syntax_kind, None) | 257 | let err = if err.is_empty() { None } else { Some(err) }; |
258 | |||
259 | (syntax_kind, err) | ||
246 | } | 260 | } |
247 | } | 261 | } |
diff --git a/crates/syntax/src/parsing/reparsing.rs b/crates/syntax/src/parsing/reparsing.rs index 4149f856a..190f5f67a 100644 --- a/crates/syntax/src/parsing/reparsing.rs +++ b/crates/syntax/src/parsing/reparsing.rs | |||
@@ -44,7 +44,7 @@ fn reparse_token<'node>( | |||
44 | let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone(); | 44 | let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone(); |
45 | let prev_token_kind = prev_token.kind(); | 45 | let prev_token_kind = prev_token.kind(); |
46 | match prev_token_kind { | 46 | match prev_token_kind { |
47 | WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => { | 47 | WHITESPACE | COMMENT | IDENT | STRING => { |
48 | if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT { | 48 | if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT { |
49 | // removing a new line may extends previous token | 49 | // removing a new line may extends previous token |
50 | let deleted_range = edit.delete - prev_token.text_range().start(); | 50 | let deleted_range = edit.delete - prev_token.text_range().start(); |
diff --git a/crates/syntax/src/validation.rs b/crates/syntax/src/validation.rs index 0f9a5e8ae..6f45149bf 100644 --- a/crates/syntax/src/validation.rs +++ b/crates/syntax/src/validation.rs | |||
@@ -4,7 +4,7 @@ mod block; | |||
4 | 4 | ||
5 | use crate::{ | 5 | use crate::{ |
6 | algo, ast, match_ast, AstNode, SyntaxError, | 6 | algo, ast, match_ast, AstNode, SyntaxError, |
7 | SyntaxKind::{BYTE, BYTE_STRING, CHAR, CONST, FN, INT_NUMBER, STRING, TYPE_ALIAS}, | 7 | SyntaxKind::{CONST, FN, INT_NUMBER, TYPE_ALIAS}, |
8 | SyntaxNode, SyntaxToken, TextSize, T, | 8 | SyntaxNode, SyntaxToken, TextSize, T, |
9 | }; | 9 | }; |
10 | use rowan::Direction; | 10 | use rowan::Direction; |
@@ -121,36 +121,42 @@ fn validate_literal(literal: ast::Literal, acc: &mut Vec<SyntaxError>) { | |||
121 | acc.push(SyntaxError::new_at_offset(rustc_unescape_error_to_string(err), off)); | 121 | acc.push(SyntaxError::new_at_offset(rustc_unescape_error_to_string(err), off)); |
122 | }; | 122 | }; |
123 | 123 | ||
124 | match token.kind() { | 124 | match literal.kind() { |
125 | BYTE => { | 125 | ast::LiteralKind::String(s) => { |
126 | if let Some(Err(e)) = unquote(text, 2, '\'').map(unescape_byte) { | 126 | if !s.is_raw() { |
127 | push_err(2, e); | 127 | if let Some(without_quotes) = unquote(text, 1, '"') { |
128 | unescape_literal(without_quotes, Mode::Str, &mut |range, char| { | ||
129 | if let Err(err) = char { | ||
130 | push_err(1, (range.start, err)); | ||
131 | } | ||
132 | }) | ||
133 | } | ||
128 | } | 134 | } |
129 | } | 135 | } |
130 | CHAR => { | 136 | ast::LiteralKind::ByteString(s) => { |
131 | if let Some(Err(e)) = unquote(text, 1, '\'').map(unescape_char) { | 137 | if !s.is_raw() { |
132 | push_err(1, e); | 138 | if let Some(without_quotes) = unquote(text, 2, '"') { |
139 | unescape_byte_literal(without_quotes, Mode::ByteStr, &mut |range, char| { | ||
140 | if let Err(err) = char { | ||
141 | push_err(2, (range.start, err)); | ||
142 | } | ||
143 | }) | ||
144 | } | ||
133 | } | 145 | } |
134 | } | 146 | } |
135 | BYTE_STRING => { | 147 | ast::LiteralKind::Char => { |
136 | if let Some(without_quotes) = unquote(text, 2, '"') { | 148 | if let Some(Err(e)) = unquote(text, 1, '\'').map(unescape_char) { |
137 | unescape_byte_literal(without_quotes, Mode::ByteStr, &mut |range, char| { | 149 | push_err(1, e); |
138 | if let Err(err) = char { | ||
139 | push_err(2, (range.start, err)); | ||
140 | } | ||
141 | }) | ||
142 | } | 150 | } |
143 | } | 151 | } |
144 | STRING => { | 152 | ast::LiteralKind::Byte => { |
145 | if let Some(without_quotes) = unquote(text, 1, '"') { | 153 | if let Some(Err(e)) = unquote(text, 2, '\'').map(unescape_byte) { |
146 | unescape_literal(without_quotes, Mode::Str, &mut |range, char| { | 154 | push_err(2, e); |
147 | if let Err(err) = char { | ||
148 | push_err(1, (range.start, err)); | ||
149 | } | ||
150 | }) | ||
151 | } | 155 | } |
152 | } | 156 | } |
153 | _ => (), | 157 | ast::LiteralKind::IntNumber(_) |
158 | | ast::LiteralKind::FloatNumber(_) | ||
159 | | ast::LiteralKind::Bool(_) => {} | ||
154 | } | 160 | } |
155 | } | 161 | } |
156 | 162 | ||