From ad24976da38482948c586bdbc16004273662ff7e Mon Sep 17 00:00:00 2001 From: Veetaha Date: Fri, 24 Jan 2020 03:39:23 +0200 Subject: ra_syntax: changed added diagnostics information returned from tokenize() (implemented with iterators) --- crates/ra_syntax/src/algo.rs | 2 +- crates/ra_syntax/src/parsing/lexer.rs | 299 +++++++++++++++++++++++------- crates/ra_syntax/src/parsing/reparsing.rs | 3 +- crates/ra_syntax/src/syntax_error.rs | 7 +- 4 files changed, 237 insertions(+), 74 deletions(-) diff --git a/crates/ra_syntax/src/algo.rs b/crates/ra_syntax/src/algo.rs index 30a479f01..acf677e7d 100644 --- a/crates/ra_syntax/src/algo.rs +++ b/crates/ra_syntax/src/algo.rs @@ -81,7 +81,7 @@ impl TreeDiff { /// Specifically, returns a map whose keys are descendants of `from` and values /// are descendants of `to`, such that `replace_descendants(from, map) == to`. /// -/// A trivial solution is a singletom map `{ from: to }`, but this function +/// A trivial solution is a singleton map `{ from: to }`, but this function /// tries to find a more fine-grained diff. pub fn diff(from: &SyntaxNode, to: &SyntaxNode) -> TreeDiff { let mut buf = FxHashMap::default(); diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 6d839208d..9dca7d747 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -1,4 +1,6 @@ -//! FIXME: write short doc here +//! Lexer analyzes raw input string and produces lexemes (tokens). + +use std::iter::{FromIterator, IntoIterator}; use crate::{ SyntaxKind::{self, *}, @@ -13,85 +15,242 @@ pub struct Token { /// The length of the token. pub len: TextUnit, } +impl Token { + pub const fn new(kind: SyntaxKind, len: TextUnit) -> Self { + Self { kind, len } + } +} -fn match_literal_kind(kind: rustc_lexer::LiteralKind) -> SyntaxKind { - match kind { - rustc_lexer::LiteralKind::Int { .. } => INT_NUMBER, - rustc_lexer::LiteralKind::Float { .. 
} => FLOAT_NUMBER, - rustc_lexer::LiteralKind::Char { .. } => CHAR, - rustc_lexer::LiteralKind::Byte { .. } => BYTE, - rustc_lexer::LiteralKind::Str { .. } => STRING, - rustc_lexer::LiteralKind::ByteStr { .. } => BYTE_STRING, - rustc_lexer::LiteralKind::RawStr { .. } => RAW_STRING, - rustc_lexer::LiteralKind::RawByteStr { .. } => RAW_BYTE_STRING, +#[derive(Debug)] +/// Represents the result of parsing one token. +pub struct ParsedToken { + /// Parsed token. + pub token: Token, + /// If error is present then parsed token is malformed. + pub error: Option, +} +impl ParsedToken { + pub const fn new(token: Token, error: Option) -> Self { + Self { token, error } } } +#[derive(Debug, Default)] +/// Represents the result of parsing one token. +pub struct ParsedTokens { + /// Parsed token. + pub tokens: Vec, + /// If error is present then parsed token is malformed. + pub errors: Vec, +} + +impl FromIterator for ParsedTokens { + fn from_iter>(iter: I) -> Self { + let res = Self::default(); + for entry in iter { + res.tokens.push(entry.token); + if let Some(error) = entry.error { + res.errors.push(error); + } + } + res + } +} + +/// Returns the first encountered token from the string. +/// If the string contains zero or two or more tokens returns `None`. +pub fn single_token(text: &str) -> Option { + // TODO: test whether this condition indeed checks for a single token + first_token(text).filter(|parsed| parsed.token.len.to_usize() == text.len()) +} + +/* +/// Returns `ParsedTokens` which are basically a pair `(Vec, Vec)` +/// This is just a shorthand for `tokenize(text).collect()` +pub fn tokenize_to_vec_with_errors(text: &str) -> ParsedTokens { + tokenize(text).collect() +} + +/// The simplest version of tokenize, it just retunst a ready-made `Vec`. +/// It discards all tokenization errors while parsing. If you need that infromation +/// consider using `tokenize()` or `tokenize_to_vec_with_errors()`. 
+pub fn tokenize_to_vec(text: &str) -> Vec { + tokenize(text).map(|parsed_token| parsed_token.token).collect() +} +*/ + /// Break a string up into its component tokens -pub fn tokenize(text: &str) -> Vec { - if text.is_empty() { - return vec![]; +/// This is the core function, all other `tokenize*()` functions are simply +/// handy shortcuts for this one. +pub fn tokenize(text: &str) -> impl Iterator + '_ { + let shebang = rustc_lexer::strip_shebang(text).map(|shebang_len| { + text = &text[shebang_len..]; + ParsedToken::new(Token::new(SHEBANG, TextUnit::from_usize(shebang_len)), None) + }); + + // Notice that we eagerly evaluate shebang since it may change text slice + // and we cannot simplify this into a single method call chain + shebang.into_iter().chain(tokenize_without_shebang(text)) +} + +pub fn tokenize_without_shebang(text: &str) -> impl Iterator + '_ { + rustc_lexer::tokenize(text).map(|rustc_token| { + let token_text = &text[..rustc_token.len]; + text = &text[rustc_token.len..]; + rustc_token_kind_to_parsed_token(&rustc_token.kind, token_text) + }) +} + +#[derive(Debug)] +pub enum TokenizeError { + /// Base prefix was provided, but there were no digits + /// after it, e.g. `0x`. + EmptyInt, + /// Float exponent lacks digits e.g. `e+`, `E+`, `e-`, `E-`, + EmptyExponent, + + /// Block comment lacks trailing delimiter `*/` + UnterminatedBlockComment, + /// Character literal lacks trailing delimiter `'` + UnterminatedChar, + /// Characterish byte literal lacks trailing delimiter `'` + UnterminatedByte, + /// String literal lacks trailing delimiter `"` + UnterminatedString, + /// Byte string literal lacks trailing delimiter `"` + UnterminatedByteString, + /// Raw literal lacks trailing delimiter e.g. `"##` + UnterminatedRawString, + /// Raw byte string literal lacks trailing delimiter e.g. `"##` + UnterminatedRawByteString, + + /// Raw string lacks a quote after pound characters e.g. 
`r###` + UnstartedRawString, + /// Raw byte string lacks a quote after pound characters e.g. `br###` + UnstartedRawByteString, + + /// Lifetime starts with a number e.g. `'4ever` + LifetimeStartsWithNumber, +} + +fn rustc_token_kind_to_parsed_token( + rustc_token_kind: &rustc_lexer::TokenKind, + token_text: &str, +) -> ParsedToken { + use rustc_lexer::TokenKind as TK; + use TokenizeError as TE; + + // We drop some useful infromation here (see patterns with double dots `..`) + // Storing that info in `SyntaxKind` is not possible due to its layout requirements of + // being `u16` that come from `rowan::SyntaxKind` type and changes to `rowan::SyntaxKind` + // would mean hell of a rewrite. + + let (syntax_kind, error) = match *rustc_token_kind { + TK::LineComment => ok(COMMENT), + TK::BlockComment { terminated } => ok_if(terminated, COMMENT, TE::UnterminatedBlockComment), + TK::Whitespace => ok(WHITESPACE), + TK::Ident => ok(if token_text == "_" { + UNDERSCORE + } else { + SyntaxKind::from_keyword(token_text).unwrap_or(IDENT) + }), + TK::RawIdent => ok(IDENT), + TK::Literal { kind, .. 
} => match_literal_kind(&kind), + TK::Lifetime { starts_with_number } => { + ok_if(!starts_with_number, LIFETIME, TE::LifetimeStartsWithNumber) + } + TK::Semi => ok(SEMI), + TK::Comma => ok(COMMA), + TK::Dot => ok(DOT), + TK::OpenParen => ok(L_PAREN), + TK::CloseParen => ok(R_PAREN), + TK::OpenBrace => ok(L_CURLY), + TK::CloseBrace => ok(R_CURLY), + TK::OpenBracket => ok(L_BRACK), + TK::CloseBracket => ok(R_BRACK), + TK::At => ok(AT), + TK::Pound => ok(POUND), + TK::Tilde => ok(TILDE), + TK::Question => ok(QUESTION), + TK::Colon => ok(COLON), + TK::Dollar => ok(DOLLAR), + TK::Eq => ok(EQ), + TK::Not => ok(EXCL), + TK::Lt => ok(L_ANGLE), + TK::Gt => ok(R_ANGLE), + TK::Minus => ok(MINUS), + TK::And => ok(AMP), + TK::Or => ok(PIPE), + TK::Plus => ok(PLUS), + TK::Star => ok(STAR), + TK::Slash => ok(SLASH), + TK::Caret => ok(CARET), + TK::Percent => ok(PERCENT), + TK::Unknown => ok(ERROR), + }; + + return ParsedToken::new( + Token::new(syntax_kind, TextUnit::from_usize(token_text.len())), + error, + ); + + type ParsedSyntaxKind = (SyntaxKind, Option); + + const fn ok(syntax_kind: SyntaxKind) -> ParsedSyntaxKind { + (syntax_kind, None) } - let mut text = text; - let mut acc = Vec::new(); - if let Some(len) = rustc_lexer::strip_shebang(text) { - acc.push(Token { kind: SHEBANG, len: TextUnit::from_usize(len) }); - text = &text[len..]; + const fn ok_if(cond: bool, syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind { + if cond { + ok(syntax_kind) + } else { + err(syntax_kind, error) + } } - while !text.is_empty() { - let rustc_token = rustc_lexer::first_token(text); - let kind = match rustc_token.kind { - rustc_lexer::TokenKind::LineComment => COMMENT, - rustc_lexer::TokenKind::BlockComment { .. 
} => COMMENT, - rustc_lexer::TokenKind::Whitespace => WHITESPACE, - rustc_lexer::TokenKind::Ident => { - let token_text = &text[..rustc_token.len]; - if token_text == "_" { - UNDERSCORE - } else { - SyntaxKind::from_keyword(&text[..rustc_token.len]).unwrap_or(IDENT) - } + const fn err(syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind { + (syntax_kind, Some(error)) + } + + const fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> ParsedSyntaxKind { + use rustc_lexer::LiteralKind as LK; + match *kind { + LK::Int { empty_int, .. } => ok_if(!empty_int, INT_NUMBER, TE::EmptyInt), + LK::Float { empty_exponent, .. } => { + ok_if(!empty_exponent, FLOAT_NUMBER, TE::EmptyExponent) } - rustc_lexer::TokenKind::RawIdent => IDENT, - rustc_lexer::TokenKind::Literal { kind, .. } => match_literal_kind(kind), - rustc_lexer::TokenKind::Lifetime { .. } => LIFETIME, - rustc_lexer::TokenKind::Semi => SEMI, - rustc_lexer::TokenKind::Comma => COMMA, - rustc_lexer::TokenKind::Dot => DOT, - rustc_lexer::TokenKind::OpenParen => L_PAREN, - rustc_lexer::TokenKind::CloseParen => R_PAREN, - rustc_lexer::TokenKind::OpenBrace => L_CURLY, - rustc_lexer::TokenKind::CloseBrace => R_CURLY, - rustc_lexer::TokenKind::OpenBracket => L_BRACK, - rustc_lexer::TokenKind::CloseBracket => R_BRACK, - rustc_lexer::TokenKind::At => AT, - rustc_lexer::TokenKind::Pound => POUND, - rustc_lexer::TokenKind::Tilde => TILDE, - rustc_lexer::TokenKind::Question => QUESTION, - rustc_lexer::TokenKind::Colon => COLON, - rustc_lexer::TokenKind::Dollar => DOLLAR, - rustc_lexer::TokenKind::Eq => EQ, - rustc_lexer::TokenKind::Not => EXCL, - rustc_lexer::TokenKind::Lt => L_ANGLE, - rustc_lexer::TokenKind::Gt => R_ANGLE, - rustc_lexer::TokenKind::Minus => MINUS, - rustc_lexer::TokenKind::And => AMP, - rustc_lexer::TokenKind::Or => PIPE, - rustc_lexer::TokenKind::Plus => PLUS, - rustc_lexer::TokenKind::Star => STAR, - rustc_lexer::TokenKind::Slash => SLASH, - rustc_lexer::TokenKind::Caret => CARET, - 
rustc_lexer::TokenKind::Percent => PERCENT, - rustc_lexer::TokenKind::Unknown => ERROR, - }; - let token = Token { kind, len: TextUnit::from_usize(rustc_token.len) }; - acc.push(token); - text = &text[rustc_token.len..]; + LK::Char { terminated } => ok_if(terminated, CHAR, TE::UnterminatedChar), + LK::Byte { terminated } => ok_if(terminated, BYTE, TE::UnterminatedByte), + LK::Str { terminated } => ok_if(terminated, STRING, TE::UnterminatedString), + LK::ByteStr { terminated } => { + ok_if(terminated, BYTE_STRING, TE::UnterminatedByteString) + } + + LK::RawStr { started: true, terminated, .. } => { + ok_if(terminated, RAW_STRING, TE::UnterminatedRawString) + } + LK::RawStr { started: false, .. } => err(RAW_STRING, TE::UnstartedRawString), + + LK::RawByteStr { started: true, terminated, .. } => { + ok_if(terminated, RAW_BYTE_STRING, TE::UnterminatedRawByteString) + } + LK::RawByteStr { started: false, .. } => { + err(RAW_BYTE_STRING, TE::UnstartedRawByteString) + } + } + } +} + +pub fn first_token(text: &str) -> Option { + // Checking for emptyness because of `rustc_lexer::first_token()` invariant (see its body) + if text.is_empty() { + None + } else { + let rustc_token = rustc_lexer::first_token(text); + Some(rustc_token_kind_to_parsed_token(&rustc_token.kind, &text[..rustc_token.len])) } - acc } -pub fn classify_literal(text: &str) -> Option { +// TODO: think what to do with this ad hoc function +pub fn classify_literal(text: &str) -> Option { let t = rustc_lexer::first_token(text); if t.len != text.len() { return None; @@ -100,5 +259,5 @@ pub fn classify_literal(text: &str) -> Option { rustc_lexer::TokenKind::Literal { kind, .. 
} => match_literal_kind(kind), _ => return None, }; - Some(Token { kind, len: TextUnit::from_usize(t.len) }) + Some(ParsedToken::new(Token::new(kind, TextUnit::from_usize(t.len)))) } diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index 06bdda11d..3abc09877 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -46,8 +46,7 @@ fn reparse_token<'node>( WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => { if token.kind() == WHITESPACE || token.kind() == COMMENT { // removing a new line may extends previous token - if token.text().to_string()[edit.delete - token.text_range().start()].contains('\n') - { + if token.text()[edit.delete - token.text_range().start()].contains('\n') { return None; } } diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs index 6c171df8d..9122dda29 100644 --- a/crates/ra_syntax/src/syntax_error.rs +++ b/crates/ra_syntax/src/syntax_error.rs @@ -4,7 +4,7 @@ use std::fmt; use ra_parser::ParseError; -use crate::{validation::EscapeError, TextRange, TextUnit}; +use crate::{validation::EscapeError, TextRange, TextUnit, TokenizeError}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct SyntaxError { @@ -12,6 +12,10 @@ pub struct SyntaxError { location: Location, } +// FIXME: Location should be just `Location(TextRange)` +// TextUnit enum member just unnecessarily compicates things, +// we should'n treat it specially, it just as a `TextRange { start: x, end: x + 1 }` +// see `location_to_range()` in ra_ide/src/diagnostics #[derive(Clone, PartialEq, Eq, Hash)] pub enum Location { Offset(TextUnit), @@ -79,6 +83,7 @@ impl fmt::Display for SyntaxError { pub enum SyntaxErrorKind { ParseError(ParseError), EscapeError(EscapeError), + TokenizeError(TokenizeError), InvalidBlockAttr, InvalidMatchInnerAttr, InvalidTupleIndexFormat, -- cgit v1.2.3 From ac37a11f04b31f792068a1cb50dbbf5ccd4d982d Mon Sep 17 00:00:00 2001 
From: Veetaha Date: Sun, 26 Jan 2020 20:44:49 +0200 Subject: Reimplemented lexer with vectors instead of iterators --- crates/ra_ide/src/references/rename.rs | 10 +- crates/ra_mbe/src/subtree_source.rs | 8 +- crates/ra_syntax/src/lib.rs | 2 +- crates/ra_syntax/src/parsing.rs | 10 +- crates/ra_syntax/src/parsing/lexer.rs | 304 ++++++++++++------------- crates/ra_syntax/src/parsing/reparsing.rs | 52 +++-- crates/ra_syntax/src/parsing/text_tree_sink.rs | 6 +- crates/ra_syntax/src/syntax_error.rs | 42 ++++ crates/ra_syntax/src/syntax_node.rs | 9 +- crates/ra_syntax/src/tests.rs | 3 +- 10 files changed, 250 insertions(+), 196 deletions(-) diff --git a/crates/ra_ide/src/references/rename.rs b/crates/ra_ide/src/references/rename.rs index 626efb603..ad3e86f7c 100644 --- a/crates/ra_ide/src/references/rename.rs +++ b/crates/ra_ide/src/references/rename.rs @@ -2,7 +2,7 @@ use hir::ModuleSource; use ra_db::{RelativePath, RelativePathBuf, SourceDatabase, SourceDatabaseExt}; -use ra_syntax::{algo::find_node_at_offset, ast, tokenize, AstNode, SyntaxKind, SyntaxNode}; +use ra_syntax::{algo::find_node_at_offset, ast, single_token, AstNode, SyntaxKind, SyntaxNode}; use ra_text_edit::TextEdit; use crate::{ @@ -17,11 +17,9 @@ pub(crate) fn rename( position: FilePosition, new_name: &str, ) -> Option> { - let tokens = tokenize(new_name); - if tokens.len() != 1 - || (tokens[0].kind != SyntaxKind::IDENT && tokens[0].kind != SyntaxKind::UNDERSCORE) - { - return None; + match single_token(new_name)?.token.kind { + SyntaxKind::IDENT | SyntaxKind::UNDERSCORE => (), + _ => return None, } let parse = db.parse(position.file_id); diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs index b841c39d3..72ac8df03 100644 --- a/crates/ra_mbe/src/subtree_source.rs +++ b/crates/ra_mbe/src/subtree_source.rs @@ -1,7 +1,7 @@ //! 
FIXME: write short doc here use ra_parser::{Token, TokenSource}; -use ra_syntax::{classify_literal, SmolStr, SyntaxKind, SyntaxKind::*, T}; +use ra_syntax::{single_token, SmolStr, SyntaxKind, SyntaxKind::*, T}; use std::cell::{Cell, Ref, RefCell}; use tt::buffer::{Cursor, TokenBuffer}; @@ -129,8 +129,10 @@ fn convert_delim(d: Option, closing: bool) -> TtToken { } fn convert_literal(l: &tt::Literal) -> TtToken { - let kind = - classify_literal(&l.text).map(|tkn| tkn.kind).unwrap_or_else(|| match l.text.as_ref() { + let kind = single_token(&l.text) + .map(|parsed| parsed.token.kind) + .filter(|kind| kind.is_literal()) + .unwrap_or_else(|| match l.text.as_ref() { "true" => T![true], "false" => T![false], _ => panic!("Fail to convert given literal {:#?}", &l), diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs index 9931fec84..80b3a0b22 100644 --- a/crates/ra_syntax/src/lib.rs +++ b/crates/ra_syntax/src/lib.rs @@ -41,7 +41,7 @@ use crate::syntax_node::GreenNode; pub use crate::{ algo::InsertPosition, ast::{AstNode, AstToken}, - parsing::{classify_literal, tokenize, Token}, + parsing::{first_token, single_token, tokenize, tokenize_append, Token, TokenizeError}, ptr::{AstPtr, SyntaxNodePtr}, syntax_error::{Location, SyntaxError, SyntaxErrorKind}, syntax_node::{ diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 0387f0378..4e51f920b 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -7,15 +7,17 @@ mod text_tree_sink; mod reparsing; use crate::{syntax_node::GreenNode, SyntaxError}; +use text_token_source::TextTokenSource; +use text_tree_sink::TextTreeSink; -pub use self::lexer::{classify_literal, tokenize, Token}; +pub use lexer::*; pub(crate) use self::reparsing::incremental_reparse; pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec) { - let tokens = tokenize(&text); - let mut token_source = text_token_source::TextTokenSource::new(text, &tokens); - let mut tree_sink = 
text_tree_sink::TextTreeSink::new(text, &tokens); + let ParsedTokens { tokens, errors } = tokenize(&text); + let mut token_source = TextTokenSource::new(text, &tokens); + let mut tree_sink = TextTreeSink::new(text, &tokens, errors); ra_parser::parse(&mut token_source, &mut tree_sink); tree_sink.finish() } diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 9dca7d747..6d96f8400 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -1,10 +1,10 @@ //! Lexer analyzes raw input string and produces lexemes (tokens). - -use std::iter::{FromIterator, IntoIterator}; +//! It is just a bridge to `rustc_lexer`. use crate::{ + SyntaxError, SyntaxErrorKind, SyntaxKind::{self, *}, - TextUnit, + TextRange, TextUnit, }; /// A token of Rust source. @@ -15,93 +15,96 @@ pub struct Token { /// The length of the token. pub len: TextUnit, } -impl Token { - pub const fn new(kind: SyntaxKind, len: TextUnit) -> Self { - Self { kind, len } - } -} #[derive(Debug)] -/// Represents the result of parsing one token. +/// Represents the result of parsing one token. Beware that the token may be malformed. pub struct ParsedToken { /// Parsed token. pub token: Token, /// If error is present then parsed token is malformed. - pub error: Option, -} -impl ParsedToken { - pub const fn new(token: Token, error: Option) -> Self { - Self { token, error } - } + pub error: Option, } #[derive(Debug, Default)] -/// Represents the result of parsing one token. +/// Represents the result of parsing source code of Rust language. pub struct ParsedTokens { - /// Parsed token. + /// Parsed tokens in order they appear in source code. pub tokens: Vec, - /// If error is present then parsed token is malformed. - pub errors: Vec, + /// Collection of all occured tokenization errors. 
+ /// In general `self.errors.len() <= self.tokens.len()` + pub errors: Vec, } - -impl FromIterator for ParsedTokens { - fn from_iter>(iter: I) -> Self { - let res = Self::default(); - for entry in iter { - res.tokens.push(entry.token); - if let Some(error) = entry.error { - res.errors.push(error); - } +impl ParsedTokens { + /// Append `token` and `error` (if pressent) to the result. + pub fn push(&mut self, ParsedToken { token, error }: ParsedToken) { + self.tokens.push(token); + if let Some(error) = error { + self.errors.push(error) } - res } } -/// Returns the first encountered token from the string. -/// If the string contains zero or two or more tokens returns `None`. -pub fn single_token(text: &str) -> Option { - // TODO: test whether this condition indeed checks for a single token - first_token(text).filter(|parsed| parsed.token.len.to_usize() == text.len()) +/// Same as `tokenize_append()`, just a shortcut for creating `ParsedTokens` +/// and returning the result the usual way. +pub fn tokenize(text: &str) -> ParsedTokens { + let mut parsed = ParsedTokens::default(); + tokenize_append(text, &mut parsed); + parsed } -/* -/// Returns `ParsedTokens` which are basically a pair `(Vec, Vec)` -/// This is just a shorthand for `tokenize(text).collect()` -pub fn tokenize_to_vec_with_errors(text: &str) -> ParsedTokens { - tokenize(text).collect() -} +/// Break a string up into its component tokens. +/// Returns `ParsedTokens` which are basically a pair `(Vec, Vec)`. +/// Beware that it checks for shebang first and its length contributes to resulting +/// tokens offsets. +pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) { + // non-empty string is a precondtion of `rustc_lexer::strip_shebang()`. + if text.is_empty() { + return; + } -/// The simplest version of tokenize, it just retunst a ready-made `Vec`. -/// It discards all tokenization errors while parsing. If you need that infromation -/// consider using `tokenize()` or `tokenize_to_vec_with_errors()`. 
-pub fn tokenize_to_vec(text: &str) -> Vec { - tokenize(text).map(|parsed_token| parsed_token.token).collect() -} -*/ + let mut offset: usize = rustc_lexer::strip_shebang(text) + .map(|shebang_len| { + parsed.tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) }); + shebang_len + }) + .unwrap_or(0); -/// Break a string up into its component tokens -/// This is the core function, all other `tokenize*()` functions are simply -/// handy shortcuts for this one. -pub fn tokenize(text: &str) -> impl Iterator + '_ { - let shebang = rustc_lexer::strip_shebang(text).map(|shebang_len| { - text = &text[shebang_len..]; - ParsedToken::new(Token::new(SHEBANG, TextUnit::from_usize(shebang_len)), None) - }); + let text_without_shebang = &text[offset..]; - // Notice that we eagerly evaluate shebang since it may change text slice - // and we cannot simplify this into a single method call chain - shebang.into_iter().chain(tokenize_without_shebang(text)) + for rustc_token in rustc_lexer::tokenize(text_without_shebang) { + parsed.push(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from_usize(offset))); + offset += rustc_token.len; + } } -pub fn tokenize_without_shebang(text: &str) -> impl Iterator + '_ { - rustc_lexer::tokenize(text).map(|rustc_token| { - let token_text = &text[..rustc_token.len]; - text = &text[rustc_token.len..]; - rustc_token_kind_to_parsed_token(&rustc_token.kind, token_text) - }) +/// Returns the first encountered token at the beginning of the string. +/// If the string contains zero or *two or more tokens* returns `None`. +/// +/// The main difference between `first_token()` and `single_token()` is that +/// the latter returns `None` if the string contains more than one token. +pub fn single_token(text: &str) -> Option { + first_token(text).filter(|parsed| parsed.token.len.to_usize() == text.len()) } -#[derive(Debug)] +/// Returns the first encountered token at the beginning of the string. 
+/// If the string contains zero tokens returns `None`. +/// +/// The main difference between `first_token() and single_token()` is that +/// the latter returns `None` if the string contains more than one token. +pub fn first_token(text: &str) -> Option { + // non-empty string is a precondtion of `rustc_lexer::first_token()`. + if text.is_empty() { + None + } else { + let rustc_token = rustc_lexer::first_token(text); + Some(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from(0))) + } +} + +/// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant. +/// It describes all the types of errors that may happen during the tokenization +/// of Rust source. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum TokenizeError { /// Base prefix was provided, but there were no digits /// after it, e.g. `0x`. @@ -124,94 +127,95 @@ pub enum TokenizeError { /// Raw byte string literal lacks trailing delimiter e.g. `"##` UnterminatedRawByteString, - /// Raw string lacks a quote after pound characters e.g. `r###` + /// Raw string lacks a quote after the pound characters e.g. `r###` UnstartedRawString, - /// Raw byte string lacks a quote after pound characters e.g. `br###` + /// Raw byte string lacks a quote after the pound characters e.g. `br###` UnstartedRawByteString, /// Lifetime starts with a number e.g. 
`'4ever` LifetimeStartsWithNumber, } -fn rustc_token_kind_to_parsed_token( - rustc_token_kind: &rustc_lexer::TokenKind, - token_text: &str, +/// Mapper function that converts `rustc_lexer::Token` with some additional context +/// to `ParsedToken` +fn rustc_token_to_parsed_token( + rustc_token: &rustc_lexer::Token, + text: &str, + token_start_offset: TextUnit, ) -> ParsedToken { - use rustc_lexer::TokenKind as TK; - use TokenizeError as TE; - // We drop some useful infromation here (see patterns with double dots `..`) // Storing that info in `SyntaxKind` is not possible due to its layout requirements of // being `u16` that come from `rowan::SyntaxKind` type and changes to `rowan::SyntaxKind` - // would mean hell of a rewrite. + // would mean hell of a rewrite - let (syntax_kind, error) = match *rustc_token_kind { - TK::LineComment => ok(COMMENT), - TK::BlockComment { terminated } => ok_if(terminated, COMMENT, TE::UnterminatedBlockComment), - TK::Whitespace => ok(WHITESPACE), - TK::Ident => ok(if token_text == "_" { - UNDERSCORE - } else { - SyntaxKind::from_keyword(token_text).unwrap_or(IDENT) - }), - TK::RawIdent => ok(IDENT), - TK::Literal { kind, .. } => match_literal_kind(&kind), - TK::Lifetime { starts_with_number } => { - ok_if(!starts_with_number, LIFETIME, TE::LifetimeStartsWithNumber) + let token_range = + TextRange::offset_len(token_start_offset, TextUnit::from_usize(rustc_token.len)); + + let token_text = &text[token_range]; + + let (syntax_kind, error) = { + use rustc_lexer::TokenKind as TK; + use TokenizeError as TE; + + match rustc_token.kind { + TK::LineComment => ok(COMMENT), + TK::BlockComment { terminated } => { + ok_if(terminated, COMMENT, TE::UnterminatedBlockComment) + } + TK::Whitespace => ok(WHITESPACE), + TK::Ident => ok(if token_text == "_" { + UNDERSCORE + } else { + SyntaxKind::from_keyword(token_text).unwrap_or(IDENT) + }), + TK::RawIdent => ok(IDENT), + TK::Literal { kind, .. 
} => match_literal_kind(&kind), + TK::Lifetime { starts_with_number } => { + ok_if(!starts_with_number, LIFETIME, TE::LifetimeStartsWithNumber) + } + TK::Semi => ok(SEMI), + TK::Comma => ok(COMMA), + TK::Dot => ok(DOT), + TK::OpenParen => ok(L_PAREN), + TK::CloseParen => ok(R_PAREN), + TK::OpenBrace => ok(L_CURLY), + TK::CloseBrace => ok(R_CURLY), + TK::OpenBracket => ok(L_BRACK), + TK::CloseBracket => ok(R_BRACK), + TK::At => ok(AT), + TK::Pound => ok(POUND), + TK::Tilde => ok(TILDE), + TK::Question => ok(QUESTION), + TK::Colon => ok(COLON), + TK::Dollar => ok(DOLLAR), + TK::Eq => ok(EQ), + TK::Not => ok(EXCL), + TK::Lt => ok(L_ANGLE), + TK::Gt => ok(R_ANGLE), + TK::Minus => ok(MINUS), + TK::And => ok(AMP), + TK::Or => ok(PIPE), + TK::Plus => ok(PLUS), + TK::Star => ok(STAR), + TK::Slash => ok(SLASH), + TK::Caret => ok(CARET), + TK::Percent => ok(PERCENT), + TK::Unknown => ok(ERROR), } - TK::Semi => ok(SEMI), - TK::Comma => ok(COMMA), - TK::Dot => ok(DOT), - TK::OpenParen => ok(L_PAREN), - TK::CloseParen => ok(R_PAREN), - TK::OpenBrace => ok(L_CURLY), - TK::CloseBrace => ok(R_CURLY), - TK::OpenBracket => ok(L_BRACK), - TK::CloseBracket => ok(R_BRACK), - TK::At => ok(AT), - TK::Pound => ok(POUND), - TK::Tilde => ok(TILDE), - TK::Question => ok(QUESTION), - TK::Colon => ok(COLON), - TK::Dollar => ok(DOLLAR), - TK::Eq => ok(EQ), - TK::Not => ok(EXCL), - TK::Lt => ok(L_ANGLE), - TK::Gt => ok(R_ANGLE), - TK::Minus => ok(MINUS), - TK::And => ok(AMP), - TK::Or => ok(PIPE), - TK::Plus => ok(PLUS), - TK::Star => ok(STAR), - TK::Slash => ok(SLASH), - TK::Caret => ok(CARET), - TK::Percent => ok(PERCENT), - TK::Unknown => ok(ERROR), }; - return ParsedToken::new( - Token::new(syntax_kind, TextUnit::from_usize(token_text.len())), - error, - ); + return ParsedToken { + token: Token { kind: syntax_kind, len: token_range.len() }, + error: error + .map(|error| SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range)), + }; type ParsedSyntaxKind = (SyntaxKind, Option); - 
const fn ok(syntax_kind: SyntaxKind) -> ParsedSyntaxKind { - (syntax_kind, None) - } - const fn ok_if(cond: bool, syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind { - if cond { - ok(syntax_kind) - } else { - err(syntax_kind, error) - } - } - const fn err(syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind { - (syntax_kind, Some(error)) - } - - const fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> ParsedSyntaxKind { + fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> ParsedSyntaxKind { use rustc_lexer::LiteralKind as LK; + use TokenizeError as TE; + match *kind { LK::Int { empty_int, .. } => ok_if(!empty_int, INT_NUMBER, TE::EmptyInt), LK::Float { empty_exponent, .. } => { @@ -237,27 +241,17 @@ fn rustc_token_kind_to_parsed_token( } } } -} - -pub fn first_token(text: &str) -> Option { - // Checking for emptyness because of `rustc_lexer::first_token()` invariant (see its body) - if text.is_empty() { - None - } else { - let rustc_token = rustc_lexer::first_token(text); - Some(rustc_token_kind_to_parsed_token(&rustc_token.kind, &text[..rustc_token.len])) + const fn ok(syntax_kind: SyntaxKind) -> ParsedSyntaxKind { + (syntax_kind, None) } -} - -// TODO: think what to do with this ad hoc function -pub fn classify_literal(text: &str) -> Option { - let t = rustc_lexer::first_token(text); - if t.len != text.len() { - return None; + const fn err(syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind { + (syntax_kind, Some(error)) + } + fn ok_if(cond: bool, syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind { + if cond { + ok(syntax_kind) + } else { + err(syntax_kind, error) + } } - let kind = match t.kind { - rustc_lexer::TokenKind::Literal { kind, .. 
} => match_literal_kind(kind), - _ => return None, - }; - Some(ParsedToken::new(Token::new(kind, TextUnit::from_usize(t.len)))) } diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index 3abc09877..ad1a7c855 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -12,7 +12,7 @@ use ra_text_edit::AtomTextEdit; use crate::{ algo, parsing::{ - lexer::{tokenize, Token}, + lexer::{single_token, tokenize, ParsedTokens, Token}, text_token_source::TextTokenSource, text_tree_sink::TextTreeSink, }, @@ -41,36 +41,42 @@ fn reparse_token<'node>( root: &'node SyntaxNode, edit: &AtomTextEdit, ) -> Option<(GreenNode, TextRange)> { - let token = algo::find_covering_element(root, edit.delete).as_token()?.clone(); - match token.kind() { + let prev_token = algo::find_covering_element(root, edit.delete).as_token()?.clone(); + let prev_token_kind = prev_token.kind(); + match prev_token_kind { WHITESPACE | COMMENT | IDENT | STRING | RAW_STRING => { - if token.kind() == WHITESPACE || token.kind() == COMMENT { + if prev_token_kind == WHITESPACE || prev_token_kind == COMMENT { // removing a new line may extends previous token - if token.text()[edit.delete - token.text_range().start()].contains('\n') { + let deleted_range = edit.delete - prev_token.text_range().start(); + if prev_token.text()[deleted_range].contains('\n') { return None; } } - let text = get_text_after_edit(token.clone().into(), &edit); - let lex_tokens = tokenize(&text); - let lex_token = match lex_tokens[..] 
{ - [lex_token] if lex_token.kind == token.kind() => lex_token, - _ => return None, - }; + let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit); + let new_token_kind = single_token(&new_text)?.token.kind; - if lex_token.kind == IDENT && is_contextual_kw(&text) { + if new_token_kind != prev_token_kind + || (new_token_kind == IDENT && is_contextual_kw(&new_text)) + { return None; } - if let Some(next_char) = root.text().char_at(token.text_range().end()) { - let tokens_with_next_char = tokenize(&format!("{}{}", text, next_char)); - if tokens_with_next_char.len() == 1 { + // Check that edited token is not a part of the bigger token. + // E.g. if for source code `bruh"str"` the user removed `ruh`, then + // `b` no longer remains an identifier, but becomes a part of byte string literal + if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) { + new_text.push(next_char); + let token_with_next_char = single_token(&new_text); + if token_with_next_char.is_some() { return None; } + new_text.pop(); } - let new_token = GreenToken::new(rowan::SyntaxKind(token.kind().into()), text.into()); - Some((token.replace_with(new_token), token.text_range())) + let new_token = + GreenToken::new(rowan::SyntaxKind(prev_token_kind.into()), new_text.into()); + Some((prev_token.replace_with(new_token), prev_token.text_range())) } _ => None, } @@ -82,12 +88,12 @@ fn reparse_block<'node>( ) -> Option<(GreenNode, Vec, TextRange)> { let (node, reparser) = find_reparsable_node(root, edit.delete)?; let text = get_text_after_edit(node.clone().into(), &edit); - let tokens = tokenize(&text); + let ParsedTokens { tokens, errors } = tokenize(&text); if !is_balanced(&tokens) { return None; } let mut token_source = TextTokenSource::new(&text, &tokens); - let mut tree_sink = TextTreeSink::new(&text, &tokens); + let mut tree_sink = TextTreeSink::new(&text, &tokens, errors); reparser.parse(&mut token_source, &mut tree_sink); let (green, new_errors) = tree_sink.finish(); 
Some((node.replace_with(green), new_errors, node.text_range())) @@ -96,6 +102,9 @@ fn reparse_block<'node>( fn get_text_after_edit(element: SyntaxElement, edit: &AtomTextEdit) -> String { let edit = AtomTextEdit::replace(edit.delete - element.text_range().start(), edit.insert.clone()); + + // Note: we could move this match to a method or even further: use enum_dispatch crate + // https://crates.io/crates/enum_dispatch let text = match element { NodeOrToken::Token(token) => token.text().to_string(), NodeOrToken::Node(node) => node.text().to_string(), @@ -112,6 +121,9 @@ fn is_contextual_kw(text: &str) -> bool { fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> { let node = algo::find_covering_element(node, range); + + // Note: we could move this match to a method or even further: use enum_dispatch crate + // https://crates.io/crates/enum_dispatch let mut ancestors = match node { NodeOrToken::Token(it) => it.parent().ancestors(), NodeOrToken::Node(it) => it.ancestors(), @@ -181,6 +193,8 @@ mod tests { let fully_reparsed = SourceFile::parse(&after); let incrementally_reparsed: Parse = { let f = SourceFile::parse(&before); + // FIXME: it seems this initialization statement is unnecessary (see edit in outer scope) + // Investigate whether it should really be removed. 
let edit = AtomTextEdit { delete: range, insert: replace_with.to_string() }; let (green, new_errors, range) = incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap(); diff --git a/crates/ra_syntax/src/parsing/text_tree_sink.rs b/crates/ra_syntax/src/parsing/text_tree_sink.rs index c36756d6c..5faac588b 100644 --- a/crates/ra_syntax/src/parsing/text_tree_sink.rs +++ b/crates/ra_syntax/src/parsing/text_tree_sink.rs @@ -92,14 +92,14 @@ impl<'a> TreeSink for TextTreeSink<'a> { } impl<'a> TextTreeSink<'a> { - pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> TextTreeSink<'a> { - TextTreeSink { + pub(super) fn new(text: &'a str, tokens: &'a [Token], errors: Vec) -> Self { + Self { text, tokens, text_pos: 0.into(), token_pos: 0, state: State::PendingStart, - inner: SyntaxTreeBuilder::default(), + inner: SyntaxTreeBuilder::new(errors), } } diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs index 9122dda29..af18a30f2 100644 --- a/crates/ra_syntax/src/syntax_error.rs +++ b/crates/ra_syntax/src/syntax_error.rs @@ -84,6 +84,9 @@ pub enum SyntaxErrorKind { ParseError(ParseError), EscapeError(EscapeError), TokenizeError(TokenizeError), + // FIXME: the obvious pattern of this enum dictates that the following enum variants + // should be wrapped into something like `SemmanticError(SemmanticError)` + // or `ValidateError(ValidateError)` or `SemmanticValidateError(...)` InvalidBlockAttr, InvalidMatchInnerAttr, InvalidTupleIndexFormat, @@ -106,6 +109,7 @@ impl fmt::Display for SyntaxErrorKind { } ParseError(msg) => write!(f, "{}", msg.0), EscapeError(err) => write!(f, "{}", err), + TokenizeError(err) => write!(f, "{}", err), VisibilityNotAllowed => { write!(f, "unnecessary visibility qualifier") } @@ -116,6 +120,44 @@ impl fmt::Display for SyntaxErrorKind { } } +impl fmt::Display for TokenizeError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let msg = match self { + TokenizeError::EmptyInt => "Missing 
digits after integer base prefix", + TokenizeError::EmptyExponent => "Missing digits after the exponent symbol", + TokenizeError::UnterminatedBlockComment => { + "Missing trailing `*/` symbols to terminate the block comment" + } + TokenizeError::UnterminatedChar => { + "Missing trailing `'` symbol to terminate the character literal" + } + TokenizeError::UnterminatedByte => { + "Missing trailing `'` symbol to terminate the byte literal" + } + TokenizeError::UnterminatedString => { + "Missing trailing `\"` symbol to terminate the string literal" + } + TokenizeError::UnterminatedByteString => { + "Missing trailing `\"` symbol to terminate the byte string literal" + } + TokenizeError::UnterminatedRawString => { + "Missing trailing `\"` with `#` symbols to terminate the raw string literal" + } + TokenizeError::UnterminatedRawByteString => { + "Missing trailing `\"` with `#` symbols to terminate the raw byte string literal" + } + TokenizeError::UnstartedRawString => { + "Missing `\"` symbol after `#` symbols to begin the raw string literal" + } + TokenizeError::UnstartedRawByteString => { + "Missing `\"` symbol after `#` symbols to begin the raw byte string literal" + } + TokenizeError::LifetimeStartsWithNumber => "Lifetime name cannot start with a number", + }; + write!(f, "{}", msg) + } +} + impl fmt::Display for EscapeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let msg = match self { diff --git a/crates/ra_syntax/src/syntax_node.rs b/crates/ra_syntax/src/syntax_node.rs index b3eb5da63..591855302 100644 --- a/crates/ra_syntax/src/syntax_node.rs +++ b/crates/ra_syntax/src/syntax_node.rs @@ -4,7 +4,7 @@ //! `SyntaxNode`, and a basic traversal API (parent, children, siblings). //! //! The *real* implementation is in the (language-agnostic) `rowan` crate, this -//! modules just wraps its API. +//! module just wraps its API. 
use ra_parser::ParseError; use rowan::{GreenNodeBuilder, Language}; @@ -38,14 +38,15 @@ pub type SyntaxElementChildren = rowan::SyntaxElementChildren; pub use rowan::{Direction, NodeOrToken}; +#[derive(Default)] pub struct SyntaxTreeBuilder { errors: Vec, inner: GreenNodeBuilder<'static>, } -impl Default for SyntaxTreeBuilder { - fn default() -> SyntaxTreeBuilder { - SyntaxTreeBuilder { errors: Vec::new(), inner: GreenNodeBuilder::new() } +impl SyntaxTreeBuilder { + pub fn new(errors: Vec) -> Self { + Self { errors, inner: GreenNodeBuilder::default() } } } diff --git a/crates/ra_syntax/src/tests.rs b/crates/ra_syntax/src/tests.rs index 458920607..df21c957c 100644 --- a/crates/ra_syntax/src/tests.rs +++ b/crates/ra_syntax/src/tests.rs @@ -10,7 +10,8 @@ use crate::{fuzz, SourceFile}; #[test] fn lexer_tests() { dir_tests(&test_data_dir(), &["lexer"], |text, _| { - let tokens = crate::tokenize(text); + // FIXME: add tests for errors (their format is up to discussion) + let tokens = crate::tokenize(text).tokens; dump_tokens(&tokens, text) }) } -- cgit v1.2.3 From a2bc4c2a7453f2e72df6f2e6c3273d6b3f0114a9 Mon Sep 17 00:00:00 2001 From: Veetaha Date: Sun, 26 Jan 2020 22:32:23 +0200 Subject: ra_syntax: fixed doc comment --- crates/ra_syntax/src/parsing/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 6d96f8400..9f321cd06 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -53,7 +53,7 @@ pub fn tokenize(text: &str) -> ParsedTokens { } /// Break a string up into its component tokens. -/// Returns `ParsedTokens` which are basically a pair `(Vec, Vec)`. +/// Writes to `ParsedTokens` which are basically a pair `(Vec, Vec)`. /// Beware that it checks for shebang first and its length contributes to resulting /// tokens offsets. 
pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) { -- cgit v1.2.3 From ffe00631d5823070d6ab9e92ae7cfb5dcb04200d Mon Sep 17 00:00:00 2001 From: Veetaha Date: Sun, 26 Jan 2020 22:33:09 +0200 Subject: ra_syntax: moved ParsedToken derive attribute under the doc comment --- crates/ra_syntax/src/parsing/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 9f321cd06..7cd7110de 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -16,8 +16,8 @@ pub struct Token { pub len: TextUnit, } -#[derive(Debug)] /// Represents the result of parsing one token. Beware that the token may be malformed. +#[derive(Debug)] pub struct ParsedToken { /// Parsed token. pub token: Token, -- cgit v1.2.3 From c6d0881382548da9e6f8a8362306e7c3948b84b8 Mon Sep 17 00:00:00 2001 From: Veetaha Date: Mon, 27 Jan 2020 01:38:16 +0200 Subject: add better docs for tokenize errors --- crates/ra_syntax/src/parsing/lexer.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 7cd7110de..bf6b4d637 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -107,9 +107,9 @@ pub fn first_token(text: &str) -> Option { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum TokenizeError { /// Base prefix was provided, but there were no digits - /// after it, e.g. `0x`. + /// after it, e.g. `0x`, `0b`. EmptyInt, - /// Float exponent lacks digits e.g. `e+`, `E+`, `e-`, `E-`, + /// Float exponent lacks digits e.g. 
`12.34e+`, `12.3E+`, `12e-`, `1_E-`, EmptyExponent, /// Block comment lacks trailing delimiter `*/` -- cgit v1.2.3 From bf60661aa3e2a77fedb3e1627675842d05538860 Mon Sep 17 00:00:00 2001 From: Veetaha Date: Mon, 27 Jan 2020 01:41:48 +0200 Subject: ra_syntax: remove backticks from TokenizeError message since that is not Markdown ;( --- crates/ra_syntax/src/syntax_error.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs index af18a30f2..45e11f404 100644 --- a/crates/ra_syntax/src/syntax_error.rs +++ b/crates/ra_syntax/src/syntax_error.rs @@ -126,31 +126,31 @@ impl fmt::Display for TokenizeError { TokenizeError::EmptyInt => "Missing digits after integer base prefix", TokenizeError::EmptyExponent => "Missing digits after the exponent symbol", TokenizeError::UnterminatedBlockComment => { - "Missing trailing `*/` symbols to terminate the block comment" + "Missing trailing */ symbols to terminate the block comment" } TokenizeError::UnterminatedChar => { - "Missing trailing `'` symbol to terminate the character literal" + "Missing trailing ' symbol to terminate the character literal" } TokenizeError::UnterminatedByte => { - "Missing trailing `'` symbol to terminate the byte literal" + "Missing trailing ' symbol to terminate the byte literal" } TokenizeError::UnterminatedString => { - "Missing trailing `\"` symbol to terminate the string literal" + "Missing trailing \" symbol to terminate the string literal" } TokenizeError::UnterminatedByteString => { - "Missing trailing `\"` symbol to terminate the byte string literal" + "Missing trailing \" symbol to terminate the byte string literal" } TokenizeError::UnterminatedRawString => { - "Missing trailing `\"` with `#` symbols to terminate the raw string literal" + "Missing trailing \" with # symbols to terminate the raw string literal" } TokenizeError::UnterminatedRawByteString => { - "Missing trailing `\"` with `#` 
symbols to terminate the raw byte string literal" + "Missing trailing \" with # symbols to terminate the raw byte string literal" } TokenizeError::UnstartedRawString => { - "Missing `\"` symbol after `#` symbols to begin the raw string literal" + "Missing \" symbol after # symbols to begin the raw string literal" } TokenizeError::UnstartedRawByteString => { - "Missing `\"` symbol after `#` symbols to begin the raw byte string literal" + "Missing \" symbol after # symbols to begin the raw byte string literal" } TokenizeError::LifetimeStartsWithNumber => "Lifetime name cannot start with a number", }; -- cgit v1.2.3 From 9e7eaa959f9dc368a55f1a80b35651b78b3d0883 Mon Sep 17 00:00:00 2001 From: Veetaha Date: Tue, 28 Jan 2020 07:09:13 +0200 Subject: ra_syntax: refactored the lexer design as per @matklad and @kiljacken PR review --- crates/ra_ide/src/references/rename.rs | 6 +- crates/ra_mbe/src/subtree_source.rs | 5 +- crates/ra_syntax/src/lib.rs | 4 +- crates/ra_syntax/src/parsing.rs | 12 +- crates/ra_syntax/src/parsing/lexer.rs | 313 +++++++++++++------------ crates/ra_syntax/src/parsing/reparsing.rs | 25 +- crates/ra_syntax/src/parsing/text_tree_sink.rs | 4 +- crates/ra_syntax/src/syntax_node.rs | 6 - crates/ra_syntax/src/tests.rs | 2 +- 9 files changed, 199 insertions(+), 178 deletions(-) diff --git a/crates/ra_ide/src/references/rename.rs b/crates/ra_ide/src/references/rename.rs index ad3e86f7c..9a84c1c88 100644 --- a/crates/ra_ide/src/references/rename.rs +++ b/crates/ra_ide/src/references/rename.rs @@ -2,7 +2,9 @@ use hir::ModuleSource; use ra_db::{RelativePath, RelativePathBuf, SourceDatabase, SourceDatabaseExt}; -use ra_syntax::{algo::find_node_at_offset, ast, single_token, AstNode, SyntaxKind, SyntaxNode}; +use ra_syntax::{ + algo::find_node_at_offset, ast, lex_single_valid_syntax_kind, AstNode, SyntaxKind, SyntaxNode, +}; use ra_text_edit::TextEdit; use crate::{ @@ -17,7 +19,7 @@ pub(crate) fn rename( position: FilePosition, new_name: &str, ) -> Option> { - 
match single_token(new_name)?.token.kind { + match lex_single_valid_syntax_kind(new_name)? { SyntaxKind::IDENT | SyntaxKind::UNDERSCORE => (), _ => return None, } diff --git a/crates/ra_mbe/src/subtree_source.rs b/crates/ra_mbe/src/subtree_source.rs index 72ac8df03..c9f42b3dd 100644 --- a/crates/ra_mbe/src/subtree_source.rs +++ b/crates/ra_mbe/src/subtree_source.rs @@ -1,7 +1,7 @@ //! FIXME: write short doc here use ra_parser::{Token, TokenSource}; -use ra_syntax::{single_token, SmolStr, SyntaxKind, SyntaxKind::*, T}; +use ra_syntax::{lex_single_valid_syntax_kind, SmolStr, SyntaxKind, SyntaxKind::*, T}; use std::cell::{Cell, Ref, RefCell}; use tt::buffer::{Cursor, TokenBuffer}; @@ -129,8 +129,7 @@ fn convert_delim(d: Option, closing: bool) -> TtToken { } fn convert_literal(l: &tt::Literal) -> TtToken { - let kind = single_token(&l.text) - .map(|parsed| parsed.token.kind) + let kind = lex_single_valid_syntax_kind(&l.text) .filter(|kind| kind.is_literal()) .unwrap_or_else(|| match l.text.as_ref() { "true" => T![true], diff --git a/crates/ra_syntax/src/lib.rs b/crates/ra_syntax/src/lib.rs index 80b3a0b22..f8f4b64c1 100644 --- a/crates/ra_syntax/src/lib.rs +++ b/crates/ra_syntax/src/lib.rs @@ -41,7 +41,9 @@ use crate::syntax_node::GreenNode; pub use crate::{ algo::InsertPosition, ast::{AstNode, AstToken}, - parsing::{first_token, single_token, tokenize, tokenize_append, Token, TokenizeError}, + parsing::{ + lex_single_syntax_kind, lex_single_valid_syntax_kind, tokenize, Token, TokenizeError, + }, ptr::{AstPtr, SyntaxNodePtr}, syntax_error::{Location, SyntaxError, SyntaxErrorKind}, syntax_node::{ diff --git a/crates/ra_syntax/src/parsing.rs b/crates/ra_syntax/src/parsing.rs index 4e51f920b..e5eb80850 100644 --- a/crates/ra_syntax/src/parsing.rs +++ b/crates/ra_syntax/src/parsing.rs @@ -15,9 +15,15 @@ pub use lexer::*; pub(crate) use self::reparsing::incremental_reparse; pub(crate) fn parse_text(text: &str) -> (GreenNode, Vec) { - let ParsedTokens { tokens, errors } = 
tokenize(&text); + let (tokens, lexer_errors) = tokenize(&text); + let mut token_source = TextTokenSource::new(text, &tokens); - let mut tree_sink = TextTreeSink::new(text, &tokens, errors); + let mut tree_sink = TextTreeSink::new(text, &tokens); + ra_parser::parse(&mut token_source, &mut tree_sink); - tree_sink.finish() + + let (tree, mut parser_errors) = tree_sink.finish(); + parser_errors.extend(lexer_errors); + + (tree, parser_errors) } diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index bf6b4d637..55755be18 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -16,55 +16,21 @@ pub struct Token { pub len: TextUnit, } -/// Represents the result of parsing one token. Beware that the token may be malformed. -#[derive(Debug)] -pub struct ParsedToken { - /// Parsed token. - pub token: Token, - /// If error is present then parsed token is malformed. - pub error: Option, -} - -#[derive(Debug, Default)] -/// Represents the result of parsing source code of Rust language. -pub struct ParsedTokens { - /// Parsed tokens in order they appear in source code. - pub tokens: Vec, - /// Collection of all occured tokenization errors. - /// In general `self.errors.len() <= self.tokens.len()` - pub errors: Vec, -} -impl ParsedTokens { - /// Append `token` and `error` (if pressent) to the result. - pub fn push(&mut self, ParsedToken { token, error }: ParsedToken) { - self.tokens.push(token); - if let Some(error) = error { - self.errors.push(error) - } - } -} - -/// Same as `tokenize_append()`, just a shortcut for creating `ParsedTokens` -/// and returning the result the usual way. -pub fn tokenize(text: &str) -> ParsedTokens { - let mut parsed = ParsedTokens::default(); - tokenize_append(text, &mut parsed); - parsed -} - /// Break a string up into its component tokens. -/// Writes to `ParsedTokens` which are basically a pair `(Vec, Vec)`. 
/// Beware that it checks for shebang first and its length contributes to resulting /// tokens offsets. -pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) { +pub fn tokenize(text: &str) -> (Vec, Vec) { // non-empty string is a precondtion of `rustc_lexer::strip_shebang()`. if text.is_empty() { - return; + return Default::default(); } + let mut tokens = Vec::new(); + let mut errors = Vec::new(); + let mut offset: usize = rustc_lexer::strip_shebang(text) .map(|shebang_len| { - parsed.tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) }); + tokens.push(Token { kind: SHEBANG, len: TextUnit::from_usize(shebang_len) }); shebang_len }) .unwrap_or(0); @@ -72,35 +38,76 @@ pub fn tokenize_append(text: &str, parsed: &mut ParsedTokens) { let text_without_shebang = &text[offset..]; for rustc_token in rustc_lexer::tokenize(text_without_shebang) { - parsed.push(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from_usize(offset))); + let token_len = TextUnit::from_usize(rustc_token.len); + let token_range = TextRange::offset_len(TextUnit::from_usize(offset), token_len); + + let (syntax_kind, error) = + rustc_token_kind_to_syntax_kind(&rustc_token.kind, &text[token_range]); + + tokens.push(Token { kind: syntax_kind, len: token_len }); + + if let Some(error) = error { + errors.push(SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range)); + } + offset += rustc_token.len; } + + (tokens, errors) } -/// Returns the first encountered token at the beginning of the string. -/// If the string contains zero or *two or more tokens* returns `None`. +/// Returns `SyntaxKind` and `Option` of the first token +/// encountered at the beginning of the string. +/// +/// Returns `None` if the string contains zero *or two or more* tokens. +/// The token is malformed if the returned error is not `None`. +/// +/// Beware that unescape errors are not checked at tokenization time. 
+pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option)> { + first_token(text) + .filter(|(token, _)| token.len.to_usize() == text.len()) + .map(|(token, error)| (token.kind, error)) +} + +/// The same as `lex_single_syntax_kind()` but returns only `SyntaxKind` and +/// returns `None` if any tokenization error occurred. /// -/// The main difference between `first_token()` and `single_token()` is that -/// the latter returns `None` if the string contains more than one token. -pub fn single_token(text: &str) -> Option { - first_token(text).filter(|parsed| parsed.token.len.to_usize() == text.len()) +/// Beware that unescape errors are not checked at tokenization time. +pub fn lex_single_valid_syntax_kind(text: &str) -> Option { + first_token(text) + .filter(|(token, error)| !error.is_some() && token.len.to_usize() == text.len()) + .map(|(token, _error)| token.kind) } /// Returns the first encountered token at the beginning of the string. -/// If the string contains zero tokens returns `None`. /// -/// The main difference between `first_token() and single_token()` is that -/// the latter returns `None` if the string contains more than one token. -pub fn first_token(text: &str) -> Option { +/// Returns `None` if the string contains zero tokens. +/// The token is malformed if the returned error is not `None`. +/// +/// Beware that unescape errors are not checked at tokenization time. +fn first_token(text: &str) -> Option<(Token, Option)> { // non-empty string is a precondtion of `rustc_lexer::first_token()`. 
if text.is_empty() { - None - } else { - let rustc_token = rustc_lexer::first_token(text); - Some(rustc_token_to_parsed_token(&rustc_token, text, TextUnit::from(0))) + return None; } + + let rustc_token = rustc_lexer::first_token(text); + let (syntax_kind, error) = rustc_token_kind_to_syntax_kind(&rustc_token.kind, text); + + let token = Token { kind: syntax_kind, len: TextUnit::from_usize(rustc_token.len) }; + let error = error.map(|error| { + SyntaxError::new( + SyntaxErrorKind::TokenizeError(error), + TextRange::from_to(TextUnit::from(0), TextUnit::of_str(text)), + ) + }); + + Some((token, error)) } +// FIXME: simplify TokenizeError to `SyntaxError(String, TextRange)` as per @matklad advice: +// https://github.com/rust-analyzer/rust-analyzer/pull/2911/files#r371175067 + /// Describes the values of `SyntaxErrorKind::TokenizeError` enum variant. /// It describes all the types of errors that may happen during the tokenization /// of Rust source. @@ -136,122 +143,132 @@ pub enum TokenizeError { LifetimeStartsWithNumber, } -/// Mapper function that converts `rustc_lexer::Token` with some additional context -/// to `ParsedToken` -fn rustc_token_to_parsed_token( - rustc_token: &rustc_lexer::Token, - text: &str, - token_start_offset: TextUnit, -) -> ParsedToken { +fn rustc_token_kind_to_syntax_kind( + rustc_token_kind: &rustc_lexer::TokenKind, + token_text: &str, +) -> (SyntaxKind, Option) { + // A note on an intended tradeoff: // We drop some useful infromation here (see patterns with double dots `..`) // Storing that info in `SyntaxKind` is not possible due to its layout requirements of - // being `u16` that come from `rowan::SyntaxKind` type and changes to `rowan::SyntaxKind` - // would mean hell of a rewrite + // being `u16` that come from `rowan::SyntaxKind`. 
- let token_range = - TextRange::offset_len(token_start_offset, TextUnit::from_usize(rustc_token.len)); - - let token_text = &text[token_range]; - - let (syntax_kind, error) = { + let syntax_kind = { use rustc_lexer::TokenKind as TK; use TokenizeError as TE; - match rustc_token.kind { - TK::LineComment => ok(COMMENT), - TK::BlockComment { terminated } => { - ok_if(terminated, COMMENT, TE::UnterminatedBlockComment) + match rustc_token_kind { + TK::LineComment => COMMENT, + + TK::BlockComment { terminated: true } => COMMENT, + TK::BlockComment { terminated: false } => { + return (COMMENT, Some(TE::UnterminatedBlockComment)); } - TK::Whitespace => ok(WHITESPACE), - TK::Ident => ok(if token_text == "_" { - UNDERSCORE - } else { - SyntaxKind::from_keyword(token_text).unwrap_or(IDENT) - }), - TK::RawIdent => ok(IDENT), - TK::Literal { kind, .. } => match_literal_kind(&kind), - TK::Lifetime { starts_with_number } => { - ok_if(!starts_with_number, LIFETIME, TE::LifetimeStartsWithNumber) + + TK::Whitespace => WHITESPACE, + + TK::Ident => { + if token_text == "_" { + UNDERSCORE + } else { + SyntaxKind::from_keyword(token_text).unwrap_or(IDENT) + } } - TK::Semi => ok(SEMI), - TK::Comma => ok(COMMA), - TK::Dot => ok(DOT), - TK::OpenParen => ok(L_PAREN), - TK::CloseParen => ok(R_PAREN), - TK::OpenBrace => ok(L_CURLY), - TK::CloseBrace => ok(R_CURLY), - TK::OpenBracket => ok(L_BRACK), - TK::CloseBracket => ok(R_BRACK), - TK::At => ok(AT), - TK::Pound => ok(POUND), - TK::Tilde => ok(TILDE), - TK::Question => ok(QUESTION), - TK::Colon => ok(COLON), - TK::Dollar => ok(DOLLAR), - TK::Eq => ok(EQ), - TK::Not => ok(EXCL), - TK::Lt => ok(L_ANGLE), - TK::Gt => ok(R_ANGLE), - TK::Minus => ok(MINUS), - TK::And => ok(AMP), - TK::Or => ok(PIPE), - TK::Plus => ok(PLUS), - TK::Star => ok(STAR), - TK::Slash => ok(SLASH), - TK::Caret => ok(CARET), - TK::Percent => ok(PERCENT), - TK::Unknown => ok(ERROR), - } - }; - return ParsedToken { - token: Token { kind: syntax_kind, len: token_range.len() 
}, - error: error - .map(|error| SyntaxError::new(SyntaxErrorKind::TokenizeError(error), token_range)), + TK::RawIdent => IDENT, + TK::Literal { kind, .. } => return match_literal_kind(&kind), + + TK::Lifetime { starts_with_number: false } => LIFETIME, + TK::Lifetime { starts_with_number: true } => { + return (LIFETIME, Some(TE::LifetimeStartsWithNumber)) + } + + TK::Semi => SEMI, + TK::Comma => COMMA, + TK::Dot => DOT, + TK::OpenParen => L_PAREN, + TK::CloseParen => R_PAREN, + TK::OpenBrace => L_CURLY, + TK::CloseBrace => R_CURLY, + TK::OpenBracket => L_BRACK, + TK::CloseBracket => R_BRACK, + TK::At => AT, + TK::Pound => POUND, + TK::Tilde => TILDE, + TK::Question => QUESTION, + TK::Colon => COLON, + TK::Dollar => DOLLAR, + TK::Eq => EQ, + TK::Not => EXCL, + TK::Lt => L_ANGLE, + TK::Gt => R_ANGLE, + TK::Minus => MINUS, + TK::And => AMP, + TK::Or => PIPE, + TK::Plus => PLUS, + TK::Star => STAR, + TK::Slash => SLASH, + TK::Caret => CARET, + TK::Percent => PERCENT, + TK::Unknown => ERROR, + } }; - type ParsedSyntaxKind = (SyntaxKind, Option); + return (syntax_kind, None); - fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> ParsedSyntaxKind { + fn match_literal_kind(kind: &rustc_lexer::LiteralKind) -> (SyntaxKind, Option) { use rustc_lexer::LiteralKind as LK; use TokenizeError as TE; - match *kind { - LK::Int { empty_int, .. } => ok_if(!empty_int, INT_NUMBER, TE::EmptyInt), - LK::Float { empty_exponent, .. } => { - ok_if(!empty_exponent, FLOAT_NUMBER, TE::EmptyExponent) + #[rustfmt::skip] + let syntax_kind = match *kind { + LK::Int { empty_int: false, .. } => INT_NUMBER, + LK::Int { empty_int: true, .. } => { + return (INT_NUMBER, Some(TE::EmptyInt)) + } + + LK::Float { empty_exponent: false, .. } => FLOAT_NUMBER, + LK::Float { empty_exponent: true, .. 
} => { + return (FLOAT_NUMBER, Some(TE::EmptyExponent)) + } + + LK::Char { terminated: true } => CHAR, + LK::Char { terminated: false } => { + return (CHAR, Some(TE::UnterminatedChar)) + } + + LK::Byte { terminated: true } => BYTE, + LK::Byte { terminated: false } => { + return (BYTE, Some(TE::UnterminatedByte)) } - LK::Char { terminated } => ok_if(terminated, CHAR, TE::UnterminatedChar), - LK::Byte { terminated } => ok_if(terminated, BYTE, TE::UnterminatedByte), - LK::Str { terminated } => ok_if(terminated, STRING, TE::UnterminatedString), - LK::ByteStr { terminated } => { - ok_if(terminated, BYTE_STRING, TE::UnterminatedByteString) + + LK::Str { terminated: true } => STRING, + LK::Str { terminated: false } => { + return (STRING, Some(TE::UnterminatedString)) + } + + + LK::ByteStr { terminated: true } => BYTE_STRING, + LK::ByteStr { terminated: false } => { + return (BYTE_STRING, Some(TE::UnterminatedByteString)) } - LK::RawStr { started: true, terminated, .. } => { - ok_if(terminated, RAW_STRING, TE::UnterminatedRawString) + LK::RawStr { started: true, terminated: true, .. } => RAW_STRING, + LK::RawStr { started: true, terminated: false, .. } => { + return (RAW_STRING, Some(TE::UnterminatedRawString)) + } + LK::RawStr { started: false, .. } => { + return (RAW_STRING, Some(TE::UnstartedRawString)) } - LK::RawStr { started: false, .. } => err(RAW_STRING, TE::UnstartedRawString), - LK::RawByteStr { started: true, terminated, .. } => { - ok_if(terminated, RAW_BYTE_STRING, TE::UnterminatedRawByteString) + LK::RawByteStr { started: true, terminated: true, .. } => RAW_BYTE_STRING, + LK::RawByteStr { started: true, terminated: false, .. } => { + return (RAW_BYTE_STRING, Some(TE::UnterminatedRawByteString)) } LK::RawByteStr { started: false, .. 
} => { - err(RAW_BYTE_STRING, TE::UnstartedRawByteString) + return (RAW_BYTE_STRING, Some(TE::UnstartedRawByteString)) } - } - } - const fn ok(syntax_kind: SyntaxKind) -> ParsedSyntaxKind { + }; + (syntax_kind, None) } - const fn err(syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind { - (syntax_kind, Some(error)) - } - fn ok_if(cond: bool, syntax_kind: SyntaxKind, error: TokenizeError) -> ParsedSyntaxKind { - if cond { - ok(syntax_kind) - } else { - err(syntax_kind, error) - } - } } diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index ad1a7c855..1f351e9fc 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -12,7 +12,7 @@ use ra_text_edit::AtomTextEdit; use crate::{ algo, parsing::{ - lexer::{single_token, tokenize, ParsedTokens, Token}, + lexer::{lex_single_syntax_kind, tokenize, Token}, text_token_source::TextTokenSource, text_tree_sink::TextTreeSink, }, @@ -54,7 +54,7 @@ fn reparse_token<'node>( } let mut new_text = get_text_after_edit(prev_token.clone().into(), &edit); - let new_token_kind = single_token(&new_text)?.token.kind; + let (new_token_kind, _error) = lex_single_syntax_kind(&new_text)?; if new_token_kind != prev_token_kind || (new_token_kind == IDENT && is_contextual_kw(&new_text)) @@ -67,8 +67,8 @@ fn reparse_token<'node>( // `b` no longer remains an identifier, but becomes a part of byte string literal if let Some(next_char) = root.text().char_at(prev_token.text_range().end()) { new_text.push(next_char); - let token_with_next_char = single_token(&new_text); - if token_with_next_char.is_some() { + let token_with_next_char = lex_single_syntax_kind(&new_text); + if let Some((_kind, _error)) = token_with_next_char { return None; } new_text.pop(); @@ -88,23 +88,26 @@ fn reparse_block<'node>( ) -> Option<(GreenNode, Vec, TextRange)> { let (node, reparser) = find_reparsable_node(root, edit.delete)?; let text = 
get_text_after_edit(node.clone().into(), &edit); - let ParsedTokens { tokens, errors } = tokenize(&text); + + let (tokens, new_lexer_errors) = tokenize(&text); if !is_balanced(&tokens) { return None; } + let mut token_source = TextTokenSource::new(&text, &tokens); - let mut tree_sink = TextTreeSink::new(&text, &tokens, errors); + let mut tree_sink = TextTreeSink::new(&text, &tokens); reparser.parse(&mut token_source, &mut tree_sink); - let (green, new_errors) = tree_sink.finish(); - Some((node.replace_with(green), new_errors, node.text_range())) + + let (green, mut new_parser_errors) = tree_sink.finish(); + new_parser_errors.extend(new_lexer_errors); + + Some((node.replace_with(green), new_parser_errors, node.text_range())) } fn get_text_after_edit(element: SyntaxElement, edit: &AtomTextEdit) -> String { let edit = AtomTextEdit::replace(edit.delete - element.text_range().start(), edit.insert.clone()); - // Note: we could move this match to a method or even further: use enum_dispatch crate - // https://crates.io/crates/enum_dispatch let text = match element { NodeOrToken::Token(token) => token.text().to_string(), NodeOrToken::Node(node) => node.text().to_string(), @@ -122,8 +125,6 @@ fn is_contextual_kw(text: &str) -> bool { fn find_reparsable_node(node: &SyntaxNode, range: TextRange) -> Option<(SyntaxNode, Reparser)> { let node = algo::find_covering_element(node, range); - // Note: we could move this match to a method or even further: use enum_dispatch crate - // https://crates.io/crates/enum_dispatch let mut ancestors = match node { NodeOrToken::Token(it) => it.parent().ancestors(), NodeOrToken::Node(it) => it.ancestors(), diff --git a/crates/ra_syntax/src/parsing/text_tree_sink.rs b/crates/ra_syntax/src/parsing/text_tree_sink.rs index 5faac588b..dd202601d 100644 --- a/crates/ra_syntax/src/parsing/text_tree_sink.rs +++ b/crates/ra_syntax/src/parsing/text_tree_sink.rs @@ -92,14 +92,14 @@ impl<'a> TreeSink for TextTreeSink<'a> { } impl<'a> TextTreeSink<'a> { - 
pub(super) fn new(text: &'a str, tokens: &'a [Token], errors: Vec) -> Self { + pub(super) fn new(text: &'a str, tokens: &'a [Token]) -> Self { Self { text, tokens, text_pos: 0.into(), token_pos: 0, state: State::PendingStart, - inner: SyntaxTreeBuilder::new(errors), + inner: SyntaxTreeBuilder::default(), } } diff --git a/crates/ra_syntax/src/syntax_node.rs b/crates/ra_syntax/src/syntax_node.rs index 591855302..7c2b18af3 100644 --- a/crates/ra_syntax/src/syntax_node.rs +++ b/crates/ra_syntax/src/syntax_node.rs @@ -44,12 +44,6 @@ pub struct SyntaxTreeBuilder { inner: GreenNodeBuilder<'static>, } -impl SyntaxTreeBuilder { - pub fn new(errors: Vec) -> Self { - Self { errors, inner: GreenNodeBuilder::default() } - } -} - impl SyntaxTreeBuilder { pub(crate) fn finish_raw(self) -> (GreenNode, Vec) { let green = self.inner.finish(); diff --git a/crates/ra_syntax/src/tests.rs b/crates/ra_syntax/src/tests.rs index df21c957c..f79dc4f93 100644 --- a/crates/ra_syntax/src/tests.rs +++ b/crates/ra_syntax/src/tests.rs @@ -11,7 +11,7 @@ use crate::{fuzz, SourceFile}; fn lexer_tests() { dir_tests(&test_data_dir(), &["lexer"], |text, _| { // FIXME: add tests for errors (their format is up to discussion) - let tokens = crate::tokenize(text).tokens; + let (tokens, _errors) = crate::tokenize(text); dump_tokens(&tokens, text) }) } -- cgit v1.2.3 From b1764d85fced5f3bc1db82063fca9369f9e1740b Mon Sep 17 00:00:00 2001 From: Veetaha Date: Tue, 28 Jan 2020 07:13:18 +0200 Subject: ra_syntax: fixed a typo in doc comment --- crates/ra_syntax/src/parsing/lexer.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index 55755be18..d1315e604 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -69,7 +69,7 @@ pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option Option { .map(|(token, _error)| token.kind) } -/// Returns the first 
encountered token at the beginning of the string. +/// Returns `SyntaxKind` and `Option` of the first token +/// encountered at the beginning of the string. /// /// Returns `None` if the string contains zero tokens or if the token was parsed /// with an error. +/// The token is malformed if the returned error is not `None`. /// /// Beware that unescape errors are not checked at tokenization time. fn first_token(text: &str) -> Option<(Token, Option)> { -- cgit v1.2.3 From 58e01d875477234c132061e3072ac19f4dfb7a32 Mon Sep 17 00:00:00 2001 From: Veetaha Date: Tue, 28 Jan 2020 07:18:35 +0200 Subject: ra_syntax: rename first_token() -> lex_first_token() --- crates/ra_syntax/src/parsing/lexer.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/ra_syntax/src/parsing/lexer.rs b/crates/ra_syntax/src/parsing/lexer.rs index d1315e604..f889e6a1d 100644 --- a/crates/ra_syntax/src/parsing/lexer.rs +++ b/crates/ra_syntax/src/parsing/lexer.rs @@ -64,7 +64,7 @@ pub fn tokenize(text: &str) -> (Vec, Vec) { /// /// Beware that unescape errors are not checked at tokenization time. pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option)> { - first_token(text) + lex_first_token(text) .filter(|(token, _)| token.len.to_usize() == text.len()) .map(|(token, error)| (token.kind, error)) } @@ -74,7 +74,7 @@ pub fn lex_single_syntax_kind(text: &str) -> Option<(SyntaxKind, Option Option { - first_token(text) + lex_first_token(text) .filter(|(token, error)| !error.is_some() && token.len.to_usize() == text.len()) .map(|(token, _error)| token.kind) } @@ -87,7 +87,7 @@ pub fn lex_single_valid_syntax_kind(text: &str) -> Option { /// The token is malformed if the returned error is not `None`. /// /// Beware that unescape errors are not checked at tokenization time. -fn first_token(text: &str) -> Option<(Token, Option)> { +fn lex_first_token(text: &str) -> Option<(Token, Option)> { // non-empty string is a precondition of `rustc_lexer::first_token()`.
if text.is_empty() { return None; -- cgit v1.2.3 From c3117eea31392f8f63f1352a127f6b77139c375b Mon Sep 17 00:00:00 2001 From: Veetaha Date: Tue, 28 Jan 2020 07:23:26 +0200 Subject: ra_syntax: removed unnecessary init statement from reparsing tests --- crates/ra_syntax/src/parsing/reparsing.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/crates/ra_syntax/src/parsing/reparsing.rs b/crates/ra_syntax/src/parsing/reparsing.rs index 1f351e9fc..a86da0675 100644 --- a/crates/ra_syntax/src/parsing/reparsing.rs +++ b/crates/ra_syntax/src/parsing/reparsing.rs @@ -194,9 +194,6 @@ mod tests { let fully_reparsed = SourceFile::parse(&after); let incrementally_reparsed: Parse = { let f = SourceFile::parse(&before); - // FIXME: it seems this initialization statement is unnecessary (see edit in outer scope) - // Investigate whether it should really be removed. - let edit = AtomTextEdit { delete: range, insert: replace_with.to_string() }; let (green, new_errors, range) = incremental_reparse(f.tree().syntax(), &edit, f.errors.to_vec()).unwrap(); assert_eq!(range.len(), reparsed_len.into(), "reparsed fragment has wrong length"); -- cgit v1.2.3 From 9367b9a2920073a3f79fdff80dcc97d727f6ce17 Mon Sep 17 00:00:00 2001 From: Veetaha Date: Thu, 30 Jan 2020 00:18:21 +0200 Subject: ra_syntax: add backticks around tokens specimen --- crates/ra_syntax/src/syntax_error.rs | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/crates/ra_syntax/src/syntax_error.rs b/crates/ra_syntax/src/syntax_error.rs index 45e11f404..7f9d36618 100644 --- a/crates/ra_syntax/src/syntax_error.rs +++ b/crates/ra_syntax/src/syntax_error.rs @@ -71,6 +71,10 @@ impl SyntaxError { self } + + pub fn debug_dump(&self, acc: &mut impl fmt::Write) { + writeln!(acc, "error {:?}: {}", self.location(), self.kind()).unwrap(); + } } impl fmt::Display for SyntaxError { @@ -122,37 +126,44 @@ impl fmt::Display for SyntaxErrorKind { impl fmt::Display for TokenizeError { fn 
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + #[rustfmt::skip] let msg = match self { - TokenizeError::EmptyInt => "Missing digits after integer base prefix", - TokenizeError::EmptyExponent => "Missing digits after the exponent symbol", + TokenizeError::EmptyInt => { + "Missing digits after the integer base prefix" + } + TokenizeError::EmptyExponent => { + "Missing digits after the exponent symbol" + } TokenizeError::UnterminatedBlockComment => { - "Missing trailing */ symbols to terminate the block comment" + "Missing trailing `*/` symbols to terminate the block comment" } TokenizeError::UnterminatedChar => { - "Missing trailing ' symbol to terminate the character literal" + "Missing trailing `'` symbol to terminate the character literal" } TokenizeError::UnterminatedByte => { - "Missing trailing ' symbol to terminate the byte literal" + "Missing trailing `'` symbol to terminate the byte literal" } TokenizeError::UnterminatedString => { - "Missing trailing \" symbol to terminate the string literal" + "Missing trailing `\"` symbol to terminate the string literal" } TokenizeError::UnterminatedByteString => { - "Missing trailing \" symbol to terminate the byte string literal" + "Missing trailing `\"` symbol to terminate the byte string literal" } TokenizeError::UnterminatedRawString => { - "Missing trailing \" with # symbols to terminate the raw string literal" + "Missing trailing `\"` with `#` symbols to terminate the raw string literal" } TokenizeError::UnterminatedRawByteString => { - "Missing trailing \" with # symbols to terminate the raw byte string literal" + "Missing trailing `\"` with `#` symbols to terminate the raw byte string literal" } TokenizeError::UnstartedRawString => { - "Missing \" symbol after # symbols to begin the raw string literal" + "Missing `\"` symbol after `#` symbols to begin the raw string literal" } TokenizeError::UnstartedRawByteString => { - "Missing \" symbol after # symbols to begin the raw byte string literal" + "Missing 
`\"` symbol after `#` symbols to begin the raw byte string literal" + } + TokenizeError::LifetimeStartsWithNumber => { + "Lifetime name cannot start with a number" } - TokenizeError::LifetimeStartsWithNumber => "Lifetime name cannot start with a number", }; write!(f, "{}", msg) } -- cgit v1.2.3 From a3e5663ae0206270156fbeb926a174a40abbddb0 Mon Sep 17 00:00:00 2001 From: Veetaha Date: Sat, 1 Feb 2020 22:25:01 +0200 Subject: ra_syntax: added tests for tokenization errors --- crates/ra_syntax/src/tests.rs | 80 ++++++++++++++++------ crates/ra_syntax/src/validation.rs | 6 ++ .../test_data/lexer/00012_block_comment.rs | 4 -- .../test_data/lexer/00012_block_comment.txt | 7 -- crates/ra_syntax/test_data/lexer/0001_hello.rs | 1 - crates/ra_syntax/test_data/lexer/0001_hello.txt | 3 - .../ra_syntax/test_data/lexer/0002_whitespace.rs | 4 -- .../ra_syntax/test_data/lexer/0002_whitespace.txt | 12 ---- crates/ra_syntax/test_data/lexer/0003_ident.rs | 1 - crates/ra_syntax/test_data/lexer/0003_ident.txt | 14 ---- crates/ra_syntax/test_data/lexer/0004_numbers.rs | 9 --- crates/ra_syntax/test_data/lexer/0004_numbers.txt | 66 ------------------ crates/ra_syntax/test_data/lexer/0005_symbols.rs | 6 -- crates/ra_syntax/test_data/lexer/0005_symbols.txt | 77 --------------------- crates/ra_syntax/test_data/lexer/0006_chars.rs | 1 - crates/ra_syntax/test_data/lexer/0006_chars.txt | 16 ----- crates/ra_syntax/test_data/lexer/0007_lifetimes.rs | 1 - .../ra_syntax/test_data/lexer/0007_lifetimes.txt | 8 --- .../ra_syntax/test_data/lexer/0008_byte_strings.rs | 3 - .../test_data/lexer/0008_byte_strings.txt | 22 ------ crates/ra_syntax/test_data/lexer/0009_strings.rs | 2 - crates/ra_syntax/test_data/lexer/0009_strings.txt | 8 --- crates/ra_syntax/test_data/lexer/0010_comments.rs | 3 - crates/ra_syntax/test_data/lexer/0010_comments.txt | 6 -- crates/ra_syntax/test_data/lexer/0011_keywords.rs | 3 - crates/ra_syntax/test_data/lexer/0011_keywords.txt | 64 ----------------- 
.../ra_syntax/test_data/lexer/0013_raw_strings.rs | 1 - .../ra_syntax/test_data/lexer/0013_raw_strings.txt | 2 - .../test_data/lexer/0014_unclosed_char.rs | 1 - .../test_data/lexer/0014_unclosed_char.txt | 1 - .../test_data/lexer/0015_unclosed_string.rs | 1 - .../test_data/lexer/0015_unclosed_string.txt | 1 - crates/ra_syntax/test_data/lexer/0016_raw_ident.rs | 1 - .../ra_syntax/test_data/lexer/0016_raw_ident.txt | 2 - .../lexer/err/0001_unclosed_char_at_eof.rs | 1 + .../lexer/err/0001_unclosed_char_at_eof.txt | 2 + .../lexer/err/0002_unclosed_char_with_ferris.rs | 1 + .../lexer/err/0002_unclosed_char_with_ferris.txt | 2 + .../err/0003_unclosed_char_with_ascii_escape.rs | 1 + .../err/0003_unclosed_char_with_ascii_escape.txt | 2 + .../err/0004_unclosed_char_with_unicode_escape.rs | 1 + .../err/0004_unclosed_char_with_unicode_escape.txt | 2 + .../lexer/err/0005_unclosed_char_with_space.rs | 1 + .../lexer/err/0005_unclosed_char_with_space.txt | 2 + .../lexer/err/0006_unclosed_char_with_slash.rs | 1 + .../lexer/err/0006_unclosed_char_with_slash.txt | 2 + .../lexer/err/0007_unclosed_char_with_slash_n.rs | 1 + .../lexer/err/0007_unclosed_char_with_slash_n.txt | 2 + .../0008_unclosed_char_with_slash_single_quote.rs | 1 + .../0008_unclosed_char_with_slash_single_quote.txt | 2 + .../lexer/err/0009_unclosed_byte_at_eof.rs | 1 + .../lexer/err/0009_unclosed_byte_at_eof.txt | 2 + .../lexer/err/0010_unclosed_byte_with_ferris.rs | 1 + .../lexer/err/0010_unclosed_byte_with_ferris.txt | 2 + .../err/0011_unclosed_byte_with_ascii_escape.rs | 1 + .../err/0011_unclosed_byte_with_ascii_escape.txt | 2 + .../err/0012_unclosed_byte_with_unicode_escape.rs | 1 + .../err/0012_unclosed_byte_with_unicode_escape.txt | 2 + .../lexer/err/0013_unclosed_byte_with_space.rs | 1 + .../lexer/err/0013_unclosed_byte_with_space.txt | 2 + .../lexer/err/0014_unclosed_byte_with_slash.rs | 1 + .../lexer/err/0014_unclosed_byte_with_slash.txt | 2 + .../lexer/err/0015_unclosed_byte_with_slash_n.rs | 1 + 
.../lexer/err/0015_unclosed_byte_with_slash_n.txt | 2 + .../0016_unclosed_byte_with_slash_single_quote.rs | 1 + .../0016_unclosed_byte_with_slash_single_quote.txt | 2 + .../lexer/err/0017_unclosed_string_at_eof.rs | 1 + .../lexer/err/0017_unclosed_string_at_eof.txt | 2 + .../lexer/err/0018_unclosed_string_with_ferris.rs | 1 + .../lexer/err/0018_unclosed_string_with_ferris.txt | 2 + .../err/0019_unclosed_string_with_ascii_escape.rs | 1 + .../err/0019_unclosed_string_with_ascii_escape.txt | 2 + .../0020_unclosed_string_with_unicode_escape.rs | 1 + .../0020_unclosed_string_with_unicode_escape.txt | 2 + .../lexer/err/0021_unclosed_string_with_space.rs | 1 + .../lexer/err/0021_unclosed_string_with_space.txt | 2 + .../lexer/err/0022_unclosed_string_with_slash.rs | 1 + .../lexer/err/0022_unclosed_string_with_slash.txt | 2 + .../lexer/err/0023_unclosed_string_with_slash_n.rs | 1 + .../err/0023_unclosed_string_with_slash_n.txt | 2 + ...0024_unclosed_string_with_slash_double_quote.rs | 1 + ...024_unclosed_string_with_slash_double_quote.txt | 2 + .../lexer/err/0025_unclosed_byte_string_at_eof.rs | 1 + .../lexer/err/0025_unclosed_byte_string_at_eof.txt | 2 + .../err/0026_unclosed_byte_string_with_ferris.rs | 1 + .../err/0026_unclosed_byte_string_with_ferris.txt | 2 + .../0027_unclosed_byte_string_with_ascii_escape.rs | 1 + ...0027_unclosed_byte_string_with_ascii_escape.txt | 2 + ...028_unclosed_byte_string_with_unicode_escape.rs | 1 + ...28_unclosed_byte_string_with_unicode_escape.txt | 2 + .../err/0029_unclosed_byte_string_with_space.rs | 1 + .../err/0029_unclosed_byte_string_with_space.txt | 2 + .../err/0030_unclosed_byte_string_with_slash.rs | 1 + .../err/0030_unclosed_byte_string_with_slash.txt | 2 + .../err/0031_unclosed_byte_string_with_slash_n.rs | 1 + .../err/0031_unclosed_byte_string_with_slash_n.txt | 2 + ...unclosed_byte_string_with_slash_double_quote.rs | 1 + ...nclosed_byte_string_with_slash_double_quote.txt | 2 + .../lexer/err/0033_unclosed_raw_string_at_eof.rs | 
1 + .../lexer/err/0033_unclosed_raw_string_at_eof.txt | 2 + .../err/0034_unclosed_raw_string_with_ferris.rs | 1 + .../err/0034_unclosed_raw_string_with_ferris.txt | 2 + .../0035_unclosed_raw_string_with_ascii_escape.rs | 1 + .../0035_unclosed_raw_string_with_ascii_escape.txt | 2 + ...0036_unclosed_raw_string_with_unicode_escape.rs | 1 + ...036_unclosed_raw_string_with_unicode_escape.txt | 2 + .../err/0037_unclosed_raw_string_with_space.rs | 1 + .../err/0037_unclosed_raw_string_with_space.txt | 2 + .../err/0038_unclosed_raw_string_with_slash.rs | 1 + .../err/0038_unclosed_raw_string_with_slash.txt | 2 + .../err/0039_unclosed_raw_string_with_slash_n.rs | 1 + .../err/0039_unclosed_raw_string_with_slash_n.txt | 2 + .../err/0040_unclosed_raw_byte_string_at_eof.rs | 1 + .../err/0040_unclosed_raw_byte_string_at_eof.txt | 2 + .../0041_unclosed_raw_byte_string_with_ferris.rs | 1 + .../0041_unclosed_raw_byte_string_with_ferris.txt | 2 + ...2_unclosed_raw_byte_string_with_ascii_escape.rs | 1 + ..._unclosed_raw_byte_string_with_ascii_escape.txt | 2 + ...unclosed_raw_byte_string_with_unicode_escape.rs | 1 + ...nclosed_raw_byte_string_with_unicode_escape.txt | 2 + .../0044_unclosed_raw_byte_string_with_space.rs | 1 + .../0044_unclosed_raw_byte_string_with_space.txt | 2 + .../0045_unclosed_raw_byte_string_with_slash.rs | 1 + .../0045_unclosed_raw_byte_string_with_slash.txt | 2 + .../0046_unclosed_raw_byte_string_with_slash_n.rs | 1 + .../0046_unclosed_raw_byte_string_with_slash_n.txt | 2 + .../lexer/err/0047_unstarted_raw_string_at_eof.rs | 1 + .../lexer/err/0047_unstarted_raw_string_at_eof.txt | 2 + .../err/0048_unstarted_raw_byte_string_at_eof.rs | 1 + .../err/0048_unstarted_raw_byte_string_at_eof.txt | 2 + .../err/0049_unstarted_raw_string_with_ascii.rs | 1 + .../err/0049_unstarted_raw_string_with_ascii.txt | 10 +++ .../0050_unstarted_raw_byte_string_with_ascii.rs | 1 + .../0050_unstarted_raw_byte_string_with_ascii.txt | 10 +++ .../err/0051_unclosed_block_comment_at_eof.rs | 1 
+ .../err/0051_unclosed_block_comment_at_eof.txt | 2 + .../0052_unclosed_block_comment_with_content.rs | 1 + .../0052_unclosed_block_comment_with_content.txt | 2 + .../0053_unclosed_nested_block_comment_entirely.rs | 1 + ...0053_unclosed_nested_block_comment_entirely.txt | 2 + ...0054_unclosed_nested_block_comment_partially.rs | 1 + ...054_unclosed_nested_block_comment_partially.txt | 2 + .../test_data/lexer/err/0055_empty_int.rs | 17 +++++ .../test_data/lexer/err/0055_empty_int.txt | 39 +++++++++++ .../test_data/lexer/err/0056_empty_exponent.rs | 22 ++++++ .../test_data/lexer/err/0056_empty_exponent.txt | 62 +++++++++++++++++ .../err/0057_lifetime_strarts_with_a_number.rs | 2 + .../err/0057_lifetime_strarts_with_a_number.txt | 6 ++ crates/ra_syntax/test_data/lexer/ok/0001_hello.rs | 1 + crates/ra_syntax/test_data/lexer/ok/0001_hello.txt | 3 + .../test_data/lexer/ok/0002_whitespace.rs | 4 ++ .../test_data/lexer/ok/0002_whitespace.txt | 12 ++++ crates/ra_syntax/test_data/lexer/ok/0003_ident.rs | 1 + crates/ra_syntax/test_data/lexer/ok/0003_ident.txt | 14 ++++ .../ra_syntax/test_data/lexer/ok/0004_numbers.rs | 9 +++ .../ra_syntax/test_data/lexer/ok/0004_numbers.txt | 57 +++++++++++++++ .../ra_syntax/test_data/lexer/ok/0005_symbols.rs | 6 ++ .../ra_syntax/test_data/lexer/ok/0005_symbols.txt | 77 +++++++++++++++++++++ crates/ra_syntax/test_data/lexer/ok/0006_chars.rs | 1 + crates/ra_syntax/test_data/lexer/ok/0006_chars.txt | 16 +++++ .../ra_syntax/test_data/lexer/ok/0007_lifetimes.rs | 1 + .../test_data/lexer/ok/0007_lifetimes.txt | 8 +++ .../test_data/lexer/ok/0008_byte_strings.rs | 3 + .../test_data/lexer/ok/0008_byte_strings.txt | 22 ++++++ .../ra_syntax/test_data/lexer/ok/0009_strings.rs | 2 + .../ra_syntax/test_data/lexer/ok/0009_strings.txt | 8 +++ .../lexer/ok/0010_single_line_comments.rs | 12 ++++ .../lexer/ok/0010_single_line_comments.txt | 22 ++++++ .../ra_syntax/test_data/lexer/ok/0011_keywords.rs | 3 + .../ra_syntax/test_data/lexer/ok/0011_keywords.txt | 64 
+++++++++++++++++ .../test_data/lexer/ok/0012_block_comment.rs | 3 + .../test_data/lexer/ok/0012_block_comment.txt | 6 ++ .../test_data/lexer/ok/0013_raw_strings.rs | 1 + .../test_data/lexer/ok/0013_raw_strings.txt | 2 + .../ra_syntax/test_data/lexer/ok/0014_raw_ident.rs | 1 + .../test_data/lexer/ok/0014_raw_ident.txt | 2 + 176 files changed, 751 insertions(+), 373 deletions(-) delete mode 100644 crates/ra_syntax/test_data/lexer/00012_block_comment.rs delete mode 100644 crates/ra_syntax/test_data/lexer/00012_block_comment.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0001_hello.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0001_hello.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0002_whitespace.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0002_whitespace.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0003_ident.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0003_ident.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0004_numbers.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0004_numbers.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0005_symbols.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0005_symbols.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0006_chars.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0006_chars.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0007_lifetimes.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0007_lifetimes.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0008_byte_strings.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0008_byte_strings.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0009_strings.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0009_strings.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0010_comments.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0010_comments.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0011_keywords.rs delete mode 100644 
crates/ra_syntax/test_data/lexer/0011_keywords.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0013_raw_strings.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0013_raw_strings.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0014_unclosed_char.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0014_unclosed_char.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0015_unclosed_string.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0015_unclosed_string.txt delete mode 100644 crates/ra_syntax/test_data/lexer/0016_raw_ident.rs delete mode 100644 crates/ra_syntax/test_data/lexer/0016_raw_ident.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0001_unclosed_char_at_eof.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0001_unclosed_char_at_eof.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0002_unclosed_char_with_ferris.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0002_unclosed_char_with_ferris.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0003_unclosed_char_with_ascii_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0003_unclosed_char_with_ascii_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0004_unclosed_char_with_unicode_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0004_unclosed_char_with_unicode_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0005_unclosed_char_with_space.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0005_unclosed_char_with_space.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0006_unclosed_char_with_slash.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0006_unclosed_char_with_slash.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0007_unclosed_char_with_slash_n.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0007_unclosed_char_with_slash_n.txt create mode 100644 
crates/ra_syntax/test_data/lexer/err/0008_unclosed_char_with_slash_single_quote.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0008_unclosed_char_with_slash_single_quote.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0009_unclosed_byte_at_eof.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0009_unclosed_byte_at_eof.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0010_unclosed_byte_with_ferris.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0010_unclosed_byte_with_ferris.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0011_unclosed_byte_with_ascii_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0011_unclosed_byte_with_ascii_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0012_unclosed_byte_with_unicode_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0012_unclosed_byte_with_unicode_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0013_unclosed_byte_with_space.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0013_unclosed_byte_with_space.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0014_unclosed_byte_with_slash.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0014_unclosed_byte_with_slash.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0015_unclosed_byte_with_slash_n.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0015_unclosed_byte_with_slash_n.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0016_unclosed_byte_with_slash_single_quote.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0016_unclosed_byte_with_slash_single_quote.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0017_unclosed_string_at_eof.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0017_unclosed_string_at_eof.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0018_unclosed_string_with_ferris.rs create mode 100644 
crates/ra_syntax/test_data/lexer/err/0018_unclosed_string_with_ferris.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0019_unclosed_string_with_ascii_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0019_unclosed_string_with_ascii_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0020_unclosed_string_with_unicode_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0020_unclosed_string_with_unicode_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0021_unclosed_string_with_space.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0021_unclosed_string_with_space.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0022_unclosed_string_with_slash.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0022_unclosed_string_with_slash.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0023_unclosed_string_with_slash_n.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0023_unclosed_string_with_slash_n.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0024_unclosed_string_with_slash_double_quote.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0024_unclosed_string_with_slash_double_quote.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0025_unclosed_byte_string_at_eof.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0025_unclosed_byte_string_at_eof.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0026_unclosed_byte_string_with_ferris.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0026_unclosed_byte_string_with_ferris.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0027_unclosed_byte_string_with_ascii_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0027_unclosed_byte_string_with_ascii_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0028_unclosed_byte_string_with_unicode_escape.rs create mode 100644 
crates/ra_syntax/test_data/lexer/err/0028_unclosed_byte_string_with_unicode_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0029_unclosed_byte_string_with_space.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0029_unclosed_byte_string_with_space.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0030_unclosed_byte_string_with_slash.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0030_unclosed_byte_string_with_slash.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0031_unclosed_byte_string_with_slash_n.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0031_unclosed_byte_string_with_slash_n.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0032_unclosed_byte_string_with_slash_double_quote.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0032_unclosed_byte_string_with_slash_double_quote.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0033_unclosed_raw_string_at_eof.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0033_unclosed_raw_string_at_eof.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0034_unclosed_raw_string_with_ferris.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0034_unclosed_raw_string_with_ferris.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0035_unclosed_raw_string_with_ascii_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0035_unclosed_raw_string_with_ascii_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0036_unclosed_raw_string_with_unicode_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0036_unclosed_raw_string_with_unicode_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0037_unclosed_raw_string_with_space.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0037_unclosed_raw_string_with_space.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0038_unclosed_raw_string_with_slash.rs create mode 100644 
crates/ra_syntax/test_data/lexer/err/0038_unclosed_raw_string_with_slash.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0039_unclosed_raw_string_with_slash_n.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0039_unclosed_raw_string_with_slash_n.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0040_unclosed_raw_byte_string_at_eof.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0040_unclosed_raw_byte_string_at_eof.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0041_unclosed_raw_byte_string_with_ferris.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0041_unclosed_raw_byte_string_with_ferris.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0042_unclosed_raw_byte_string_with_ascii_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0042_unclosed_raw_byte_string_with_ascii_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0043_unclosed_raw_byte_string_with_unicode_escape.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0043_unclosed_raw_byte_string_with_unicode_escape.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0044_unclosed_raw_byte_string_with_space.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0044_unclosed_raw_byte_string_with_space.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0045_unclosed_raw_byte_string_with_slash.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0045_unclosed_raw_byte_string_with_slash.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0046_unclosed_raw_byte_string_with_slash_n.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0046_unclosed_raw_byte_string_with_slash_n.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0047_unstarted_raw_string_at_eof.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0047_unstarted_raw_string_at_eof.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0048_unstarted_raw_byte_string_at_eof.rs create 
mode 100644 crates/ra_syntax/test_data/lexer/err/0048_unstarted_raw_byte_string_at_eof.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0049_unstarted_raw_string_with_ascii.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0049_unstarted_raw_string_with_ascii.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0050_unstarted_raw_byte_string_with_ascii.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0050_unstarted_raw_byte_string_with_ascii.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0051_unclosed_block_comment_at_eof.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0051_unclosed_block_comment_at_eof.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0052_unclosed_block_comment_with_content.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0052_unclosed_block_comment_with_content.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0053_unclosed_nested_block_comment_entirely.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0053_unclosed_nested_block_comment_entirely.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0054_unclosed_nested_block_comment_partially.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0054_unclosed_nested_block_comment_partially.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0055_empty_int.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0055_empty_int.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt create mode 100644 crates/ra_syntax/test_data/lexer/err/0057_lifetime_strarts_with_a_number.rs create mode 100644 crates/ra_syntax/test_data/lexer/err/0057_lifetime_strarts_with_a_number.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0001_hello.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0001_hello.txt create mode 100644 
crates/ra_syntax/test_data/lexer/ok/0002_whitespace.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0002_whitespace.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0003_ident.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0003_ident.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0004_numbers.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0004_numbers.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0005_symbols.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0005_symbols.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0006_chars.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0006_chars.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0007_lifetimes.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0007_lifetimes.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0008_byte_strings.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0008_byte_strings.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0009_strings.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0009_strings.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0010_single_line_comments.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0010_single_line_comments.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0011_keywords.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0011_keywords.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0012_block_comment.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0012_block_comment.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0013_raw_strings.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0013_raw_strings.txt create mode 100644 crates/ra_syntax/test_data/lexer/ok/0014_raw_ident.rs create mode 100644 crates/ra_syntax/test_data/lexer/ok/0014_raw_ident.txt diff --git a/crates/ra_syntax/src/tests.rs b/crates/ra_syntax/src/tests.rs index 
f79dc4f93..fb22b9e54 100644 --- a/crates/ra_syntax/src/tests.rs +++ b/crates/ra_syntax/src/tests.rs @@ -1,19 +1,28 @@ use std::{ fmt::Write, - path::{Component, PathBuf}, + path::{Component, Path, PathBuf}, }; use test_utils::{collect_tests, dir_tests, project_dir, read_text}; -use crate::{fuzz, SourceFile}; +use crate::{fuzz, tokenize, Location, SourceFile, SyntaxError, TextRange, Token}; #[test] fn lexer_tests() { - dir_tests(&test_data_dir(), &["lexer"], |text, _| { - // FIXME: add tests for errors (their format is up to discussion) - let (tokens, _errors) = crate::tokenize(text); - dump_tokens(&tokens, text) - }) + // FIXME: + // * Add tests for unicode escapes in byte-character and [raw]-byte-string literals + // * Add tests for unescape errors + + dir_tests(&test_data_dir(), &["lexer/ok"], |text, path| { + let (tokens, errors) = tokenize(text); + assert_errors_are_absent(&errors, path); + dump_tokens_and_errors(&tokens, &errors, text) + }); + dir_tests(&test_data_dir(), &["lexer/err"], |text, path| { + let (tokens, errors) = tokenize(text); + assert_errors_are_present(&errors, path); + dump_tokens_and_errors(&tokens, &errors, text) + }); } #[test] @@ -33,18 +42,13 @@ fn parser_tests() { dir_tests(&test_data_dir(), &["parser/inline/ok", "parser/ok"], |text, path| { let parse = SourceFile::parse(text); let errors = parse.errors(); - assert_eq!( - errors, - &[] as &[crate::SyntaxError], - "There should be no errors in the file {:?}", - path.display(), - ); + assert_errors_are_absent(&errors, path); parse.debug_dump() }); dir_tests(&test_data_dir(), &["parser/err", "parser/inline/err"], |text, path| { let parse = SourceFile::parse(text); let errors = parse.errors(); - assert!(!errors.is_empty(), "There should be errors in the file {:?}", path.display()); + assert_errors_are_present(&errors, path); parse.debug_dump() }); } @@ -76,7 +80,7 @@ fn self_hosting_parsing() { .into_iter() .filter_entry(|entry| { !entry.path().components().any(|component| { - // Get all 
files which are not in the crates/ra_syntax/tests/data folder + // Get all files which are not in the crates/ra_syntax/test_data folder component == Component::Normal(OsStr::new("test_data")) }) }) @@ -102,15 +106,47 @@ fn test_data_dir() -> PathBuf { project_dir().join("crates/ra_syntax/test_data") } -fn dump_tokens(tokens: &[crate::Token], text: &str) -> String { +fn assert_errors_are_present(errors: &[SyntaxError], path: &Path) { + assert!(!errors.is_empty(), "There should be errors in the file {:?}", path.display()); +} +fn assert_errors_are_absent(errors: &[SyntaxError], path: &Path) { + assert_eq!( + errors, + &[] as &[SyntaxError], + "There should be no errors in the file {:?}", + path.display(), + ); +} + +fn dump_tokens_and_errors(tokens: &[Token], errors: &[SyntaxError], text: &str) -> String { let mut acc = String::new(); let mut offset = 0; for token in tokens { - let len: u32 = token.len.into(); - let len = len as usize; - let token_text = &text[offset..offset + len]; - offset += len; - write!(acc, "{:?} {} {:?}\n", token.kind, token.len, token_text).unwrap() + let token_len = token.len.to_usize(); + let token_text = &text[offset..offset + token_len]; + offset += token_len; + writeln!(acc, "{:?} {} {:?}", token.kind, token_len, token_text).unwrap(); + } + for err in errors { + let err_range = location_to_range(err.location()); + writeln!( + acc, + "> error{:?} token({:?}) msg({})", + err.location(), + &text[err_range], + err.kind() + ) + .unwrap(); + } + return acc; + + // FIXME: copy-pasted this from `ra_ide/src/diagnostics.rs` + // `Location` will be refactored soon in new PR, see todos here: + // https://github.com/rust-analyzer/rust-analyzer/issues/223 + fn location_to_range(location: Location) -> TextRange { + match location { + Location::Offset(offset) => TextRange::offset_len(offset, 1.into()), + Location::Range(range) => range, + } } - acc } diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs index 
445e3b3e4..8a5f0e4b7 100644 --- a/crates/ra_syntax/src/validation.rs +++ b/crates/ra_syntax/src/validation.rs @@ -94,6 +94,12 @@ impl From for SyntaxErrorKind { } pub(crate) fn validate(root: &SyntaxNode) -> Vec { + // FIXME: + // * Add validation of character literal containing only a single char + // * Add validation of `crate` keyword not appearing in the middle of the symbol path + // * Add validation of doc comments are being attached to nodes + // * Remove validation of unterminated literals (it is already implemented in `tokenize()`) + let mut errors = Vec::new(); for node in root.descendants() { match_ast! { diff --git a/crates/ra_syntax/test_data/lexer/00012_block_comment.rs b/crates/ra_syntax/test_data/lexer/00012_block_comment.rs deleted file mode 100644 index 708aac197..000000000 --- a/crates/ra_syntax/test_data/lexer/00012_block_comment.rs +++ /dev/null @@ -1,4 +0,0 @@ -/* */ -/**/ -/* /* */ */ -/* diff --git a/crates/ra_syntax/test_data/lexer/00012_block_comment.txt b/crates/ra_syntax/test_data/lexer/00012_block_comment.txt deleted file mode 100644 index 9958b2518..000000000 --- a/crates/ra_syntax/test_data/lexer/00012_block_comment.txt +++ /dev/null @@ -1,7 +0,0 @@ -COMMENT 5 "/* */" -WHITESPACE 1 "\n" -COMMENT 4 "/**/" -WHITESPACE 1 "\n" -COMMENT 11 "/* /* */ */" -WHITESPACE 1 "\n" -COMMENT 3 "/*\n" diff --git a/crates/ra_syntax/test_data/lexer/0001_hello.rs b/crates/ra_syntax/test_data/lexer/0001_hello.rs deleted file mode 100644 index 95d09f2b1..000000000 --- a/crates/ra_syntax/test_data/lexer/0001_hello.rs +++ /dev/null @@ -1 +0,0 @@ -hello world \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/0001_hello.txt b/crates/ra_syntax/test_data/lexer/0001_hello.txt deleted file mode 100644 index 27a5940a9..000000000 --- a/crates/ra_syntax/test_data/lexer/0001_hello.txt +++ /dev/null @@ -1,3 +0,0 @@ -IDENT 5 "hello" -WHITESPACE 1 " " -IDENT 5 "world" diff --git a/crates/ra_syntax/test_data/lexer/0002_whitespace.rs 
b/crates/ra_syntax/test_data/lexer/0002_whitespace.rs deleted file mode 100644 index 08fce1418..000000000 --- a/crates/ra_syntax/test_data/lexer/0002_whitespace.rs +++ /dev/null @@ -1,4 +0,0 @@ -a b c -d - -e f diff --git a/crates/ra_syntax/test_data/lexer/0002_whitespace.txt b/crates/ra_syntax/test_data/lexer/0002_whitespace.txt deleted file mode 100644 index 01d260918..000000000 --- a/crates/ra_syntax/test_data/lexer/0002_whitespace.txt +++ /dev/null @@ -1,12 +0,0 @@ -IDENT 1 "a" -WHITESPACE 1 " " -IDENT 1 "b" -WHITESPACE 2 " " -IDENT 1 "c" -WHITESPACE 1 "\n" -IDENT 1 "d" -WHITESPACE 2 "\n\n" -IDENT 1 "e" -WHITESPACE 1 "\t" -IDENT 1 "f" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/0003_ident.rs b/crates/ra_syntax/test_data/lexer/0003_ident.rs deleted file mode 100644 index c05c9c009..000000000 --- a/crates/ra_syntax/test_data/lexer/0003_ident.rs +++ /dev/null @@ -1 +0,0 @@ -foo foo_ _foo _ __ x ΠΏΡ€ΠΈΠ²Π΅Ρ‚ diff --git a/crates/ra_syntax/test_data/lexer/0003_ident.txt b/crates/ra_syntax/test_data/lexer/0003_ident.txt deleted file mode 100644 index 4a0d5c053..000000000 --- a/crates/ra_syntax/test_data/lexer/0003_ident.txt +++ /dev/null @@ -1,14 +0,0 @@ -IDENT 3 "foo" -WHITESPACE 1 " " -IDENT 4 "foo_" -WHITESPACE 1 " " -IDENT 4 "_foo" -WHITESPACE 1 " " -UNDERSCORE 1 "_" -WHITESPACE 1 " " -IDENT 2 "__" -WHITESPACE 1 " " -IDENT 1 "x" -WHITESPACE 1 " " -IDENT 12 "ΠΏΡ€ΠΈΠ²Π΅Ρ‚" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/0004_numbers.rs b/crates/ra_syntax/test_data/lexer/0004_numbers.rs deleted file mode 100644 index dc974b553..000000000 --- a/crates/ra_syntax/test_data/lexer/0004_numbers.rs +++ /dev/null @@ -1,9 +0,0 @@ -0 0b 0o 0x 00 0_ 0. 
0e 0E 0z -01790 0b1790 0o1790 0x1790aAbBcCdDeEfF 001279 0_1279 0.1279 0e1279 0E1279 -0..2 -0.foo() -0e+1 -0.e+1 -0.0E-2 -0___0.10000____0000e+111__ -1i64 92.0f32 11__s \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/0004_numbers.txt b/crates/ra_syntax/test_data/lexer/0004_numbers.txt deleted file mode 100644 index 7bb89b8ae..000000000 --- a/crates/ra_syntax/test_data/lexer/0004_numbers.txt +++ /dev/null @@ -1,66 +0,0 @@ -INT_NUMBER 1 "0" -WHITESPACE 1 " " -INT_NUMBER 2 "0b" -WHITESPACE 1 " " -INT_NUMBER 2 "0o" -WHITESPACE 1 " " -INT_NUMBER 2 "0x" -WHITESPACE 1 " " -INT_NUMBER 2 "00" -WHITESPACE 1 " " -INT_NUMBER 2 "0_" -WHITESPACE 1 " " -FLOAT_NUMBER 2 "0." -WHITESPACE 1 " " -FLOAT_NUMBER 2 "0e" -WHITESPACE 1 " " -FLOAT_NUMBER 2 "0E" -WHITESPACE 1 " " -INT_NUMBER 2 "0z" -WHITESPACE 1 "\n" -INT_NUMBER 5 "01790" -WHITESPACE 1 " " -INT_NUMBER 6 "0b1790" -WHITESPACE 1 " " -INT_NUMBER 6 "0o1790" -WHITESPACE 1 " " -INT_NUMBER 18 "0x1790aAbBcCdDeEfF" -WHITESPACE 1 " " -INT_NUMBER 6 "001279" -WHITESPACE 1 " " -INT_NUMBER 6 "0_1279" -WHITESPACE 1 " " -FLOAT_NUMBER 6 "0.1279" -WHITESPACE 1 " " -FLOAT_NUMBER 6 "0e1279" -WHITESPACE 1 " " -FLOAT_NUMBER 6 "0E1279" -WHITESPACE 1 "\n" -INT_NUMBER 1 "0" -DOT 1 "." -DOT 1 "." -INT_NUMBER 1 "2" -WHITESPACE 1 "\n" -INT_NUMBER 1 "0" -DOT 1 "." -IDENT 3 "foo" -L_PAREN 1 "(" -R_PAREN 1 ")" -WHITESPACE 1 "\n" -FLOAT_NUMBER 4 "0e+1" -WHITESPACE 1 "\n" -INT_NUMBER 1 "0" -DOT 1 "." 
-IDENT 1 "e" -PLUS 1 "+" -INT_NUMBER 1 "1" -WHITESPACE 1 "\n" -FLOAT_NUMBER 6 "0.0E-2" -WHITESPACE 1 "\n" -FLOAT_NUMBER 26 "0___0.10000____0000e+111__" -WHITESPACE 1 "\n" -INT_NUMBER 4 "1i64" -WHITESPACE 1 " " -FLOAT_NUMBER 7 "92.0f32" -WHITESPACE 1 " " -INT_NUMBER 5 "11__s" diff --git a/crates/ra_syntax/test_data/lexer/0005_symbols.rs b/crates/ra_syntax/test_data/lexer/0005_symbols.rs deleted file mode 100644 index 487569b5a..000000000 --- a/crates/ra_syntax/test_data/lexer/0005_symbols.rs +++ /dev/null @@ -1,6 +0,0 @@ -; , ( ) { } [ ] < > @ # ~ ? $ & | + * / ^ % -. .. ... ..= -: :: -= => -! != -- -> diff --git a/crates/ra_syntax/test_data/lexer/0005_symbols.txt b/crates/ra_syntax/test_data/lexer/0005_symbols.txt deleted file mode 100644 index 469a90e42..000000000 --- a/crates/ra_syntax/test_data/lexer/0005_symbols.txt +++ /dev/null @@ -1,77 +0,0 @@ -SEMI 1 ";" -WHITESPACE 1 " " -COMMA 1 "," -WHITESPACE 1 " " -L_PAREN 1 "(" -WHITESPACE 1 " " -R_PAREN 1 ")" -WHITESPACE 1 " " -L_CURLY 1 "{" -WHITESPACE 1 " " -R_CURLY 1 "}" -WHITESPACE 1 " " -L_BRACK 1 "[" -WHITESPACE 1 " " -R_BRACK 1 "]" -WHITESPACE 1 " " -L_ANGLE 1 "<" -WHITESPACE 1 " " -R_ANGLE 1 ">" -WHITESPACE 1 " " -AT 1 "@" -WHITESPACE 1 " " -POUND 1 "#" -WHITESPACE 1 " " -TILDE 1 "~" -WHITESPACE 1 " " -QUESTION 1 "?" -WHITESPACE 1 " " -DOLLAR 1 "$" -WHITESPACE 1 " " -AMP 1 "&" -WHITESPACE 1 " " -PIPE 1 "|" -WHITESPACE 1 " " -PLUS 1 "+" -WHITESPACE 1 " " -STAR 1 "*" -WHITESPACE 1 " " -SLASH 1 "/" -WHITESPACE 1 " " -CARET 1 "^" -WHITESPACE 1 " " -PERCENT 1 "%" -WHITESPACE 1 "\n" -DOT 1 "." -WHITESPACE 1 " " -DOT 1 "." -DOT 1 "." -WHITESPACE 1 " " -DOT 1 "." -DOT 1 "." -DOT 1 "." -WHITESPACE 1 " " -DOT 1 "." -DOT 1 "." -EQ 1 "=" -WHITESPACE 1 "\n" -COLON 1 ":" -WHITESPACE 1 " " -COLON 1 ":" -COLON 1 ":" -WHITESPACE 1 "\n" -EQ 1 "=" -WHITESPACE 1 " " -EQ 1 "=" -R_ANGLE 1 ">" -WHITESPACE 1 "\n" -EXCL 1 "!" -WHITESPACE 1 " " -EXCL 1 "!" 
-EQ 1 "=" -WHITESPACE 1 "\n" -MINUS 1 "-" -WHITESPACE 1 " " -MINUS 1 "-" -R_ANGLE 1 ">" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/0006_chars.rs b/crates/ra_syntax/test_data/lexer/0006_chars.rs deleted file mode 100644 index 454ee0a5f..000000000 --- a/crates/ra_syntax/test_data/lexer/0006_chars.rs +++ /dev/null @@ -1 +0,0 @@ -'x' ' ' '0' 'hello' '\x7f' '\n' '\\' '\'' diff --git a/crates/ra_syntax/test_data/lexer/0006_chars.txt b/crates/ra_syntax/test_data/lexer/0006_chars.txt deleted file mode 100644 index 950954fbc..000000000 --- a/crates/ra_syntax/test_data/lexer/0006_chars.txt +++ /dev/null @@ -1,16 +0,0 @@ -CHAR 3 "\'x\'" -WHITESPACE 1 " " -CHAR 3 "\' \'" -WHITESPACE 1 " " -CHAR 3 "\'0\'" -WHITESPACE 1 " " -CHAR 7 "\'hello\'" -WHITESPACE 1 " " -CHAR 6 "\'\\x7f\'" -WHITESPACE 1 " " -CHAR 4 "\'\\n\'" -WHITESPACE 1 " " -CHAR 4 "\'\\\\\'" -WHITESPACE 1 " " -CHAR 4 "\'\\\'\'" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/0007_lifetimes.rs b/crates/ra_syntax/test_data/lexer/0007_lifetimes.rs deleted file mode 100644 index b764f1dce..000000000 --- a/crates/ra_syntax/test_data/lexer/0007_lifetimes.rs +++ /dev/null @@ -1 +0,0 @@ -'a 'foo 'foo_bar_baz '_ diff --git a/crates/ra_syntax/test_data/lexer/0007_lifetimes.txt b/crates/ra_syntax/test_data/lexer/0007_lifetimes.txt deleted file mode 100644 index 005c29100..000000000 --- a/crates/ra_syntax/test_data/lexer/0007_lifetimes.txt +++ /dev/null @@ -1,8 +0,0 @@ -LIFETIME 2 "\'a" -WHITESPACE 1 " " -LIFETIME 4 "\'foo" -WHITESPACE 1 " " -LIFETIME 12 "\'foo_bar_baz" -WHITESPACE 1 " " -LIFETIME 2 "\'_" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/0008_byte_strings.rs b/crates/ra_syntax/test_data/lexer/0008_byte_strings.rs deleted file mode 100644 index b54930f5e..000000000 --- a/crates/ra_syntax/test_data/lexer/0008_byte_strings.rs +++ /dev/null @@ -1,3 +0,0 @@ -b'' b'x' b"foo" br"" -b''suf b""ix br""br -b'\n' b'\\' b'\'' b'hello' diff --git 
a/crates/ra_syntax/test_data/lexer/0008_byte_strings.txt b/crates/ra_syntax/test_data/lexer/0008_byte_strings.txt deleted file mode 100644 index bc03b51a8..000000000 --- a/crates/ra_syntax/test_data/lexer/0008_byte_strings.txt +++ /dev/null @@ -1,22 +0,0 @@ -BYTE 3 "b\'\'" -WHITESPACE 1 " " -BYTE 4 "b\'x\'" -WHITESPACE 1 " " -BYTE_STRING 6 "b\"foo\"" -WHITESPACE 1 " " -RAW_BYTE_STRING 4 "br\"\"" -WHITESPACE 1 "\n" -BYTE 6 "b\'\'suf" -WHITESPACE 1 " " -BYTE_STRING 5 "b\"\"ix" -WHITESPACE 1 " " -RAW_BYTE_STRING 6 "br\"\"br" -WHITESPACE 1 "\n" -BYTE 5 "b\'\\n\'" -WHITESPACE 1 " " -BYTE 5 "b\'\\\\\'" -WHITESPACE 1 " " -BYTE 5 "b\'\\\'\'" -WHITESPACE 1 " " -BYTE 8 "b\'hello\'" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/0009_strings.rs b/crates/ra_syntax/test_data/lexer/0009_strings.rs deleted file mode 100644 index 4ddb5bffc..000000000 --- a/crates/ra_syntax/test_data/lexer/0009_strings.rs +++ /dev/null @@ -1,2 +0,0 @@ -"hello" r"world" "\n\"\\no escape" "multi -line" diff --git a/crates/ra_syntax/test_data/lexer/0009_strings.txt b/crates/ra_syntax/test_data/lexer/0009_strings.txt deleted file mode 100644 index 4cb4d711d..000000000 --- a/crates/ra_syntax/test_data/lexer/0009_strings.txt +++ /dev/null @@ -1,8 +0,0 @@ -STRING 7 "\"hello\"" -WHITESPACE 1 " " -RAW_STRING 8 "r\"world\"" -WHITESPACE 1 " " -STRING 17 "\"\\n\\\"\\\\no escape\"" -WHITESPACE 1 " " -STRING 12 "\"multi\nline\"" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/0010_comments.rs b/crates/ra_syntax/test_data/lexer/0010_comments.rs deleted file mode 100644 index 71bdd1f9c..000000000 --- a/crates/ra_syntax/test_data/lexer/0010_comments.rs +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/env bash -// hello -//! 
World diff --git a/crates/ra_syntax/test_data/lexer/0010_comments.txt b/crates/ra_syntax/test_data/lexer/0010_comments.txt deleted file mode 100644 index 3c997de3f..000000000 --- a/crates/ra_syntax/test_data/lexer/0010_comments.txt +++ /dev/null @@ -1,6 +0,0 @@ -SHEBANG 19 "#!/usr/bin/env bash" -WHITESPACE 1 "\n" -COMMENT 8 "// hello" -WHITESPACE 1 "\n" -COMMENT 9 "//! World" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/0011_keywords.rs b/crates/ra_syntax/test_data/lexer/0011_keywords.rs deleted file mode 100644 index 1e91bff4e..000000000 --- a/crates/ra_syntax/test_data/lexer/0011_keywords.rs +++ /dev/null @@ -1,3 +0,0 @@ -async fn use struct trait enum impl true false as extern crate -mod pub self super in where for loop while if match const -static mut type ref let else move return diff --git a/crates/ra_syntax/test_data/lexer/0011_keywords.txt b/crates/ra_syntax/test_data/lexer/0011_keywords.txt deleted file mode 100644 index 22c00eefb..000000000 --- a/crates/ra_syntax/test_data/lexer/0011_keywords.txt +++ /dev/null @@ -1,64 +0,0 @@ -ASYNC_KW 5 "async" -WHITESPACE 1 " " -FN_KW 2 "fn" -WHITESPACE 1 " " -USE_KW 3 "use" -WHITESPACE 1 " " -STRUCT_KW 6 "struct" -WHITESPACE 1 " " -TRAIT_KW 5 "trait" -WHITESPACE 1 " " -ENUM_KW 4 "enum" -WHITESPACE 1 " " -IMPL_KW 4 "impl" -WHITESPACE 1 " " -TRUE_KW 4 "true" -WHITESPACE 1 " " -FALSE_KW 5 "false" -WHITESPACE 1 " " -AS_KW 2 "as" -WHITESPACE 1 " " -EXTERN_KW 6 "extern" -WHITESPACE 1 " " -CRATE_KW 5 "crate" -WHITESPACE 1 "\n" -MOD_KW 3 "mod" -WHITESPACE 1 " " -PUB_KW 3 "pub" -WHITESPACE 1 " " -SELF_KW 4 "self" -WHITESPACE 1 " " -SUPER_KW 5 "super" -WHITESPACE 1 " " -IN_KW 2 "in" -WHITESPACE 1 " " -WHERE_KW 5 "where" -WHITESPACE 1 " " -FOR_KW 3 "for" -WHITESPACE 1 " " -LOOP_KW 4 "loop" -WHITESPACE 1 " " -WHILE_KW 5 "while" -WHITESPACE 1 " " -IF_KW 2 "if" -WHITESPACE 1 " " -MATCH_KW 5 "match" -WHITESPACE 1 " " -CONST_KW 5 "const" -WHITESPACE 1 "\n" -STATIC_KW 6 "static" -WHITESPACE 1 " " -MUT_KW 3 "mut" 
-WHITESPACE 1 " " -TYPE_KW 4 "type" -WHITESPACE 1 " " -REF_KW 3 "ref" -WHITESPACE 1 " " -LET_KW 3 "let" -WHITESPACE 1 " " -ELSE_KW 4 "else" -WHITESPACE 1 " " -MOVE_KW 4 "move" -WHITESPACE 1 " " -RETURN_KW 6 "return" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/0013_raw_strings.rs b/crates/ra_syntax/test_data/lexer/0013_raw_strings.rs deleted file mode 100644 index e5ed0b693..000000000 --- a/crates/ra_syntax/test_data/lexer/0013_raw_strings.rs +++ /dev/null @@ -1 +0,0 @@ -r###"this is a r##"raw"## string"### diff --git a/crates/ra_syntax/test_data/lexer/0013_raw_strings.txt b/crates/ra_syntax/test_data/lexer/0013_raw_strings.txt deleted file mode 100644 index 9cf0957d1..000000000 --- a/crates/ra_syntax/test_data/lexer/0013_raw_strings.txt +++ /dev/null @@ -1,2 +0,0 @@ -RAW_STRING 36 "r###\"this is a r##\"raw\"## string\"###" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/0014_unclosed_char.rs b/crates/ra_syntax/test_data/lexer/0014_unclosed_char.rs deleted file mode 100644 index 9c0007077..000000000 --- a/crates/ra_syntax/test_data/lexer/0014_unclosed_char.rs +++ /dev/null @@ -1 +0,0 @@ -'1 \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/0014_unclosed_char.txt b/crates/ra_syntax/test_data/lexer/0014_unclosed_char.txt deleted file mode 100644 index 737a300ee..000000000 --- a/crates/ra_syntax/test_data/lexer/0014_unclosed_char.txt +++ /dev/null @@ -1 +0,0 @@ -LIFETIME 2 "\'1" diff --git a/crates/ra_syntax/test_data/lexer/0015_unclosed_string.rs b/crates/ra_syntax/test_data/lexer/0015_unclosed_string.rs deleted file mode 100644 index d771a26d4..000000000 --- a/crates/ra_syntax/test_data/lexer/0015_unclosed_string.rs +++ /dev/null @@ -1 +0,0 @@ -"hello diff --git a/crates/ra_syntax/test_data/lexer/0015_unclosed_string.txt b/crates/ra_syntax/test_data/lexer/0015_unclosed_string.txt deleted file mode 100644 index 728c40b66..000000000 --- a/crates/ra_syntax/test_data/lexer/0015_unclosed_string.txt +++ 
/dev/null @@ -1 +0,0 @@ -STRING 7 "\"hello\n" diff --git a/crates/ra_syntax/test_data/lexer/0016_raw_ident.rs b/crates/ra_syntax/test_data/lexer/0016_raw_ident.rs deleted file mode 100644 index b40a1b6a2..000000000 --- a/crates/ra_syntax/test_data/lexer/0016_raw_ident.rs +++ /dev/null @@ -1 +0,0 @@ -r#raw_ident diff --git a/crates/ra_syntax/test_data/lexer/0016_raw_ident.txt b/crates/ra_syntax/test_data/lexer/0016_raw_ident.txt deleted file mode 100644 index 484689693..000000000 --- a/crates/ra_syntax/test_data/lexer/0016_raw_ident.txt +++ /dev/null @@ -1,2 +0,0 @@ -IDENT 11 "r#raw_ident" -WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/err/0001_unclosed_char_at_eof.rs b/crates/ra_syntax/test_data/lexer/err/0001_unclosed_char_at_eof.rs new file mode 100644 index 000000000..ad2823b48 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0001_unclosed_char_at_eof.rs @@ -0,0 +1 @@ +' \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0001_unclosed_char_at_eof.txt b/crates/ra_syntax/test_data/lexer/err/0001_unclosed_char_at_eof.txt new file mode 100644 index 000000000..f24e1fd32 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0001_unclosed_char_at_eof.txt @@ -0,0 +1,2 @@ +CHAR 1 "\'" +> error[0; 1) token("\'") msg(Missing trailing `'` symbol to terminate the character literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0002_unclosed_char_with_ferris.rs b/crates/ra_syntax/test_data/lexer/err/0002_unclosed_char_with_ferris.rs new file mode 100644 index 000000000..e264a4152 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0002_unclosed_char_with_ferris.rs @@ -0,0 +1 @@ +'πŸ¦€ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0002_unclosed_char_with_ferris.txt b/crates/ra_syntax/test_data/lexer/err/0002_unclosed_char_with_ferris.txt new file mode 100644 index 000000000..bd08cfc44 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0002_unclosed_char_with_ferris.txt @@ 
-0,0 +1,2 @@ +CHAR 5 "\'πŸ¦€" +> error[0; 5) token("\'πŸ¦€") msg(Missing trailing `'` symbol to terminate the character literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0003_unclosed_char_with_ascii_escape.rs b/crates/ra_syntax/test_data/lexer/err/0003_unclosed_char_with_ascii_escape.rs new file mode 100644 index 000000000..cf74b4dad --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0003_unclosed_char_with_ascii_escape.rs @@ -0,0 +1 @@ +'\x7f \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0003_unclosed_char_with_ascii_escape.txt b/crates/ra_syntax/test_data/lexer/err/0003_unclosed_char_with_ascii_escape.txt new file mode 100644 index 000000000..0ee22912d --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0003_unclosed_char_with_ascii_escape.txt @@ -0,0 +1,2 @@ +CHAR 5 "\'\\x7f" +> error[0; 5) token("\'\\x7f") msg(Missing trailing `'` symbol to terminate the character literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0004_unclosed_char_with_unicode_escape.rs b/crates/ra_syntax/test_data/lexer/err/0004_unclosed_char_with_unicode_escape.rs new file mode 100644 index 000000000..50be91f68 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0004_unclosed_char_with_unicode_escape.rs @@ -0,0 +1 @@ +'\u{20AA} \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0004_unclosed_char_with_unicode_escape.txt b/crates/ra_syntax/test_data/lexer/err/0004_unclosed_char_with_unicode_escape.txt new file mode 100644 index 000000000..96fac42ce --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0004_unclosed_char_with_unicode_escape.txt @@ -0,0 +1,2 @@ +CHAR 9 "\'\\u{20AA}" +> error[0; 9) token("\'\\u{20AA}") msg(Missing trailing `'` symbol to terminate the character literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0005_unclosed_char_with_space.rs b/crates/ra_syntax/test_data/lexer/err/0005_unclosed_char_with_space.rs new file mode 100644 index 000000000..309ecfe47 --- /dev/null 
+++ b/crates/ra_syntax/test_data/lexer/err/0005_unclosed_char_with_space.rs @@ -0,0 +1 @@ +' \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0005_unclosed_char_with_space.txt b/crates/ra_syntax/test_data/lexer/err/0005_unclosed_char_with_space.txt new file mode 100644 index 000000000..2059f3f81 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0005_unclosed_char_with_space.txt @@ -0,0 +1,2 @@ +CHAR 2 "\' " +> error[0; 2) token("\' ") msg(Missing trailing `'` symbol to terminate the character literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0006_unclosed_char_with_slash.rs b/crates/ra_syntax/test_data/lexer/err/0006_unclosed_char_with_slash.rs new file mode 100644 index 000000000..6ba258b10 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0006_unclosed_char_with_slash.rs @@ -0,0 +1 @@ +'\ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0006_unclosed_char_with_slash.txt b/crates/ra_syntax/test_data/lexer/err/0006_unclosed_char_with_slash.txt new file mode 100644 index 000000000..7dd376e59 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0006_unclosed_char_with_slash.txt @@ -0,0 +1,2 @@ +CHAR 2 "\'\\" +> error[0; 2) token("\'\\") msg(Missing trailing `'` symbol to terminate the character literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0007_unclosed_char_with_slash_n.rs b/crates/ra_syntax/test_data/lexer/err/0007_unclosed_char_with_slash_n.rs new file mode 100644 index 000000000..78bef7e3e --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0007_unclosed_char_with_slash_n.rs @@ -0,0 +1 @@ +'\n \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0007_unclosed_char_with_slash_n.txt b/crates/ra_syntax/test_data/lexer/err/0007_unclosed_char_with_slash_n.txt new file mode 100644 index 000000000..ef7a0a147 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0007_unclosed_char_with_slash_n.txt @@ -0,0 +1,2 @@ +CHAR 3 "\'\\n" +> error[0; 
3) token("\'\\n") msg(Missing trailing `'` symbol to terminate the character literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0008_unclosed_char_with_slash_single_quote.rs b/crates/ra_syntax/test_data/lexer/err/0008_unclosed_char_with_slash_single_quote.rs new file mode 100644 index 000000000..a0e722065 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0008_unclosed_char_with_slash_single_quote.rs @@ -0,0 +1 @@ +'\' \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0008_unclosed_char_with_slash_single_quote.txt b/crates/ra_syntax/test_data/lexer/err/0008_unclosed_char_with_slash_single_quote.txt new file mode 100644 index 000000000..13fc5ea9a --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0008_unclosed_char_with_slash_single_quote.txt @@ -0,0 +1,2 @@ +CHAR 3 "\'\\\'" +> error[0; 3) token("\'\\\'") msg(Missing trailing `'` symbol to terminate the character literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0009_unclosed_byte_at_eof.rs b/crates/ra_syntax/test_data/lexer/err/0009_unclosed_byte_at_eof.rs new file mode 100644 index 000000000..795dc7e25 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0009_unclosed_byte_at_eof.rs @@ -0,0 +1 @@ +b' \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0009_unclosed_byte_at_eof.txt b/crates/ra_syntax/test_data/lexer/err/0009_unclosed_byte_at_eof.txt new file mode 100644 index 000000000..269d68c74 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0009_unclosed_byte_at_eof.txt @@ -0,0 +1,2 @@ +BYTE 2 "b\'" +> error[0; 2) token("b\'") msg(Missing trailing `'` symbol to terminate the byte literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0010_unclosed_byte_with_ferris.rs b/crates/ra_syntax/test_data/lexer/err/0010_unclosed_byte_with_ferris.rs new file mode 100644 index 000000000..c9230dc24 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0010_unclosed_byte_with_ferris.rs @@ -0,0 +1 @@ +b'πŸ¦€ \ No newline at 
end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0010_unclosed_byte_with_ferris.txt b/crates/ra_syntax/test_data/lexer/err/0010_unclosed_byte_with_ferris.txt new file mode 100644 index 000000000..91a76e479 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0010_unclosed_byte_with_ferris.txt @@ -0,0 +1,2 @@ +BYTE 6 "b\'πŸ¦€" +> error[0; 6) token("b\'πŸ¦€") msg(Missing trailing `'` symbol to terminate the byte literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0011_unclosed_byte_with_ascii_escape.rs b/crates/ra_syntax/test_data/lexer/err/0011_unclosed_byte_with_ascii_escape.rs new file mode 100644 index 000000000..d146a8090 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0011_unclosed_byte_with_ascii_escape.rs @@ -0,0 +1 @@ +b'\x7f \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0011_unclosed_byte_with_ascii_escape.txt b/crates/ra_syntax/test_data/lexer/err/0011_unclosed_byte_with_ascii_escape.txt new file mode 100644 index 000000000..b8c804a18 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0011_unclosed_byte_with_ascii_escape.txt @@ -0,0 +1,2 @@ +BYTE 6 "b\'\\x7f" +> error[0; 6) token("b\'\\x7f") msg(Missing trailing `'` symbol to terminate the byte literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0012_unclosed_byte_with_unicode_escape.rs b/crates/ra_syntax/test_data/lexer/err/0012_unclosed_byte_with_unicode_escape.rs new file mode 100644 index 000000000..a3dec7c25 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0012_unclosed_byte_with_unicode_escape.rs @@ -0,0 +1 @@ +b'\u{20AA} \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0012_unclosed_byte_with_unicode_escape.txt b/crates/ra_syntax/test_data/lexer/err/0012_unclosed_byte_with_unicode_escape.txt new file mode 100644 index 000000000..dfca22a59 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0012_unclosed_byte_with_unicode_escape.txt @@ -0,0 +1,2 @@ +BYTE 10 "b\'\\u{20AA}" +> 
error[0; 10) token("b\'\\u{20AA}") msg(Missing trailing `'` symbol to terminate the byte literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0013_unclosed_byte_with_space.rs b/crates/ra_syntax/test_data/lexer/err/0013_unclosed_byte_with_space.rs new file mode 100644 index 000000000..93b7f9c87 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0013_unclosed_byte_with_space.rs @@ -0,0 +1 @@ +b' \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0013_unclosed_byte_with_space.txt b/crates/ra_syntax/test_data/lexer/err/0013_unclosed_byte_with_space.txt new file mode 100644 index 000000000..51a1cceab --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0013_unclosed_byte_with_space.txt @@ -0,0 +1,2 @@ +BYTE 3 "b\' " +> error[0; 3) token("b\' ") msg(Missing trailing `'` symbol to terminate the byte literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0014_unclosed_byte_with_slash.rs b/crates/ra_syntax/test_data/lexer/err/0014_unclosed_byte_with_slash.rs new file mode 100644 index 000000000..abffa5037 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0014_unclosed_byte_with_slash.rs @@ -0,0 +1 @@ +b'\ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0014_unclosed_byte_with_slash.txt b/crates/ra_syntax/test_data/lexer/err/0014_unclosed_byte_with_slash.txt new file mode 100644 index 000000000..24e835c27 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0014_unclosed_byte_with_slash.txt @@ -0,0 +1,2 @@ +BYTE 3 "b\'\\" +> error[0; 3) token("b\'\\") msg(Missing trailing `'` symbol to terminate the byte literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0015_unclosed_byte_with_slash_n.rs b/crates/ra_syntax/test_data/lexer/err/0015_unclosed_byte_with_slash_n.rs new file mode 100644 index 000000000..4f46836a9 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0015_unclosed_byte_with_slash_n.rs @@ -0,0 +1 @@ +b'\n \ No newline at end of file diff --git 
a/crates/ra_syntax/test_data/lexer/err/0015_unclosed_byte_with_slash_n.txt b/crates/ra_syntax/test_data/lexer/err/0015_unclosed_byte_with_slash_n.txt new file mode 100644 index 000000000..f1e39a41b --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0015_unclosed_byte_with_slash_n.txt @@ -0,0 +1,2 @@ +BYTE 4 "b\'\\n" +> error[0; 4) token("b\'\\n") msg(Missing trailing `'` symbol to terminate the byte literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0016_unclosed_byte_with_slash_single_quote.rs b/crates/ra_syntax/test_data/lexer/err/0016_unclosed_byte_with_slash_single_quote.rs new file mode 100644 index 000000000..645b641ee --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0016_unclosed_byte_with_slash_single_quote.rs @@ -0,0 +1 @@ +b'\' \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0016_unclosed_byte_with_slash_single_quote.txt b/crates/ra_syntax/test_data/lexer/err/0016_unclosed_byte_with_slash_single_quote.txt new file mode 100644 index 000000000..f8ffe815d --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0016_unclosed_byte_with_slash_single_quote.txt @@ -0,0 +1,2 @@ +BYTE 4 "b\'\\\'" +> error[0; 4) token("b\'\\\'") msg(Missing trailing `'` symbol to terminate the byte literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0017_unclosed_string_at_eof.rs b/crates/ra_syntax/test_data/lexer/err/0017_unclosed_string_at_eof.rs new file mode 100644 index 000000000..9d68933c4 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0017_unclosed_string_at_eof.rs @@ -0,0 +1 @@ +" \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0017_unclosed_string_at_eof.txt b/crates/ra_syntax/test_data/lexer/err/0017_unclosed_string_at_eof.txt new file mode 100644 index 000000000..823daaf6f --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0017_unclosed_string_at_eof.txt @@ -0,0 +1,2 @@ +STRING 1 "\"" +> error[0; 1) token("\"") msg(Missing trailing `"` symbol to terminate the string 
literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0018_unclosed_string_with_ferris.rs b/crates/ra_syntax/test_data/lexer/err/0018_unclosed_string_with_ferris.rs new file mode 100644 index 000000000..d439b8d2a --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0018_unclosed_string_with_ferris.rs @@ -0,0 +1 @@ +"πŸ¦€ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0018_unclosed_string_with_ferris.txt b/crates/ra_syntax/test_data/lexer/err/0018_unclosed_string_with_ferris.txt new file mode 100644 index 000000000..164580eb3 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0018_unclosed_string_with_ferris.txt @@ -0,0 +1,2 @@ +STRING 5 "\"πŸ¦€" +> error[0; 5) token("\"πŸ¦€") msg(Missing trailing `"` symbol to terminate the string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0019_unclosed_string_with_ascii_escape.rs b/crates/ra_syntax/test_data/lexer/err/0019_unclosed_string_with_ascii_escape.rs new file mode 100644 index 000000000..56186a344 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0019_unclosed_string_with_ascii_escape.rs @@ -0,0 +1 @@ +"\x7f \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0019_unclosed_string_with_ascii_escape.txt b/crates/ra_syntax/test_data/lexer/err/0019_unclosed_string_with_ascii_escape.txt new file mode 100644 index 000000000..4453827c3 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0019_unclosed_string_with_ascii_escape.txt @@ -0,0 +1,2 @@ +STRING 5 "\"\\x7f" +> error[0; 5) token("\"\\x7f") msg(Missing trailing `"` symbol to terminate the string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0020_unclosed_string_with_unicode_escape.rs b/crates/ra_syntax/test_data/lexer/err/0020_unclosed_string_with_unicode_escape.rs new file mode 100644 index 000000000..ed24095c3 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0020_unclosed_string_with_unicode_escape.rs @@ -0,0 +1 @@ +"\u{20AA} \ No newline at end of file 
diff --git a/crates/ra_syntax/test_data/lexer/err/0020_unclosed_string_with_unicode_escape.txt b/crates/ra_syntax/test_data/lexer/err/0020_unclosed_string_with_unicode_escape.txt new file mode 100644 index 000000000..aa614f304 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0020_unclosed_string_with_unicode_escape.txt @@ -0,0 +1,2 @@ +STRING 9 "\"\\u{20AA}" +> error[0; 9) token("\"\\u{20AA}") msg(Missing trailing `"` symbol to terminate the string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0021_unclosed_string_with_space.rs b/crates/ra_syntax/test_data/lexer/err/0021_unclosed_string_with_space.rs new file mode 100644 index 000000000..72cdc841f --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0021_unclosed_string_with_space.rs @@ -0,0 +1 @@ +" \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0021_unclosed_string_with_space.txt b/crates/ra_syntax/test_data/lexer/err/0021_unclosed_string_with_space.txt new file mode 100644 index 000000000..b7db1236f --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0021_unclosed_string_with_space.txt @@ -0,0 +1,2 @@ +STRING 2 "\" " +> error[0; 2) token("\" ") msg(Missing trailing `"` symbol to terminate the string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0022_unclosed_string_with_slash.rs b/crates/ra_syntax/test_data/lexer/err/0022_unclosed_string_with_slash.rs new file mode 100644 index 000000000..00a258400 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0022_unclosed_string_with_slash.rs @@ -0,0 +1 @@ +"\ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0022_unclosed_string_with_slash.txt b/crates/ra_syntax/test_data/lexer/err/0022_unclosed_string_with_slash.txt new file mode 100644 index 000000000..9d3df3799 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0022_unclosed_string_with_slash.txt @@ -0,0 +1,2 @@ +STRING 2 "\"\\" +> error[0; 2) token("\"\\") msg(Missing trailing `"` symbol to terminate the 
string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0023_unclosed_string_with_slash_n.rs b/crates/ra_syntax/test_data/lexer/err/0023_unclosed_string_with_slash_n.rs new file mode 100644 index 000000000..a0c29b8cf --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0023_unclosed_string_with_slash_n.rs @@ -0,0 +1 @@ +"\n \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0023_unclosed_string_with_slash_n.txt b/crates/ra_syntax/test_data/lexer/err/0023_unclosed_string_with_slash_n.txt new file mode 100644 index 000000000..e3eb672b6 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0023_unclosed_string_with_slash_n.txt @@ -0,0 +1,2 @@ +STRING 3 "\"\\n" +> error[0; 3) token("\"\\n") msg(Missing trailing `"` symbol to terminate the string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0024_unclosed_string_with_slash_double_quote.rs b/crates/ra_syntax/test_data/lexer/err/0024_unclosed_string_with_slash_double_quote.rs new file mode 100644 index 000000000..403c2d6dd --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0024_unclosed_string_with_slash_double_quote.rs @@ -0,0 +1 @@ +"\" \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0024_unclosed_string_with_slash_double_quote.txt b/crates/ra_syntax/test_data/lexer/err/0024_unclosed_string_with_slash_double_quote.txt new file mode 100644 index 000000000..041d7fb6e --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0024_unclosed_string_with_slash_double_quote.txt @@ -0,0 +1,2 @@ +STRING 3 "\"\\\"" +> error[0; 3) token("\"\\\"") msg(Missing trailing `"` symbol to terminate the string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0025_unclosed_byte_string_at_eof.rs b/crates/ra_syntax/test_data/lexer/err/0025_unclosed_byte_string_at_eof.rs new file mode 100644 index 000000000..36f4f4321 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0025_unclosed_byte_string_at_eof.rs @@ -0,0 +1 @@ +b" \ No newline at 
end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0025_unclosed_byte_string_at_eof.txt b/crates/ra_syntax/test_data/lexer/err/0025_unclosed_byte_string_at_eof.txt new file mode 100644 index 000000000..be7970a83 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0025_unclosed_byte_string_at_eof.txt @@ -0,0 +1,2 @@ +BYTE_STRING 2 "b\"" +> error[0; 2) token("b\"") msg(Missing trailing `"` symbol to terminate the byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0026_unclosed_byte_string_with_ferris.rs b/crates/ra_syntax/test_data/lexer/err/0026_unclosed_byte_string_with_ferris.rs new file mode 100644 index 000000000..3c23a0372 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0026_unclosed_byte_string_with_ferris.rs @@ -0,0 +1 @@ +b"πŸ¦€ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0026_unclosed_byte_string_with_ferris.txt b/crates/ra_syntax/test_data/lexer/err/0026_unclosed_byte_string_with_ferris.txt new file mode 100644 index 000000000..bf9aab132 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0026_unclosed_byte_string_with_ferris.txt @@ -0,0 +1,2 @@ +BYTE_STRING 6 "b\"πŸ¦€" +> error[0; 6) token("b\"πŸ¦€") msg(Missing trailing `"` symbol to terminate the byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0027_unclosed_byte_string_with_ascii_escape.rs b/crates/ra_syntax/test_data/lexer/err/0027_unclosed_byte_string_with_ascii_escape.rs new file mode 100644 index 000000000..836c112c1 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0027_unclosed_byte_string_with_ascii_escape.rs @@ -0,0 +1 @@ +b"\x7f \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0027_unclosed_byte_string_with_ascii_escape.txt b/crates/ra_syntax/test_data/lexer/err/0027_unclosed_byte_string_with_ascii_escape.txt new file mode 100644 index 000000000..76e16d7d3 --- /dev/null +++ 
b/crates/ra_syntax/test_data/lexer/err/0027_unclosed_byte_string_with_ascii_escape.txt @@ -0,0 +1,2 @@ +BYTE_STRING 6 "b\"\\x7f" +> error[0; 6) token("b\"\\x7f") msg(Missing trailing `"` symbol to terminate the byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0028_unclosed_byte_string_with_unicode_escape.rs b/crates/ra_syntax/test_data/lexer/err/0028_unclosed_byte_string_with_unicode_escape.rs new file mode 100644 index 000000000..1c6df1d00 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0028_unclosed_byte_string_with_unicode_escape.rs @@ -0,0 +1 @@ +b"\u{20AA} \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0028_unclosed_byte_string_with_unicode_escape.txt b/crates/ra_syntax/test_data/lexer/err/0028_unclosed_byte_string_with_unicode_escape.txt new file mode 100644 index 000000000..09adffa16 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0028_unclosed_byte_string_with_unicode_escape.txt @@ -0,0 +1,2 @@ +BYTE_STRING 10 "b\"\\u{20AA}" +> error[0; 10) token("b\"\\u{20AA}") msg(Missing trailing `"` symbol to terminate the byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0029_unclosed_byte_string_with_space.rs b/crates/ra_syntax/test_data/lexer/err/0029_unclosed_byte_string_with_space.rs new file mode 100644 index 000000000..d6898541e --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0029_unclosed_byte_string_with_space.rs @@ -0,0 +1 @@ +b" \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0029_unclosed_byte_string_with_space.txt b/crates/ra_syntax/test_data/lexer/err/0029_unclosed_byte_string_with_space.txt new file mode 100644 index 000000000..fcb7253c8 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0029_unclosed_byte_string_with_space.txt @@ -0,0 +1,2 @@ +BYTE_STRING 3 "b\" " +> error[0; 3) token("b\" ") msg(Missing trailing `"` symbol to terminate the byte string literal) diff --git 
a/crates/ra_syntax/test_data/lexer/err/0030_unclosed_byte_string_with_slash.rs b/crates/ra_syntax/test_data/lexer/err/0030_unclosed_byte_string_with_slash.rs new file mode 100644 index 000000000..cce661538 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0030_unclosed_byte_string_with_slash.rs @@ -0,0 +1 @@ +b"\ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0030_unclosed_byte_string_with_slash.txt b/crates/ra_syntax/test_data/lexer/err/0030_unclosed_byte_string_with_slash.txt new file mode 100644 index 000000000..0a1b3e269 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0030_unclosed_byte_string_with_slash.txt @@ -0,0 +1,2 @@ +BYTE_STRING 3 "b\"\\" +> error[0; 3) token("b\"\\") msg(Missing trailing `"` symbol to terminate the byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0031_unclosed_byte_string_with_slash_n.rs b/crates/ra_syntax/test_data/lexer/err/0031_unclosed_byte_string_with_slash_n.rs new file mode 100644 index 000000000..5e680aabb --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0031_unclosed_byte_string_with_slash_n.rs @@ -0,0 +1 @@ +b"\n \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0031_unclosed_byte_string_with_slash_n.txt b/crates/ra_syntax/test_data/lexer/err/0031_unclosed_byte_string_with_slash_n.txt new file mode 100644 index 000000000..1fb89d2b6 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0031_unclosed_byte_string_with_slash_n.txt @@ -0,0 +1,2 @@ +BYTE_STRING 4 "b\"\\n" +> error[0; 4) token("b\"\\n") msg(Missing trailing `"` symbol to terminate the byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0032_unclosed_byte_string_with_slash_double_quote.rs b/crates/ra_syntax/test_data/lexer/err/0032_unclosed_byte_string_with_slash_double_quote.rs new file mode 100644 index 000000000..f2ff58ba9 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0032_unclosed_byte_string_with_slash_double_quote.rs @@ -0,0 
+1 @@ +b"\" \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0032_unclosed_byte_string_with_slash_double_quote.txt b/crates/ra_syntax/test_data/lexer/err/0032_unclosed_byte_string_with_slash_double_quote.txt new file mode 100644 index 000000000..718d36992 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0032_unclosed_byte_string_with_slash_double_quote.txt @@ -0,0 +1,2 @@ +BYTE_STRING 4 "b\"\\\"" +> error[0; 4) token("b\"\\\"") msg(Missing trailing `"` symbol to terminate the byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0033_unclosed_raw_string_at_eof.rs b/crates/ra_syntax/test_data/lexer/err/0033_unclosed_raw_string_at_eof.rs new file mode 100644 index 000000000..557c59b62 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0033_unclosed_raw_string_at_eof.rs @@ -0,0 +1 @@ +r##" \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0033_unclosed_raw_string_at_eof.txt b/crates/ra_syntax/test_data/lexer/err/0033_unclosed_raw_string_at_eof.txt new file mode 100644 index 000000000..93348f548 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0033_unclosed_raw_string_at_eof.txt @@ -0,0 +1,2 @@ +RAW_STRING 4 "r##\"" +> error[0; 4) token("r##\"") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0034_unclosed_raw_string_with_ferris.rs b/crates/ra_syntax/test_data/lexer/err/0034_unclosed_raw_string_with_ferris.rs new file mode 100644 index 000000000..bd046e4bb --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0034_unclosed_raw_string_with_ferris.rs @@ -0,0 +1 @@ +r##"πŸ¦€ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0034_unclosed_raw_string_with_ferris.txt b/crates/ra_syntax/test_data/lexer/err/0034_unclosed_raw_string_with_ferris.txt new file mode 100644 index 000000000..42c70dfe8 --- /dev/null +++ 
b/crates/ra_syntax/test_data/lexer/err/0034_unclosed_raw_string_with_ferris.txt @@ -0,0 +1,2 @@ +RAW_STRING 8 "r##\"πŸ¦€" +> error[0; 8) token("r##\"πŸ¦€") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0035_unclosed_raw_string_with_ascii_escape.rs b/crates/ra_syntax/test_data/lexer/err/0035_unclosed_raw_string_with_ascii_escape.rs new file mode 100644 index 000000000..5bec883dc --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0035_unclosed_raw_string_with_ascii_escape.rs @@ -0,0 +1 @@ +r##"\x7f \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0035_unclosed_raw_string_with_ascii_escape.txt b/crates/ra_syntax/test_data/lexer/err/0035_unclosed_raw_string_with_ascii_escape.txt new file mode 100644 index 000000000..2bdeea0ff --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0035_unclosed_raw_string_with_ascii_escape.txt @@ -0,0 +1,2 @@ +RAW_STRING 8 "r##\"\\x7f" +> error[0; 8) token("r##\"\\x7f") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0036_unclosed_raw_string_with_unicode_escape.rs b/crates/ra_syntax/test_data/lexer/err/0036_unclosed_raw_string_with_unicode_escape.rs new file mode 100644 index 000000000..bf05c3913 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0036_unclosed_raw_string_with_unicode_escape.rs @@ -0,0 +1 @@ +r##"\u{20AA} \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0036_unclosed_raw_string_with_unicode_escape.txt b/crates/ra_syntax/test_data/lexer/err/0036_unclosed_raw_string_with_unicode_escape.txt new file mode 100644 index 000000000..667d4d79f --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0036_unclosed_raw_string_with_unicode_escape.txt @@ -0,0 +1,2 @@ +RAW_STRING 12 "r##\"\\u{20AA}" +> error[0; 12) token("r##\"\\u{20AA}") msg(Missing trailing `"` with `#` symbols to terminate the raw 
string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0037_unclosed_raw_string_with_space.rs b/crates/ra_syntax/test_data/lexer/err/0037_unclosed_raw_string_with_space.rs new file mode 100644 index 000000000..f104bae4f --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0037_unclosed_raw_string_with_space.rs @@ -0,0 +1 @@ +r##" \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0037_unclosed_raw_string_with_space.txt b/crates/ra_syntax/test_data/lexer/err/0037_unclosed_raw_string_with_space.txt new file mode 100644 index 000000000..dd9597a1a --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0037_unclosed_raw_string_with_space.txt @@ -0,0 +1,2 @@ +RAW_STRING 5 "r##\" " +> error[0; 5) token("r##\" ") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0038_unclosed_raw_string_with_slash.rs b/crates/ra_syntax/test_data/lexer/err/0038_unclosed_raw_string_with_slash.rs new file mode 100644 index 000000000..9242077b8 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0038_unclosed_raw_string_with_slash.rs @@ -0,0 +1 @@ +r##"\ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0038_unclosed_raw_string_with_slash.txt b/crates/ra_syntax/test_data/lexer/err/0038_unclosed_raw_string_with_slash.txt new file mode 100644 index 000000000..6ac6e3d62 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0038_unclosed_raw_string_with_slash.txt @@ -0,0 +1,2 @@ +RAW_STRING 5 "r##\"\\" +> error[0; 5) token("r##\"\\") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0039_unclosed_raw_string_with_slash_n.rs b/crates/ra_syntax/test_data/lexer/err/0039_unclosed_raw_string_with_slash_n.rs new file mode 100644 index 000000000..db1c16f2b --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0039_unclosed_raw_string_with_slash_n.rs @@ -0,0 +1 @@ 
+r##"\n \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0039_unclosed_raw_string_with_slash_n.txt b/crates/ra_syntax/test_data/lexer/err/0039_unclosed_raw_string_with_slash_n.txt new file mode 100644 index 000000000..9d35443f5 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0039_unclosed_raw_string_with_slash_n.txt @@ -0,0 +1,2 @@ +RAW_STRING 6 "r##\"\\n" +> error[0; 6) token("r##\"\\n") msg(Missing trailing `"` with `#` symbols to terminate the raw string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0040_unclosed_raw_byte_string_at_eof.rs b/crates/ra_syntax/test_data/lexer/err/0040_unclosed_raw_byte_string_at_eof.rs new file mode 100644 index 000000000..ae5bae622 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0040_unclosed_raw_byte_string_at_eof.rs @@ -0,0 +1 @@ +br##" \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0040_unclosed_raw_byte_string_at_eof.txt b/crates/ra_syntax/test_data/lexer/err/0040_unclosed_raw_byte_string_at_eof.txt new file mode 100644 index 000000000..81fa39ea5 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0040_unclosed_raw_byte_string_at_eof.txt @@ -0,0 +1,2 @@ +RAW_BYTE_STRING 5 "br##\"" +> error[0; 5) token("br##\"") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0041_unclosed_raw_byte_string_with_ferris.rs b/crates/ra_syntax/test_data/lexer/err/0041_unclosed_raw_byte_string_with_ferris.rs new file mode 100644 index 000000000..9ef01207a --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0041_unclosed_raw_byte_string_with_ferris.rs @@ -0,0 +1 @@ +br##"πŸ¦€ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0041_unclosed_raw_byte_string_with_ferris.txt b/crates/ra_syntax/test_data/lexer/err/0041_unclosed_raw_byte_string_with_ferris.txt new file mode 100644 index 000000000..c2503a4d0 --- /dev/null +++ 
b/crates/ra_syntax/test_data/lexer/err/0041_unclosed_raw_byte_string_with_ferris.txt @@ -0,0 +1,2 @@ +RAW_BYTE_STRING 9 "br##\"πŸ¦€" +> error[0; 9) token("br##\"πŸ¦€") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0042_unclosed_raw_byte_string_with_ascii_escape.rs b/crates/ra_syntax/test_data/lexer/err/0042_unclosed_raw_byte_string_with_ascii_escape.rs new file mode 100644 index 000000000..d50270afe --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0042_unclosed_raw_byte_string_with_ascii_escape.rs @@ -0,0 +1 @@ +br##"\x7f \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0042_unclosed_raw_byte_string_with_ascii_escape.txt b/crates/ra_syntax/test_data/lexer/err/0042_unclosed_raw_byte_string_with_ascii_escape.txt new file mode 100644 index 000000000..3bd3d8152 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0042_unclosed_raw_byte_string_with_ascii_escape.txt @@ -0,0 +1,2 @@ +RAW_BYTE_STRING 9 "br##\"\\x7f" +> error[0; 9) token("br##\"\\x7f") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0043_unclosed_raw_byte_string_with_unicode_escape.rs b/crates/ra_syntax/test_data/lexer/err/0043_unclosed_raw_byte_string_with_unicode_escape.rs new file mode 100644 index 000000000..90e299a1a --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0043_unclosed_raw_byte_string_with_unicode_escape.rs @@ -0,0 +1 @@ +br##"\u{20AA} \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0043_unclosed_raw_byte_string_with_unicode_escape.txt b/crates/ra_syntax/test_data/lexer/err/0043_unclosed_raw_byte_string_with_unicode_escape.txt new file mode 100644 index 000000000..a512f0428 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0043_unclosed_raw_byte_string_with_unicode_escape.txt @@ -0,0 +1,2 @@ +RAW_BYTE_STRING 13 "br##\"\\u{20AA}" +> 
error[0; 13) token("br##\"\\u{20AA}") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0044_unclosed_raw_byte_string_with_space.rs b/crates/ra_syntax/test_data/lexer/err/0044_unclosed_raw_byte_string_with_space.rs new file mode 100644 index 000000000..14c602fd2 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0044_unclosed_raw_byte_string_with_space.rs @@ -0,0 +1 @@ +br##" \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0044_unclosed_raw_byte_string_with_space.txt b/crates/ra_syntax/test_data/lexer/err/0044_unclosed_raw_byte_string_with_space.txt new file mode 100644 index 000000000..dc616a623 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0044_unclosed_raw_byte_string_with_space.txt @@ -0,0 +1,2 @@ +RAW_BYTE_STRING 6 "br##\" " +> error[0; 6) token("br##\" ") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0045_unclosed_raw_byte_string_with_slash.rs b/crates/ra_syntax/test_data/lexer/err/0045_unclosed_raw_byte_string_with_slash.rs new file mode 100644 index 000000000..0b3c015d7 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0045_unclosed_raw_byte_string_with_slash.rs @@ -0,0 +1 @@ +br##"\ \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0045_unclosed_raw_byte_string_with_slash.txt b/crates/ra_syntax/test_data/lexer/err/0045_unclosed_raw_byte_string_with_slash.txt new file mode 100644 index 000000000..debafe380 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0045_unclosed_raw_byte_string_with_slash.txt @@ -0,0 +1,2 @@ +RAW_BYTE_STRING 6 "br##\"\\" +> error[0; 6) token("br##\"\\") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0046_unclosed_raw_byte_string_with_slash_n.rs 
b/crates/ra_syntax/test_data/lexer/err/0046_unclosed_raw_byte_string_with_slash_n.rs new file mode 100644 index 000000000..0d8b0e7ab --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0046_unclosed_raw_byte_string_with_slash_n.rs @@ -0,0 +1 @@ +br##"\n \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0046_unclosed_raw_byte_string_with_slash_n.txt b/crates/ra_syntax/test_data/lexer/err/0046_unclosed_raw_byte_string_with_slash_n.txt new file mode 100644 index 000000000..524e617b7 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0046_unclosed_raw_byte_string_with_slash_n.txt @@ -0,0 +1,2 @@ +RAW_BYTE_STRING 7 "br##\"\\n" +> error[0; 7) token("br##\"\\n") msg(Missing trailing `"` with `#` symbols to terminate the raw byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0047_unstarted_raw_string_at_eof.rs b/crates/ra_syntax/test_data/lexer/err/0047_unstarted_raw_string_at_eof.rs new file mode 100644 index 000000000..eddf8d080 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0047_unstarted_raw_string_at_eof.rs @@ -0,0 +1 @@ +r## \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0047_unstarted_raw_string_at_eof.txt b/crates/ra_syntax/test_data/lexer/err/0047_unstarted_raw_string_at_eof.txt new file mode 100644 index 000000000..00b046840 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0047_unstarted_raw_string_at_eof.txt @@ -0,0 +1,2 @@ +RAW_STRING 3 "r##" +> error[0; 3) token("r##") msg(Missing `"` symbol after `#` symbols to begin the raw string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0048_unstarted_raw_byte_string_at_eof.rs b/crates/ra_syntax/test_data/lexer/err/0048_unstarted_raw_byte_string_at_eof.rs new file mode 100644 index 000000000..7e8cadf4f --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0048_unstarted_raw_byte_string_at_eof.rs @@ -0,0 +1 @@ +br## \ No newline at end of file diff --git 
a/crates/ra_syntax/test_data/lexer/err/0048_unstarted_raw_byte_string_at_eof.txt b/crates/ra_syntax/test_data/lexer/err/0048_unstarted_raw_byte_string_at_eof.txt new file mode 100644 index 000000000..33b25e60f --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0048_unstarted_raw_byte_string_at_eof.txt @@ -0,0 +1,2 @@ +RAW_BYTE_STRING 4 "br##" +> error[0; 4) token("br##") msg(Missing `"` symbol after `#` symbols to begin the raw byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0049_unstarted_raw_string_with_ascii.rs b/crates/ra_syntax/test_data/lexer/err/0049_unstarted_raw_string_with_ascii.rs new file mode 100644 index 000000000..534668a9b --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0049_unstarted_raw_string_with_ascii.rs @@ -0,0 +1 @@ +r## I lack a quote! \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0049_unstarted_raw_string_with_ascii.txt b/crates/ra_syntax/test_data/lexer/err/0049_unstarted_raw_string_with_ascii.txt new file mode 100644 index 000000000..782dfd974 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0049_unstarted_raw_string_with_ascii.txt @@ -0,0 +1,10 @@ +RAW_STRING 4 "r## " +IDENT 1 "I" +WHITESPACE 1 " " +IDENT 4 "lack" +WHITESPACE 1 " " +IDENT 1 "a" +WHITESPACE 1 " " +IDENT 5 "quote" +EXCL 1 "!" +> error[0; 4) token("r## ") msg(Missing `"` symbol after `#` symbols to begin the raw string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0050_unstarted_raw_byte_string_with_ascii.rs b/crates/ra_syntax/test_data/lexer/err/0050_unstarted_raw_byte_string_with_ascii.rs new file mode 100644 index 000000000..d9b55455a --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0050_unstarted_raw_byte_string_with_ascii.rs @@ -0,0 +1 @@ +br## I lack a quote! 
\ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0050_unstarted_raw_byte_string_with_ascii.txt b/crates/ra_syntax/test_data/lexer/err/0050_unstarted_raw_byte_string_with_ascii.txt new file mode 100644 index 000000000..59c40cd65 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0050_unstarted_raw_byte_string_with_ascii.txt @@ -0,0 +1,10 @@ +RAW_BYTE_STRING 5 "br## " +IDENT 1 "I" +WHITESPACE 1 " " +IDENT 4 "lack" +WHITESPACE 1 " " +IDENT 1 "a" +WHITESPACE 1 " " +IDENT 5 "quote" +EXCL 1 "!" +> error[0; 5) token("br## ") msg(Missing `"` symbol after `#` symbols to begin the raw byte string literal) diff --git a/crates/ra_syntax/test_data/lexer/err/0051_unclosed_block_comment_at_eof.rs b/crates/ra_syntax/test_data/lexer/err/0051_unclosed_block_comment_at_eof.rs new file mode 100644 index 000000000..22e83649f --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0051_unclosed_block_comment_at_eof.rs @@ -0,0 +1 @@ +/* \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/err/0051_unclosed_block_comment_at_eof.txt b/crates/ra_syntax/test_data/lexer/err/0051_unclosed_block_comment_at_eof.txt new file mode 100644 index 000000000..5d04cdaa4 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0051_unclosed_block_comment_at_eof.txt @@ -0,0 +1,2 @@ +COMMENT 2 "/*" +> error[0; 2) token("/*") msg(Missing trailing `*/` symbols to terminate the block comment) diff --git a/crates/ra_syntax/test_data/lexer/err/0052_unclosed_block_comment_with_content.rs b/crates/ra_syntax/test_data/lexer/err/0052_unclosed_block_comment_with_content.rs new file mode 100644 index 000000000..c45c2844d --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0052_unclosed_block_comment_with_content.rs @@ -0,0 +1 @@ +/* comment diff --git a/crates/ra_syntax/test_data/lexer/err/0052_unclosed_block_comment_with_content.txt b/crates/ra_syntax/test_data/lexer/err/0052_unclosed_block_comment_with_content.txt new file mode 100644 index 
000000000..8c6b678e3 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0052_unclosed_block_comment_with_content.txt @@ -0,0 +1,2 @@ +COMMENT 11 "/* comment\n" +> error[0; 11) token("/* comment\n") msg(Missing trailing `*/` symbols to terminate the block comment) diff --git a/crates/ra_syntax/test_data/lexer/err/0053_unclosed_nested_block_comment_entirely.rs b/crates/ra_syntax/test_data/lexer/err/0053_unclosed_nested_block_comment_entirely.rs new file mode 100644 index 000000000..3fcfc9660 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0053_unclosed_nested_block_comment_entirely.rs @@ -0,0 +1 @@ +/* /* /* diff --git a/crates/ra_syntax/test_data/lexer/err/0053_unclosed_nested_block_comment_entirely.txt b/crates/ra_syntax/test_data/lexer/err/0053_unclosed_nested_block_comment_entirely.txt new file mode 100644 index 000000000..250de34d9 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0053_unclosed_nested_block_comment_entirely.txt @@ -0,0 +1,2 @@ +COMMENT 9 "/* /* /*\n" +> error[0; 9) token("/* /* /*\n") msg(Missing trailing `*/` symbols to terminate the block comment) diff --git a/crates/ra_syntax/test_data/lexer/err/0054_unclosed_nested_block_comment_partially.rs b/crates/ra_syntax/test_data/lexer/err/0054_unclosed_nested_block_comment_partially.rs new file mode 100644 index 000000000..26c898f01 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0054_unclosed_nested_block_comment_partially.rs @@ -0,0 +1 @@ +/** /*! /* comment */ */ diff --git a/crates/ra_syntax/test_data/lexer/err/0054_unclosed_nested_block_comment_partially.txt b/crates/ra_syntax/test_data/lexer/err/0054_unclosed_nested_block_comment_partially.txt new file mode 100644 index 000000000..f97f2a8c7 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0054_unclosed_nested_block_comment_partially.txt @@ -0,0 +1,2 @@ +COMMENT 25 "/** /*! /* comment */ */\n" +> error[0; 25) token("/** /*! 
/* comment */ */\n") msg(Missing trailing `*/` symbols to terminate the block comment) diff --git a/crates/ra_syntax/test_data/lexer/err/0055_empty_int.rs b/crates/ra_syntax/test_data/lexer/err/0055_empty_int.rs new file mode 100644 index 000000000..aa2a9fdca --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0055_empty_int.rs @@ -0,0 +1,17 @@ +0b +0o +0x + +0b_ +0o_ +0x_ + +0bnoDigit +0onoDigit +0xnoDigit + +0xG +0xg + +0x_g +0x_G diff --git a/crates/ra_syntax/test_data/lexer/err/0055_empty_int.txt b/crates/ra_syntax/test_data/lexer/err/0055_empty_int.txt new file mode 100644 index 000000000..2fe5bd950 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0055_empty_int.txt @@ -0,0 +1,39 @@ +INT_NUMBER 2 "0b" +WHITESPACE 1 "\n" +INT_NUMBER 2 "0o" +WHITESPACE 1 "\n" +INT_NUMBER 2 "0x" +WHITESPACE 2 "\n\n" +INT_NUMBER 3 "0b_" +WHITESPACE 1 "\n" +INT_NUMBER 3 "0o_" +WHITESPACE 1 "\n" +INT_NUMBER 3 "0x_" +WHITESPACE 2 "\n\n" +INT_NUMBER 9 "0bnoDigit" +WHITESPACE 1 "\n" +INT_NUMBER 9 "0onoDigit" +WHITESPACE 1 "\n" +INT_NUMBER 9 "0xnoDigit" +WHITESPACE 2 "\n\n" +INT_NUMBER 3 "0xG" +WHITESPACE 1 "\n" +INT_NUMBER 3 "0xg" +WHITESPACE 2 "\n\n" +INT_NUMBER 4 "0x_g" +WHITESPACE 1 "\n" +INT_NUMBER 4 "0x_G" +WHITESPACE 1 "\n" +> error[0; 2) token("0b") msg(Missing digits after the integer base prefix) +> error[3; 5) token("0o") msg(Missing digits after the integer base prefix) +> error[6; 8) token("0x") msg(Missing digits after the integer base prefix) +> error[10; 13) token("0b_") msg(Missing digits after the integer base prefix) +> error[14; 17) token("0o_") msg(Missing digits after the integer base prefix) +> error[18; 21) token("0x_") msg(Missing digits after the integer base prefix) +> error[23; 32) token("0bnoDigit") msg(Missing digits after the integer base prefix) +> error[33; 42) token("0onoDigit") msg(Missing digits after the integer base prefix) +> error[43; 52) token("0xnoDigit") msg(Missing digits after the integer base prefix) +> error[54; 57) 
token("0xG") msg(Missing digits after the integer base prefix) +> error[58; 61) token("0xg") msg(Missing digits after the integer base prefix) +> error[63; 67) token("0x_g") msg(Missing digits after the integer base prefix) +> error[68; 72) token("0x_G") msg(Missing digits after the integer base prefix) diff --git a/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.rs b/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.rs new file mode 100644 index 000000000..286584c88 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.rs @@ -0,0 +1,22 @@ +0e +0E + +42e+ +42e- +42E+ +42E- + +42.e+ +42.e- +42.E+ +42.E- + +42.2e+ +42.2e- +42.2E+ +42.2E- + +42.2e+f32 +42.2e-f32 +42.2E+f32 +42.2E-f32 diff --git a/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt b/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt new file mode 100644 index 000000000..ab35e20a5 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0056_empty_exponent.txt @@ -0,0 +1,62 @@ +FLOAT_NUMBER 2 "0e" +WHITESPACE 1 "\n" +FLOAT_NUMBER 2 "0E" +WHITESPACE 2 "\n\n" +FLOAT_NUMBER 4 "42e+" +WHITESPACE 1 "\n" +FLOAT_NUMBER 4 "42e-" +WHITESPACE 1 "\n" +FLOAT_NUMBER 4 "42E+" +WHITESPACE 1 "\n" +FLOAT_NUMBER 4 "42E-" +WHITESPACE 2 "\n\n" +INT_NUMBER 2 "42" +DOT 1 "." +IDENT 1 "e" +PLUS 1 "+" +WHITESPACE 1 "\n" +INT_NUMBER 2 "42" +DOT 1 "." +IDENT 1 "e" +MINUS 1 "-" +WHITESPACE 1 "\n" +INT_NUMBER 2 "42" +DOT 1 "." +IDENT 1 "E" +PLUS 1 "+" +WHITESPACE 1 "\n" +INT_NUMBER 2 "42" +DOT 1 "." 
+IDENT 1 "E" +MINUS 1 "-" +WHITESPACE 2 "\n\n" +FLOAT_NUMBER 6 "42.2e+" +WHITESPACE 1 "\n" +FLOAT_NUMBER 6 "42.2e-" +WHITESPACE 1 "\n" +FLOAT_NUMBER 6 "42.2E+" +WHITESPACE 1 "\n" +FLOAT_NUMBER 6 "42.2E-" +WHITESPACE 2 "\n\n" +FLOAT_NUMBER 9 "42.2e+f32" +WHITESPACE 1 "\n" +FLOAT_NUMBER 9 "42.2e-f32" +WHITESPACE 1 "\n" +FLOAT_NUMBER 9 "42.2E+f32" +WHITESPACE 1 "\n" +FLOAT_NUMBER 9 "42.2E-f32" +WHITESPACE 1 "\n" +> error[0; 2) token("0e") msg(Missing digits after the exponent symbol) +> error[3; 5) token("0E") msg(Missing digits after the exponent symbol) +> error[7; 11) token("42e+") msg(Missing digits after the exponent symbol) +> error[12; 16) token("42e-") msg(Missing digits after the exponent symbol) +> error[17; 21) token("42E+") msg(Missing digits after the exponent symbol) +> error[22; 26) token("42E-") msg(Missing digits after the exponent symbol) +> error[53; 59) token("42.2e+") msg(Missing digits after the exponent symbol) +> error[60; 66) token("42.2e-") msg(Missing digits after the exponent symbol) +> error[67; 73) token("42.2E+") msg(Missing digits after the exponent symbol) +> error[74; 80) token("42.2E-") msg(Missing digits after the exponent symbol) +> error[82; 91) token("42.2e+f32") msg(Missing digits after the exponent symbol) +> error[92; 101) token("42.2e-f32") msg(Missing digits after the exponent symbol) +> error[102; 111) token("42.2E+f32") msg(Missing digits after the exponent symbol) +> error[112; 121) token("42.2E-f32") msg(Missing digits after the exponent symbol) diff --git a/crates/ra_syntax/test_data/lexer/err/0057_lifetime_strarts_with_a_number.rs b/crates/ra_syntax/test_data/lexer/err/0057_lifetime_strarts_with_a_number.rs new file mode 100644 index 000000000..a7698a404 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0057_lifetime_strarts_with_a_number.rs @@ -0,0 +1,2 @@ +'1 +'1lifetime diff --git a/crates/ra_syntax/test_data/lexer/err/0057_lifetime_strarts_with_a_number.txt 
b/crates/ra_syntax/test_data/lexer/err/0057_lifetime_strarts_with_a_number.txt new file mode 100644 index 000000000..89b38bfac --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/err/0057_lifetime_strarts_with_a_number.txt @@ -0,0 +1,6 @@ +LIFETIME 2 "\'1" +WHITESPACE 1 "\n" +LIFETIME 10 "\'1lifetime" +WHITESPACE 1 "\n" +> error[0; 2) token("\'1") msg(Lifetime name cannot start with a number) +> error[3; 13) token("\'1lifetime") msg(Lifetime name cannot start with a number) diff --git a/crates/ra_syntax/test_data/lexer/ok/0001_hello.rs b/crates/ra_syntax/test_data/lexer/ok/0001_hello.rs new file mode 100644 index 000000000..95d09f2b1 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0001_hello.rs @@ -0,0 +1 @@ +hello world \ No newline at end of file diff --git a/crates/ra_syntax/test_data/lexer/ok/0001_hello.txt b/crates/ra_syntax/test_data/lexer/ok/0001_hello.txt new file mode 100644 index 000000000..27a5940a9 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0001_hello.txt @@ -0,0 +1,3 @@ +IDENT 5 "hello" +WHITESPACE 1 " " +IDENT 5 "world" diff --git a/crates/ra_syntax/test_data/lexer/ok/0002_whitespace.rs b/crates/ra_syntax/test_data/lexer/ok/0002_whitespace.rs new file mode 100644 index 000000000..08fce1418 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0002_whitespace.rs @@ -0,0 +1,4 @@ +a b c +d + +e f diff --git a/crates/ra_syntax/test_data/lexer/ok/0002_whitespace.txt b/crates/ra_syntax/test_data/lexer/ok/0002_whitespace.txt new file mode 100644 index 000000000..01d260918 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0002_whitespace.txt @@ -0,0 +1,12 @@ +IDENT 1 "a" +WHITESPACE 1 " " +IDENT 1 "b" +WHITESPACE 2 " " +IDENT 1 "c" +WHITESPACE 1 "\n" +IDENT 1 "d" +WHITESPACE 2 "\n\n" +IDENT 1 "e" +WHITESPACE 1 "\t" +IDENT 1 "f" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0003_ident.rs b/crates/ra_syntax/test_data/lexer/ok/0003_ident.rs new file mode 100644 index 000000000..c05c9c009 --- /dev/null +++ 
b/crates/ra_syntax/test_data/lexer/ok/0003_ident.rs @@ -0,0 +1 @@ +foo foo_ _foo _ __ x привет diff --git a/crates/ra_syntax/test_data/lexer/ok/0003_ident.txt b/crates/ra_syntax/test_data/lexer/ok/0003_ident.txt new file mode 100644 index 000000000..4a0d5c053 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0003_ident.txt @@ -0,0 +1,14 @@ +IDENT 3 "foo" +WHITESPACE 1 " " +IDENT 4 "foo_" +WHITESPACE 1 " " +IDENT 4 "_foo" +WHITESPACE 1 " " +UNDERSCORE 1 "_" +WHITESPACE 1 " " +IDENT 2 "__" +WHITESPACE 1 " " +IDENT 1 "x" +WHITESPACE 1 " " +IDENT 12 "привет" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0004_numbers.rs b/crates/ra_syntax/test_data/lexer/ok/0004_numbers.rs new file mode 100644 index 000000000..bc761c235 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0004_numbers.rs @@ -0,0 +1,9 @@ +0 00 0_ 0. 0z +01790 0b1790 0o1790 0x1790aAbBcCdDeEfF 001279 0_1279 0.1279 0e1279 0E1279 +0..2 +0.foo() +0e+1 +0.e+1 +0.0E-2 +0___0.10000____0000e+111__ +1i64 92.0f32 11__s diff --git a/crates/ra_syntax/test_data/lexer/ok/0004_numbers.txt b/crates/ra_syntax/test_data/lexer/ok/0004_numbers.txt new file mode 100644 index 000000000..e19fc5789 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0004_numbers.txt @@ -0,0 +1,57 @@ +INT_NUMBER 1 "0" +WHITESPACE 1 " " +INT_NUMBER 2 "00" +WHITESPACE 1 " " +INT_NUMBER 2 "0_" +WHITESPACE 1 " " +FLOAT_NUMBER 2 "0." +WHITESPACE 1 " " +INT_NUMBER 2 "0z" +WHITESPACE 1 "\n" +INT_NUMBER 5 "01790" +WHITESPACE 1 " " +INT_NUMBER 6 "0b1790" +WHITESPACE 1 " " +INT_NUMBER 6 "0o1790" +WHITESPACE 1 " " +INT_NUMBER 18 "0x1790aAbBcCdDeEfF" +WHITESPACE 1 " " +INT_NUMBER 6 "001279" +WHITESPACE 1 " " +INT_NUMBER 6 "0_1279" +WHITESPACE 1 " " +FLOAT_NUMBER 6 "0.1279" +WHITESPACE 1 " " +FLOAT_NUMBER 6 "0e1279" +WHITESPACE 1 " " +FLOAT_NUMBER 6 "0E1279" +WHITESPACE 1 "\n" +INT_NUMBER 1 "0" +DOT 1 "." +DOT 1 "." +INT_NUMBER 1 "2" +WHITESPACE 1 "\n" +INT_NUMBER 1 "0" +DOT 1 "."
+IDENT 3 "foo" +L_PAREN 1 "(" +R_PAREN 1 ")" +WHITESPACE 1 "\n" +FLOAT_NUMBER 4 "0e+1" +WHITESPACE 1 "\n" +INT_NUMBER 1 "0" +DOT 1 "." +IDENT 1 "e" +PLUS 1 "+" +INT_NUMBER 1 "1" +WHITESPACE 1 "\n" +FLOAT_NUMBER 6 "0.0E-2" +WHITESPACE 1 "\n" +FLOAT_NUMBER 26 "0___0.10000____0000e+111__" +WHITESPACE 1 "\n" +INT_NUMBER 4 "1i64" +WHITESPACE 1 " " +FLOAT_NUMBER 7 "92.0f32" +WHITESPACE 1 " " +INT_NUMBER 5 "11__s" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0005_symbols.rs b/crates/ra_syntax/test_data/lexer/ok/0005_symbols.rs new file mode 100644 index 000000000..487569b5a --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0005_symbols.rs @@ -0,0 +1,6 @@ +; , ( ) { } [ ] < > @ # ~ ? $ & | + * / ^ % +. .. ... ..= +: :: += => +! != +- -> diff --git a/crates/ra_syntax/test_data/lexer/ok/0005_symbols.txt b/crates/ra_syntax/test_data/lexer/ok/0005_symbols.txt new file mode 100644 index 000000000..469a90e42 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0005_symbols.txt @@ -0,0 +1,77 @@ +SEMI 1 ";" +WHITESPACE 1 " " +COMMA 1 "," +WHITESPACE 1 " " +L_PAREN 1 "(" +WHITESPACE 1 " " +R_PAREN 1 ")" +WHITESPACE 1 " " +L_CURLY 1 "{" +WHITESPACE 1 " " +R_CURLY 1 "}" +WHITESPACE 1 " " +L_BRACK 1 "[" +WHITESPACE 1 " " +R_BRACK 1 "]" +WHITESPACE 1 " " +L_ANGLE 1 "<" +WHITESPACE 1 " " +R_ANGLE 1 ">" +WHITESPACE 1 " " +AT 1 "@" +WHITESPACE 1 " " +POUND 1 "#" +WHITESPACE 1 " " +TILDE 1 "~" +WHITESPACE 1 " " +QUESTION 1 "?" +WHITESPACE 1 " " +DOLLAR 1 "$" +WHITESPACE 1 " " +AMP 1 "&" +WHITESPACE 1 " " +PIPE 1 "|" +WHITESPACE 1 " " +PLUS 1 "+" +WHITESPACE 1 " " +STAR 1 "*" +WHITESPACE 1 " " +SLASH 1 "/" +WHITESPACE 1 " " +CARET 1 "^" +WHITESPACE 1 " " +PERCENT 1 "%" +WHITESPACE 1 "\n" +DOT 1 "." +WHITESPACE 1 " " +DOT 1 "." +DOT 1 "." +WHITESPACE 1 " " +DOT 1 "." +DOT 1 "." +DOT 1 "." +WHITESPACE 1 " " +DOT 1 "." +DOT 1 "." 
+EQ 1 "=" +WHITESPACE 1 "\n" +COLON 1 ":" +WHITESPACE 1 " " +COLON 1 ":" +COLON 1 ":" +WHITESPACE 1 "\n" +EQ 1 "=" +WHITESPACE 1 " " +EQ 1 "=" +R_ANGLE 1 ">" +WHITESPACE 1 "\n" +EXCL 1 "!" +WHITESPACE 1 " " +EXCL 1 "!" +EQ 1 "=" +WHITESPACE 1 "\n" +MINUS 1 "-" +WHITESPACE 1 " " +MINUS 1 "-" +R_ANGLE 1 ">" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0006_chars.rs b/crates/ra_syntax/test_data/lexer/ok/0006_chars.rs new file mode 100644 index 000000000..454ee0a5f --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0006_chars.rs @@ -0,0 +1 @@ +'x' ' ' '0' 'hello' '\x7f' '\n' '\\' '\'' diff --git a/crates/ra_syntax/test_data/lexer/ok/0006_chars.txt b/crates/ra_syntax/test_data/lexer/ok/0006_chars.txt new file mode 100644 index 000000000..950954fbc --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0006_chars.txt @@ -0,0 +1,16 @@ +CHAR 3 "\'x\'" +WHITESPACE 1 " " +CHAR 3 "\' \'" +WHITESPACE 1 " " +CHAR 3 "\'0\'" +WHITESPACE 1 " " +CHAR 7 "\'hello\'" +WHITESPACE 1 " " +CHAR 6 "\'\\x7f\'" +WHITESPACE 1 " " +CHAR 4 "\'\\n\'" +WHITESPACE 1 " " +CHAR 4 "\'\\\\\'" +WHITESPACE 1 " " +CHAR 4 "\'\\\'\'" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0007_lifetimes.rs b/crates/ra_syntax/test_data/lexer/ok/0007_lifetimes.rs new file mode 100644 index 000000000..b764f1dce --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0007_lifetimes.rs @@ -0,0 +1 @@ +'a 'foo 'foo_bar_baz '_ diff --git a/crates/ra_syntax/test_data/lexer/ok/0007_lifetimes.txt b/crates/ra_syntax/test_data/lexer/ok/0007_lifetimes.txt new file mode 100644 index 000000000..005c29100 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0007_lifetimes.txt @@ -0,0 +1,8 @@ +LIFETIME 2 "\'a" +WHITESPACE 1 " " +LIFETIME 4 "\'foo" +WHITESPACE 1 " " +LIFETIME 12 "\'foo_bar_baz" +WHITESPACE 1 " " +LIFETIME 2 "\'_" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0008_byte_strings.rs b/crates/ra_syntax/test_data/lexer/ok/0008_byte_strings.rs new 
file mode 100644 index 000000000..b54930f5e --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0008_byte_strings.rs @@ -0,0 +1,3 @@ +b'' b'x' b"foo" br"" +b''suf b""ix br""br +b'\n' b'\\' b'\'' b'hello' diff --git a/crates/ra_syntax/test_data/lexer/ok/0008_byte_strings.txt b/crates/ra_syntax/test_data/lexer/ok/0008_byte_strings.txt new file mode 100644 index 000000000..bc03b51a8 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0008_byte_strings.txt @@ -0,0 +1,22 @@ +BYTE 3 "b\'\'" +WHITESPACE 1 " " +BYTE 4 "b\'x\'" +WHITESPACE 1 " " +BYTE_STRING 6 "b\"foo\"" +WHITESPACE 1 " " +RAW_BYTE_STRING 4 "br\"\"" +WHITESPACE 1 "\n" +BYTE 6 "b\'\'suf" +WHITESPACE 1 " " +BYTE_STRING 5 "b\"\"ix" +WHITESPACE 1 " " +RAW_BYTE_STRING 6 "br\"\"br" +WHITESPACE 1 "\n" +BYTE 5 "b\'\\n\'" +WHITESPACE 1 " " +BYTE 5 "b\'\\\\\'" +WHITESPACE 1 " " +BYTE 5 "b\'\\\'\'" +WHITESPACE 1 " " +BYTE 8 "b\'hello\'" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0009_strings.rs b/crates/ra_syntax/test_data/lexer/ok/0009_strings.rs new file mode 100644 index 000000000..4ddb5bffc --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0009_strings.rs @@ -0,0 +1,2 @@ +"hello" r"world" "\n\"\\no escape" "multi +line" diff --git a/crates/ra_syntax/test_data/lexer/ok/0009_strings.txt b/crates/ra_syntax/test_data/lexer/ok/0009_strings.txt new file mode 100644 index 000000000..4cb4d711d --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0009_strings.txt @@ -0,0 +1,8 @@ +STRING 7 "\"hello\"" +WHITESPACE 1 " " +RAW_STRING 8 "r\"world\"" +WHITESPACE 1 " " +STRING 17 "\"\\n\\\"\\\\no escape\"" +WHITESPACE 1 " " +STRING 12 "\"multi\nline\"" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0010_single_line_comments.rs b/crates/ra_syntax/test_data/lexer/ok/0010_single_line_comments.rs new file mode 100644 index 000000000..4b6653f9c --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0010_single_line_comments.rs @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +// 
hello +//! World +//!! Inner line doc +/// Outer line doc +//// Just a comment + +// +//! +//!! +/// +//// diff --git a/crates/ra_syntax/test_data/lexer/ok/0010_single_line_comments.txt b/crates/ra_syntax/test_data/lexer/ok/0010_single_line_comments.txt new file mode 100644 index 000000000..98a3818c0 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0010_single_line_comments.txt @@ -0,0 +1,22 @@ +SHEBANG 19 "#!/usr/bin/env bash" +WHITESPACE 1 "\n" +COMMENT 8 "// hello" +WHITESPACE 1 "\n" +COMMENT 9 "//! World" +WHITESPACE 1 "\n" +COMMENT 19 "//!! Inner line doc" +WHITESPACE 1 "\n" +COMMENT 18 "/// Outer line doc" +WHITESPACE 1 "\n" +COMMENT 19 "//// Just a comment" +WHITESPACE 2 "\n\n" +COMMENT 2 "//" +WHITESPACE 1 "\n" +COMMENT 3 "//!" +WHITESPACE 1 "\n" +COMMENT 4 "//!!" +WHITESPACE 1 "\n" +COMMENT 3 "///" +WHITESPACE 1 "\n" +COMMENT 4 "////" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0011_keywords.rs b/crates/ra_syntax/test_data/lexer/ok/0011_keywords.rs new file mode 100644 index 000000000..1e91bff4e --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0011_keywords.rs @@ -0,0 +1,3 @@ +async fn use struct trait enum impl true false as extern crate +mod pub self super in where for loop while if match const +static mut type ref let else move return diff --git a/crates/ra_syntax/test_data/lexer/ok/0011_keywords.txt b/crates/ra_syntax/test_data/lexer/ok/0011_keywords.txt new file mode 100644 index 000000000..22c00eefb --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0011_keywords.txt @@ -0,0 +1,64 @@ +ASYNC_KW 5 "async" +WHITESPACE 1 " " +FN_KW 2 "fn" +WHITESPACE 1 " " +USE_KW 3 "use" +WHITESPACE 1 " " +STRUCT_KW 6 "struct" +WHITESPACE 1 " " +TRAIT_KW 5 "trait" +WHITESPACE 1 " " +ENUM_KW 4 "enum" +WHITESPACE 1 " " +IMPL_KW 4 "impl" +WHITESPACE 1 " " +TRUE_KW 4 "true" +WHITESPACE 1 " " +FALSE_KW 5 "false" +WHITESPACE 1 " " +AS_KW 2 "as" +WHITESPACE 1 " " +EXTERN_KW 6 "extern" +WHITESPACE 1 " " +CRATE_KW 5 "crate" +WHITESPACE 
1 "\n" +MOD_KW 3 "mod" +WHITESPACE 1 " " +PUB_KW 3 "pub" +WHITESPACE 1 " " +SELF_KW 4 "self" +WHITESPACE 1 " " +SUPER_KW 5 "super" +WHITESPACE 1 " " +IN_KW 2 "in" +WHITESPACE 1 " " +WHERE_KW 5 "where" +WHITESPACE 1 " " +FOR_KW 3 "for" +WHITESPACE 1 " " +LOOP_KW 4 "loop" +WHITESPACE 1 " " +WHILE_KW 5 "while" +WHITESPACE 1 " " +IF_KW 2 "if" +WHITESPACE 1 " " +MATCH_KW 5 "match" +WHITESPACE 1 " " +CONST_KW 5 "const" +WHITESPACE 1 "\n" +STATIC_KW 6 "static" +WHITESPACE 1 " " +MUT_KW 3 "mut" +WHITESPACE 1 " " +TYPE_KW 4 "type" +WHITESPACE 1 " " +REF_KW 3 "ref" +WHITESPACE 1 " " +LET_KW 3 "let" +WHITESPACE 1 " " +ELSE_KW 4 "else" +WHITESPACE 1 " " +MOVE_KW 4 "move" +WHITESPACE 1 " " +RETURN_KW 6 "return" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0012_block_comment.rs b/crates/ra_syntax/test_data/lexer/ok/0012_block_comment.rs new file mode 100644 index 000000000..b880a59d9 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0012_block_comment.rs @@ -0,0 +1,3 @@ +/* */ +/**/ +/* /* */ */ diff --git a/crates/ra_syntax/test_data/lexer/ok/0012_block_comment.txt b/crates/ra_syntax/test_data/lexer/ok/0012_block_comment.txt new file mode 100644 index 000000000..2618e287e --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0012_block_comment.txt @@ -0,0 +1,6 @@ +COMMENT 5 "/* */" +WHITESPACE 1 "\n" +COMMENT 4 "/**/" +WHITESPACE 1 "\n" +COMMENT 11 "/* /* */ */" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0013_raw_strings.rs b/crates/ra_syntax/test_data/lexer/ok/0013_raw_strings.rs new file mode 100644 index 000000000..e5ed0b693 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0013_raw_strings.rs @@ -0,0 +1 @@ +r###"this is a r##"raw"## string"### diff --git a/crates/ra_syntax/test_data/lexer/ok/0013_raw_strings.txt b/crates/ra_syntax/test_data/lexer/ok/0013_raw_strings.txt new file mode 100644 index 000000000..9cf0957d1 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0013_raw_strings.txt @@ -0,0 +1,2 @@ 
+RAW_STRING 36 "r###\"this is a r##\"raw\"## string\"###" +WHITESPACE 1 "\n" diff --git a/crates/ra_syntax/test_data/lexer/ok/0014_raw_ident.rs b/crates/ra_syntax/test_data/lexer/ok/0014_raw_ident.rs new file mode 100644 index 000000000..b40a1b6a2 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0014_raw_ident.rs @@ -0,0 +1 @@ +r#raw_ident diff --git a/crates/ra_syntax/test_data/lexer/ok/0014_raw_ident.txt b/crates/ra_syntax/test_data/lexer/ok/0014_raw_ident.txt new file mode 100644 index 000000000..484689693 --- /dev/null +++ b/crates/ra_syntax/test_data/lexer/ok/0014_raw_ident.txt @@ -0,0 +1,2 @@ +IDENT 11 "r#raw_ident" +WHITESPACE 1 "\n" -- cgit v1.2.3