From 73ded3c63ca2522b7bb6ca8eb7834c5adc1a3511 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 27 Dec 2018 14:42:46 +0300 Subject: dedupe literal parsers --- crates/ra_syntax/src/string_lexing.rs | 10 +- crates/ra_syntax/src/string_lexing/byte.rs | 51 ------ crates/ra_syntax/src/string_lexing/byte_string.rs | 51 ------ crates/ra_syntax/src/string_lexing/char.rs | 176 -------------------- crates/ra_syntax/src/string_lexing/parser.rs | 82 +++------- crates/ra_syntax/src/string_lexing/string.rs | 187 +++++++++++++++++++++- crates/ra_syntax/src/validation/byte.rs | 7 +- crates/ra_syntax/src/validation/byte_string.rs | 6 +- crates/ra_syntax/src/validation/char.rs | 7 +- crates/ra_syntax/src/validation/string.rs | 17 +- 10 files changed, 220 insertions(+), 374 deletions(-) delete mode 100644 crates/ra_syntax/src/string_lexing/byte.rs delete mode 100644 crates/ra_syntax/src/string_lexing/byte_string.rs delete mode 100644 crates/ra_syntax/src/string_lexing/char.rs (limited to 'crates/ra_syntax/src') diff --git a/crates/ra_syntax/src/string_lexing.rs b/crates/ra_syntax/src/string_lexing.rs index 94853331f..349733f3f 100644 --- a/crates/ra_syntax/src/string_lexing.rs +++ b/crates/ra_syntax/src/string_lexing.rs @@ -1,13 +1,7 @@ mod parser; -mod byte; -mod byte_string; -mod char; mod string; pub use self::{ - byte::parse_byte_literal, - byte_string::parse_byte_string_literal, - char::parse_char_literal, - parser::{CharComponent, CharComponentKind, StringComponent, StringComponentKind}, - string::parse_string_literal, + parser::{StringComponent, StringComponentKind}, + string::{parse_string_literal, parse_char_literal, parse_byte_literal, parse_byte_string_literal}, }; diff --git a/crates/ra_syntax/src/string_lexing/byte.rs b/crates/ra_syntax/src/string_lexing/byte.rs deleted file mode 100644 index b3228d6ca..000000000 --- a/crates/ra_syntax/src/string_lexing/byte.rs +++ /dev/null @@ -1,51 +0,0 @@ -use super::parser::Parser; -use super::CharComponent; - -pub fn parse_byte_literal(src: &str) -> ByteComponentIterator { - ByteComponentIterator { - parser: Parser::new(src), - has_closing_quote: false, - } -} - -pub struct ByteComponentIterator<'a> { - parser: Parser<'a>, - pub has_closing_quote: bool, -} - -impl<'a> Iterator for ByteComponentIterator<'a> { - type Item = CharComponent; - fn next(&mut self) -> Option { - if self.parser.pos == 0 { - assert!( - self.parser.advance() == 'b', - "Byte literal should start with a `b`" - ); - - assert!( - self.parser.advance() == '\'', - "Byte literal should start with a `b`, followed by a quote" - ); - } - - if let Some(component) = self.parser.parse_char_component() { - return Some(component); - } - - // We get here when there are no char components left to parse - if self.parser.peek() == Some('\'') { - self.parser.advance(); - self.has_closing_quote = true; - } - - assert!( - self.parser.peek() == None, - "byte literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", - self.parser.src, - self.parser.pos, - self.parser.src.len() - ); - - None - } -} diff --git a/crates/ra_syntax/src/string_lexing/byte_string.rs b/crates/ra_syntax/src/string_lexing/byte_string.rs deleted file mode 100644 index a6056159b..000000000 --- a/crates/ra_syntax/src/string_lexing/byte_string.rs +++ /dev/null @@ -1,51 +0,0 @@ -use super::parser::Parser; -use super::StringComponent; - -pub fn parse_byte_string_literal(src: &str) -> ByteStringComponentIterator { - ByteStringComponentIterator { - parser: Parser::new(src), - has_closing_quote: false, - } -} - -pub struct ByteStringComponentIterator<'a> { - parser: Parser<'a>, - pub has_closing_quote: bool, -} - -impl<'a> Iterator for ByteStringComponentIterator<'a> { - type Item = StringComponent; - fn next(&mut self) -> Option { - if self.parser.pos == 0 { - assert!( - self.parser.advance() == 'b', - "byte string literal should start with a `b`" - ); - - assert!( - self.parser.advance() == '"', - "byte string literal should start with a `b`, followed by double quotes" - ); - } - - if let Some(component) = self.parser.parse_string_component() { - return Some(component); - } - - // We get here when there are no char components left to parse - if self.parser.peek() == Some('"') { - self.parser.advance(); - self.has_closing_quote = true; - } - - assert!( - self.parser.peek() == None, - "byte string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", - self.parser.src, - self.parser.pos, - self.parser.src.len() - ); - - None - } -} diff --git a/crates/ra_syntax/src/string_lexing/char.rs b/crates/ra_syntax/src/string_lexing/char.rs deleted file mode 100644 index e01813176..000000000 --- a/crates/ra_syntax/src/string_lexing/char.rs +++ /dev/null @@ -1,176 +0,0 @@ -use super::parser::Parser; -use super::CharComponent; - -pub fn parse_char_literal(src: &str) -> CharComponentIterator { - CharComponentIterator { - parser: Parser::new(src), - has_closing_quote: false, - } -} - -pub struct CharComponentIterator<'a> { - parser: Parser<'a>, - pub has_closing_quote: bool, -} - -impl<'a> Iterator for CharComponentIterator<'a> { - type Item = CharComponent; - fn next(&mut self) -> Option { - if self.parser.pos == 0 { - assert!( - self.parser.advance() == '\'', - "char literal should start with a quote" - ); - } - - if let Some(component) = self.parser.parse_char_component() { - return Some(component); - } - - // We get here when there are no char components left to parse - if self.parser.peek() == Some('\'') { - self.parser.advance(); - self.has_closing_quote = true; - } - - assert!( - self.parser.peek() == None, - "char literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", - self.parser.src, - self.parser.pos, - self.parser.src.len() - ); - - None - } -} - -#[cfg(test)] -mod tests { - use rowan::TextRange; - use crate::string_lexing::{ - CharComponent, - CharComponentKind::*, -}; - - fn parse(src: &str) -> (bool, Vec) { - let component_iterator = &mut super::parse_char_literal(src); - let components: Vec<_> = component_iterator.collect(); - (component_iterator.has_closing_quote, components) - } - - fn unclosed_char_component(src: &str) -> CharComponent { - let (has_closing_quote, components) = parse(src); - assert!(!has_closing_quote, "char should not have closing quote"); - assert!(components.len() == 1); - components[0].clone() - } - - fn closed_char_component(src: &str) -> CharComponent { - let (has_closing_quote, components) = parse(src); - assert!(has_closing_quote, "char should have closing quote"); - assert!( - components.len() == 1, - "Literal: {}\nComponents: {:#?}", - src, - components - ); - components[0].clone() - } - - fn closed_char_components(src: &str) -> Vec { - let (has_closing_quote, components) = parse(src); - assert!(has_closing_quote, "char should have closing quote"); - components - } - - fn range_closed(src: &str) -> TextRange { - TextRange::from_to(1.into(), (src.len() as u32 - 1).into()) - } - - fn range_unclosed(src: &str) -> TextRange { - TextRange::from_to(1.into(), (src.len() as u32).into()) - } - - #[test] - fn test_unicode_escapes() { - let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""]; - for escape in unicode_escapes { - let escape_sequence = format!(r"'\u{}'", escape); - let component = closed_char_component(&escape_sequence); - let expected_range = range_closed(&escape_sequence); - assert_eq!(component.kind, UnicodeEscape); - assert_eq!(component.range, expected_range); - } - } - - #[test] - fn test_unicode_escapes_unclosed() { - let unicode_escapes = &["{DEAD", "{BEEF", "{FF"]; - for escape in unicode_escapes { - let escape_sequence = format!(r"'\u{}'", escape); - let component = unclosed_char_component(&escape_sequence); - let expected_range = range_unclosed(&escape_sequence); - assert_eq!(component.kind, UnicodeEscape); - assert_eq!(component.range, expected_range); - } - } - - #[test] - fn test_empty_char() { - let (has_closing_quote, components) = parse("''"); - assert!(has_closing_quote, "char should have closing quote"); - assert!(components.len() == 0); - } - - #[test] - fn test_unclosed_char() { - let component = unclosed_char_component("'a"); - assert!(component.kind == CodePoint); - assert!(component.range == TextRange::from_to(1.into(), 2.into())); - } - - #[test] - fn test_digit_escapes() { - let literals = &[r"", r"5", r"55"]; - - for literal in literals { - let lit_text = format!(r"'\x{}'", literal); - let component = closed_char_component(&lit_text); - assert!(component.kind == AsciiCodeEscape); - assert!(component.range == range_closed(&lit_text)); - } - - // More than 2 digits starts a new codepoint - let components = closed_char_components(r"'\x555'"); - assert!(components.len() == 2); - assert!(components[1].kind == CodePoint); - } - - #[test] - fn test_ascii_escapes() { - let literals = &[ - r"\'", "\\\"", // equivalent to \" - r"\n", r"\r", r"\t", r"\\", r"\0", - ]; - - for literal in literals { - let lit_text = format!("'{}'", literal); - let component = closed_char_component(&lit_text); - assert!(component.kind == AsciiEscape); - assert!(component.range == range_closed(&lit_text)); - } - } - - #[test] - fn test_no_escapes() { - let literals = &['"', 'n', 'r', 't', '0', 'x', 'u']; - - for &literal in literals { - let lit_text = format!("'{}'", literal); - let component = closed_char_component(&lit_text); - assert!(component.kind == CodePoint); - assert!(component.range == range_closed(&lit_text)); - } - } -} diff --git a/crates/ra_syntax/src/string_lexing/parser.rs b/crates/ra_syntax/src/string_lexing/parser.rs index 4a6d5bc93..13f3db889 100644 --- a/crates/ra_syntax/src/string_lexing/parser.rs +++ b/crates/ra_syntax/src/string_lexing/parser.rs @@ -1,15 +1,16 @@ use rowan::{TextRange, TextUnit}; -use self::CharComponentKind::*; +use self::StringComponentKind::*; pub struct Parser<'a> { + pub(super) quote: u8, pub(super) src: &'a str, pub(super) pos: usize, } impl<'a> Parser<'a> { - pub fn new(src: &'a str) -> Parser<'a> { - Parser { src, pos: 0 } + pub fn new(src: &'a str, quote: u8) -> Parser<'a> { + Parser { quote, src, pos: 0 } } // Utility methods @@ -42,7 +43,7 @@ impl<'a> Parser<'a> { // Char parsing methods - fn parse_unicode_escape(&mut self, start: TextUnit) -> CharComponent { + fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent { match self.peek() { Some('{') => { self.advance(); @@ -56,16 +57,16 @@ impl<'a> Parser<'a> { } let end = self.get_pos(); - CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) + StringComponent::new(TextRange::from_to(start, end), UnicodeEscape) } Some(_) | None => { let end = self.get_pos(); - CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) + StringComponent::new(TextRange::from_to(start, end), UnicodeEscape) } } } - fn parse_ascii_code_escape(&mut self, start: TextUnit) -> CharComponent { + fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent { let code_start = self.get_pos(); while let Some(next) = self.peek() { if next == '\'' || (self.get_pos() - code_start == 2.into()) { @@ -76,12 +77,12 @@ impl<'a> Parser<'a> { } let end = self.get_pos(); - CharComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) + StringComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) } - fn parse_escape(&mut self, start: TextUnit) -> CharComponent { + fn parse_escape(&mut self, start: TextUnit) -> StringComponent { if self.peek().is_none() { - return CharComponent::new(TextRange::from_to(start, start), AsciiEscape); + return StringComponent::new(TextRange::from_to(start, start), AsciiEscape); } let next = self.advance(); @@ -90,29 +91,7 @@ impl<'a> Parser<'a> { match next { 'x' => self.parse_ascii_code_escape(start), 'u' => self.parse_unicode_escape(start), - _ => CharComponent::new(range, AsciiEscape), - } - } - - pub fn parse_char_component(&mut self) -> Option { - let next = self.peek()?; - - // Ignore character close - if next == '\'' { - return None; - } - - let start = self.get_pos(); - self.advance(); - - if next == '\\' { - Some(self.parse_escape(start)) - } else { - let end = self.get_pos(); - Some(CharComponent::new( - TextRange::from_to(start, end), - CodePoint, - )) + _ => StringComponent::new(range, AsciiEscape), } } @@ -131,11 +110,11 @@ impl<'a> Parser<'a> { } } - pub fn parse_string_component(&mut self) -> Option { + pub fn parse_component(&mut self) -> Option { let next = self.peek()?; // Ignore string close - if next == '"' { + if next == self.quote as char { return None; } @@ -145,18 +124,18 @@ impl<'a> Parser<'a> { if next == '\\' { // Strings can use `\` to ignore newlines, so we first try to parse one of those // before falling back to parsing char escapes - self.parse_ignore_newline(start).or_else(|| { - let char_component = self.parse_escape(start); - Some(StringComponent::new( - char_component.range, - StringComponentKind::Char(char_component.kind), - )) - }) + if self.quote == b'"' { + if let Some(component) = self.parse_ignore_newline(start) { + return Some(component); + } + } + + Some(self.parse_escape(start)) } else { let end = self.get_pos(); Some(StringComponent::new( TextRange::from_to(start, end), - StringComponentKind::Char(CodePoint), + CodePoint, )) } } @@ -177,23 +156,6 @@ impl StringComponent { #[derive(Debug, Eq, PartialEq, Clone)] pub enum StringComponentKind { IgnoreNewline, - Char(CharComponentKind), -} - -#[derive(Debug, Eq, PartialEq, Clone)] -pub struct CharComponent { - pub range: TextRange, - pub kind: CharComponentKind, -} - -impl CharComponent { - fn new(range: TextRange, kind: CharComponentKind) -> CharComponent { - CharComponent { range, kind } - } -} - -#[derive(Debug, Eq, PartialEq, Clone)] -pub enum CharComponentKind { CodePoint, AsciiEscape, AsciiCodeEscape, diff --git a/crates/ra_syntax/src/string_lexing/string.rs b/crates/ra_syntax/src/string_lexing/string.rs index d8351e9af..7476fea13 100644 --- a/crates/ra_syntax/src/string_lexing/string.rs +++ b/crates/ra_syntax/src/string_lexing/string.rs @@ -1,41 +1,82 @@ -use super::parser::Parser; -use super::StringComponent; +use crate::string_lexing::{ + parser::Parser, + StringComponent, +}; pub fn parse_string_literal(src: &str) -> StringComponentIterator { StringComponentIterator { - parser: Parser::new(src), + parser: Parser::new(src, b'"'), has_closing_quote: false, + prefix: None, + quote: b'"', + } +} + +pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator { + StringComponentIterator { + parser: Parser::new(src, b'"'), + has_closing_quote: false, + prefix: Some(b'b'), + quote: b'"', + } +} + +pub fn parse_char_literal(src: &str) -> StringComponentIterator { + StringComponentIterator { + parser: Parser::new(src, b'\''), + has_closing_quote: false, + prefix: None, + quote: b'\'', + } +} + +pub fn parse_byte_literal(src: &str) -> StringComponentIterator { + StringComponentIterator { + parser: Parser::new(src, b'\''), + has_closing_quote: false, + prefix: Some(b'b'), + quote: b'\'', } } pub struct StringComponentIterator<'a> { parser: Parser<'a>, pub has_closing_quote: bool, + prefix: Option, + quote: u8, } impl<'a> Iterator for StringComponentIterator<'a> { type Item = StringComponent; fn next(&mut self) -> Option { if self.parser.pos == 0 { + if let Some(prefix) = self.prefix { + assert!( + self.parser.advance() == prefix as char, + "literal should start with a {:?}", + prefix as char, + ); + } assert!( - self.parser.advance() == '"', - "string literal should start with double quotes" + self.parser.advance() == self.quote as char, + "literal should start with a {:?}", + self.quote as char, ); } - if let Some(component) = self.parser.parse_string_component() { + if let Some(component) = self.parser.parse_component() { return Some(component); } // We get here when there are no char components left to parse - if self.parser.peek() == Some('"') { + if self.parser.peek() == Some(self.quote as char) { self.parser.advance(); self.has_closing_quote = true; } assert!( self.parser.peek() == None, - "string literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", + "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", self.parser.src, self.parser.pos, self.parser.src.len() @@ -44,3 +85,133 @@ impl<'a> Iterator for StringComponentIterator<'a> { None } } + +#[cfg(test)] +mod tests { + use rowan::TextRange; + use crate::string_lexing::{ + StringComponent, + StringComponentKind::*, +}; + + fn parse(src: &str) -> (bool, Vec) { + let component_iterator = &mut super::parse_char_literal(src); + let components: Vec<_> = component_iterator.collect(); + (component_iterator.has_closing_quote, components) + } + + fn unclosed_char_component(src: &str) -> StringComponent { + let (has_closing_quote, components) = parse(src); + assert!(!has_closing_quote, "char should not have closing quote"); + assert!(components.len() == 1); + components[0].clone() + } + + fn closed_char_component(src: &str) -> StringComponent { + let (has_closing_quote, components) = parse(src); + assert!(has_closing_quote, "char should have closing quote"); + assert!( + components.len() == 1, + "Literal: {}\nComponents: {:#?}", + src, + components + ); + components[0].clone() + } + + fn closed_char_components(src: &str) -> Vec { + let (has_closing_quote, components) = parse(src); + assert!(has_closing_quote, "char should have closing quote"); + components + } + + fn range_closed(src: &str) -> TextRange { + TextRange::from_to(1.into(), (src.len() as u32 - 1).into()) + } + + fn range_unclosed(src: &str) -> TextRange { + TextRange::from_to(1.into(), (src.len() as u32).into()) + } + + #[test] + fn test_unicode_escapes() { + let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""]; + for escape in unicode_escapes { + let escape_sequence = format!(r"'\u{}'", escape); + let component = closed_char_component(&escape_sequence); + let expected_range = range_closed(&escape_sequence); + assert_eq!(component.kind, UnicodeEscape); + assert_eq!(component.range, expected_range); + } + } + + #[test] + fn test_unicode_escapes_unclosed() { + let unicode_escapes = &["{DEAD", "{BEEF", "{FF"]; + for escape in unicode_escapes { + let escape_sequence = format!(r"'\u{}'", escape); + let component = unclosed_char_component(&escape_sequence); + let expected_range = range_unclosed(&escape_sequence); + assert_eq!(component.kind, UnicodeEscape); + assert_eq!(component.range, expected_range); + } + } + + #[test] + fn test_empty_char() { + let (has_closing_quote, components) = parse("''"); + assert!(has_closing_quote, "char should have closing quote"); + assert!(components.len() == 0); + } + + #[test] + fn test_unclosed_char() { + let component = unclosed_char_component("'a"); + assert!(component.kind == CodePoint); + assert!(component.range == TextRange::from_to(1.into(), 2.into())); + } + + #[test] + fn test_digit_escapes() { + let literals = &[r"", r"5", r"55"]; + + for literal in literals { + let lit_text = format!(r"'\x{}'", literal); + let component = closed_char_component(&lit_text); + assert!(component.kind == AsciiCodeEscape); + assert!(component.range == range_closed(&lit_text)); + } + + // More than 2 digits starts a new codepoint + let components = closed_char_components(r"'\x555'"); + assert!(components.len() == 2); + assert!(components[1].kind == CodePoint); + } + + #[test] + fn test_ascii_escapes() { + let literals = &[ + r"\'", "\\\"", // equivalent to \" + r"\n", r"\r", r"\t", r"\\", r"\0", + ]; + + for literal in literals { + let lit_text = format!("'{}'", literal); + let component = closed_char_component(&lit_text); + assert!(component.kind == AsciiEscape); + assert!(component.range == range_closed(&lit_text)); + } + } + + #[test] + fn test_no_escapes() { + let literals = &['"', 'n', 'r', 't', '0', 'x', 'u']; + + for &literal in literals { + let lit_text = format!("'{}'", literal); + let component = closed_char_component(&lit_text); + assert!(component.kind == CodePoint); + assert!(component.range == range_closed(&lit_text)); + } + } +} diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs index 43c0d7edd..e3603e761 100644 --- a/crates/ra_syntax/src/validation/byte.rs +++ b/crates/ra_syntax/src/validation/byte.rs @@ -2,7 +2,7 @@ use crate::{ ast::{self, AstNode}, - string_lexing::{self, CharComponentKind}, + string_lexing::{self, StringComponentKind}, TextRange, validation::char, yellow::{ @@ -38,11 +38,11 @@ pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec) pub(super) fn validate_byte_component( text: &str, - kind: CharComponentKind, + kind: StringComponentKind, range: TextRange, errors: &mut Vec, ) { - use self::CharComponentKind::*; + use self::StringComponentKind::*; match kind { AsciiEscape => validate_byte_escape(text, range, errors), AsciiCodeEscape => validate_byte_code_escape(text, range, errors), @@ -63,6 +63,7 @@ pub(super) fn validate_byte_component( errors.push(SyntaxError::new(ByteOutOfRange, range)); } } + IgnoreNewline => { /* always valid */ } } } diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs index 7b830e97c..2f98472f4 100644 --- a/crates/ra_syntax/src/validation/byte_string.rs +++ b/crates/ra_syntax/src/validation/byte_string.rs @@ -17,15 +17,15 @@ pub(crate) fn validate_byte_string_node(node: ast::ByteString, errors: &mut Vec< let range = component.range + literal_range.start(); match component.kind { - StringComponentKind::Char(kind) => { + StringComponentKind::IgnoreNewline => { /* always valid */ } + _ => { // Chars must escape \t, \n and \r codepoints, but strings don't let text = &literal_text[component.range]; match text { "\t" | "\n" | "\r" => { /* always valid */ } - _ => byte::validate_byte_component(text, kind, range, errors), + _ => byte::validate_byte_component(text, component.kind, range, errors), } } - StringComponentKind::IgnoreNewline => { /* always valid */ } } } diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs index 4728c85e6..deb5b0a9e 100644 --- a/crates/ra_syntax/src/validation/char.rs +++ b/crates/ra_syntax/src/validation/char.rs @@ -6,7 +6,7 @@ use arrayvec::ArrayString; use crate::{ ast::{self, AstNode}, - string_lexing::{self, CharComponentKind}, + string_lexing::{self, StringComponentKind}, TextRange, yellow::{ SyntaxError, @@ -41,12 +41,12 @@ pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec) pub(super) fn validate_char_component( text: &str, - kind: CharComponentKind, + kind: StringComponentKind, range: TextRange, errors: &mut Vec, ) { // Validate escapes - use self::CharComponentKind::*; + use self::StringComponentKind::*; match kind { AsciiEscape => validate_ascii_escape(text, range, errors), AsciiCodeEscape => validate_ascii_code_escape(text, range, errors), @@ -57,6 +57,7 @@ pub(super) fn validate_char_component( errors.push(SyntaxError::new(UnescapedCodepoint, range)); } } + StringComponentKind::IgnoreNewline => { /* always valid */ } } } diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs index 089879d15..456180ab6 100644 --- a/crates/ra_syntax/src/validation/string.rs +++ b/crates/ra_syntax/src/validation/string.rs @@ -1,6 +1,6 @@ use crate::{ ast::{self, AstNode}, - string_lexing::{self, StringComponentKind}, + string_lexing, yellow::{ SyntaxError, SyntaxErrorKind::*, @@ -16,16 +16,11 @@ pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec { - // Chars must escape \t, \n and \r codepoints, but strings don't - let text = &literal_text[component.range]; - match text { - "\t" | "\n" | "\r" => { /* always valid */ } - _ => char::validate_char_component(text, kind, range, errors), - } - } - StringComponentKind::IgnoreNewline => { /* always valid */ } + // Chars must escape \t, \n and \r codepoints, but strings don't + let text = &literal_text[component.range]; + match text { + "\t" | "\n" | "\r" => { /* always valid */ } + _ => char::validate_char_component(text, component.kind, range, errors), } } -- cgit v1.2.3 From 359e70d1b20402ca9cc8731909daecfab598e55d Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 27 Dec 2018 15:03:18 +0300 Subject: support literal suffixes --- crates/ra_syntax/src/string_lexing/parser.rs | 10 ++++++++++ crates/ra_syntax/src/string_lexing/string.rs | 14 ++++++++++++-- crates/ra_syntax/src/validation/byte.rs | 4 ++++ crates/ra_syntax/src/validation/byte_string.rs | 4 ++++ crates/ra_syntax/src/validation/char.rs | 4 ++++ crates/ra_syntax/src/validation/string.rs | 4 ++++ crates/ra_syntax/src/yellow/syntax_error.rs | 2 ++ 7 files changed, 40 insertions(+), 2 deletions(-) (limited to 'crates/ra_syntax/src') diff --git a/crates/ra_syntax/src/string_lexing/parser.rs b/crates/ra_syntax/src/string_lexing/parser.rs index 13f3db889..14c6015c2 100644 --- a/crates/ra_syntax/src/string_lexing/parser.rs +++ b/crates/ra_syntax/src/string_lexing/parser.rs @@ -139,6 +139,16 @@ impl<'a> Parser<'a> { )) } } + + pub fn parse_suffix(&mut self) -> Option { + let start = self.get_pos(); + let _ = self.peek()?; + while let Some(_) = self.peek() { + self.advance(); + } + let end = self.get_pos(); + Some(TextRange::from_to(start, end)) + } } #[derive(Debug, Eq, PartialEq, Clone)] diff --git a/crates/ra_syntax/src/string_lexing/string.rs b/crates/ra_syntax/src/string_lexing/string.rs index 7476fea13..064f08544 100644 --- a/crates/ra_syntax/src/string_lexing/string.rs +++ b/crates/ra_syntax/src/string_lexing/string.rs @@ -1,12 +1,15 @@ -use crate::string_lexing::{ +use crate::{ + TextRange, + string_lexing::{ parser::Parser, StringComponent, -}; +}}; pub fn parse_string_literal(src: &str) -> StringComponentIterator { StringComponentIterator { parser: Parser::new(src, b'"'), has_closing_quote: false, + suffix: None, prefix: None, quote: b'"', } @@ -16,6 +19,7 @@ pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator { StringComponentIterator { parser: Parser::new(src, b'"'), has_closing_quote: false, + suffix: None, prefix: Some(b'b'), quote: b'"', } @@ -25,6 +29,7 @@ pub fn parse_char_literal(src: &str) -> StringComponentIterator { StringComponentIterator { parser: Parser::new(src, b'\''), has_closing_quote: false, + suffix: None, prefix: None, quote: b'\'', } @@ -34,6 +39,7 @@ pub fn parse_byte_literal(src: &str) -> StringComponentIterator { StringComponentIterator { parser: Parser::new(src, b'\''), has_closing_quote: false, + suffix: None, prefix: Some(b'b'), quote: b'\'', } @@ -42,6 +48,7 @@ pub fn parse_byte_literal(src: &str) -> StringComponentIterator { pub struct StringComponentIterator<'a> { parser: Parser<'a>, pub has_closing_quote: bool, + pub suffix: Option, prefix: Option, quote: u8, } @@ -72,6 +79,9 @@ impl<'a> Iterator for StringComponentIterator<'a> { if self.parser.peek() == Some(self.quote as char) { self.parser.advance(); self.has_closing_quote = true; + if let Some(range) = self.parser.parse_suffix() { + self.suffix = Some(range); + } } assert!( diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs index e3603e761..2f9b7fac7 100644 --- a/crates/ra_syntax/src/validation/byte.rs +++ b/crates/ra_syntax/src/validation/byte.rs @@ -27,6 +27,10 @@ pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec) errors.push(SyntaxError::new(UnclosedByte, literal_range)); } + if let Some(range) = components.suffix { + errors.push(SyntaxError::new(InvalidSuffix, range)); + } + if len == 0 { errors.push(SyntaxError::new(EmptyByte, literal_range)); } diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs index 2f98472f4..bf4c934a7 100644 --- a/crates/ra_syntax/src/validation/byte_string.rs +++ b/crates/ra_syntax/src/validation/byte_string.rs @@ -32,6 +32,10 @@ pub(crate) fn validate_byte_string_node(node: ast::ByteString, errors: &mut Vec< if !components.has_closing_quote { errors.push(SyntaxError::new(UnclosedString, literal_range)); } + + if let Some(range) = components.suffix { + errors.push(SyntaxError::new(InvalidSuffix, range)); + } } #[cfg(test)] diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs index deb5b0a9e..50184aaf8 100644 --- a/crates/ra_syntax/src/validation/char.rs +++ b/crates/ra_syntax/src/validation/char.rs @@ -30,6 +30,10 @@ pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec) errors.push(SyntaxError::new(UnclosedChar, literal_range)); } + if let Some(range) = components.suffix { + errors.push(SyntaxError::new(InvalidSuffix, range)); + } + if len == 0 { errors.push(SyntaxError::new(EmptyChar, literal_range)); } diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs index 456180ab6..ff1fb6edc 100644 --- a/crates/ra_syntax/src/validation/string.rs +++ b/crates/ra_syntax/src/validation/string.rs @@ -27,6 +27,10 @@ pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec write!(f, "Unicode escape code should be at most 0x10FFFF"), UnclosedString => write!(f, "Unclosed string literal"), + InvalidSuffix => write!(f, "Invalid literal suffix"), ParseError(msg) => write!(f, "{}", msg.0), } } -- cgit v1.2.3 From a912b261d3e6c95430bf2116b15b3af2e112bc4b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 27 Dec 2018 15:07:51 +0300 Subject: dump validation errors --- crates/ra_syntax/src/utils.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'crates/ra_syntax/src') diff --git a/crates/ra_syntax/src/utils.rs b/crates/ra_syntax/src/utils.rs index 5bbdf80bb..0a2b6afbc 100644 --- a/crates/ra_syntax/src/utils.rs +++ b/crates/ra_syntax/src/utils.rs @@ -1,10 +1,13 @@ -use crate::{SourceFileNode, SyntaxKind, SyntaxNodeRef, WalkEvent}; +use crate::{SourceFileNode, SyntaxKind, SyntaxNodeRef, WalkEvent, AstNode}; use std::fmt::Write; use std::str; /// Parse a file and create a string representation of the resulting parse tree. pub fn dump_tree(syntax: SyntaxNodeRef) -> String { - let mut errors: Vec<_> = syntax.root_data().to_vec(); + let mut errors: Vec<_> = match syntax.ancestors().find_map(SourceFileNode::cast) { + Some(file) => file.owned().errors(), + None => syntax.root_data().to_vec(), + }; errors.sort_by_key(|e| e.offset()); let mut err_pos = 0; let mut level = 0; -- cgit v1.2.3 From 49b0fe20ab6aa752df3764908b7c21f4b1827e52 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Thu, 27 Dec 2018 15:10:30 +0300 Subject: fix suffix ranges --- crates/ra_syntax/src/validation/byte.rs | 5 ++++- crates/ra_syntax/src/validation/byte_string.rs | 5 ++++- crates/ra_syntax/src/validation/char.rs | 5 ++++- crates/ra_syntax/src/validation/string.rs | 5 ++++- 4 files changed, 16 insertions(+), 4 deletions(-) (limited to 'crates/ra_syntax/src') diff --git a/crates/ra_syntax/src/validation/byte.rs b/crates/ra_syntax/src/validation/byte.rs index 2f9b7fac7..d0897eeed 100644 --- a/crates/ra_syntax/src/validation/byte.rs +++ b/crates/ra_syntax/src/validation/byte.rs @@ -28,7 +28,10 @@ pub(super) fn validate_byte_node(node: ast::Byte, errors: &mut Vec) } if let Some(range) = components.suffix { - errors.push(SyntaxError::new(InvalidSuffix, range)); + errors.push(SyntaxError::new( + InvalidSuffix, + range + literal_range.start(), + )); } if len == 0 { diff --git a/crates/ra_syntax/src/validation/byte_string.rs b/crates/ra_syntax/src/validation/byte_string.rs index bf4c934a7..f7a4fb156 100644 --- a/crates/ra_syntax/src/validation/byte_string.rs +++ b/crates/ra_syntax/src/validation/byte_string.rs @@ -34,7 +34,10 @@ pub(crate) fn validate_byte_string_node(node: ast::ByteString, errors: &mut Vec< } if let Some(range) = components.suffix { - errors.push(SyntaxError::new(InvalidSuffix, range)); + errors.push(SyntaxError::new( + InvalidSuffix, + range + literal_range.start(), + )); } } diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs index 50184aaf8..19cd3830f 100644 --- a/crates/ra_syntax/src/validation/char.rs +++ b/crates/ra_syntax/src/validation/char.rs @@ -31,7 +31,10 @@ pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec) } if let Some(range) = components.suffix { - errors.push(SyntaxError::new(InvalidSuffix, range)); + errors.push(SyntaxError::new( + InvalidSuffix, + range + literal_range.start(), + )); } if len == 0 { diff --git a/crates/ra_syntax/src/validation/string.rs b/crates/ra_syntax/src/validation/string.rs index ff1fb6edc..1371bb1f0 100644 --- a/crates/ra_syntax/src/validation/string.rs +++ b/crates/ra_syntax/src/validation/string.rs @@ -29,7 +29,10 @@ pub(crate) fn validate_string_node(node: ast::String, errors: &mut Vec