From c258b4fdb0e421813330c2428985c4537c787582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= Date: Sun, 11 Nov 2018 20:27:00 +0100 Subject: Add validator for byte --- crates/ra_syntax/src/validation/char.rs | 188 +++++++++++++++++--------------- 1 file changed, 98 insertions(+), 90 deletions(-) (limited to 'crates/ra_syntax/src/validation/char.rs') diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs index 63f9bad24..793539b3a 100644 --- a/crates/ra_syntax/src/validation/char.rs +++ b/crates/ra_syntax/src/validation/char.rs @@ -1,3 +1,5 @@ +//! Validation of char literals + use std::u32; use arrayvec::ArrayString; @@ -12,7 +14,7 @@ use crate::{ }, }; -pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec) { +pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec) { let literal_text = node.text(); let literal_range = node.syntax().range(); let mut components = string_lexing::parse_char_literal(literal_text); @@ -37,7 +39,7 @@ pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec) } } -pub(crate) fn validate_char_component( +pub(super) fn validate_char_component( text: &str, kind: CharComponentKind, range: TextRange, @@ -46,109 +48,115 @@ pub(crate) fn validate_char_component( // Validate escapes use self::CharComponentKind::*; match kind { - AsciiEscape => { - if text.len() == 1 { - // Escape sequence consists only of leading `\` - errors.push(SyntaxError::new(EmptyAsciiEscape, range)); - } else { - let escape_code = text.chars().skip(1).next().unwrap(); - if !is_ascii_escape(escape_code) { - errors.push(SyntaxError::new(InvalidAsciiEscape, range)); - } + AsciiEscape => validate_ascii_escape(text, range, errors), + AsciiCodeEscape => validate_ascii_code_escape(text, range, errors), + UnicodeEscape => validate_unicode_escape(text, range, errors), + CodePoint => { + // These code points must always be escaped + if text == "\t" || text == "\r" || text == "\n" { + errors.push(SyntaxError::new(UnescapedCodepoint, range)); } } - AsciiCodeEscape => { - // An AsciiCodeEscape has 4 chars, example: `\xDD` - if text.len() < 4 { - errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range)); - } else { - assert!( - text.chars().count() == 4, - "AsciiCodeEscape cannot be longer than 4 chars" - ); - - match u8::from_str_radix(&text[2..], 16) { - Ok(code) if code < 128 => { /* Escape code is valid */ } - Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)), - Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)), - } - } + } +} + +fn validate_ascii_escape(text: &str, range: TextRange, errors: &mut Vec) { + if text.len() == 1 { + // Escape sequence consists only of leading `\` + errors.push(SyntaxError::new(EmptyAsciiEscape, range)); + } else { + let escape_code = text.chars().skip(1).next().unwrap(); + if !is_ascii_escape(escape_code) { + errors.push(SyntaxError::new(InvalidAsciiEscape, range)); } - UnicodeEscape => { - assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u"); + } +} - if text.len() == 2 { - // No starting `{` - errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); - return; - } +pub(super) fn is_ascii_escape(code: char) -> bool { + match code { + '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true, + _ => false, + } +} - if text.len() == 3 { - // Only starting `{` - errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)); - return; - } +fn validate_ascii_code_escape(text: &str, range: TextRange, errors: &mut Vec) { + // An AsciiCodeEscape has 4 chars, example: `\xDD` + if text.len() < 4 { + errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range)); + } else { + assert!( + text.chars().count() == 4, + "AsciiCodeEscape cannot be longer than 4 chars" + ); - let mut code = ArrayString::<[_; 6]>::new(); - let mut closed = false; - for c in text[3..].chars() { - assert!(!closed, "no characters after escape is closed"); - - if c.is_digit(16) { - if code.len() == 6 { - errors.push(SyntaxError::new(OverlongUnicodeEscape, range)); - return; - } - - code.push(c); - } else if c == '_' { - // Reject leading _ - if code.len() == 0 { - errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); - return; - } - } else if c == '}' { - closed = true; - } else { - errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); - return; - } - } + match u8::from_str_radix(&text[2..], 16) { + Ok(code) if code < 128 => { /* Escape code is valid */ } + Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)), + Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)), + } + } +} - if !closed { - errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)) - } +fn validate_unicode_escape(text: &str, range: TextRange, errors: &mut Vec) { + assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u"); - if code.len() == 0 { - errors.push(SyntaxError::new(EmptyUnicodeEcape, range)); + if text.len() == 2 { + // No starting `{` + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + return; + } + + if text.len() == 3 { + // Only starting `{` + errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)); + return; + } + + let mut code = ArrayString::<[_; 6]>::new(); + let mut closed = false; + for c in text[3..].chars() { + assert!(!closed, "no characters after escape is closed"); + + if c.is_digit(16) { + if code.len() == 6 { + errors.push(SyntaxError::new(OverlongUnicodeEscape, range)); return; } - match u32::from_str_radix(&code, 16) { - Ok(code_u32) if code_u32 > 0x10FFFF => { - errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range)); - } - Ok(_) => { - // Valid escape code - } - Err(_) => { - errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); - } - } - } - CodePoint => { - // These code points must always be escaped - if text == "\t" || text == "\r" { - errors.push(SyntaxError::new(UnescapedCodepoint, range)); + code.push(c); + } else if c == '_' { + // Reject leading _ + if code.len() == 0 { + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + return; } + } else if c == '}' { + closed = true; + } else { + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + return; } } -} -fn is_ascii_escape(code: char) -> bool { - match code { - '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true, - _ => false, + if !closed { + errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)) + } + + if code.len() == 0 { + errors.push(SyntaxError::new(EmptyUnicodeEcape, range)); + return; + } + + match u32::from_str_radix(&code, 16) { + Ok(code_u32) if code_u32 > 0x10FFFF => { + errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range)); + } + Ok(_) => { + // Valid escape code + } + Err(_) => { + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + } } } -- cgit v1.2.3 From 30cd4d5acb7dfd40cea264a926d1c89f0c3522c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= Date: Sun, 11 Nov 2018 20:41:43 +0100 Subject: Validate byte string literals --- crates/ra_syntax/src/validation/char.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'crates/ra_syntax/src/validation/char.rs') diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs index 793539b3a..622b2efdc 100644 --- a/crates/ra_syntax/src/validation/char.rs +++ b/crates/ra_syntax/src/validation/char.rs @@ -214,7 +214,7 @@ mod test { #[test] fn test_valid_ascii_escape() { let valid = [ - r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b", + r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", ]; for c in &valid { assert_valid_char(c); -- cgit v1.2.3 From c96bfe7e2d4465653fe6b0eff053f0dfb48313fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= Date: Sun, 11 Nov 2018 21:00:31 +0100 Subject: Split string lexing and run rustfmt --- crates/ra_syntax/src/validation/char.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'crates/ra_syntax/src/validation/char.rs') diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs index 622b2efdc..4728c85e6 100644 --- a/crates/ra_syntax/src/validation/char.rs +++ b/crates/ra_syntax/src/validation/char.rs @@ -213,9 +213,7 @@ mod test { #[test] fn test_valid_ascii_escape() { - let valid = [ - r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", - ]; + let valid = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"]; for c in &valid { assert_valid_char(c); } -- cgit v1.2.3