From c56db92d1f9b1a24de24cefd996c43c7b988b4c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= Date: Tue, 6 Nov 2018 17:05:06 +0100 Subject: Finish implementing char validation --- crates/ra_syntax/src/string_lexing/mod.rs | 2 +- crates/ra_syntax/src/validation.rs | 83 ++++++++++++++++++++++++++--- crates/ra_syntax/src/yellow/syntax_error.rs | 16 ++++++ 3 files changed, 93 insertions(+), 8 deletions(-) (limited to 'crates') diff --git a/crates/ra_syntax/src/string_lexing/mod.rs b/crates/ra_syntax/src/string_lexing/mod.rs index f0812ff28..cc53e0aba 100644 --- a/crates/ra_syntax/src/string_lexing/mod.rs +++ b/crates/ra_syntax/src/string_lexing/mod.rs @@ -219,7 +219,7 @@ mod tests { #[test] fn test_unicode_escapes() { - let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", ""]; + let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""]; for escape in unicode_escapes { let escape_sequence = format!(r"'\u{}'", escape); let component = closed_char_component(&escape_sequence); diff --git a/crates/ra_syntax/src/validation.rs b/crates/ra_syntax/src/validation.rs index 009f5052f..a550ce0ab 100644 --- a/crates/ra_syntax/src/validation.rs +++ b/crates/ra_syntax/src/validation.rs @@ -1,3 +1,5 @@ +use std::u32; + use crate::{ algo::visit::{visitor_ctx, VisitorCtx}, ast::{self, AstNode}, @@ -42,15 +44,82 @@ fn validate_char(node: ast::Char, errors: &mut Vec) { } } AsciiCodeEscape => { - // TODO: - // * First digit is octal - // * Second digit is hex + // An AsciiCodeEscape has 4 chars, example: `\xDD` + if text.len() < 4 { + errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range)); + } else { + assert!(text.chars().count() == 4, "AsciiCodeEscape cannot be longer than 4 chars"); + + match u8::from_str_radix(&text[2..], 16) { + Ok(code) if code < 128 => { /* Escape code is valid */ }, + Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)), + Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)), + } + + } } UnicodeEscape => { - // TODO: - // * Only hex digits or underscores allowed - // * Max 6 chars - // * Within allowed range (must be at most 10FFFF) + assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u"); + + if text.len() == 2 { + // No starting `{` + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + return; + } + + if text.len() == 3 { + // Only starting `{` + errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)); + return; + } + + let mut code = String::new(); + let mut closed = false; + for c in text[3..].chars() { + assert!(!closed, "no characters after escape is closed"); + + if c.is_digit(16) { + code.push(c); + } else if c == '_' { + // Reject leading _ + if code.len() == 0 { + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + return; + } + } else if c == '}' { + closed = true; + } else { + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + return; + } + } + + if !closed { + errors.push(SyntaxError::new(UnclosedUnicodeEscape, range)) + } + + if code.len() == 0 { + errors.push(SyntaxError::new(EmptyUnicodeEcape, range)); + return; + } + + if code.len() > 6 { + errors.push(SyntaxError::new(OverlongUnicodeEscape, range)); + } + + match u32::from_str_radix(&code, 16) { + Ok(code_u32) if code_u32 > 0x10FFFF => { + errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range)); + } + Ok(_) => { + // Valid escape code + } + Err(_) => { + errors.push(SyntaxError::new(MalformedUnicodeEscape, range)); + } + } + + // FIXME: we really need tests for this } // Code points are always valid CodePoint => (), diff --git a/crates/ra_syntax/src/yellow/syntax_error.rs b/crates/ra_syntax/src/yellow/syntax_error.rs index f3df6bc15..9aed9e81e 100644 --- a/crates/ra_syntax/src/yellow/syntax_error.rs +++ b/crates/ra_syntax/src/yellow/syntax_error.rs @@ -69,6 +69,14 @@ pub enum SyntaxErrorKind { LongChar, EmptyAsciiEscape, InvalidAsciiEscape, + TooShortAsciiCodeEscape, + AsciiCodeEscapeOutOfRange, + MalformedAsciiCodeEscape, + UnclosedUnicodeEscape, + MalformedUnicodeEscape, + EmptyUnicodeEcape, + OverlongUnicodeEscape, + UnicodeEscapeOutOfRange, } #[derive(Debug, Clone, PartialEq, Eq, Hash)] @@ -83,6 +91,14 @@ impl fmt::Display for SyntaxErrorKind { EmptyChar => write!(f, "Empty char literal"), UnclosedChar => write!(f, "Unclosed char literal"), LongChar => write!(f, "Char literal should be one character long"), + TooShortAsciiCodeEscape => write!(f, "Escape sequence should have two digits"), + AsciiCodeEscapeOutOfRange => write!(f, "Escape sequence should be between \\x00 and \\x7F"), + MalformedAsciiCodeEscape => write!(f, "Escape sequence should be a hexadecimal number"), + UnclosedUnicodeEscape => write!(f, "Missing `}}`"), + MalformedUnicodeEscape => write!(f, "Malformed unicode escape sequence"), + EmptyUnicodeEcape => write!(f, "Empty unicode escape sequence"), + OverlongUnicodeEscape => write!(f, "Unicode escape sequence should have at most 6 digits"), + UnicodeEscapeOutOfRange => write!(f, "Unicode escape code should be at most 0x10FFFF"), ParseError(msg) => write!(f, "{}", msg.0), } } -- cgit v1.2.3