From c258b4fdb0e421813330c2428985c4537c787582 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= <aochagavia92@gmail.com>
Date: Sun, 11 Nov 2018 20:27:00 +0100
Subject: Add validator for byte

---
 crates/ra_syntax/src/validation/char.rs | 188 +++++++++++++++++---------------
 1 file changed, 98 insertions(+), 90 deletions(-)

(limited to 'crates/ra_syntax/src/validation/char.rs')
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
index 63f9bad24..793539b3a 100644
--- a/crates/ra_syntax/src/validation/char.rs
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -1,3 +1,5 @@
+//! Validation of char literals
+
 use std::u32;
 
 use arrayvec::ArrayString;
@@ -12,7 +14,7 @@ use crate::{
     },
 };
 
-pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
+pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
     let literal_text = node.text();
     let literal_range = node.syntax().range();
     let mut components = string_lexing::parse_char_literal(literal_text);
@@ -37,7 +39,7 @@ pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>)
     }
 }
 
-pub(crate) fn validate_char_component(
+pub(super) fn validate_char_component(
     text: &str,
     kind: CharComponentKind,
     range: TextRange,
@@ -46,109 +48,115 @@ pub(crate) fn validate_char_component(
     // Validate escapes
     use self::CharComponentKind::*;
     match kind {
-        AsciiEscape => {
-            if text.len() == 1 {
-                // Escape sequence consists only of leading `\`
-                errors.push(SyntaxError::new(EmptyAsciiEscape, range));
-            } else {
-                let escape_code = text.chars().skip(1).next().unwrap();
-                if !is_ascii_escape(escape_code) {
-                    errors.push(SyntaxError::new(InvalidAsciiEscape, range));
-                }
+        AsciiEscape => validate_ascii_escape(text, range, errors),
+        AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
+        UnicodeEscape => validate_unicode_escape(text, range, errors),
+        CodePoint => {
+            // These code points must always be escaped
+            if text == "\t" || text == "\r" || text == "\n" {
+                errors.push(SyntaxError::new(UnescapedCodepoint, range));
             }
         }
-        AsciiCodeEscape => {
-            // An AsciiCodeEscape has 4 chars, example: `\xDD`
-            if text.len() < 4 {
-                errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
-            } else {
-                assert!(
-                    text.chars().count() == 4,
-                    "AsciiCodeEscape cannot be longer than 4 chars"
-                );
-
-                match u8::from_str_radix(&text[2..], 16) {
-                    Ok(code) if code < 128 => { /* Escape code is valid */ }
-                    Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
-                    Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
-                }
-            }
+    }
+}
+
+fn validate_ascii_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
+    if text.len() == 1 {
+        // Escape sequence consists only of leading `\`
+        errors.push(SyntaxError::new(EmptyAsciiEscape, range));
+    } else {
+        let escape_code = text.chars().skip(1).next().unwrap();
+        if !is_ascii_escape(escape_code) {
+            errors.push(SyntaxError::new(InvalidAsciiEscape, range));
         }
-        UnicodeEscape => {
-            assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
+    }
+}
 
-            if text.len() == 2 {
-                // No starting `{`
-                errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                return;
-            }
+pub(super) fn is_ascii_escape(code: char) -> bool {
+    match code {
+        '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,
+        _ => false,
+    }
+}
 
-            if text.len() == 3 {
-                // Only starting `{`
-                errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
-                return;
-            }
+fn validate_ascii_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
+    // An AsciiCodeEscape has 4 chars, example: `\xDD`
+    if text.len() < 4 {
+        errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
+    } else {
+        assert!(
+            text.chars().count() == 4,
+            "AsciiCodeEscape cannot be longer than 4 chars"
+        );
 
-            let mut code = ArrayString::<[_; 6]>::new();
-            let mut closed = false;
-            for c in text[3..].chars() {
-                assert!(!closed, "no characters after escape is closed");
-
-                if c.is_digit(16) {
-                    if code.len() == 6 {
-                        errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
-                        return;
-                    }
-
-                    code.push(c);
-                } else if c == '_' {
-                    // Reject leading _
-                    if code.len() == 0 {
-                        errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                        return;
-                    }
-                } else if c == '}' {
-                    closed = true;
-                } else {
-                    errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                    return;
-                }
-            }
+        match u8::from_str_radix(&text[2..], 16) {
+            Ok(code) if code < 128 => { /* Escape code is valid */ }
+            Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
+            Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
+        }
+    }
+}
 
-            if !closed {
-                errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
-            }
+fn validate_unicode_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
+    assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
 
-            if code.len() == 0 {
-                errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
+    if text.len() == 2 {
+        // No starting `{`
+        errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+        return;
+    }
+
+    if text.len() == 3 {
+        // Only starting `{`
+        errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
+        return;
+    }
+
+    let mut code = ArrayString::<[_; 6]>::new();
+    let mut closed = false;
+    for c in text[3..].chars() {
+        assert!(!closed, "no characters after escape is closed");
+
+        if c.is_digit(16) {
+            if code.len() == 6 {
+                errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
                 return;
             }
 
-            match u32::from_str_radix(&code, 16) {
-                Ok(code_u32) if code_u32 > 0x10FFFF => {
-                    errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
-                }
-                Ok(_) => {
-                    // Valid escape code
-                }
-                Err(_) => {
-                    errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                }
-            }
-        }
-        CodePoint => {
-            // These code points must always be escaped
-            if text == "\t" || text == "\r" {
-                errors.push(SyntaxError::new(UnescapedCodepoint, range));
+            code.push(c);
+        } else if c == '_' {
+            // Reject leading _
+            if code.len() == 0 {
+                errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+                return;
             }
+        } else if c == '}' {
+            closed = true;
+        } else {
+            errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+            return;
         }
     }
-}
 
-fn is_ascii_escape(code: char) -> bool {
-    match code {
-        '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,
-        _ => false,
+    if !closed {
+        errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
+    }
+
+    if code.len() == 0 {
+        errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
+        return;
+    }
+
+    match u32::from_str_radix(&code, 16) {
+        Ok(code_u32) if code_u32 > 0x10FFFF => {
+            errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
+        }
+        Ok(_) => {
+            // Valid escape code
+        }
+        Err(_) => {
+            errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+        }
     }
 }
 
-- 
cgit v1.2.3


From 30cd4d5acb7dfd40cea264a926d1c89f0c3522c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= <aochagavia92@gmail.com>
Date: Sun, 11 Nov 2018 20:41:43 +0100
Subject: Validate byte string literals

---
 crates/ra_syntax/src/validation/char.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'crates/ra_syntax/src/validation/char.rs')

diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
index 793539b3a..622b2efdc 100644
--- a/crates/ra_syntax/src/validation/char.rs
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -214,7 +214,7 @@ mod test {
     #[test]
     fn test_valid_ascii_escape() {
         let valid = [
-            r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0", "a", "b",
+            r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0",
         ];
         for c in &valid {
             assert_valid_char(c);
-- 
cgit v1.2.3


From c96bfe7e2d4465653fe6b0eff053f0dfb48313fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Adolfo=20Ochagav=C3=ADa?= <aochagavia92@gmail.com>
Date: Sun, 11 Nov 2018 21:00:31 +0100
Subject: Split string lexing and run rustfmt

---
 crates/ra_syntax/src/validation/char.rs | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'crates/ra_syntax/src/validation/char.rs')

diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
index 622b2efdc..4728c85e6 100644
--- a/crates/ra_syntax/src/validation/char.rs
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -213,9 +213,7 @@ mod test {
 
     #[test]
     fn test_valid_ascii_escape() {
-        let valid = [
-            r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0",
-        ];
+        let valid = [r"\'", "\"", "\\\\", "\\\"", r"\n", r"\r", r"\t", r"\0"];
         for c in &valid {
             assert_valid_char(c);
         }
-- 
cgit v1.2.3