Add validator for byte

author: Adolfo Ochagavía <[email protected]> 2018-11-11 19:27:00 +0000
committer: Adolfo Ochagavía <[email protected]> 2018-11-11 19:27:00 +0000
commit: c258b4fdb0e421813330c2428985c4537c787582 (patch)
tree: e53263f28c0cd07911a1e9c9ef6538c8ff0227fd /crates/ra_syntax/src/validation/char.rs
parent: a4f7d7a7cd85a5b9b64a935dd84ad493b6860236 (diff)
1 files changed, 98 insertions, 90 deletions
diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
index 63f9bad24..793539b3a 100644
--- a/crates/ra_syntax/src/validation/char.rs
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -1,3 +1,5 @@
+//! Validation of char literals
 use std::u32;
 use arrayvec::ArrayString;
@@ -12,7 +14,7 @@ use crate::{
    },
 };
-pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
+pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
    let literal_text = node.text();
    let literal_range = node.syntax().range();
    let mut components = string_lexing::parse_char_literal(literal_text);
@@ -37,7 +39,7 @@ pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>)
    }
 }
-pub(crate) fn validate_char_component(
+pub(super) fn validate_char_component(
    text: &str,
    kind: CharComponentKind,
    range: TextRange,
@@ -46,109 +48,115 @@ pub(crate) fn validate_char_component(
    // Validate escapes
    use self::CharComponentKind::*;
    match kind {
-        AsciiEscape => {
+        AsciiEscape => validate_ascii_escape(text, range, errors),
-            if text.len() == 1 {
+        AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
-                // Escape sequence consists only of leading `\`
+        UnicodeEscape => validate_unicode_escape(text, range, errors),
-                errors.push(SyntaxError::new(EmptyAsciiEscape, range));
+        CodePoint => {
-            } else {
+            // These code points must always be escaped
-                let escape_code = text.chars().skip(1).next().unwrap();
+            if text == "\t" || text == "\r" || text == "\n" {
-                if !is_ascii_escape(escape_code) {
+                errors.push(SyntaxError::new(UnescapedCodepoint, range));
-                    errors.push(SyntaxError::new(InvalidAsciiEscape, range));
-                }
            }
        }
-        AsciiCodeEscape => {
+    }
-            // An AsciiCodeEscape has 4 chars, example: `\xDD`
+}
-            if text.len() < 4 {
-                errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
+fn validate_ascii_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
-            } else {
+    if text.len() == 1 {
-                assert!(
+        // Escape sequence consists only of leading `\`
-                    text.chars().count() == 4,
+        errors.push(SyntaxError::new(EmptyAsciiEscape, range));
-                    "AsciiCodeEscape cannot be longer than 4 chars"
+    } else {
-                );
+        let escape_code = text.chars().skip(1).next().unwrap();
+        if !is_ascii_escape(escape_code) {
-                match u8::from_str_radix(&text[2..], 16) {
+            errors.push(SyntaxError::new(InvalidAsciiEscape, range));
-                    Ok(code) if code < 128 => { /* Escape code is valid */ }
-                    Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
-                    Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
-                }
-            }
        }
-        UnicodeEscape => {
+    }
-            assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
+}
-            if text.len() == 2 {
+pub(super) fn is_ascii_escape(code: char) -> bool {
-                // No starting `{`
+    match code {
-                errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+        '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,
-                return;
+        _ => false,
-            }
+    }
+}
-            if text.len() == 3 {
+fn validate_ascii_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
-                // Only starting `{`
+    // An AsciiCodeEscape has 4 chars, example: `\xDD`
-                errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
+    if text.len() < 4 {
-                return;
+        errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
-            }
+    } else {
+        assert!(
+            text.chars().count() == 4,
+            "AsciiCodeEscape cannot be longer than 4 chars"
+        );
-            let mut code = ArrayString::<[_; 6]>::new();
+        match u8::from_str_radix(&text[2..], 16) {
-            let mut closed = false;
+            Ok(code) if code < 128 => { /* Escape code is valid */ }
-            for c in text[3..].chars() {
+            Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
-                assert!(!closed, "no characters after escape is closed");
+            Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
+        }
-                if c.is_digit(16) {
+    }
-                    if code.len() == 6 {
+}
-                        errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
-                        return;
-                    }
-                    code.push(c);
-                } else if c == '_' {
-                    // Reject leading _
-                    if code.len() == 0 {
-                        errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                        return;
-                    }
-                } else if c == '}' {
-                    closed = true;
-                } else {
-                    errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                    return;
-                }
-            }
-            if !closed {
+fn validate_unicode_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
-                errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
+    assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
-            }
-            if code.len() == 0 {
+    if text.len() == 2 {
-                errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
+        // No starting `{`
+        errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+        return;
+    }
+    if text.len() == 3 {
+        // Only starting `{`
+        errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
+        return;
+    }
+    let mut code = ArrayString::<[_; 6]>::new();
+    let mut closed = false;
+    for c in text[3..].chars() {
+        assert!(!closed, "no characters after escape is closed");
+        if c.is_digit(16) {
+            if code.len() == 6 {
+                errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
                return;
            }
-            match u32::from_str_radix(&code, 16) {
+            code.push(c);
-                Ok(code_u32) if code_u32 > 0x10FFFF => {
+        } else if c == '_' {
-                    errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
+            // Reject leading _
-                }
+            if code.len() == 0 {
-                Ok(_) => {
+                errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                    // Valid escape code
+                return;
-                }
-                Err(_) => {
-                    errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
-                }
-            }
-        }
-        CodePoint => {
-            // These code points must always be escaped
-            if text == "\t" || text == "\r" {
-                errors.push(SyntaxError::new(UnescapedCodepoint, range));
            }
+        } else if c == '}' {
+            closed = true;
+        } else {
+            errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+            return;
        }
    }
-}
-fn is_ascii_escape(code: char) -> bool {
+    if !closed {
-    match code {
+        errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
-        '\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,
+    }
-        _ => false,
+    if code.len() == 0 {
+        errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
+        return;
+    }
+    match u32::from_str_radix(&code, 16) {
+        Ok(code_u32) if code_u32 > 0x10FFFF => {
+            errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
+        }
+        Ok(_) => {
+            // Valid escape code
+        }
+        Err(_) => {
+            errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
+        }
    }
 }
author	Adolfo Ochagavía <[email protected]>	2018-11-11 19:27:00 +0000
committer	Adolfo Ochagavía <[email protected]>	2018-11-11 19:27:00 +0000
commit	c258b4fdb0e421813330c2428985c4537c787582 (patch)
tree	e53263f28c0cd07911a1e9c9ef6538c8ff0227fd /crates/ra_syntax/src/validation/char.rs
parent	a4f7d7a7cd85a5b9b64a935dd84ad493b6860236 (diff)

diff --git a/crates/ra_syntax/src/validation/char.rs b/crates/ra_syntax/src/validation/char.rs
index 63f9bad24..793539b3a 100644
--- a/crates/ra_syntax/src/validation/char.rs
+++ b/crates/ra_syntax/src/validation/char.rs
@@ -1,3 +1,5 @@
		1	//! Validation of char literals
		2
1	use std::u32;	3	use std::u32;
2		4
3	use arrayvec::ArrayString;	5	use arrayvec::ArrayString;
@@ -12,7 +14,7 @@ use crate::{
12	},	14	},
13	};	15	};
14		16
15	pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {	17	pub(super) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>) {
16	let literal_text = node.text();	18	let literal_text = node.text();
17	let literal_range = node.syntax().range();	19	let literal_range = node.syntax().range();
18	let mut components = string_lexing::parse_char_literal(literal_text);	20	let mut components = string_lexing::parse_char_literal(literal_text);
@@ -37,7 +39,7 @@ pub(crate) fn validate_char_node(node: ast::Char, errors: &mut Vec<SyntaxError>)
37	}	39	}
38	}	40	}
39		41
40	pub(crate) fn validate_char_component(	42	pub(super) fn validate_char_component(
41	text: &str,	43	text: &str,
42	kind: CharComponentKind,	44	kind: CharComponentKind,
43	range: TextRange,	45	range: TextRange,
@@ -46,109 +48,115 @@ pub(crate) fn validate_char_component(
46	// Validate escapes	48	// Validate escapes
47	use self::CharComponentKind::*;	49	use self::CharComponentKind::*;
48	match kind {	50	match kind {
49	AsciiEscape => {	51	AsciiEscape => validate_ascii_escape(text, range, errors),
50	if text.len() == 1 {	52	AsciiCodeEscape => validate_ascii_code_escape(text, range, errors),
51	// Escape sequence consists only of leading `\`	53	UnicodeEscape => validate_unicode_escape(text, range, errors),
52	errors.push(SyntaxError::new(EmptyAsciiEscape, range));	54	CodePoint => {
53	} else {	55	// These code points must always be escaped
54	let escape_code = text.chars().skip(1).next().unwrap();	56	if text == "\t" || text == "\r" || text == "\n" {
55	if !is_ascii_escape(escape_code) {	57	errors.push(SyntaxError::new(UnescapedCodepoint, range));
56	errors.push(SyntaxError::new(InvalidAsciiEscape, range));
57	}
58	}	58	}
59	}	59	}
60	AsciiCodeEscape => {	60	}
61	// An AsciiCodeEscape has 4 chars, example: `\xDD`	61	}
62	if text.len() < 4 {	62
63	errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));	63	fn validate_ascii_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
64	} else {	64	if text.len() == 1 {
65	assert!(	65	// Escape sequence consists only of leading `\`
66	text.chars().count() == 4,	66	errors.push(SyntaxError::new(EmptyAsciiEscape, range));
67	"AsciiCodeEscape cannot be longer than 4 chars"	67	} else {
68	);	68	let escape_code = text.chars().skip(1).next().unwrap();
69		69	if !is_ascii_escape(escape_code) {
70	match u8::from_str_radix(&text[2..], 16) {	70	errors.push(SyntaxError::new(InvalidAsciiEscape, range));
71	Ok(code) if code < 128 => { /* Escape code is valid */ }
72	Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
73	Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
74	}
75	}
76	}	71	}
77	UnicodeEscape => {	72	}
78	assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");	73	}
79		74
80	if text.len() == 2 {	75	pub(super) fn is_ascii_escape(code: char) -> bool {
81	// No starting `{`	76	match code {
82	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));	77	'\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,
83	return;	78	_ => false,
84	}	79	}
		80	}
85		81
86	if text.len() == 3 {	82	fn validate_ascii_code_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
87	// Only starting `{`	83	// An AsciiCodeEscape has 4 chars, example: `\xDD`
88	errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));	84	if text.len() < 4 {
89	return;	85	errors.push(SyntaxError::new(TooShortAsciiCodeEscape, range));
90	}	86	} else {
		87	assert!(
		88	text.chars().count() == 4,
		89	"AsciiCodeEscape cannot be longer than 4 chars"
		90	);
91		91
92	let mut code = ArrayString::<[_; 6]>::new();	92	match u8::from_str_radix(&text[2..], 16) {
93	let mut closed = false;	93	Ok(code) if code < 128 => { /* Escape code is valid */ }
94	for c in text[3..].chars() {	94	Ok(_) => errors.push(SyntaxError::new(AsciiCodeEscapeOutOfRange, range)),
95	assert!(!closed, "no characters after escape is closed");	95	Err(_) => errors.push(SyntaxError::new(MalformedAsciiCodeEscape, range)),
96		96	}
97	if c.is_digit(16) {	97	}
98	if code.len() == 6 {	98	}
99	errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
100	return;
101	}
102
103	code.push(c);
104	} else if c == '_' {
105	// Reject leading _
106	if code.len() == 0 {
107	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
108	return;
109	}
110	} else if c == '}' {
111	closed = true;
112	} else {
113	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
114	return;
115	}
116	}
117		99
118	if !closed {	100	fn validate_unicode_escape(text: &str, range: TextRange, errors: &mut Vec<SyntaxError>) {
119	errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))	101	assert!(&text[..2] == "\\u", "UnicodeEscape always starts with \\u");
120	}
121		102
122	if code.len() == 0 {	103	if text.len() == 2 {
123	errors.push(SyntaxError::new(EmptyUnicodeEcape, range));	104	// No starting `{`
		105	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
		106	return;
		107	}
		108
		109	if text.len() == 3 {
		110	// Only starting `{`
		111	errors.push(SyntaxError::new(UnclosedUnicodeEscape, range));
		112	return;
		113	}
		114
		115	let mut code = ArrayString::<[_; 6]>::new();
		116	let mut closed = false;
		117	for c in text[3..].chars() {
		118	assert!(!closed, "no characters after escape is closed");
		119
		120	if c.is_digit(16) {
		121	if code.len() == 6 {
		122	errors.push(SyntaxError::new(OverlongUnicodeEscape, range));
124	return;	123	return;
125	}	124	}
126		125
127	match u32::from_str_radix(&code, 16) {	126	code.push(c);
128	Ok(code_u32) if code_u32 > 0x10FFFF => {	127	} else if c == '_' {
129	errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));	128	// Reject leading _
130	}	129	if code.len() == 0 {
131	Ok(_) => {	130	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
132	// Valid escape code	131	return;
133	}
134	Err(_) => {
135	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
136	}
137	}
138	}
139	CodePoint => {
140	// These code points must always be escaped
141	if text == "\t" || text == "\r" {
142	errors.push(SyntaxError::new(UnescapedCodepoint, range));
143	}	132	}
		133	} else if c == '}' {
		134	closed = true;
		135	} else {
		136	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
		137	return;
144	}	138	}
145	}	139	}
146	}
147		140
148	fn is_ascii_escape(code: char) -> bool {	141	if !closed {
149	match code {	142	errors.push(SyntaxError::new(UnclosedUnicodeEscape, range))
150	'\\' | '\'' | '"' | 'n' | 'r' | 't' | '0' => true,	143	}
151	_ => false,	144
		145	if code.len() == 0 {
		146	errors.push(SyntaxError::new(EmptyUnicodeEcape, range));
		147	return;
		148	}
		149
		150	match u32::from_str_radix(&code, 16) {
		151	Ok(code_u32) if code_u32 > 0x10FFFF => {
		152	errors.push(SyntaxError::new(UnicodeEscapeOutOfRange, range));
		153	}
		154	Ok(_) => {
		155	// Valid escape code
		156	}
		157	Err(_) => {
		158	errors.push(SyntaxError::new(MalformedUnicodeEscape, range));
		159	}
152	}	160	}
153	}	161	}
154		162