about summary refs log tree commit diff
diff options
context:
space:
mode:
author Aleksey Kladov <[email protected]> 2017-12-31 11:02:55 +0000
committer Aleksey Kladov <[email protected]> 2017-12-31 11:03:29 +0000
commit 2f24fb4f2c5d8708533a1b0155e1e884bd4b2ba2 (patch)
tree a4ed4479231d2e5526618d9728d88fc3990b39e2
parent f1a840cc385798fc2e3e9ac9ddb0dd57fd0ac509 (diff)
Lexer: byte strings
-rw-r--r-- grammar.ron 7
-rw-r--r-- src/lexer/strings.rs 31
-rw-r--r-- src/syntax_kinds.rs 18
-rw-r--r-- tests/data/lexer/0008_strings.rs 1
-rw-r--r-- tests/data/lexer/0008_strings.txt 7
-rw-r--r-- validation.md 2
6 files changed, 55 insertions, 11 deletions
diff --git a/grammar.ron b/grammar.ron
index 995d71f81..c0564e9cf 100644
--- a/grammar.ron
+++ b/grammar.ron
@@ -32,7 +32,12 @@ Grammar(
32 "FAT_ARROW", 32 "FAT_ARROW",
33 "NEQ", 33 "NEQ",
34 "NOT", 34 "NOT",
35 "CHAR",
36 "LIFETIME", 35 "LIFETIME",
36 "CHAR",
37 "BYTE",
38 "STRING",
39 "RAW_STRING",
40 "BYTE_STRING",
41 "RAW_BYTE_STRING",
37 ] 42 ]
38) \ No newline at end of file 43) \ No newline at end of file
diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs
index 40e5e4528..283ce8feb 100644
--- a/src/lexer/strings.rs
+++ b/src/lexer/strings.rs
@@ -33,30 +33,51 @@ pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind {
33 match c { 33 match c {
34 '\'' => { 34 '\'' => {
35 scan_byte(ptr); 35 scan_byte(ptr);
36 CHAR 36 BYTE
37 } 37 }
38 '"' => { 38 '"' => {
39 scan_byte_string(ptr); 39 scan_byte_string(ptr);
40 CHAR 40 BYTE_STRING
41 } 41 }
42 'r' => { 42 'r' => {
43 scan_raw_byte_string(ptr); 43 scan_raw_byte_string(ptr);
44 CHAR 44 RAW_BYTE_STRING
45 } 45 }
46 _ => unreachable!(), 46 _ => unreachable!(),
47 } 47 }
48} 48}
49 49
50fn scan_byte(ptr: &mut Ptr) { 50fn scan_byte(ptr: &mut Ptr) {
51 51 if ptr.next_is('\'') {
52 ptr.bump();
53 return
54 }
55 ptr.bump();
56 if ptr.next_is('\'') {
57 ptr.bump();
58 return
59 }
52} 60}
53 61
54fn scan_byte_string(ptr: &mut Ptr) { 62fn scan_byte_string(ptr: &mut Ptr) {
55 63 while let Some(c) = ptr.bump() {
64 if c == '"' {
65 return
66 }
67 }
56} 68}
57 69
58fn scan_raw_byte_string(ptr: &mut Ptr) { 70fn scan_raw_byte_string(ptr: &mut Ptr) {
71 if !ptr.next_is('"') {
72 return
73 }
74 ptr.bump();
59 75
76 while let Some(c) = ptr.bump() {
77 if c == '"' {
78 return
79 }
80 }
60} 81}
61 82
62fn scan_char_or_byte(ptr: &mut Ptr) { 83fn scan_char_or_byte(ptr: &mut Ptr) {
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs
index 4c023757b..4a68acb31 100644
--- a/src/syntax_kinds.rs
+++ b/src/syntax_kinds.rs
@@ -33,10 +33,15 @@ pub const EQEQ: SyntaxKind = SyntaxKind(28);
33pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); 33pub const FAT_ARROW: SyntaxKind = SyntaxKind(29);
34pub const NEQ: SyntaxKind = SyntaxKind(30); 34pub const NEQ: SyntaxKind = SyntaxKind(30);
35pub const NOT: SyntaxKind = SyntaxKind(31); 35pub const NOT: SyntaxKind = SyntaxKind(31);
36pub const CHAR: SyntaxKind = SyntaxKind(32); 36pub const LIFETIME: SyntaxKind = SyntaxKind(32);
37pub const LIFETIME: SyntaxKind = SyntaxKind(33); 37pub const CHAR: SyntaxKind = SyntaxKind(33);
38pub const BYTE: SyntaxKind = SyntaxKind(34);
39pub const STRING: SyntaxKind = SyntaxKind(35);
40pub const RAW_STRING: SyntaxKind = SyntaxKind(36);
41pub const BYTE_STRING: SyntaxKind = SyntaxKind(37);
42pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(38);
38 43
39static INFOS: [SyntaxInfo; 34] = [ 44static INFOS: [SyntaxInfo; 39] = [
40 SyntaxInfo { name: "ERROR" }, 45 SyntaxInfo { name: "ERROR" },
41 SyntaxInfo { name: "IDENT" }, 46 SyntaxInfo { name: "IDENT" },
42 SyntaxInfo { name: "UNDERSCORE" }, 47 SyntaxInfo { name: "UNDERSCORE" },
@@ -69,8 +74,13 @@ static INFOS: [SyntaxInfo; 34] = [
69 SyntaxInfo { name: "FAT_ARROW" }, 74 SyntaxInfo { name: "FAT_ARROW" },
70 SyntaxInfo { name: "NEQ" }, 75 SyntaxInfo { name: "NEQ" },
71 SyntaxInfo { name: "NOT" }, 76 SyntaxInfo { name: "NOT" },
72 SyntaxInfo { name: "CHAR" },
73 SyntaxInfo { name: "LIFETIME" }, 77 SyntaxInfo { name: "LIFETIME" },
78 SyntaxInfo { name: "CHAR" },
79 SyntaxInfo { name: "BYTE" },
80 SyntaxInfo { name: "STRING" },
81 SyntaxInfo { name: "RAW_STRING" },
82 SyntaxInfo { name: "BYTE_STRING" },
83 SyntaxInfo { name: "RAW_BYTE_STRING" },
74]; 84];
75 85
76pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { 86pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
diff --git a/tests/data/lexer/0008_strings.rs b/tests/data/lexer/0008_strings.rs
new file mode 100644
index 000000000..1ffc7bb9d
--- /dev/null
+++ b/tests/data/lexer/0008_strings.rs
@@ -0,0 +1 @@
b'' b'x' b"foo" br"" \ No newline at end of file
diff --git a/tests/data/lexer/0008_strings.txt b/tests/data/lexer/0008_strings.txt
new file mode 100644
index 000000000..d4ff4b558
--- /dev/null
+++ b/tests/data/lexer/0008_strings.txt
@@ -0,0 +1,7 @@
1BYTE 3 "b\'\'"
2WHITESPACE 1 " "
3BYTE 4 "b\'x\'"
4WHITESPACE 1 " "
5BYTE_STRING 6 "b\"foo\""
6WHITESPACE 1 " "
7RAW_BYTE_STRING 4 "br\"\""
diff --git a/validation.md b/validation.md
index a38b4a96e..39b5f85fa 100644
--- a/validation.md
+++ b/validation.md
@@ -5,4 +5,4 @@ Fixmes:
5* Validate that float and integer literals use digits only of the appropriate 5* Validate that float and integer literals use digits only of the appropriate
6 base, and are in range 6 base, and are in range
7* Validation for unclosed char literal 7* Validation for unclosed char literal
8 8* Strings are completely wrong: more tests and comparison with libsyntax.