diff options
-rw-r--r-- | grammar.ron | 7 | ||||
-rw-r--r-- | src/lexer/strings.rs | 31 | ||||
-rw-r--r-- | src/syntax_kinds.rs | 18 | ||||
-rw-r--r-- | tests/data/lexer/0008_strings.rs | 1 | ||||
-rw-r--r-- | tests/data/lexer/0008_strings.txt | 7 | ||||
-rw-r--r-- | validation.md | 2 |
6 files changed, 55 insertions, 11 deletions
diff --git a/grammar.ron b/grammar.ron index 995d71f81..c0564e9cf 100644 --- a/grammar.ron +++ b/grammar.ron | |||
@@ -32,7 +32,12 @@ Grammar( | |||
32 | "FAT_ARROW", | 32 | "FAT_ARROW", |
33 | "NEQ", | 33 | "NEQ", |
34 | "NOT", | 34 | "NOT", |
35 | "CHAR", | ||
36 | "LIFETIME", | 35 | "LIFETIME", |
36 | "CHAR", | ||
37 | "BYTE", | ||
38 | "STRING", | ||
39 | "RAW_STRING", | ||
40 | "BYTE_STRING", | ||
41 | "RAW_BYTE_STRING", | ||
37 | ] | 42 | ] |
38 | ) \ No newline at end of file | 43 | ) \ No newline at end of file |
diff --git a/src/lexer/strings.rs b/src/lexer/strings.rs index 40e5e4528..283ce8feb 100644 --- a/src/lexer/strings.rs +++ b/src/lexer/strings.rs | |||
@@ -33,30 +33,51 @@ pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind { | |||
33 | match c { | 33 | match c { |
34 | '\'' => { | 34 | '\'' => { |
35 | scan_byte(ptr); | 35 | scan_byte(ptr); |
36 | CHAR | 36 | BYTE |
37 | } | 37 | } |
38 | '"' => { | 38 | '"' => { |
39 | scan_byte_string(ptr); | 39 | scan_byte_string(ptr); |
40 | CHAR | 40 | BYTE_STRING |
41 | } | 41 | } |
42 | 'r' => { | 42 | 'r' => { |
43 | scan_raw_byte_string(ptr); | 43 | scan_raw_byte_string(ptr); |
44 | CHAR | 44 | RAW_BYTE_STRING |
45 | } | 45 | } |
46 | _ => unreachable!(), | 46 | _ => unreachable!(), |
47 | } | 47 | } |
48 | } | 48 | } |
49 | 49 | ||
50 | fn scan_byte(ptr: &mut Ptr) { | 50 | fn scan_byte(ptr: &mut Ptr) { |
51 | 51 | if ptr.next_is('\'') { | |
52 | ptr.bump(); | ||
53 | return | ||
54 | } | ||
55 | ptr.bump(); | ||
56 | if ptr.next_is('\'') { | ||
57 | ptr.bump(); | ||
58 | return | ||
59 | } | ||
52 | } | 60 | } |
53 | 61 | ||
54 | fn scan_byte_string(ptr: &mut Ptr) { | 62 | fn scan_byte_string(ptr: &mut Ptr) { |
55 | 63 | while let Some(c) = ptr.bump() { | |
64 | if c == '"' { | ||
65 | return | ||
66 | } | ||
67 | } | ||
56 | } | 68 | } |
57 | 69 | ||
58 | fn scan_raw_byte_string(ptr: &mut Ptr) { | 70 | fn scan_raw_byte_string(ptr: &mut Ptr) { |
71 | if !ptr.next_is('"') { | ||
72 | return | ||
73 | } | ||
74 | ptr.bump(); | ||
59 | 75 | ||
76 | while let Some(c) = ptr.bump() { | ||
77 | if c == '"' { | ||
78 | return | ||
79 | } | ||
80 | } | ||
60 | } | 81 | } |
61 | 82 | ||
62 | fn scan_char_or_byte(ptr: &mut Ptr) { | 83 | fn scan_char_or_byte(ptr: &mut Ptr) { |
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs index 4c023757b..4a68acb31 100644 --- a/src/syntax_kinds.rs +++ b/src/syntax_kinds.rs | |||
@@ -33,10 +33,15 @@ pub const EQEQ: SyntaxKind = SyntaxKind(28); | |||
33 | pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); | 33 | pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); |
34 | pub const NEQ: SyntaxKind = SyntaxKind(30); | 34 | pub const NEQ: SyntaxKind = SyntaxKind(30); |
35 | pub const NOT: SyntaxKind = SyntaxKind(31); | 35 | pub const NOT: SyntaxKind = SyntaxKind(31); |
36 | pub const CHAR: SyntaxKind = SyntaxKind(32); | 36 | pub const LIFETIME: SyntaxKind = SyntaxKind(32); |
37 | pub const LIFETIME: SyntaxKind = SyntaxKind(33); | 37 | pub const CHAR: SyntaxKind = SyntaxKind(33); |
38 | pub const BYTE: SyntaxKind = SyntaxKind(34); | ||
39 | pub const STRING: SyntaxKind = SyntaxKind(35); | ||
40 | pub const RAW_STRING: SyntaxKind = SyntaxKind(36); | ||
41 | pub const BYTE_STRING: SyntaxKind = SyntaxKind(37); | ||
42 | pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(38); | ||
38 | 43 | ||
39 | static INFOS: [SyntaxInfo; 34] = [ | 44 | static INFOS: [SyntaxInfo; 39] = [ |
40 | SyntaxInfo { name: "ERROR" }, | 45 | SyntaxInfo { name: "ERROR" }, |
41 | SyntaxInfo { name: "IDENT" }, | 46 | SyntaxInfo { name: "IDENT" }, |
42 | SyntaxInfo { name: "UNDERSCORE" }, | 47 | SyntaxInfo { name: "UNDERSCORE" }, |
@@ -69,8 +74,13 @@ static INFOS: [SyntaxInfo; 34] = [ | |||
69 | SyntaxInfo { name: "FAT_ARROW" }, | 74 | SyntaxInfo { name: "FAT_ARROW" }, |
70 | SyntaxInfo { name: "NEQ" }, | 75 | SyntaxInfo { name: "NEQ" }, |
71 | SyntaxInfo { name: "NOT" }, | 76 | SyntaxInfo { name: "NOT" }, |
72 | SyntaxInfo { name: "CHAR" }, | ||
73 | SyntaxInfo { name: "LIFETIME" }, | 77 | SyntaxInfo { name: "LIFETIME" }, |
78 | SyntaxInfo { name: "CHAR" }, | ||
79 | SyntaxInfo { name: "BYTE" }, | ||
80 | SyntaxInfo { name: "STRING" }, | ||
81 | SyntaxInfo { name: "RAW_STRING" }, | ||
82 | SyntaxInfo { name: "BYTE_STRING" }, | ||
83 | SyntaxInfo { name: "RAW_BYTE_STRING" }, | ||
74 | ]; | 84 | ]; |
75 | 85 | ||
76 | pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { | 86 | pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { |
diff --git a/tests/data/lexer/0008_strings.rs b/tests/data/lexer/0008_strings.rs new file mode 100644 index 000000000..1ffc7bb9d --- /dev/null +++ b/tests/data/lexer/0008_strings.rs | |||
@@ -0,0 +1 @@ | |||
b'' b'x' b"foo" br"" \ No newline at end of file | |||
diff --git a/tests/data/lexer/0008_strings.txt b/tests/data/lexer/0008_strings.txt new file mode 100644 index 000000000..d4ff4b558 --- /dev/null +++ b/tests/data/lexer/0008_strings.txt | |||
@@ -0,0 +1,7 @@ | |||
1 | BYTE 3 "b\'\'" | ||
2 | WHITESPACE 1 " " | ||
3 | BYTE 4 "b\'x\'" | ||
4 | WHITESPACE 1 " " | ||
5 | BYTE_STRING 6 "b\"foo\"" | ||
6 | WHITESPACE 1 " " | ||
7 | RAW_BYTE_STRING 4 "br\"\"" | ||
diff --git a/validation.md b/validation.md index a38b4a96e..39b5f85fa 100644 --- a/validation.md +++ b/validation.md | |||
@@ -5,4 +5,4 @@ Fixmes: | |||
5 | * Validate that float and integer literals use digits only of the appropriate | 5 | * Validate that float and integer literals use digits only of the appropriate |
6 | base, and are in range | 6 | base, and are in range |
7 | * Validation for unclosed char literal | 7 | * Validation for unclosed char literal |
8 | 8 | * Strings are completely wrong: more tests and comparison with libsyntax. | |