diff options
-rw-r--r-- | src/lexer/ptr.rs | 3 | ||||
-rw-r--r-- | src/text.rs | 4 | ||||
-rw-r--r-- | tests/data/lexer/0001_hello.txt | 6 | ||||
-rw-r--r-- | tests/data/lexer/0002_whitespace.txt | 24 | ||||
-rw-r--r-- | tests/data/lexer/0003_ident.txt | 28 | ||||
-rw-r--r-- | tests/data/lexer/0004_number.txt | 124 | ||||
-rw-r--r-- | tests/lexer.rs | 12 |
7 files changed, 103 insertions, 98 deletions
diff --git a/src/lexer/ptr.rs b/src/lexer/ptr.rs index d441b826b..b380117e6 100644 --- a/src/lexer/ptr.rs +++ b/src/lexer/ptr.rs | |||
@@ -56,6 +56,7 @@ impl<'s> Ptr<'s> { | |||
56 | } | 56 | } |
57 | 57 | ||
58 | fn chars(&self) -> Chars { | 58 | fn chars(&self) -> Chars { |
59 | self.text[self.len.0 as usize ..].chars() | 59 | let len: u32 = self.len.into(); |
60 | self.text[len as usize ..].chars() | ||
60 | } | 61 | } |
61 | } | 62 | } |
diff --git a/src/text.rs b/src/text.rs index 31e67b456..c3ef1ac8e 100644 --- a/src/text.rs +++ b/src/text.rs | |||
@@ -2,9 +2,7 @@ use std::fmt; | |||
2 | use std::ops; | 2 | use std::ops; |
3 | 3 | ||
4 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | 4 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] |
5 | pub struct TextUnit( | 5 | pub struct TextUnit(u32); |
6 | pub(crate) u32 | ||
7 | ); | ||
8 | 6 | ||
9 | impl TextUnit { | 7 | impl TextUnit { |
10 | pub fn len_of_char(c: char) -> TextUnit { | 8 | pub fn len_of_char(c: char) -> TextUnit { |
diff --git a/tests/data/lexer/0001_hello.txt b/tests/data/lexer/0001_hello.txt index e0b6a1f10..27a5940a9 100644 --- a/tests/data/lexer/0001_hello.txt +++ b/tests/data/lexer/0001_hello.txt | |||
@@ -1,3 +1,3 @@ | |||
1 | IDENT 5 | 1 | IDENT 5 "hello" |
2 | WHITESPACE 1 | 2 | WHITESPACE 1 " " |
3 | IDENT 5 | 3 | IDENT 5 "world" |
diff --git a/tests/data/lexer/0002_whitespace.txt b/tests/data/lexer/0002_whitespace.txt index 4b9885e4a..01d260918 100644 --- a/tests/data/lexer/0002_whitespace.txt +++ b/tests/data/lexer/0002_whitespace.txt | |||
@@ -1,12 +1,12 @@ | |||
1 | IDENT 1 | 1 | IDENT 1 "a" |
2 | WHITESPACE 1 | 2 | WHITESPACE 1 " " |
3 | IDENT 1 | 3 | IDENT 1 "b" |
4 | WHITESPACE 2 | 4 | WHITESPACE 2 " " |
5 | IDENT 1 | 5 | IDENT 1 "c" |
6 | WHITESPACE 1 | 6 | WHITESPACE 1 "\n" |
7 | IDENT 1 | 7 | IDENT 1 "d" |
8 | WHITESPACE 2 | 8 | WHITESPACE 2 "\n\n" |
9 | IDENT 1 | 9 | IDENT 1 "e" |
10 | WHITESPACE 1 | 10 | WHITESPACE 1 "\t" |
11 | IDENT 1 | 11 | IDENT 1 "f" |
12 | WHITESPACE 1 | 12 | WHITESPACE 1 "\n" |
diff --git a/tests/data/lexer/0003_ident.txt b/tests/data/lexer/0003_ident.txt index eec82fb91..4a0d5c053 100644 --- a/tests/data/lexer/0003_ident.txt +++ b/tests/data/lexer/0003_ident.txt | |||
@@ -1,14 +1,14 @@ | |||
1 | IDENT 3 | 1 | IDENT 3 "foo" |
2 | WHITESPACE 1 | 2 | WHITESPACE 1 " " |
3 | IDENT 4 | 3 | IDENT 4 "foo_" |
4 | WHITESPACE 1 | 4 | WHITESPACE 1 " " |
5 | IDENT 4 | 5 | IDENT 4 "_foo" |
6 | WHITESPACE 1 | 6 | WHITESPACE 1 " " |
7 | UNDERSCORE 1 | 7 | UNDERSCORE 1 "_" |
8 | WHITESPACE 1 | 8 | WHITESPACE 1 " " |
9 | IDENT 2 | 9 | IDENT 2 "__" |
10 | WHITESPACE 1 | 10 | WHITESPACE 1 " " |
11 | IDENT 1 | 11 | IDENT 1 "x" |
12 | WHITESPACE 1 | 12 | WHITESPACE 1 " " |
13 | IDENT 12 | 13 | IDENT 12 "привет" |
14 | WHITESPACE 1 | 14 | WHITESPACE 1 "\n" |
diff --git a/tests/data/lexer/0004_number.txt b/tests/data/lexer/0004_number.txt index e9ad8410d..7dedd2cac 100644 --- a/tests/data/lexer/0004_number.txt +++ b/tests/data/lexer/0004_number.txt | |||
@@ -1,62 +1,62 @@ | |||
1 | INT_NUMBER 1 | 1 | INT_NUMBER 1 "0" |
2 | WHITESPACE 1 | 2 | WHITESPACE 1 " " |
3 | INT_NUMBER 2 | 3 | INT_NUMBER 2 "0b" |
4 | WHITESPACE 1 | 4 | WHITESPACE 1 " " |
5 | INT_NUMBER 2 | 5 | INT_NUMBER 2 "0o" |
6 | WHITESPACE 1 | 6 | WHITESPACE 1 " " |
7 | INT_NUMBER 2 | 7 | INT_NUMBER 2 "0x" |
8 | WHITESPACE 1 | 8 | WHITESPACE 1 " " |
9 | INT_NUMBER 2 | 9 | INT_NUMBER 2 "00" |
10 | WHITESPACE 1 | 10 | WHITESPACE 1 " " |
11 | INT_NUMBER 2 | 11 | INT_NUMBER 2 "0_" |
12 | WHITESPACE 1 | 12 | WHITESPACE 1 " " |
13 | FLOAT_NUMBER 2 | 13 | FLOAT_NUMBER 2 "0." |
14 | WHITESPACE 1 | 14 | WHITESPACE 1 " " |
15 | INT_NUMBER 2 | 15 | INT_NUMBER 2 "0e" |
16 | WHITESPACE 1 | 16 | WHITESPACE 1 " " |
17 | INT_NUMBER 2 | 17 | INT_NUMBER 2 "0E" |
18 | WHITESPACE 1 | 18 | WHITESPACE 1 " " |
19 | INT_NUMBER 1 | 19 | INT_NUMBER 1 "0" |
20 | IDENT 1 | 20 | IDENT 1 "z" |
21 | WHITESPACE 1 | 21 | WHITESPACE 1 "\n" |
22 | INT_NUMBER 5 | 22 | INT_NUMBER 5 "01790" |
23 | WHITESPACE 1 | 23 | WHITESPACE 1 " " |
24 | INT_NUMBER 6 | 24 | INT_NUMBER 6 "0b1790" |
25 | WHITESPACE 1 | 25 | WHITESPACE 1 " " |
26 | INT_NUMBER 6 | 26 | INT_NUMBER 6 "0o1790" |
27 | WHITESPACE 1 | 27 | WHITESPACE 1 " " |
28 | INT_NUMBER 18 | 28 | INT_NUMBER 18 "0x1790aAbBcCdDeEfF" |
29 | WHITESPACE 1 | 29 | WHITESPACE 1 " " |
30 | INT_NUMBER 6 | 30 | INT_NUMBER 6 "001279" |
31 | WHITESPACE 1 | 31 | WHITESPACE 1 " " |
32 | INT_NUMBER 6 | 32 | INT_NUMBER 6 "0_1279" |
33 | WHITESPACE 1 | 33 | WHITESPACE 1 " " |
34 | FLOAT_NUMBER 6 | 34 | FLOAT_NUMBER 6 "0.1279" |
35 | WHITESPACE 1 | 35 | WHITESPACE 1 " " |
36 | INT_NUMBER 6 | 36 | INT_NUMBER 6 "0e1279" |
37 | WHITESPACE 1 | 37 | WHITESPACE 1 " " |
38 | INT_NUMBER 6 | 38 | INT_NUMBER 6 "0E1279" |
39 | WHITESPACE 1 | 39 | WHITESPACE 1 "\n" |
40 | INT_NUMBER 1 | 40 | INT_NUMBER 1 "0" |
41 | ERROR 1 | 41 | ERROR 1 "." |
42 | ERROR 1 | 42 | ERROR 1 "." |
43 | INT_NUMBER 1 | 43 | INT_NUMBER 1 "2" |
44 | WHITESPACE 1 | 44 | WHITESPACE 1 "\n" |
45 | INT_NUMBER 1 | 45 | INT_NUMBER 1 "0" |
46 | ERROR 1 | 46 | ERROR 1 "." |
47 | IDENT 3 | 47 | IDENT 3 "foo" |
48 | ERROR 1 | 48 | ERROR 1 "(" |
49 | ERROR 1 | 49 | ERROR 1 ")" |
50 | WHITESPACE 1 | 50 | WHITESPACE 1 "\n" |
51 | INT_NUMBER 2 | 51 | INT_NUMBER 2 "0e" |
52 | ERROR 1 | 52 | ERROR 1 "+" |
53 | INT_NUMBER 1 | 53 | INT_NUMBER 1 "1" |
54 | WHITESPACE 1 | 54 | WHITESPACE 1 "\n" |
55 | INT_NUMBER 1 | 55 | INT_NUMBER 1 "0" |
56 | ERROR 1 | 56 | ERROR 1 "." |
57 | IDENT 1 | 57 | IDENT 1 "e" |
58 | ERROR 1 | 58 | ERROR 1 "+" |
59 | INT_NUMBER 1 | 59 | INT_NUMBER 1 "1" |
60 | WHITESPACE 1 | 60 | WHITESPACE 1 "\n" |
61 | FLOAT_NUMBER 6 | 61 | FLOAT_NUMBER 6 "0.0E-2" |
62 | WHITESPACE 1 | 62 | WHITESPACE 1 "\n" |
diff --git a/tests/lexer.rs b/tests/lexer.rs index a3c8916b1..6a9bab66b 100644 --- a/tests/lexer.rs +++ b/tests/lexer.rs | |||
@@ -31,6 +31,7 @@ fn lexer_test_cases() -> Vec<PathBuf> { | |||
31 | acc.push(path); | 31 | acc.push(path); |
32 | } | 32 | } |
33 | } | 33 | } |
34 | acc.sort(); | ||
34 | acc | 35 | acc |
35 | } | 36 | } |
36 | 37 | ||
@@ -38,7 +39,7 @@ fn lexer_test_case(path: &Path) { | |||
38 | let actual = { | 39 | let actual = { |
39 | let text = file::get_text(path).unwrap(); | 40 | let text = file::get_text(path).unwrap(); |
40 | let tokens = tokenize(&text); | 41 | let tokens = tokenize(&text); |
41 | dump_tokens(&tokens) | 42 | dump_tokens(&tokens, &text) |
42 | }; | 43 | }; |
43 | let expected = file::get_text(&path.with_extension("txt")).unwrap(); | 44 | let expected = file::get_text(&path.with_extension("txt")).unwrap(); |
44 | let expected = expected.as_str(); | 45 | let expected = expected.as_str(); |
@@ -64,10 +65,15 @@ fn tokenize(text: &str) -> Vec<Token> { | |||
64 | acc | 65 | acc |
65 | } | 66 | } |
66 | 67 | ||
67 | fn dump_tokens(tokens: &[Token]) -> String { | 68 | fn dump_tokens(tokens: &[Token], text: &str) -> String { |
68 | let mut acc = String::new(); | 69 | let mut acc = String::new(); |
70 | let mut offset = 0; | ||
69 | for token in tokens { | 71 | for token in tokens { |
70 | write!(acc, "{:?} {}\n", token.kind, token.len).unwrap() | 72 | let len: u32 = token.len.into(); |
73 | let len = len as usize; | ||
74 | let token_text = &text[offset..offset + len]; | ||
75 | offset += len; | ||
76 | write!(acc, "{:?} {} {:?}\n", token.kind, token.len, token_text).unwrap() | ||
71 | } | 77 | } |
72 | acc | 78 | acc |
73 | } \ No newline at end of file | 79 | } \ No newline at end of file |