diff options
author | Aleksey Kladov <[email protected]> | 2017-12-28 21:56:36 +0000 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2017-12-28 21:56:36 +0000 |
commit | 5e1e8ed34a46738dda507a4a0f4e73065be74e57 (patch) | |
tree | 5191bbb793ce1a563026a9360380cbb207b43b9b | |
parent | 45fce4b3ef53b0ffc78aac7064701f1f31792a5a (diff) |
Lexer scaffold
-rw-r--r-- | src/lexer.rs | 10 | ||||
-rw-r--r-- | src/lib.rs | 15 | ||||
-rw-r--r-- | src/syntax_kinds.rs | 16 | ||||
-rw-r--r-- | src/text.rs | 32 | ||||
-rw-r--r-- | src/tree.rs | 31 | ||||
-rw-r--r-- | tests/lexer.rs | 24 |
6 files changed, 117 insertions, 11 deletions
diff --git a/src/lexer.rs b/src/lexer.rs new file mode 100644 index 000000000..cda9fe2b2 --- /dev/null +++ b/src/lexer.rs | |||
@@ -0,0 +1,10 @@ | |||
1 | use {Token, TextUnit}; | ||
2 | use syntax_kinds::*; | ||
3 | |||
4 | pub fn next_token(text: &str) -> Token { | ||
5 | let c = text.chars().next().unwrap(); | ||
6 | Token { | ||
7 | kind: IDENT, | ||
8 | len: TextUnit::len_of_char(c), | ||
9 | } | ||
10 | } \ No newline at end of file | ||
diff --git a/src/lib.rs b/src/lib.rs index 31e1bb209..4385c0325 100644 --- a/src/lib.rs +++ b/src/lib.rs | |||
@@ -1,7 +1,8 @@ | |||
1 | #[cfg(test)] | 1 | mod text; |
2 | mod tests { | 2 | mod tree; |
3 | #[test] | 3 | mod lexer; |
4 | fn it_works() { | 4 | |
5 | assert_eq!(2 + 2, 4); | 5 | pub mod syntax_kinds; |
6 | } | 6 | pub use text::TextUnit; |
7 | } | 7 | pub use tree::{SyntaxKind, Token}; |
8 | pub use lexer::next_token; | ||
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs new file mode 100644 index 000000000..18574b7f5 --- /dev/null +++ b/src/syntax_kinds.rs | |||
@@ -0,0 +1,16 @@ | |||
1 | use tree::{SyntaxKind, SyntaxInfo}; | ||
2 | |||
3 | pub const IDENT: SyntaxKind = SyntaxKind(1); | ||
4 | pub const WHITESPACE: SyntaxKind = SyntaxKind(2); | ||
5 | |||
6 | |||
7 | static IDENT_INFO: SyntaxInfo = SyntaxInfo { | ||
8 | name: "IDENT", | ||
9 | }; | ||
10 | |||
11 | pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { | ||
12 | match kind { | ||
13 | IDENT => &IDENT_INFO, | ||
14 | _ => unreachable!(), | ||
15 | } | ||
16 | } \ No newline at end of file | ||
diff --git a/src/text.rs b/src/text.rs new file mode 100644 index 000000000..5297275ed --- /dev/null +++ b/src/text.rs | |||
@@ -0,0 +1,32 @@ | |||
1 | use std::fmt; | ||
2 | |||
3 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||
4 | pub struct TextUnit(u32); | ||
5 | |||
6 | impl TextUnit { | ||
7 | pub fn len_of_char(c: char) -> TextUnit { | ||
8 | TextUnit(c.len_utf8() as u32) | ||
9 | } | ||
10 | |||
11 | pub fn new(val: u32) -> TextUnit { | ||
12 | TextUnit(val) | ||
13 | } | ||
14 | } | ||
15 | |||
16 | impl fmt::Debug for TextUnit { | ||
17 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
18 | <Self as fmt::Display>::fmt(self, f) | ||
19 | } | ||
20 | } | ||
21 | |||
22 | impl fmt::Display for TextUnit { | ||
23 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
24 | self.0.fmt(f) | ||
25 | } | ||
26 | } | ||
27 | |||
28 | impl From<TextUnit> for u32 { | ||
29 | fn from(tu: TextUnit) -> u32 { | ||
30 | tu.0 | ||
31 | } | ||
32 | } | ||
diff --git a/src/tree.rs b/src/tree.rs new file mode 100644 index 000000000..0924f38d0 --- /dev/null +++ b/src/tree.rs | |||
@@ -0,0 +1,31 @@ | |||
1 | use text::{TextUnit}; | ||
2 | use syntax_kinds::syntax_info; | ||
3 | |||
4 | use std::fmt; | ||
5 | |||
6 | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||
7 | pub struct SyntaxKind(pub(crate) u32); | ||
8 | |||
9 | impl SyntaxKind { | ||
10 | fn info(self) -> &'static SyntaxInfo { | ||
11 | syntax_info(self) | ||
12 | } | ||
13 | } | ||
14 | |||
15 | impl fmt::Debug for SyntaxKind { | ||
16 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||
17 | let name = self.info().name; | ||
18 | f.write_str(name) | ||
19 | } | ||
20 | } | ||
21 | |||
22 | |||
23 | pub(crate) struct SyntaxInfo { | ||
24 | pub name: &'static str, | ||
25 | } | ||
26 | |||
27 | #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] | ||
28 | pub struct Token { | ||
29 | pub kind: SyntaxKind, | ||
30 | pub len: TextUnit, | ||
31 | } \ No newline at end of file | ||
diff --git a/tests/lexer.rs b/tests/lexer.rs index de76f0a15..a27e7c395 100644 --- a/tests/lexer.rs +++ b/tests/lexer.rs | |||
@@ -1,9 +1,13 @@ | |||
1 | extern crate file; | 1 | extern crate file; |
2 | #[macro_use(assert_diff)] | 2 | #[macro_use(assert_diff)] |
3 | extern crate difference; | 3 | extern crate difference; |
4 | extern crate libsyntax2; | ||
4 | 5 | ||
5 | use std::path::{PathBuf, Path}; | 6 | use std::path::{PathBuf, Path}; |
6 | use std::fs::read_dir; | 7 | use std::fs::read_dir; |
8 | use std::fmt::Write; | ||
9 | |||
10 | use libsyntax2::{Token, next_token}; | ||
7 | 11 | ||
8 | #[test] | 12 | #[test] |
9 | fn lexer_tests() { | 13 | fn lexer_tests() { |
@@ -46,10 +50,22 @@ fn lexer_test_case(path: &Path) { | |||
46 | ) | 50 | ) |
47 | } | 51 | } |
48 | 52 | ||
49 | fn tokenize(text: &str) -> Vec<()> { | 53 | fn tokenize(text: &str) -> Vec<Token> { |
50 | Vec::new() | 54 | let mut text = text; |
55 | let mut acc = Vec::new(); | ||
56 | while !text.is_empty() { | ||
57 | let token = next_token(text); | ||
58 | acc.push(token); | ||
59 | let len: u32 = token.len.into(); | ||
60 | text = &text[len as usize..]; | ||
61 | } | ||
62 | acc | ||
51 | } | 63 | } |
52 | 64 | ||
53 | fn dump_tokens(tokens: &[()]) -> String { | 65 | fn dump_tokens(tokens: &[Token]) -> String { |
54 | "IDENT 5\nKEYWORD 1\nIDENT 5\n".to_string() | 66 | let mut acc = String::new(); |
67 | for token in tokens { | ||
68 | write!(acc, "{:?} {}\n", token.kind, token.len).unwrap() | ||
69 | } | ||
70 | acc | ||
55 | } \ No newline at end of file | 71 | } \ No newline at end of file |