diff options
author | Aleksey Kladov <[email protected]> | 2018-01-01 15:58:46 +0000 |
---|---|---|
committer | Aleksey Kladov <[email protected]> | 2018-01-01 15:58:46 +0000 |
commit | cb362626f326a565aca34c1a11c95dcb7152b798 (patch) | |
tree | 5a1cc081e36b4061f8e9275db9bf14ed71e924f9 | |
parent | 0af33a2587e4fb96e5001492792f1e926d576e27 (diff) |
Parser: guess what? Groundwork!
-rw-r--r-- | grammar.ron | 10 | ||||
-rw-r--r-- | src/bin/gen.rs | 19 | ||||
-rw-r--r-- | src/lexer/mod.rs | 3 | ||||
-rw-r--r-- | src/lexer/ptr.rs | 5 | ||||
-rw-r--r-- | src/parser/event_parser/grammar.rs | 62 | ||||
-rw-r--r-- | src/parser/event_parser/parser.rs | 15 | ||||
-rw-r--r-- | src/syntax_kinds.rs | 130 | ||||
-rw-r--r-- | tests/data/lexer/0011_keywords.rs | 1 | ||||
-rw-r--r-- | tests/data/lexer/0011_keywords.txt | 12 |
9 files changed, 199 insertions, 58 deletions
diff --git a/grammar.ron b/grammar.ron index 439c4ef9c..fb2c6d90e 100644 --- a/grammar.ron +++ b/grammar.ron | |||
@@ -1,4 +1,12 @@ | |||
1 | Grammar( | 1 | Grammar( |
2 | keywords: [ | ||
3 | "use", | ||
4 | "fn", | ||
5 | "struct", | ||
6 | "enum", | ||
7 | "trait", | ||
8 | "impl", | ||
9 | ], | ||
2 | tokens: [ | 10 | tokens: [ |
3 | "ERROR", | 11 | "ERROR", |
4 | "IDENT", | 12 | "IDENT", |
@@ -53,6 +61,6 @@ Grammar( | |||
53 | "SHEBANG", | 61 | "SHEBANG", |
54 | ], | 62 | ], |
55 | nodes: [ | 63 | nodes: [ |
56 | "FILE" | 64 | "FILE", |
57 | ] | 65 | ] |
58 | ) \ No newline at end of file | 66 | ) \ No newline at end of file |
diff --git a/src/bin/gen.rs b/src/bin/gen.rs index f5a66d9f2..9d7f7e389 100644 --- a/src/bin/gen.rs +++ b/src/bin/gen.rs | |||
@@ -17,6 +17,7 @@ fn main() { | |||
17 | 17 | ||
18 | #[derive(Deserialize)] | 18 | #[derive(Deserialize)] |
19 | struct Grammar { | 19 | struct Grammar { |
20 | keywords: Vec<String>, | ||
20 | tokens: Vec<String>, | 21 | tokens: Vec<String>, |
21 | nodes: Vec<String>, | 22 | nodes: Vec<String>, |
22 | } | 23 | } |
@@ -33,8 +34,10 @@ impl Grammar { | |||
33 | acc.push_str("use tree::{SyntaxKind, SyntaxInfo};\n"); | 34 | acc.push_str("use tree::{SyntaxKind, SyntaxInfo};\n"); |
34 | acc.push_str("\n"); | 35 | acc.push_str("\n"); |
35 | 36 | ||
36 | let syntax_kinds: Vec<&String> = | 37 | let syntax_kinds: Vec<String> = |
37 | self.tokens.iter().chain(self.nodes.iter()) | 38 | self.keywords.iter().map(|kw| kw_token(kw)) |
39 | .chain(self.tokens.iter().cloned()) | ||
40 | .chain(self.nodes.iter().cloned()) | ||
38 | .collect(); | 41 | .collect(); |
39 | 42 | ||
40 | for (idx, kind) in syntax_kinds.iter().enumerate() { | 43 | for (idx, kind) in syntax_kinds.iter().enumerate() { |
@@ -60,6 +63,14 @@ impl Grammar { | |||
60 | 63 | ||
61 | acc.push_str("pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {\n"); | 64 | acc.push_str("pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {\n"); |
62 | acc.push_str(" &INFOS[kind.0 as usize]\n"); | 65 | acc.push_str(" &INFOS[kind.0 as usize]\n"); |
66 | acc.push_str("}\n\n"); | ||
67 | acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {\n"); | ||
68 | acc.push_str(" match ident {\n"); | ||
69 | for kw in self.keywords.iter() { | ||
70 | write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap(); | ||
71 | } | ||
72 | acc.push_str(" _ => None,\n"); | ||
73 | acc.push_str(" }\n"); | ||
63 | acc.push_str("}\n"); | 74 | acc.push_str("}\n"); |
64 | acc | 75 | acc |
65 | } | 76 | } |
@@ -77,4 +88,8 @@ fn generated_file() -> PathBuf { | |||
77 | 88 | ||
78 | fn scream(word: &str) -> String { | 89 | fn scream(word: &str) -> String { |
79 | word.chars().map(|c| c.to_ascii_uppercase()).collect() | 90 | word.chars().map(|c| c.to_ascii_uppercase()).collect() |
91 | } | ||
92 | |||
93 | fn kw_token(keyword: &str) -> String { | ||
94 | format!("{}_KW", scream(keyword)) | ||
80 | } \ No newline at end of file | 95 | } \ No newline at end of file |
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 7c4259763..bc5344b5f 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs | |||
@@ -187,6 +187,9 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { | |||
187 | return if c == '_' { UNDERSCORE } else { IDENT }; | 187 | return if c == '_' { UNDERSCORE } else { IDENT }; |
188 | } | 188 | } |
189 | ptr.bump_while(is_ident_continue); | 189 | ptr.bump_while(is_ident_continue); |
190 | if let Some(kind) = ident_to_keyword(ptr.current_token_text()) { | ||
191 | return kind; | ||
192 | } | ||
190 | IDENT | 193 | IDENT |
191 | } | 194 | } |
192 | 195 | ||
diff --git a/src/lexer/ptr.rs b/src/lexer/ptr.rs index 2f759119a..ff6ef11fc 100644 --- a/src/lexer/ptr.rs +++ b/src/lexer/ptr.rs | |||
@@ -59,6 +59,11 @@ impl<'s> Ptr<'s> { | |||
59 | } | 59 | } |
60 | } | 60 | } |
61 | 61 | ||
62 | pub fn current_token_text(&self) -> &str { | ||
63 | let len: u32 = self.len.into(); | ||
64 | &self.text[..len as usize] | ||
65 | } | ||
66 | |||
62 | fn chars(&self) -> Chars { | 67 | fn chars(&self) -> Chars { |
63 | let len: u32 = self.len.into(); | 68 | let len: u32 = self.len.into(); |
64 | self.text[len as usize ..].chars() | 69 | self.text[len as usize ..].chars() |
diff --git a/src/parser/event_parser/grammar.rs b/src/parser/event_parser/grammar.rs index c3496cccd..5219ed535 100644 --- a/src/parser/event_parser/grammar.rs +++ b/src/parser/event_parser/grammar.rs | |||
@@ -3,8 +3,68 @@ use super::parser::Parser; | |||
3 | 3 | ||
4 | use syntax_kinds::*; | 4 | use syntax_kinds::*; |
5 | 5 | ||
6 | // Items // | ||
7 | |||
6 | pub fn file(p: &mut Parser) { | 8 | pub fn file(p: &mut Parser) { |
7 | p.start(FILE); | 9 | p.start(FILE); |
8 | //TODO: parse_shebang | 10 | shebang(p); |
11 | inner_attributes(p); | ||
12 | mod_items(p); | ||
13 | p.finish(); | ||
14 | } | ||
15 | |||
16 | type Result = ::std::result::Result<(), ()>; | ||
17 | const OK: Result = Ok(()); | ||
18 | const ERR: Result = Err(()); | ||
19 | |||
20 | fn shebang(_: &mut Parser) { | ||
21 | //TODO | ||
22 | } | ||
23 | |||
24 | fn inner_attributes(_: &mut Parser) { | ||
25 | //TODO | ||
26 | } | ||
27 | |||
28 | fn mod_items(p: &mut Parser) { | ||
29 | loop { | ||
30 | skip_until_item(p); | ||
31 | if p.is_eof() { | ||
32 | return; | ||
33 | } | ||
34 | if item(p).is_err() { | ||
35 | skip_one_token(p); | ||
36 | } | ||
37 | } | ||
38 | } | ||
39 | |||
40 | fn item(p: &mut Parser) -> Result { | ||
41 | outer_attributes(p)?; | ||
42 | visibility(p)?; | ||
43 | ERR | ||
44 | } | ||
45 | |||
46 | |||
47 | |||
48 | // Paths, types, attributes, and stuff // | ||
49 | |||
50 | fn outer_attributes(_: &mut Parser) -> Result { | ||
51 | OK | ||
52 | } | ||
53 | |||
54 | fn visibility(_: &mut Parser) -> Result { | ||
55 | OK | ||
56 | } | ||
57 | |||
58 | // Expressions // | ||
59 | |||
60 | // Error recovery and high-order utils // | ||
61 | |||
62 | fn skip_until_item(_: &mut Parser) { | ||
63 | //TODO | ||
64 | } | ||
65 | |||
66 | fn skip_one_token(p: &mut Parser) { | ||
67 | p.start(ERROR); | ||
68 | p.bump().unwrap(); | ||
9 | p.finish(); | 69 | p.finish(); |
10 | } \ No newline at end of file | 70 | } \ No newline at end of file |
diff --git a/src/parser/event_parser/parser.rs b/src/parser/event_parser/parser.rs index 9592b90c9..0e4d44b79 100644 --- a/src/parser/event_parser/parser.rs +++ b/src/parser/event_parser/parser.rs | |||
@@ -34,10 +34,14 @@ impl<'t> Parser<'t> { | |||
34 | } | 34 | } |
35 | 35 | ||
36 | pub(crate) fn into_events(self) -> Vec<Event> { | 36 | pub(crate) fn into_events(self) -> Vec<Event> { |
37 | assert!(self.pos == self.non_ws_tokens.len()); | 37 | assert!(self.is_eof()); |
38 | self.events | 38 | self.events |
39 | } | 39 | } |
40 | 40 | ||
41 | pub(crate) fn is_eof(&self) -> bool { | ||
42 | self.pos == self.non_ws_tokens.len() | ||
43 | } | ||
44 | |||
41 | pub(crate) fn start(&mut self, kind: SyntaxKind) { | 45 | pub(crate) fn start(&mut self, kind: SyntaxKind) { |
42 | self.event(Event::Start { kind }); | 46 | self.event(Event::Start { kind }); |
43 | } | 47 | } |
@@ -46,6 +50,15 @@ impl<'t> Parser<'t> { | |||
46 | self.event(Event::Finish); | 50 | self.event(Event::Finish); |
47 | } | 51 | } |
48 | 52 | ||
53 | pub(crate) fn bump(&mut self) -> Option<SyntaxKind> { | ||
54 | if self.is_eof() { | ||
55 | return None; | ||
56 | } | ||
57 | let idx = self.non_ws_tokens[self.pos].0; | ||
58 | self.pos += 1; | ||
59 | Some(self.raw_tokens[idx].kind) | ||
60 | } | ||
61 | |||
49 | fn event(&mut self, event: Event) { | 62 | fn event(&mut self, event: Event) { |
50 | self.events.push(event) | 63 | self.events.push(event) |
51 | } | 64 | } |
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs index b83f48dd8..a1bcad062 100644 --- a/src/syntax_kinds.rs +++ b/src/syntax_kinds.rs | |||
@@ -1,60 +1,72 @@ | |||
1 | // Generated from grammar.ron | 1 | // Generated from grammar.ron |
2 | use tree::{SyntaxKind, SyntaxInfo}; | 2 | use tree::{SyntaxKind, SyntaxInfo}; |
3 | 3 | ||
4 | pub const ERROR: SyntaxKind = SyntaxKind(0); | 4 | pub const USE_KW: SyntaxKind = SyntaxKind(0); |
5 | pub const IDENT: SyntaxKind = SyntaxKind(1); | 5 | pub const FN_KW: SyntaxKind = SyntaxKind(1); |
6 | pub const UNDERSCORE: SyntaxKind = SyntaxKind(2); | 6 | pub const STRUCT_KW: SyntaxKind = SyntaxKind(2); |
7 | pub const WHITESPACE: SyntaxKind = SyntaxKind(3); | 7 | pub const ENUM_KW: SyntaxKind = SyntaxKind(3); |
8 | pub const INT_NUMBER: SyntaxKind = SyntaxKind(4); | 8 | pub const TRAIT_KW: SyntaxKind = SyntaxKind(4); |
9 | pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5); | 9 | pub const IMPL_KW: SyntaxKind = SyntaxKind(5); |
10 | pub const SEMI: SyntaxKind = SyntaxKind(6); | 10 | pub const ERROR: SyntaxKind = SyntaxKind(6); |
11 | pub const COMMA: SyntaxKind = SyntaxKind(7); | 11 | pub const IDENT: SyntaxKind = SyntaxKind(7); |
12 | pub const DOT: SyntaxKind = SyntaxKind(8); | 12 | pub const UNDERSCORE: SyntaxKind = SyntaxKind(8); |
13 | pub const DOTDOT: SyntaxKind = SyntaxKind(9); | 13 | pub const WHITESPACE: SyntaxKind = SyntaxKind(9); |
14 | pub const DOTDOTDOT: SyntaxKind = SyntaxKind(10); | 14 | pub const INT_NUMBER: SyntaxKind = SyntaxKind(10); |
15 | pub const DOTDOTEQ: SyntaxKind = SyntaxKind(11); | 15 | pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(11); |
16 | pub const L_PAREN: SyntaxKind = SyntaxKind(12); | 16 | pub const SEMI: SyntaxKind = SyntaxKind(12); |
17 | pub const R_PAREN: SyntaxKind = SyntaxKind(13); | 17 | pub const COMMA: SyntaxKind = SyntaxKind(13); |
18 | pub const L_CURLY: SyntaxKind = SyntaxKind(14); | 18 | pub const DOT: SyntaxKind = SyntaxKind(14); |
19 | pub const R_CURLY: SyntaxKind = SyntaxKind(15); | 19 | pub const DOTDOT: SyntaxKind = SyntaxKind(15); |
20 | pub const L_BRACK: SyntaxKind = SyntaxKind(16); | 20 | pub const DOTDOTDOT: SyntaxKind = SyntaxKind(16); |
21 | pub const R_BRACK: SyntaxKind = SyntaxKind(17); | 21 | pub const DOTDOTEQ: SyntaxKind = SyntaxKind(17); |
22 | pub const L_ANGLE: SyntaxKind = SyntaxKind(18); | 22 | pub const L_PAREN: SyntaxKind = SyntaxKind(18); |
23 | pub const R_ANGLE: SyntaxKind = SyntaxKind(19); | 23 | pub const R_PAREN: SyntaxKind = SyntaxKind(19); |
24 | pub const AT: SyntaxKind = SyntaxKind(20); | 24 | pub const L_CURLY: SyntaxKind = SyntaxKind(20); |
25 | pub const POUND: SyntaxKind = SyntaxKind(21); | 25 | pub const R_CURLY: SyntaxKind = SyntaxKind(21); |
26 | pub const TILDE: SyntaxKind = SyntaxKind(22); | 26 | pub const L_BRACK: SyntaxKind = SyntaxKind(22); |
27 | pub const QUESTION: SyntaxKind = SyntaxKind(23); | 27 | pub const R_BRACK: SyntaxKind = SyntaxKind(23); |
28 | pub const COLON: SyntaxKind = SyntaxKind(24); | 28 | pub const L_ANGLE: SyntaxKind = SyntaxKind(24); |
29 | pub const COLONCOLON: SyntaxKind = SyntaxKind(25); | 29 | pub const R_ANGLE: SyntaxKind = SyntaxKind(25); |
30 | pub const DOLLAR: SyntaxKind = SyntaxKind(26); | 30 | pub const AT: SyntaxKind = SyntaxKind(26); |
31 | pub const EQ: SyntaxKind = SyntaxKind(27); | 31 | pub const POUND: SyntaxKind = SyntaxKind(27); |
32 | pub const EQEQ: SyntaxKind = SyntaxKind(28); | 32 | pub const TILDE: SyntaxKind = SyntaxKind(28); |
33 | pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); | 33 | pub const QUESTION: SyntaxKind = SyntaxKind(29); |
34 | pub const NEQ: SyntaxKind = SyntaxKind(30); | 34 | pub const COLON: SyntaxKind = SyntaxKind(30); |
35 | pub const NOT: SyntaxKind = SyntaxKind(31); | 35 | pub const COLONCOLON: SyntaxKind = SyntaxKind(31); |
36 | pub const LIFETIME: SyntaxKind = SyntaxKind(32); | 36 | pub const DOLLAR: SyntaxKind = SyntaxKind(32); |
37 | pub const CHAR: SyntaxKind = SyntaxKind(33); | 37 | pub const EQ: SyntaxKind = SyntaxKind(33); |
38 | pub const BYTE: SyntaxKind = SyntaxKind(34); | 38 | pub const EQEQ: SyntaxKind = SyntaxKind(34); |
39 | pub const STRING: SyntaxKind = SyntaxKind(35); | 39 | pub const FAT_ARROW: SyntaxKind = SyntaxKind(35); |
40 | pub const RAW_STRING: SyntaxKind = SyntaxKind(36); | 40 | pub const NEQ: SyntaxKind = SyntaxKind(36); |
41 | pub const BYTE_STRING: SyntaxKind = SyntaxKind(37); | 41 | pub const NOT: SyntaxKind = SyntaxKind(37); |
42 | pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(38); | 42 | pub const LIFETIME: SyntaxKind = SyntaxKind(38); |
43 | pub const PLUS: SyntaxKind = SyntaxKind(39); | 43 | pub const CHAR: SyntaxKind = SyntaxKind(39); |
44 | pub const MINUS: SyntaxKind = SyntaxKind(40); | 44 | pub const BYTE: SyntaxKind = SyntaxKind(40); |
45 | pub const STAR: SyntaxKind = SyntaxKind(41); | 45 | pub const STRING: SyntaxKind = SyntaxKind(41); |
46 | pub const SLASH: SyntaxKind = SyntaxKind(42); | 46 | pub const RAW_STRING: SyntaxKind = SyntaxKind(42); |
47 | pub const CARET: SyntaxKind = SyntaxKind(43); | 47 | pub const BYTE_STRING: SyntaxKind = SyntaxKind(43); |
48 | pub const PERCENT: SyntaxKind = SyntaxKind(44); | 48 | pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(44); |
49 | pub const AMPERSAND: SyntaxKind = SyntaxKind(45); | 49 | pub const PLUS: SyntaxKind = SyntaxKind(45); |
50 | pub const PIPE: SyntaxKind = SyntaxKind(46); | 50 | pub const MINUS: SyntaxKind = SyntaxKind(46); |
51 | pub const THIN_ARROW: SyntaxKind = SyntaxKind(47); | 51 | pub const STAR: SyntaxKind = SyntaxKind(47); |
52 | pub const COMMENT: SyntaxKind = SyntaxKind(48); | 52 | pub const SLASH: SyntaxKind = SyntaxKind(48); |
53 | pub const DOC_COMMENT: SyntaxKind = SyntaxKind(49); | 53 | pub const CARET: SyntaxKind = SyntaxKind(49); |
54 | pub const SHEBANG: SyntaxKind = SyntaxKind(50); | 54 | pub const PERCENT: SyntaxKind = SyntaxKind(50); |
55 | pub const FILE: SyntaxKind = SyntaxKind(51); | 55 | pub const AMPERSAND: SyntaxKind = SyntaxKind(51); |
56 | pub const PIPE: SyntaxKind = SyntaxKind(52); | ||
57 | pub const THIN_ARROW: SyntaxKind = SyntaxKind(53); | ||
58 | pub const COMMENT: SyntaxKind = SyntaxKind(54); | ||
59 | pub const DOC_COMMENT: SyntaxKind = SyntaxKind(55); | ||
60 | pub const SHEBANG: SyntaxKind = SyntaxKind(56); | ||
61 | pub const FILE: SyntaxKind = SyntaxKind(57); | ||
56 | 62 | ||
57 | static INFOS: [SyntaxInfo; 52] = [ | 63 | static INFOS: [SyntaxInfo; 58] = [ |
64 | SyntaxInfo { name: "USE_KW" }, | ||
65 | SyntaxInfo { name: "FN_KW" }, | ||
66 | SyntaxInfo { name: "STRUCT_KW" }, | ||
67 | SyntaxInfo { name: "ENUM_KW" }, | ||
68 | SyntaxInfo { name: "TRAIT_KW" }, | ||
69 | SyntaxInfo { name: "IMPL_KW" }, | ||
58 | SyntaxInfo { name: "ERROR" }, | 70 | SyntaxInfo { name: "ERROR" }, |
59 | SyntaxInfo { name: "IDENT" }, | 71 | SyntaxInfo { name: "IDENT" }, |
60 | SyntaxInfo { name: "UNDERSCORE" }, | 72 | SyntaxInfo { name: "UNDERSCORE" }, |
@@ -112,3 +124,15 @@ static INFOS: [SyntaxInfo; 52] = [ | |||
112 | pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { | 124 | pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { |
113 | &INFOS[kind.0 as usize] | 125 | &INFOS[kind.0 as usize] |
114 | } | 126 | } |
127 | |||
128 | pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> { | ||
129 | match ident { | ||
130 | "use" => Some(USE_KW), | ||
131 | "fn" => Some(FN_KW), | ||
132 | "struct" => Some(STRUCT_KW), | ||
133 | "enum" => Some(ENUM_KW), | ||
134 | "trait" => Some(TRAIT_KW), | ||
135 | "impl" => Some(IMPL_KW), | ||
136 | _ => None, | ||
137 | } | ||
138 | } | ||
diff --git a/tests/data/lexer/0011_keywords.rs b/tests/data/lexer/0011_keywords.rs new file mode 100644 index 000000000..aa89d70c5 --- /dev/null +++ b/tests/data/lexer/0011_keywords.rs | |||
@@ -0,0 +1 @@ | |||
fn use struct trait enum impl | |||
diff --git a/tests/data/lexer/0011_keywords.txt b/tests/data/lexer/0011_keywords.txt new file mode 100644 index 000000000..d90047d1e --- /dev/null +++ b/tests/data/lexer/0011_keywords.txt | |||
@@ -0,0 +1,12 @@ | |||
1 | FN_KW 2 "fn" | ||
2 | WHITESPACE 1 " " | ||
3 | USE_KW 3 "use" | ||
4 | WHITESPACE 1 " " | ||
5 | STRUCT_KW 6 "struct" | ||
6 | WHITESPACE 1 " " | ||
7 | TRAIT_KW 5 "trait" | ||
8 | WHITESPACE 1 " " | ||
9 | ENUM_KW 4 "enum" | ||
10 | WHITESPACE 1 " " | ||
11 | IMPL_KW 4 "impl" | ||
12 | WHITESPACE 1 "\n" | ||