about summary refs log tree commit diff
diff options
context:
space:
mode:
author Aleksey Kladov <[email protected]> 2018-01-01 15:58:46 +0000
committer Aleksey Kladov <[email protected]> 2018-01-01 15:58:46 +0000
commit cb362626f326a565aca34c1a11c95dcb7152b798 (patch)
tree 5a1cc081e36b4061f8e9275db9bf14ed71e924f9
parent 0af33a2587e4fb96e5001492792f1e926d576e27 (diff)
Parser: guess what? Groundwork!
-rw-r--r-- grammar.ron 10
-rw-r--r-- src/bin/gen.rs 19
-rw-r--r-- src/lexer/mod.rs 3
-rw-r--r-- src/lexer/ptr.rs 5
-rw-r--r-- src/parser/event_parser/grammar.rs 62
-rw-r--r-- src/parser/event_parser/parser.rs 15
-rw-r--r-- src/syntax_kinds.rs 130
-rw-r--r-- tests/data/lexer/0011_keywords.rs 1
-rw-r--r-- tests/data/lexer/0011_keywords.txt 12
9 files changed, 199 insertions, 58 deletions
diff --git a/grammar.ron b/grammar.ron
index 439c4ef9c..fb2c6d90e 100644
--- a/grammar.ron
+++ b/grammar.ron
@@ -1,4 +1,12 @@
1Grammar( 1Grammar(
2 keywords: [
3 "use",
4 "fn",
5 "struct",
6 "enum",
7 "trait",
8 "impl",
9 ],
2 tokens: [ 10 tokens: [
3 "ERROR", 11 "ERROR",
4 "IDENT", 12 "IDENT",
@@ -53,6 +61,6 @@ Grammar(
53 "SHEBANG", 61 "SHEBANG",
54 ], 62 ],
55 nodes: [ 63 nodes: [
56 "FILE" 64 "FILE",
57 ] 65 ]
58) \ No newline at end of file 66) \ No newline at end of file
diff --git a/src/bin/gen.rs b/src/bin/gen.rs
index f5a66d9f2..9d7f7e389 100644
--- a/src/bin/gen.rs
+++ b/src/bin/gen.rs
@@ -17,6 +17,7 @@ fn main() {
17 17
18#[derive(Deserialize)] 18#[derive(Deserialize)]
19struct Grammar { 19struct Grammar {
20 keywords: Vec<String>,
20 tokens: Vec<String>, 21 tokens: Vec<String>,
21 nodes: Vec<String>, 22 nodes: Vec<String>,
22} 23}
@@ -33,8 +34,10 @@ impl Grammar {
33 acc.push_str("use tree::{SyntaxKind, SyntaxInfo};\n"); 34 acc.push_str("use tree::{SyntaxKind, SyntaxInfo};\n");
34 acc.push_str("\n"); 35 acc.push_str("\n");
35 36
36 let syntax_kinds: Vec<&String> = 37 let syntax_kinds: Vec<String> =
37 self.tokens.iter().chain(self.nodes.iter()) 38 self.keywords.iter().map(|kw| kw_token(kw))
39 .chain(self.tokens.iter().cloned())
40 .chain(self.nodes.iter().cloned())
38 .collect(); 41 .collect();
39 42
40 for (idx, kind) in syntax_kinds.iter().enumerate() { 43 for (idx, kind) in syntax_kinds.iter().enumerate() {
@@ -60,6 +63,14 @@ impl Grammar {
60 63
61 acc.push_str("pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {\n"); 64 acc.push_str("pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {\n");
62 acc.push_str(" &INFOS[kind.0 as usize]\n"); 65 acc.push_str(" &INFOS[kind.0 as usize]\n");
66 acc.push_str("}\n\n");
67 acc.push_str("pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {\n");
68 acc.push_str(" match ident {\n");
69 for kw in self.keywords.iter() {
70 write!(acc, " {:?} => Some({}),\n", kw, kw_token(kw)).unwrap();
71 }
72 acc.push_str(" _ => None,\n");
73 acc.push_str(" }\n");
63 acc.push_str("}\n"); 74 acc.push_str("}\n");
64 acc 75 acc
65 } 76 }
@@ -77,4 +88,8 @@ fn generated_file() -> PathBuf {
77 88
78fn scream(word: &str) -> String { 89fn scream(word: &str) -> String {
79 word.chars().map(|c| c.to_ascii_uppercase()).collect() 90 word.chars().map(|c| c.to_ascii_uppercase()).collect()
91}
92
93fn kw_token(keyword: &str) -> String {
94 format!("{}_KW", scream(keyword))
80} \ No newline at end of file 95} \ No newline at end of file
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 7c4259763..bc5344b5f 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -187,6 +187,9 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
187 return if c == '_' { UNDERSCORE } else { IDENT }; 187 return if c == '_' { UNDERSCORE } else { IDENT };
188 } 188 }
189 ptr.bump_while(is_ident_continue); 189 ptr.bump_while(is_ident_continue);
190 if let Some(kind) = ident_to_keyword(ptr.current_token_text()) {
191 return kind;
192 }
190 IDENT 193 IDENT
191} 194}
192 195
diff --git a/src/lexer/ptr.rs b/src/lexer/ptr.rs
index 2f759119a..ff6ef11fc 100644
--- a/src/lexer/ptr.rs
+++ b/src/lexer/ptr.rs
@@ -59,6 +59,11 @@ impl<'s> Ptr<'s> {
59 } 59 }
60 } 60 }
61 61
62 pub fn current_token_text(&self) -> &str {
63 let len: u32 = self.len.into();
64 &self.text[..len as usize]
65 }
66
62 fn chars(&self) -> Chars { 67 fn chars(&self) -> Chars {
63 let len: u32 = self.len.into(); 68 let len: u32 = self.len.into();
64 self.text[len as usize ..].chars() 69 self.text[len as usize ..].chars()
diff --git a/src/parser/event_parser/grammar.rs b/src/parser/event_parser/grammar.rs
index c3496cccd..5219ed535 100644
--- a/src/parser/event_parser/grammar.rs
+++ b/src/parser/event_parser/grammar.rs
@@ -3,8 +3,68 @@ use super::parser::Parser;
3 3
4use syntax_kinds::*; 4use syntax_kinds::*;
5 5
6// Items //
7
6pub fn file(p: &mut Parser) { 8pub fn file(p: &mut Parser) {
7 p.start(FILE); 9 p.start(FILE);
8 //TODO: parse_shebang 10 shebang(p);
11 inner_attributes(p);
12 mod_items(p);
13 p.finish();
14}
15
16type Result = ::std::result::Result<(), ()>;
17const OK: Result = Ok(());
18const ERR: Result = Err(());
19
20fn shebang(_: &mut Parser) {
21 //TODO
22}
23
24fn inner_attributes(_: &mut Parser) {
25 //TODO
26}
27
28fn mod_items(p: &mut Parser) {
29 loop {
30 skip_until_item(p);
31 if p.is_eof() {
32 return;
33 }
34 if item(p).is_err() {
35 skip_one_token(p);
36 }
37 }
38}
39
40fn item(p: &mut Parser) -> Result {
41 outer_attributes(p)?;
42 visibility(p)?;
43 ERR
44}
45
46
47
48// Paths, types, attributes, and stuff //
49
50fn outer_attributes(_: &mut Parser) -> Result {
51 OK
52}
53
54fn visibility(_: &mut Parser) -> Result {
55 OK
56}
57
58// Expressions //
59
60// Error recovery and high-order utils //
61
62fn skip_until_item(_: &mut Parser) {
63 //TODO
64}
65
66fn skip_one_token(p: &mut Parser) {
67 p.start(ERROR);
68 p.bump().unwrap();
9 p.finish(); 69 p.finish();
10} \ No newline at end of file 70} \ No newline at end of file
diff --git a/src/parser/event_parser/parser.rs b/src/parser/event_parser/parser.rs
index 9592b90c9..0e4d44b79 100644
--- a/src/parser/event_parser/parser.rs
+++ b/src/parser/event_parser/parser.rs
@@ -34,10 +34,14 @@ impl<'t> Parser<'t> {
34 } 34 }
35 35
36 pub(crate) fn into_events(self) -> Vec<Event> { 36 pub(crate) fn into_events(self) -> Vec<Event> {
37 assert!(self.pos == self.non_ws_tokens.len()); 37 assert!(self.is_eof());
38 self.events 38 self.events
39 } 39 }
40 40
41 pub(crate) fn is_eof(&self) -> bool {
42 self.pos == self.non_ws_tokens.len()
43 }
44
41 pub(crate) fn start(&mut self, kind: SyntaxKind) { 45 pub(crate) fn start(&mut self, kind: SyntaxKind) {
42 self.event(Event::Start { kind }); 46 self.event(Event::Start { kind });
43 } 47 }
@@ -46,6 +50,15 @@ impl<'t> Parser<'t> {
46 self.event(Event::Finish); 50 self.event(Event::Finish);
47 } 51 }
48 52
53 pub(crate) fn bump(&mut self) -> Option<SyntaxKind> {
54 if self.is_eof() {
55 return None;
56 }
57 let idx = self.non_ws_tokens[self.pos].0;
58 self.pos += 1;
59 Some(self.raw_tokens[idx].kind)
60 }
61
49 fn event(&mut self, event: Event) { 62 fn event(&mut self, event: Event) {
50 self.events.push(event) 63 self.events.push(event)
51 } 64 }
diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs
index b83f48dd8..a1bcad062 100644
--- a/src/syntax_kinds.rs
+++ b/src/syntax_kinds.rs
@@ -1,60 +1,72 @@
1// Generated from grammar.ron 1// Generated from grammar.ron
2use tree::{SyntaxKind, SyntaxInfo}; 2use tree::{SyntaxKind, SyntaxInfo};
3 3
4pub const ERROR: SyntaxKind = SyntaxKind(0); 4pub const USE_KW: SyntaxKind = SyntaxKind(0);
5pub const IDENT: SyntaxKind = SyntaxKind(1); 5pub const FN_KW: SyntaxKind = SyntaxKind(1);
6pub const UNDERSCORE: SyntaxKind = SyntaxKind(2); 6pub const STRUCT_KW: SyntaxKind = SyntaxKind(2);
7pub const WHITESPACE: SyntaxKind = SyntaxKind(3); 7pub const ENUM_KW: SyntaxKind = SyntaxKind(3);
8pub const INT_NUMBER: SyntaxKind = SyntaxKind(4); 8pub const TRAIT_KW: SyntaxKind = SyntaxKind(4);
9pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(5); 9pub const IMPL_KW: SyntaxKind = SyntaxKind(5);
10pub const SEMI: SyntaxKind = SyntaxKind(6); 10pub const ERROR: SyntaxKind = SyntaxKind(6);
11pub const COMMA: SyntaxKind = SyntaxKind(7); 11pub const IDENT: SyntaxKind = SyntaxKind(7);
12pub const DOT: SyntaxKind = SyntaxKind(8); 12pub const UNDERSCORE: SyntaxKind = SyntaxKind(8);
13pub const DOTDOT: SyntaxKind = SyntaxKind(9); 13pub const WHITESPACE: SyntaxKind = SyntaxKind(9);
14pub const DOTDOTDOT: SyntaxKind = SyntaxKind(10); 14pub const INT_NUMBER: SyntaxKind = SyntaxKind(10);
15pub const DOTDOTEQ: SyntaxKind = SyntaxKind(11); 15pub const FLOAT_NUMBER: SyntaxKind = SyntaxKind(11);
16pub const L_PAREN: SyntaxKind = SyntaxKind(12); 16pub const SEMI: SyntaxKind = SyntaxKind(12);
17pub const R_PAREN: SyntaxKind = SyntaxKind(13); 17pub const COMMA: SyntaxKind = SyntaxKind(13);
18pub const L_CURLY: SyntaxKind = SyntaxKind(14); 18pub const DOT: SyntaxKind = SyntaxKind(14);
19pub const R_CURLY: SyntaxKind = SyntaxKind(15); 19pub const DOTDOT: SyntaxKind = SyntaxKind(15);
20pub const L_BRACK: SyntaxKind = SyntaxKind(16); 20pub const DOTDOTDOT: SyntaxKind = SyntaxKind(16);
21pub const R_BRACK: SyntaxKind = SyntaxKind(17); 21pub const DOTDOTEQ: SyntaxKind = SyntaxKind(17);
22pub const L_ANGLE: SyntaxKind = SyntaxKind(18); 22pub const L_PAREN: SyntaxKind = SyntaxKind(18);
23pub const R_ANGLE: SyntaxKind = SyntaxKind(19); 23pub const R_PAREN: SyntaxKind = SyntaxKind(19);
24pub const AT: SyntaxKind = SyntaxKind(20); 24pub const L_CURLY: SyntaxKind = SyntaxKind(20);
25pub const POUND: SyntaxKind = SyntaxKind(21); 25pub const R_CURLY: SyntaxKind = SyntaxKind(21);
26pub const TILDE: SyntaxKind = SyntaxKind(22); 26pub const L_BRACK: SyntaxKind = SyntaxKind(22);
27pub const QUESTION: SyntaxKind = SyntaxKind(23); 27pub const R_BRACK: SyntaxKind = SyntaxKind(23);
28pub const COLON: SyntaxKind = SyntaxKind(24); 28pub const L_ANGLE: SyntaxKind = SyntaxKind(24);
29pub const COLONCOLON: SyntaxKind = SyntaxKind(25); 29pub const R_ANGLE: SyntaxKind = SyntaxKind(25);
30pub const DOLLAR: SyntaxKind = SyntaxKind(26); 30pub const AT: SyntaxKind = SyntaxKind(26);
31pub const EQ: SyntaxKind = SyntaxKind(27); 31pub const POUND: SyntaxKind = SyntaxKind(27);
32pub const EQEQ: SyntaxKind = SyntaxKind(28); 32pub const TILDE: SyntaxKind = SyntaxKind(28);
33pub const FAT_ARROW: SyntaxKind = SyntaxKind(29); 33pub const QUESTION: SyntaxKind = SyntaxKind(29);
34pub const NEQ: SyntaxKind = SyntaxKind(30); 34pub const COLON: SyntaxKind = SyntaxKind(30);
35pub const NOT: SyntaxKind = SyntaxKind(31); 35pub const COLONCOLON: SyntaxKind = SyntaxKind(31);
36pub const LIFETIME: SyntaxKind = SyntaxKind(32); 36pub const DOLLAR: SyntaxKind = SyntaxKind(32);
37pub const CHAR: SyntaxKind = SyntaxKind(33); 37pub const EQ: SyntaxKind = SyntaxKind(33);
38pub const BYTE: SyntaxKind = SyntaxKind(34); 38pub const EQEQ: SyntaxKind = SyntaxKind(34);
39pub const STRING: SyntaxKind = SyntaxKind(35); 39pub const FAT_ARROW: SyntaxKind = SyntaxKind(35);
40pub const RAW_STRING: SyntaxKind = SyntaxKind(36); 40pub const NEQ: SyntaxKind = SyntaxKind(36);
41pub const BYTE_STRING: SyntaxKind = SyntaxKind(37); 41pub const NOT: SyntaxKind = SyntaxKind(37);
42pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(38); 42pub const LIFETIME: SyntaxKind = SyntaxKind(38);
43pub const PLUS: SyntaxKind = SyntaxKind(39); 43pub const CHAR: SyntaxKind = SyntaxKind(39);
44pub const MINUS: SyntaxKind = SyntaxKind(40); 44pub const BYTE: SyntaxKind = SyntaxKind(40);
45pub const STAR: SyntaxKind = SyntaxKind(41); 45pub const STRING: SyntaxKind = SyntaxKind(41);
46pub const SLASH: SyntaxKind = SyntaxKind(42); 46pub const RAW_STRING: SyntaxKind = SyntaxKind(42);
47pub const CARET: SyntaxKind = SyntaxKind(43); 47pub const BYTE_STRING: SyntaxKind = SyntaxKind(43);
48pub const PERCENT: SyntaxKind = SyntaxKind(44); 48pub const RAW_BYTE_STRING: SyntaxKind = SyntaxKind(44);
49pub const AMPERSAND: SyntaxKind = SyntaxKind(45); 49pub const PLUS: SyntaxKind = SyntaxKind(45);
50pub const PIPE: SyntaxKind = SyntaxKind(46); 50pub const MINUS: SyntaxKind = SyntaxKind(46);
51pub const THIN_ARROW: SyntaxKind = SyntaxKind(47); 51pub const STAR: SyntaxKind = SyntaxKind(47);
52pub const COMMENT: SyntaxKind = SyntaxKind(48); 52pub const SLASH: SyntaxKind = SyntaxKind(48);
53pub const DOC_COMMENT: SyntaxKind = SyntaxKind(49); 53pub const CARET: SyntaxKind = SyntaxKind(49);
54pub const SHEBANG: SyntaxKind = SyntaxKind(50); 54pub const PERCENT: SyntaxKind = SyntaxKind(50);
55pub const FILE: SyntaxKind = SyntaxKind(51); 55pub const AMPERSAND: SyntaxKind = SyntaxKind(51);
56pub const PIPE: SyntaxKind = SyntaxKind(52);
57pub const THIN_ARROW: SyntaxKind = SyntaxKind(53);
58pub const COMMENT: SyntaxKind = SyntaxKind(54);
59pub const DOC_COMMENT: SyntaxKind = SyntaxKind(55);
60pub const SHEBANG: SyntaxKind = SyntaxKind(56);
61pub const FILE: SyntaxKind = SyntaxKind(57);
56 62
57static INFOS: [SyntaxInfo; 52] = [ 63static INFOS: [SyntaxInfo; 58] = [
64 SyntaxInfo { name: "USE_KW" },
65 SyntaxInfo { name: "FN_KW" },
66 SyntaxInfo { name: "STRUCT_KW" },
67 SyntaxInfo { name: "ENUM_KW" },
68 SyntaxInfo { name: "TRAIT_KW" },
69 SyntaxInfo { name: "IMPL_KW" },
58 SyntaxInfo { name: "ERROR" }, 70 SyntaxInfo { name: "ERROR" },
59 SyntaxInfo { name: "IDENT" }, 71 SyntaxInfo { name: "IDENT" },
60 SyntaxInfo { name: "UNDERSCORE" }, 72 SyntaxInfo { name: "UNDERSCORE" },
@@ -112,3 +124,15 @@ static INFOS: [SyntaxInfo; 52] = [
112pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { 124pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo {
113 &INFOS[kind.0 as usize] 125 &INFOS[kind.0 as usize]
114} 126}
127
128pub(crate) fn ident_to_keyword(ident: &str) -> Option<SyntaxKind> {
129 match ident {
130 "use" => Some(USE_KW),
131 "fn" => Some(FN_KW),
132 "struct" => Some(STRUCT_KW),
133 "enum" => Some(ENUM_KW),
134 "trait" => Some(TRAIT_KW),
135 "impl" => Some(IMPL_KW),
136 _ => None,
137 }
138}
diff --git a/tests/data/lexer/0011_keywords.rs b/tests/data/lexer/0011_keywords.rs
new file mode 100644
index 000000000..aa89d70c5
--- /dev/null
+++ b/tests/data/lexer/0011_keywords.rs
@@ -0,0 +1 @@
fn use struct trait enum impl
diff --git a/tests/data/lexer/0011_keywords.txt b/tests/data/lexer/0011_keywords.txt
new file mode 100644
index 000000000..d90047d1e
--- /dev/null
+++ b/tests/data/lexer/0011_keywords.txt
@@ -0,0 +1,12 @@
1FN_KW 2 "fn"
2WHITESPACE 1 " "
3USE_KW 3 "use"
4WHITESPACE 1 " "
5STRUCT_KW 6 "struct"
6WHITESPACE 1 " "
7TRAIT_KW 5 "trait"
8WHITESPACE 1 " "
9ENUM_KW 4 "enum"
10WHITESPACE 1 " "
11IMPL_KW 4 "impl"
12WHITESPACE 1 "\n"