From f194750a2a4d5f034e89b937e1271637b884a503 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 7 Jan 2018 19:50:54 +0300 Subject: G: start attributes --- grammar.ron | 3 +- src/lexer/mod.rs | 2 +- src/parser/event_parser/grammar.rs | 26 ++++- src/parser/event_parser/parser.rs | 8 ++ src/syntax_kinds.rs | 8 +- tests/data/lexer/0005_symbols.txt | 2 +- tests/data/parser/ok/0006_inner_attributes.rs | 10 ++ tests/data/parser/ok/0006_inner_attributes.txt | 135 +++++++++++++++++++++++++ validation.md | 5 + 9 files changed, 188 insertions(+), 11 deletions(-) create mode 100644 tests/data/parser/ok/0006_inner_attributes.rs create mode 100644 tests/data/parser/ok/0006_inner_attributes.txt diff --git a/grammar.ron b/grammar.ron index 9beb7f7aa..39cb0a543 100644 --- a/grammar.ron +++ b/grammar.ron @@ -39,7 +39,7 @@ Grammar( "EQEQ", "FAT_ARROW", "NEQ", - "NOT", + "EXCL", "LIFETIME", "CHAR", "BYTE", @@ -65,5 +65,6 @@ Grammar( "STRUCT_ITEM", "STRUCT_FIELD", "FN_ITEM", + "ATTR", ] ) \ No newline at end of file diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index bc5344b5f..842059a42 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -129,7 +129,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { ptr.bump(); NEQ } - _ => NOT, + _ => EXCL, }, '-' => return if ptr.next_is('>') { ptr.bump(); diff --git a/src/parser/event_parser/grammar.rs b/src/parser/event_parser/grammar.rs index 09b2f02b2..d657ee1cd 100644 --- a/src/parser/event_parser/grammar.rs +++ b/src/parser/event_parser/grammar.rs @@ -17,10 +17,6 @@ pub fn file(p: &mut Parser) { }) } -fn inner_attributes(_: &mut Parser) { - //TODO -} - fn item_first(p: &Parser) -> bool { let current = match p.current() { Some(c) => c, @@ -58,6 +54,20 @@ fn fn_item(p: &mut Parser) { // Paths, types, attributes, and stuff // +fn inner_attributes(p: &mut Parser) { + many(p, inner_attribute) +} + +fn inner_attribute(p: &mut Parser) -> bool { + if !(p.lookahead(&[EXCL, POUND])) { + return false; + } + node(p, ATTR, |p| { + p.bump_n(2); + }); + true +} + fn outer_attributes(_: &mut Parser) { } @@ -143,9 +153,15 @@ impl<'p> Parser<'p> { } } - pub(crate) fn optional(&mut self, kind: SyntaxKind) { + fn optional(&mut self, kind: SyntaxKind) { if self.current_is(kind) { self.bump(); } } + + fn bump_n(&mut self, n: u8) { + for _ in 0..n { + self.bump(); + } + } } \ No newline at end of file diff --git a/src/parser/event_parser/parser.rs b/src/parser/event_parser/parser.rs index eafa03521..f8330af4e 100644 --- a/src/parser/event_parser/parser.rs +++ b/src/parser/event_parser/parser.rs @@ -86,6 +86,14 @@ impl<'t> Parser<'t> { Some(kind) } + pub(crate) fn lookahead(&self, kinds: &[SyntaxKind]) -> bool { + if self.non_ws_tokens[self.pos..].len() < kinds.len() { + return false + } + kinds.iter().zip(self.non_ws_tokens[self.pos..].iter()) + .all(|(&k1, &(idx, _))| k1 == self.raw_tokens[idx].kind) + } + pub(crate) fn curly_block(&mut self, f: F) -> bool { let old_level = self.curly_level; let old_limit = self.curly_limit; diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs index 6099cd6e0..67c840a3e 100644 --- a/src/syntax_kinds.rs +++ b/src/syntax_kinds.rs @@ -38,7 +38,7 @@ pub const EQ: SyntaxKind = SyntaxKind(33); pub const EQEQ: SyntaxKind = SyntaxKind(34); pub const FAT_ARROW: SyntaxKind = SyntaxKind(35); pub const NEQ: SyntaxKind = SyntaxKind(36); -pub const NOT: SyntaxKind = SyntaxKind(37); +pub const EXCL: SyntaxKind = SyntaxKind(37); pub const LIFETIME: SyntaxKind = SyntaxKind(38); pub const CHAR: SyntaxKind = SyntaxKind(39); pub const BYTE: SyntaxKind = SyntaxKind(40); @@ -62,8 +62,9 @@ pub const FILE: SyntaxKind = SyntaxKind(57); pub const STRUCT_ITEM: SyntaxKind = SyntaxKind(58); pub const STRUCT_FIELD: SyntaxKind = SyntaxKind(59); pub const FN_ITEM: SyntaxKind = SyntaxKind(60); +pub const ATTR: SyntaxKind = SyntaxKind(61); -static INFOS: [SyntaxInfo; 61] = [ +static INFOS: [SyntaxInfo; 62] = [ SyntaxInfo { name: "USE_KW" }, SyntaxInfo { name: "FN_KW" }, SyntaxInfo { name: "STRUCT_KW" }, @@ -101,7 +102,7 @@ static INFOS: [SyntaxInfo; 61] = [ SyntaxInfo { name: "EQEQ" }, SyntaxInfo { name: "FAT_ARROW" }, SyntaxInfo { name: "NEQ" }, - SyntaxInfo { name: "NOT" }, + SyntaxInfo { name: "EXCL" }, SyntaxInfo { name: "LIFETIME" }, SyntaxInfo { name: "CHAR" }, SyntaxInfo { name: "BYTE" }, @@ -125,6 +126,7 @@ static INFOS: [SyntaxInfo; 61] = [ SyntaxInfo { name: "STRUCT_ITEM" }, SyntaxInfo { name: "STRUCT_FIELD" }, SyntaxInfo { name: "FN_ITEM" }, + SyntaxInfo { name: "ATTR" }, ]; pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { diff --git a/tests/data/lexer/0005_symbols.txt b/tests/data/lexer/0005_symbols.txt index ebb41accd..0f99c24cd 100644 --- a/tests/data/lexer/0005_symbols.txt +++ b/tests/data/lexer/0005_symbols.txt @@ -58,7 +58,7 @@ EQ 1 "=" WHITESPACE 1 " " FAT_ARROW 2 "=>" WHITESPACE 1 "\n" -NOT 1 "!" +EXCL 1 "!" WHITESPACE 1 " " NEQ 2 "!=" WHITESPACE 1 "\n" diff --git a/tests/data/parser/ok/0006_inner_attributes.rs b/tests/data/parser/ok/0006_inner_attributes.rs new file mode 100644 index 000000000..e81f8b1e8 --- /dev/null +++ b/tests/data/parser/ok/0006_inner_attributes.rs @@ -0,0 +1,10 @@ +#![attr] +#![attr(true)] +#![attr(ident)] +#![attr(ident, 100, true, "true", ident = 100, ident = "hello", ident(100))] +#![attr(100)] +#![attr(enabled = true)] +#![enabled(true)] +#![attr("hello")] +#![repr(C, align = 4)] +#![repr(C, align(4))] \ No newline at end of file diff --git a/tests/data/parser/ok/0006_inner_attributes.txt b/tests/data/parser/ok/0006_inner_attributes.txt new file mode 100644 index 000000000..c837979d1 --- /dev/null +++ b/tests/data/parser/ok/0006_inner_attributes.txt @@ -0,0 +1,135 @@ +FILE@[0; 236) + ERROR@[0; 236) + err: `expected item` + POUND@[0; 1) + EXCL@[1; 2) + L_BRACK@[2; 3) + IDENT@[3; 7) + R_BRACK@[7; 8) + WHITESPACE@[8; 9) + POUND@[9; 10) + EXCL@[10; 11) + L_BRACK@[11; 12) + IDENT@[12; 16) + L_PAREN@[16; 17) + IDENT@[17; 21) + R_PAREN@[21; 22) + R_BRACK@[22; 23) + WHITESPACE@[23; 24) + POUND@[24; 25) + EXCL@[25; 26) + L_BRACK@[26; 27) + IDENT@[27; 31) + L_PAREN@[31; 32) + IDENT@[32; 37) + R_PAREN@[37; 38) + R_BRACK@[38; 39) + WHITESPACE@[39; 40) + POUND@[40; 41) + EXCL@[41; 42) + L_BRACK@[42; 43) + IDENT@[43; 47) + L_PAREN@[47; 48) + IDENT@[48; 53) + COMMA@[53; 54) + WHITESPACE@[54; 55) + INT_NUMBER@[55; 58) + COMMA@[58; 59) + WHITESPACE@[59; 60) + IDENT@[60; 64) + COMMA@[64; 65) + WHITESPACE@[65; 66) + STRING@[66; 72) + COMMA@[72; 73) + WHITESPACE@[73; 74) + IDENT@[74; 79) + WHITESPACE@[79; 80) + EQ@[80; 81) + WHITESPACE@[81; 82) + INT_NUMBER@[82; 85) + COMMA@[85; 86) + WHITESPACE@[86; 87) + IDENT@[87; 92) + WHITESPACE@[92; 93) + EQ@[93; 94) + WHITESPACE@[94; 95) + STRING@[95; 102) + COMMA@[102; 103) + WHITESPACE@[103; 104) + IDENT@[104; 109) + L_PAREN@[109; 110) + INT_NUMBER@[110; 113) + R_PAREN@[113; 114) + R_PAREN@[114; 115) + R_BRACK@[115; 116) + WHITESPACE@[116; 117) + POUND@[117; 118) + EXCL@[118; 119) + L_BRACK@[119; 120) + IDENT@[120; 124) + L_PAREN@[124; 125) + INT_NUMBER@[125; 128) + R_PAREN@[128; 129) + R_BRACK@[129; 130) + WHITESPACE@[130; 131) + POUND@[131; 132) + EXCL@[132; 133) + L_BRACK@[133; 134) + IDENT@[134; 138) + L_PAREN@[138; 139) + IDENT@[139; 146) + WHITESPACE@[146; 147) + EQ@[147; 148) + WHITESPACE@[148; 149) + IDENT@[149; 153) + R_PAREN@[153; 154) + R_BRACK@[154; 155) + WHITESPACE@[155; 156) + POUND@[156; 157) + EXCL@[157; 158) + L_BRACK@[158; 159) + IDENT@[159; 166) + L_PAREN@[166; 167) + IDENT@[167; 171) + R_PAREN@[171; 172) + R_BRACK@[172; 173) + WHITESPACE@[173; 174) + POUND@[174; 175) + EXCL@[175; 176) + L_BRACK@[176; 177) + IDENT@[177; 181) + L_PAREN@[181; 182) + STRING@[182; 189) + R_PAREN@[189; 190) + R_BRACK@[190; 191) + WHITESPACE@[191; 192) + POUND@[192; 193) + EXCL@[193; 194) + L_BRACK@[194; 195) + IDENT@[195; 199) + L_PAREN@[199; 200) + IDENT@[200; 201) + COMMA@[201; 202) + WHITESPACE@[202; 203) + IDENT@[203; 208) + WHITESPACE@[208; 209) + EQ@[209; 210) + WHITESPACE@[210; 211) + INT_NUMBER@[211; 212) + R_PAREN@[212; 213) + R_BRACK@[213; 214) + WHITESPACE@[214; 215) + POUND@[215; 216) + EXCL@[216; 217) + L_BRACK@[217; 218) + IDENT@[218; 222) + L_PAREN@[222; 223) + IDENT@[223; 224) + COMMA@[224; 225) + WHITESPACE@[225; 226) + IDENT@[226; 231) + L_PAREN@[231; 232) + INT_NUMBER@[232; 233) + R_PAREN@[233; 234) + R_PAREN@[234; 235) + R_BRACK@[235; 236) diff --git a/validation.md b/validation.md index e72de1ea2..2739bfcdd 100644 --- a/validation.md +++ b/validation.md @@ -1,5 +1,6 @@ Fixmes: +Lexer: * Fix `is_whitespace`, add more tests * Add more thorough tests for idents for XID_Start & XID_Continue * Validate that float and integer literals use digits only of the appropriate @@ -7,3 +8,7 @@ Fixmes: * Validation for unclosed char literal * Strings are completely wrong: more tests and comparison with libsyntax. * Comment lexing is completely wrong + +Parser: +* Figure out what is the expected state of attribute grammar. + Token trees or something more structured? Token trees would be unfortunate: no extend selection =/ \ No newline at end of file -- cgit v1.2.3