From cb6f07618440859a26fd6adea63bd030da375952 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 31 Dec 2017 16:42:22 +0300 Subject: Lexer: comments groundwork --- grammar.ron | 3 +++ src/lexer/comments.rs | 11 +++++++++++ src/lexer/mod.rs | 23 ++++++++++++++++++----- src/syntax_kinds.rs | 8 +++++++- 4 files changed, 39 insertions(+), 6 deletions(-) create mode 100644 src/lexer/comments.rs diff --git a/grammar.ron b/grammar.ron index 482e00f91..71e354dac 100644 --- a/grammar.ron +++ b/grammar.ron @@ -48,5 +48,8 @@ Grammar( "AMPERSAND", "PIPE", "THIN_ARROW", + "COMMENT", + "DOC_COMMENT", + "SHEBANG", ] ) \ No newline at end of file diff --git a/src/lexer/comments.rs b/src/lexer/comments.rs new file mode 100644 index 000000000..c61c85824 --- /dev/null +++ b/src/lexer/comments.rs @@ -0,0 +1,11 @@ +use lexer::ptr::Ptr; + +use {SyntaxKind}; + +pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { + false +} + +pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option<SyntaxKind> { + None +} \ No newline at end of file diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index 3e49b1c2b..f46746bee 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -13,6 +13,9 @@ use self::numbers::scan_number; mod strings; use self::strings::{is_string_literal_start, scan_char, scan_byte_char_or_string, scan_string, scan_raw_string}; +mod comments; +use self::comments::{scan_shebang, scan_comment}; + pub fn next_token(text: &str) -> Token { assert!(!text.is_empty()); let mut ptr = Ptr::new(text); @@ -23,16 +26,26 @@ pub fn next_token(text: &str) -> Token { } fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { - let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.next(), ptr.nnext()); - if ident_start { - return scan_ident(c, ptr); - } - if is_whitespace(c) { ptr.bump_while(is_whitespace); return WHITESPACE; } + match c { + '#' => if scan_shebang(ptr) { + return SHEBANG; + } + '/' => if let Some(kind) = scan_comment(ptr) { + return kind; + } + _ => (), + } + + let 
ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.next(), ptr.nnext()); + if ident_start { + return scan_ident(c, ptr); + } + if is_dec_digit(c) { let kind = scan_number(c, ptr); scan_literal_suffix(ptr); diff --git a/src/syntax_kinds.rs b/src/syntax_kinds.rs index 83fabe403..ec2a036b9 100644 --- a/src/syntax_kinds.rs +++ b/src/syntax_kinds.rs @@ -49,8 +49,11 @@ pub const PERCENT: SyntaxKind = SyntaxKind(44); pub const AMPERSAND: SyntaxKind = SyntaxKind(45); pub const PIPE: SyntaxKind = SyntaxKind(46); pub const THIN_ARROW: SyntaxKind = SyntaxKind(47); +pub const COMMENT: SyntaxKind = SyntaxKind(48); +pub const DOC_COMMENT: SyntaxKind = SyntaxKind(49); +pub const SHEBANG: SyntaxKind = SyntaxKind(50); -static INFOS: [SyntaxInfo; 48] = [ +static INFOS: [SyntaxInfo; 51] = [ SyntaxInfo { name: "ERROR" }, SyntaxInfo { name: "IDENT" }, SyntaxInfo { name: "UNDERSCORE" }, @@ -99,6 +102,9 @@ static INFOS: [SyntaxInfo; 48] = [ SyntaxInfo { name: "AMPERSAND" }, SyntaxInfo { name: "PIPE" }, SyntaxInfo { name: "THIN_ARROW" }, + SyntaxInfo { name: "COMMENT" }, + SyntaxInfo { name: "DOC_COMMENT" }, + SyntaxInfo { name: "SHEBANG" }, ]; pub(crate) fn syntax_info(kind: SyntaxKind) -> &'static SyntaxInfo { -- cgit v1.2.3