From ddc637c16120fb352183698f635fc93a68580f7b Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Sat, 30 Dec 2017 15:22:40 +0300
Subject: Lexer: start numbers

---
 src/lexer/mod.rs | 91 +++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 81 insertions(+), 10 deletions(-)

(limited to 'src/lexer/mod.rs')

diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
index 83a411cdd..afbbee4d0 100644
--- a/src/lexer/mod.rs
+++ b/src/lexer/mod.rs
@@ -22,16 +22,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
     // They are not identifiers, and are handled further down.
     let ident_start = is_ident_start(c) && !string_literal_start(c, ptr.next(), ptr.nnext());
     if ident_start {
-        let is_single_letter = match ptr.next() {
-            None => true,
-            Some(c) if !is_ident_continue(c) => true,
-            _ => false,
-        };
-        if is_single_letter {
-            return if c == '_' { UNDERSCORE } else { IDENT };
-        }
-        ptr.bump_while(is_ident_continue);
-        return IDENT;
+        return scan_ident(c, ptr);
     }
 
     if is_whitespace(c) {
@@ -39,9 +30,89 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind {
         return WHITESPACE;
     }
 
+    if is_dec_digit(c) {
+        return scan_number(c, ptr);
+    }
+
     ERROR
 }
 
+fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind {
+    let is_single_letter = match ptr.next() {
+        None => true,
+        Some(c) if !is_ident_continue(c) => true,
+        _ => false,
+    };
+    if is_single_letter {
+        return if c == '_' { UNDERSCORE } else { IDENT };
+    }
+    ptr.bump_while(is_ident_continue);
+    IDENT
+}
+
+fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind {
+    if c == '0' {
+        match ptr.next().unwrap_or('\0') {
+            'b' | 'o' => {
+                ptr.bump();
+                scan_digits(ptr, false);
+            }
+            'x' => {
+                ptr.bump();
+                scan_digits(ptr, true);
+            }
+            '0'...'9' | '_' | '.' | 'e' | 'E' => {
+                scan_digits(ptr, true);
+            }
+            _ => return INT_NUMBER,
+        }
+    } else {
+        scan_digits(ptr, false);
+    }
+
+    // might be a float, but don't be greedy if this is actually an
+    // integer literal followed by field/method access or a range pattern
+    // (`0..2` and `12.foo()`)
+    if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) {
+        // might have stuff after the ., and if it does, it needs to start
+        // with a number
+        ptr.bump();
+        scan_digits(ptr, false);
+        scan_float_exponent(ptr);
+        return FLOAT_NUMBER;
+    }
+    // it might be a float if it has an exponent
+    if ptr.next_is('e') || ptr.next_is('E') {
+        scan_float_exponent(ptr);
+        return FLOAT_NUMBER;
+    }
+    INT_NUMBER
+}
+
+fn scan_digits(ptr: &mut Ptr, allow_hex: bool) {
+    while let Some(c) = ptr.next() {
+        match c {
+            '_' | '0'...'9' => {
+                ptr.bump();
+            }
+            'a'...'f' | 'A' ... 'F' if allow_hex => {
+                ptr.bump();
+            }
+            _ => return
+        }
+    }
+}
+
+fn scan_float_exponent(ptr: &mut Ptr) {
+    if ptr.next_is('e') || ptr.next_is('E') {
+        ptr.bump();
+        if ptr.next_is('-') || ptr.next_is('+') {
+            ptr.bump();
+        }
+        scan_digits(ptr, false);
+    }
+}
+
 fn string_literal_start(c: char, c1: Option<char>, c2: Option<char>) -> bool {
     match (c, c1, c2) {
         ('r', Some('"'), _) |
-- 
cgit v1.2.3