From 8103772a10f00378c4dcdd09f9af310c23146933 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sat, 30 Dec 2017 01:01:57 +0300 Subject: Lexer: underscore --- src/lexer/mod.rs | 8 ++++++++ tests/data/lexer/0003_ident.rs | 1 + tests/data/lexer/0003_ident.txt | 14 ++++++++++++++ validation.md | 1 + 4 files changed, 24 insertions(+) create mode 100644 tests/data/lexer/0003_ident.rs create mode 100644 tests/data/lexer/0003_ident.txt diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index ecea664da..83a411cdd 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -22,6 +22,14 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { // They are not identifiers, and are handled further down. let ident_start = is_ident_start(c) && !string_literal_start(c, ptr.next(), ptr.nnext()); if ident_start { + let is_single_letter = match ptr.next() { + None => true, + Some(c) if !is_ident_continue(c) => true, + _ => false, + }; + if is_single_letter { + return if c == '_' { UNDERSCORE } else { IDENT }; + } ptr.bump_while(is_ident_continue); return IDENT; } diff --git a/tests/data/lexer/0003_ident.rs b/tests/data/lexer/0003_ident.rs new file mode 100644 index 000000000..c05c9c009 --- /dev/null +++ b/tests/data/lexer/0003_ident.rs @@ -0,0 +1 @@ +foo foo_ _foo _ __ x привет diff --git a/tests/data/lexer/0003_ident.txt b/tests/data/lexer/0003_ident.txt new file mode 100644 index 000000000..eec82fb91 --- /dev/null +++ b/tests/data/lexer/0003_ident.txt @@ -0,0 +1,14 @@ +IDENT 3 +WHITESPACE 1 +IDENT 4 +WHITESPACE 1 +IDENT 4 +WHITESPACE 1 +UNDERSCORE 1 +WHITESPACE 1 +IDENT 2 +WHITESPACE 1 +IDENT 1 +WHITESPACE 1 +IDENT 12 +WHITESPACE 1 diff --git a/validation.md b/validation.md index 9cfec5309..3706760ba 100644 --- a/validation.md +++ b/validation.md @@ -1,4 +1,5 @@ Fixmes: * Fix `is_whitespace`, add more test +* Add more thorough tests for idents for XID_Start & XID_Continue -- cgit v1.2.3