From af0ae9ee0409ce2296dafebf977e353a5b871d80 Mon Sep 17 00:00:00 2001 From: Zach Lute Date: Tue, 4 Sep 2018 22:56:16 -0700 Subject: Updated Ptr methods to better match Parser method names. --- crates/libsyntax2/src/lexer/comments.rs | 12 +++++----- crates/libsyntax2/src/lexer/mod.rs | 22 ++++++++--------- crates/libsyntax2/src/lexer/numbers.rs | 12 +++++----- crates/libsyntax2/src/lexer/ptr.rs | 42 ++++++++++++++++++++++----------- crates/libsyntax2/src/lexer/strings.rs | 16 ++++++------- 5 files changed, 59 insertions(+), 45 deletions(-) (limited to 'crates/libsyntax2/src') diff --git a/crates/libsyntax2/src/lexer/comments.rs b/crates/libsyntax2/src/lexer/comments.rs index 01acb6515..eb417c2dc 100644 --- a/crates/libsyntax2/src/lexer/comments.rs +++ b/crates/libsyntax2/src/lexer/comments.rs @@ -3,7 +3,7 @@ use lexer::ptr::Ptr; use SyntaxKind::{self, *}; pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { - if ptr.next_is('!') && ptr.nnext_is('/') { + if ptr.at_str("!/") { ptr.bump(); ptr.bump(); bump_until_eol(ptr); @@ -14,15 +14,15 @@ pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { } fn scan_block_comment(ptr: &mut Ptr) -> Option { - if ptr.next_is('*') { + if ptr.at('*') { ptr.bump(); let mut depth: u32 = 1; while depth > 0 { - if ptr.next_is('*') && ptr.nnext_is('/') { + if ptr.at_str("*/") { depth -= 1; ptr.bump(); ptr.bump(); - } else if ptr.next_is('/') && ptr.nnext_is('*') { + } else if ptr.at_str("/*") { depth += 1; ptr.bump(); ptr.bump(); @@ -37,7 +37,7 @@ fn scan_block_comment(ptr: &mut Ptr) -> Option { } pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option { - if ptr.next_is('/') { + if ptr.at('/') { bump_until_eol(ptr); Some(COMMENT) } else { @@ -47,7 +47,7 @@ pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option { fn bump_until_eol(ptr: &mut Ptr) { loop { - if ptr.next_is('\n') || ptr.next_is('\r') && ptr.nnext_is('\n') { + if ptr.at('\n') || ptr.at_str("\r\n") { return; } if ptr.bump().is_none() { diff --git a/crates/libsyntax2/src/lexer/mod.rs b/crates/libsyntax2/src/lexer/mod.rs index f8fdc41ac..3e11db88b 100644 --- a/crates/libsyntax2/src/lexer/mod.rs +++ b/crates/libsyntax2/src/lexer/mod.rs @@ -67,7 +67,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { _ => (), } - let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.next(), ptr.nnext()); + let ident_start = is_ident_start(c) && !is_string_literal_start(c, ptr.current(), ptr.nth(1)); if ident_start { return scan_ident(c, ptr); } @@ -86,7 +86,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { match c { // Multi-byte tokens. '.' => { - return match (ptr.next(), ptr.nnext()) { + return match (ptr.current(), ptr.nth(1)) { (Some('.'), Some('.')) => { ptr.bump(); ptr.bump(); @@ -105,7 +105,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { }; } ':' => { - return match ptr.next() { + return match ptr.current() { Some(':') => { ptr.bump(); COLONCOLON @@ -114,7 +114,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { }; } '=' => { - return match ptr.next() { + return match ptr.current() { Some('=') => { ptr.bump(); EQEQ @@ -127,7 +127,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { }; } '!' => { - return match ptr.next() { + return match ptr.current() { Some('=') => { ptr.bump(); NEQ @@ -136,7 +136,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { }; } '-' => { - return if ptr.next_is('>') { + return if ptr.at('>') { ptr.bump(); THIN_ARROW } else { @@ -147,14 +147,14 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { // If the character is an ident start not followed by another single // quote, then this is a lifetime name: '\'' => { - return if ptr.next_is_p(is_ident_start) && !ptr.nnext_is('\'') { + return if ptr.at_p(is_ident_start) && !ptr.at_str("''") { ptr.bump(); - while ptr.next_is_p(is_ident_continue) { + while ptr.at_p(is_ident_continue) { ptr.bump(); } // lifetimes shouldn't end with a single quote // if we find one, then this is an invalid character literal - if ptr.next_is('\'') { + if ptr.at('\'') { ptr.bump(); return CHAR; // TODO: error reporting } @@ -186,7 +186,7 @@ fn next_token_inner(c: char, ptr: &mut Ptr) -> SyntaxKind { } fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { - let is_single_letter = match ptr.next() { + let is_single_letter = match ptr.current() { None => true, Some(c) if !is_ident_continue(c) => true, _ => false, @@ -202,7 +202,7 @@ fn scan_ident(c: char, ptr: &mut Ptr) -> SyntaxKind { } fn scan_literal_suffix(ptr: &mut Ptr) { - if ptr.next_is_p(is_ident_start) { + if ptr.at_p(is_ident_start) { ptr.bump(); } ptr.bump_while(is_ident_continue); diff --git a/crates/libsyntax2/src/lexer/numbers.rs b/crates/libsyntax2/src/lexer/numbers.rs index 5c4641a2d..22e7d4e99 100644 --- a/crates/libsyntax2/src/lexer/numbers.rs +++ b/crates/libsyntax2/src/lexer/numbers.rs @@ -5,7 +5,7 @@ use SyntaxKind::{self, *}; pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { if c == '0' { - match ptr.next().unwrap_or('\0') { + match ptr.current().unwrap_or('\0') { 'b' | 'o' => { ptr.bump(); scan_digits(ptr, false); @@ -26,7 +26,7 @@ pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { // might be a float, but don't be greedy if this is actually an // integer literal followed by field/method access or a range pattern // (`0..2` and `12.foo()`) - if ptr.next_is('.') && !(ptr.nnext_is('.') || ptr.nnext_is_p(is_ident_start)) { + if ptr.at('.') && !(ptr.at_str("..") || ptr.nth_is_p(1, is_ident_start)) { // might have stuff after the ., and if it does, it needs to start // with a number ptr.bump(); @@ -35,7 +35,7 @@ pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { return FLOAT_NUMBER; } // it might be a float if it has an exponent - if ptr.next_is('e') || ptr.next_is('E') { + if ptr.at('e') || ptr.at('E') { scan_float_exponent(ptr); return FLOAT_NUMBER; } @@ -43,7 +43,7 @@ pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { } fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { - while let Some(c) = ptr.next() { + while let Some(c) = ptr.current() { match c { '_' | '0'...'9' => { ptr.bump(); @@ -57,9 +57,9 @@ fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { } fn scan_float_exponent(ptr: &mut Ptr) { - if ptr.next_is('e') || ptr.next_is('E') { + if ptr.at('e') || ptr.at('E') { ptr.bump(); - if ptr.next_is('-') || ptr.next_is('+') { + if ptr.at('-') || ptr.at('+') { ptr.bump(); } scan_digits(ptr, false); diff --git a/crates/libsyntax2/src/lexer/ptr.rs b/crates/libsyntax2/src/lexer/ptr.rs index d1391fd5f..aa59e33cc 100644 --- a/crates/libsyntax2/src/lexer/ptr.rs +++ b/crates/libsyntax2/src/lexer/ptr.rs @@ -2,12 +2,14 @@ use TextUnit; use std::str::Chars; +/// A simple view into the characters of a string. pub(crate) struct Ptr<'s> { text: &'s str, len: TextUnit, } impl<'s> Ptr<'s> { + /// Creates a new `Ptr` from a string. pub fn new(text: &'s str) -> Ptr<'s> { Ptr { text, @@ -15,45 +17,55 @@ impl<'s> Ptr<'s> { } } + /// Gets the length of the remaining string. pub fn into_len(self) -> TextUnit { self.len } - pub fn next(&self) -> Option { + /// Gets the current character, if one exists. + pub fn current(&self) -> Option { self.chars().next() } - pub fn nnext(&self) -> Option { - let mut chars = self.chars(); - chars.next()?; - chars.next() + /// Gets the nth character from the current. + /// For example, 0 will return the current token, 1 will return the next, etc. + pub fn nth(&self, n: u32) -> Option { + let mut chars = self.chars().peekable(); + chars.by_ref().skip(n as usize).next() } - pub fn next_is(&self, c: char) -> bool { - self.next() == Some(c) + /// Checks whether the current character is `c`. + pub fn at(&self, c: char) -> bool { + self.current() == Some(c) } - pub fn nnext_is(&self, c: char) -> bool { - self.nnext() == Some(c) + /// Checks whether the next characters match `s`. + pub fn at_str(&self, s: &str) -> bool { + let chars = self.chars(); + chars.as_str().starts_with(s) } - pub fn next_is_p bool>(&self, p: P) -> bool { - self.next().map(p) == Some(true) + /// Checks whether the current character satisfies the predicate `p`. + pub fn at_p bool>(&self, p: P) -> bool { + self.current().map(p) == Some(true) } - pub fn nnext_is_p bool>(&self, p: P) -> bool { - self.nnext().map(p) == Some(true) + /// Checks whether the nth character satisfies the predicate `p`. + pub fn nth_is_p bool>(&self, n: u32, p: P) -> bool { + self.nth(n).map(p) == Some(true) } + /// Moves to the next character. pub fn bump(&mut self) -> Option { let ch = self.chars().next()?; self.len += TextUnit::of_char(ch); Some(ch) } + /// Moves to the next character as long as `pred` is satisfied. pub fn bump_while bool>(&mut self, pred: F) { loop { - match self.next() { + match self.current() { Some(c) if pred(c) => { self.bump(); } @@ -62,11 +74,13 @@ impl<'s> Ptr<'s> { } } + /// Returns the text up to the current point. pub fn current_token_text(&self) -> &str { let len: u32 = self.len.into(); &self.text[..len as usize] } + /// Returns an iterator over the remaining characters. fn chars(&self) -> Chars { let len: u32 = self.len.into(); self.text[len as usize..].chars() diff --git a/crates/libsyntax2/src/lexer/strings.rs b/crates/libsyntax2/src/lexer/strings.rs index e6ade54a4..5ff483d14 100644 --- a/crates/libsyntax2/src/lexer/strings.rs +++ b/crates/libsyntax2/src/lexer/strings.rs @@ -15,11 +15,11 @@ pub(crate) fn is_string_literal_start(c: char, c1: Option, c2: Option { ptr.bump(); - if ptr.next_is('\\') || ptr.next_is('\'') { + if ptr.at('\\') || ptr.at('\'') { ptr.bump(); } } @@ -57,11 +57,11 @@ pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind { } pub(crate) fn scan_string(ptr: &mut Ptr) { - while let Some(c) = ptr.next() { + while let Some(c) = ptr.current() { match c { '\\' => { ptr.bump(); - if ptr.next_is('\\') || ptr.next_is('"') { + if ptr.at('\\') || ptr.at('"') { ptr.bump(); } } @@ -78,11 +78,11 @@ pub(crate) fn scan_string(ptr: &mut Ptr) { pub(crate) fn scan_raw_string(ptr: &mut Ptr) { let mut hashes = 0; - while ptr.next_is('#') { + while ptr.at('#') { hashes += 1; ptr.bump(); } - if !ptr.next_is('"') { + if !ptr.at('"') { return; } ptr.bump(); @@ -90,7 +90,7 @@ pub(crate) fn scan_raw_string(ptr: &mut Ptr) { while let Some(c) = ptr.bump() { if c == '"' { let mut hashes_left = hashes; - while ptr.next_is('#') && hashes_left > 0{ + while ptr.at('#') && hashes_left > 0{ hashes_left -= 1; ptr.bump(); } @@ -110,7 +110,7 @@ fn scan_byte_string(ptr: &mut Ptr) { } fn scan_raw_byte_string(ptr: &mut Ptr) { - if !ptr.next_is('"') { + if !ptr.at('"') { return; } ptr.bump(); -- cgit v1.2.3 From d21fead150d502aa69db82d35967e5e9d73aed56 Mon Sep 17 00:00:00 2001 From: Zach Lute Date: Tue, 4 Sep 2018 23:26:11 -0700 Subject: Added tests for Ptr. --- crates/libsyntax2/src/lexer/ptr.rs | 78 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) (limited to 'crates/libsyntax2/src') diff --git a/crates/libsyntax2/src/lexer/ptr.rs b/crates/libsyntax2/src/lexer/ptr.rs index aa59e33cc..c9a5354ea 100644 --- a/crates/libsyntax2/src/lexer/ptr.rs +++ b/crates/libsyntax2/src/lexer/ptr.rs @@ -86,3 +86,81 @@ impl<'s> Ptr<'s> { self.text[len as usize..].chars() } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_current() { + let ptr = Ptr::new("test"); + assert_eq!(ptr.current(), Some('t')); + } + + #[test] + fn test_nth() { + let ptr = Ptr::new("test"); + assert_eq!(ptr.nth(0), Some('t')); + assert_eq!(ptr.nth(1), Some('e')); + assert_eq!(ptr.nth(2), Some('s')); + assert_eq!(ptr.nth(3), Some('t')); + assert_eq!(ptr.nth(4), None); + } + + #[test] + fn test_at() { + let ptr = Ptr::new("test"); + assert!(ptr.at('t')); + assert!(!ptr.at('a')); + } + + #[test] + fn test_at_str() { + let ptr = Ptr::new("test"); + assert!(ptr.at_str("t")); + assert!(ptr.at_str("te")); + assert!(ptr.at_str("test")); + assert!(!ptr.at_str("tests")); + assert!(!ptr.at_str("rust")); + } + + #[test] + fn test_at_p() { + let ptr = Ptr::new("test"); + assert!(ptr.at_p(|c| c == 't')); + assert!(!ptr.at_p(|c| c == 'e')); + } + + #[test] + fn test_nth_is_p() { + let ptr = Ptr::new("test"); + assert!(ptr.nth_is_p(0,|c| c == 't')); + assert!(!ptr.nth_is_p(1,|c| c == 't')); + assert!(ptr.nth_is_p(3,|c| c == 't')); + assert!(!ptr.nth_is_p(150,|c| c == 't')); + } + + #[test] + fn test_bump() { + let mut ptr = Ptr::new("test"); + assert_eq!(ptr.current(), Some('t')); + ptr.bump(); + assert_eq!(ptr.current(), Some('e')); + ptr.bump(); + assert_eq!(ptr.current(), Some('s')); + ptr.bump(); + assert_eq!(ptr.current(), Some('t')); + ptr.bump(); + assert_eq!(ptr.current(), None); + ptr.bump(); + assert_eq!(ptr.current(), None); + } + + #[test] + fn test_bump_while() { + let mut ptr = Ptr::new("test"); + assert_eq!(ptr.current(), Some('t')); + ptr.bump_while(|c| c != 's'); + assert_eq!(ptr.current(), Some('s')); + } +} -- cgit v1.2.3