From 5222b8aba3b1c2c68706aacf6869423a8e4fe6d5 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 20 Feb 2019 15:47:32 +0300 Subject: move all parsing related bits to a separate module --- crates/ra_syntax/src/lexer/classes.rs | 26 ------ crates/ra_syntax/src/lexer/comments.rs | 57 ------------ crates/ra_syntax/src/lexer/numbers.rs | 67 -------------- crates/ra_syntax/src/lexer/ptr.rs | 162 --------------------------------- crates/ra_syntax/src/lexer/strings.rs | 111 ---------------------- 5 files changed, 423 deletions(-) delete mode 100644 crates/ra_syntax/src/lexer/classes.rs delete mode 100644 crates/ra_syntax/src/lexer/comments.rs delete mode 100644 crates/ra_syntax/src/lexer/numbers.rs delete mode 100644 crates/ra_syntax/src/lexer/ptr.rs delete mode 100644 crates/ra_syntax/src/lexer/strings.rs (limited to 'crates/ra_syntax/src/lexer') diff --git a/crates/ra_syntax/src/lexer/classes.rs b/crates/ra_syntax/src/lexer/classes.rs deleted file mode 100644 index 4235d2648..000000000 --- a/crates/ra_syntax/src/lexer/classes.rs +++ /dev/null @@ -1,26 +0,0 @@ -use unicode_xid::UnicodeXID; - -pub fn is_ident_start(c: char) -> bool { - (c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z') - || c == '_' - || (c > '\x7f' && UnicodeXID::is_xid_start(c)) -} - -pub fn is_ident_continue(c: char) -> bool { - (c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z') - || (c >= '0' && c <= '9') - || c == '_' - || (c > '\x7f' && UnicodeXID::is_xid_continue(c)) -} - -pub fn is_whitespace(c: char) -> bool { - //FIXME: use is_pattern_whitespace - //https://github.com/behnam/rust-unic/issues/192 - c.is_whitespace() -} - -pub fn is_dec_digit(c: char) -> bool { - '0' <= c && c <= '9' -} diff --git a/crates/ra_syntax/src/lexer/comments.rs b/crates/ra_syntax/src/lexer/comments.rs deleted file mode 100644 index afe6886a1..000000000 --- a/crates/ra_syntax/src/lexer/comments.rs +++ /dev/null @@ -1,57 +0,0 @@ -use crate::lexer::ptr::Ptr; - -use crate::SyntaxKind::{self, *}; - -pub(crate) fn scan_shebang(ptr: &mut Ptr) -> bool { - if ptr.at_str("!/") { - ptr.bump(); - ptr.bump(); - bump_until_eol(ptr); - true - } else { - false - } -} - -fn scan_block_comment(ptr: &mut Ptr) -> Option { - if ptr.at('*') { - ptr.bump(); - let mut depth: u32 = 1; - while depth > 0 { - if ptr.at_str("*/") { - depth -= 1; - ptr.bump(); - ptr.bump(); - } else if ptr.at_str("/*") { - depth += 1; - ptr.bump(); - ptr.bump(); - } else if ptr.bump().is_none() { - break; - } - } - Some(COMMENT) - } else { - None - } -} - -pub(crate) fn scan_comment(ptr: &mut Ptr) -> Option { - if ptr.at('/') { - bump_until_eol(ptr); - Some(COMMENT) - } else { - scan_block_comment(ptr) - } -} - -fn bump_until_eol(ptr: &mut Ptr) { - loop { - if ptr.at('\n') || ptr.at_str("\r\n") { - return; - } - if ptr.bump().is_none() { - break; - } - } -} diff --git a/crates/ra_syntax/src/lexer/numbers.rs b/crates/ra_syntax/src/lexer/numbers.rs deleted file mode 100644 index 46daf5e52..000000000 --- a/crates/ra_syntax/src/lexer/numbers.rs +++ /dev/null @@ -1,67 +0,0 @@ -use crate::lexer::classes::*; -use crate::lexer::ptr::Ptr; - -use crate::SyntaxKind::{self, *}; - -pub(crate) fn scan_number(c: char, ptr: &mut Ptr) -> SyntaxKind { - if c == '0' { - match ptr.current().unwrap_or('\0') { - 'b' | 'o' => { - ptr.bump(); - scan_digits(ptr, false); - } - 'x' => { - ptr.bump(); - scan_digits(ptr, true); - } - '0'...'9' | '_' | '.' | 'e' | 'E' => { - scan_digits(ptr, true); - } - _ => return INT_NUMBER, - } - } else { - scan_digits(ptr, false); - } - - // might be a float, but don't be greedy if this is actually an - // integer literal followed by field/method access or a range pattern - // (`0..2` and `12.foo()`) - if ptr.at('.') && !(ptr.at_str("..") || ptr.nth_is_p(1, is_ident_start)) { - // might have stuff after the ., and if it does, it needs to start - // with a number - ptr.bump(); - scan_digits(ptr, false); - scan_float_exponent(ptr); - return FLOAT_NUMBER; - } - // it might be a float if it has an exponent - if ptr.at('e') || ptr.at('E') { - scan_float_exponent(ptr); - return FLOAT_NUMBER; - } - INT_NUMBER -} - -fn scan_digits(ptr: &mut Ptr, allow_hex: bool) { - while let Some(c) = ptr.current() { - match c { - '_' | '0'...'9' => { - ptr.bump(); - } - 'a'...'f' | 'A'...'F' if allow_hex => { - ptr.bump(); - } - _ => return, - } - } -} - -fn scan_float_exponent(ptr: &mut Ptr) { - if ptr.at('e') || ptr.at('E') { - ptr.bump(); - if ptr.at('-') || ptr.at('+') { - ptr.bump(); - } - scan_digits(ptr, false); - } -} diff --git a/crates/ra_syntax/src/lexer/ptr.rs b/crates/ra_syntax/src/lexer/ptr.rs deleted file mode 100644 index c341c4176..000000000 --- a/crates/ra_syntax/src/lexer/ptr.rs +++ /dev/null @@ -1,162 +0,0 @@ -use crate::TextUnit; - -use std::str::Chars; - -/// A simple view into the characters of a string. -pub(crate) struct Ptr<'s> { - text: &'s str, - len: TextUnit, -} - -impl<'s> Ptr<'s> { - /// Creates a new `Ptr` from a string. - pub fn new(text: &'s str) -> Ptr<'s> { - Ptr { text, len: 0.into() } - } - - /// Gets the length of the remaining string. - pub fn into_len(self) -> TextUnit { - self.len - } - - /// Gets the current character, if one exists. - pub fn current(&self) -> Option { - self.chars().next() - } - - /// Gets the nth character from the current. - /// For example, 0 will return the current character, 1 will return the next, etc. - pub fn nth(&self, n: u32) -> Option { - self.chars().nth(n as usize) - } - - /// Checks whether the current character is `c`. - pub fn at(&self, c: char) -> bool { - self.current() == Some(c) - } - - /// Checks whether the next characters match `s`. - pub fn at_str(&self, s: &str) -> bool { - let chars = self.chars(); - chars.as_str().starts_with(s) - } - - /// Checks whether the current character satisfies the predicate `p`. - pub fn at_p bool>(&self, p: P) -> bool { - self.current().map(p) == Some(true) - } - - /// Checks whether the nth character satisfies the predicate `p`. - pub fn nth_is_p bool>(&self, n: u32, p: P) -> bool { - self.nth(n).map(p) == Some(true) - } - - /// Moves to the next character. - pub fn bump(&mut self) -> Option { - let ch = self.chars().next()?; - self.len += TextUnit::of_char(ch); - Some(ch) - } - - /// Moves to the next character as long as `pred` is satisfied. - pub fn bump_while bool>(&mut self, pred: F) { - loop { - match self.current() { - Some(c) if pred(c) => { - self.bump(); - } - _ => return, - } - } - } - - /// Returns the text up to the current point. - pub fn current_token_text(&self) -> &str { - let len: u32 = self.len.into(); - &self.text[..len as usize] - } - - /// Returns an iterator over the remaining characters. - fn chars(&self) -> Chars { - let len: u32 = self.len.into(); - self.text[len as usize..].chars() - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_current() { - let ptr = Ptr::new("test"); - assert_eq!(ptr.current(), Some('t')); - } - - #[test] - fn test_nth() { - let ptr = Ptr::new("test"); - assert_eq!(ptr.nth(0), Some('t')); - assert_eq!(ptr.nth(1), Some('e')); - assert_eq!(ptr.nth(2), Some('s')); - assert_eq!(ptr.nth(3), Some('t')); - assert_eq!(ptr.nth(4), None); - } - - #[test] - fn test_at() { - let ptr = Ptr::new("test"); - assert!(ptr.at('t')); - assert!(!ptr.at('a')); - } - - #[test] - fn test_at_str() { - let ptr = Ptr::new("test"); - assert!(ptr.at_str("t")); - assert!(ptr.at_str("te")); - assert!(ptr.at_str("test")); - assert!(!ptr.at_str("tests")); - assert!(!ptr.at_str("rust")); - } - - #[test] - fn test_at_p() { - let ptr = Ptr::new("test"); - assert!(ptr.at_p(|c| c == 't')); - assert!(!ptr.at_p(|c| c == 'e')); - } - - #[test] - fn test_nth_is_p() { - let ptr = Ptr::new("test"); - assert!(ptr.nth_is_p(0, |c| c == 't')); - assert!(!ptr.nth_is_p(1, |c| c == 't')); - assert!(ptr.nth_is_p(3, |c| c == 't')); - assert!(!ptr.nth_is_p(150, |c| c == 't')); - } - - #[test] - fn test_bump() { - let mut ptr = Ptr::new("test"); - assert_eq!(ptr.current(), Some('t')); - ptr.bump(); - assert_eq!(ptr.current(), Some('e')); - ptr.bump(); - assert_eq!(ptr.current(), Some('s')); - ptr.bump(); - assert_eq!(ptr.current(), Some('t')); - ptr.bump(); - assert_eq!(ptr.current(), None); - ptr.bump(); - assert_eq!(ptr.current(), None); - } - - #[test] - fn test_bump_while() { - let mut ptr = Ptr::new("test"); - assert_eq!(ptr.current(), Some('t')); - ptr.bump_while(|c| c != 's'); - assert_eq!(ptr.current(), Some('s')); - } -} diff --git a/crates/ra_syntax/src/lexer/strings.rs b/crates/ra_syntax/src/lexer/strings.rs deleted file mode 100644 index 5c1cf3e9c..000000000 --- a/crates/ra_syntax/src/lexer/strings.rs +++ /dev/null @@ -1,111 +0,0 @@ -use crate::SyntaxKind::{self, *}; - -use crate::lexer::ptr::Ptr; - -pub(crate) fn is_string_literal_start(c: char, c1: Option, c2: Option) -> bool { - match (c, c1, c2) { - ('r', Some('"'), _) - | ('r', Some('#'), Some('"')) - | ('r', Some('#'), Some('#')) - | ('b', Some('"'), _) - | ('b', Some('\''), _) - | ('b', Some('r'), Some('"')) - | ('b', Some('r'), Some('#')) => true, - _ => false, - } -} - -pub(crate) fn scan_char(ptr: &mut Ptr) { - while let Some(c) = ptr.current() { - match c { - '\\' => { - ptr.bump(); - if ptr.at('\\') || ptr.at('\'') { - ptr.bump(); - } - } - '\'' => { - ptr.bump(); - return; - } - '\n' => return, - _ => { - ptr.bump(); - } - } - } -} - -pub(crate) fn scan_byte_char_or_string(ptr: &mut Ptr) -> SyntaxKind { - // unwrapping and not-exhaustive match are ok - // because of string_literal_start - let c = ptr.bump().unwrap(); - match c { - '\'' => { - scan_byte(ptr); - BYTE - } - '"' => { - scan_byte_string(ptr); - BYTE_STRING - } - 'r' => { - scan_raw_string(ptr); - RAW_BYTE_STRING - } - _ => unreachable!(), - } -} - -pub(crate) fn scan_string(ptr: &mut Ptr) { - while let Some(c) = ptr.current() { - match c { - '\\' => { - ptr.bump(); - if ptr.at('\\') || ptr.at('"') { - ptr.bump(); - } - } - '"' => { - ptr.bump(); - return; - } - _ => { - ptr.bump(); - } - } - } -} - -pub(crate) fn scan_raw_string(ptr: &mut Ptr) { - let mut hashes = 0; - while ptr.at('#') { - hashes += 1; - ptr.bump(); - } - if !ptr.at('"') { - return; - } - ptr.bump(); - - while let Some(c) = ptr.bump() { - if c == '"' { - let mut hashes_left = hashes; - while ptr.at('#') && hashes_left > 0 { - hashes_left -= 1; - ptr.bump(); - } - if hashes_left == 0 { - return; - } - } - } -} - -fn scan_byte(ptr: &mut Ptr) { - scan_char(ptr) -} - -fn scan_byte_string(ptr: &mut Ptr) { - scan_string(ptr) -} -- cgit v1.2.3