From cf1caf518122b84b9516e1b9f65ba778f1900bf3 Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Tue, 2 Apr 2019 18:18:00 +0300 Subject: simplify --- crates/ra_syntax/src/string_lexing/parser.rs | 168 -------------------- crates/ra_syntax/src/string_lexing/string.rs | 222 --------------------------- 2 files changed, 390 deletions(-) delete mode 100644 crates/ra_syntax/src/string_lexing/parser.rs delete mode 100644 crates/ra_syntax/src/string_lexing/string.rs (limited to 'crates/ra_syntax/src/string_lexing') diff --git a/crates/ra_syntax/src/string_lexing/parser.rs b/crates/ra_syntax/src/string_lexing/parser.rs deleted file mode 100644 index 7469eb903..000000000 --- a/crates/ra_syntax/src/string_lexing/parser.rs +++ /dev/null @@ -1,168 +0,0 @@ -use rowan::{TextRange, TextUnit}; - -use self::StringComponentKind::*; - -pub struct Parser<'a> { - pub(super) quote: u8, - pub(super) src: &'a str, - pub(super) pos: usize, -} - -impl<'a> Parser<'a> { - pub fn new(src: &'a str, quote: u8) -> Parser<'a> { - Parser { quote, src, pos: 0 } - } - - // Utility methods - - pub fn peek(&self) -> Option { - if self.pos == self.src.len() { - return None; - } - - self.src[self.pos..].chars().next() - } - - pub fn advance(&mut self) -> char { - let next = self.peek().expect("cannot advance if end of input is reached"); - self.pos += next.len_utf8(); - next - } - - pub fn skip_whitespace(&mut self) { - while self.peek().map(|c| c.is_whitespace()) == Some(true) { - self.advance(); - } - } - - pub fn get_pos(&self) -> TextUnit { - (self.pos as u32).into() - } - - // Char parsing methods - - fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent { - match self.peek() { - Some('{') => { - self.advance(); - - // Parse anything until we reach `}` - while let Some(next) = self.peek() { - self.advance(); - if next == '}' { - break; - } - } - - let end = self.get_pos(); - StringComponent::new(TextRange::from_to(start, end), UnicodeEscape) - } - Some(_) | None => { - let end = self.get_pos(); - StringComponent::new(TextRange::from_to(start, end), UnicodeEscape) - } - } - } - - fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent { - let code_start = self.get_pos(); - while let Some(next) = self.peek() { - if next == '\'' || (self.get_pos() - code_start == 2.into()) { - break; - } - - self.advance(); - } - - let end = self.get_pos(); - StringComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) - } - - fn parse_escape(&mut self, start: TextUnit) -> StringComponent { - if self.peek().is_none() { - return StringComponent::new(TextRange::from_to(start, self.get_pos()), AsciiEscape); - } - - let next = self.advance(); - let end = self.get_pos(); - let range = TextRange::from_to(start, end); - match next { - 'x' => self.parse_ascii_code_escape(start), - 'u' => self.parse_unicode_escape(start), - _ => StringComponent::new(range, AsciiEscape), - } - } - - pub fn parse_ignore_newline(&mut self, start: TextUnit) -> Option { - // In string literals, when a `\` occurs immediately before the newline, the `\`, - // the newline, and all whitespace at the beginning of the next line are ignored - match self.peek() { - Some('\n') | Some('\r') => { - self.skip_whitespace(); - Some(StringComponent::new( - TextRange::from_to(start, self.get_pos()), - StringComponentKind::IgnoreNewline, - )) - } - _ => None, - } - } - - pub fn parse_component(&mut self) -> Option { - let next = self.peek()?; - - // Ignore string close - if next == self.quote as char { - return None; - } - - let start = self.get_pos(); - self.advance(); - - if next == '\\' { - // Strings can use `\` to ignore newlines, so we first try to parse one of those - // before falling back to parsing char escapes - if self.quote == b'"' { - if let Some(component) = self.parse_ignore_newline(start) { - return Some(component); - } - } - - Some(self.parse_escape(start)) - } else { - let end = self.get_pos(); - Some(StringComponent::new(TextRange::from_to(start, end), CodePoint)) - } - } - - pub fn parse_suffix(&mut self) -> Option { - let start = self.get_pos(); - let _ = self.peek()?; - while let Some(_) = self.peek() { - self.advance(); - } - let end = self.get_pos(); - Some(TextRange::from_to(start, end)) - } -} - -#[derive(Debug, Eq, PartialEq, Clone)] -pub struct StringComponent { - pub range: TextRange, - pub kind: StringComponentKind, -} - -impl StringComponent { - fn new(range: TextRange, kind: StringComponentKind) -> StringComponent { - StringComponent { range, kind } - } -} - -#[derive(Debug, Eq, PartialEq, Clone)] -pub enum StringComponentKind { - IgnoreNewline, - CodePoint, - AsciiEscape, - AsciiCodeEscape, - UnicodeEscape, -} diff --git a/crates/ra_syntax/src/string_lexing/string.rs b/crates/ra_syntax/src/string_lexing/string.rs deleted file mode 100644 index a4742a0d1..000000000 --- a/crates/ra_syntax/src/string_lexing/string.rs +++ /dev/null @@ -1,222 +0,0 @@ -use crate::{ - TextRange, - string_lexing::{ - parser::Parser, - StringComponent, -}}; - -pub fn parse_string_literal(src: &str) -> StringComponentIterator { - StringComponentIterator { - parser: Parser::new(src, b'"'), - has_closing_quote: false, - suffix: None, - prefix: None, - quote: b'"', - } -} - -pub fn parse_byte_string_literal(src: &str) -> StringComponentIterator { - StringComponentIterator { - parser: Parser::new(src, b'"'), - has_closing_quote: false, - suffix: None, - prefix: Some(b'b'), - quote: b'"', - } -} - -pub fn parse_char_literal(src: &str) -> StringComponentIterator { - StringComponentIterator { - parser: Parser::new(src, b'\''), - has_closing_quote: false, - suffix: None, - prefix: None, - quote: b'\'', - } -} - -pub fn parse_byte_literal(src: &str) -> StringComponentIterator { - StringComponentIterator { - parser: Parser::new(src, b'\''), - has_closing_quote: false, - suffix: None, - prefix: Some(b'b'), - quote: b'\'', - } -} - -pub struct StringComponentIterator<'a> { - parser: Parser<'a>, - pub has_closing_quote: bool, - pub suffix: Option, - prefix: Option, - quote: u8, -} - -impl<'a> Iterator for StringComponentIterator<'a> { - type Item = StringComponent; - fn next(&mut self) -> Option { - if self.parser.pos == 0 { - if let Some(prefix) = self.prefix { - assert!( - self.parser.advance() == prefix as char, - "literal should start with a {:?}", - prefix as char, - ); - } - assert!( - self.parser.advance() == self.quote as char, - "literal should start with a {:?}", - self.quote as char, - ); - } - - if let Some(component) = self.parser.parse_component() { - return Some(component); - } - - // We get here when there are no char components left to parse - if self.parser.peek() == Some(self.quote as char) { - self.parser.advance(); - self.has_closing_quote = true; - if let Some(range) = self.parser.parse_suffix() { - self.suffix = Some(range); - } - } - - assert!( - self.parser.peek() == None, - "literal should leave no unparsed input: src = {:?}, pos = {}, length = {}", - self.parser.src, - self.parser.pos, - self.parser.src.len() - ); - - None - } -} - -#[cfg(test)] -mod tests { - use rowan::TextRange; - use crate::string_lexing::{ - StringComponent, - StringComponentKind::*, -}; - - fn parse(src: &str) -> (bool, Vec) { - let component_iterator = &mut super::parse_char_literal(src); - let components: Vec<_> = component_iterator.collect(); - (component_iterator.has_closing_quote, components) - } - - fn unclosed_char_component(src: &str) -> StringComponent { - let (has_closing_quote, components) = parse(src); - assert!(!has_closing_quote, "char should not have closing quote"); - assert!(components.len() == 1); - components[0].clone() - } - - fn closed_char_component(src: &str) -> StringComponent { - let (has_closing_quote, components) = parse(src); - assert!(has_closing_quote, "char should have closing quote"); - assert!(components.len() == 1, "Literal: {}\nComponents: {:#?}", src, components); - components[0].clone() - } - - fn closed_char_components(src: &str) -> Vec { - let (has_closing_quote, components) = parse(src); - assert!(has_closing_quote, "char should have closing quote"); - components - } - - fn range_closed(src: &str) -> TextRange { - TextRange::from_to(1.into(), (src.len() as u32 - 1).into()) - } - - fn range_unclosed(src: &str) -> TextRange { - TextRange::from_to(1.into(), (src.len() as u32).into()) - } - - #[test] - fn test_unicode_escapes() { - let unicode_escapes = &[r"{DEAD}", "{BEEF}", "{FF}", "{}", ""]; - for escape in unicode_escapes { - let escape_sequence = format!(r"'\u{}'", escape); - let component = closed_char_component(&escape_sequence); - let expected_range = range_closed(&escape_sequence); - assert_eq!(component.kind, UnicodeEscape); - assert_eq!(component.range, expected_range); - } - } - - #[test] - fn test_unicode_escapes_unclosed() { - let unicode_escapes = &["{DEAD", "{BEEF", "{FF"]; - for escape in unicode_escapes { - let escape_sequence = format!(r"'\u{}'", escape); - let component = unclosed_char_component(&escape_sequence); - let expected_range = range_unclosed(&escape_sequence); - assert_eq!(component.kind, UnicodeEscape); - assert_eq!(component.range, expected_range); - } - } - - #[test] - fn test_empty_char() { - let (has_closing_quote, components) = parse("''"); - assert!(has_closing_quote, "char should have closing quote"); - assert!(components.len() == 0); - } - - #[test] - fn test_unclosed_char() { - let component = unclosed_char_component("'a"); - assert!(component.kind == CodePoint); - assert!(component.range == TextRange::from_to(1.into(), 2.into())); - } - - #[test] - fn test_digit_escapes() { - let literals = &[r"", r"5", r"55"]; - - for literal in literals { - let lit_text = format!(r"'\x{}'", literal); - let component = closed_char_component(&lit_text); - assert!(component.kind == AsciiCodeEscape); - assert!(component.range == range_closed(&lit_text)); - } - - // More than 2 digits starts a new codepoint - let components = closed_char_components(r"'\x555'"); - assert!(components.len() == 2); - assert!(components[1].kind == CodePoint); - } - - #[test] - fn test_ascii_escapes() { - let literals = &[ - r"\'", "\\\"", // equivalent to \" - r"\n", r"\r", r"\t", r"\\", r"\0", - ]; - - for literal in literals { - let lit_text = format!("'{}'", literal); - let component = closed_char_component(&lit_text); - assert!(component.kind == AsciiEscape); - assert!(component.range == range_closed(&lit_text)); - } - } - - #[test] - fn test_no_escapes() { - let literals = &['"', 'n', 'r', 't', '0', 'x', 'u']; - - for &literal in literals { - let lit_text = format!("'{}'", literal); - let component = closed_char_component(&lit_text); - assert!(component.kind == CodePoint); - assert!(component.range == range_closed(&lit_text)); - } - } -} -- cgit v1.2.3