diff options
Diffstat (limited to 'crates/ra_syntax/src/string_lexing/parser.rs')
-rw-r--r-- | crates/ra_syntax/src/string_lexing/parser.rs | 168 |
1 files changed, 0 insertions, 168 deletions
diff --git a/crates/ra_syntax/src/string_lexing/parser.rs b/crates/ra_syntax/src/string_lexing/parser.rs deleted file mode 100644 index 7469eb903..000000000 --- a/crates/ra_syntax/src/string_lexing/parser.rs +++ /dev/null | |||
@@ -1,168 +0,0 @@ | |||
1 | use rowan::{TextRange, TextUnit}; | ||
2 | |||
3 | use self::StringComponentKind::*; | ||
4 | |||
5 | pub struct Parser<'a> { | ||
6 | pub(super) quote: u8, | ||
7 | pub(super) src: &'a str, | ||
8 | pub(super) pos: usize, | ||
9 | } | ||
10 | |||
11 | impl<'a> Parser<'a> { | ||
12 | pub fn new(src: &'a str, quote: u8) -> Parser<'a> { | ||
13 | Parser { quote, src, pos: 0 } | ||
14 | } | ||
15 | |||
16 | // Utility methods | ||
17 | |||
18 | pub fn peek(&self) -> Option<char> { | ||
19 | if self.pos == self.src.len() { | ||
20 | return None; | ||
21 | } | ||
22 | |||
23 | self.src[self.pos..].chars().next() | ||
24 | } | ||
25 | |||
26 | pub fn advance(&mut self) -> char { | ||
27 | let next = self.peek().expect("cannot advance if end of input is reached"); | ||
28 | self.pos += next.len_utf8(); | ||
29 | next | ||
30 | } | ||
31 | |||
32 | pub fn skip_whitespace(&mut self) { | ||
33 | while self.peek().map(|c| c.is_whitespace()) == Some(true) { | ||
34 | self.advance(); | ||
35 | } | ||
36 | } | ||
37 | |||
38 | pub fn get_pos(&self) -> TextUnit { | ||
39 | (self.pos as u32).into() | ||
40 | } | ||
41 | |||
42 | // Char parsing methods | ||
43 | |||
44 | fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent { | ||
45 | match self.peek() { | ||
46 | Some('{') => { | ||
47 | self.advance(); | ||
48 | |||
49 | // Parse anything until we reach `}` | ||
50 | while let Some(next) = self.peek() { | ||
51 | self.advance(); | ||
52 | if next == '}' { | ||
53 | break; | ||
54 | } | ||
55 | } | ||
56 | |||
57 | let end = self.get_pos(); | ||
58 | StringComponent::new(TextRange::from_to(start, end), UnicodeEscape) | ||
59 | } | ||
60 | Some(_) | None => { | ||
61 | let end = self.get_pos(); | ||
62 | StringComponent::new(TextRange::from_to(start, end), UnicodeEscape) | ||
63 | } | ||
64 | } | ||
65 | } | ||
66 | |||
67 | fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent { | ||
68 | let code_start = self.get_pos(); | ||
69 | while let Some(next) = self.peek() { | ||
70 | if next == '\'' || (self.get_pos() - code_start == 2.into()) { | ||
71 | break; | ||
72 | } | ||
73 | |||
74 | self.advance(); | ||
75 | } | ||
76 | |||
77 | let end = self.get_pos(); | ||
78 | StringComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) | ||
79 | } | ||
80 | |||
81 | fn parse_escape(&mut self, start: TextUnit) -> StringComponent { | ||
82 | if self.peek().is_none() { | ||
83 | return StringComponent::new(TextRange::from_to(start, self.get_pos()), AsciiEscape); | ||
84 | } | ||
85 | |||
86 | let next = self.advance(); | ||
87 | let end = self.get_pos(); | ||
88 | let range = TextRange::from_to(start, end); | ||
89 | match next { | ||
90 | 'x' => self.parse_ascii_code_escape(start), | ||
91 | 'u' => self.parse_unicode_escape(start), | ||
92 | _ => StringComponent::new(range, AsciiEscape), | ||
93 | } | ||
94 | } | ||
95 | |||
96 | pub fn parse_ignore_newline(&mut self, start: TextUnit) -> Option<StringComponent> { | ||
97 | // In string literals, when a `\` occurs immediately before the newline, the `\`, | ||
98 | // the newline, and all whitespace at the beginning of the next line are ignored | ||
99 | match self.peek() { | ||
100 | Some('\n') | Some('\r') => { | ||
101 | self.skip_whitespace(); | ||
102 | Some(StringComponent::new( | ||
103 | TextRange::from_to(start, self.get_pos()), | ||
104 | StringComponentKind::IgnoreNewline, | ||
105 | )) | ||
106 | } | ||
107 | _ => None, | ||
108 | } | ||
109 | } | ||
110 | |||
111 | pub fn parse_component(&mut self) -> Option<StringComponent> { | ||
112 | let next = self.peek()?; | ||
113 | |||
114 | // Ignore string close | ||
115 | if next == self.quote as char { | ||
116 | return None; | ||
117 | } | ||
118 | |||
119 | let start = self.get_pos(); | ||
120 | self.advance(); | ||
121 | |||
122 | if next == '\\' { | ||
123 | // Strings can use `\` to ignore newlines, so we first try to parse one of those | ||
124 | // before falling back to parsing char escapes | ||
125 | if self.quote == b'"' { | ||
126 | if let Some(component) = self.parse_ignore_newline(start) { | ||
127 | return Some(component); | ||
128 | } | ||
129 | } | ||
130 | |||
131 | Some(self.parse_escape(start)) | ||
132 | } else { | ||
133 | let end = self.get_pos(); | ||
134 | Some(StringComponent::new(TextRange::from_to(start, end), CodePoint)) | ||
135 | } | ||
136 | } | ||
137 | |||
138 | pub fn parse_suffix(&mut self) -> Option<TextRange> { | ||
139 | let start = self.get_pos(); | ||
140 | let _ = self.peek()?; | ||
141 | while let Some(_) = self.peek() { | ||
142 | self.advance(); | ||
143 | } | ||
144 | let end = self.get_pos(); | ||
145 | Some(TextRange::from_to(start, end)) | ||
146 | } | ||
147 | } | ||
148 | |||
149 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
150 | pub struct StringComponent { | ||
151 | pub range: TextRange, | ||
152 | pub kind: StringComponentKind, | ||
153 | } | ||
154 | |||
155 | impl StringComponent { | ||
156 | fn new(range: TextRange, kind: StringComponentKind) -> StringComponent { | ||
157 | StringComponent { range, kind } | ||
158 | } | ||
159 | } | ||
160 | |||
/// Classification of a `StringComponent`.
///
/// The enum is fieldless, so it is `Copy`: callers can pass and compare kinds
/// by value without cloning.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
pub enum StringComponentKind {
    /// A backslash directly followed by a newline, together with the
    /// whitespace that follows it.
    IgnoreNewline,
    /// A single character that is not part of an escape sequence.
    CodePoint,
    /// A backslash followed by at most one character other than `x` or `u`.
    AsciiEscape,
    /// A `\x` escape together with the code characters consumed after it.
    AsciiCodeEscape,
    /// A `\u` escape together with its optional `{...}` part.
    UnicodeEscape,
}