diff options
Diffstat (limited to 'crates/ra_syntax/src/string_lexing/parser.rs')
-rw-r--r-- | crates/ra_syntax/src/string_lexing/parser.rs | 201 |
1 files changed, 201 insertions, 0 deletions
diff --git a/crates/ra_syntax/src/string_lexing/parser.rs b/crates/ra_syntax/src/string_lexing/parser.rs new file mode 100644 index 000000000..4a6d5bc93 --- /dev/null +++ b/crates/ra_syntax/src/string_lexing/parser.rs | |||
@@ -0,0 +1,201 @@ | |||
1 | use rowan::{TextRange, TextUnit}; | ||
2 | |||
3 | use self::CharComponentKind::*; | ||
4 | |||
5 | pub struct Parser<'a> { | ||
6 | pub(super) src: &'a str, | ||
7 | pub(super) pos: usize, | ||
8 | } | ||
9 | |||
10 | impl<'a> Parser<'a> { | ||
11 | pub fn new(src: &'a str) -> Parser<'a> { | ||
12 | Parser { src, pos: 0 } | ||
13 | } | ||
14 | |||
15 | // Utility methods | ||
16 | |||
17 | pub fn peek(&self) -> Option<char> { | ||
18 | if self.pos == self.src.len() { | ||
19 | return None; | ||
20 | } | ||
21 | |||
22 | self.src[self.pos..].chars().next() | ||
23 | } | ||
24 | |||
25 | pub fn advance(&mut self) -> char { | ||
26 | let next = self | ||
27 | .peek() | ||
28 | .expect("cannot advance if end of input is reached"); | ||
29 | self.pos += next.len_utf8(); | ||
30 | next | ||
31 | } | ||
32 | |||
33 | pub fn skip_whitespace(&mut self) { | ||
34 | while self.peek().map(|c| c.is_whitespace()) == Some(true) { | ||
35 | self.advance(); | ||
36 | } | ||
37 | } | ||
38 | |||
39 | pub fn get_pos(&self) -> TextUnit { | ||
40 | (self.pos as u32).into() | ||
41 | } | ||
42 | |||
43 | // Char parsing methods | ||
44 | |||
45 | fn parse_unicode_escape(&mut self, start: TextUnit) -> CharComponent { | ||
46 | match self.peek() { | ||
47 | Some('{') => { | ||
48 | self.advance(); | ||
49 | |||
50 | // Parse anything until we reach `}` | ||
51 | while let Some(next) = self.peek() { | ||
52 | self.advance(); | ||
53 | if next == '}' { | ||
54 | break; | ||
55 | } | ||
56 | } | ||
57 | |||
58 | let end = self.get_pos(); | ||
59 | CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) | ||
60 | } | ||
61 | Some(_) | None => { | ||
62 | let end = self.get_pos(); | ||
63 | CharComponent::new(TextRange::from_to(start, end), UnicodeEscape) | ||
64 | } | ||
65 | } | ||
66 | } | ||
67 | |||
68 | fn parse_ascii_code_escape(&mut self, start: TextUnit) -> CharComponent { | ||
69 | let code_start = self.get_pos(); | ||
70 | while let Some(next) = self.peek() { | ||
71 | if next == '\'' || (self.get_pos() - code_start == 2.into()) { | ||
72 | break; | ||
73 | } | ||
74 | |||
75 | self.advance(); | ||
76 | } | ||
77 | |||
78 | let end = self.get_pos(); | ||
79 | CharComponent::new(TextRange::from_to(start, end), AsciiCodeEscape) | ||
80 | } | ||
81 | |||
82 | fn parse_escape(&mut self, start: TextUnit) -> CharComponent { | ||
83 | if self.peek().is_none() { | ||
84 | return CharComponent::new(TextRange::from_to(start, start), AsciiEscape); | ||
85 | } | ||
86 | |||
87 | let next = self.advance(); | ||
88 | let end = self.get_pos(); | ||
89 | let range = TextRange::from_to(start, end); | ||
90 | match next { | ||
91 | 'x' => self.parse_ascii_code_escape(start), | ||
92 | 'u' => self.parse_unicode_escape(start), | ||
93 | _ => CharComponent::new(range, AsciiEscape), | ||
94 | } | ||
95 | } | ||
96 | |||
97 | pub fn parse_char_component(&mut self) -> Option<CharComponent> { | ||
98 | let next = self.peek()?; | ||
99 | |||
100 | // Ignore character close | ||
101 | if next == '\'' { | ||
102 | return None; | ||
103 | } | ||
104 | |||
105 | let start = self.get_pos(); | ||
106 | self.advance(); | ||
107 | |||
108 | if next == '\\' { | ||
109 | Some(self.parse_escape(start)) | ||
110 | } else { | ||
111 | let end = self.get_pos(); | ||
112 | Some(CharComponent::new( | ||
113 | TextRange::from_to(start, end), | ||
114 | CodePoint, | ||
115 | )) | ||
116 | } | ||
117 | } | ||
118 | |||
119 | pub fn parse_ignore_newline(&mut self, start: TextUnit) -> Option<StringComponent> { | ||
120 | // In string literals, when a `\` occurs immediately before the newline, the `\`, | ||
121 | // the newline, and all whitespace at the beginning of the next line are ignored | ||
122 | match self.peek() { | ||
123 | Some('\n') | Some('\r') => { | ||
124 | self.skip_whitespace(); | ||
125 | Some(StringComponent::new( | ||
126 | TextRange::from_to(start, self.get_pos()), | ||
127 | StringComponentKind::IgnoreNewline, | ||
128 | )) | ||
129 | } | ||
130 | _ => None, | ||
131 | } | ||
132 | } | ||
133 | |||
134 | pub fn parse_string_component(&mut self) -> Option<StringComponent> { | ||
135 | let next = self.peek()?; | ||
136 | |||
137 | // Ignore string close | ||
138 | if next == '"' { | ||
139 | return None; | ||
140 | } | ||
141 | |||
142 | let start = self.get_pos(); | ||
143 | self.advance(); | ||
144 | |||
145 | if next == '\\' { | ||
146 | // Strings can use `\` to ignore newlines, so we first try to parse one of those | ||
147 | // before falling back to parsing char escapes | ||
148 | self.parse_ignore_newline(start).or_else(|| { | ||
149 | let char_component = self.parse_escape(start); | ||
150 | Some(StringComponent::new( | ||
151 | char_component.range, | ||
152 | StringComponentKind::Char(char_component.kind), | ||
153 | )) | ||
154 | }) | ||
155 | } else { | ||
156 | let end = self.get_pos(); | ||
157 | Some(StringComponent::new( | ||
158 | TextRange::from_to(start, end), | ||
159 | StringComponentKind::Char(CodePoint), | ||
160 | )) | ||
161 | } | ||
162 | } | ||
163 | } | ||
164 | |||
165 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
166 | pub struct StringComponent { | ||
167 | pub range: TextRange, | ||
168 | pub kind: StringComponentKind, | ||
169 | } | ||
170 | |||
171 | impl StringComponent { | ||
172 | fn new(range: TextRange, kind: StringComponentKind) -> StringComponent { | ||
173 | StringComponent { range, kind } | ||
174 | } | ||
175 | } | ||
176 | |||
/// The kinds of component that can appear inside a string literal.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum StringComponentKind {
    /// A `\` directly followed by `\n` or `\r`, together with all the
    /// whitespace that follows it.
    IgnoreNewline,
    /// Any other component, classified the same way as in a character literal.
    Char(CharComponentKind),
}
182 | |||
183 | #[derive(Debug, Eq, PartialEq, Clone)] | ||
184 | pub struct CharComponent { | ||
185 | pub range: TextRange, | ||
186 | pub kind: CharComponentKind, | ||
187 | } | ||
188 | |||
189 | impl CharComponent { | ||
190 | fn new(range: TextRange, kind: CharComponentKind) -> CharComponent { | ||
191 | CharComponent { range, kind } | ||
192 | } | ||
193 | } | ||
194 | |||
/// Classification of a single component of a character or string literal.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum CharComponentKind {
    /// A single character that is not a backslash.
    CodePoint,
    /// A backslash followed by any character other than `x` or `u`, or a
    /// backslash at the very end of the input.
    AsciiEscape,
    /// An escape introduced by `\x`.
    AsciiCodeEscape,
    /// An escape introduced by `\u`, optionally followed by a `{...}` group.
    UnicodeEscape,
}