diff options
Diffstat (limited to 'crates/syntax/src/ast/token_ext.rs')
-rw-r--r-- | crates/syntax/src/ast/token_ext.rs | 538 |
1 files changed, 538 insertions, 0 deletions
diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs new file mode 100644 index 000000000..c5ef92733 --- /dev/null +++ b/crates/syntax/src/ast/token_ext.rs | |||
@@ -0,0 +1,538 @@ | |||
1 | //! There are many AstNodes, but only a few tokens, so we hand-write them here. | ||
2 | |||
3 | use std::{ | ||
4 | borrow::Cow, | ||
5 | convert::{TryFrom, TryInto}, | ||
6 | }; | ||
7 | |||
8 | use rustc_lexer::unescape::{unescape_literal, Mode}; | ||
9 | |||
10 | use crate::{ | ||
11 | ast::{AstToken, Comment, RawString, String, Whitespace}, | ||
12 | TextRange, TextSize, | ||
13 | }; | ||
14 | |||
15 | impl Comment { | ||
16 | pub fn kind(&self) -> CommentKind { | ||
17 | kind_by_prefix(self.text()) | ||
18 | } | ||
19 | |||
20 | pub fn prefix(&self) -> &'static str { | ||
21 | for (prefix, k) in COMMENT_PREFIX_TO_KIND.iter() { | ||
22 | if *k == self.kind() && self.text().starts_with(prefix) { | ||
23 | return prefix; | ||
24 | } | ||
25 | } | ||
26 | unreachable!() | ||
27 | } | ||
28 | } | ||
29 | |||
/// Classification of a comment: its shape plus, for doc comments, where the
/// documentation is attached.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct CommentKind {
    /// Whether the comment is a line (`//`) or block (`/* */`) comment.
    pub shape: CommentShape,
    /// `Some(..)` for doc comments, `None` for ordinary comments.
    pub doc: Option<CommentPlacement>,
}

/// Syntactic shape of a comment.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentShape {
    /// A comment introduced by `//`.
    Line,
    /// A comment introduced by `/*`.
    Block,
}

impl CommentShape {
    /// Returns `true` for [`CommentShape::Line`].
    pub fn is_line(self) -> bool {
        self == CommentShape::Line
    }

    /// Returns `true` for [`CommentShape::Block`].
    pub fn is_block(self) -> bool {
        self == CommentShape::Block
    }
}

/// Which flavour of doc comment a comment is.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum CommentPlacement {
    /// `//!` or `/*!`.
    Inner,
    /// `///` or `/**`.
    Outer,
}

/// Prefix table used to classify comments.
///
/// Order matters: lookups take the first matching entry, so longer prefixes
/// must come before the shorter prefixes they start with.
const COMMENT_PREFIX_TO_KIND: &[(&str, CommentKind)] = {
    use {CommentPlacement::*, CommentShape::*};
    &[
        ("////", CommentKind { shape: Line, doc: None }),
        ("///", CommentKind { shape: Line, doc: Some(Outer) }),
        ("//!", CommentKind { shape: Line, doc: Some(Inner) }),
        // Three or more stars open an ordinary block comment, not a doc
        // comment, mirroring how `////` is not a line doc comment.
        ("/***", CommentKind { shape: Block, doc: None }),
        ("/**", CommentKind { shape: Block, doc: Some(Outer) }),
        ("/*!", CommentKind { shape: Block, doc: Some(Inner) }),
        ("//", CommentKind { shape: Line, doc: None }),
        ("/*", CommentKind { shape: Block, doc: None }),
    ]
};

/// Classifies comment `text` by its leading characters.
///
/// # Panics
///
/// Panics if `text` starts with none of the prefixes in
/// `COMMENT_PREFIX_TO_KIND`, i.e. it is not a comment at all.
fn kind_by_prefix(text: &str) -> CommentKind {
    // The empty block comment starts with `/**` but is not a doc comment.
    if text == "/**/" {
        return CommentKind { shape: CommentShape::Block, doc: None };
    }
    COMMENT_PREFIX_TO_KIND
        .iter()
        .find(|(prefix, _)| text.starts_with(*prefix))
        .map(|&(_, kind)| kind)
        .unwrap_or_else(|| panic!("bad comment text: {:?}", text))
}
82 | |||
83 | impl Whitespace { | ||
84 | pub fn spans_multiple_lines(&self) -> bool { | ||
85 | let text = self.text(); | ||
86 | text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n')) | ||
87 | } | ||
88 | } | ||
89 | |||
90 | pub struct QuoteOffsets { | ||
91 | pub quotes: (TextRange, TextRange), | ||
92 | pub contents: TextRange, | ||
93 | } | ||
94 | |||
95 | impl QuoteOffsets { | ||
96 | fn new(literal: &str) -> Option<QuoteOffsets> { | ||
97 | let left_quote = literal.find('"')?; | ||
98 | let right_quote = literal.rfind('"')?; | ||
99 | if left_quote == right_quote { | ||
100 | // `literal` only contains one quote | ||
101 | return None; | ||
102 | } | ||
103 | |||
104 | let start = TextSize::from(0); | ||
105 | let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"'); | ||
106 | let right_quote = TextSize::try_from(right_quote).unwrap(); | ||
107 | let end = TextSize::of(literal); | ||
108 | |||
109 | let res = QuoteOffsets { | ||
110 | quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)), | ||
111 | contents: TextRange::new(left_quote, right_quote), | ||
112 | }; | ||
113 | Some(res) | ||
114 | } | ||
115 | } | ||
116 | |||
117 | pub trait HasQuotes: AstToken { | ||
118 | fn quote_offsets(&self) -> Option<QuoteOffsets> { | ||
119 | let text = self.text().as_str(); | ||
120 | let offsets = QuoteOffsets::new(text)?; | ||
121 | let o = self.syntax().text_range().start(); | ||
122 | let offsets = QuoteOffsets { | ||
123 | quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o), | ||
124 | contents: offsets.contents + o, | ||
125 | }; | ||
126 | Some(offsets) | ||
127 | } | ||
128 | fn open_quote_text_range(&self) -> Option<TextRange> { | ||
129 | self.quote_offsets().map(|it| it.quotes.0) | ||
130 | } | ||
131 | |||
132 | fn close_quote_text_range(&self) -> Option<TextRange> { | ||
133 | self.quote_offsets().map(|it| it.quotes.1) | ||
134 | } | ||
135 | |||
136 | fn text_range_between_quotes(&self) -> Option<TextRange> { | ||
137 | self.quote_offsets().map(|it| it.contents) | ||
138 | } | ||
139 | } | ||
140 | |||
141 | impl HasQuotes for String {} | ||
142 | impl HasQuotes for RawString {} | ||
143 | |||
144 | pub trait HasStringValue: HasQuotes { | ||
145 | fn value(&self) -> Option<Cow<'_, str>>; | ||
146 | } | ||
147 | |||
148 | impl HasStringValue for String { | ||
149 | fn value(&self) -> Option<Cow<'_, str>> { | ||
150 | let text = self.text().as_str(); | ||
151 | let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; | ||
152 | |||
153 | let mut buf = std::string::String::with_capacity(text.len()); | ||
154 | let mut has_error = false; | ||
155 | unescape_literal(text, Mode::Str, &mut |_, unescaped_char| match unescaped_char { | ||
156 | Ok(c) => buf.push(c), | ||
157 | Err(_) => has_error = true, | ||
158 | }); | ||
159 | |||
160 | if has_error { | ||
161 | return None; | ||
162 | } | ||
163 | // FIXME: don't actually allocate for borrowed case | ||
164 | let res = if buf == text { Cow::Borrowed(text) } else { Cow::Owned(buf) }; | ||
165 | Some(res) | ||
166 | } | ||
167 | } | ||
168 | |||
169 | impl HasStringValue for RawString { | ||
170 | fn value(&self) -> Option<Cow<'_, str>> { | ||
171 | let text = self.text().as_str(); | ||
172 | let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; | ||
173 | Some(Cow::Borrowed(text)) | ||
174 | } | ||
175 | } | ||
176 | |||
177 | impl RawString { | ||
178 | pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> { | ||
179 | let contents_range = self.text_range_between_quotes()?; | ||
180 | assert!(TextRange::up_to(contents_range.len()).contains_range(range)); | ||
181 | Some(range + contents_range.start()) | ||
182 | } | ||
183 | } | ||
184 | |||
/// A piece of a format-string placeholder, as reported by
/// `HasFormatSpecifier::lex_format_specifier`.
///
/// The enum is fieldless, so it is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FormatSpecifier {
    /// The `{` that opens a placeholder.
    Open,
    /// The `}` that closes a placeholder.
    Close,
    /// A run of ASCII digits.
    Integer,
    /// An identifier (letters, digits and `_`, not starting with a digit).
    Identifier,
    /// The `:` separating the argument from the format spec.
    Colon,
    /// The fill character that precedes an alignment character.
    Fill,
    /// One of `<`, `^`, `>`.
    Align,
    /// `+` or `-`.
    Sign,
    /// `#`.
    NumberSign,
    /// The `0` flag.
    Zero,
    /// The `$` following a width or precision argument.
    DollarSign,
    /// The `.` introducing a precision.
    Dot,
    /// The `*` precision.
    Asterisk,
    /// `?`.
    QuestionMark,
}
202 | |||
203 | pub trait HasFormatSpecifier: AstToken { | ||
204 | fn char_ranges( | ||
205 | &self, | ||
206 | ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>; | ||
207 | |||
208 | fn lex_format_specifier<F>(&self, mut callback: F) | ||
209 | where | ||
210 | F: FnMut(TextRange, FormatSpecifier), | ||
211 | { | ||
212 | let char_ranges = if let Some(char_ranges) = self.char_ranges() { | ||
213 | char_ranges | ||
214 | } else { | ||
215 | return; | ||
216 | }; | ||
217 | let mut chars = char_ranges.iter().peekable(); | ||
218 | |||
219 | while let Some((range, first_char)) = chars.next() { | ||
220 | match first_char { | ||
221 | Ok('{') => { | ||
222 | // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax | ||
223 | if let Some((_, Ok('{'))) = chars.peek() { | ||
224 | // Escaped format specifier, `{{` | ||
225 | chars.next(); | ||
226 | continue; | ||
227 | } | ||
228 | |||
229 | callback(*range, FormatSpecifier::Open); | ||
230 | |||
231 | // check for integer/identifier | ||
232 | match chars | ||
233 | .peek() | ||
234 | .and_then(|next| next.1.as_ref().ok()) | ||
235 | .copied() | ||
236 | .unwrap_or_default() | ||
237 | { | ||
238 | '0'..='9' => { | ||
239 | // integer | ||
240 | read_integer(&mut chars, &mut callback); | ||
241 | } | ||
242 | c if c == '_' || c.is_alphabetic() => { | ||
243 | // identifier | ||
244 | read_identifier(&mut chars, &mut callback); | ||
245 | } | ||
246 | _ => {} | ||
247 | } | ||
248 | |||
249 | if let Some((_, Ok(':'))) = chars.peek() { | ||
250 | skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback); | ||
251 | |||
252 | // check for fill/align | ||
253 | let mut cloned = chars.clone().take(2); | ||
254 | let first = cloned | ||
255 | .next() | ||
256 | .and_then(|next| next.1.as_ref().ok()) | ||
257 | .copied() | ||
258 | .unwrap_or_default(); | ||
259 | let second = cloned | ||
260 | .next() | ||
261 | .and_then(|next| next.1.as_ref().ok()) | ||
262 | .copied() | ||
263 | .unwrap_or_default(); | ||
264 | match second { | ||
265 | '<' | '^' | '>' => { | ||
266 | // alignment specifier, first char specifies fillment | ||
267 | skip_char_and_emit( | ||
268 | &mut chars, | ||
269 | FormatSpecifier::Fill, | ||
270 | &mut callback, | ||
271 | ); | ||
272 | skip_char_and_emit( | ||
273 | &mut chars, | ||
274 | FormatSpecifier::Align, | ||
275 | &mut callback, | ||
276 | ); | ||
277 | } | ||
278 | _ => match first { | ||
279 | '<' | '^' | '>' => { | ||
280 | skip_char_and_emit( | ||
281 | &mut chars, | ||
282 | FormatSpecifier::Align, | ||
283 | &mut callback, | ||
284 | ); | ||
285 | } | ||
286 | _ => {} | ||
287 | }, | ||
288 | } | ||
289 | |||
290 | // check for sign | ||
291 | match chars | ||
292 | .peek() | ||
293 | .and_then(|next| next.1.as_ref().ok()) | ||
294 | .copied() | ||
295 | .unwrap_or_default() | ||
296 | { | ||
297 | '+' | '-' => { | ||
298 | skip_char_and_emit( | ||
299 | &mut chars, | ||
300 | FormatSpecifier::Sign, | ||
301 | &mut callback, | ||
302 | ); | ||
303 | } | ||
304 | _ => {} | ||
305 | } | ||
306 | |||
307 | // check for `#` | ||
308 | if let Some((_, Ok('#'))) = chars.peek() { | ||
309 | skip_char_and_emit( | ||
310 | &mut chars, | ||
311 | FormatSpecifier::NumberSign, | ||
312 | &mut callback, | ||
313 | ); | ||
314 | } | ||
315 | |||
316 | // check for `0` | ||
317 | let mut cloned = chars.clone().take(2); | ||
318 | let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied(); | ||
319 | let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied(); | ||
320 | |||
321 | if first == Some('0') && second != Some('$') { | ||
322 | skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback); | ||
323 | } | ||
324 | |||
325 | // width | ||
326 | match chars | ||
327 | .peek() | ||
328 | .and_then(|next| next.1.as_ref().ok()) | ||
329 | .copied() | ||
330 | .unwrap_or_default() | ||
331 | { | ||
332 | '0'..='9' => { | ||
333 | read_integer(&mut chars, &mut callback); | ||
334 | if let Some((_, Ok('$'))) = chars.peek() { | ||
335 | skip_char_and_emit( | ||
336 | &mut chars, | ||
337 | FormatSpecifier::DollarSign, | ||
338 | &mut callback, | ||
339 | ); | ||
340 | } | ||
341 | } | ||
342 | c if c == '_' || c.is_alphabetic() => { | ||
343 | read_identifier(&mut chars, &mut callback); | ||
344 | // can be either width (indicated by dollar sign, or type in which case | ||
345 | // the next sign has to be `}`) | ||
346 | let next = | ||
347 | chars.peek().and_then(|next| next.1.as_ref().ok()).copied(); | ||
348 | match next { | ||
349 | Some('$') => skip_char_and_emit( | ||
350 | &mut chars, | ||
351 | FormatSpecifier::DollarSign, | ||
352 | &mut callback, | ||
353 | ), | ||
354 | Some('}') => { | ||
355 | skip_char_and_emit( | ||
356 | &mut chars, | ||
357 | FormatSpecifier::Close, | ||
358 | &mut callback, | ||
359 | ); | ||
360 | continue; | ||
361 | } | ||
362 | _ => continue, | ||
363 | }; | ||
364 | } | ||
365 | _ => {} | ||
366 | } | ||
367 | |||
368 | // precision | ||
369 | if let Some((_, Ok('.'))) = chars.peek() { | ||
370 | skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback); | ||
371 | |||
372 | match chars | ||
373 | .peek() | ||
374 | .and_then(|next| next.1.as_ref().ok()) | ||
375 | .copied() | ||
376 | .unwrap_or_default() | ||
377 | { | ||
378 | '*' => { | ||
379 | skip_char_and_emit( | ||
380 | &mut chars, | ||
381 | FormatSpecifier::Asterisk, | ||
382 | &mut callback, | ||
383 | ); | ||
384 | } | ||
385 | '0'..='9' => { | ||
386 | read_integer(&mut chars, &mut callback); | ||
387 | if let Some((_, Ok('$'))) = chars.peek() { | ||
388 | skip_char_and_emit( | ||
389 | &mut chars, | ||
390 | FormatSpecifier::DollarSign, | ||
391 | &mut callback, | ||
392 | ); | ||
393 | } | ||
394 | } | ||
395 | c if c == '_' || c.is_alphabetic() => { | ||
396 | read_identifier(&mut chars, &mut callback); | ||
397 | if chars.peek().and_then(|next| next.1.as_ref().ok()).copied() | ||
398 | != Some('$') | ||
399 | { | ||
400 | continue; | ||
401 | } | ||
402 | skip_char_and_emit( | ||
403 | &mut chars, | ||
404 | FormatSpecifier::DollarSign, | ||
405 | &mut callback, | ||
406 | ); | ||
407 | } | ||
408 | _ => { | ||
409 | continue; | ||
410 | } | ||
411 | } | ||
412 | } | ||
413 | |||
414 | // type | ||
415 | match chars | ||
416 | .peek() | ||
417 | .and_then(|next| next.1.as_ref().ok()) | ||
418 | .copied() | ||
419 | .unwrap_or_default() | ||
420 | { | ||
421 | '?' => { | ||
422 | skip_char_and_emit( | ||
423 | &mut chars, | ||
424 | FormatSpecifier::QuestionMark, | ||
425 | &mut callback, | ||
426 | ); | ||
427 | } | ||
428 | c if c == '_' || c.is_alphabetic() => { | ||
429 | read_identifier(&mut chars, &mut callback); | ||
430 | } | ||
431 | _ => {} | ||
432 | } | ||
433 | } | ||
434 | |||
435 | if let Some((_, Ok('}'))) = chars.peek() { | ||
436 | skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback); | ||
437 | } else { | ||
438 | continue; | ||
439 | } | ||
440 | } | ||
441 | _ => { | ||
442 | while let Some((_, Ok(next_char))) = chars.peek() { | ||
443 | match next_char { | ||
444 | '{' => break, | ||
445 | _ => {} | ||
446 | } | ||
447 | chars.next(); | ||
448 | } | ||
449 | } | ||
450 | }; | ||
451 | } | ||
452 | |||
453 | fn skip_char_and_emit<'a, I, F>( | ||
454 | chars: &mut std::iter::Peekable<I>, | ||
455 | emit: FormatSpecifier, | ||
456 | callback: &mut F, | ||
457 | ) where | ||
458 | I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>, | ||
459 | F: FnMut(TextRange, FormatSpecifier), | ||
460 | { | ||
461 | let (range, _) = chars.next().unwrap(); | ||
462 | callback(*range, emit); | ||
463 | } | ||
464 | |||
465 | fn read_integer<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F) | ||
466 | where | ||
467 | I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>, | ||
468 | F: FnMut(TextRange, FormatSpecifier), | ||
469 | { | ||
470 | let (mut range, c) = chars.next().unwrap(); | ||
471 | assert!(c.as_ref().unwrap().is_ascii_digit()); | ||
472 | while let Some((r, Ok(next_char))) = chars.peek() { | ||
473 | if next_char.is_ascii_digit() { | ||
474 | chars.next(); | ||
475 | range = range.cover(*r); | ||
476 | } else { | ||
477 | break; | ||
478 | } | ||
479 | } | ||
480 | callback(range, FormatSpecifier::Integer); | ||
481 | } | ||
482 | |||
483 | fn read_identifier<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F) | ||
484 | where | ||
485 | I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>, | ||
486 | F: FnMut(TextRange, FormatSpecifier), | ||
487 | { | ||
488 | let (mut range, c) = chars.next().unwrap(); | ||
489 | assert!(c.as_ref().unwrap().is_alphabetic() || *c.as_ref().unwrap() == '_'); | ||
490 | while let Some((r, Ok(next_char))) = chars.peek() { | ||
491 | if *next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() { | ||
492 | chars.next(); | ||
493 | range = range.cover(*r); | ||
494 | } else { | ||
495 | break; | ||
496 | } | ||
497 | } | ||
498 | callback(range, FormatSpecifier::Identifier); | ||
499 | } | ||
500 | } | ||
501 | } | ||
502 | |||
503 | impl HasFormatSpecifier for String { | ||
504 | fn char_ranges( | ||
505 | &self, | ||
506 | ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> { | ||
507 | let text = self.text().as_str(); | ||
508 | let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; | ||
509 | let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start(); | ||
510 | |||
511 | let mut res = Vec::with_capacity(text.len()); | ||
512 | unescape_literal(text, Mode::Str, &mut |range, unescaped_char| { | ||
513 | res.push(( | ||
514 | TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap()) | ||
515 | + offset, | ||
516 | unescaped_char, | ||
517 | )) | ||
518 | }); | ||
519 | |||
520 | Some(res) | ||
521 | } | ||
522 | } | ||
523 | |||
524 | impl HasFormatSpecifier for RawString { | ||
525 | fn char_ranges( | ||
526 | &self, | ||
527 | ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> { | ||
528 | let text = self.text().as_str(); | ||
529 | let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()]; | ||
530 | let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start(); | ||
531 | |||
532 | let mut res = Vec::with_capacity(text.len()); | ||
533 | for (idx, c) in text.char_indices() { | ||
534 | res.push((TextRange::at(idx.try_into().unwrap(), TextSize::of(c)) + offset, Ok(c))); | ||
535 | } | ||
536 | Some(res) | ||
537 | } | ||
538 | } | ||