aboutsummaryrefslogtreecommitdiff
path: root/crates/syntax/src/ast/token_ext.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/syntax/src/ast/token_ext.rs')
-rw-r--r--crates/syntax/src/ast/token_ext.rs538
1 files changed, 538 insertions, 0 deletions
diff --git a/crates/syntax/src/ast/token_ext.rs b/crates/syntax/src/ast/token_ext.rs
new file mode 100644
index 000000000..c5ef92733
--- /dev/null
+++ b/crates/syntax/src/ast/token_ext.rs
@@ -0,0 +1,538 @@
1//! There are many AstNodes, but only a few tokens, so we hand-write them here.
2
3use std::{
4 borrow::Cow,
5 convert::{TryFrom, TryInto},
6};
7
8use rustc_lexer::unescape::{unescape_literal, Mode};
9
10use crate::{
11 ast::{AstToken, Comment, RawString, String, Whitespace},
12 TextRange, TextSize,
13};
14
15impl Comment {
16 pub fn kind(&self) -> CommentKind {
17 kind_by_prefix(self.text())
18 }
19
20 pub fn prefix(&self) -> &'static str {
21 for (prefix, k) in COMMENT_PREFIX_TO_KIND.iter() {
22 if *k == self.kind() && self.text().starts_with(prefix) {
23 return prefix;
24 }
25 }
26 unreachable!()
27 }
28}
29
30#[derive(Debug, PartialEq, Eq, Clone, Copy)]
31pub struct CommentKind {
32 pub shape: CommentShape,
33 pub doc: Option<CommentPlacement>,
34}
35
/// Syntactic shape of a comment.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CommentShape {
    /// A `//`-style comment.
    Line,
    /// A `/* ... */`-style comment.
    Block,
}

impl CommentShape {
    /// Returns `true` for [`CommentShape::Line`].
    pub fn is_line(self) -> bool {
        match self {
            CommentShape::Line => true,
            CommentShape::Block => false,
        }
    }

    /// Returns `true` for [`CommentShape::Block`].
    pub fn is_block(self) -> bool {
        match self {
            CommentShape::Block => true,
            CommentShape::Line => false,
        }
    }
}
51
/// Which item a doc comment attaches to.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CommentPlacement {
    /// `//!` or `/*!`.
    Inner,
    /// `///` or `/**`.
    Outer,
}
57
58const COMMENT_PREFIX_TO_KIND: &[(&str, CommentKind)] = {
59 use {CommentPlacement::*, CommentShape::*};
60 &[
61 ("////", CommentKind { shape: Line, doc: None }),
62 ("///", CommentKind { shape: Line, doc: Some(Outer) }),
63 ("//!", CommentKind { shape: Line, doc: Some(Inner) }),
64 ("/**", CommentKind { shape: Block, doc: Some(Outer) }),
65 ("/*!", CommentKind { shape: Block, doc: Some(Inner) }),
66 ("//", CommentKind { shape: Line, doc: None }),
67 ("/*", CommentKind { shape: Block, doc: None }),
68 ]
69};
70
71fn kind_by_prefix(text: &str) -> CommentKind {
72 if text == "/**/" {
73 return CommentKind { shape: CommentShape::Block, doc: None };
74 }
75 for (prefix, kind) in COMMENT_PREFIX_TO_KIND.iter() {
76 if text.starts_with(prefix) {
77 return *kind;
78 }
79 }
80 panic!("bad comment text: {:?}", text)
81}
82
83impl Whitespace {
84 pub fn spans_multiple_lines(&self) -> bool {
85 let text = self.text();
86 text.find('\n').map_or(false, |idx| text[idx + 1..].contains('\n'))
87 }
88}
89
90pub struct QuoteOffsets {
91 pub quotes: (TextRange, TextRange),
92 pub contents: TextRange,
93}
94
95impl QuoteOffsets {
96 fn new(literal: &str) -> Option<QuoteOffsets> {
97 let left_quote = literal.find('"')?;
98 let right_quote = literal.rfind('"')?;
99 if left_quote == right_quote {
100 // `literal` only contains one quote
101 return None;
102 }
103
104 let start = TextSize::from(0);
105 let left_quote = TextSize::try_from(left_quote).unwrap() + TextSize::of('"');
106 let right_quote = TextSize::try_from(right_quote).unwrap();
107 let end = TextSize::of(literal);
108
109 let res = QuoteOffsets {
110 quotes: (TextRange::new(start, left_quote), TextRange::new(right_quote, end)),
111 contents: TextRange::new(left_quote, right_quote),
112 };
113 Some(res)
114 }
115}
116
117pub trait HasQuotes: AstToken {
118 fn quote_offsets(&self) -> Option<QuoteOffsets> {
119 let text = self.text().as_str();
120 let offsets = QuoteOffsets::new(text)?;
121 let o = self.syntax().text_range().start();
122 let offsets = QuoteOffsets {
123 quotes: (offsets.quotes.0 + o, offsets.quotes.1 + o),
124 contents: offsets.contents + o,
125 };
126 Some(offsets)
127 }
128 fn open_quote_text_range(&self) -> Option<TextRange> {
129 self.quote_offsets().map(|it| it.quotes.0)
130 }
131
132 fn close_quote_text_range(&self) -> Option<TextRange> {
133 self.quote_offsets().map(|it| it.quotes.1)
134 }
135
136 fn text_range_between_quotes(&self) -> Option<TextRange> {
137 self.quote_offsets().map(|it| it.contents)
138 }
139}
140
141impl HasQuotes for String {}
142impl HasQuotes for RawString {}
143
144pub trait HasStringValue: HasQuotes {
145 fn value(&self) -> Option<Cow<'_, str>>;
146}
147
148impl HasStringValue for String {
149 fn value(&self) -> Option<Cow<'_, str>> {
150 let text = self.text().as_str();
151 let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
152
153 let mut buf = std::string::String::with_capacity(text.len());
154 let mut has_error = false;
155 unescape_literal(text, Mode::Str, &mut |_, unescaped_char| match unescaped_char {
156 Ok(c) => buf.push(c),
157 Err(_) => has_error = true,
158 });
159
160 if has_error {
161 return None;
162 }
163 // FIXME: don't actually allocate for borrowed case
164 let res = if buf == text { Cow::Borrowed(text) } else { Cow::Owned(buf) };
165 Some(res)
166 }
167}
168
169impl HasStringValue for RawString {
170 fn value(&self) -> Option<Cow<'_, str>> {
171 let text = self.text().as_str();
172 let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
173 Some(Cow::Borrowed(text))
174 }
175}
176
177impl RawString {
178 pub fn map_range_up(&self, range: TextRange) -> Option<TextRange> {
179 let contents_range = self.text_range_between_quotes()?;
180 assert!(TextRange::up_to(contents_range.len()).contains_range(range));
181 Some(range + contents_range.start())
182 }
183}
184
/// A piece of a `{...}` placeholder inside a format string, as reported by
/// `HasFormatSpecifier::lex_format_specifier`.
#[derive(Debug)]
pub enum FormatSpecifier {
    /// The opening `{`.
    Open,
    /// The closing `}`.
    Close,
    /// A run of ASCII digits.
    Integer,
    /// A run of `_`, alphabetic characters and digits that does not start
    /// with a digit.
    Identifier,
    /// The `:` separating the argument from the format spec.
    Colon,
    /// The character preceding an alignment character.
    Fill,
    /// One of `<`, `^`, `>`.
    Align,
    /// `+` or `-`.
    Sign,
    /// `#`.
    NumberSign,
    /// The `0` flag (a `0` not followed by `$`).
    Zero,
    /// `$` following a width or precision argument.
    DollarSign,
    /// The `.` introducing the precision.
    Dot,
    /// `*` used as the precision.
    Asterisk,
    /// `?`.
    QuestionMark,
}
202
203pub trait HasFormatSpecifier: AstToken {
204 fn char_ranges(
205 &self,
206 ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>>;
207
208 fn lex_format_specifier<F>(&self, mut callback: F)
209 where
210 F: FnMut(TextRange, FormatSpecifier),
211 {
212 let char_ranges = if let Some(char_ranges) = self.char_ranges() {
213 char_ranges
214 } else {
215 return;
216 };
217 let mut chars = char_ranges.iter().peekable();
218
219 while let Some((range, first_char)) = chars.next() {
220 match first_char {
221 Ok('{') => {
222 // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax
223 if let Some((_, Ok('{'))) = chars.peek() {
224 // Escaped format specifier, `{{`
225 chars.next();
226 continue;
227 }
228
229 callback(*range, FormatSpecifier::Open);
230
231 // check for integer/identifier
232 match chars
233 .peek()
234 .and_then(|next| next.1.as_ref().ok())
235 .copied()
236 .unwrap_or_default()
237 {
238 '0'..='9' => {
239 // integer
240 read_integer(&mut chars, &mut callback);
241 }
242 c if c == '_' || c.is_alphabetic() => {
243 // identifier
244 read_identifier(&mut chars, &mut callback);
245 }
246 _ => {}
247 }
248
249 if let Some((_, Ok(':'))) = chars.peek() {
250 skip_char_and_emit(&mut chars, FormatSpecifier::Colon, &mut callback);
251
252 // check for fill/align
253 let mut cloned = chars.clone().take(2);
254 let first = cloned
255 .next()
256 .and_then(|next| next.1.as_ref().ok())
257 .copied()
258 .unwrap_or_default();
259 let second = cloned
260 .next()
261 .and_then(|next| next.1.as_ref().ok())
262 .copied()
263 .unwrap_or_default();
264 match second {
265 '<' | '^' | '>' => {
266 // alignment specifier, first char specifies fillment
267 skip_char_and_emit(
268 &mut chars,
269 FormatSpecifier::Fill,
270 &mut callback,
271 );
272 skip_char_and_emit(
273 &mut chars,
274 FormatSpecifier::Align,
275 &mut callback,
276 );
277 }
278 _ => match first {
279 '<' | '^' | '>' => {
280 skip_char_and_emit(
281 &mut chars,
282 FormatSpecifier::Align,
283 &mut callback,
284 );
285 }
286 _ => {}
287 },
288 }
289
290 // check for sign
291 match chars
292 .peek()
293 .and_then(|next| next.1.as_ref().ok())
294 .copied()
295 .unwrap_or_default()
296 {
297 '+' | '-' => {
298 skip_char_and_emit(
299 &mut chars,
300 FormatSpecifier::Sign,
301 &mut callback,
302 );
303 }
304 _ => {}
305 }
306
307 // check for `#`
308 if let Some((_, Ok('#'))) = chars.peek() {
309 skip_char_and_emit(
310 &mut chars,
311 FormatSpecifier::NumberSign,
312 &mut callback,
313 );
314 }
315
316 // check for `0`
317 let mut cloned = chars.clone().take(2);
318 let first = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
319 let second = cloned.next().and_then(|next| next.1.as_ref().ok()).copied();
320
321 if first == Some('0') && second != Some('$') {
322 skip_char_and_emit(&mut chars, FormatSpecifier::Zero, &mut callback);
323 }
324
325 // width
326 match chars
327 .peek()
328 .and_then(|next| next.1.as_ref().ok())
329 .copied()
330 .unwrap_or_default()
331 {
332 '0'..='9' => {
333 read_integer(&mut chars, &mut callback);
334 if let Some((_, Ok('$'))) = chars.peek() {
335 skip_char_and_emit(
336 &mut chars,
337 FormatSpecifier::DollarSign,
338 &mut callback,
339 );
340 }
341 }
342 c if c == '_' || c.is_alphabetic() => {
343 read_identifier(&mut chars, &mut callback);
344 // can be either width (indicated by dollar sign, or type in which case
345 // the next sign has to be `}`)
346 let next =
347 chars.peek().and_then(|next| next.1.as_ref().ok()).copied();
348 match next {
349 Some('$') => skip_char_and_emit(
350 &mut chars,
351 FormatSpecifier::DollarSign,
352 &mut callback,
353 ),
354 Some('}') => {
355 skip_char_and_emit(
356 &mut chars,
357 FormatSpecifier::Close,
358 &mut callback,
359 );
360 continue;
361 }
362 _ => continue,
363 };
364 }
365 _ => {}
366 }
367
368 // precision
369 if let Some((_, Ok('.'))) = chars.peek() {
370 skip_char_and_emit(&mut chars, FormatSpecifier::Dot, &mut callback);
371
372 match chars
373 .peek()
374 .and_then(|next| next.1.as_ref().ok())
375 .copied()
376 .unwrap_or_default()
377 {
378 '*' => {
379 skip_char_and_emit(
380 &mut chars,
381 FormatSpecifier::Asterisk,
382 &mut callback,
383 );
384 }
385 '0'..='9' => {
386 read_integer(&mut chars, &mut callback);
387 if let Some((_, Ok('$'))) = chars.peek() {
388 skip_char_and_emit(
389 &mut chars,
390 FormatSpecifier::DollarSign,
391 &mut callback,
392 );
393 }
394 }
395 c if c == '_' || c.is_alphabetic() => {
396 read_identifier(&mut chars, &mut callback);
397 if chars.peek().and_then(|next| next.1.as_ref().ok()).copied()
398 != Some('$')
399 {
400 continue;
401 }
402 skip_char_and_emit(
403 &mut chars,
404 FormatSpecifier::DollarSign,
405 &mut callback,
406 );
407 }
408 _ => {
409 continue;
410 }
411 }
412 }
413
414 // type
415 match chars
416 .peek()
417 .and_then(|next| next.1.as_ref().ok())
418 .copied()
419 .unwrap_or_default()
420 {
421 '?' => {
422 skip_char_and_emit(
423 &mut chars,
424 FormatSpecifier::QuestionMark,
425 &mut callback,
426 );
427 }
428 c if c == '_' || c.is_alphabetic() => {
429 read_identifier(&mut chars, &mut callback);
430 }
431 _ => {}
432 }
433 }
434
435 if let Some((_, Ok('}'))) = chars.peek() {
436 skip_char_and_emit(&mut chars, FormatSpecifier::Close, &mut callback);
437 } else {
438 continue;
439 }
440 }
441 _ => {
442 while let Some((_, Ok(next_char))) = chars.peek() {
443 match next_char {
444 '{' => break,
445 _ => {}
446 }
447 chars.next();
448 }
449 }
450 };
451 }
452
453 fn skip_char_and_emit<'a, I, F>(
454 chars: &mut std::iter::Peekable<I>,
455 emit: FormatSpecifier,
456 callback: &mut F,
457 ) where
458 I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
459 F: FnMut(TextRange, FormatSpecifier),
460 {
461 let (range, _) = chars.next().unwrap();
462 callback(*range, emit);
463 }
464
465 fn read_integer<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
466 where
467 I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
468 F: FnMut(TextRange, FormatSpecifier),
469 {
470 let (mut range, c) = chars.next().unwrap();
471 assert!(c.as_ref().unwrap().is_ascii_digit());
472 while let Some((r, Ok(next_char))) = chars.peek() {
473 if next_char.is_ascii_digit() {
474 chars.next();
475 range = range.cover(*r);
476 } else {
477 break;
478 }
479 }
480 callback(range, FormatSpecifier::Integer);
481 }
482
483 fn read_identifier<'a, I, F>(chars: &mut std::iter::Peekable<I>, callback: &mut F)
484 where
485 I: Iterator<Item = &'a (TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>,
486 F: FnMut(TextRange, FormatSpecifier),
487 {
488 let (mut range, c) = chars.next().unwrap();
489 assert!(c.as_ref().unwrap().is_alphabetic() || *c.as_ref().unwrap() == '_');
490 while let Some((r, Ok(next_char))) = chars.peek() {
491 if *next_char == '_' || next_char.is_ascii_digit() || next_char.is_alphabetic() {
492 chars.next();
493 range = range.cover(*r);
494 } else {
495 break;
496 }
497 }
498 callback(range, FormatSpecifier::Identifier);
499 }
500 }
501}
502
503impl HasFormatSpecifier for String {
504 fn char_ranges(
505 &self,
506 ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
507 let text = self.text().as_str();
508 let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
509 let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();
510
511 let mut res = Vec::with_capacity(text.len());
512 unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
513 res.push((
514 TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap())
515 + offset,
516 unescaped_char,
517 ))
518 });
519
520 Some(res)
521 }
522}
523
524impl HasFormatSpecifier for RawString {
525 fn char_ranges(
526 &self,
527 ) -> Option<Vec<(TextRange, Result<char, rustc_lexer::unescape::EscapeError>)>> {
528 let text = self.text().as_str();
529 let text = &text[self.text_range_between_quotes()? - self.syntax().text_range().start()];
530 let offset = self.text_range_between_quotes()?.start() - self.syntax().text_range().start();
531
532 let mut res = Vec::with_capacity(text.len());
533 for (idx, c) in text.char_indices() {
534 res.push((TextRange::at(idx.try_into().unwrap(), TextSize::of(c)) + offset, Ok(c)));
535 }
536 Some(res)
537 }
538}