aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_syntax/src/string_lexing/parser.rs
blob: 7469eb903ede5fb21088f112c2f03a7edf0e6375 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
use rowan::{TextRange, TextUnit};

use self::StringComponentKind::*;

/// Cursor over the text of a quoted literal that splits it into `StringComponent`s.
pub struct Parser<'a> {
    /// Delimiter byte of the literal. `parse_component` stops when it sees this character, and
    /// only treats `\` + newline as a line continuation when it is `b'"'`.
    pub(super) quote: u8,
    /// The text being parsed.
    pub(super) src: &'a str,
    /// Current byte offset into `src`; always advanced by whole characters.
    pub(super) pos: usize,
}

impl<'a> Parser<'a> {
    /// Creates a parser positioned at the start of `src` that treats the byte `quote` as the
    /// literal's delimiter.
    pub fn new(src: &'a str, quote: u8) -> Parser<'a> {
        Parser { quote, src, pos: 0 }
    }

    // Utility methods

    /// Returns the character at the current position without consuming it, or `None` when the
    /// end of the input has been reached.
    pub fn peek(&self) -> Option<char> {
        // Slicing at `src.len()` yields an empty string, so end of input needs no special case.
        self.src[self.pos..].chars().next()
    }

    /// Consumes and returns the character at the current position.
    ///
    /// # Panics
    ///
    /// Panics if the end of the input has already been reached.
    pub fn advance(&mut self) -> char {
        let next = self.peek().expect("cannot advance if end of input is reached");
        self.pos += next.len_utf8();
        next
    }

    /// Consumes characters for as long as they are whitespace.
    pub fn skip_whitespace(&mut self) {
        while self.peek().map_or(false, char::is_whitespace) {
            self.advance();
        }
    }

    /// Returns the current byte offset as a `TextUnit`.
    pub fn get_pos(&self) -> TextUnit {
        (self.pos as u32).into()
    }

    // Char parsing methods

    /// Parses the remainder of a `\u` escape; the `\u` itself has already been consumed and
    /// `start` is the offset of the backslash.
    ///
    /// If a `{` follows, everything up to and including the next `}` (or up to the end of the
    /// input when there is none) is made part of the component. Otherwise the component covers
    /// just `\u`.
    fn parse_unicode_escape(&mut self, start: TextUnit) -> StringComponent {
        match self.peek() {
            Some('{') => {
                self.advance();

                // Parse anything until we reach `}`
                while let Some(next) = self.peek() {
                    self.advance();
                    if next == '}' {
                        break;
                    }
                }

                let end = self.get_pos();
                StringComponent::new(TextRange::from_to(start, end), UnicodeEscape)
            }
            Some(_) | None => {
                let end = self.get_pos();
                StringComponent::new(TextRange::from_to(start, end), UnicodeEscape)
            }
        }
    }

    /// Parses the remainder of a `\x` escape; the `\x` itself has already been consumed and
    /// `start` is the offset of the backslash.
    ///
    /// Characters are taken until the literal's closing quote, the end of the input, or until at
    /// least two bytes of code have been collected, whichever comes first.
    fn parse_ascii_code_escape(&mut self, start: TextUnit) -> StringComponent {
        let quote = self.quote as char;
        let code_start = self.pos;
        while let Some(next) = self.peek() {
            // Stop at the delimiter this parser was configured with (not just `'`), so that a
            // truncated escape such as `"\x"` does not swallow the closing quote. The length
            // check uses `>=` because a multi-byte character can step over the two-byte mark.
            if next == quote || self.pos - code_start >= 2 {
                break;
            }

            self.advance();
        }

        let end = self.get_pos();
        StringComponent::new(TextRange::from_to(start, end), AsciiCodeEscape)
    }

    /// Parses an escape sequence whose leading backslash (at offset `start`) has already been
    /// consumed.
    ///
    /// `x` and `u` dispatch to the dedicated code/unicode escape parsers; any other character
    /// yields a two-character `AsciiEscape`. A backslash at the very end of the input yields an
    /// `AsciiEscape` covering only the backslash.
    fn parse_escape(&mut self, start: TextUnit) -> StringComponent {
        if self.peek().is_none() {
            return StringComponent::new(TextRange::from_to(start, self.get_pos()), AsciiEscape);
        }

        let next = self.advance();
        match next {
            'x' => self.parse_ascii_code_escape(start),
            'u' => self.parse_unicode_escape(start),
            _ => {
                let end = self.get_pos();
                StringComponent::new(TextRange::from_to(start, end), AsciiEscape)
            }
        }
    }

    /// Tries to parse a line continuation whose backslash (at offset `start`) has already been
    /// consumed.
    ///
    /// Returns `None` without consuming anything unless the next character is `\n` or `\r`.
    pub fn parse_ignore_newline(&mut self, start: TextUnit) -> Option<StringComponent> {
        // In string literals, when a `\` occurs immediately before the newline, the `\`,
        // the newline, and all whitespace at the beginning of the next line are ignored
        match self.peek() {
            Some('\n') | Some('\r') => {
                // The line break itself is whitespace, so this consumes it as well.
                self.skip_whitespace();
                Some(StringComponent::new(
                    TextRange::from_to(start, self.get_pos()),
                    StringComponentKind::IgnoreNewline,
                ))
            }
            _ => None,
        }
    }

    /// Parses the next component of the literal's body.
    ///
    /// Returns `None`, consuming nothing, at the end of the input or when the next character is
    /// the closing quote.
    pub fn parse_component(&mut self) -> Option<StringComponent> {
        let next = self.peek()?;

        // Ignore string close
        if next == self.quote as char {
            return None;
        }

        let start = self.get_pos();
        self.advance();

        if next == '\\' {
            // Strings can use `\` to ignore newlines, so we first try to parse one of those
            // before falling back to parsing char escapes
            if self.quote == b'"' {
                if let Some(component) = self.parse_ignore_newline(start) {
                    return Some(component);
                }
            }

            Some(self.parse_escape(start))
        } else {
            let end = self.get_pos();
            Some(StringComponent::new(TextRange::from_to(start, end), CodePoint))
        }
    }

    /// Consumes everything that is left of the input and returns its range, or `None` if nothing
    /// is left.
    pub fn parse_suffix(&mut self) -> Option<TextRange> {
        let start = self.get_pos();
        self.peek()?;
        // The suffix is simply the rest of the input, so jump straight to the end.
        self.pos = self.src.len();
        let end = self.get_pos();
        Some(TextRange::from_to(start, end))
    }
}

/// A classified span of the literal text handed to the parser.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct StringComponent {
    /// Range of the component, expressed in offsets into the parser's source text.
    pub range: TextRange,
    /// What sort of component the range holds.
    pub kind: StringComponentKind,
}

impl StringComponent {
    fn new(range: TextRange, kind: StringComponentKind) -> StringComponent {
        StringComponent { range, kind }
    }
}

/// The kinds of component the parser can produce.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum StringComponentKind {
    /// A backslash immediately followed by a line break, together with the whitespace after it.
    IgnoreNewline,
    /// A single character that is not introduced by a backslash.
    CodePoint,
    /// A backslash followed by any character other than `x` or `u`, or a backslash at the very
    /// end of the input.
    AsciiEscape,
    /// A `\x` escape together with the code characters collected after it.
    AsciiCodeEscape,
    /// A `\u` escape, including its `{...}` body when one follows.
    UnicodeEscape,
}