diff options
Diffstat (limited to 'crates/ra_editor/src')
-rw-r--r-- | crates/ra_editor/src/line_index.rs | 303 |
1 files changed, 197 insertions, 106 deletions
diff --git a/crates/ra_editor/src/line_index.rs b/crates/ra_editor/src/line_index.rs index 9abbb0d09..0b3a28cd4 100644 --- a/crates/ra_editor/src/line_index.rs +++ b/crates/ra_editor/src/line_index.rs | |||
@@ -1,43 +1,124 @@ | |||
1 | use crate::TextUnit; | 1 | use crate::TextUnit; |
2 | use rustc_hash::FxHashMap; | ||
2 | use superslice::Ext; | 3 | use superslice::Ext; |
3 | 4 | ||
4 | #[derive(Clone, Debug, Hash, PartialEq, Eq)] | 5 | #[derive(Clone, Debug, PartialEq, Eq)] |
5 | pub struct LineIndex { | 6 | pub struct LineIndex { |
6 | newlines: Vec<TextUnit>, | 7 | newlines: Vec<TextUnit>, |
8 | utf16_lines: FxHashMap<u32, Vec<Utf16Char>>, | ||
7 | } | 9 | } |
8 | 10 | ||
9 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] | 11 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] |
10 | pub struct LineCol { | 12 | pub struct LineCol { |
11 | pub line: u32, | 13 | pub line: u32, |
12 | pub col: TextUnit, | 14 | pub col: u32, |
15 | } | ||
16 | |||
17 | #[derive(Clone, Debug, Hash, PartialEq, Eq)] | ||
18 | struct Utf16Char { | ||
19 | start: TextUnit, | ||
20 | end: TextUnit, | ||
21 | } | ||
22 | |||
23 | impl Utf16Char { | ||
24 | fn len(&self) -> TextUnit { | ||
25 | self.end - self.start | ||
26 | } | ||
13 | } | 27 | } |
14 | 28 | ||
15 | impl LineIndex { | 29 | impl LineIndex { |
16 | pub fn new(text: &str) -> LineIndex { | 30 | pub fn new(text: &str) -> LineIndex { |
31 | let mut utf16_lines = FxHashMap::default(); | ||
32 | let mut utf16_chars = Vec::new(); | ||
33 | |||
17 | let mut newlines = vec![0.into()]; | 34 | let mut newlines = vec![0.into()]; |
18 | let mut curr = 0.into(); | 35 | let mut curr_row = 0.into(); |
36 | let mut curr_col = 0.into(); | ||
37 | let mut line = 0; | ||
19 | for c in text.chars() { | 38 | for c in text.chars() { |
20 | curr += TextUnit::of_char(c); | 39 | curr_row += TextUnit::of_char(c); |
21 | if c == '\n' { | 40 | if c == '\n' { |
22 | newlines.push(curr); | 41 | newlines.push(curr_row); |
42 | |||
43 | // Save any utf-16 characters seen in the previous line | ||
44 | if utf16_chars.len() > 0 { | ||
45 | utf16_lines.insert(line, utf16_chars); | ||
46 | utf16_chars = Vec::new(); | ||
47 | } | ||
48 | |||
49 | // Prepare for processing the next line | ||
50 | curr_col = 0.into(); | ||
51 | line += 1; | ||
52 | continue; | ||
23 | } | 53 | } |
54 | |||
55 | let char_len = TextUnit::of_char(c); | ||
56 | if char_len.to_usize() > 1 { | ||
57 | utf16_chars.push(Utf16Char { | ||
58 | start: curr_col, | ||
59 | end: curr_col + char_len, | ||
60 | }); | ||
61 | } | ||
62 | |||
63 | curr_col += char_len; | ||
64 | } | ||
65 | LineIndex { | ||
66 | newlines, | ||
67 | utf16_lines, | ||
24 | } | 68 | } |
25 | LineIndex { newlines } | ||
26 | } | 69 | } |
27 | 70 | ||
28 | pub fn line_col(&self, offset: TextUnit) -> LineCol { | 71 | pub fn line_col(&self, offset: TextUnit) -> LineCol { |
29 | let line = self.newlines.upper_bound(&offset) - 1; | 72 | let line = self.newlines.upper_bound(&offset) - 1; |
30 | let line_start_offset = self.newlines[line]; | 73 | let line_start_offset = self.newlines[line]; |
31 | let col = offset - line_start_offset; | 74 | let col = offset - line_start_offset; |
75 | |||
32 | LineCol { | 76 | LineCol { |
33 | line: line as u32, | 77 | line: line as u32, |
34 | col, | 78 | col: self.utf8_to_utf16_col(line as u32, col) as u32, |
35 | } | 79 | } |
36 | } | 80 | } |
37 | 81 | ||
38 | pub fn offset(&self, line_col: LineCol) -> TextUnit { | 82 | pub fn offset(&self, line_col: LineCol) -> TextUnit { |
39 | //TODO: return Result | 83 | //TODO: return Result |
40 | self.newlines[line_col.line as usize] + line_col.col | 84 | let col = self.utf16_to_utf8_col(line_col.line, line_col.col); |
85 | self.newlines[line_col.line as usize] + col | ||
86 | } | ||
87 | |||
88 | fn utf8_to_utf16_col(&self, line: u32, mut col: TextUnit) -> usize { | ||
89 | if let Some(utf16_chars) = self.utf16_lines.get(&line) { | ||
90 | let mut correction = TextUnit::from_usize(0); | ||
91 | for c in utf16_chars { | ||
92 | if col >= c.end { | ||
93 | correction += c.len() - TextUnit::from_usize(1); | ||
94 | } else { | ||
95 | // From here on, all utf16 characters come *after* the character we are mapping, | ||
96 | // so we don't need to take them into account | ||
97 | break; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | col -= correction; | ||
102 | } | ||
103 | |||
104 | col.to_usize() | ||
105 | } | ||
106 | |||
107 | fn utf16_to_utf8_col(&self, line: u32, col: u32) -> TextUnit { | ||
108 | let mut col: TextUnit = col.into(); | ||
109 | if let Some(utf16_chars) = self.utf16_lines.get(&line) { | ||
110 | for c in utf16_chars { | ||
111 | if col >= c.start { | ||
112 | col += c.len() - TextUnit::from_usize(1); | ||
113 | } else { | ||
114 | // From here on, all utf16 characters come *after* the character we are mapping, | ||
115 | // so we don't need to take them into account | ||
116 | break; | ||
117 | } | ||
118 | } | ||
119 | } | ||
120 | |||
121 | col | ||
41 | } | 122 | } |
42 | } | 123 | } |
43 | 124 | ||
@@ -45,105 +126,115 @@ impl LineIndex { | |||
45 | fn test_line_index() { | 126 | fn test_line_index() { |
46 | let text = "hello\nworld"; | 127 | let text = "hello\nworld"; |
47 | let index = LineIndex::new(text); | 128 | let index = LineIndex::new(text); |
48 | assert_eq!( | 129 | assert_eq!(index.line_col(0.into()), LineCol { line: 0, col: 0 }); |
49 | index.line_col(0.into()), | 130 | assert_eq!(index.line_col(1.into()), LineCol { line: 0, col: 1 }); |
50 | LineCol { | 131 | assert_eq!(index.line_col(5.into()), LineCol { line: 0, col: 5 }); |
51 | line: 0, | 132 | assert_eq!(index.line_col(6.into()), LineCol { line: 1, col: 0 }); |
52 | col: 0.into() | 133 | assert_eq!(index.line_col(7.into()), LineCol { line: 1, col: 1 }); |
53 | } | 134 | assert_eq!(index.line_col(8.into()), LineCol { line: 1, col: 2 }); |
54 | ); | 135 | assert_eq!(index.line_col(10.into()), LineCol { line: 1, col: 4 }); |
55 | assert_eq!( | 136 | assert_eq!(index.line_col(11.into()), LineCol { line: 1, col: 5 }); |
56 | index.line_col(1.into()), | 137 | assert_eq!(index.line_col(12.into()), LineCol { line: 1, col: 6 }); |
57 | LineCol { | ||
58 | line: 0, | ||
59 | col: 1.into() | ||
60 | } | ||
61 | ); | ||
62 | assert_eq!( | ||
63 | index.line_col(5.into()), | ||
64 | LineCol { | ||
65 | line: 0, | ||
66 | col: 5.into() | ||
67 | } | ||
68 | ); | ||
69 | assert_eq!( | ||
70 | index.line_col(6.into()), | ||
71 | LineCol { | ||
72 | line: 1, | ||
73 | col: 0.into() | ||
74 | } | ||
75 | ); | ||
76 | assert_eq!( | ||
77 | index.line_col(7.into()), | ||
78 | LineCol { | ||
79 | line: 1, | ||
80 | col: 1.into() | ||
81 | } | ||
82 | ); | ||
83 | assert_eq!( | ||
84 | index.line_col(8.into()), | ||
85 | LineCol { | ||
86 | line: 1, | ||
87 | col: 2.into() | ||
88 | } | ||
89 | ); | ||
90 | assert_eq!( | ||
91 | index.line_col(10.into()), | ||
92 | LineCol { | ||
93 | line: 1, | ||
94 | col: 4.into() | ||
95 | } | ||
96 | ); | ||
97 | assert_eq!( | ||
98 | index.line_col(11.into()), | ||
99 | LineCol { | ||
100 | line: 1, | ||
101 | col: 5.into() | ||
102 | } | ||
103 | ); | ||
104 | assert_eq!( | ||
105 | index.line_col(12.into()), | ||
106 | LineCol { | ||
107 | line: 1, | ||
108 | col: 6.into() | ||
109 | } | ||
110 | ); | ||
111 | 138 | ||
112 | let text = "\nhello\nworld"; | 139 | let text = "\nhello\nworld"; |
113 | let index = LineIndex::new(text); | 140 | let index = LineIndex::new(text); |
114 | assert_eq!( | 141 | assert_eq!(index.line_col(0.into()), LineCol { line: 0, col: 0 }); |
115 | index.line_col(0.into()), | 142 | assert_eq!(index.line_col(1.into()), LineCol { line: 1, col: 0 }); |
116 | LineCol { | 143 | assert_eq!(index.line_col(2.into()), LineCol { line: 1, col: 1 }); |
117 | line: 0, | 144 | assert_eq!(index.line_col(6.into()), LineCol { line: 1, col: 5 }); |
118 | col: 0.into() | 145 | assert_eq!(index.line_col(7.into()), LineCol { line: 2, col: 0 }); |
119 | } | 146 | } |
120 | ); | 147 | |
121 | assert_eq!( | 148 | #[cfg(test)] |
122 | index.line_col(1.into()), | 149 | mod test_utf8_utf16_conv { |
123 | LineCol { | 150 | use super::*; |
124 | line: 1, | 151 | |
125 | col: 0.into() | 152 | #[test] |
126 | } | 153 | fn test_char_len() { |
127 | ); | 154 | assert_eq!('メ'.len_utf8(), 3); |
128 | assert_eq!( | 155 | assert_eq!('メ'.len_utf16(), 1); |
129 | index.line_col(2.into()), | 156 | } |
130 | LineCol { | 157 | |
131 | line: 1, | 158 | #[test] |
132 | col: 1.into() | 159 | fn test_empty_index() { |
133 | } | 160 | let col_index = LineIndex::new( |
134 | ); | 161 | " |
135 | assert_eq!( | 162 | const C: char = 'x'; |
136 | index.line_col(6.into()), | 163 | ", |
137 | LineCol { | 164 | ); |
138 | line: 1, | 165 | assert_eq!(col_index.utf16_lines.len(), 0); |
139 | col: 5.into() | 166 | } |
140 | } | 167 | |
141 | ); | 168 | #[test] |
142 | assert_eq!( | 169 | fn test_single_char() { |
143 | index.line_col(7.into()), | 170 | let col_index = LineIndex::new( |
144 | LineCol { | 171 | " |
145 | line: 2, | 172 | const C: char = 'メ'; |
146 | col: 0.into() | 173 | ", |
147 | } | 174 | ); |
148 | ); | 175 | |
176 | assert_eq!(col_index.utf16_lines.len(), 1); | ||
177 | assert_eq!(col_index.utf16_lines[&1].len(), 1); | ||
178 | assert_eq!( | ||
179 | col_index.utf16_lines[&1][0], | ||
180 | Utf16Char { | ||
181 | start: 17.into(), | ||
182 | end: 20.into() | ||
183 | } | ||
184 | ); | ||
185 | |||
186 | // UTF-8 to UTF-16, no changes | ||
187 | assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15); | ||
188 | |||
189 | // UTF-8 to UTF-16 | ||
190 | assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20); | ||
191 | |||
192 | // UTF-16 to UTF-8, no changes | ||
193 | assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextUnit::from(15)); | ||
194 | |||
195 | // UTF-16 to UTF-8 | ||
196 | assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextUnit::from(21)); | ||
197 | } | ||
198 | |||
199 | #[test] | ||
200 | fn test_string() { | ||
201 | let col_index = LineIndex::new( | ||
202 | " | ||
203 | const C: char = \"メ メ\"; | ||
204 | ", | ||
205 | ); | ||
206 | |||
207 | assert_eq!(col_index.utf16_lines.len(), 1); | ||
208 | assert_eq!(col_index.utf16_lines[&1].len(), 2); | ||
209 | assert_eq!( | ||
210 | col_index.utf16_lines[&1][0], | ||
211 | Utf16Char { | ||
212 | start: 17.into(), | ||
213 | end: 20.into() | ||
214 | } | ||
215 | ); | ||
216 | assert_eq!( | ||
217 | col_index.utf16_lines[&1][1], | ||
218 | Utf16Char { | ||
219 | start: 21.into(), | ||
220 | end: 24.into() | ||
221 | } | ||
222 | ); | ||
223 | |||
224 | // UTF-8 to UTF-16 | ||
225 | assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15); | ||
226 | |||
227 | assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19); | ||
228 | assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21); | ||
229 | |||
230 | assert!(col_index.utf8_to_utf16_col(2, 15.into()) == 15); | ||
231 | |||
232 | // UTF-16 to UTF-8 | ||
233 | assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextUnit::from_usize(15)); | ||
234 | |||
235 | assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextUnit::from_usize(20)); | ||
236 | assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextUnit::from_usize(23)); | ||
237 | |||
238 | assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextUnit::from_usize(15)); | ||
239 | } | ||
149 | } | 240 | } |