aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_ide_api_light/src/line_index.rs
diff options
context:
space:
mode:
author bors[bot] <bors[bot]@users.noreply.github.com> 2019-01-08 19:48:48 +0000
committer bors[bot] <bors[bot]@users.noreply.github.com> 2019-01-08 19:48:48 +0000
commit 46f74e33ca53a7897e9020d3de75cc76a6b89d79 (patch)
tree 2bc001c8ecf58b49ac9a0da1f20d5644ce29fb3a /crates/ra_ide_api_light/src/line_index.rs
parent 4f4f7933b1b7ff34f8633b1686b18b2d1b994c47 (diff)
parent 0c62b1bb7a49bf527780ce1f8cade5eb4fbfdb2d (diff)
Merge #471
471: rename crates to match reality r=matklad a=matklad Co-authored-by: Aleksey Kladov <[email protected]>
Diffstat (limited to 'crates/ra_ide_api_light/src/line_index.rs')
-rw-r--r-- crates/ra_ide_api_light/src/line_index.rs 399
1 files changed, 399 insertions, 0 deletions
diff --git a/crates/ra_ide_api_light/src/line_index.rs b/crates/ra_ide_api_light/src/line_index.rs
new file mode 100644
index 000000000..898fee7e0
--- /dev/null
+++ b/crates/ra_ide_api_light/src/line_index.rs
@@ -0,0 +1,399 @@
1use crate::TextUnit;
2use rustc_hash::FxHashMap;
3use superslice::Ext;
4
5#[derive(Clone, Debug, PartialEq, Eq)]
6pub struct LineIndex {
7 pub(crate) newlines: Vec<TextUnit>,
8 pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
9}
10
/// A position in a text expressed as a line number and a column within that line.
#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)]
pub struct LineCol {
    /// Line number; the first line is line 0.
    pub line: u32,
    /// Column within the line, counted in UTF-16 units rather than UTF-8 bytes.
    pub col_utf16: u32,
}
16
17#[derive(Clone, Debug, Hash, PartialEq, Eq)]
18pub(crate) struct Utf16Char {
19 pub(crate) start: TextUnit,
20 pub(crate) end: TextUnit,
21}
22
23impl Utf16Char {
24 fn len(&self) -> TextUnit {
25 self.end - self.start
26 }
27}
28
29impl LineIndex {
30 pub fn new(text: &str) -> LineIndex {
31 let mut utf16_lines = FxHashMap::default();
32 let mut utf16_chars = Vec::new();
33
34 let mut newlines = vec![0.into()];
35 let mut curr_row = 0.into();
36 let mut curr_col = 0.into();
37 let mut line = 0;
38 for c in text.chars() {
39 curr_row += TextUnit::of_char(c);
40 if c == '\n' {
41 newlines.push(curr_row);
42
43 // Save any utf-16 characters seen in the previous line
44 if utf16_chars.len() > 0 {
45 utf16_lines.insert(line, utf16_chars);
46 utf16_chars = Vec::new();
47 }
48
49 // Prepare for processing the next line
50 curr_col = 0.into();
51 line += 1;
52 continue;
53 }
54
55 let char_len = TextUnit::of_char(c);
56 if char_len.to_usize() > 1 {
57 utf16_chars.push(Utf16Char {
58 start: curr_col,
59 end: curr_col + char_len,
60 });
61 }
62
63 curr_col += char_len;
64 }
65
66 // Save any utf-16 characters seen in the last line
67 if utf16_chars.len() > 0 {
68 utf16_lines.insert(line, utf16_chars);
69 }
70
71 LineIndex {
72 newlines,
73 utf16_lines,
74 }
75 }
76
77 pub fn line_col(&self, offset: TextUnit) -> LineCol {
78 let line = self.newlines.upper_bound(&offset) - 1;
79 let line_start_offset = self.newlines[line];
80 let col = offset - line_start_offset;
81
82 LineCol {
83 line: line as u32,
84 col_utf16: self.utf8_to_utf16_col(line as u32, col) as u32,
85 }
86 }
87
88 pub fn offset(&self, line_col: LineCol) -> TextUnit {
89 //TODO: return Result
90 let col = self.utf16_to_utf8_col(line_col.line, line_col.col_utf16);
91 self.newlines[line_col.line as usize] + col
92 }
93
94 fn utf8_to_utf16_col(&self, line: u32, mut col: TextUnit) -> usize {
95 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
96 let mut correction = TextUnit::from_usize(0);
97 for c in utf16_chars {
98 if col >= c.end {
99 correction += c.len() - TextUnit::from_usize(1);
100 } else {
101 // From here on, all utf16 characters come *after* the character we are mapping,
102 // so we don't need to take them into account
103 break;
104 }
105 }
106
107 col -= correction;
108 }
109
110 col.to_usize()
111 }
112
113 fn utf16_to_utf8_col(&self, line: u32, col: u32) -> TextUnit {
114 let mut col: TextUnit = col.into();
115 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
116 for c in utf16_chars {
117 if col >= c.start {
118 col += c.len() - TextUnit::from_usize(1);
119 } else {
120 // From here on, all utf16 characters come *after* the character we are mapping,
121 // so we don't need to take them into account
122 break;
123 }
124 }
125 }
126
127 col
128 }
129}
130
131#[cfg(test)]
132/// Simple reference implementation to use in proptests
133pub fn to_line_col(text: &str, offset: TextUnit) -> LineCol {
134 let mut res = LineCol {
135 line: 0,
136 col_utf16: 0,
137 };
138 for (i, c) in text.char_indices() {
139 if i + c.len_utf8() > offset.to_usize() {
140 // if it's an invalid offset, inside a multibyte char
141 // return as if it was at the start of the char
142 break;
143 }
144 if c == '\n' {
145 res.line += 1;
146 res.col_utf16 = 0;
147 } else {
148 res.col_utf16 += 1;
149 }
150 }
151 res
152}
153
154#[cfg(test)]
155mod test_line_index {
156 use super::*;
157 use proptest::{prelude::*, proptest, proptest_helper};
158 use ra_text_edit::test_utils::{arb_text, arb_offset};
159
160 #[test]
161 fn test_line_index() {
162 let text = "hello\nworld";
163 let index = LineIndex::new(text);
164 assert_eq!(
165 index.line_col(0.into()),
166 LineCol {
167 line: 0,
168 col_utf16: 0
169 }
170 );
171 assert_eq!(
172 index.line_col(1.into()),
173 LineCol {
174 line: 0,
175 col_utf16: 1
176 }
177 );
178 assert_eq!(
179 index.line_col(5.into()),
180 LineCol {
181 line: 0,
182 col_utf16: 5
183 }
184 );
185 assert_eq!(
186 index.line_col(6.into()),
187 LineCol {
188 line: 1,
189 col_utf16: 0
190 }
191 );
192 assert_eq!(
193 index.line_col(7.into()),
194 LineCol {
195 line: 1,
196 col_utf16: 1
197 }
198 );
199 assert_eq!(
200 index.line_col(8.into()),
201 LineCol {
202 line: 1,
203 col_utf16: 2
204 }
205 );
206 assert_eq!(
207 index.line_col(10.into()),
208 LineCol {
209 line: 1,
210 col_utf16: 4
211 }
212 );
213 assert_eq!(
214 index.line_col(11.into()),
215 LineCol {
216 line: 1,
217 col_utf16: 5
218 }
219 );
220 assert_eq!(
221 index.line_col(12.into()),
222 LineCol {
223 line: 1,
224 col_utf16: 6
225 }
226 );
227
228 let text = "\nhello\nworld";
229 let index = LineIndex::new(text);
230 assert_eq!(
231 index.line_col(0.into()),
232 LineCol {
233 line: 0,
234 col_utf16: 0
235 }
236 );
237 assert_eq!(
238 index.line_col(1.into()),
239 LineCol {
240 line: 1,
241 col_utf16: 0
242 }
243 );
244 assert_eq!(
245 index.line_col(2.into()),
246 LineCol {
247 line: 1,
248 col_utf16: 1
249 }
250 );
251 assert_eq!(
252 index.line_col(6.into()),
253 LineCol {
254 line: 1,
255 col_utf16: 5
256 }
257 );
258 assert_eq!(
259 index.line_col(7.into()),
260 LineCol {
261 line: 2,
262 col_utf16: 0
263 }
264 );
265 }
266
267 fn arb_text_with_offset() -> BoxedStrategy<(TextUnit, String)> {
268 arb_text()
269 .prop_flat_map(|text| (arb_offset(&text), Just(text)))
270 .boxed()
271 }
272
273 fn to_line_col(text: &str, offset: TextUnit) -> LineCol {
274 let mut res = LineCol {
275 line: 0,
276 col_utf16: 0,
277 };
278 for (i, c) in text.char_indices() {
279 if i + c.len_utf8() > offset.to_usize() {
280 // if it's an invalid offset, inside a multibyte char
281 // return as if it was at the start of the char
282 break;
283 }
284 if c == '\n' {
285 res.line += 1;
286 res.col_utf16 = 0;
287 } else {
288 res.col_utf16 += 1;
289 }
290 }
291 res
292 }
293
294 proptest! {
295 #[test]
296 fn test_line_index_proptest((offset, text) in arb_text_with_offset()) {
297 let expected = to_line_col(&text, offset);
298 let line_index = LineIndex::new(&text);
299 let actual = line_index.line_col(offset);
300
301 assert_eq!(actual, expected);
302 }
303 }
304}
305
306#[cfg(test)]
307mod test_utf8_utf16_conv {
308 use super::*;
309
310 #[test]
311 fn test_char_len() {
312 assert_eq!('メ'.len_utf8(), 3);
313 assert_eq!('メ'.len_utf16(), 1);
314 }
315
316 #[test]
317 fn test_empty_index() {
318 let col_index = LineIndex::new(
319 "
320const C: char = 'x';
321",
322 );
323 assert_eq!(col_index.utf16_lines.len(), 0);
324 }
325
326 #[test]
327 fn test_single_char() {
328 let col_index = LineIndex::new(
329 "
330const C: char = 'メ';
331",
332 );
333
334 assert_eq!(col_index.utf16_lines.len(), 1);
335 assert_eq!(col_index.utf16_lines[&1].len(), 1);
336 assert_eq!(
337 col_index.utf16_lines[&1][0],
338 Utf16Char {
339 start: 17.into(),
340 end: 20.into()
341 }
342 );
343
344 // UTF-8 to UTF-16, no changes
345 assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
346
347 // UTF-8 to UTF-16
348 assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20);
349
350 // UTF-16 to UTF-8, no changes
351 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextUnit::from(15));
352
353 // UTF-16 to UTF-8
354 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextUnit::from(21));
355 }
356
357 #[test]
358 fn test_string() {
359 let col_index = LineIndex::new(
360 "
361const C: char = \"メ メ\";
362",
363 );
364
365 assert_eq!(col_index.utf16_lines.len(), 1);
366 assert_eq!(col_index.utf16_lines[&1].len(), 2);
367 assert_eq!(
368 col_index.utf16_lines[&1][0],
369 Utf16Char {
370 start: 17.into(),
371 end: 20.into()
372 }
373 );
374 assert_eq!(
375 col_index.utf16_lines[&1][1],
376 Utf16Char {
377 start: 21.into(),
378 end: 24.into()
379 }
380 );
381
382 // UTF-8 to UTF-16
383 assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
384
385 assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19);
386 assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21);
387
388 assert!(col_index.utf8_to_utf16_col(2, 15.into()) == 15);
389
390 // UTF-16 to UTF-8
391 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextUnit::from_usize(15));
392
393 assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextUnit::from_usize(20));
394 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextUnit::from_usize(23));
395
396 assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextUnit::from_usize(15));
397 }
398
399}