diff options
Diffstat (limited to 'crates/ra_ide_db/src/line_index.rs')
-rw-r--r-- | crates/ra_ide_db/src/line_index.rs | 29 |
1 files changed, 24 insertions, 5 deletions
diff --git a/crates/ra_ide_db/src/line_index.rs b/crates/ra_ide_db/src/line_index.rs index 00ba95913..c7c744fce 100644 --- a/crates/ra_ide_db/src/line_index.rs +++ b/crates/ra_ide_db/src/line_index.rs | |||
@@ -8,7 +8,9 @@ use superslice::Ext; | |||
8 | 8 | ||
9 | #[derive(Clone, Debug, PartialEq, Eq)] | 9 | #[derive(Clone, Debug, PartialEq, Eq)] |
10 | pub struct LineIndex { | 10 | pub struct LineIndex { |
11 | /// Offset the the beginning of each line, zero-based | ||
11 | pub(crate) newlines: Vec<TextSize>, | 12 | pub(crate) newlines: Vec<TextSize>, |
13 | /// List of non-ASCII characters on each line | ||
12 | pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>, | 14 | pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>, |
13 | } | 15 | } |
14 | 16 | ||
@@ -22,14 +24,26 @@ pub struct LineCol { | |||
22 | 24 | ||
23 | #[derive(Clone, Debug, Hash, PartialEq, Eq)] | 25 | #[derive(Clone, Debug, Hash, PartialEq, Eq)] |
24 | pub(crate) struct Utf16Char { | 26 | pub(crate) struct Utf16Char { |
27 | /// Start offset of a character inside a line, zero-based | ||
25 | pub(crate) start: TextSize, | 28 | pub(crate) start: TextSize, |
29 | /// End offset of a character inside a line, zero-based | ||
26 | pub(crate) end: TextSize, | 30 | pub(crate) end: TextSize, |
27 | } | 31 | } |
28 | 32 | ||
29 | impl Utf16Char { | 33 | impl Utf16Char { |
34 | /// Returns the length in 8-bit UTF-8 code units. | ||
30 | fn len(&self) -> TextSize { | 35 | fn len(&self) -> TextSize { |
31 | self.end - self.start | 36 | self.end - self.start |
32 | } | 37 | } |
38 | |||
39 | /// Returns the length in 16-bit UTF-16 code units. | ||
40 | fn len_utf16(&self) -> usize { | ||
41 | if self.len() == TextSize::from(4) { | ||
42 | 2 | ||
43 | } else { | ||
44 | 1 | ||
45 | } | ||
46 | } | ||
33 | } | 47 | } |
34 | 48 | ||
35 | impl LineIndex { | 49 | impl LineIndex { |
@@ -106,7 +120,7 @@ impl LineIndex { | |||
106 | if let Some(utf16_chars) = self.utf16_lines.get(&line) { | 120 | if let Some(utf16_chars) = self.utf16_lines.get(&line) { |
107 | for c in utf16_chars { | 121 | for c in utf16_chars { |
108 | if c.end <= col { | 122 | if c.end <= col { |
109 | res -= usize::from(c.len()) - 1; | 123 | res -= usize::from(c.len()) - c.len_utf16(); |
110 | } else { | 124 | } else { |
111 | // From here on, all utf16 characters come *after* the character we are mapping, | 125 | // From here on, all utf16 characters come *after* the character we are mapping, |
112 | // so we don't need to take them into account | 126 | // so we don't need to take them into account |
@@ -120,8 +134,8 @@ impl LineIndex { | |||
120 | fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize { | 134 | fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize { |
121 | if let Some(utf16_chars) = self.utf16_lines.get(&line) { | 135 | if let Some(utf16_chars) = self.utf16_lines.get(&line) { |
122 | for c in utf16_chars { | 136 | for c in utf16_chars { |
123 | if col >= u32::from(c.start) { | 137 | if col > u32::from(c.start) { |
124 | col += u32::from(c.len()) - 1; | 138 | col += u32::from(c.len()) - c.len_utf16() as u32; |
125 | } else { | 139 | } else { |
126 | // From here on, all utf16 characters come *after* the character we are mapping, | 140 | // From here on, all utf16 characters come *after* the character we are mapping, |
127 | // so we don't need to take them into account | 141 | // so we don't need to take them into account |
@@ -200,6 +214,9 @@ const C: char = 'メ'; | |||
200 | 214 | ||
201 | // UTF-16 to UTF-8 | 215 | // UTF-16 to UTF-8 |
202 | assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); | 216 | assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); |
217 | |||
218 | let col_index = LineIndex::new("a𐐏b"); | ||
219 | assert_eq!(col_index.utf16_to_utf8_col(0, 3), TextSize::from(5)); | ||
203 | } | 220 | } |
204 | 221 | ||
205 | #[test] | 222 | #[test] |
@@ -226,8 +243,10 @@ const C: char = \"メ メ\"; | |||
226 | // UTF-16 to UTF-8 | 243 | // UTF-16 to UTF-8 |
227 | assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15)); | 244 | assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15)); |
228 | 245 | ||
229 | assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); | 246 | // メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1 |
230 | assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(23)); | 247 | assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20 |
248 | assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space | ||
249 | assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24 | ||
231 | 250 | ||
232 | assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15)); | 251 | assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15)); |
233 | } | 252 | } |