aboutsummaryrefslogtreecommitdiff
path: root/crates/ra_ide_db/src/line_index.rs
diff options
context:
space:
mode:
Diffstat (limited to 'crates/ra_ide_db/src/line_index.rs')
-rw-r--r--crates/ra_ide_db/src/line_index.rs29
1 files changed, 24 insertions, 5 deletions
diff --git a/crates/ra_ide_db/src/line_index.rs b/crates/ra_ide_db/src/line_index.rs
index 00ba95913..c7c744fce 100644
--- a/crates/ra_ide_db/src/line_index.rs
+++ b/crates/ra_ide_db/src/line_index.rs
@@ -8,7 +8,9 @@ use superslice::Ext;
8 8
9#[derive(Clone, Debug, PartialEq, Eq)] 9#[derive(Clone, Debug, PartialEq, Eq)]
10pub struct LineIndex { 10pub struct LineIndex {
11 /// Offset to the beginning of each line, zero-based
11 pub(crate) newlines: Vec<TextSize>, 12 pub(crate) newlines: Vec<TextSize>,
13 /// List of non-ASCII characters on each line
12 pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>, 14 pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
13} 15}
14 16
@@ -22,14 +24,26 @@ pub struct LineCol {
22 24
23#[derive(Clone, Debug, Hash, PartialEq, Eq)] 25#[derive(Clone, Debug, Hash, PartialEq, Eq)]
24pub(crate) struct Utf16Char { 26pub(crate) struct Utf16Char {
27 /// Start offset of a character inside a line, zero-based
25 pub(crate) start: TextSize, 28 pub(crate) start: TextSize,
29 /// End offset of a character inside a line, zero-based
26 pub(crate) end: TextSize, 30 pub(crate) end: TextSize,
27} 31}
28 32
29impl Utf16Char { 33impl Utf16Char {
34 /// Returns the length in 8-bit UTF-8 code units.
30 fn len(&self) -> TextSize { 35 fn len(&self) -> TextSize {
31 self.end - self.start 36 self.end - self.start
32 } 37 }
38
39 /// Returns the length in 16-bit UTF-16 code units.
40 fn len_utf16(&self) -> usize {
41 if self.len() == TextSize::from(4) {
42 2
43 } else {
44 1
45 }
46 }
33} 47}
34 48
35impl LineIndex { 49impl LineIndex {
@@ -106,7 +120,7 @@ impl LineIndex {
106 if let Some(utf16_chars) = self.utf16_lines.get(&line) { 120 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
107 for c in utf16_chars { 121 for c in utf16_chars {
108 if c.end <= col { 122 if c.end <= col {
109 res -= usize::from(c.len()) - 1; 123 res -= usize::from(c.len()) - c.len_utf16();
110 } else { 124 } else {
111 // From here on, all utf16 characters come *after* the character we are mapping, 125 // From here on, all utf16 characters come *after* the character we are mapping,
112 // so we don't need to take them into account 126 // so we don't need to take them into account
@@ -120,8 +134,8 @@ impl LineIndex {
120 fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize { 134 fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
121 if let Some(utf16_chars) = self.utf16_lines.get(&line) { 135 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
122 for c in utf16_chars { 136 for c in utf16_chars {
123 if col >= u32::from(c.start) { 137 if col > u32::from(c.start) {
124 col += u32::from(c.len()) - 1; 138 col += u32::from(c.len()) - c.len_utf16() as u32;
125 } else { 139 } else {
126 // From here on, all utf16 characters come *after* the character we are mapping, 140 // From here on, all utf16 characters come *after* the character we are mapping,
127 // so we don't need to take them into account 141 // so we don't need to take them into account
@@ -200,6 +214,9 @@ const C: char = 'メ';
200 214
201 // UTF-16 to UTF-8 215 // UTF-16 to UTF-8
202 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); 216 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21));
217
218 let col_index = LineIndex::new("a𐐏b");
219 assert_eq!(col_index.utf16_to_utf8_col(0, 3), TextSize::from(5));
203 } 220 }
204 221
205 #[test] 222 #[test]
@@ -226,8 +243,10 @@ const C: char = \"メ メ\";
226 // UTF-16 to UTF-8 243 // UTF-16 to UTF-8
227 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15)); 244 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));
228 245
229 assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); 246 // メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1
230 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(23)); 247 assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20
248 assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space
249 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24
231 250
232 assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15)); 251 assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15));
233 } 252 }