From 16d3bb9106762710d2a5314ae4042674dad1b446 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lauren=C8=9Biu=20Nicola?= <lnicola@dend.ro>
Date: Sun, 3 May 2020 09:54:15 +0300
Subject: Don't count start of non-ASCII characters as being inside of them

---
 crates/ra_ide_db/src/line_index.rs | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'crates/ra_ide_db/src/line_index.rs')
diff --git a/crates/ra_ide_db/src/line_index.rs b/crates/ra_ide_db/src/line_index.rs
index 00ba95913..212cb7b5b 100644
--- a/crates/ra_ide_db/src/line_index.rs
+++ b/crates/ra_ide_db/src/line_index.rs
@@ -8,7 +8,9 @@ use superslice::Ext;
 
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct LineIndex {
+    /// Offset to the beginning of each line, zero-based
     pub(crate) newlines: Vec<TextSize>,
+    /// List of non-ASCII characters on each line
     pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
 }
 
@@ -22,7 +24,9 @@ pub struct LineCol {
 
 #[derive(Clone, Debug, Hash, PartialEq, Eq)]
 pub(crate) struct Utf16Char {
+    /// Start offset of a character inside a line, zero-based
     pub(crate) start: TextSize,
+    /// End offset of a character inside a line, zero-based
     pub(crate) end: TextSize,
 }
 
@@ -120,7 +124,7 @@ impl LineIndex {
     fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
         if let Some(utf16_chars) = self.utf16_lines.get(&line) {
             for c in utf16_chars {
-                if col >= u32::from(c.start) {
+                if col > u32::from(c.start) {
                     col += u32::from(c.len()) - 1;
                 } else {
                     // From here on, all utf16 characters come *after* the character we are mapping,
@@ -226,8 +230,10 @@ const C: char = \"メ メ\";
         // UTF-16 to UTF-8
         assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));
 
-        assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20));
-        assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(23));
+        // メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1
+        assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20
+        assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space
+        assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24
 
         assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15));
     }
-- 
cgit v1.2.3


From 1d794e859028a71d182daf2fa5826aeeeab2876b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lauren=C8=9Biu=20Nicola?= <lnicola@dend.ro>
Date: Tue, 5 May 2020 20:29:04 +0300
Subject: Fix column conversion for supplementary plane characters

---
 crates/ra_ide_db/src/line_index.rs | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'crates/ra_ide_db/src/line_index.rs')

diff --git a/crates/ra_ide_db/src/line_index.rs b/crates/ra_ide_db/src/line_index.rs
index 212cb7b5b..c7c744fce 100644
--- a/crates/ra_ide_db/src/line_index.rs
+++ b/crates/ra_ide_db/src/line_index.rs
@@ -31,9 +31,19 @@ pub(crate) struct Utf16Char {
 }
 
 impl Utf16Char {
+    /// Returns the length in 8-bit UTF-8 code units.
     fn len(&self) -> TextSize {
         self.end - self.start
     }
+
+    /// Returns the length in 16-bit UTF-16 code units.
+    fn len_utf16(&self) -> usize {
+        if self.len() == TextSize::from(4) {
+            2
+        } else {
+            1
+        }
+    }
 }
 
 impl LineIndex {
@@ -110,7 +120,7 @@ impl LineIndex {
         if let Some(utf16_chars) = self.utf16_lines.get(&line) {
             for c in utf16_chars {
                 if c.end <= col {
-                    res -= usize::from(c.len()) - 1;
+                    res -= usize::from(c.len()) - c.len_utf16();
                 } else {
                     // From here on, all utf16 characters come *after* the character we are mapping,
                     // so we don't need to take them into account
@@ -125,7 +135,7 @@ impl LineIndex {
         if let Some(utf16_chars) = self.utf16_lines.get(&line) {
             for c in utf16_chars {
                 if col > u32::from(c.start) {
-                    col += u32::from(c.len()) - 1;
+                    col += u32::from(c.len()) - c.len_utf16() as u32;
                 } else {
                     // From here on, all utf16 characters come *after* the character we are mapping,
                     // so we don't need to take them into account
@@ -204,6 +214,9 @@ const C: char = 'メ';
 
         // UTF-16 to UTF-8
         assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21));
+
+        let col_index = LineIndex::new("a𐐏b");
+        assert_eq!(col_index.utf16_to_utf8_col(0, 3), TextSize::from(5));
     }
 
     #[test]
-- 
cgit v1.2.3