From 16d3bb9106762710d2a5314ae4042674dad1b446 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauren=C8=9Biu=20Nicola?= Date: Sun, 3 May 2020 09:54:15 +0300 Subject: Don't count start of non-ASCII characters as being inside of them --- crates/ra_ide_db/src/line_index.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'crates/ra_ide_db/src/line_index.rs') diff --git a/crates/ra_ide_db/src/line_index.rs b/crates/ra_ide_db/src/line_index.rs index 00ba95913..212cb7b5b 100644 --- a/crates/ra_ide_db/src/line_index.rs +++ b/crates/ra_ide_db/src/line_index.rs @@ -8,7 +8,9 @@ use superslice::Ext; #[derive(Clone, Debug, PartialEq, Eq)] pub struct LineIndex { + /// Offset to the beginning of each line, zero-based pub(crate) newlines: Vec<TextSize>, + /// List of non-ASCII characters on each line pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>, } @@ -22,7 +24,9 @@ pub struct LineCol { #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub(crate) struct Utf16Char { + /// Start offset of a character inside a line, zero-based pub(crate) start: TextSize, + /// End offset of a character inside a line, zero-based pub(crate) end: TextSize, } @@ -120,7 +124,7 @@ impl LineIndex { fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize { if let Some(utf16_chars) = self.utf16_lines.get(&line) { for c in utf16_chars { - if col >= u32::from(c.start) { + if col > u32::from(c.start) { col += u32::from(c.len()) - 1; } else { // From here on, all utf16 characters come *after* the character we are mapping, @@ -226,8 +230,10 @@ const C: char = \"メ メ\"; // UTF-16 to UTF-8 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15)); - assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); - assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(23)); + // メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1 + assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20 + assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space + 
assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24 assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15)); } -- cgit v1.2.3 From 1d794e859028a71d182daf2fa5826aeeeab2876b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lauren=C8=9Biu=20Nicola?= Date: Tue, 5 May 2020 20:29:04 +0300 Subject: Fix column conversion for supplementary plane characters --- crates/ra_ide_db/src/line_index.rs | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'crates/ra_ide_db/src/line_index.rs') diff --git a/crates/ra_ide_db/src/line_index.rs b/crates/ra_ide_db/src/line_index.rs index 212cb7b5b..c7c744fce 100644 --- a/crates/ra_ide_db/src/line_index.rs +++ b/crates/ra_ide_db/src/line_index.rs @@ -31,9 +31,19 @@ pub(crate) struct Utf16Char { } impl Utf16Char { + /// Returns the length in 8-bit UTF-8 code units. fn len(&self) -> TextSize { self.end - self.start } + + /// Returns the length in 16-bit UTF-16 code units. + fn len_utf16(&self) -> usize { + if self.len() == TextSize::from(4) { + 2 + } else { + 1 + } + } } impl LineIndex { @@ -110,7 +120,7 @@ impl LineIndex { if let Some(utf16_chars) = self.utf16_lines.get(&line) { for c in utf16_chars { if c.end <= col { - res -= usize::from(c.len()) - 1; + res -= usize::from(c.len()) - c.len_utf16(); } else { // From here on, all utf16 characters come *after* the character we are mapping, // so we don't need to take them into account @@ -125,7 +135,7 @@ impl LineIndex { if let Some(utf16_chars) = self.utf16_lines.get(&line) { for c in utf16_chars { if col > u32::from(c.start) { - col += u32::from(c.len()) - 1; + col += u32::from(c.len()) - c.len_utf16() as u32; } else { // From here on, all utf16 characters come *after* the character we are mapping, // so we don't need to take them into account @@ -204,6 +214,9 @@ const C: char = 'メ'; // UTF-16 to UTF-8 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); + + let col_index = LineIndex::new("a𐐏b"); + 
assert_eq!(col_index.utf16_to_utf8_col(0, 3), TextSize::from(5)); } #[test] -- cgit v1.2.3