aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorbors[bot] <26634292+bors[bot]@users.noreply.github.com>2020-05-03 09:57:02 +0100
committerGitHub <[email protected]>2020-05-03 09:57:02 +0100
commit682c079043656a55775eff33d806da517542f9eb (patch)
tree7ac096bc57e71ff76f05691a76e53df06de24d62
parent2474f42ae95bffea7c0bc713f92322bfec4d59a7 (diff)
parent16d3bb9106762710d2a5314ae4042674dad1b446 (diff)
Merge #4276
4276: Don't count start of non-ASCII characters as being inside of them r=matklad a=lnicola I'm still not sure that `utf16_to_utf8_col` is correct for code points from Supplementary Planes. These have two UTF-16 code units, and I feel we're not going to count them correctly. Fixes the crash in https://github.com/rust-analyzer/rust-analyzer/issues/4263#issuecomment-622988258. Co-authored-by: Laurențiu Nicola <[email protected]>
-rw-r--r--crates/ra_ide_db/src/line_index.rs12
1 files changed, 9 insertions, 3 deletions
diff --git a/crates/ra_ide_db/src/line_index.rs b/crates/ra_ide_db/src/line_index.rs
index 00ba95913..212cb7b5b 100644
--- a/crates/ra_ide_db/src/line_index.rs
+++ b/crates/ra_ide_db/src/line_index.rs
@@ -8,7 +8,9 @@ use superslice::Ext;
8 8
9#[derive(Clone, Debug, PartialEq, Eq)] 9#[derive(Clone, Debug, PartialEq, Eq)]
10pub struct LineIndex { 10pub struct LineIndex {
11 /// Offset to the beginning of each line, zero-based
11 pub(crate) newlines: Vec<TextSize>, 12 pub(crate) newlines: Vec<TextSize>,
13 /// List of non-ASCII characters on each line
12 pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>, 14 pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
13} 15}
14 16
@@ -22,7 +24,9 @@ pub struct LineCol {
22 24
23#[derive(Clone, Debug, Hash, PartialEq, Eq)] 25#[derive(Clone, Debug, Hash, PartialEq, Eq)]
24pub(crate) struct Utf16Char { 26pub(crate) struct Utf16Char {
27 /// Start offset of a character inside a line, zero-based
25 pub(crate) start: TextSize, 28 pub(crate) start: TextSize,
29 /// End offset of a character inside a line, zero-based
26 pub(crate) end: TextSize, 30 pub(crate) end: TextSize,
27} 31}
28 32
@@ -120,7 +124,7 @@ impl LineIndex {
120 fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize { 124 fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize {
121 if let Some(utf16_chars) = self.utf16_lines.get(&line) { 125 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
122 for c in utf16_chars { 126 for c in utf16_chars {
123 if col >= u32::from(c.start) { 127 if col > u32::from(c.start) {
124 col += u32::from(c.len()) - 1; 128 col += u32::from(c.len()) - 1;
125 } else { 129 } else {
126 // From here on, all utf16 characters come *after* the character we are mapping, 130 // From here on, all utf16 characters come *after* the character we are mapping,
@@ -226,8 +230,10 @@ const C: char = \"メ メ\";
226 // UTF-16 to UTF-8 230 // UTF-16 to UTF-8
227 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15)); 231 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15));
228 232
229 assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); 233 // メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1
230 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(23)); 234 assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20
235 assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space
236 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24
231 237
232 assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15)); 238 assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15));
233 } 239 }