diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2020-05-03 09:57:02 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2020-05-03 09:57:02 +0100 |
commit | 682c079043656a55775eff33d806da517542f9eb (patch) | |
tree | 7ac096bc57e71ff76f05691a76e53df06de24d62 /crates | |
parent | 2474f42ae95bffea7c0bc713f92322bfec4d59a7 (diff) | |
parent | 16d3bb9106762710d2a5314ae4042674dad1b446 (diff) |
Merge #4276
4276: Don't count start of non-ASCII characters as being inside of them r=matklad a=lnicola
I'm still not sure that `utf16_to_utf8_col` is correct for code points from Supplementary Planes. These have two UTF-16 code units, and I feel we're not going to count them correctly.
Fixes the crash in https://github.com/rust-analyzer/rust-analyzer/issues/4263#issuecomment-622988258.
Co-authored-by: Laurențiu Nicola <[email protected]>
Diffstat (limited to 'crates')
-rw-r--r-- | crates/ra_ide_db/src/line_index.rs | 12 |
1 files changed, 9 insertions, 3 deletions
diff --git a/crates/ra_ide_db/src/line_index.rs b/crates/ra_ide_db/src/line_index.rs index 00ba95913..212cb7b5b 100644 --- a/crates/ra_ide_db/src/line_index.rs +++ b/crates/ra_ide_db/src/line_index.rs | |||
@@ -8,7 +8,9 @@ use superslice::Ext; | |||
8 | 8 | ||
9 | #[derive(Clone, Debug, PartialEq, Eq)] | 9 | #[derive(Clone, Debug, PartialEq, Eq)] |
10 | pub struct LineIndex { | 10 | pub struct LineIndex { |
11 | /// Offset the the beginning of each line, zero-based | ||
11 | pub(crate) newlines: Vec<TextSize>, | 12 | pub(crate) newlines: Vec<TextSize>, |
13 | /// List of non-ASCII characters on each line | ||
12 | pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>, | 14 | pub(crate) utf16_lines: FxHashMap<u32, Vec<Utf16Char>>, |
13 | } | 15 | } |
14 | 16 | ||
@@ -22,7 +24,9 @@ pub struct LineCol { | |||
22 | 24 | ||
23 | #[derive(Clone, Debug, Hash, PartialEq, Eq)] | 25 | #[derive(Clone, Debug, Hash, PartialEq, Eq)] |
24 | pub(crate) struct Utf16Char { | 26 | pub(crate) struct Utf16Char { |
27 | /// Start offset of a character inside a line, zero-based | ||
25 | pub(crate) start: TextSize, | 28 | pub(crate) start: TextSize, |
29 | /// End offset of a character inside a line, zero-based | ||
26 | pub(crate) end: TextSize, | 30 | pub(crate) end: TextSize, |
27 | } | 31 | } |
28 | 32 | ||
@@ -120,7 +124,7 @@ impl LineIndex { | |||
120 | fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize { | 124 | fn utf16_to_utf8_col(&self, line: u32, mut col: u32) -> TextSize { |
121 | if let Some(utf16_chars) = self.utf16_lines.get(&line) { | 125 | if let Some(utf16_chars) = self.utf16_lines.get(&line) { |
122 | for c in utf16_chars { | 126 | for c in utf16_chars { |
123 | if col >= u32::from(c.start) { | 127 | if col > u32::from(c.start) { |
124 | col += u32::from(c.len()) - 1; | 128 | col += u32::from(c.len()) - 1; |
125 | } else { | 129 | } else { |
126 | // From here on, all utf16 characters come *after* the character we are mapping, | 130 | // From here on, all utf16 characters come *after* the character we are mapping, |
@@ -226,8 +230,10 @@ const C: char = \"メ メ\"; | |||
226 | // UTF-16 to UTF-8 | 230 | // UTF-16 to UTF-8 |
227 | assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15)); | 231 | assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextSize::from(15)); |
228 | 232 | ||
229 | assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); | 233 | // メ UTF-8: 0xE3 0x83 0xA1, UTF-16: 0x30E1 |
230 | assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(23)); | 234 | assert_eq!(col_index.utf16_to_utf8_col(1, 17), TextSize::from(17)); // first メ at 17..20 |
235 | assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextSize::from(20)); // space | ||
236 | assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextSize::from(21)); // second メ at 21..24 | ||
231 | 237 | ||
232 | assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15)); | 238 | assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextSize::from(15)); |
233 | } | 239 | } |