diff options
author | bors[bot] <bors[bot]@users.noreply.github.com> | 2018-11-16 11:22:40 +0000 |
---|---|---|
committer | bors[bot] <bors[bot]@users.noreply.github.com> | 2018-11-16 11:22:40 +0000 |
commit | 97532c8bf74b96336c1c553171b3ee787edbc66a (patch) | |
tree | ec9b9ab4d9849cfbe30127eaf4f984dd97571e63 | |
parent | 923483e321acace3bbf38688bd70d4d38f49b35e (diff) | |
parent | acd51cb361720458615bd1ceb909b0f4461328df (diff) |
Merge #227
227: Correctly map between UTF-8 and UTF-16 positions r=aochagavia a=aochagavia
Fixes #202
Co-authored-by: Adolfo Ochagavía <[email protected]>
Co-authored-by: Adolfo Ochagavía <[email protected]>
-rw-r--r-- | crates/ra_editor/src/line_index.rs | 219 | ||||
-rw-r--r-- | crates/ra_lsp_server/src/conv.rs | 19 |
2 files changed, 208 insertions, 30 deletions
diff --git a/crates/ra_editor/src/line_index.rs b/crates/ra_editor/src/line_index.rs
index 9abbb0d09..aab7e4081 100644
--- a/crates/ra_editor/src/line_index.rs
+++ b/crates/ra_editor/src/line_index.rs
@@ -1,43 +1,124 @@ | |||
1 | use crate::TextUnit; | 1 | use crate::TextUnit; |
2 | use rustc_hash::FxHashMap; | ||
2 | use superslice::Ext; | 3 | use superslice::Ext; |
3 | 4 | ||
4 | #[derive(Clone, Debug, Hash, PartialEq, Eq)] | 5 | #[derive(Clone, Debug, PartialEq, Eq)] |
5 | pub struct LineIndex { | 6 | pub struct LineIndex { |
6 | newlines: Vec<TextUnit>, | 7 | newlines: Vec<TextUnit>, |
8 | utf16_lines: FxHashMap<u32, Vec<Utf16Char>>, | ||
7 | } | 9 | } |
8 | 10 | ||
9 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] | 11 | #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] |
10 | pub struct LineCol { | 12 | pub struct LineCol { |
11 | pub line: u32, | 13 | pub line: u32, |
12 | pub col: TextUnit, | 14 | pub col_utf16: u32, |
15 | } | ||
16 | |||
17 | #[derive(Clone, Debug, Hash, PartialEq, Eq)] | ||
18 | struct Utf16Char { | ||
19 | start: TextUnit, | ||
20 | end: TextUnit, | ||
21 | } | ||
22 | |||
23 | impl Utf16Char { | ||
24 | fn len(&self) -> TextUnit { | ||
25 | self.end - self.start | ||
26 | } | ||
13 | } | 27 | } |
14 | 28 | ||
15 | impl LineIndex { | 29 | impl LineIndex { |
16 | pub fn new(text: &str) -> LineIndex { | 30 | pub fn new(text: &str) -> LineIndex { |
31 | let mut utf16_lines = FxHashMap::default(); | ||
32 | let mut utf16_chars = Vec::new(); | ||
33 | |||
17 | let mut newlines = vec![0.into()]; | 34 | let mut newlines = vec![0.into()]; |
18 | let mut curr = 0.into(); | 35 | let mut curr_row = 0.into(); |
36 | let mut curr_col = 0.into(); | ||
37 | let mut line = 0; | ||
19 | for c in text.chars() { | 38 | for c in text.chars() { |
20 | curr += TextUnit::of_char(c); | 39 | curr_row += TextUnit::of_char(c); |
21 | if c == '\n' { | 40 | if c == '\n' { |
22 | newlines.push(curr); | 41 | newlines.push(curr_row); |
42 | |||
43 | // Save any utf-16 characters seen in the previous line | ||
44 | if utf16_chars.len() > 0 { | ||
45 | utf16_lines.insert(line, utf16_chars); | ||
46 | utf16_chars = Vec::new(); | ||
47 | } | ||
48 | |||
49 | // Prepare for processing the next line | ||
50 | curr_col = 0.into(); | ||
51 | line += 1; | ||
52 | continue; | ||
23 | } | 53 | } |
54 | |||
55 | let char_len = TextUnit::of_char(c); | ||
56 | if char_len.to_usize() > 1 { | ||
57 | utf16_chars.push(Utf16Char { | ||
58 | start: curr_col, | ||
59 | end: curr_col + char_len, | ||
60 | }); | ||
61 | } | ||
62 | |||
63 | curr_col += char_len; | ||
64 | } | ||
65 | LineIndex { | ||
66 | newlines, | ||
67 | utf16_lines, | ||
24 | } | 68 | } |
25 | LineIndex { newlines } | ||
26 | } | 69 | } |
27 | 70 | ||
28 | pub fn line_col(&self, offset: TextUnit) -> LineCol { | 71 | pub fn line_col(&self, offset: TextUnit) -> LineCol { |
29 | let line = self.newlines.upper_bound(&offset) - 1; | 72 | let line = self.newlines.upper_bound(&offset) - 1; |
30 | let line_start_offset = self.newlines[line]; | 73 | let line_start_offset = self.newlines[line]; |
31 | let col = offset - line_start_offset; | 74 | let col = offset - line_start_offset; |
75 | |||
32 | LineCol { | 76 | LineCol { |
33 | line: line as u32, | 77 | line: line as u32, |
34 | col, | 78 | col_utf16: self.utf8_to_utf16_col(line as u32, col) as u32, |
35 | } | 79 | } |
36 | } | 80 | } |
37 | 81 | ||
38 | pub fn offset(&self, line_col: LineCol) -> TextUnit { | 82 | pub fn offset(&self, line_col: LineCol) -> TextUnit { |
39 | //TODO: return Result | 83 | //TODO: return Result |
40 | self.newlines[line_col.line as usize] + line_col.col | 84 | let col = self.utf16_to_utf8_col(line_col.line, line_col.col_utf16); |
85 | self.newlines[line_col.line as usize] + col | ||
86 | } | ||
87 | |||
88 | fn utf8_to_utf16_col(&self, line: u32, mut col: TextUnit) -> usize { | ||
89 | if let Some(utf16_chars) = self.utf16_lines.get(&line) { | ||
90 | let mut correction = TextUnit::from_usize(0); | ||
91 | for c in utf16_chars { | ||
92 | if col >= c.end { | ||
93 | correction += c.len() - TextUnit::from_usize(1); | ||
94 | } else { | ||
95 | // From here on, all utf16 characters come *after* the character we are mapping, | ||
96 | // so we don't need to take them into account | ||
97 | break; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | col -= correction; | ||
102 | } | ||
103 | |||
104 | col.to_usize() | ||
105 | } | ||
106 | |||
107 | fn utf16_to_utf8_col(&self, line: u32, col: u32) -> TextUnit { | ||
108 | let mut col: TextUnit = col.into(); | ||
109 | if let Some(utf16_chars) = self.utf16_lines.get(&line) { | ||
110 | for c in utf16_chars { | ||
111 | if col >= c.start { | ||
112 | col += c.len() - TextUnit::from_usize(1); | ||
113 | } else { | ||
114 | // From here on, all utf16 characters come *after* the character we are mapping, | ||
115 | // so we don't need to take them into account | ||
116 | break; | ||
117 | } | ||
118 | } | ||
119 | } | ||
120 | |||
121 | col | ||
41 | } | 122 | } |
42 | } | 123 | } |
43 | 124 | ||
@@ -49,63 +130,63 @@ fn test_line_index() { | |||
49 | index.line_col(0.into()), | 130 | index.line_col(0.into()), |
50 | LineCol { | 131 | LineCol { |
51 | line: 0, | 132 | line: 0, |
52 | col: 0.into() | 133 | col_utf16: 0 |
53 | } | 134 | } |
54 | ); | 135 | ); |
55 | assert_eq!( | 136 | assert_eq!( |
56 | index.line_col(1.into()), | 137 | index.line_col(1.into()), |
57 | LineCol { | 138 | LineCol { |
58 | line: 0, | 139 | line: 0, |
59 | col: 1.into() | 140 | col_utf16: 1 |
60 | } | 141 | } |
61 | ); | 142 | ); |
62 | assert_eq!( | 143 | assert_eq!( |
63 | index.line_col(5.into()), | 144 | index.line_col(5.into()), |
64 | LineCol { | 145 | LineCol { |
65 | line: 0, | 146 | line: 0, |
66 | col: 5.into() | 147 | col_utf16: 5 |
67 | } | 148 | } |
68 | ); | 149 | ); |
69 | assert_eq!( | 150 | assert_eq!( |
70 | index.line_col(6.into()), | 151 | index.line_col(6.into()), |
71 | LineCol { | 152 | LineCol { |
72 | line: 1, | 153 | line: 1, |
73 | col: 0.into() | 154 | col_utf16: 0 |
74 | } | 155 | } |
75 | ); | 156 | ); |
76 | assert_eq!( | 157 | assert_eq!( |
77 | index.line_col(7.into()), | 158 | index.line_col(7.into()), |
78 | LineCol { | 159 | LineCol { |
79 | line: 1, | 160 | line: 1, |
80 | col: 1.into() | 161 | col_utf16: 1 |
81 | } | 162 | } |
82 | ); | 163 | ); |
83 | assert_eq!( | 164 | assert_eq!( |
84 | index.line_col(8.into()), | 165 | index.line_col(8.into()), |
85 | LineCol { | 166 | LineCol { |
86 | line: 1, | 167 | line: 1, |
87 | col: 2.into() | 168 | col_utf16: 2 |
88 | } | 169 | } |
89 | ); | 170 | ); |
90 | assert_eq!( | 171 | assert_eq!( |
91 | index.line_col(10.into()), | 172 | index.line_col(10.into()), |
92 | LineCol { | 173 | LineCol { |
93 | line: 1, | 174 | line: 1, |
94 | col: 4.into() | 175 | col_utf16: 4 |
95 | } | 176 | } |
96 | ); | 177 | ); |
97 | assert_eq!( | 178 | assert_eq!( |
98 | index.line_col(11.into()), | 179 | index.line_col(11.into()), |
99 | LineCol { | 180 | LineCol { |
100 | line: 1, | 181 | line: 1, |
101 | col: 5.into() | 182 | col_utf16: 5 |
102 | } | 183 | } |
103 | ); | 184 | ); |
104 | assert_eq!( | 185 | assert_eq!( |
105 | index.line_col(12.into()), | 186 | index.line_col(12.into()), |
106 | LineCol { | 187 | LineCol { |
107 | line: 1, | 188 | line: 1, |
108 | col: 6.into() | 189 | col_utf16: 6 |
109 | } | 190 | } |
110 | ); | 191 | ); |
111 | 192 | ||
@@ -115,35 +196,129 @@ fn test_line_index() { | |||
115 | index.line_col(0.into()), | 196 | index.line_col(0.into()), |
116 | LineCol { | 197 | LineCol { |
117 | line: 0, | 198 | line: 0, |
118 | col: 0.into() | 199 | col_utf16: 0 |
119 | } | 200 | } |
120 | ); | 201 | ); |
121 | assert_eq!( | 202 | assert_eq!( |
122 | index.line_col(1.into()), | 203 | index.line_col(1.into()), |
123 | LineCol { | 204 | LineCol { |
124 | line: 1, | 205 | line: 1, |
125 | col: 0.into() | 206 | col_utf16: 0 |
126 | } | 207 | } |
127 | ); | 208 | ); |
128 | assert_eq!( | 209 | assert_eq!( |
129 | index.line_col(2.into()), | 210 | index.line_col(2.into()), |
130 | LineCol { | 211 | LineCol { |
131 | line: 1, | 212 | line: 1, |
132 | col: 1.into() | 213 | col_utf16: 1 |
133 | } | 214 | } |
134 | ); | 215 | ); |
135 | assert_eq!( | 216 | assert_eq!( |
136 | index.line_col(6.into()), | 217 | index.line_col(6.into()), |
137 | LineCol { | 218 | LineCol { |
138 | line: 1, | 219 | line: 1, |
139 | col: 5.into() | 220 | col_utf16: 5 |
140 | } | 221 | } |
141 | ); | 222 | ); |
142 | assert_eq!( | 223 | assert_eq!( |
143 | index.line_col(7.into()), | 224 | index.line_col(7.into()), |
144 | LineCol { | 225 | LineCol { |
145 | line: 2, | 226 | line: 2, |
146 | col: 0.into() | 227 | col_utf16: 0 |
147 | } | 228 | } |
148 | ); | 229 | ); |
149 | } | 230 | } |
231 | |||
232 | #[cfg(test)] | ||
233 | mod test_utf8_utf16_conv { | ||
234 | use super::*; | ||
235 | |||
236 | #[test] | ||
237 | fn test_char_len() { | ||
238 | assert_eq!('メ'.len_utf8(), 3); | ||
239 | assert_eq!('メ'.len_utf16(), 1); | ||
240 | } | ||
241 | |||
242 | #[test] | ||
243 | fn test_empty_index() { | ||
244 | let col_index = LineIndex::new( | ||
245 | " | ||
246 | const C: char = 'x'; | ||
247 | ", | ||
248 | ); | ||
249 | assert_eq!(col_index.utf16_lines.len(), 0); | ||
250 | } | ||
251 | |||
252 | #[test] | ||
253 | fn test_single_char() { | ||
254 | let col_index = LineIndex::new( | ||
255 | " | ||
256 | const C: char = 'メ'; | ||
257 | ", | ||
258 | ); | ||
259 | |||
260 | assert_eq!(col_index.utf16_lines.len(), 1); | ||
261 | assert_eq!(col_index.utf16_lines[&1].len(), 1); | ||
262 | assert_eq!( | ||
263 | col_index.utf16_lines[&1][0], | ||
264 | Utf16Char { | ||
265 | start: 17.into(), | ||
266 | end: 20.into() | ||
267 | } | ||
268 | ); | ||
269 | |||
270 | // UTF-8 to UTF-16, no changes | ||
271 | assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15); | ||
272 | |||
273 | // UTF-8 to UTF-16 | ||
274 | assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20); | ||
275 | |||
276 | // UTF-16 to UTF-8, no changes | ||
277 | assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextUnit::from(15)); | ||
278 | |||
279 | // UTF-16 to UTF-8 | ||
280 | assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextUnit::from(21)); | ||
281 | } | ||
282 | |||
283 | #[test] | ||
284 | fn test_string() { | ||
285 | let col_index = LineIndex::new( | ||
286 | " | ||
287 | const C: char = \"メ メ\"; | ||
288 | ", | ||
289 | ); | ||
290 | |||
291 | assert_eq!(col_index.utf16_lines.len(), 1); | ||
292 | assert_eq!(col_index.utf16_lines[&1].len(), 2); | ||
293 | assert_eq!( | ||
294 | col_index.utf16_lines[&1][0], | ||
295 | Utf16Char { | ||
296 | start: 17.into(), | ||
297 | end: 20.into() | ||
298 | } | ||
299 | ); | ||
300 | assert_eq!( | ||
301 | col_index.utf16_lines[&1][1], | ||
302 | Utf16Char { | ||
303 | start: 21.into(), | ||
304 | end: 24.into() | ||
305 | } | ||
306 | ); | ||
307 | |||
308 | // UTF-8 to UTF-16 | ||
309 | assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15); | ||
310 | |||
311 | assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19); | ||
312 | assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21); | ||
313 | |||
314 | assert!(col_index.utf8_to_utf16_col(2, 15.into()) == 15); | ||
315 | |||
316 | // UTF-16 to UTF-8 | ||
317 | assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextUnit::from_usize(15)); | ||
318 | |||
319 | assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextUnit::from_usize(20)); | ||
320 | assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextUnit::from_usize(23)); | ||
321 | |||
322 | assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextUnit::from_usize(15)); | ||
323 | } | ||
324 | } | ||
diff --git a/crates/ra_lsp_server/src/conv.rs b/crates/ra_lsp_server/src/conv.rs
index e5a2449c2..5d5a0c55e 100644
--- a/crates/ra_lsp_server/src/conv.rs
+++ b/crates/ra_lsp_server/src/conv.rs
@@ -49,10 +49,9 @@ impl ConvWith for Position { | |||
49 | type Output = TextUnit; | 49 | type Output = TextUnit; |
50 | 50 | ||
51 | fn conv_with(self, line_index: &LineIndex) -> TextUnit { | 51 | fn conv_with(self, line_index: &LineIndex) -> TextUnit { |
52 | // TODO: UTF-16 | ||
53 | let line_col = LineCol { | 52 | let line_col = LineCol { |
54 | line: self.line as u32, | 53 | line: self.line as u32, |
55 | col: (self.character as u32).into(), | 54 | col_utf16: self.character as u32, |
56 | }; | 55 | }; |
57 | line_index.offset(line_col) | 56 | line_index.offset(line_col) |
58 | } | 57 | } |
@@ -64,8 +63,10 @@ impl ConvWith for TextUnit { | |||
64 | 63 | ||
65 | fn conv_with(self, line_index: &LineIndex) -> Position { | 64 | fn conv_with(self, line_index: &LineIndex) -> Position { |
66 | let line_col = line_index.line_col(self); | 65 | let line_col = line_index.line_col(self); |
67 | // TODO: UTF-16 | 66 | Position::new( |
68 | Position::new(u64::from(line_col.line), u64::from(u32::from(line_col.col))) | 67 | u64::from(line_col.line), |
68 | u64::from(u32::from(line_col.col_utf16)), | ||
69 | ) | ||
69 | } | 70 | } |
70 | } | 71 | } |
71 | 72 | ||
@@ -203,8 +204,10 @@ impl TryConvWith for SourceChange { | |||
203 | .map(|it| it.edits.as_slice()) | 204 | .map(|it| it.edits.as_slice()) |
204 | .unwrap_or(&[]); | 205 | .unwrap_or(&[]); |
205 | let line_col = translate_offset_with_edit(&*line_index, pos.offset, edits); | 206 | let line_col = translate_offset_with_edit(&*line_index, pos.offset, edits); |
206 | let position = | 207 | let position = Position::new( |
207 | Position::new(u64::from(line_col.line), u64::from(u32::from(line_col.col))); | 208 | u64::from(line_col.line), |
209 | u64::from(u32::from(line_col.col_utf16)), | ||
210 | ); | ||
208 | Some(TextDocumentPositionParams { | 211 | Some(TextDocumentPositionParams { |
209 | text_document: TextDocumentIdentifier::new(pos.file_id.try_conv_with(world)?), | 212 | text_document: TextDocumentIdentifier::new(pos.file_id.try_conv_with(world)?), |
210 | position, | 213 | position, |
@@ -247,12 +250,12 @@ fn translate_offset_with_edit( | |||
247 | if in_edit_line_col.line == 0 { | 250 | if in_edit_line_col.line == 0 { |
248 | LineCol { | 251 | LineCol { |
249 | line: edit_line_col.line, | 252 | line: edit_line_col.line, |
250 | col: edit_line_col.col + in_edit_line_col.col, | 253 | col_utf16: edit_line_col.col_utf16 + in_edit_line_col.col_utf16, |
251 | } | 254 | } |
252 | } else { | 255 | } else { |
253 | LineCol { | 256 | LineCol { |
254 | line: edit_line_col.line + in_edit_line_col.line, | 257 | line: edit_line_col.line + in_edit_line_col.line, |
255 | col: in_edit_line_col.col, | 258 | col_utf16: in_edit_line_col.col_utf16, |
256 | } | 259 | } |
257 | } | 260 | } |
258 | } | 261 | } |