aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author bors[bot] <bors[bot]@users.noreply.github.com> 2018-11-16 11:22:40 +0000
committer bors[bot] <bors[bot]@users.noreply.github.com> 2018-11-16 11:22:40 +0000
commit 97532c8bf74b96336c1c553171b3ee787edbc66a (patch)
tree ec9b9ab4d9849cfbe30127eaf4f984dd97571e63
parent 923483e321acace3bbf38688bd70d4d38f49b35e (diff)
parent acd51cb361720458615bd1ceb909b0f4461328df (diff)
Merge #227
227: Correctly map between UTF-8 and UTF-16 positions r=aochagavia a=aochagavia

Fixes #202

Co-authored-by: Adolfo Ochagavía <[email protected]>
Co-authored-by: Adolfo Ochagavía <[email protected]>
-rw-r--r-- crates/ra_editor/src/line_index.rs 219
-rw-r--r-- crates/ra_lsp_server/src/conv.rs 19
2 files changed, 208 insertions, 30 deletions
diff --git a/crates/ra_editor/src/line_index.rs b/crates/ra_editor/src/line_index.rs
index 9abbb0d09..aab7e4081 100644
--- a/crates/ra_editor/src/line_index.rs
+++ b/crates/ra_editor/src/line_index.rs
@@ -1,43 +1,124 @@
1use crate::TextUnit; 1use crate::TextUnit;
2use rustc_hash::FxHashMap;
2use superslice::Ext; 3use superslice::Ext;
3 4
4#[derive(Clone, Debug, Hash, PartialEq, Eq)] 5#[derive(Clone, Debug, PartialEq, Eq)]
5pub struct LineIndex { 6pub struct LineIndex {
6 newlines: Vec<TextUnit>, 7 newlines: Vec<TextUnit>,
8 utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
7} 9}
8 10
9#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] 11#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
10pub struct LineCol { 12pub struct LineCol {
11 pub line: u32, 13 pub line: u32,
12 pub col: TextUnit, 14 pub col_utf16: u32,
15}
16
17#[derive(Clone, Debug, Hash, PartialEq, Eq)]
18struct Utf16Char {
19 start: TextUnit,
20 end: TextUnit,
21}
22
23impl Utf16Char {
24 fn len(&self) -> TextUnit {
25 self.end - self.start
26 }
13} 27}
14 28
15impl LineIndex { 29impl LineIndex {
16 pub fn new(text: &str) -> LineIndex { 30 pub fn new(text: &str) -> LineIndex {
31 let mut utf16_lines = FxHashMap::default();
32 let mut utf16_chars = Vec::new();
33
17 let mut newlines = vec![0.into()]; 34 let mut newlines = vec![0.into()];
18 let mut curr = 0.into(); 35 let mut curr_row = 0.into();
36 let mut curr_col = 0.into();
37 let mut line = 0;
19 for c in text.chars() { 38 for c in text.chars() {
20 curr += TextUnit::of_char(c); 39 curr_row += TextUnit::of_char(c);
21 if c == '\n' { 40 if c == '\n' {
22 newlines.push(curr); 41 newlines.push(curr_row);
42
43 // Save any utf-16 characters seen in the previous line
44 if utf16_chars.len() > 0 {
45 utf16_lines.insert(line, utf16_chars);
46 utf16_chars = Vec::new();
47 }
48
49 // Prepare for processing the next line
50 curr_col = 0.into();
51 line += 1;
52 continue;
23 } 53 }
54
55 let char_len = TextUnit::of_char(c);
56 if char_len.to_usize() > 1 {
57 utf16_chars.push(Utf16Char {
58 start: curr_col,
59 end: curr_col + char_len,
60 });
61 }
62
63 curr_col += char_len;
64 }
65 LineIndex {
66 newlines,
67 utf16_lines,
24 } 68 }
25 LineIndex { newlines }
26 } 69 }
27 70
28 pub fn line_col(&self, offset: TextUnit) -> LineCol { 71 pub fn line_col(&self, offset: TextUnit) -> LineCol {
29 let line = self.newlines.upper_bound(&offset) - 1; 72 let line = self.newlines.upper_bound(&offset) - 1;
30 let line_start_offset = self.newlines[line]; 73 let line_start_offset = self.newlines[line];
31 let col = offset - line_start_offset; 74 let col = offset - line_start_offset;
75
32 LineCol { 76 LineCol {
33 line: line as u32, 77 line: line as u32,
34 col, 78 col_utf16: self.utf8_to_utf16_col(line as u32, col) as u32,
35 } 79 }
36 } 80 }
37 81
38 pub fn offset(&self, line_col: LineCol) -> TextUnit { 82 pub fn offset(&self, line_col: LineCol) -> TextUnit {
39 //TODO: return Result 83 //TODO: return Result
40 self.newlines[line_col.line as usize] + line_col.col 84 let col = self.utf16_to_utf8_col(line_col.line, line_col.col_utf16);
85 self.newlines[line_col.line as usize] + col
86 }
87
88 fn utf8_to_utf16_col(&self, line: u32, mut col: TextUnit) -> usize {
89 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
90 let mut correction = TextUnit::from_usize(0);
91 for c in utf16_chars {
92 if col >= c.end {
93 correction += c.len() - TextUnit::from_usize(1);
94 } else {
95 // From here on, all utf16 characters come *after* the character we are mapping,
96 // so we don't need to take them into account
97 break;
98 }
99 }
100
101 col -= correction;
102 }
103
104 col.to_usize()
105 }
106
107 fn utf16_to_utf8_col(&self, line: u32, col: u32) -> TextUnit {
108 let mut col: TextUnit = col.into();
109 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
110 for c in utf16_chars {
111 if col >= c.start {
112 col += c.len() - TextUnit::from_usize(1);
113 } else {
114 // From here on, all utf16 characters come *after* the character we are mapping,
115 // so we don't need to take them into account
116 break;
117 }
118 }
119 }
120
121 col
41 } 122 }
42} 123}
43 124
@@ -49,63 +130,63 @@ fn test_line_index() {
49 index.line_col(0.into()), 130 index.line_col(0.into()),
50 LineCol { 131 LineCol {
51 line: 0, 132 line: 0,
52 col: 0.into() 133 col_utf16: 0
53 } 134 }
54 ); 135 );
55 assert_eq!( 136 assert_eq!(
56 index.line_col(1.into()), 137 index.line_col(1.into()),
57 LineCol { 138 LineCol {
58 line: 0, 139 line: 0,
59 col: 1.into() 140 col_utf16: 1
60 } 141 }
61 ); 142 );
62 assert_eq!( 143 assert_eq!(
63 index.line_col(5.into()), 144 index.line_col(5.into()),
64 LineCol { 145 LineCol {
65 line: 0, 146 line: 0,
66 col: 5.into() 147 col_utf16: 5
67 } 148 }
68 ); 149 );
69 assert_eq!( 150 assert_eq!(
70 index.line_col(6.into()), 151 index.line_col(6.into()),
71 LineCol { 152 LineCol {
72 line: 1, 153 line: 1,
73 col: 0.into() 154 col_utf16: 0
74 } 155 }
75 ); 156 );
76 assert_eq!( 157 assert_eq!(
77 index.line_col(7.into()), 158 index.line_col(7.into()),
78 LineCol { 159 LineCol {
79 line: 1, 160 line: 1,
80 col: 1.into() 161 col_utf16: 1
81 } 162 }
82 ); 163 );
83 assert_eq!( 164 assert_eq!(
84 index.line_col(8.into()), 165 index.line_col(8.into()),
85 LineCol { 166 LineCol {
86 line: 1, 167 line: 1,
87 col: 2.into() 168 col_utf16: 2
88 } 169 }
89 ); 170 );
90 assert_eq!( 171 assert_eq!(
91 index.line_col(10.into()), 172 index.line_col(10.into()),
92 LineCol { 173 LineCol {
93 line: 1, 174 line: 1,
94 col: 4.into() 175 col_utf16: 4
95 } 176 }
96 ); 177 );
97 assert_eq!( 178 assert_eq!(
98 index.line_col(11.into()), 179 index.line_col(11.into()),
99 LineCol { 180 LineCol {
100 line: 1, 181 line: 1,
101 col: 5.into() 182 col_utf16: 5
102 } 183 }
103 ); 184 );
104 assert_eq!( 185 assert_eq!(
105 index.line_col(12.into()), 186 index.line_col(12.into()),
106 LineCol { 187 LineCol {
107 line: 1, 188 line: 1,
108 col: 6.into() 189 col_utf16: 6
109 } 190 }
110 ); 191 );
111 192
@@ -115,35 +196,129 @@ fn test_line_index() {
115 index.line_col(0.into()), 196 index.line_col(0.into()),
116 LineCol { 197 LineCol {
117 line: 0, 198 line: 0,
118 col: 0.into() 199 col_utf16: 0
119 } 200 }
120 ); 201 );
121 assert_eq!( 202 assert_eq!(
122 index.line_col(1.into()), 203 index.line_col(1.into()),
123 LineCol { 204 LineCol {
124 line: 1, 205 line: 1,
125 col: 0.into() 206 col_utf16: 0
126 } 207 }
127 ); 208 );
128 assert_eq!( 209 assert_eq!(
129 index.line_col(2.into()), 210 index.line_col(2.into()),
130 LineCol { 211 LineCol {
131 line: 1, 212 line: 1,
132 col: 1.into() 213 col_utf16: 1
133 } 214 }
134 ); 215 );
135 assert_eq!( 216 assert_eq!(
136 index.line_col(6.into()), 217 index.line_col(6.into()),
137 LineCol { 218 LineCol {
138 line: 1, 219 line: 1,
139 col: 5.into() 220 col_utf16: 5
140 } 221 }
141 ); 222 );
142 assert_eq!( 223 assert_eq!(
143 index.line_col(7.into()), 224 index.line_col(7.into()),
144 LineCol { 225 LineCol {
145 line: 2, 226 line: 2,
146 col: 0.into() 227 col_utf16: 0
147 } 228 }
148 ); 229 );
149} 230}
231
232#[cfg(test)]
233mod test_utf8_utf16_conv {
234 use super::*;
235
236 #[test]
237 fn test_char_len() {
238 assert_eq!('メ'.len_utf8(), 3);
239 assert_eq!('メ'.len_utf16(), 1);
240 }
241
242 #[test]
243 fn test_empty_index() {
244 let col_index = LineIndex::new(
245 "
246const C: char = 'x';
247",
248 );
249 assert_eq!(col_index.utf16_lines.len(), 0);
250 }
251
252 #[test]
253 fn test_single_char() {
254 let col_index = LineIndex::new(
255 "
256const C: char = 'メ';
257",
258 );
259
260 assert_eq!(col_index.utf16_lines.len(), 1);
261 assert_eq!(col_index.utf16_lines[&1].len(), 1);
262 assert_eq!(
263 col_index.utf16_lines[&1][0],
264 Utf16Char {
265 start: 17.into(),
266 end: 20.into()
267 }
268 );
269
270 // UTF-8 to UTF-16, no changes
271 assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
272
273 // UTF-8 to UTF-16
274 assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20);
275
276 // UTF-16 to UTF-8, no changes
277 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextUnit::from(15));
278
279 // UTF-16 to UTF-8
280 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextUnit::from(21));
281 }
282
283 #[test]
284 fn test_string() {
285 let col_index = LineIndex::new(
286 "
287const C: char = \"メ メ\";
288",
289 );
290
291 assert_eq!(col_index.utf16_lines.len(), 1);
292 assert_eq!(col_index.utf16_lines[&1].len(), 2);
293 assert_eq!(
294 col_index.utf16_lines[&1][0],
295 Utf16Char {
296 start: 17.into(),
297 end: 20.into()
298 }
299 );
300 assert_eq!(
301 col_index.utf16_lines[&1][1],
302 Utf16Char {
303 start: 21.into(),
304 end: 24.into()
305 }
306 );
307
308 // UTF-8 to UTF-16
309 assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
310
311 assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19);
312 assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21);
313
314 assert!(col_index.utf8_to_utf16_col(2, 15.into()) == 15);
315
316 // UTF-16 to UTF-8
317 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextUnit::from_usize(15));
318
319 assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextUnit::from_usize(20));
320 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextUnit::from_usize(23));
321
322 assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextUnit::from_usize(15));
323 }
324}
diff --git a/crates/ra_lsp_server/src/conv.rs b/crates/ra_lsp_server/src/conv.rs
index e5a2449c2..5d5a0c55e 100644
--- a/crates/ra_lsp_server/src/conv.rs
+++ b/crates/ra_lsp_server/src/conv.rs
@@ -49,10 +49,9 @@ impl ConvWith for Position {
49 type Output = TextUnit; 49 type Output = TextUnit;
50 50
51 fn conv_with(self, line_index: &LineIndex) -> TextUnit { 51 fn conv_with(self, line_index: &LineIndex) -> TextUnit {
52 // TODO: UTF-16
53 let line_col = LineCol { 52 let line_col = LineCol {
54 line: self.line as u32, 53 line: self.line as u32,
55 col: (self.character as u32).into(), 54 col_utf16: self.character as u32,
56 }; 55 };
57 line_index.offset(line_col) 56 line_index.offset(line_col)
58 } 57 }
@@ -64,8 +63,10 @@ impl ConvWith for TextUnit {
64 63
65 fn conv_with(self, line_index: &LineIndex) -> Position { 64 fn conv_with(self, line_index: &LineIndex) -> Position {
66 let line_col = line_index.line_col(self); 65 let line_col = line_index.line_col(self);
67 // TODO: UTF-16 66 Position::new(
68 Position::new(u64::from(line_col.line), u64::from(u32::from(line_col.col))) 67 u64::from(line_col.line),
68 u64::from(u32::from(line_col.col_utf16)),
69 )
69 } 70 }
70} 71}
71 72
@@ -203,8 +204,10 @@ impl TryConvWith for SourceChange {
203 .map(|it| it.edits.as_slice()) 204 .map(|it| it.edits.as_slice())
204 .unwrap_or(&[]); 205 .unwrap_or(&[]);
205 let line_col = translate_offset_with_edit(&*line_index, pos.offset, edits); 206 let line_col = translate_offset_with_edit(&*line_index, pos.offset, edits);
206 let position = 207 let position = Position::new(
207 Position::new(u64::from(line_col.line), u64::from(u32::from(line_col.col))); 208 u64::from(line_col.line),
209 u64::from(u32::from(line_col.col_utf16)),
210 );
208 Some(TextDocumentPositionParams { 211 Some(TextDocumentPositionParams {
209 text_document: TextDocumentIdentifier::new(pos.file_id.try_conv_with(world)?), 212 text_document: TextDocumentIdentifier::new(pos.file_id.try_conv_with(world)?),
210 position, 213 position,
@@ -247,12 +250,12 @@ fn translate_offset_with_edit(
247 if in_edit_line_col.line == 0 { 250 if in_edit_line_col.line == 0 {
248 LineCol { 251 LineCol {
249 line: edit_line_col.line, 252 line: edit_line_col.line,
250 col: edit_line_col.col + in_edit_line_col.col, 253 col_utf16: edit_line_col.col_utf16 + in_edit_line_col.col_utf16,
251 } 254 }
252 } else { 255 } else {
253 LineCol { 256 LineCol {
254 line: edit_line_col.line + in_edit_line_col.line, 257 line: edit_line_col.line + in_edit_line_col.line,
255 col: in_edit_line_col.col, 258 col_utf16: in_edit_line_col.col_utf16,
256 } 259 }
257 } 260 }
258} 261}