aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdolfo Ochagavía <[email protected]>2018-11-15 16:34:05 +0000
committerAdolfo Ochagavía <[email protected]>2018-11-16 11:15:50 +0000
commit136d1864bcb5046e7f334ac347a8a94946d1ba90 (patch)
tree94be94eff9265d0e41cb847be2190e35416eb0a3
parent9aebd9e6caf49467ca20caf2583c47cf5092c788 (diff)
Support UTF-16 chars in LineIndex
-rw-r--r--crates/ra_editor/src/line_index.rs303
-rw-r--r--crates/ra_lsp_server/src/conv.rs2
2 files changed, 197 insertions, 108 deletions
diff --git a/crates/ra_editor/src/line_index.rs b/crates/ra_editor/src/line_index.rs
index 9abbb0d09..0b3a28cd4 100644
--- a/crates/ra_editor/src/line_index.rs
+++ b/crates/ra_editor/src/line_index.rs
@@ -1,43 +1,124 @@
1use crate::TextUnit; 1use crate::TextUnit;
2use rustc_hash::FxHashMap;
2use superslice::Ext; 3use superslice::Ext;
3 4
4#[derive(Clone, Debug, Hash, PartialEq, Eq)] 5#[derive(Clone, Debug, PartialEq, Eq)]
5pub struct LineIndex { 6pub struct LineIndex {
6 newlines: Vec<TextUnit>, 7 newlines: Vec<TextUnit>,
8 utf16_lines: FxHashMap<u32, Vec<Utf16Char>>,
7} 9}
8 10
9#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] 11#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
10pub struct LineCol { 12pub struct LineCol {
11 pub line: u32, 13 pub line: u32,
12 pub col: TextUnit, 14 pub col: u32,
15}
16
17#[derive(Clone, Debug, Hash, PartialEq, Eq)]
18struct Utf16Char {
19 start: TextUnit,
20 end: TextUnit,
21}
22
23impl Utf16Char {
24 fn len(&self) -> TextUnit {
25 self.end - self.start
26 }
13} 27}
14 28
15impl LineIndex { 29impl LineIndex {
16 pub fn new(text: &str) -> LineIndex { 30 pub fn new(text: &str) -> LineIndex {
31 let mut utf16_lines = FxHashMap::default();
32 let mut utf16_chars = Vec::new();
33
17 let mut newlines = vec![0.into()]; 34 let mut newlines = vec![0.into()];
18 let mut curr = 0.into(); 35 let mut curr_row = 0.into();
36 let mut curr_col = 0.into();
37 let mut line = 0;
19 for c in text.chars() { 38 for c in text.chars() {
20 curr += TextUnit::of_char(c); 39 curr_row += TextUnit::of_char(c);
21 if c == '\n' { 40 if c == '\n' {
22 newlines.push(curr); 41 newlines.push(curr_row);
42
43 // Save any utf-16 characters seen in the previous line
44 if utf16_chars.len() > 0 {
45 utf16_lines.insert(line, utf16_chars);
46 utf16_chars = Vec::new();
47 }
48
49 // Prepare for processing the next line
50 curr_col = 0.into();
51 line += 1;
52 continue;
23 } 53 }
54
55 let char_len = TextUnit::of_char(c);
56 if char_len.to_usize() > 1 {
57 utf16_chars.push(Utf16Char {
58 start: curr_col,
59 end: curr_col + char_len,
60 });
61 }
62
63 curr_col += char_len;
64 }
65 LineIndex {
66 newlines,
67 utf16_lines,
24 } 68 }
25 LineIndex { newlines }
26 } 69 }
27 70
28 pub fn line_col(&self, offset: TextUnit) -> LineCol { 71 pub fn line_col(&self, offset: TextUnit) -> LineCol {
29 let line = self.newlines.upper_bound(&offset) - 1; 72 let line = self.newlines.upper_bound(&offset) - 1;
30 let line_start_offset = self.newlines[line]; 73 let line_start_offset = self.newlines[line];
31 let col = offset - line_start_offset; 74 let col = offset - line_start_offset;
75
32 LineCol { 76 LineCol {
33 line: line as u32, 77 line: line as u32,
34 col, 78 col: self.utf8_to_utf16_col(line as u32, col) as u32,
35 } 79 }
36 } 80 }
37 81
38 pub fn offset(&self, line_col: LineCol) -> TextUnit { 82 pub fn offset(&self, line_col: LineCol) -> TextUnit {
39 //TODO: return Result 83 //TODO: return Result
40 self.newlines[line_col.line as usize] + line_col.col 84 let col = self.utf16_to_utf8_col(line_col.line, line_col.col);
85 self.newlines[line_col.line as usize] + col
86 }
87
88 fn utf8_to_utf16_col(&self, line: u32, mut col: TextUnit) -> usize {
89 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
90 let mut correction = TextUnit::from_usize(0);
91 for c in utf16_chars {
92 if col >= c.end {
93 correction += c.len() - TextUnit::from_usize(1);
94 } else {
95 // From here on, all utf16 characters come *after* the character we are mapping,
96 // so we don't need to take them into account
97 break;
98 }
99 }
100
101 col -= correction;
102 }
103
104 col.to_usize()
105 }
106
107 fn utf16_to_utf8_col(&self, line: u32, col: u32) -> TextUnit {
108 let mut col: TextUnit = col.into();
109 if let Some(utf16_chars) = self.utf16_lines.get(&line) {
110 for c in utf16_chars {
111 if col >= c.start {
112 col += c.len() - TextUnit::from_usize(1);
113 } else {
114 // From here on, all utf16 characters come *after* the character we are mapping,
115 // so we don't need to take them into account
116 break;
117 }
118 }
119 }
120
121 col
41 } 122 }
42} 123}
43 124
@@ -45,105 +126,115 @@ impl LineIndex {
45fn test_line_index() { 126fn test_line_index() {
46 let text = "hello\nworld"; 127 let text = "hello\nworld";
47 let index = LineIndex::new(text); 128 let index = LineIndex::new(text);
48 assert_eq!( 129 assert_eq!(index.line_col(0.into()), LineCol { line: 0, col: 0 });
49 index.line_col(0.into()), 130 assert_eq!(index.line_col(1.into()), LineCol { line: 0, col: 1 });
50 LineCol { 131 assert_eq!(index.line_col(5.into()), LineCol { line: 0, col: 5 });
51 line: 0, 132 assert_eq!(index.line_col(6.into()), LineCol { line: 1, col: 0 });
52 col: 0.into() 133 assert_eq!(index.line_col(7.into()), LineCol { line: 1, col: 1 });
53 } 134 assert_eq!(index.line_col(8.into()), LineCol { line: 1, col: 2 });
54 ); 135 assert_eq!(index.line_col(10.into()), LineCol { line: 1, col: 4 });
55 assert_eq!( 136 assert_eq!(index.line_col(11.into()), LineCol { line: 1, col: 5 });
56 index.line_col(1.into()), 137 assert_eq!(index.line_col(12.into()), LineCol { line: 1, col: 6 });
57 LineCol {
58 line: 0,
59 col: 1.into()
60 }
61 );
62 assert_eq!(
63 index.line_col(5.into()),
64 LineCol {
65 line: 0,
66 col: 5.into()
67 }
68 );
69 assert_eq!(
70 index.line_col(6.into()),
71 LineCol {
72 line: 1,
73 col: 0.into()
74 }
75 );
76 assert_eq!(
77 index.line_col(7.into()),
78 LineCol {
79 line: 1,
80 col: 1.into()
81 }
82 );
83 assert_eq!(
84 index.line_col(8.into()),
85 LineCol {
86 line: 1,
87 col: 2.into()
88 }
89 );
90 assert_eq!(
91 index.line_col(10.into()),
92 LineCol {
93 line: 1,
94 col: 4.into()
95 }
96 );
97 assert_eq!(
98 index.line_col(11.into()),
99 LineCol {
100 line: 1,
101 col: 5.into()
102 }
103 );
104 assert_eq!(
105 index.line_col(12.into()),
106 LineCol {
107 line: 1,
108 col: 6.into()
109 }
110 );
111 138
112 let text = "\nhello\nworld"; 139 let text = "\nhello\nworld";
113 let index = LineIndex::new(text); 140 let index = LineIndex::new(text);
114 assert_eq!( 141 assert_eq!(index.line_col(0.into()), LineCol { line: 0, col: 0 });
115 index.line_col(0.into()), 142 assert_eq!(index.line_col(1.into()), LineCol { line: 1, col: 0 });
116 LineCol { 143 assert_eq!(index.line_col(2.into()), LineCol { line: 1, col: 1 });
117 line: 0, 144 assert_eq!(index.line_col(6.into()), LineCol { line: 1, col: 5 });
118 col: 0.into() 145 assert_eq!(index.line_col(7.into()), LineCol { line: 2, col: 0 });
119 } 146}
120 ); 147
121 assert_eq!( 148#[cfg(test)]
122 index.line_col(1.into()), 149mod test_utf8_utf16_conv {
123 LineCol { 150 use super::*;
124 line: 1, 151
125 col: 0.into() 152 #[test]
126 } 153 fn test_char_len() {
127 ); 154 assert_eq!('メ'.len_utf8(), 3);
128 assert_eq!( 155 assert_eq!('メ'.len_utf16(), 1);
129 index.line_col(2.into()), 156 }
130 LineCol { 157
131 line: 1, 158 #[test]
132 col: 1.into() 159 fn test_empty_index() {
133 } 160 let col_index = LineIndex::new(
134 ); 161 "
135 assert_eq!( 162const C: char = 'x';
136 index.line_col(6.into()), 163",
137 LineCol { 164 );
138 line: 1, 165 assert_eq!(col_index.utf16_lines.len(), 0);
139 col: 5.into() 166 }
140 } 167
141 ); 168 #[test]
142 assert_eq!( 169 fn test_single_char() {
143 index.line_col(7.into()), 170 let col_index = LineIndex::new(
144 LineCol { 171 "
145 line: 2, 172const C: char = 'メ';
146 col: 0.into() 173",
147 } 174 );
148 ); 175
176 assert_eq!(col_index.utf16_lines.len(), 1);
177 assert_eq!(col_index.utf16_lines[&1].len(), 1);
178 assert_eq!(
179 col_index.utf16_lines[&1][0],
180 Utf16Char {
181 start: 17.into(),
182 end: 20.into()
183 }
184 );
185
186 // UTF-8 to UTF-16, no changes
187 assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
188
189 // UTF-8 to UTF-16
190 assert_eq!(col_index.utf8_to_utf16_col(1, 22.into()), 20);
191
192 // UTF-16 to UTF-8, no changes
193 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextUnit::from(15));
194
195 // UTF-16 to UTF-8
196 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextUnit::from(21));
197 }
198
199 #[test]
200 fn test_string() {
201 let col_index = LineIndex::new(
202 "
203const C: char = \"メ メ\";
204",
205 );
206
207 assert_eq!(col_index.utf16_lines.len(), 1);
208 assert_eq!(col_index.utf16_lines[&1].len(), 2);
209 assert_eq!(
210 col_index.utf16_lines[&1][0],
211 Utf16Char {
212 start: 17.into(),
213 end: 20.into()
214 }
215 );
216 assert_eq!(
217 col_index.utf16_lines[&1][1],
218 Utf16Char {
219 start: 21.into(),
220 end: 24.into()
221 }
222 );
223
224 // UTF-8 to UTF-16
225 assert_eq!(col_index.utf8_to_utf16_col(1, 15.into()), 15);
226
227 assert_eq!(col_index.utf8_to_utf16_col(1, 21.into()), 19);
228 assert_eq!(col_index.utf8_to_utf16_col(1, 25.into()), 21);
229
230 assert!(col_index.utf8_to_utf16_col(2, 15.into()) == 15);
231
232 // UTF-16 to UTF-8
233 assert_eq!(col_index.utf16_to_utf8_col(1, 15), TextUnit::from_usize(15));
234
235 assert_eq!(col_index.utf16_to_utf8_col(1, 18), TextUnit::from_usize(20));
236 assert_eq!(col_index.utf16_to_utf8_col(1, 19), TextUnit::from_usize(23));
237
238 assert_eq!(col_index.utf16_to_utf8_col(2, 15), TextUnit::from_usize(15));
239 }
149} 240}
diff --git a/crates/ra_lsp_server/src/conv.rs b/crates/ra_lsp_server/src/conv.rs
index e5a2449c2..a102b9105 100644
--- a/crates/ra_lsp_server/src/conv.rs
+++ b/crates/ra_lsp_server/src/conv.rs
@@ -49,7 +49,6 @@ impl ConvWith for Position {
49 type Output = TextUnit; 49 type Output = TextUnit;
50 50
51 fn conv_with(self, line_index: &LineIndex) -> TextUnit { 51 fn conv_with(self, line_index: &LineIndex) -> TextUnit {
52 // TODO: UTF-16
53 let line_col = LineCol { 52 let line_col = LineCol {
54 line: self.line as u32, 53 line: self.line as u32,
55 col: (self.character as u32).into(), 54 col: (self.character as u32).into(),
@@ -64,7 +63,6 @@ impl ConvWith for TextUnit {
64 63
65 fn conv_with(self, line_index: &LineIndex) -> Position { 64 fn conv_with(self, line_index: &LineIndex) -> Position {
66 let line_col = line_index.line_col(self); 65 let line_col = line_index.line_col(self);
67 // TODO: UTF-16
68 Position::new(u64::from(line_col.line), u64::from(u32::from(line_col.col))) 66 Position::new(u64::from(line_col.line), u64::from(u32::from(line_col.col)))
69 } 67 }
70} 68}