diff options
author | bors[bot] <26634292+bors[bot]@users.noreply.github.com> | 2020-10-26 13:20:57 +0000 |
---|---|---|
committer | GitHub <[email protected]> | 2020-10-26 13:20:57 +0000 |
commit | 29f5154d1c664c8fb1dfae45281873ed97a68a55 (patch) | |
tree | 74bff4f88c0d467d38c47a42d421e2e7c88d6f56 /crates | |
parent | 35ed3d2c005fdb0ec9ab78a52b31758bd7de298e (diff) | |
parent | afb2166aeb038f7c2587402db390510640b6f4fc (diff) |
Merge #6350
6350: Make IncorrectDiagnostic match rustc by copying rustc's code. r=popzxc a=ArifRoktim
This closes #6343 and closes #6345.
The old algorithm which used a `DetectedCase` enum, didn't match how rustc thinks of cases. Some inputs can be interpreted as more than 1 case depending on the situation. For example, to rustc:
- `ABCD`: Can be both camel case and upper snake case
- `X86_64`: Can be both camel case and upper snake case
I could've made `detect_case` return a collection of `DetectedCase` and then modified the other code as such, but I think using the same code rustc uses is simpler and a surefire way to achieve the same diagnostics as rustc.
Co-authored-by: Arif Roktim <[email protected]>
Diffstat (limited to 'crates')
-rw-r--r-- | crates/hir_ty/src/diagnostics/decl_check/case_conv.rs | 249 |
1 files changed, 123 insertions, 126 deletions
diff --git a/crates/hir_ty/src/diagnostics/decl_check/case_conv.rs b/crates/hir_ty/src/diagnostics/decl_check/case_conv.rs index 324d60765..b0144a289 100644 --- a/crates/hir_ty/src/diagnostics/decl_check/case_conv.rs +++ b/crates/hir_ty/src/diagnostics/decl_check/case_conv.rs | |||
@@ -1,156 +1,145 @@ | |||
1 | //! Functions for string case manipulation, such as detecting the identifier case, | 1 | //! Functions for string case manipulation, such as detecting the identifier case, |
2 | //! and converting it into appropriate form. | 2 | //! and converting it into appropriate form. |
3 | 3 | ||
4 | #[derive(Debug)] | 4 | // Code that was taken from rustc was taken at commit 89fdb30, |
5 | enum DetectedCase { | 5 | // from file /compiler/rustc_lint/src/nonstandard_style.rs |
6 | LowerCamelCase, | ||
7 | UpperCamelCase, | ||
8 | LowerSnakeCase, | ||
9 | UpperSnakeCase, | ||
10 | Unknown, | ||
11 | } | ||
12 | |||
13 | fn detect_case(ident: &str) -> DetectedCase { | ||
14 | let trimmed_ident = ident.trim_matches('_'); | ||
15 | let first_lowercase = trimmed_ident.starts_with(|chr: char| chr.is_ascii_lowercase()); | ||
16 | let mut has_lowercase = first_lowercase; | ||
17 | let mut has_uppercase = false; | ||
18 | let mut has_underscore = false; | ||
19 | |||
20 | for chr in trimmed_ident.chars() { | ||
21 | if chr == '_' { | ||
22 | has_underscore = true; | ||
23 | } else if chr.is_ascii_uppercase() { | ||
24 | has_uppercase = true; | ||
25 | } else if chr.is_ascii_lowercase() { | ||
26 | has_lowercase = true; | ||
27 | } | ||
28 | } | ||
29 | |||
30 | if has_uppercase { | ||
31 | if !has_lowercase { | ||
32 | if has_underscore { | ||
33 | DetectedCase::UpperSnakeCase | ||
34 | } else { | ||
35 | // It has uppercase only and no underscores. Ex: "AABB" | ||
36 | // This is a camel cased acronym. | ||
37 | DetectedCase::UpperCamelCase | ||
38 | } | ||
39 | } else if !has_underscore { | ||
40 | if first_lowercase { | ||
41 | DetectedCase::LowerCamelCase | ||
42 | } else { | ||
43 | DetectedCase::UpperCamelCase | ||
44 | } | ||
45 | } else { | ||
46 | // It has uppercase, it has lowercase, it has underscore. | ||
47 | // No assumptions here | ||
48 | DetectedCase::Unknown | ||
49 | } | ||
50 | } else { | ||
51 | DetectedCase::LowerSnakeCase | ||
52 | } | ||
53 | } | ||
54 | 6 | ||
55 | /// Converts an identifier to an UpperCamelCase form. | 7 | /// Converts an identifier to an UpperCamelCase form. |
56 | /// Returns `None` if the string is already is UpperCamelCase. | 8 | /// Returns `None` if the string is already is UpperCamelCase. |
57 | pub fn to_camel_case(ident: &str) -> Option<String> { | 9 | pub fn to_camel_case(ident: &str) -> Option<String> { |
58 | let detected_case = detect_case(ident); | 10 | if is_camel_case(ident) { |
59 | 11 | return None; | |
60 | match detected_case { | ||
61 | DetectedCase::UpperCamelCase => return None, | ||
62 | DetectedCase::LowerCamelCase => { | ||
63 | let mut first_capitalized = false; | ||
64 | let output = ident | ||
65 | .chars() | ||
66 | .map(|chr| { | ||
67 | if !first_capitalized && chr.is_ascii_lowercase() { | ||
68 | first_capitalized = true; | ||
69 | chr.to_ascii_uppercase() | ||
70 | } else { | ||
71 | chr | ||
72 | } | ||
73 | }) | ||
74 | .collect(); | ||
75 | return Some(output); | ||
76 | } | ||
77 | _ => {} | ||
78 | } | 12 | } |
79 | 13 | ||
80 | let mut output = String::with_capacity(ident.len()); | 14 | // Taken from rustc. |
81 | 15 | let ret = ident | |
82 | let mut capital_added = false; | 16 | .trim_matches('_') |
83 | for chr in ident.chars() { | 17 | .split('_') |
84 | if chr.is_alphabetic() { | 18 | .filter(|component| !component.is_empty()) |
85 | if !capital_added { | 19 | .map(|component| { |
86 | output.push(chr.to_ascii_uppercase()); | 20 | let mut camel_cased_component = String::new(); |
87 | capital_added = true; | 21 | |
88 | } else { | 22 | let mut new_word = true; |
89 | output.push(chr.to_ascii_lowercase()); | 23 | let mut prev_is_lower_case = true; |
24 | |||
25 | for c in component.chars() { | ||
26 | // Preserve the case if an uppercase letter follows a lowercase letter, so that | ||
27 | // `camelCase` is converted to `CamelCase`. | ||
28 | if prev_is_lower_case && c.is_uppercase() { | ||
29 | new_word = true; | ||
30 | } | ||
31 | |||
32 | if new_word { | ||
33 | camel_cased_component.push_str(&c.to_uppercase().to_string()); | ||
34 | } else { | ||
35 | camel_cased_component.push_str(&c.to_lowercase().to_string()); | ||
36 | } | ||
37 | |||
38 | prev_is_lower_case = c.is_lowercase(); | ||
39 | new_word = false; | ||
90 | } | 40 | } |
91 | } else if chr == '_' { | ||
92 | // Skip this character and make the next one capital. | ||
93 | capital_added = false; | ||
94 | } else { | ||
95 | // Put the characted as-is. | ||
96 | output.push(chr); | ||
97 | } | ||
98 | } | ||
99 | 41 | ||
100 | if output == ident { | 42 | camel_cased_component |
101 | // While we didn't detect the correct case at the beginning, there | 43 | }) |
102 | // may be special cases: e.g. `A` is both valid CamelCase and UPPER_SNAKE_CASE. | 44 | .fold((String::new(), None), |(acc, prev): (String, Option<String>), next| { |
103 | None | 45 | // separate two components with an underscore if their boundary cannot |
104 | } else { | 46 | // be distinguished using a uppercase/lowercase case distinction |
105 | Some(output) | 47 | let join = if let Some(prev) = prev { |
106 | } | 48 | let l = prev.chars().last().unwrap(); |
49 | let f = next.chars().next().unwrap(); | ||
50 | !char_has_case(l) && !char_has_case(f) | ||
51 | } else { | ||
52 | false | ||
53 | }; | ||
54 | (acc + if join { "_" } else { "" } + &next, Some(next)) | ||
55 | }) | ||
56 | .0; | ||
57 | Some(ret) | ||
107 | } | 58 | } |
108 | 59 | ||
109 | /// Converts an identifier to a lower_snake_case form. | 60 | /// Converts an identifier to a lower_snake_case form. |
110 | /// Returns `None` if the string is already in lower_snake_case. | 61 | /// Returns `None` if the string is already in lower_snake_case. |
111 | pub fn to_lower_snake_case(ident: &str) -> Option<String> { | 62 | pub fn to_lower_snake_case(ident: &str) -> Option<String> { |
112 | // First, assume that it's UPPER_SNAKE_CASE. | 63 | if is_lower_snake_case(ident) { |
113 | match detect_case(ident) { | 64 | return None; |
114 | DetectedCase::LowerSnakeCase => return None, | 65 | } else if is_upper_snake_case(ident) { |
115 | DetectedCase::UpperSnakeCase => { | 66 | return Some(ident.to_lowercase()); |
116 | return Some(ident.chars().map(|chr| chr.to_ascii_lowercase()).collect()) | ||
117 | } | ||
118 | _ => {} | ||
119 | } | 67 | } |
120 | 68 | ||
121 | // Otherwise, assume that it's CamelCase. | 69 | Some(stdx::to_lower_snake_case(ident)) |
122 | let lower_snake_case = stdx::to_lower_snake_case(ident); | ||
123 | |||
124 | if lower_snake_case == ident { | ||
125 | // While we didn't detect the correct case at the beginning, there | ||
126 | // may be special cases: e.g. `a` is both valid camelCase and snake_case. | ||
127 | None | ||
128 | } else { | ||
129 | Some(lower_snake_case) | ||
130 | } | ||
131 | } | 70 | } |
132 | 71 | ||
133 | /// Converts an identifier to an UPPER_SNAKE_CASE form. | 72 | /// Converts an identifier to an UPPER_SNAKE_CASE form. |
134 | /// Returns `None` if the string is already is UPPER_SNAKE_CASE. | 73 | /// Returns `None` if the string is already is UPPER_SNAKE_CASE. |
135 | pub fn to_upper_snake_case(ident: &str) -> Option<String> { | 74 | pub fn to_upper_snake_case(ident: &str) -> Option<String> { |
136 | match detect_case(ident) { | 75 | if is_upper_snake_case(ident) { |
137 | DetectedCase::UpperSnakeCase => return None, | 76 | return None; |
138 | DetectedCase::LowerSnakeCase => { | 77 | } else if is_lower_snake_case(ident) { |
139 | return Some(ident.chars().map(|chr| chr.to_ascii_uppercase()).collect()) | 78 | return Some(ident.to_uppercase()); |
140 | } | 79 | } |
141 | _ => {} | 80 | |
81 | Some(stdx::to_upper_snake_case(ident)) | ||
82 | } | ||
83 | |||
84 | // Taken from rustc. | ||
85 | // Modified by replacing the use of unstable feature `array_windows`. | ||
86 | fn is_camel_case(name: &str) -> bool { | ||
87 | let name = name.trim_matches('_'); | ||
88 | if name.is_empty() { | ||
89 | return true; | ||
142 | } | 90 | } |
143 | 91 | ||
144 | // Normalize the string from whatever form it's in currently, and then just make it uppercase. | 92 | let mut fst = None; |
145 | let upper_snake_case = stdx::to_upper_snake_case(ident); | 93 | // start with a non-lowercase letter rather than non-uppercase |
94 | // ones (some scripts don't have a concept of upper/lowercase) | ||
95 | !name.chars().next().unwrap().is_lowercase() | ||
96 | && !name.contains("__") | ||
97 | && !name.chars().any(|snd| { | ||
98 | let ret = match (fst, snd) { | ||
99 | (None, _) => false, | ||
100 | (Some(fst), snd) => { | ||
101 | char_has_case(fst) && snd == '_' || char_has_case(snd) && fst == '_' | ||
102 | } | ||
103 | }; | ||
104 | fst = Some(snd); | ||
105 | |||
106 | ret | ||
107 | }) | ||
108 | } | ||
109 | |||
110 | fn is_lower_snake_case(ident: &str) -> bool { | ||
111 | is_snake_case(ident, char::is_uppercase) | ||
112 | } | ||
146 | 113 | ||
147 | if upper_snake_case == ident { | 114 | fn is_upper_snake_case(ident: &str) -> bool { |
148 | // While we didn't detect the correct case at the beginning, there | 115 | is_snake_case(ident, char::is_lowercase) |
149 | // may be special cases: e.g. `A` is both valid CamelCase and UPPER_SNAKE_CASE. | 116 | } |
150 | None | 117 | |
151 | } else { | 118 | // Taken from rustc. |
152 | Some(upper_snake_case) | 119 | // Modified to allow checking for both upper and lower snake case. |
120 | fn is_snake_case<F: Fn(char) -> bool>(ident: &str, wrong_case: F) -> bool { | ||
121 | if ident.is_empty() { | ||
122 | return true; | ||
153 | } | 123 | } |
124 | let ident = ident.trim_matches('_'); | ||
125 | |||
126 | let mut allow_underscore = true; | ||
127 | ident.chars().all(|c| { | ||
128 | allow_underscore = match c { | ||
129 | '_' if !allow_underscore => return false, | ||
130 | '_' => false, | ||
131 | // It would be more obvious to check for the correct case, | ||
132 | // but some characters do not have a case. | ||
133 | c if !wrong_case(c) => true, | ||
134 | _ => return false, | ||
135 | }; | ||
136 | true | ||
137 | }) | ||
138 | } | ||
139 | |||
140 | // Taken from rustc. | ||
141 | fn char_has_case(c: char) -> bool { | ||
142 | c.is_lowercase() || c.is_uppercase() | ||
154 | } | 143 | } |
155 | 144 | ||
156 | #[cfg(test)] | 145 | #[cfg(test)] |
@@ -173,6 +162,7 @@ mod tests { | |||
173 | check(to_lower_snake_case, "CamelCase", expect![["camel_case"]]); | 162 | check(to_lower_snake_case, "CamelCase", expect![["camel_case"]]); |
174 | check(to_lower_snake_case, "lowerCamelCase", expect![["lower_camel_case"]]); | 163 | check(to_lower_snake_case, "lowerCamelCase", expect![["lower_camel_case"]]); |
175 | check(to_lower_snake_case, "a", expect![[""]]); | 164 | check(to_lower_snake_case, "a", expect![[""]]); |
165 | check(to_lower_snake_case, "abc", expect![[""]]); | ||
176 | } | 166 | } |
177 | 167 | ||
178 | #[test] | 168 | #[test] |
@@ -187,6 +177,11 @@ mod tests { | |||
187 | check(to_camel_case, "name", expect![["Name"]]); | 177 | check(to_camel_case, "name", expect![["Name"]]); |
188 | check(to_camel_case, "A", expect![[""]]); | 178 | check(to_camel_case, "A", expect![[""]]); |
189 | check(to_camel_case, "AABB", expect![[""]]); | 179 | check(to_camel_case, "AABB", expect![[""]]); |
180 | // Taken from rustc: /compiler/rustc_lint/src/nonstandard_style/tests.rs | ||
181 | check(to_camel_case, "X86_64", expect![[""]]); | ||
182 | check(to_camel_case, "x86__64", expect![["X86_64"]]); | ||
183 | check(to_camel_case, "Abc_123", expect![["Abc123"]]); | ||
184 | check(to_camel_case, "A1_b2_c3", expect![["A1B2C3"]]); | ||
190 | } | 185 | } |
191 | 186 | ||
192 | #[test] | 187 | #[test] |
@@ -197,5 +192,7 @@ mod tests { | |||
197 | check(to_upper_snake_case, "CamelCase", expect![["CAMEL_CASE"]]); | 192 | check(to_upper_snake_case, "CamelCase", expect![["CAMEL_CASE"]]); |
198 | check(to_upper_snake_case, "lowerCamelCase", expect![["LOWER_CAMEL_CASE"]]); | 193 | check(to_upper_snake_case, "lowerCamelCase", expect![["LOWER_CAMEL_CASE"]]); |
199 | check(to_upper_snake_case, "A", expect![[""]]); | 194 | check(to_upper_snake_case, "A", expect![[""]]); |
195 | check(to_upper_snake_case, "ABC", expect![[""]]); | ||
196 | check(to_upper_snake_case, "X86_64", expect![[""]]); | ||
200 | } | 197 | } |
201 | } | 198 | } |