From ac798e1f7cfbc6d27c87bb28e3f1d5b6801796aa Mon Sep 17 00:00:00 2001 From: Leander Tentrup Date: Fri, 17 Apr 2020 09:37:18 +0200 Subject: Implement syntax highlighting for format strings Detailed changes: 1) Implement a lexer for string literals that splits the string into format specifiers (`{}`), including the format specifier modifiers. 2) Adapt syntax highlighting to add ranges for the detected sequences. 3) Add a test case for the format string syntax highlighting. --- crates/ra_ide/src/snapshots/highlight_strings.html | 77 +++++ crates/ra_ide/src/syntax_highlighting.rs | 69 ++++- crates/ra_ide/src/syntax_highlighting/tests.rs | 65 +++++ crates/ra_syntax/src/ast/tokens.rs | 324 +++++++++++++++++++++ 4 files changed, 532 insertions(+), 3 deletions(-) create mode 100644 crates/ra_ide/src/snapshots/highlight_strings.html (limited to 'crates') diff --git a/crates/ra_ide/src/snapshots/highlight_strings.html b/crates/ra_ide/src/snapshots/highlight_strings.html new file mode 100644 index 000000000..d70627da0 --- /dev/null +++ b/crates/ra_ide/src/snapshots/highlight_strings.html @@ -0,0 +1,77 @@ + + +
macro_rules! println {
+    ($($arg:tt)*) => ({
+        $crate::io::_print($crate::format_args_nl!($($arg)*));
+    })
+}
+#[rustc_builtin_macro]
+macro_rules! format_args_nl {
+    ($fmt:expr) => {{ /* compiler built-in */ }};
+    ($fmt:expr, $($args:tt)*) => {{ /* compiler built-in */ }};
+}
+
+fn main() {
+    // from https://doc.rust-lang.org/std/fmt/index.html
+    println!("Hello");                 // => "Hello"
+    println!("Hello, {}!", "world");   // => "Hello, world!"
+    println!("The number is {}", 1);   // => "The number is 1"
+    println!("{:?}", (3, 4));          // => "(3, 4)"
+    println!("{value}", value=4);      // => "4"
+    println!("{} {}", 1, 2);           // => "1 2"
+    println!("{:04}", 42);             // => "0042" with leading zerosV
+    println!("{1} {} {0} {}", 1, 2);   // => "2 1 1 2"
+    println!("{argument}", argument = "test");   // => "test"
+    println!("{name} {}", 1, name = 2);          // => "2 1"
+    println!("{a} {c} {b}", a="a", b='b', c=3);  // => "a 3 b"
+    println!("Hello {:5}!", "x");
+    println!("Hello {:1$}!", "x", 5);
+    println!("Hello {1:0$}!", 5, "x");
+    println!("Hello {:width$}!", "x", width = 5);
+    println!("Hello {:<5}!", "x");
+    println!("Hello {:-<5}!", "x");
+    println!("Hello {:^5}!", "x");
+    println!("Hello {:>5}!", "x");
+    println!("Hello {:+}!", 5);
+    println!("{:#x}!", 27);
+    println!("Hello {:05}!", 5);
+    println!("Hello {:05}!", -5);
+    println!("{:#010x}!", 27);
+    println!("Hello {0} is {1:.5}", "x", 0.01);
+    println!("Hello {1} is {2:.0$}", 5, "x", 0.01);
+    println!("Hello {0} is {2:.1$}", "x", 5, 0.01);
+    println!("Hello {} is {:.*}",    "x", 5, 0.01);
+    println!("Hello {} is {2:.*}",   "x", 5, 0.01);
+    println!("Hello {} is {number:.prec$}", "x", prec = 5, number = 0.01);
+    println!("{}, `{name:.*}` has 3 fractional digits", "Hello", 3, name=1234.56);
+    println!("{}, `{name:.*}` has 3 characters", "Hello", 3, name="1234.56");
+    println!("{}, `{name:>8.*}` has 3 right-aligned characters", "Hello", 3, name="1234.56");
+    println!("Hello {{}}");
+    println!("{{ Hello");
+}
\ No newline at end of file diff --git a/crates/ra_ide/src/syntax_highlighting.rs b/crates/ra_ide/src/syntax_highlighting.rs index e7d9bf696..e342ca9df 100644 --- a/crates/ra_ide/src/syntax_highlighting.rs +++ b/crates/ra_ide/src/syntax_highlighting.rs @@ -12,7 +12,7 @@ use ra_ide_db::{ }; use ra_prof::profile; use ra_syntax::{ - ast::{self, HasQuotes, HasStringValue}, + ast::{self, HasFormatSpecifier, HasQuotes, HasStringValue}, AstNode, AstToken, Direction, NodeOrToken, SyntaxElement, SyntaxKind::*, SyntaxToken, TextRange, WalkEvent, T, @@ -21,6 +21,7 @@ use rustc_hash::FxHashMap; use crate::{call_info::call_info_for_token, Analysis, FileId}; +use ast::FormatSpecifier; pub(crate) use html::highlight_as_html; pub use tags::{Highlight, HighlightModifier, HighlightModifiers, HighlightTag}; @@ -95,7 +96,8 @@ impl HighlightedRangeStack { 1, "after DFS traversal, the stack should only contain a single element" ); - let res = self.stack.pop().unwrap(); + let mut res = self.stack.pop().unwrap(); + res.sort_by_key(|range| range.range.start()); // Check that ranges are sorted and disjoint assert!(res .iter() @@ -134,6 +136,7 @@ pub(crate) fn highlight( let mut stack = HighlightedRangeStack::new(); let mut current_macro_call: Option = None; + let mut format_string: Option = None; // Walk all nodes, keeping track of whether we are inside a macro or not. // If in macro, expand it first and highlight the expanded code. @@ -169,6 +172,7 @@ pub(crate) fn highlight( WalkEvent::Leave(Some(mc)) => { assert!(current_macro_call == Some(mc)); current_macro_call = None; + format_string = None; continue; } _ => (), @@ -189,6 +193,30 @@ pub(crate) fn highlight( }; let token = sema.descend_into_macros(token.clone()); let parent = token.parent(); + + // Check if macro takes a format string and remeber it for highlighting later. + // The macros that accept a format string expand to a compiler builtin macros + // `format_args` and `format_args_nl`. 
+ if let Some(fmt_macro_call) = parent.parent().and_then(ast::MacroCall::cast) { + if let Some(name) = + fmt_macro_call.path().and_then(|p| p.segment()).and_then(|s| s.name_ref()) + { + match name.text().as_str() { + "format_args" | "format_args_nl" => { + format_string = parent + .children_with_tokens() + .filter(|t| t.kind() != WHITESPACE) + .nth(1) + .filter(|e| { + ast::String::can_cast(e.kind()) + || ast::RawString::can_cast(e.kind()) + }) + } + _ => {} + } + } + } + // We only care Name and Name_ref match (token.kind(), parent.kind()) { (IDENT, NAME) | (IDENT, NAME_REF) => parent.into(), @@ -205,10 +233,45 @@ pub(crate) fn highlight( } } + let is_format_string = + format_string.as_ref().map(|fs| fs == &element_to_highlight).unwrap_or_default(); + if let Some((highlight, binding_hash)) = - highlight_element(&sema, &mut bindings_shadow_count, element_to_highlight) + highlight_element(&sema, &mut bindings_shadow_count, element_to_highlight.clone()) { stack.add(HighlightedRange { range, highlight, binding_hash }); + if let Some(string) = + element_to_highlight.as_token().cloned().and_then(ast::String::cast) + { + stack.push(); + if is_format_string { + string.lex_format_specifier(&mut |piece_range, kind| { + let highlight = match kind { + FormatSpecifier::Open + | FormatSpecifier::Close + | FormatSpecifier::Colon + | FormatSpecifier::Fill + | FormatSpecifier::Align + | FormatSpecifier::Sign + | FormatSpecifier::NumberSign + | FormatSpecifier::DollarSign + | FormatSpecifier::Dot + | FormatSpecifier::Asterisk + | FormatSpecifier::QuestionMark => HighlightTag::Attribute, + FormatSpecifier::Integer | FormatSpecifier::Zero => { + HighlightTag::NumericLiteral + } + FormatSpecifier::Identifier => HighlightTag::Local, + }; + stack.add(HighlightedRange { + range: piece_range + range.start(), + highlight: highlight.into(), + binding_hash: None, + }); + }); + } + stack.pop(); + } } } diff --git a/crates/ra_ide/src/syntax_highlighting/tests.rs 
b/crates/ra_ide/src/syntax_highlighting/tests.rs index 73611e23a..f198767ce 100644 --- a/crates/ra_ide/src/syntax_highlighting/tests.rs +++ b/crates/ra_ide/src/syntax_highlighting/tests.rs @@ -168,3 +168,68 @@ macro_rules! test {} ); let _ = analysis.highlight(file_id).unwrap(); } + +#[test] +fn test_string_highlighting() { + // The format string detection is based on macro-expansion, + // thus, we have to copy the macro definition from `std` + let (analysis, file_id) = single_file( + r#" +macro_rules! println { + ($($arg:tt)*) => ({ + $crate::io::_print($crate::format_args_nl!($($arg)*)); + }) +} +#[rustc_builtin_macro] +macro_rules! format_args_nl { + ($fmt:expr) => {{ /* compiler built-in */ }}; + ($fmt:expr, $($args:tt)*) => {{ /* compiler built-in */ }}; +} + +fn main() { + // from https://doc.rust-lang.org/std/fmt/index.html + println!("Hello"); // => "Hello" + println!("Hello, {}!", "world"); // => "Hello, world!" + println!("The number is {}", 1); // => "The number is 1" + println!("{:?}", (3, 4)); // => "(3, 4)" + println!("{value}", value=4); // => "4" + println!("{} {}", 1, 2); // => "1 2" + println!("{:04}", 42); // => "0042" with leading zerosV + println!("{1} {} {0} {}", 1, 2); // => "2 1 1 2" + println!("{argument}", argument = "test"); // => "test" + println!("{name} {}", 1, name = 2); // => "2 1" + println!("{a} {c} {b}", a="a", b='b', c=3); // => "a 3 b" + println!("Hello {:5}!", "x"); + println!("Hello {:1$}!", "x", 5); + println!("Hello {1:0$}!", 5, "x"); + println!("Hello {:width$}!", "x", width = 5); + println!("Hello {:<5}!", "x"); + println!("Hello {:-<5}!", "x"); + println!("Hello {:^5}!", "x"); + println!("Hello {:>5}!", "x"); + println!("Hello {:+}!", 5); + println!("{:#x}!", 27); + println!("Hello {:05}!", 5); + println!("Hello {:05}!", -5); + println!("{:#010x}!", 27); + println!("Hello {0} is {1:.5}", "x", 0.01); + println!("Hello {1} is {2:.0$}", 5, "x", 0.01); + println!("Hello {0} is {2:.1$}", "x", 5, 0.01); + println!("Hello {} is 
{:.*}", "x", 5, 0.01); + println!("Hello {} is {2:.*}", "x", 5, 0.01); + println!("Hello {} is {number:.prec$}", "x", prec = 5, number = 0.01); + println!("{}, `{name:.*}` has 3 fractional digits", "Hello", 3, name=1234.56); + println!("{}, `{name:.*}` has 3 characters", "Hello", 3, name="1234.56"); + println!("{}, `{name:>8.*}` has 3 right-aligned characters", "Hello", 3, name="1234.56"); + println!("Hello {{}}"); + println!("{{ Hello"); +}"# + .trim(), + ); + + let dst_file = project_dir().join("crates/ra_ide/src/snapshots/highlight_strings.html"); + let actual_html = &analysis.highlight_as_html(file_id, false).unwrap(); + let expected_html = &read_text(&dst_file); + fs::write(dst_file, &actual_html).unwrap(); + assert_eq_text!(expected_html, actual_html); +} diff --git a/crates/ra_syntax/src/ast/tokens.rs b/crates/ra_syntax/src/ast/tokens.rs index e8320b57e..ec3b4e553 100644 --- a/crates/ra_syntax/src/ast/tokens.rs +++ b/crates/ra_syntax/src/ast/tokens.rs @@ -172,3 +172,327 @@ impl RawString { Some(range + contents_range.start()) } } + +#[derive(Debug)] +pub enum FormatSpecifier { + Open, + Close, + Integer, + Identifier, + Colon, + Fill, + Align, + Sign, + NumberSign, + Zero, + DollarSign, + Dot, + Asterisk, + QuestionMark, +} + +pub trait HasFormatSpecifier: AstToken { + fn lex_format_specifier(&self, callback: &mut F) + where + F: FnMut(TextRange, FormatSpecifier), + { + let src = self.text().as_str(); + let initial_len = src.len(); + let mut chars = src.chars(); + + while let Some(first_char) = chars.next() { + match first_char { + '{' => { + // Format specifier, see syntax at https://doc.rust-lang.org/std/fmt/index.html#syntax + if chars.clone().next() == Some('{') { + // Escaped format specifier, `{{` + chars.next(); + continue; + } + + let start = initial_len - chars.as_str().len() - first_char.len_utf8(); + let end = initial_len - chars.as_str().len(); + callback( + TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)), + 
FormatSpecifier::Open, + ); + + let next_char = if let Some(c) = chars.clone().next() { + c + } else { + break; + }; + + // check for integer/identifier + match next_char { + '0'..='9' => { + // integer + read_integer(&mut chars, initial_len, callback); + } + 'a'..='z' | 'A'..='Z' | '_' => { + // identifier + read_identifier(&mut chars, initial_len, callback); + } + _ => {} + } + + if chars.clone().next() == Some(':') { + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::Colon, + callback, + ); + + // check for fill/align + let mut cloned = chars.clone().take(2); + let first = cloned.next().unwrap_or_default(); + let second = cloned.next().unwrap_or_default(); + match second { + '<' | '^' | '>' => { + // alignment specifier, first char specifies fillment + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::Fill, + callback, + ); + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::Align, + callback, + ); + } + _ => match first { + '<' | '^' | '>' => { + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::Align, + callback, + ); + } + _ => {} + }, + } + + // check for sign + match chars.clone().next().unwrap_or_default() { + '+' | '-' => { + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::Sign, + callback, + ); + } + _ => {} + } + + // check for `#` + if let Some('#') = chars.clone().next() { + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::NumberSign, + callback, + ); + } + + // check for `0` + let mut cloned = chars.clone().take(2); + let first = cloned.next(); + let second = cloned.next(); + + if first == Some('0') && second != Some('$') { + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::Zero, + callback, + ); + } + + // width + match chars.clone().next().unwrap_or_default() { + '0'..='9' => { + read_integer(&mut chars, initial_len, callback); + if chars.clone().next() == Some('$') { + skip_char_and_emit( + &mut chars, + initial_len, + 
FormatSpecifier::DollarSign, + callback, + ); + } + } + 'a'..='z' | 'A'..='Z' | '_' => { + read_identifier(&mut chars, initial_len, callback); + if chars.clone().next() != Some('$') { + continue; + } + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::DollarSign, + callback, + ); + } + _ => {} + } + + // precision + if chars.clone().next() == Some('.') { + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::Dot, + callback, + ); + + match chars.clone().next().unwrap_or_default() { + '*' => { + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::Asterisk, + callback, + ); + } + '0'..='9' => { + read_integer(&mut chars, initial_len, callback); + if chars.clone().next() == Some('$') { + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::DollarSign, + callback, + ); + } + } + 'a'..='z' | 'A'..='Z' | '_' => { + read_identifier(&mut chars, initial_len, callback); + if chars.clone().next() != Some('$') { + continue; + } + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::DollarSign, + callback, + ); + } + _ => { + continue; + } + } + } + + // type + match chars.clone().next().unwrap_or_default() { + '?' 
=> { + skip_char_and_emit( + &mut chars, + initial_len, + FormatSpecifier::QuestionMark, + callback, + ); + } + 'a'..='z' | 'A'..='Z' | '_' => { + read_identifier(&mut chars, initial_len, callback); + } + _ => {} + } + } + + let mut cloned = chars.clone().take(2); + let first = cloned.next(); + let second = cloned.next(); + if first != Some('}') { + continue; + } + if second == Some('}') { + // Escaped format end specifier, `}}` + continue; + } + skip_char_and_emit(&mut chars, initial_len, FormatSpecifier::Close, callback); + } + _ => { + while let Some(next_char) = chars.clone().next() { + match next_char { + '{' => break, + _ => {} + } + chars.next(); + } + } + }; + } + + fn skip_char_and_emit( + chars: &mut std::str::Chars, + initial_len: usize, + emit: FormatSpecifier, + callback: &mut F, + ) where + F: FnMut(TextRange, FormatSpecifier), + { + let start = initial_len - chars.as_str().len(); + chars.next(); + let end = initial_len - chars.as_str().len(); + callback( + TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)), + emit, + ); + } + + fn read_integer(chars: &mut std::str::Chars, initial_len: usize, callback: &mut F) + where + F: FnMut(TextRange, FormatSpecifier), + { + let start = initial_len - chars.as_str().len(); + chars.next(); + while let Some(next_char) = chars.clone().next() { + match next_char { + '0'..='9' => { + chars.next(); + } + _ => { + break; + } + } + } + let end = initial_len - chars.as_str().len(); + callback( + TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)), + FormatSpecifier::Integer, + ); + } + fn read_identifier(chars: &mut std::str::Chars, initial_len: usize, callback: &mut F) + where + F: FnMut(TextRange, FormatSpecifier), + { + let start = initial_len - chars.as_str().len(); + chars.next(); + while let Some(next_char) = chars.clone().next() { + match next_char { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' => { + chars.next(); + } + _ => { + break; + } + } + } + let end = initial_len 
- chars.as_str().len(); + callback( + TextRange::from_to(TextUnit::from_usize(start), TextUnit::from_usize(end)), + FormatSpecifier::Identifier, + ); + } + } +} + +impl HasFormatSpecifier for String {} +impl HasFormatSpecifier for RawString {} -- cgit v1.2.3