From ae3abd6e575940eb1221acf26c09e96352f052fa Mon Sep 17 00:00:00 2001
From: Aleksey Kladov
Date: Thu, 13 Aug 2020 16:45:10 +0200
Subject: Rename ra_ssr -> ssr

---
 crates/ssr/src/parsing.rs | 389 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 389 insertions(+)
 create mode 100644 crates/ssr/src/parsing.rs

(limited to 'crates/ssr/src/parsing.rs')

diff --git a/crates/ssr/src/parsing.rs b/crates/ssr/src/parsing.rs
new file mode 100644
index 000000000..9570e96e3
--- /dev/null
+++ b/crates/ssr/src/parsing.rs
@@ -0,0 +1,389 @@
+//! This file contains code for parsing SSR rules, which look something like `foo($a) ==>> bar($b)`.
+//! We first split everything before and after the separator `==>>`. Next, both the search pattern
+//! and the replacement template get tokenized by the Rust tokenizer. Tokens are then searched for
+//! placeholders, which start with `$`. For replacement templates, this is the final form. For
+//! search patterns, we go further and parse the pattern as each kind of thing that we can match.
+//! e.g. expressions, type references etc.
+
+use crate::errors::bail;
+use crate::{SsrError, SsrPattern, SsrRule};
+use rustc_hash::{FxHashMap, FxHashSet};
+use std::str::FromStr;
+use syntax::{ast, AstNode, SmolStr, SyntaxKind, SyntaxNode, T};
+use test_utils::mark;
+
+#[derive(Debug)]
+pub(crate) struct ParsedRule {
+    pub(crate) placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
+    pub(crate) pattern: SyntaxNode,
+    pub(crate) template: Option<SyntaxNode>,
+}
+
+#[derive(Debug)]
+pub(crate) struct RawPattern {
+    tokens: Vec<PatternElement>,
+}
+
+// Part of a search or replace pattern.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) enum PatternElement {
+    Token(Token),
+    Placeholder(Placeholder),
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) struct Placeholder {
+    /// The name of this placeholder. e.g. for "$a", this would be "a"
+    pub(crate) ident: SmolStr,
+    /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
+    stand_in_name: String,
+    pub(crate) constraints: Vec<Constraint>,
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) enum Constraint {
+    Kind(NodeKind),
+    Not(Box<Constraint>),
+}
+
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub(crate) enum NodeKind {
+    Literal,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub(crate) struct Token {
+    kind: SyntaxKind,
+    pub(crate) text: SmolStr,
+}
+
+impl ParsedRule {
+    fn new(
+        pattern: &RawPattern,
+        template: Option<&RawPattern>,
+    ) -> Result<Vec<ParsedRule>, SsrError> {
+        let raw_pattern = pattern.as_rust_code();
+        let raw_template = template.map(|t| t.as_rust_code());
+        let raw_template = raw_template.as_ref().map(|s| s.as_str());
+        let mut builder = RuleBuilder {
+            placeholders_by_stand_in: pattern.placeholders_by_stand_in(),
+            rules: Vec::new(),
+        };
+        builder.try_add(ast::Expr::parse(&raw_pattern), raw_template.map(ast::Expr::parse));
+        builder.try_add(ast::Type::parse(&raw_pattern), raw_template.map(ast::Type::parse));
+        builder.try_add(ast::Item::parse(&raw_pattern), raw_template.map(ast::Item::parse));
+        builder.try_add(ast::Path::parse(&raw_pattern), raw_template.map(ast::Path::parse));
+        builder.try_add(ast::Pat::parse(&raw_pattern), raw_template.map(ast::Pat::parse));
+        builder.build()
+    }
+}
+
+struct RuleBuilder {
+    placeholders_by_stand_in: FxHashMap<SmolStr, Placeholder>,
+    rules: Vec<ParsedRule>,
+}
+
+impl RuleBuilder {
+    fn try_add<T: AstNode>(&mut self, pattern: Result<T, ()>, template: Option<Result<T, ()>>) {
+        match (pattern, template) {
+            (Ok(pattern), Some(Ok(template))) => self.rules.push(ParsedRule {
+                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
+                pattern: pattern.syntax().clone(),
+                template: Some(template.syntax().clone()),
+            }),
+            (Ok(pattern), None) => self.rules.push(ParsedRule {
+                placeholders_by_stand_in: self.placeholders_by_stand_in.clone(),
+                pattern: pattern.syntax().clone(),
+                template: None,
+            }),
+            _ => {}
+        }
+    }
+
+    fn build(mut self) -> Result<Vec<ParsedRule>, SsrError> {
+        if self.rules.is_empty() {
+            bail!("Not a valid Rust expression, type, item, path or pattern");
+        }
+        // If any rules contain paths, then we reject any rules that don't contain paths. Allowing a
+        // mix leads to strange semantics, since the path-based rules only match things where the
+        // path refers to semantically the same thing, whereas the non-path-based rules could match
+        // anything. Specifically, if we have a rule like `foo ==>> bar` we only want to match the
+        // `foo` that is in the current scope, not any `foo`. However "foo" can be parsed as a
+        // pattern (IDENT_PAT -> NAME -> IDENT). Allowing such a rule through would result in
+        // renaming everything called `foo` to `bar`. It'd also be slow, since without a path, we'd
+        // have to use the slow-scan search mechanism.
+        if self.rules.iter().any(|rule| contains_path(&rule.pattern)) {
+            let old_len = self.rules.len();
+            self.rules.retain(|rule| contains_path(&rule.pattern));
+            if self.rules.len() < old_len {
+                mark::hit!(pattern_is_a_single_segment_path);
+            }
+        }
+        Ok(self.rules)
+    }
+}
+
+/// Returns whether there are any paths in `node`.
+fn contains_path(node: &SyntaxNode) -> bool {
+    node.kind() == SyntaxKind::PATH
+        || node.descendants().any(|node| node.kind() == SyntaxKind::PATH)
+}
+
+impl FromStr for SsrRule {
+    type Err = SsrError;
+
+    fn from_str(query: &str) -> Result<SsrRule, SsrError> {
+        let mut it = query.split("==>>");
+        let pattern = it.next().expect("at least empty string").trim();
+        let template = it
+            .next()
+            .ok_or_else(|| SsrError("Cannot find delimiter `==>>`".into()))?
+            .trim()
+            .to_string();
+        if it.next().is_some() {
+            return Err(SsrError("More than one delimiter found".into()));
+        }
+        let raw_pattern = pattern.parse()?;
+        let raw_template = template.parse()?;
+        let parsed_rules = ParsedRule::new(&raw_pattern, Some(&raw_template))?;
+        let rule = SsrRule { pattern: raw_pattern, template: raw_template, parsed_rules };
+        validate_rule(&rule)?;
+        Ok(rule)
+    }
+}
+
+impl FromStr for RawPattern {
+    type Err = SsrError;
+
+    fn from_str(pattern_str: &str) -> Result<RawPattern, SsrError> {
+        Ok(RawPattern { tokens: parse_pattern(pattern_str)? })
+    }
+}
+
+impl RawPattern {
+    /// Returns this search pattern as Rust source code that we can feed to the Rust parser.
+    fn as_rust_code(&self) -> String {
+        let mut res = String::new();
+        for t in &self.tokens {
+            res.push_str(match t {
+                PatternElement::Token(token) => token.text.as_str(),
+                PatternElement::Placeholder(placeholder) => placeholder.stand_in_name.as_str(),
+            });
+        }
+        res
+    }
+
+    pub(crate) fn placeholders_by_stand_in(&self) -> FxHashMap<SmolStr, Placeholder> {
+        let mut res = FxHashMap::default();
+        for t in &self.tokens {
+            if let PatternElement::Placeholder(placeholder) = t {
+                res.insert(SmolStr::new(placeholder.stand_in_name.clone()), placeholder.clone());
+            }
+        }
+        res
+    }
+}
+
+impl FromStr for SsrPattern {
+    type Err = SsrError;
+
+    fn from_str(pattern_str: &str) -> Result<SsrPattern, SsrError> {
+        let raw_pattern = pattern_str.parse()?;
+        let parsed_rules = ParsedRule::new(&raw_pattern, None)?;
+        Ok(SsrPattern { raw: raw_pattern, parsed_rules })
+    }
+}
+
+/// Returns `pattern_str`, parsed as a search or replace pattern. If `remove_whitespace` is true,
+/// then any whitespace tokens will be removed, which we do for the search pattern, but not for the
+/// replace pattern.
+fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> {
+    let mut res = Vec::new();
+    let mut placeholder_names = FxHashSet::default();
+    let mut tokens = tokenize(pattern_str)?.into_iter();
+    while let Some(token) = tokens.next() {
+        if token.kind == T![$] {
+            let placeholder = parse_placeholder(&mut tokens)?;
+            if !placeholder_names.insert(placeholder.ident.clone()) {
+                bail!("Name `{}` repeats more than once", placeholder.ident);
+            }
+            res.push(PatternElement::Placeholder(placeholder));
+        } else {
+            res.push(PatternElement::Token(token));
+        }
+    }
+    Ok(res)
+}
+
+/// Checks for errors in a rule. e.g. the replace pattern referencing placeholders that the search
+/// pattern didn't define.
+fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
+    let mut defined_placeholders = FxHashSet::default();
+    for p in &rule.pattern.tokens {
+        if let PatternElement::Placeholder(placeholder) = p {
+            defined_placeholders.insert(&placeholder.ident);
+        }
+    }
+    let mut undefined = Vec::new();
+    for p in &rule.template.tokens {
+        if let PatternElement::Placeholder(placeholder) = p {
+            if !defined_placeholders.contains(&placeholder.ident) {
+                undefined.push(format!("${}", placeholder.ident));
+            }
+            if !placeholder.constraints.is_empty() {
+                bail!("Replacement placeholders cannot have constraints");
+            }
+        }
+    }
+    if !undefined.is_empty() {
+        bail!("Replacement contains undefined placeholders: {}", undefined.join(", "));
+    }
+    Ok(())
+}
+
+fn tokenize(source: &str) -> Result<Vec<Token>, SsrError> {
+    let mut start = 0;
+    let (raw_tokens, errors) = syntax::tokenize(source);
+    if let Some(first_error) = errors.first() {
+        bail!("Failed to parse pattern: {}", first_error);
+    }
+    let mut tokens: Vec<Token> = Vec::new();
+    for raw_token in raw_tokens {
+        let token_len = usize::from(raw_token.len);
+        tokens.push(Token {
+            kind: raw_token.kind,
+            text: SmolStr::new(&source[start..start + token_len]),
+        });
+        start += token_len;
+    }
+    Ok(tokens)
+}
+
+fn parse_placeholder(tokens: &mut std::vec::IntoIter<Token>) -> Result<Placeholder, SsrError> {
+    let mut name = None;
+    let mut constraints = Vec::new();
+    if let Some(token) = tokens.next() {
+        match token.kind {
+            SyntaxKind::IDENT => {
+                name = Some(token.text);
+            }
+            T!['{'] => {
+                let token =
+                    tokens.next().ok_or_else(|| SsrError::new("Unexpected end of placeholder"))?;
+                if token.kind == SyntaxKind::IDENT {
+                    name = Some(token.text);
+                }
+                loop {
+                    let token = tokens
+                        .next()
+                        .ok_or_else(|| SsrError::new("Placeholder is missing closing brace '}'"))?;
+                    match token.kind {
+                        T![:] => {
+                            constraints.push(parse_constraint(tokens)?);
+                        }
+                        T!['}'] => break,
+                        _ => bail!("Unexpected token while parsing placeholder: '{}'", token.text),
+                    }
+                }
+            }
+            _ => {
+                bail!("Placeholders should either be $name or ${{name:constraints}}");
+            }
+        }
+    }
+    let name = name.ok_or_else(|| SsrError::new("Placeholder ($) with no name"))?;
+    Ok(Placeholder::new(name, constraints))
+}
+
+fn parse_constraint(tokens: &mut std::vec::IntoIter<Token>) -> Result<Constraint, SsrError> {
+    let constraint_type = tokens
+        .next()
+        .ok_or_else(|| SsrError::new("Found end of placeholder while looking for a constraint"))?
+        .text
+        .to_string();
+    match constraint_type.as_str() {
+        "kind" => {
+            expect_token(tokens, "(")?;
+            let t = tokens.next().ok_or_else(|| {
+                SsrError::new("Unexpected end of constraint while looking for kind")
+            })?;
+            if t.kind != SyntaxKind::IDENT {
+                bail!("Expected ident, found {:?} while parsing kind constraint", t.kind);
+            }
+            expect_token(tokens, ")")?;
+            Ok(Constraint::Kind(NodeKind::from(&t.text)?))
+        }
+        "not" => {
+            expect_token(tokens, "(")?;
+            let sub = parse_constraint(tokens)?;
+            expect_token(tokens, ")")?;
+            Ok(Constraint::Not(Box::new(sub)))
+        }
+        x => bail!("Unsupported constraint type '{}'", x),
+    }
+}
+
+fn expect_token(tokens: &mut std::vec::IntoIter<Token>, expected: &str) -> Result<(), SsrError> {
+    if let Some(t) = tokens.next() {
+        if t.text == expected {
+            return Ok(());
+        }
+        bail!("Expected {} found {}", expected, t.text);
+    }
+    bail!("Expected {} found end of stream", expected);
+}
+
+impl NodeKind {
+    fn from(name: &SmolStr) -> Result<NodeKind, SsrError> {
+        Ok(match name.as_str() {
+            "literal" => NodeKind::Literal,
+            _ => bail!("Unknown node kind '{}'", name),
+        })
+    }
+}
+
+impl Placeholder {
+    fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self {
+        Self { stand_in_name: format!("__placeholder_{}", name), constraints, ident: name }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parser_happy_case() {
+        fn token(kind: SyntaxKind, text: &str) -> PatternElement {
+            PatternElement::Token(Token { kind, text: SmolStr::new(text) })
+        }
+        fn placeholder(name: &str) -> PatternElement {
+            PatternElement::Placeholder(Placeholder::new(SmolStr::new(name), Vec::new()))
+        }
+        let result: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse().unwrap();
+        assert_eq!(
+            result.pattern.tokens,
+            vec![
+                token(SyntaxKind::IDENT, "foo"),
+                token(T!['('], "("),
+                placeholder("a"),
+                token(T![,], ","),
+                token(SyntaxKind::WHITESPACE, " "),
+                placeholder("b"),
+                token(T![')'], ")"),
+            ]
+        );
+        assert_eq!(
+            result.template.tokens,
+            vec![
+                token(SyntaxKind::IDENT, "bar"),
+                token(T!['('], "("),
+                placeholder("b"),
+                token(T![,], ","),
+                token(SyntaxKind::WHITESPACE, " "),
+                placeholder("a"),
+                token(T![')'], ")"),
+            ]
+        );
+    }
+}
--
cgit v1.2.3


From c84f98385a28eeb7595f38b7cfaf861a6e06f4ea Mon Sep 17 00:00:00 2001
From: David Lattimore
Date: Thu, 6 Aug 2020 11:30:52 +1000
Subject: Refactor SSR so that placeholders store a Var

This allows lookup of placeholder bindings given a placeholder without
needing to create a Var instance.
---
 crates/ssr/src/parsing.rs | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

(limited to 'crates/ssr/src/parsing.rs')

diff --git a/crates/ssr/src/parsing.rs b/crates/ssr/src/parsing.rs
index 9570e96e3..05b66dcd7 100644
--- a/crates/ssr/src/parsing.rs
+++ b/crates/ssr/src/parsing.rs
@@ -8,7 +8,7 @@
 use crate::errors::bail;
 use crate::{SsrError, SsrPattern, SsrRule};
 use rustc_hash::{FxHashMap, FxHashSet};
-use std::str::FromStr;
+use std::{fmt::Display, str::FromStr};
 use syntax::{ast, AstNode, SmolStr, SyntaxKind, SyntaxNode, T};
 use test_utils::mark;
 
@@ -34,12 +34,16 @@ pub(crate) enum PatternElement {
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub(crate) struct Placeholder {
     /// The name of this placeholder. e.g. for "$a", this would be "a"
-    pub(crate) ident: SmolStr,
+    pub(crate) ident: Var,
     /// A unique name used in place of this placeholder when we parse the pattern as Rust code.
     stand_in_name: String,
     pub(crate) constraints: Vec<Constraint>,
 }
 
+/// Represents a `$var` in an SSR query.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub(crate) struct Var(pub String);
+
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub(crate) enum Constraint {
     Kind(NodeKind),
@@ -205,7 +209,7 @@ fn parse_pattern(pattern_str: &str) -> Result<Vec<PatternElement>, SsrError> {
         if token.kind == T![$] {
             let placeholder = parse_placeholder(&mut tokens)?;
             if !placeholder_names.insert(placeholder.ident.clone()) {
-                bail!("Name `{}` repeats more than once", placeholder.ident);
+                bail!("Placeholder `{}` repeats more than once", placeholder.ident);
             }
             res.push(PatternElement::Placeholder(placeholder));
         } else {
@@ -228,7 +232,7 @@ fn validate_rule(rule: &SsrRule) -> Result<(), SsrError> {
     for p in &rule.template.tokens {
         if let PatternElement::Placeholder(placeholder) = p {
             if !defined_placeholders.contains(&placeholder.ident) {
-                undefined.push(format!("${}", placeholder.ident));
+                undefined.push(placeholder.ident.to_string());
             }
             if !placeholder.constraints.is_empty() {
                 bail!("Replacement placeholders cannot have constraints");
@@ -344,7 +348,17 @@ impl NodeKind {
 
 impl Placeholder {
     fn new(name: SmolStr, constraints: Vec<Constraint>) -> Self {
-        Self { stand_in_name: format!("__placeholder_{}", name), constraints, ident: name }
+        Self {
+            stand_in_name: format!("__placeholder_{}", name),
+            constraints,
+            ident: Var(name.to_string()),
+        }
+    }
+}
+
+impl Display for Var {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "${}", self.0)
     }
 }
 
--
cgit v1.2.3
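
A quick usage sketch of how the parsing entry points above compose (not part of either commit; the function name `parse_examples` and the concrete pattern strings are illustrative assumptions, in the spirit of the `parser_happy_case` test). A full `SsrRule` is a search pattern and a template separated by `==>>`, a bare `SsrPattern` has no template, and a placeholder may carry constraints written as `${name:kind(literal)}` or `${name:not(...)}`:

    // Crate-internal sketch; uses the same imports as parsing.rs itself.
    use crate::{SsrError, SsrPattern, SsrRule};

    fn parse_examples() -> Result<(), SsrError> {
        // Search pattern and replacement template, split on `==>>` by SsrRule::from_str.
        let _rule: SsrRule = "foo($a, $b) ==>> bar($b, $a)".parse()?;
        // A search-only pattern: ParsedRule::new is called with None for the template.
        let _pattern: SsrPattern = "Option::Some($a)".parse()?;
        // A constrained placeholder, handled by parse_placeholder/parse_constraint.
        let _constrained: SsrRule = "foo(${x:kind(literal)}) ==>> bar($x)".parse()?;
        // validate_rule rejects templates that use placeholders the search pattern never defined.
        assert!("foo($a) ==>> bar($c)".parse::<SsrRule>().is_err());
        Ok(())
    }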