From efd96e8df6805a45aaf5822141dee11c642b51ae Mon Sep 17 00:00:00 2001 From: Akshay Date: Tue, 2 Aug 2022 16:57:04 +0530 Subject: init --- src/lib.rs | 220 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 src/lib.rs (limited to 'src/lib.rs') diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..b708984 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,220 @@ +// locals +mod error; +mod traverse; + +// std +use std::{ + fmt, + ops::Not, + path::{Path, PathBuf}, +}; + +// extern +use error::SimError; +use itertools::{EitherOrBoth, Itertools}; +use traverse::PreOrder; +use tree_sitter::{Node, Parser, Tree}; + +type Result = std::result::Result; + +// Check if two tree-sitter objects (trees or nodes etc.) are "similar" +pub trait Similarity { + // This function accepts a predicate to filter out nodes, + // for e.g.: trivia such as comments and whitespace may be filtered + // out with `!node.is_extra()` + fn is_similar

(&self, other: &Self, predicate: P) -> bool + where + P: Fn(&Node) -> bool; +} + +// Representation of a fully parsed file from the filesystem +pub struct ProgramFile { + pub path: PathBuf, + pub src: String, + pub tree: Tree, +} + +impl Similarity for ProgramFile { + fn is_similar

(&self, other: &Self, predicate: P) -> bool + where + P: Fn(&Node) -> bool, + { + // The order of traversal is unimportant + let mine = PreOrder::new(&self.tree).filter(|n| predicate(n)); + let theirs = PreOrder::new(&other.tree).filter(|n| predicate(n)); + + // Simultaneously traverse both the trees, and find dissimilarities. + // + // A dissimilarity is when: + // - one tree's traversal is longer than the other + // - the trees have identical traversal lengths but non-identical + // nodes along the traversal + mine.zip_longest(theirs) + .any(|result| match result { + EitherOrBoth::Both(mine, theirs) => mine.kind_id() != theirs.kind_id(), + EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => true, + }) + .not() + } +} + +impl fmt::Debug for ProgramFile { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ProgramFile") + .field("path", &self.path) + .field("src", &self.src.len()) + .finish() + } +} + +impl ProgramFile { + pub fn new>(path: P, parser: &mut Parser) -> Result { + let src = std::fs::read_to_string(&path)?; + let path = path.as_ref().to_owned(); + let tree = parser.parse(&src, None).ok_or(SimError::LanguageNotSet)?; + Ok(Self { path, src, tree }) + } +} + +#[cfg(test)] +mod test { + use super::*; + use tree_sitter::Parser; + + // Utility to quickly bootstrap ProgramFiles + // and run similarity checks + fn is_similar(mine: &str, their: &str) -> bool { + let lang = tree_sitter_javascript::language(); + let mut parser = Parser::new(); + parser.set_language(lang).unwrap(); + + let my_program_file = ProgramFile { + path: PathBuf::from("mine"), + src: mine.to_owned(), + tree: parser.parse(&mine, None).unwrap(), + }; + + let their_program_file = ProgramFile { + path: PathBuf::from("their"), + src: their.to_owned(), + tree: parser.parse(&their, None).unwrap(), + }; + + my_program_file.is_similar(&their_program_file, |n| !n.is_extra()) + } + + #[test] + fn trivial() { + assert!(is_similar( + r#" + var x = 2; + "#, + r#" + var y = 5; + "# + )) + } + + #[test] + fn automorphism() { + assert!(is_similar( + r#" + function(x) { + x * 2 + } + "#, + r#" + function(x) { + x * 2 + } + "# + )) + } + + #[test] + fn sample() { + assert!(is_similar( + r#" + var scrapeWebsite = async function scrapeWebsite(website) { + try { + var url = normalizeUrl(website); + var html = await fetch(url).then(function (res) { + return res.text(); + }); + var $ = load(html); + var handles = {}; + module.exports.CONFIG.socialNetworks.forEach(function (socialNetwork) { + handles[socialNetwork] = parse(socialNetwork)($); + }) + return handles; + } catch (error) { + throw new Error("Error scraping website data for " + website + ": " + error.message) + } + } + "#, + r#" + var parseWebsite = async function parseWebsite(website) { + try { + var url = normalizeUrl(website); + var html = await fetch(url).then(function (res) { + return res.text(); + }); + var $ = load(html); + var handles = {}; + module.exports.CONFIG.socialNetworks.forEach(function (socialNetwork) { + handles[socialNetwork] = parse(socialNetwork)($); + }) + return handles; + } catch (error) { + throw new Error("Error fetching website data for " + website + ": " + error.message) + } + } + "# + )) + } + + #[test] + fn ignore_comment_trivia() { + assert!(is_similar( + r#" + // doubles x + function(x) { + x * 2 + } + "#, + r#" + function(x) { + x * 2 // doubles x + } + "# + )) + } + + #[test] + fn ignore_whitespace_trivia() { + assert!(is_similar( + r#" + function(x) { +return x * 2; + } + "#, + r#" + function(x ) { + return x * 2; + } + "# + )) + } + + #[test] + fn dissimilar() { + assert!(!is_similar( + r#" + var t = function(x, y) { } + "#, + r#" + var l = function(x) { } + "# + )) + } +} -- cgit v1.2.3