From cfc70207996e202edbb577b2ad97a61ba9eb0eaa Mon Sep 17 00:00:00 2001 From: Akshay Date: Tue, 2 Aug 2022 19:50:46 +0530 Subject: add textual comparison structural comparison helps detect a vast majority of duplicates, but it has a few false positives when files contain only trivia. textual similarity can help detect and eliminate those false positives. --- Cargo.lock | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'Cargo.lock') diff --git a/Cargo.lock b/Cargo.lock index 6c0cbfd..ecfe571 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,7 @@ version = "0.1.0" dependencies = [ "itertools", "rayon", + "simhash", "thiserror", "tree-sitter", "tree-sitter-javascript", @@ -211,6 +212,21 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "simhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd8e19fb8912cfcdc26507d2e38f53c9f7462b58bf7356f6f91acfb632d3a224" +dependencies = [ + "siphasher", +] + +[[package]] +name = "siphasher" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac" + [[package]] name = "syn" version = "1.0.98" -- cgit v1.2.3