aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAkshay <[email protected]>2022-08-02 12:27:04 +0100
committerAkshay <[email protected]>2022-08-02 12:27:04 +0100
commitefd96e8df6805a45aaf5822141dee11c642b51ae (patch)
tree56cca3a780dd0753e21eb5e0dbda585ce6ff7f7b
init
-rw-r--r--.gitignore4
-rw-r--r--Cargo.lock269
-rw-r--r--Cargo.toml19
-rw-r--r--flake.lock87
-rw-r--r--flake.nix67
-rw-r--r--readme38
-rw-r--r--src/error.rs17
-rw-r--r--src/lib.rs220
-rw-r--r--src/main.rs43
-rw-r--r--src/traverse.rs51
10 files changed, 815 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a2c5a6d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
1/target
2/js-files
3.direnv
4.envrc
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..6c0cbfd
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,269 @@
1# This file is automatically @generated by Cargo.
2# It is not intended for manual editing.
3version = 3
4
5[[package]]
6name = "aho-corasick"
7version = "0.7.18"
8source = "registry+https://github.com/rust-lang/crates.io-index"
9checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
10dependencies = [
11 "memchr",
12]
13
14[[package]]
15name = "autocfg"
16version = "1.1.0"
17source = "registry+https://github.com/rust-lang/crates.io-index"
18checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
19
20[[package]]
21name = "bloop"
22version = "0.1.0"
23dependencies = [
24 "itertools",
25 "rayon",
26 "thiserror",
27 "tree-sitter",
28 "tree-sitter-javascript",
29]
30
31[[package]]
32name = "cc"
33version = "1.0.73"
34source = "registry+https://github.com/rust-lang/crates.io-index"
35checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
36
37[[package]]
38name = "cfg-if"
39version = "1.0.0"
40source = "registry+https://github.com/rust-lang/crates.io-index"
41checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
42
43[[package]]
44name = "crossbeam-channel"
45version = "0.5.6"
46source = "registry+https://github.com/rust-lang/crates.io-index"
47checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
48dependencies = [
49 "cfg-if",
50 "crossbeam-utils",
51]
52
53[[package]]
54name = "crossbeam-deque"
55version = "0.8.2"
56source = "registry+https://github.com/rust-lang/crates.io-index"
57checksum = "715e8152b692bba2d374b53d4875445368fdf21a94751410af607a5ac677d1fc"
58dependencies = [
59 "cfg-if",
60 "crossbeam-epoch",
61 "crossbeam-utils",
62]
63
64[[package]]
65name = "crossbeam-epoch"
66version = "0.9.10"
67source = "registry+https://github.com/rust-lang/crates.io-index"
68checksum = "045ebe27666471bb549370b4b0b3e51b07f56325befa4284db65fc89c02511b1"
69dependencies = [
70 "autocfg",
71 "cfg-if",
72 "crossbeam-utils",
73 "memoffset",
74 "once_cell",
75 "scopeguard",
76]
77
78[[package]]
79name = "crossbeam-utils"
80version = "0.8.11"
81source = "registry+https://github.com/rust-lang/crates.io-index"
82checksum = "51887d4adc7b564537b15adcfb307936f8075dfcd5f00dde9a9f1d29383682bc"
83dependencies = [
84 "cfg-if",
85 "once_cell",
86]
87
88[[package]]
89name = "either"
90version = "1.7.0"
91source = "registry+https://github.com/rust-lang/crates.io-index"
92checksum = "3f107b87b6afc2a64fd13cac55fe06d6c8859f12d4b14cbcdd2c67d0976781be"
93
94[[package]]
95name = "hermit-abi"
96version = "0.1.19"
97source = "registry+https://github.com/rust-lang/crates.io-index"
98checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
99dependencies = [
100 "libc",
101]
102
103[[package]]
104name = "itertools"
105version = "0.10.3"
106source = "registry+https://github.com/rust-lang/crates.io-index"
107checksum = "a9a9d19fa1e79b6215ff29b9d6880b706147f16e9b1dbb1e4e5947b5b02bc5e3"
108dependencies = [
109 "either",
110]
111
112[[package]]
113name = "libc"
114version = "0.2.126"
115source = "registry+https://github.com/rust-lang/crates.io-index"
116checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
117
118[[package]]
119name = "memchr"
120version = "2.5.0"
121source = "registry+https://github.com/rust-lang/crates.io-index"
122checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
123
124[[package]]
125name = "memoffset"
126version = "0.6.5"
127source = "registry+https://github.com/rust-lang/crates.io-index"
128checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
129dependencies = [
130 "autocfg",
131]
132
133[[package]]
134name = "num_cpus"
135version = "1.13.1"
136source = "registry+https://github.com/rust-lang/crates.io-index"
137checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1"
138dependencies = [
139 "hermit-abi",
140 "libc",
141]
142
143[[package]]
144name = "once_cell"
145version = "1.13.0"
146source = "registry+https://github.com/rust-lang/crates.io-index"
147checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1"
148
149[[package]]
150name = "proc-macro2"
151version = "1.0.42"
152source = "registry+https://github.com/rust-lang/crates.io-index"
153checksum = "c278e965f1d8cf32d6e0e96de3d3e79712178ae67986d9cf9151f51e95aac89b"
154dependencies = [
155 "unicode-ident",
156]
157
158[[package]]
159name = "quote"
160version = "1.0.20"
161source = "registry+https://github.com/rust-lang/crates.io-index"
162checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
163dependencies = [
164 "proc-macro2",
165]
166
167[[package]]
168name = "rayon"
169version = "1.5.3"
170source = "registry+https://github.com/rust-lang/crates.io-index"
171checksum = "bd99e5772ead8baa5215278c9b15bf92087709e9c1b2d1f97cdb5a183c933a7d"
172dependencies = [
173 "autocfg",
174 "crossbeam-deque",
175 "either",
176 "rayon-core",
177]
178
179[[package]]
180name = "rayon-core"
181version = "1.9.3"
182source = "registry+https://github.com/rust-lang/crates.io-index"
183checksum = "258bcdb5ac6dad48491bb2992db6b7cf74878b0384908af124823d118c99683f"
184dependencies = [
185 "crossbeam-channel",
186 "crossbeam-deque",
187 "crossbeam-utils",
188 "num_cpus",
189]
190
191[[package]]
192name = "regex"
193version = "1.6.0"
194source = "registry+https://github.com/rust-lang/crates.io-index"
195checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
196dependencies = [
197 "aho-corasick",
198 "memchr",
199 "regex-syntax",
200]
201
202[[package]]
203name = "regex-syntax"
204version = "0.6.27"
205source = "registry+https://github.com/rust-lang/crates.io-index"
206checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"
207
208[[package]]
209name = "scopeguard"
210version = "1.1.0"
211source = "registry+https://github.com/rust-lang/crates.io-index"
212checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
213
214[[package]]
215name = "syn"
216version = "1.0.98"
217source = "registry+https://github.com/rust-lang/crates.io-index"
218checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
219dependencies = [
220 "proc-macro2",
221 "quote",
222 "unicode-ident",
223]
224
225[[package]]
226name = "thiserror"
227version = "1.0.31"
228source = "registry+https://github.com/rust-lang/crates.io-index"
229checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a"
230dependencies = [
231 "thiserror-impl",
232]
233
234[[package]]
235name = "thiserror-impl"
236version = "1.0.31"
237source = "registry+https://github.com/rust-lang/crates.io-index"
238checksum = "0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a"
239dependencies = [
240 "proc-macro2",
241 "quote",
242 "syn",
243]
244
245[[package]]
246name = "tree-sitter"
247version = "0.20.8"
248source = "registry+https://github.com/rust-lang/crates.io-index"
249checksum = "268bf3e3ca0c09e5d21b59c2638e12cb6dcf7ea2681250a696a2d0936cb57ba0"
250dependencies = [
251 "cc",
252 "regex",
253]
254
255[[package]]
256name = "tree-sitter-javascript"
257version = "0.20.0"
258source = "registry+https://github.com/rust-lang/crates.io-index"
259checksum = "2490fab08630b2c8943c320f7b63473cbf65511c8d83aec551beb9b4375906ed"
260dependencies = [
261 "cc",
262 "tree-sitter",
263]
264
265[[package]]
266name = "unicode-ident"
267version = "1.0.2"
268source = "registry+https://github.com/rust-lang/crates.io-index"
269checksum = "15c61ba63f9235225a22310255a29b806b907c9b8c964bcbd0a2c70f3f2deea7"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..579c1c4
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,19 @@
1[package]
2name = "bloop"
3version = "0.1.0"
4edition = "2021"
5
6[lib]
7name = "similarity"
8path = "src/lib.rs"
9
10[[bin]]
11name = "sim"
12path = "src/main.rs"
13
14[dependencies]
15tree-sitter-javascript = "0.20"
16tree-sitter = "0.20"
17itertools = "0.10"
18rayon = "1.5"
19thiserror = "1.0"
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..a7c083f
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,87 @@
1{
2 "nodes": {
3 "fenix": {
4 "inputs": {
5 "nixpkgs": [
6 "nixpkgs"
7 ],
8 "rust-analyzer-src": "rust-analyzer-src"
9 },
10 "locked": {
11 "lastModified": 1645251813,
12 "narHash": "sha256-cQ66tGjnZclBCS3nD26mZ5fUH+3/HnysGffBiWXUSHk=",
13 "owner": "nix-community",
14 "repo": "fenix",
15 "rev": "9892337b588c38ec59466a1c89befce464aae7f8",
16 "type": "github"
17 },
18 "original": {
19 "owner": "nix-community",
20 "repo": "fenix",
21 "type": "github"
22 }
23 },
24 "gitignore": {
25 "inputs": {
26 "nixpkgs": [
27 "nixpkgs"
28 ]
29 },
30 "locked": {
31 "lastModified": 1635165013,
32 "narHash": "sha256-o/BdVjNwcB6jOmzZjOH703BesSkkS5O7ej3xhyO8hAY=",
33 "owner": "hercules-ci",
34 "repo": "gitignore.nix",
35 "rev": "5b9e0ff9d3b551234b4f3eb3983744fa354b17f1",
36 "type": "github"
37 },
38 "original": {
39 "owner": "hercules-ci",
40 "repo": "gitignore.nix",
41 "type": "github"
42 }
43 },
44 "nixpkgs": {
45 "locked": {
46 "lastModified": 1645013224,
47 "narHash": "sha256-b7OEC8vwzJv3rsz9pwnTX2LQDkeOWz2DbKypkVvNHXc=",
48 "owner": "nixos",
49 "repo": "nixpkgs",
50 "rev": "b66b39216b1fef2d8c33cc7a5c72d8da80b79970",
51 "type": "github"
52 },
53 "original": {
54 "owner": "nixos",
55 "ref": "nixpkgs-unstable",
56 "repo": "nixpkgs",
57 "type": "github"
58 }
59 },
60 "root": {
61 "inputs": {
62 "fenix": "fenix",
63 "gitignore": "gitignore",
64 "nixpkgs": "nixpkgs"
65 }
66 },
67 "rust-analyzer-src": {
68 "flake": false,
69 "locked": {
70 "lastModified": 1645205556,
71 "narHash": "sha256-e4lZW3qRyOEJ+vLKFQP7m2Dxh5P44NrnekZYLxlucww=",
72 "owner": "rust-analyzer",
73 "repo": "rust-analyzer",
74 "rev": "acf5874b39f3dc5262317a6074d9fc7285081161",
75 "type": "github"
76 },
77 "original": {
78 "owner": "rust-analyzer",
79 "ref": "nightly",
80 "repo": "rust-analyzer",
81 "type": "github"
82 }
83 }
84 },
85 "root": "root",
86 "version": 7
87}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..dcdf3b7
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,67 @@
1{
2 inputs = {
3
4 nixpkgs.url = "github:nixos/nixpkgs/nixpkgs-unstable";
5
6 fenix = {
7 url = "github:nix-community/fenix";
8 inputs.nixpkgs.follows = "nixpkgs";
9 };
10
11 gitignore = {
12 url = "github:hercules-ci/gitignore.nix";
13 inputs.nixpkgs.follows = "nixpkgs";
14 };
15
16 };
17
18 outputs =
19 { self
20 , nixpkgs
21 , fenix
22 , gitignore
23 }:
24 let
25 inherit (gitignore.lib) gitignoreSource;
26
27 supportedSystems = [ "x86_64-linux" "aarch64-linux" "x86_64-darwin" "aarch64-darwin" ];
28 forAllSystems = nixpkgs.lib.genAttrs supportedSystems;
29 nixpkgsFor = forAllSystems (system:
30 import nixpkgs { inherit system; });
31
32 chanspec = {
33 date = "2022-02-06";
34 channel = "nightly";
35 sha256 = "oKkTWopCDx4tphzTtRn+zDDtvmIZrL/H44tV2ruSfDw="; # set zeros after modifying channel or date
36 };
37 rustChannel = p: (fenix.overlay p p).fenix.toolchainOf chanspec;
38
39 in
40 {
41
42
43 devShell = forAllSystems (system:
44 let
45 pkgs = nixpkgsFor."${system}";
46 toolchain = (rustChannel pkgs).withComponents [
47 "rustc"
48 "cargo"
49 "rust-std"
50 "rustfmt"
51 "clippy"
52 "rust-src"
53 ];
54 inherit (fenix.packages."${system}") rust-analyzer;
55 in
56 pkgs.mkShell {
57 nativeBuildInputs = [
58 pkgs.bacon
59 pkgs.cargo-insta
60 rust-analyzer
61 toolchain
62 ];
63 RUST_LOG = "info";
64 RUST_BACKTRACE = 1;
65 });
66 };
67}
diff --git a/readme b/readme
new file mode 100644
index 0000000..869e43e
--- /dev/null
+++ b/readme
@@ -0,0 +1,38 @@
1Accepts a list of files as args and returns pairs of duplicate files, one pair
2per line.
3
4Usage:
5-----
6
7 cargo run --release --quiet -- [FILES]
8
9
10Example:
11-------
12
13 cargo run ---release --quiet -- js-files/*.js
14
15
16Internals:
17---------
18
19The tool uses tree-sitter to produce ASTs for the given files. It then lazily
20traverses the trees of the two files to be compared and exits on encountering
21the first structural difference in the ASTs.
22
23
24Known issues:
25------------
26
27- A fully commented-out file is equivalent to every other fully commented-out
28 file and to empty files
29
30- Does not account for equivalence of unordered children:
31
32 ==== file1.rs ==== ==== file2.rs ====
33 fn bar(x, y, z) {} fn foo(a, b) {}
34 fn foo(a, b) {} fn bar(x, y, z) {}
35
36 Here, `function` nodes are "unordered" children, the files are structurally
37 equivalent because the order of `function` nodes is irrelevant. However, the
38 tool considers these files to be unique.
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..64bd094
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,17 @@
1// std
2use std::io;
3
4// extern
5use thiserror::Error;
6
7#[derive(Error, Debug)]
8pub enum SimError {
9 #[error("unable to set language")]
10 UnableToSetLanguage,
11
12 #[error("parser does not have a corresponding tree-sitter language")]
13 LanguageNotSet,
14
15 #[error("io error: {0}")]
16 IoError(#[from] io::Error),
17}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..b708984
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,220 @@
1// locals
2mod error;
3mod traverse;
4
5// std
6use std::{
7 fmt,
8 ops::Not,
9 path::{Path, PathBuf},
10};
11
12// extern
13use error::SimError;
14use itertools::{EitherOrBoth, Itertools};
15use traverse::PreOrder;
16use tree_sitter::{Node, Parser, Tree};
17
18type Result<T> = std::result::Result<T, SimError>;
19
20// Check if two tree-sitter objects (trees or nodes etc.) are "similar"
21pub trait Similarity {
22 // This function accepts a predicate to filter out nodes,
23 // for e.g.: trivia such as comments and whitespace may be filtered
24 // out with `!node.is_extra()`
25 fn is_similar<P>(&self, other: &Self, predicate: P) -> bool
26 where
27 P: Fn(&Node) -> bool;
28}
29
30// Representation of a fully parsed file from the filesystem
31pub struct ProgramFile {
32 pub path: PathBuf,
33 pub src: String,
34 pub tree: Tree,
35}
36
37impl Similarity for ProgramFile {
38 fn is_similar<P>(&self, other: &Self, predicate: P) -> bool
39 where
40 P: Fn(&Node) -> bool,
41 {
42 // The order of traversal is unimportant
43 let mine = PreOrder::new(&self.tree).filter(|n| predicate(n));
44 let theirs = PreOrder::new(&other.tree).filter(|n| predicate(n));
45
46 // Simultaneously traverse both the trees, and find dissimilarities.
47 //
48 // A dissimilarity is when:
49 // - one tree's traversal is longer than the other
50 // - the trees have identical traversal lengths but non-identical
51 // nodes along the traversal
52 mine.zip_longest(theirs)
53 .any(|result| match result {
54 EitherOrBoth::Both(mine, theirs) => mine.kind_id() != theirs.kind_id(),
55 EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => true,
56 })
57 .not()
58 }
59}
60
61impl fmt::Debug for ProgramFile {
62 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
63 f.debug_struct("ProgramFile")
64 .field("path", &self.path)
65 .field("src", &self.src.len())
66 .finish()
67 }
68}
69
70impl ProgramFile {
71 pub fn new<P: AsRef<Path>>(path: P, parser: &mut Parser) -> Result<Self> {
72 let src = std::fs::read_to_string(&path)?;
73 let path = path.as_ref().to_owned();
74 let tree = parser.parse(&src, None).ok_or(SimError::LanguageNotSet)?;
75 Ok(Self { path, src, tree })
76 }
77}
78
79#[cfg(test)]
80mod test {
81 use super::*;
82 use tree_sitter::Parser;
83
84 // Utility to quickly bootstrap ProgramFiles
85 // and run similarity checks
86 fn is_similar(mine: &str, their: &str) -> bool {
87 let lang = tree_sitter_javascript::language();
88 let mut parser = Parser::new();
89 parser.set_language(lang).unwrap();
90
91 let my_program_file = ProgramFile {
92 path: PathBuf::from("mine"),
93 src: mine.to_owned(),
94 tree: parser.parse(&mine, None).unwrap(),
95 };
96
97 let their_program_file = ProgramFile {
98 path: PathBuf::from("their"),
99 src: their.to_owned(),
100 tree: parser.parse(&their, None).unwrap(),
101 };
102
103 my_program_file.is_similar(&their_program_file, |n| !n.is_extra())
104 }
105
106 #[test]
107 fn trivial() {
108 assert!(is_similar(
109 r#"
110 var x = 2;
111 "#,
112 r#"
113 var y = 5;
114 "#
115 ))
116 }
117
118 #[test]
119 fn automorphism() {
120 assert!(is_similar(
121 r#"
122 function(x) {
123 x * 2
124 }
125 "#,
126 r#"
127 function(x) {
128 x * 2
129 }
130 "#
131 ))
132 }
133
134 #[test]
135 fn sample() {
136 assert!(is_similar(
137 r#"
138 var scrapeWebsite = async function scrapeWebsite(website) {
139 try {
140 var url = normalizeUrl(website);
141 var html = await fetch(url).then(function (res) {
142 return res.text();
143 });
144 var $ = load(html);
145 var handles = {};
146 module.exports.CONFIG.socialNetworks.forEach(function (socialNetwork) {
147 handles[socialNetwork] = parse(socialNetwork)($);
148 })
149 return handles;
150 } catch (error) {
151 throw new Error("Error scraping website data for " + website + ": " + error.message)
152 }
153 }
154 "#,
155 r#"
156 var parseWebsite = async function parseWebsite(website) {
157 try {
158 var url = normalizeUrl(website);
159 var html = await fetch(url).then(function (res) {
160 return res.text();
161 });
162 var $ = load(html);
163 var handles = {};
164 module.exports.CONFIG.socialNetworks.forEach(function (socialNetwork) {
165 handles[socialNetwork] = parse(socialNetwork)($);
166 })
167 return handles;
168 } catch (error) {
169 throw new Error("Error fetching website data for " + website + ": " + error.message)
170 }
171 }
172 "#
173 ))
174 }
175
176 #[test]
177 fn ignore_comment_trivia() {
178 assert!(is_similar(
179 r#"
180 // doubles x
181 function(x) {
182 x * 2
183 }
184 "#,
185 r#"
186 function(x) {
187 x * 2 // doubles x
188 }
189 "#
190 ))
191 }
192
193 #[test]
194 fn ignore_whitespace_trivia() {
195 assert!(is_similar(
196 r#"
197 function(x) {
198return x * 2;
199 }
200 "#,
201 r#"
202 function(x ) {
203 return x * 2;
204 }
205 "#
206 ))
207 }
208
209 #[test]
210 fn dissimilar() {
211 assert!(!is_similar(
212 r#"
213 var t = function(x, y) { }
214 "#,
215 r#"
216 var l = function(x) { }
217 "#
218 ))
219 }
220}
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..77c7584
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,43 @@
1// extern
2use itertools::Itertools;
3use rayon::prelude::*;
4use similarity::{ProgramFile, Similarity};
5use tree_sitter::Parser;
6
7fn parser() -> Parser {
8 let mut parser = Parser::new();
9 parser
10 .set_language(tree_sitter_javascript::language())
11 .unwrap();
12 parser
13}
14
15fn main() {
16 let args = std::env::args().skip(1).collect::<Vec<_>>();
17
18 // read and parse files from disk
19 let (files, errors): (Vec<_>, Vec<_>) = args
20 .par_iter()
21 .map(|arg| ProgramFile::new(arg, &mut parser()))
22 .partition(Result::is_ok);
23
24 // dump errors if any
25 errors.into_iter().map(Result::unwrap_err).for_each(|err| {
26 eprintln!("{}", err);
27 });
28
29 let files: Vec<_> = files.into_iter().map(Result::unwrap).collect();
30
31 // compare every pair of files for similarity
32 (0..files.len())
33 .combinations(2)
34 .map(|item| (item[0], item[1]))
35 .par_bridge()
36 .for_each(|(one, two)| {
37 let one = &files[one];
38 let two = &files[two];
39 if one.is_similar(&two, |n| !n.is_extra()) && one.path != two.path {
40 println!("{} {}", one.path.display(), two.path.display());
41 }
42 });
43}
diff --git a/src/traverse.rs b/src/traverse.rs
new file mode 100644
index 0000000..df13f5d
--- /dev/null
+++ b/src/traverse.rs
@@ -0,0 +1,51 @@
1// Lazy pre-order traversal over arbitrary tree-sitter trees
2
3// extern
4use tree_sitter::{Node, Tree, TreeCursor};
5
6pub struct PreOrder<'a> {
7 cursor: TreeCursor<'a>,
8 done: bool,
9}
10
11impl<'a> PreOrder<'a> {
12 pub fn new(tree: &'a Tree) -> Self {
13 Self {
14 cursor: tree.walk(),
15 done: false,
16 }
17 }
18}
19
20// Traverse the tree in root-left-right fashion
21impl<'a> Iterator for PreOrder<'a> {
22 type Item = Node<'a>;
23
24 fn next(&mut self) -> Option<Self::Item> {
25 if self.done {
26 return None;
27 }
28
29 // capture the root node
30 let node = self.cursor.node();
31
32 // if this node has a child, move there and return the root, or
33 // if this node has a sibling, move there and return the root
34 if self.cursor.goto_first_child() || self.cursor.goto_next_sibling() {
35 return Some(node);
36 }
37
38 loop {
39 // we are done retracing all the way back to the root node
40 if !self.cursor.goto_parent() {
41 self.done = true;
42 break;
43 }
44
45 if self.cursor.goto_next_sibling() {
46 break;
47 }
48 }
49 Some(node)
50 }
51}