aboutsummaryrefslogtreecommitdiff
path: root/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib.rs')
-rw-r--r--src/lib.rs220
1 files changed, 220 insertions, 0 deletions
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..b708984
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,220 @@
1// locals
2mod error;
3mod traverse;
4
5// std
6use std::{
7 fmt,
8 ops::Not,
9 path::{Path, PathBuf},
10};
11
12// extern
13use error::SimError;
14use itertools::{EitherOrBoth, Itertools};
15use traverse::PreOrder;
16use tree_sitter::{Node, Parser, Tree};
17
18type Result<T> = std::result::Result<T, SimError>;
19
20// Check if two tree-sitter objects (trees or nodes etc.) are "similar"
21pub trait Similarity {
22 // This function accepts a predicate to filter out nodes,
23 // for e.g.: trivia such as comments and whitespace may be filtered
24 // out with `!node.is_extra()`
25 fn is_similar<P>(&self, other: &Self, predicate: P) -> bool
26 where
27 P: Fn(&Node) -> bool;
28}
29
30// Representation of a fully parsed file from the filesystem
31pub struct ProgramFile {
32 pub path: PathBuf,
33 pub src: String,
34 pub tree: Tree,
35}
36
37impl Similarity for ProgramFile {
38 fn is_similar<P>(&self, other: &Self, predicate: P) -> bool
39 where
40 P: Fn(&Node) -> bool,
41 {
42 // The order of traversal is unimportant
43 let mine = PreOrder::new(&self.tree).filter(|n| predicate(n));
44 let theirs = PreOrder::new(&other.tree).filter(|n| predicate(n));
45
46 // Simultaneously traverse both the trees, and find dissimilarities.
47 //
48 // A dissimilarity is when:
49 // - one tree's traversal is longer than the other
50 // - the trees have identical traversal lengths but non-identical
51 // nodes along the traversal
52 mine.zip_longest(theirs)
53 .any(|result| match result {
54 EitherOrBoth::Both(mine, theirs) => mine.kind_id() != theirs.kind_id(),
55 EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => true,
56 })
57 .not()
58 }
59}
60
61impl fmt::Debug for ProgramFile {
62 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
63 f.debug_struct("ProgramFile")
64 .field("path", &self.path)
65 .field("src", &self.src.len())
66 .finish()
67 }
68}
69
70impl ProgramFile {
71 pub fn new<P: AsRef<Path>>(path: P, parser: &mut Parser) -> Result<Self> {
72 let src = std::fs::read_to_string(&path)?;
73 let path = path.as_ref().to_owned();
74 let tree = parser.parse(&src, None).ok_or(SimError::LanguageNotSet)?;
75 Ok(Self { path, src, tree })
76 }
77}
78
79#[cfg(test)]
80mod test {
81 use super::*;
82 use tree_sitter::Parser;
83
84 // Utility to quickly bootstrap ProgramFiles
85 // and run similarity checks
86 fn is_similar(mine: &str, their: &str) -> bool {
87 let lang = tree_sitter_javascript::language();
88 let mut parser = Parser::new();
89 parser.set_language(lang).unwrap();
90
91 let my_program_file = ProgramFile {
92 path: PathBuf::from("mine"),
93 src: mine.to_owned(),
94 tree: parser.parse(&mine, None).unwrap(),
95 };
96
97 let their_program_file = ProgramFile {
98 path: PathBuf::from("their"),
99 src: their.to_owned(),
100 tree: parser.parse(&their, None).unwrap(),
101 };
102
103 my_program_file.is_similar(&their_program_file, |n| !n.is_extra())
104 }
105
106 #[test]
107 fn trivial() {
108 assert!(is_similar(
109 r#"
110 var x = 2;
111 "#,
112 r#"
113 var y = 5;
114 "#
115 ))
116 }
117
118 #[test]
119 fn automorphism() {
120 assert!(is_similar(
121 r#"
122 function(x) {
123 x * 2
124 }
125 "#,
126 r#"
127 function(x) {
128 x * 2
129 }
130 "#
131 ))
132 }
133
134 #[test]
135 fn sample() {
136 assert!(is_similar(
137 r#"
138 var scrapeWebsite = async function scrapeWebsite(website) {
139 try {
140 var url = normalizeUrl(website);
141 var html = await fetch(url).then(function (res) {
142 return res.text();
143 });
144 var $ = load(html);
145 var handles = {};
146 module.exports.CONFIG.socialNetworks.forEach(function (socialNetwork) {
147 handles[socialNetwork] = parse(socialNetwork)($);
148 })
149 return handles;
150 } catch (error) {
151 throw new Error("Error scraping website data for " + website + ": " + error.message)
152 }
153 }
154 "#,
155 r#"
156 var parseWebsite = async function parseWebsite(website) {
157 try {
158 var url = normalizeUrl(website);
159 var html = await fetch(url).then(function (res) {
160 return res.text();
161 });
162 var $ = load(html);
163 var handles = {};
164 module.exports.CONFIG.socialNetworks.forEach(function (socialNetwork) {
165 handles[socialNetwork] = parse(socialNetwork)($);
166 })
167 return handles;
168 } catch (error) {
169 throw new Error("Error fetching website data for " + website + ": " + error.message)
170 }
171 }
172 "#
173 ))
174 }
175
176 #[test]
177 fn ignore_comment_trivia() {
178 assert!(is_similar(
179 r#"
180 // doubles x
181 function(x) {
182 x * 2
183 }
184 "#,
185 r#"
186 function(x) {
187 x * 2 // doubles x
188 }
189 "#
190 ))
191 }
192
193 #[test]
194 fn ignore_whitespace_trivia() {
195 assert!(is_similar(
196 r#"
197 function(x) {
198return x * 2;
199 }
200 "#,
201 r#"
202 function(x ) {
203 return x * 2;
204 }
205 "#
206 ))
207 }
208
209 #[test]
210 fn dissimilar() {
211 assert!(!is_similar(
212 r#"
213 var t = function(x, y) { }
214 "#,
215 r#"
216 var l = function(x) { }
217 "#
218 ))
219 }
220}