Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(283)

Side by Side Diff: ui/file_manager/file_manager/background/js/duplicate_finder.js

Issue 980603003: Move content deduplication into the scan process. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Respond to review comments. Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Namespace 5 // Namespace
6 var importer = importer || {}; 6 var importer = importer || {};
7 7
8 /** 8 /**
9 * Interface for import deduplicators. A duplicate finder is linked to an
10 * import destination, and will check whether files already exist in that import
11 * destination.
12 * @interface
13 */
14 importer.DuplicateFinder = function() {};
15
16 /**
17 * Checks whether the given file already exists in the import destination.
18 * @param {!FileEntry} entry The file entry to check.
19 * @return {!Promise<boolean>}
20 */
21 importer.DuplicateFinder.prototype.checkDuplicate;
22
23 /**
24 * A factory for producing duplicate finders.
25 * @interface
26 */
27 importer.DuplicateFinder.Factory = function() {};
28
29 /** @return {!importer.DuplicateFinder} */
30 importer.DuplicateFinder.Factory.prototype.create;
31
32 /**
33 * A duplicate finder for Google Drive. 9 * A duplicate finder for Google Drive.
34 * 10 *
35 * @constructor 11 * @constructor
36 * @implements {importer.DuplicateFinder}
37 * @struct 12 * @struct
13 *
14 * @param {!analytics.Tracker} tracker
38 */ 15 */
39 importer.DriveDuplicateFinder = function() { 16 importer.DriveDuplicateFinder = function(tracker) {
17
18 /** @private {!analytics.Tracker} */
19 this.tracker_ = tracker;
20
40 /** @private {Promise<string>} */ 21 /** @private {Promise<string>} */
41 this.driveIdPromise_ = null; 22 this.driveIdPromise_ = null;
42
43 /**
44 * Aggregate time spent computing content hashes (in ms).
45 * @private {number}
46 */
47 this.computeHashTime_ = 0;
48
49 /**
50 * Aggregate time spent performing content hash searches (in ms).
51 * @private {number}
52 */
53 this.searchHashTime_ = 0;
54 }; 23 };
55 24
56 /** 25 /**
57 * @typedef {{ 26 * @param {!FileEntry} entry
58 * computeHashTime: number, 27 * @return {!Promise<boolean>}
59 * searchHashTime: number
60 * }}
61 */ 28 */
62 importer.DriveDuplicateFinder.Statistics; 29 importer.DriveDuplicateFinder.prototype.isDuplicate = function(entry) {
63
64 /** @override */
65 importer.DriveDuplicateFinder.prototype.checkDuplicate = function(entry) {
66 return this.computeHash_(entry) 30 return this.computeHash_(entry)
67 .then(this.findByHash_.bind(this)) 31 .then(this.findByHash_.bind(this))
68 .then( 32 .then(
69 /** 33 /**
70 * @param {!Array<string>} urls 34 * @param {!Array<string>} urls
71 * @return {boolean} 35 * @return {boolean}
72 */ 36 */
73 function(urls) { 37 function(urls) {
74 return urls.length > 0; 38 return urls.length > 0;
75 }); 39 });
76 }; 40 };
77 41
42 /** @private @const {number} */
43 importer.DriveDuplicateFinder.HASH_EVENT_THRESHOLD_ = 5000;
44
45 /** @private @const {number} */
46 importer.DriveDuplicateFinder.SEARCH_EVENT_THRESHOLD_ = 1000;
47
78 /** 48 /**
79 * Computes the content hash for the given file entry. 49 * Computes the content hash for the given file entry.
80 * @param {!FileEntry} entry 50 * @param {!FileEntry} entry
81 * @private 51 * @private
82 */ 52 */
83 importer.DriveDuplicateFinder.prototype.computeHash_ = function(entry) { 53 importer.DriveDuplicateFinder.prototype.computeHash_ = function(entry) {
84 return new Promise( 54 return new Promise(
85 /** @this {importer.DriveDuplicateFinder} */ 55 /** @this {importer.DriveDuplicateFinder} */
86 function(resolve, reject) { 56 function(resolve, reject) {
87 var startTime = new Date().getTime(); 57 var startTime = new Date().getTime();
88 chrome.fileManagerPrivate.computeChecksum( 58 chrome.fileManagerPrivate.computeChecksum(
89 entry.toURL(), 59 entry.toURL(),
90 /** @param {string} result The content hash. */ 60 /**
61 * @param {string} result The content hash.
62 * @this {importer.DriveDuplicateFinder}
63 */
91 function(result) { 64 function(result) {
92 var endTime = new Date().getTime(); 65 var elapsedTime = new Date().getTime() - startTime;
93 this.searchHashTime_ += endTime - startTime; 66 // Send the timing to GA only if it is exceptionally long.
67 // A five-second, CPU-intensive operation is pretty long.
68 if (elapsedTime >=
69 importer.DriveDuplicateFinder.HASH_EVENT_THRESHOLD_) {
70 this.tracker_.sendTiming(
71 metrics.Categories.ACQUISITION,
72 metrics.timing.Variables.COMPUTE_HASH,
73 elapsedTime);
74 }
94 if (chrome.runtime.lastError) { 75 if (chrome.runtime.lastError) {
95 reject(chrome.runtime.lastError); 76 reject(chrome.runtime.lastError);
96 } else { 77 } else {
97 resolve(result); 78 resolve(result);
98 } 79 }
99 }); 80 }.bind(this));
100 }.bind(this)); 81 }.bind(this));
101 }; 82 };
102 83
103 /** 84 /**
104 * Finds files with content hashes matching the given hash. 85 * Finds files with content hashes matching the given hash.
105 * @param {string} hash The content hash of the file to find. 86 * @param {string} hash The content hash of the file to find.
106 * @return {!Promise<Array<string>>} The URLs of the found files. If there are 87 * @return {!Promise<Array<string>>} The URLs of the found files. If there are
107 * no matches, the list will be empty. 88 * no matches, the list will be empty.
108 * @private 89 * @private
109 */ 90 */
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
145 function(resolve, reject) { 126 function(resolve, reject) {
146 var startTime = new Date().getTime(); 127 var startTime = new Date().getTime();
147 chrome.fileManagerPrivate.searchFilesByHashes( 128 chrome.fileManagerPrivate.searchFilesByHashes(
148 volumeId, 129 volumeId,
149 [hash], 130 [hash],
150 /** 131 /**
151 * @param {!Object<string, Array<string>>} urls 132 * @param {!Object<string, Array<string>>} urls
152 * @this {importer.DriveDuplicateFinder} 133 * @this {importer.DriveDuplicateFinder}
153 */ 134 */
154 function(urls) { 135 function(urls) {
155 var endTime = new Date().getTime(); 136 var elapsedTime = new Date().getTime() - startTime;
156 this.searchHashTime_ += endTime - startTime; 137 // Send the timing to GA only if it is exceptionally long.
138 if (elapsedTime >=
139 importer.DriveDuplicateFinder.SEARCH_EVENT_THRESHOLD_) {
140 this.tracker_.sendTiming(
141 metrics.Categories.ACQUISITION,
142 metrics.timing.Variables.SEARCH_BY_HASH,
143 elapsedTime);
144 }
157 if (chrome.runtime.lastError) { 145 if (chrome.runtime.lastError) {
158 reject(chrome.runtime.lastError); 146 reject(chrome.runtime.lastError);
159 } else { 147 } else {
160 resolve(urls[hash]); 148 resolve(urls[hash]);
161 } 149 }
162 }.bind(this)); 150 }.bind(this));
163 }.bind(this)); 151 }.bind(this));
164 }; 152 };
165 153
166 /** @return {!importer.DriveDuplicateFinder.Statistics} */ 154 /**
167 importer.DriveDuplicateFinder.prototype.getStatistics = function() { 155 * A class that aggregates history/content-dupe checking
168 return { 156 * into a single "Disposition" value. Should now be the
169 computeHashTime: this.computeHashTime_, 157 * primary source for duplicate checking (with the exception
170 searchHashTime: this.searchHashTime_ 158 * of in-scan deduplication, where duplicate results that
171 }; 159 * are within the scan are ignored).
160 *
161 * @constructor
162 *
163 * @param {!importer.HistoryLoader} historyLoader
164 * @param {!importer.DriveDuplicateFinder} contentMatcher
165 */
166 importer.DispositionChecker = function(historyLoader, contentMatcher) {
167 /** @private {!importer.HistoryLoader} */
168 this.historyLoader_ = historyLoader;
169
170 /** @private {!importer.DriveDuplicateFinder} */
171 this.contentMatcher_ = contentMatcher;
172 }; 172 };
173 173
174 /** 174 /**
175 * @constructor 175 * @param {!FileEntry} entry
176 * @implements {importer.DuplicateFinder.Factory} 176 * @param {!importer.Destination} destination
177 * @return {!Promise<!importer.Disposition>}
177 */ 178 */
178 importer.DriveDuplicateFinder.Factory = function() {}; 179 importer.DispositionChecker.prototype.getDisposition =
180 function(entry, destination) {
181 if (destination !== importer.Destination.GOOGLE_DRIVE) {
182 return Promise.reject('Unsupported destination: ' + destination);
183 }
179 184
180 /** @override */ 185 return new Promise(
181 importer.DriveDuplicateFinder.Factory.prototype.create = function() { 186 /** @this {importer.DispositionChecker} */
182 return new importer.DriveDuplicateFinder(); 187 function(resolve, reject) {
188 this.hasHistoryDuplicate_(entry, destination)
189 .then(
190 /**
191 * @param {boolean} duplicate
192 * @this {importer.DispositionChecker}
193 */
194 function(duplicate) {
195 if (duplicate) {
196 resolve(importer.Disposition.HISTORY_DUPLICATE);
197 } else {
198 this.contentMatcher_.isDuplicate(entry)
199 .then(
200 /** @param {boolean} duplicate */
201 function(duplicate) {
202 if (duplicate) {
203 resolve(
204 importer.Disposition.CONTENT_DUPLICATE);
205 } else {
206 resolve(importer.Disposition.ORIGINAL);
207 }
208 });
209 }
210 }.bind(this));
211 }.bind(this));
183 }; 212 };
213
214 /**
215 * @param {!FileEntry} entry
216 * @param {!importer.Destination} destination
217 * @return {!Promise.<boolean>} True if there is a history-entry-duplicate
218 * for the file.
219 * @private
220 */
221 importer.DispositionChecker.prototype.hasHistoryDuplicate_ =
222 function(entry, destination) {
223 return this.historyLoader_.getHistory()
224 .then(
225 /**
226 * @param {!importer.ImportHistory} history
227 * @return {!Promise}
228 * @this {importer.DispositionChecker}
229 */
230 function(history) {
231 return Promise.all([
232 history.wasCopied(entry, destination),
233 history.wasImported(entry, destination)
234 ]).then(
235 /**
236 * @param {!Array<boolean>} results
237 * @return {boolean}
238 */
239 function(results) {
240 return results[0] || results[1];
241 });
242 }.bind(this));
243 };
244
245 /**
246 * Factory for a function that returns an entry's disposition.
247 *
248 * @param {!importer.HistoryLoader} historyLoader
249 * @param {!analytics.Tracker} tracker
250 *
251 * @return {function(!FileEntry, !importer.Destination):
252 * !Promise<!importer.Disposition>}
253 */
254 importer.DispositionChecker.createChecker =
255 function(historyLoader, tracker) {
256 var checker = new importer.DispositionChecker(
257 historyLoader,
258 new importer.DriveDuplicateFinder(tracker));
259 return checker.getDisposition.bind(checker);
260 };
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698