Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(191)

Side by Side Diff: ui/file_manager/file_manager/background/js/duplicate_finder.js

Issue 980603003: Move content deduplication into the scan process. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Don't report error count when zero, and don't report an explicit END to import...since that is impl… Created 5 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2015 The Chromium Authors. All rights reserved. 1 // Copyright 2015 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 // Namespace 5 // Namespace
6 var importer = importer || {}; 6 var importer = importer || {};
7 7
8 /** 8 /**
9 * Interface for import deduplicators. A duplicate finder is linked to an
10 * import destination, and will check whether files already exist in that import
11 * destination.
12 * @interface
13 */
14 importer.DuplicateFinder = function() {};
15
16 /**
17 * Checks whether the given file already exists in the import destination.
18 * @param {!FileEntry} entry The file entry to check.
19 * @return {!Promise<boolean>}
20 */
21 importer.DuplicateFinder.prototype.checkDuplicate;
22
23 /**
24 * A factory for producing duplicate finders.
25 * @interface
26 */
27 importer.DuplicateFinder.Factory = function() {};
28
29 /** @return {!importer.DuplicateFinder} */
30 importer.DuplicateFinder.Factory.prototype.create;
31
32 /**
33 * A duplicate finder for Google Drive. 9 * A duplicate finder for Google Drive.
34 * 10 *
35 * @constructor 11 * @constructor
36 * @implements {importer.DuplicateFinder}
37 * @struct 12 * @struct
13 *
14 * @param {!analytics.Tracker} tracker
38 */ 15 */
39 importer.DriveDuplicateFinder = function() { 16 importer.DriveDuplicateFinder = function(tracker) {
17
18 /** @private {!analytics.Tracker} */
19 this.tracker_ = tracker;
20
40 /** @private {Promise<string>} */ 21 /** @private {Promise<string>} */
41 this.driveIdPromise_ = null; 22 this.driveIdPromise_ = null;
42
43 /**
44 * Aggregate time spent computing content hashes (in ms).
45 * @private {number}
46 */
47 this.computeHashTime_ = 0;
48
49 /**
50 * Aggregate time spent performing content hash searches (in ms).
51 * @private {number}
52 */
53 this.searchHashTime_ = 0;
54 }; 23 };
55 24
56 /** 25 /**
57 * @typedef {{ 26 * @param {!FileEntry} entry
58 * computeHashTime: number, 27 * @return {!Promise<boolean>}
59 * searchHashTime: number
60 * }}
61 */ 28 */
62 importer.DriveDuplicateFinder.Statistics; 29 importer.DriveDuplicateFinder.prototype.isDuplicate = function(entry) {
63
64 /** @override */
65 importer.DriveDuplicateFinder.prototype.checkDuplicate = function(entry) {
66 return this.computeHash_(entry) 30 return this.computeHash_(entry)
67 .then(this.findByHash_.bind(this)) 31 .then(this.findByHash_.bind(this))
68 .then( 32 .then(
69 /** 33 /**
70 * @param {!Array<string>} urls 34 * @param {!Array<string>} urls
71 * @return {boolean} 35 * @return {boolean}
72 */ 36 */
73 function(urls) { 37 function(urls) {
74 return urls.length > 0; 38 return urls.length > 0;
75 }); 39 });
76 }; 40 };
77 41
78 /** 42 /**
79 * Computes the content hash for the given file entry. 43 * Computes the content hash for the given file entry.
80 * @param {!FileEntry} entry 44 * @param {!FileEntry} entry
81 * @private 45 * @private
82 */ 46 */
83 importer.DriveDuplicateFinder.prototype.computeHash_ = function(entry) { 47 importer.DriveDuplicateFinder.prototype.computeHash_ = function(entry) {
84 return new Promise( 48 return new Promise(
85 /** @this {importer.DriveDuplicateFinder} */ 49 /** @this {importer.DriveDuplicateFinder} */
86 function(resolve, reject) { 50 function(resolve, reject) {
87 var startTime = new Date().getTime(); 51 var startTime = new Date().getTime();
88 chrome.fileManagerPrivate.computeChecksum( 52 chrome.fileManagerPrivate.computeChecksum(
89 entry.toURL(), 53 entry.toURL(),
90 /** @param {string} result The content hash. */ 54 /**
55 * @param {string} result The content hash.
56 * @this {importer.DriveDuplicateFinder}
57 */
91 function(result) { 58 function(result) {
92 var endTime = new Date().getTime(); 59 var elapsedTime = new Date().getTime() - startTime;
93 this.searchHashTime_ += endTime - startTime; 60 // Send the timing to GA only if it is sorta exceptionally long.
61 // A one second, CPU intensive operation, is pretty long; five seconds (the threshold below) is exceptional.
62 if (elapsedTime >= 5000) {
mtomasz 2015/03/05 04:36:52 nit: Can we move to a constant?
Steve McKay 2015/03/05 21:39:03 Done.
63 this.tracker_.sendTiming(
64 metrics.Categories.ACQUISITION,
65 metrics.timing.Variables.COMPUTE_HASH,
66 elapsedTime);
67 }
94 if (chrome.runtime.lastError) { 68 if (chrome.runtime.lastError) {
95 reject(chrome.runtime.lastError); 69 reject(chrome.runtime.lastError);
96 } else { 70 } else {
97 resolve(result); 71 resolve(result);
98 } 72 }
99 }); 73 }.bind(this));
100 }.bind(this)); 74 }.bind(this));
101 }; 75 };
102 76
103 /** 77 /**
104 * Finds files with content hashes matching the given hash. 78 * Finds files with content hashes matching the given hash.
105 * @param {string} hash The content hash of the file to find. 79 * @param {string} hash The content hash of the file to find.
106 * @return {!Promise<Array<string>>} The URLs of the found files. If there are 80 * @return {!Promise<Array<string>>} The URLs of the found files. If there are
107 * no matches, the list will be empty. 81 * no matches, the list will be empty.
108 * @private 82 * @private
109 */ 83 */
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
145 function(resolve, reject) { 119 function(resolve, reject) {
146 var startTime = new Date().getTime(); 120 var startTime = new Date().getTime();
147 chrome.fileManagerPrivate.searchFilesByHashes( 121 chrome.fileManagerPrivate.searchFilesByHashes(
148 volumeId, 122 volumeId,
149 [hash], 123 [hash],
150 /** 124 /**
151 * @param {!Object<string, Array<string>>} urls 125 * @param {!Object<string, Array<string>>} urls
152 * @this {importer.DriveDuplicateFinder} 126 * @this {importer.DriveDuplicateFinder}
153 */ 127 */
154 function(urls) { 128 function(urls) {
155 var endTime = new Date().getTime(); 129 var elapsedTime = new Date().getTime() - startTime;
156 this.searchHashTime_ += endTime - startTime; 130 // Send the timing to GA only if it is sorta exceptionally long.
131 if (elapsedTime >= 1000) {
mtomasz 2015/03/05 04:36:52 nit: Constant?
Steve McKay 2015/03/05 21:39:03 Done.
132 this.tracker_.sendTiming(
133 metrics.Categories.ACQUISITION,
134 metrics.timing.Variables.SEARCH_BY_HASH,
135 elapsedTime);
136 }
157 if (chrome.runtime.lastError) { 137 if (chrome.runtime.lastError) {
158 reject(chrome.runtime.lastError); 138 reject(chrome.runtime.lastError);
159 } else { 139 } else {
160 resolve(urls[hash]); 140 resolve(urls[hash]);
161 } 141 }
162 }.bind(this)); 142 }.bind(this));
163 }.bind(this)); 143 }.bind(this));
164 }; 144 };
165 145
166 /** @return {!importer.DriveDuplicateFinder.Statistics} */ 146 /**
167 importer.DriveDuplicateFinder.prototype.getStatistics = function() { 147 * A class that aggregates history/content-dupe checking
168 return { 148 * into a single "Disposition" value. Should now be the
169 computeHashTime: this.computeHashTime_, 149 * primary source for duplicate checking (with the exception
170 searchHashTime: this.searchHashTime_ 150 * of in-scan deduplication, where duplicate results that
171 }; 151 * are within the scan are ignored).
152 *
153 * @constructor
154 *
155 * @param {!importer.HistoryLoader} historyLoader
156 * @param {!importer.DriveDuplicateFinder} contentMatcher
157 */
158 importer.DispositionChecker = function(historyLoader, contentMatcher) {
159 /** @private {!importer.HistoryLoader} */
160 this.historyLoader_ = historyLoader;
161
162 /** @private {!importer.DriveDuplicateFinder} */
163 this.contentMatcher_ = contentMatcher;
172 }; 164 };
173 165
174 /** 166 /**
175 * @constructor 167 * @param {!FileEntry} entry
176 * @implements {importer.DuplicateFinder.Factory} 168 * @param {!importer.Destination} destination
169 * @return {!Promise<!importer.Disposition>}
177 */ 170 */
178 importer.DriveDuplicateFinder.Factory = function() {}; 171 importer.DispositionChecker.prototype.getDisposition =
172 function(entry, destination) {
173 if (destination !== importer.Destination.GOOGLE_DRIVE) {
174 throw new Error('Unsupported destination: ' + destination);
mtomasz 2015/03/05 04:36:52 nit: assert or return Promise.reject('Unsupported.
Steve McKay 2015/03/05 21:39:03 Done.
175 }
179 176
180 /** @override */ 177 return new Promise(
181 importer.DriveDuplicateFinder.Factory.prototype.create = function() { 178 /** @this {importer.DispositionChecker} */
182 return new importer.DriveDuplicateFinder(); 179 function(resolve, reject) {
180 this.hasHistoryDuplicate_(entry, destination)
181 .then(
mtomasz 2015/03/05 04:36:52 nit: indent
Steve McKay 2015/03/05 21:39:03 Done.
182 /** @this {importer.DispositionChecker} */
183 function(duplicate) {
Ben Kwa 2015/03/05 20:05:10 nit: param needs jsdoc
Steve McKay 2015/03/05 21:39:03 Done.
184 if (duplicate) {
185 resolve(importer.Disposition.HISTORY_DUPLICATE);
186 } else {
187 this.contentMatcher_.isDuplicate(entry)
188 .then(
189 function(duplicate) {
Ben Kwa 2015/03/05 20:05:10 nit: param needs jsdoc
Steve McKay 2015/03/05 21:39:03 Done.
190 if (duplicate) {
191 resolve(
192 importer.Disposition.CONTENT_DUPLICATE);
193 } else {
194 resolve(importer.Disposition.ORIGINAL);
195 }
196 });
197 }
198 }.bind(this));
199 }.bind(this));
183 }; 200 };
201
202 /**
203 * @param {!FileEntry} entry
204 * @param {!importer.Destination} destination
205 * @return {!Promise<boolean>} True if there is a history-entry-duplicate
206 * for the file.
207 * @private
208 */
209 importer.DispositionChecker.prototype.hasHistoryDuplicate_ =
210 function(entry, destination) {
211 return this.historyLoader_.getHistory()
212 .then(
213 /**
214 * @param {!importer.ImportHistory} history
215 * @return {!Promise<boolean>}
216 * @this {importer.DispositionChecker}
217 */
218 function(history) {
219 return Promise.all([
220 history.wasCopied(entry, destination),
221 history.wasImported(entry, destination)
222 ]).then(
223 /**
224 * @param {!Array<boolean>} results
225 * @return {boolean}
226 * @this {importer.DispositionChecker}
227 */
228 function(results) {
Ben Kwa 2015/03/05 20:05:10 nit: This function doesn't need a @this or a bind.
Steve McKay 2015/03/05 21:39:03 Done.
229 return results[0] || results[1];
230 }.bind(this));
231 }.bind(this));
232 };
233
234 /**
235 * Factory for a function that returns an entry's disposition.
236 *
237 * @param {!importer.HistoryLoader} historyLoader
238 * @param {!analytics.Tracker} tracker
239 *
240 * @return {function(!FileEntry, !importer.Destination):
241 * !Promise<!importer.Disposition>}
mtomasz 2015/03/05 04:36:52 nit: Shall it be !importer.Disposition?
Steve McKay 2015/03/05 21:39:03 Done.
242 */
243 importer.DispositionChecker.createChecker =
244 function(historyLoader, tracker) {
245 var checker = new importer.DispositionChecker(
246 historyLoader,
247 new importer.DriveDuplicateFinder(tracker));
248 return checker.getDisposition.bind(checker);
249 };
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698