Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(15)

Side by Side Diff: components/visitedlink/browser/visitedlink_master.h

Issue 1417943002: Load the table of visited links from database file asynchronously. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 5 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ 5 #ifndef COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
6 #define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ 6 #define COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
7 7
8 #if defined(OS_WIN) 8 #if defined(OS_WIN)
9 #include <windows.h> 9 #include <windows.h>
10 #endif 10 #endif
11 #include <set> 11 #include <set>
12 #include <vector> 12 #include <vector>
13 13
14 #include "base/callback.h" 14 #include "base/callback.h"
15 #include "base/callback_forward.h" 15 #include "base/callback_forward.h"
16 #include "base/files/file_path.h" 16 #include "base/files/file_path.h"
17 #include "base/gtest_prod_util.h" 17 #include "base/gtest_prod_util.h"
18 #include "base/memory/ref_counted.h"
18 #include "base/memory/shared_memory.h" 19 #include "base/memory/shared_memory.h"
20 #include "base/memory/weak_ptr.h"
19 #include "base/threading/sequenced_worker_pool.h" 21 #include "base/threading/sequenced_worker_pool.h"
20 #include "components/visitedlink/common/visitedlink_common.h" 22 #include "components/visitedlink/common/visitedlink_common.h"
21 23
22 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) 24 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
23 #include "base/logging.h" 25 #include "base/logging.h"
24 #endif 26 #endif
25 27
26 class GURL; 28 class GURL;
27 29
28 namespace content { 30 namespace content {
(...skipping 124 matching lines...) Expand 10 before | Expand all | Expand 10 after
153 void RewriteFile() { 155 void RewriteFile() {
154 WriteFullTable(); 156 WriteFullTable();
155 } 157 }
156 #endif 158 #endif
157 159
158 private: 160 private:
159 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete); 161 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, Delete);
160 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete); 162 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigDelete);
161 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport); 163 FRIEND_TEST_ALL_PREFIXES(VisitedLinkTest, BigImport);
162 164
165 // Keeps the result of loading a table from a database file to the UI thread.
166 struct LoadFromFileResult;
167
168 using TableLoadCompleteCallback = base::Callback<void(
169 bool success,
170 scoped_refptr<LoadFromFileResult> load_from_file_result)>;
171
163 // Object to rebuild the table on the history thread (see the .cc file). 172 // Object to rebuild the table on the history thread (see the .cc file).
164 class TableBuilder; 173 class TableBuilder;
165 174
166 // Byte offsets of values in the header. 175 // Byte offsets of values in the header.
167 static const int32 kFileHeaderSignatureOffset; 176 static const int32 kFileHeaderSignatureOffset;
168 static const int32 kFileHeaderVersionOffset; 177 static const int32 kFileHeaderVersionOffset;
169 static const int32 kFileHeaderLengthOffset; 178 static const int32 kFileHeaderLengthOffset;
170 static const int32 kFileHeaderUsedOffset; 179 static const int32 kFileHeaderUsedOffset;
171 static const int32 kFileHeaderSaltOffset; 180 static const int32 kFileHeaderSaltOffset;
172 181
(...skipping 27 matching lines...) Expand all
200 // These functions are only called if |persist_to_disk_| is true. 209 // These functions are only called if |persist_to_disk_| is true.
201 210
202 // Posts the given task to the blocking worker pool with our options. 211 // Posts the given task to the blocking worker pool with our options.
203 void PostIOTask(const tracked_objects::Location& from_here, 212 void PostIOTask(const tracked_objects::Location& from_here,
204 const base::Closure& task); 213 const base::Closure& task);
205 214
206 // Writes the entire table to disk. It will leave the table file open and 215 // Writes the entire table to disk. It will leave the table file open and
207 // the handle to it will be stored in file_. 216 // the handle to it will be stored in file_.
208 void WriteFullTable(); 217 void WriteFullTable();
209 218
210 // Try to load the table from the database file. If the file doesn't exist or 219 // Tries to load asynchronously the table from the database file.
211 // is corrupt, this will return failure.
212 bool InitFromFile(); 220 bool InitFromFile();
213 221
222 // Load the table from the database file. Calls |callback| when completed. It
223 // is called from the background thread. It must be first in the sequence of
224 // background operations with the database file.
225 static void LoadFromFile(const base::FilePath& filename,
226 const TableLoadCompleteCallback& callback);
227
228 // Load the table from the database file. Returns true on success.
229 // Fills parameter |load_from_file_result| on success. It is called from
230 // the background thread.
231 static bool LoadApartFromFile(
232 const base::FilePath& filename,
233 scoped_refptr<LoadFromFileResult>* load_from_file_result);
234
235 // It is called from the background thread and executed on the UI
236 // thread.
237 void OnTableLoadComplete(
238 bool success,
239 scoped_refptr<LoadFromFileResult> load_from_file_result);
240
214 // Reads the header of the link coloring database from disk. Assumes the 241 // Reads the header of the link coloring database from disk. Assumes the
215 // file pointer is at the beginning of the file and that there are no pending 242 // file pointer is at the beginning of the file and that it is the first
216 // asynchronous I/O operations. 243 // asynchronous I/O operation on the background thread.
217 // 244 //
218 // Returns true on success and places the size of the table in num_entries 245 // Returns true on success and places the size of the table in num_entries
219 // and the number of nonzero fingerprints in used_count. This will fail if 246 // and the number of nonzero fingerprints in used_count. This will fail if
220 // the version of the file is not the current version of the database. 247 // the version of the file is not the current version of the database.
221 bool ReadFileHeader(FILE* hfile, int32* num_entries, int32* used_count, 248 static bool ReadFileHeader(FILE* hfile,
222 uint8 salt[LINK_SALT_LENGTH]); 249 int32* num_entries,
250 int32* used_count,
251 uint8 salt[LINK_SALT_LENGTH]);
223 252
224 // Fills *filename with the name of the link database filename 253 // Fills *filename with the name of the link database filename
225 bool GetDatabaseFileName(base::FilePath* filename); 254 bool GetDatabaseFileName(base::FilePath* filename);
226 255
227 // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy 256 // Wrapper around Window's WriteFile using asynchronous I/O. This will proxy
228 // the write to a background thread. 257 // the write to a background thread.
229 void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size); 258 void WriteToFile(FILE** hfile, off_t offset, void* data, int32 data_size);
230 259
231 // Helper function to schedule and asynchronous write of the used count to 260 // Helper function to schedule and asynchronous write of the used count to
232 // disk (this is a common operation). 261 // disk (this is a common operation).
233 void WriteUsedItemCountToFile(); 262 void WriteUsedItemCountToFile();
234 263
235 // Helper function to schedule an asynchronous write of the given range of 264 // Helper function to schedule an asynchronous write of the given range of
236 // hash functions to disk. The range is inclusive on both ends. The range can 265 // hash functions to disk. The range is inclusive on both ends. The range can
237 // wrap around at 0 and this function will handle it. 266 // wrap around at 0 and this function will handle it.
238 void WriteHashRangeToFile(Hash first_hash, Hash last_hash); 267 void WriteHashRangeToFile(Hash first_hash, Hash last_hash);
239 268
240 // Synchronous read from the file. Assumes there are no pending asynchronous 269 // Synchronous read from the file. Assumes that it is the first asynchronous
241 // I/O functions. Returns true if the entire buffer was successfully filled. 270 // I/O operation in the background thread. Returns true if the entire buffer
242 bool ReadFromFile(FILE* hfile, off_t offset, void* data, size_t data_size); 271 // was successfully filled.
272 static bool ReadFromFile(FILE* hfile,
273 off_t offset,
274 void* data,
275 size_t data_size);
243 276
244 // General table handling 277 // General table handling
245 // ---------------------- 278 // ----------------------
246 279
247 // Called to add a fingerprint to the table. If |send_notifications| is true 280 // Called to add a fingerprint to the table. If |send_notifications| is true
248 // and the item is added successfully, Listener::Add will be invoked. 281 // and the item is added successfully, Listener::Add will be invoked.
249 // Returns the index of the inserted fingerprint or null_hash_ if there was a 282 // Returns the index of the inserted fingerprint or null_hash_ if there was a
250 // duplicate and this item was skippped. 283 // duplicate and this item was skippped.
251 Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications); 284 Hash AddFingerprint(Fingerprint fingerprint, bool send_notifications);
252 285
253 // Deletes all fingerprints from the given vector from the current hash table 286 // Deletes all fingerprints from the given vector from the current hash table
254 // and syncs it to disk if there are changes. This does not update the 287 // and syncs it to disk if there are changes. This does not update the
255 // deleted_since_rebuild_ list, the caller must update this itself if there 288 // deleted_since_rebuild_ list, the caller must update this itself if there
256 // is an update pending. 289 // is an update pending.
257 void DeleteFingerprintsFromCurrentTable( 290 void DeleteFingerprintsFromCurrentTable(
258 const std::set<Fingerprint>& fingerprints); 291 const std::set<Fingerprint>& fingerprints);
259 292
260 // Removes the indicated fingerprint from the table. If the update_file flag 293 // Removes the indicated fingerprint from the table. If the update_file flag
261 // is set, the changes will also be written to disk. Returns true if the 294 // is set, the changes will also be written to disk. Returns true if the
262 // fingerprint was deleted, false if it was not in the table to delete. 295 // fingerprint was deleted, false if it was not in the table to delete.
263 bool DeleteFingerprint(Fingerprint fingerprint, bool update_file); 296 bool DeleteFingerprint(Fingerprint fingerprint, bool update_file);
264 297
265 // Creates a new empty table, call if InitFromFile() fails. Normally, when 298 // Creates a new empty table, call if InitFromFile() fails. Normally, when
266 // |suppress_rebuild| is false, the table will be rebuilt from history, 299 // |suppress_rebuild| is false, the table will be rebuilt from history,
267 // keeping us in sync. When |suppress_rebuild| is true, the new table will be 300 // keeping us in sync. When |suppress_rebuild| is true, the new table will be
268 // empty and we will not consult history. This is used when clearing the 301 // empty and we will not consult history. This is used when clearing the
269 // database and for unit tests. 302 // database and for unit tests.
270 bool InitFromScratch(bool suppress_rebuild); 303 bool InitFromScratch(bool suppress_rebuild);
271 304
272 // Allocates the Fingerprint structure and length. When init_to_empty is set, 305 // Allocates the Fingerprint structure and length. Structure is filled with 0s
273 // the table will be filled with 0s and used_items_ will be set to 0 as well. 306 // and shared header with salt and used_items_ is set to 0.
274 // If the flag is not set, these things are untouched and it is the 307 bool CreateURLTable(int32 num_entries);
275 // responsibility of the caller to fill them (like when we are reading from 308
276 // a file). 309 // Allocates the Fingerprint structure and length. Returns true on success.
277 bool CreateURLTable(int32 num_entries, bool init_to_empty); 310 // Structure is filled with 0s and shared header with salt.
311 static bool CreateApartURLTable(int32 num_entries,
312 const uint8 salt[LINK_SALT_LENGTH],
313 scoped_ptr<base::SharedMemory>* shared_memory,
314 VisitedLinkCommon::Fingerprint** hash_table);
278 315
279 // A wrapper for CreateURLTable, this will allocate a new table, initialized 316 // A wrapper for CreateURLTable, this will allocate a new table, initialized
280 // to empty. The caller is responsible for saving the shared memory pointer 317 // to empty. The caller is responsible for saving the shared memory pointer
281 // and handles before this call (they will be replaced with new ones) and 318 // and handles before this call (they will be replaced with new ones) and
282 // releasing them later. This is designed for callers that make a new table 319 // releasing them later. This is designed for callers that make a new table
283 // and then copy values from the old table to the new one, then release the 320 // and then copy values from the old table to the new one, then release the
284 // old table. 321 // old table.
285 // 322 //
286 // Returns true on success. On failure, the old table will be restored. The 323 // Returns true on success. On failure, the old table will be restored. The
287 // caller should not attemp to release the pointer/handle in this case. 324 // caller should not attemp to release the pointer/handle in this case.
288 bool BeginReplaceURLTable(int32 num_entries); 325 bool BeginReplaceURLTable(int32 num_entries);
289 326
290 // unallocates the Fingerprint table 327 // unallocates the Fingerprint table
291 void FreeURLTable(); 328 void FreeURLTable();
292 329
293 // For growing the table. ResizeTableIfNecessary will check to see if the 330 // For growing the table. ResizeTableIfNecessary will check to see if the
294 // table should be resized and calls ResizeTable if needed. Returns true if 331 // table should be resized and calls ResizeTable if needed. Returns true if
295 // we decided to resize the table. 332 // we decided to resize the table.
296 bool ResizeTableIfNecessary(); 333 bool ResizeTableIfNecessary();
297 334
298 // Resizes the table (growing or shrinking) as necessary to accomodate the 335 // Resizes the table (growing or shrinking) as necessary to accomodate the
299 // current count. 336 // current count.
300 void ResizeTable(int32 new_size); 337 void ResizeTable(int32 new_size);
301 338
339 // Returns the default table size. It can be overrided in unit tests.
340 uint32 DefaultTableSize() const;
341
302 // Returns the desired table size for |item_count| URLs. 342 // Returns the desired table size for |item_count| URLs.
303 uint32 NewTableSizeForCount(int32 item_count) const; 343 uint32 NewTableSizeForCount(int32 item_count) const;
304 344
305 // Computes the table load as fraction. For example, if 1/4 of the entries are 345 // Computes the table load as fraction. For example, if 1/4 of the entries are
306 // full, this value will be 0.25 346 // full, this value will be 0.25
307 float ComputeTableLoad() const { 347 float ComputeTableLoad() const {
308 return static_cast<float>(used_items_) / static_cast<float>(table_length_); 348 return static_cast<float>(used_items_) / static_cast<float>(table_length_);
309 } 349 }
310 350
311 // Initializes a rebuild of the visited link database based on the browser 351 // Initializes a rebuild of the visited link database based on the browser
(...skipping 18 matching lines...) Expand all
330 if (hash >= table_length_ - 1) 370 if (hash >= table_length_ - 1)
331 return 0; // Wrap around. 371 return 0; // Wrap around.
332 return hash + 1; 372 return hash + 1;
333 } 373 }
334 inline Hash DecrementHash(Hash hash) { 374 inline Hash DecrementHash(Hash hash) {
335 if (hash <= 0) 375 if (hash <= 0)
336 return table_length_ - 1; // Wrap around. 376 return table_length_ - 1; // Wrap around.
337 return hash - 1; 377 return hash - 1;
338 } 378 }
339 379
340 #ifndef NDEBUG
341 // Indicates whether any asynchronous operation has ever been completed.
342 // We do some synchronous reads that require that no asynchronous operations
343 // are pending, yet we don't track whether they have been completed. This
344 // flag is a sanity check that these reads only happen before any
345 // asynchronous writes have been fired.
346 bool posted_asynchronous_operation_;
347 #endif
348
349 // Reference to the browser context that this object belongs to 380 // Reference to the browser context that this object belongs to
350 // (it knows the path to where the data is stored) 381 // (it knows the path to where the data is stored)
351 content::BrowserContext* browser_context_; 382 content::BrowserContext* browser_context_;
352 383
353 // Client owns the delegate and is responsible for it being valid through 384 // Client owns the delegate and is responsible for it being valid through
354 // the life time this VisitedLinkMaster. 385 // the life time this VisitedLinkMaster.
355 VisitedLinkDelegate* delegate_; 386 VisitedLinkDelegate* delegate_;
356 387
357 // VisitedLinkEventListener to handle incoming events. 388 // VisitedLinkEventListener to handle incoming events.
358 scoped_ptr<Listener> listener_; 389 scoped_ptr<Listener> listener_;
359 390
360 // Lazily initialized sequence token for posting file tasks. 391 // Lazily initialized sequence token for posting file tasks.
361 base::SequencedWorkerPool::SequenceToken sequence_token_; 392 base::SequencedWorkerPool::SequenceToken sequence_token_;
362 393
363 // When non-NULL, indicates we are in database rebuild mode and points to 394 // When non-NULL, indicates we are in database rebuild mode and points to
364 // the class collecting fingerprint information from the history system. 395 // the class collecting fingerprint information from the history system.
365 // The pointer is owned by this class, but it must remain valid while the 396 // The pointer is owned by this class, but it must remain valid while the
366 // history query is running. We must only delete it when the query is done. 397 // history query is running. We must only delete it when the query is done.
367 scoped_refptr<TableBuilder> table_builder_; 398 scoped_refptr<TableBuilder> table_builder_;
368 399
369 // Indicates URLs added and deleted since we started rebuilding the table. 400 // Indicates URLs added and deleted since we started rebuilding or loading the
370 std::set<Fingerprint> added_since_rebuild_; 401 // table.
371 std::set<Fingerprint> deleted_since_rebuild_; 402 std::set<Fingerprint> added_since_rebuild_or_load_;
372 403 std::set<Fingerprint> deleted_since_rebuild_or_load_;
373 // TODO(brettw) Support deletion, we need to track whether anything was
374 // deleted during the rebuild here. Then we should delete any of these
375 // entries from the complete table later.
376 // std::vector<Fingerprint> removed_since_rebuild_;
377 404
378 // The currently open file with the table in it. This may be NULL if we're 405 // The currently open file with the table in it. This may be NULL if we're
379 // rebuilding and haven't written a new version yet or if |persist_to_disk_| 406 // rebuilding and haven't written a new version yet or if |persist_to_disk_|
380 // is false. Writing to the file may be safely ignored in this case. Also 407 // is false. Writing to the file may be safely ignored in this case. Also
381 // |file_| may be non-NULL but point to a NULL pointer. That would mean that 408 // |file_| may be non-NULL but point to a NULL pointer. That would mean that
382 // opening of the file is already scheduled in a background thread and any 409 // opening of the file is already scheduled in a background thread and any
383 // writing to the file can also be scheduled to the background thread as it's 410 // writing to the file can also be scheduled to the background thread as it's
384 // guaranteed to be executed after the opening. 411 // guaranteed to be executed after the opening.
385 // The class owns both the |file_| pointer and the pointer pointed 412 // The class owns both the |file_| pointer and the pointer pointed
386 // by |*file_|. 413 // by |*file_|.
387 FILE** file_; 414 FILE** file_;
388 415
389 // If true, will try to persist the hash table to disk. Will rebuild from 416 // If true, will try to persist the hash table to disk. Will rebuild from
390 // VisitedLinkDelegate::RebuildTable if there are disk corruptions. 417 // VisitedLinkDelegate::RebuildTable if there are disk corruptions.
391 bool persist_to_disk_; 418 bool persist_to_disk_;
392 419
393 // Shared memory consists of a SharedHeader followed by the table. 420 // Shared memory consists of a SharedHeader followed by the table.
394 base::SharedMemory *shared_memory_; 421 base::SharedMemory *shared_memory_;
395 422
396 // When we generate new tables, we increment the serial number of the 423 // When we generate new tables, we increment the serial number of the
397 // shared memory object. 424 // shared memory object.
398 int32 shared_memory_serial_; 425 int32 shared_memory_serial_;
399 426
400 // Number of non-empty items in the table, used to compute fullness. 427 // Number of non-empty items in the table, used to compute fullness.
401 int32 used_items_; 428 int32 used_items_;
402 429
430 // We set this to true to avoid writing to the database file.
431 bool table_is_loading_from_file_;
432
403 // Testing values ----------------------------------------------------------- 433 // Testing values -----------------------------------------------------------
404 // 434 //
405 // The following fields exist for testing purposes. They are not used in 435 // The following fields exist for testing purposes. They are not used in
406 // release builds. It'd be nice to eliminate them in release builds, but we 436 // release builds. It'd be nice to eliminate them in release builds, but we
407 // don't want to change the signature of the object between the unit test and 437 // don't want to change the signature of the object between the unit test and
408 // regular builds. Instead, we just have "default" values that these take 438 // regular builds. Instead, we just have "default" values that these take
409 // in release builds that give "regular" behavior. 439 // in release builds that give "regular" behavior.
410 440
411 // Overridden database file name for testing 441 // Overridden database file name for testing
412 base::FilePath database_name_override_; 442 base::FilePath database_name_override_;
413 443
414 // When nonzero, overrides the table size for new databases for testing 444 // When nonzero, overrides the table size for new databases for testing
415 int32 table_size_override_; 445 int32 table_size_override_;
416 446
417 // When set, indicates the task that should be run after the next rebuild from 447 // When set, indicates the task that should be run after the next rebuild from
418 // history is complete. 448 // history is complete.
419 base::Closure rebuild_complete_task_; 449 base::Closure rebuild_complete_task_;
420 450
421 // Set to prevent us from attempting to rebuild the database from global 451 // Set to prevent us from attempting to rebuild the database from global
422 // history if we have an error opening the file. This is used for testing, 452 // history if we have an error opening the file. This is used for testing,
423 // will be false in production. 453 // will be false in production.
424 bool suppress_rebuild_; 454 bool suppress_rebuild_;
425 455
456 base::WeakPtrFactory<VisitedLinkMaster> weak_ptr_factory_;
457
426 DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster); 458 DISALLOW_COPY_AND_ASSIGN(VisitedLinkMaster);
427 }; 459 };
428 460
429 // NOTE: These methods are defined inline here, so we can share the compilation 461 // NOTE: These methods are defined inline here, so we can share the compilation
430 // of visitedlink_master.cc between the browser and the unit/perf tests. 462 // of visitedlink_master.cc between the browser and the unit/perf tests.
431 463
432 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG) 464 #if defined(UNIT_TEST) || defined(PERF_TEST) || !defined(NDEBUG)
433 inline void VisitedLinkMaster::DebugValidate() { 465 inline void VisitedLinkMaster::DebugValidate() {
434 int32 used_count = 0; 466 int32 used_count = 0;
435 for (int32 i = 0; i < table_length_; i++) { 467 for (int32 i = 0; i < table_length_; i++) {
436 if (hash_table_[i]) 468 if (hash_table_[i])
437 used_count++; 469 used_count++;
438 } 470 }
439 DCHECK_EQ(used_count, used_items_); 471 DCHECK_EQ(used_count, used_items_);
440 } 472 }
441 #endif 473 #endif
442 474
443 } // namespace visitedlink 475 } // namespace visitedlink
444 476
445 #endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_ 477 #endif // COMPONENTS_VISITEDLINK_BROWSER_VISITEDLINK_MASTER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698