Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(155)

Side by Side Diff: chrome/browser/visitedlink/visitedlink_master.cc

Issue 11825011: Componentize visitedlinks to src/components/visitedlink (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Final rebase Created 7 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/visitedlink/visitedlink_master.h"
6
7 #if defined(OS_WIN)
8 #include <windows.h>
9 #include <io.h>
10 #include <shlobj.h>
11 #endif // defined(OS_WIN)
12 #include <stdio.h>
13
14 #include <algorithm>
15
16 #include "base/bind.h"
17 #include "base/bind_helpers.h"
18 #include "base/containers/stack_container.h"
19 #include "base/file_util.h"
20 #include "base/logging.h"
21 #include "base/message_loop.h"
22 #include "base/path_service.h"
23 #include "base/process_util.h"
24 #include "base/rand_util.h"
25 #include "base/string_util.h"
26 #include "base/threading/thread_restrictions.h"
27 #include "chrome/browser/visitedlink/visitedlink_delegate.h"
28 #include "chrome/browser/visitedlink/visitedlink_event_listener.h"
29 #include "content/public/browser/browser_context.h"
30 #include "content/public/browser/browser_thread.h"
31 #include "googleurl/src/gurl.h"
32
33 using content::BrowserThread;
34 using file_util::ScopedFILE;
35 using file_util::OpenFile;
36 using file_util::TruncateFile;
37
38 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
39 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
40 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
41 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
42 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
43
44 const int32 VisitedLinkMaster::kFileCurrentVersion = 3;
45
46 // the signature at the beginning of the URL table = "VLnk" (visited links)
47 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
48 const size_t VisitedLinkMaster::kFileHeaderSize =
49 kFileHeaderSaltOffset + LINK_SALT_LENGTH;
50
51 // This value should also be the same as the smallest size in the lookup
52 // table in NewTableSizeForCount (prime number).
53 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
54
55 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
56
57 namespace {
58
59 // Fills the given salt structure with some quasi-random values
60 // It is not necessary to generate a cryptographically strong random string,
61 // only that it be reasonably different for different users.
62 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
63 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
64 uint64 randval = base::RandUint64();
65 memcpy(salt, &randval, 8);
66 }
67
68 // Opens file on a background thread to not block UI thread.
69 void AsyncOpen(FILE** file, const FilePath& filename) {
70 *file = OpenFile(filename, "wb+");
71 DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value();
72 }
73
74 // Returns true if the write was complete.
75 static bool WriteToFile(FILE* file,
76 off_t offset,
77 const void* data,
78 size_t data_len) {
79 if (fseek(file, offset, SEEK_SET) != 0)
80 return false; // Don't write to an invalid part of the file.
81
82 size_t num_written = fwrite(data, 1, data_len, file);
83
84 // The write may not make it to the kernel (stdlib may buffer the write)
85 // until the next fseek/fclose call. If we crash, it's easy for our used
86 // item count to be out of sync with the number of hashes we write.
87 // Protect against this by calling fflush.
88 int ret = fflush(file);
89 DCHECK_EQ(0, ret);
90 return num_written == data_len;
91 }
92
93 // This task executes on a background thread and executes a write. This
94 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE
95 // is used because file may still not be opened by the time of scheduling
96 // the task for execution.
97 void AsyncWrite(FILE** file, int32 offset, const std::string& data) {
98 if (*file)
99 WriteToFile(*file, offset, data.data(), data.size());
100 }
101
102 // Truncates the file to the current position asynchronously on a background
103 // thread. Double pointer to FILE is used because file may still not be opened
104 // by the time of scheduling the task for execution.
105 void AsyncTruncate(FILE** file) {
106 if (*file)
107 base::IgnoreResult(TruncateFile(*file));
108 }
109
110 // Closes the file on a background thread and releases memory used for storage
111 // of FILE* value. Double pointer to FILE is used because file may still not
112 // be opened by the time of scheduling the task for execution.
113 void AsyncClose(FILE** file) {
114 if (*file)
115 base::IgnoreResult(fclose(*file));
116 free(file);
117 }
118
119 } // namespace
120
121 // TableBuilder ---------------------------------------------------------------
122
123 // How rebuilding from history works
124 // ---------------------------------
125 //
126 // We mark that we're rebuilding from history by setting the table_builder_
127 // member in VisitedLinkMaster to the TableBuilder we create. This builder
128 // will be called on the history thread by the history system for every URL
129 // in the database.
130 //
131 // The builder will store the fingerprints for those URLs, and then marshalls
132 // back to the main thread where the VisitedLinkMaster will be notified. The
133 // master then replaces its table with a new table containing the computed
134 // fingerprints.
135 //
136 // The builder must remain active while the history system is using it.
137 // Sometimes, the master will be deleted before the rebuild is complete, in
138 // which case it notifies the builder via DisownMaster(). The builder will
139 // delete itself once rebuilding is complete, and not execute any callback.
140 class VisitedLinkMaster::TableBuilder
141 : public VisitedLinkDelegate::URLEnumerator {
142 public:
143 TableBuilder(VisitedLinkMaster* master,
144 const uint8 salt[LINK_SALT_LENGTH]);
145
146 // Called on the main thread when the master is being destroyed. This will
147 // prevent a crash when the query completes and the master is no longer
148 // around. We can not actually do anything but mark this fact, since the
149 // table will be being rebuilt simultaneously on the other thread.
150 void DisownMaster();
151
152 // VisitedLinkDelegate::URLEnumerator
153 virtual void OnURL(const GURL& url);
154 virtual void OnComplete(bool succeed);
155
156 private:
157 virtual ~TableBuilder() {}
158
159 // OnComplete mashals to this function on the main thread to do the
160 // notification.
161 void OnCompleteMainThread();
162
163 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
164 VisitedLinkMaster* master_;
165
166 // Indicates whether the operation has failed or not.
167 bool success_;
168
169 // Salt for this new table.
170 uint8 salt_[LINK_SALT_LENGTH];
171
172 // Stores the fingerprints we computed on the background thread.
173 VisitedLinkCommon::Fingerprints fingerprints_;
174
175 DISALLOW_COPY_AND_ASSIGN(TableBuilder);
176 };
177
178 // VisitedLinkMaster ----------------------------------------------------------
179
180 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context,
181 VisitedLinkDelegate* delegate)
182 : browser_context_(browser_context),
183 delegate_(delegate),
184 listener_(new VisitedLinkEventListener(
185 ALLOW_THIS_IN_INITIALIZER_LIST(this), browser_context)) {
186 InitMembers();
187 }
188
189 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
190 VisitedLinkDelegate* delegate,
191 bool suppress_rebuild,
192 const FilePath& filename,
193 int32 default_table_size)
194 : browser_context_(NULL),
195 delegate_(delegate) {
196 listener_.reset(listener);
197 DCHECK(listener_.get());
198 InitMembers();
199
200 database_name_override_ = filename;
201 table_size_override_ = default_table_size;
202 suppress_rebuild_ = suppress_rebuild;
203 }
204
205 VisitedLinkMaster::~VisitedLinkMaster() {
206 if (table_builder_.get()) {
207 // Prevent the table builder from calling us back now that we're being
208 // destroyed. Note that we DON'T delete the object, since the history
209 // system is still writing into it. When that is complete, the table
210 // builder will destroy itself when it finds we are gone.
211 table_builder_->DisownMaster();
212 }
213 FreeURLTable();
214 // FreeURLTable() will schedule closing of the file and deletion of |file_|.
215 // So nothing should be done here.
216 }
217
218 void VisitedLinkMaster::InitMembers() {
219 file_ = NULL;
220 shared_memory_ = NULL;
221 shared_memory_serial_ = 0;
222 used_items_ = 0;
223 table_size_override_ = 0;
224 suppress_rebuild_ = false;
225 sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken();
226
227 #ifndef NDEBUG
228 posted_asynchronous_operation_ = false;
229 #endif
230 }
231
232 bool VisitedLinkMaster::Init() {
233 // We probably shouldn't be loading this from the UI thread,
234 // but it does need to happen early on in startup.
235 // http://code.google.com/p/chromium/issues/detail?id=24163
236 base::ThreadRestrictions::ScopedAllowIO allow_io;
237 if (!InitFromFile())
238 return InitFromScratch(suppress_rebuild_);
239 return true;
240 }
241
242 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
243 // Extra check that we are not incognito. This should not happen.
244 // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is
245 // removed from BrowserContext.
246 if (browser_context_ && browser_context_->IsOffTheRecord()) {
247 NOTREACHED();
248 return null_hash_;
249 }
250
251 if (!url.is_valid())
252 return null_hash_; // Don't add invalid URLs.
253
254 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
255 url.spec().size(),
256 salt_);
257 if (table_builder_) {
258 // If we have a pending delete for this fingerprint, cancel it.
259 std::set<Fingerprint>::iterator found =
260 deleted_since_rebuild_.find(fingerprint);
261 if (found != deleted_since_rebuild_.end())
262 deleted_since_rebuild_.erase(found);
263
264 // A rebuild is in progress, save this addition in the temporary list so
265 // it can be added once rebuild is complete.
266 added_since_rebuild_.insert(fingerprint);
267 }
268
269 // If the table is "full", we don't add URLs and just drop them on the floor.
270 // This can happen if we get thousands of new URLs and something causes
271 // the table resizing to fail. This check prevents a hang in that case. Note
272 // that this is *not* the resize limit, this is just a sanity check.
273 if (used_items_ / 8 > table_length_ / 10)
274 return null_hash_; // Table is more than 80% full.
275
276 return AddFingerprint(fingerprint, true);
277 }
278
279 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here,
280 const base::Closure& task) {
281 BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_,
282 from_here, task);
283 }
284
285 void VisitedLinkMaster::AddURL(const GURL& url) {
286 Hash index = TryToAddURL(url);
287 if (!table_builder_ && index != null_hash_) {
288 // Not rebuilding, so we want to keep the file on disk up-to-date.
289 WriteUsedItemCountToFile();
290 WriteHashRangeToFile(index, index);
291 ResizeTableIfNecessary();
292 }
293 }
294
295 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
296 for (std::vector<GURL>::const_iterator i = url.begin();
297 i != url.end(); ++i) {
298 Hash index = TryToAddURL(*i);
299 if (!table_builder_ && index != null_hash_)
300 ResizeTableIfNecessary();
301 }
302
303 // Keeps the file on disk up-to-date.
304 if (!table_builder_)
305 WriteFullTable();
306 }
307
308 void VisitedLinkMaster::DeleteAllURLs() {
309 // Any pending modifications are invalid.
310 added_since_rebuild_.clear();
311 deleted_since_rebuild_.clear();
312
313 // Clear the hash table.
314 used_items_ = 0;
315 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
316
317 // Resize it if it is now too empty. Resize may write the new table out for
318 // us, otherwise, schedule writing the new table to disk ourselves.
319 if (!ResizeTableIfNecessary())
320 WriteFullTable();
321
322 listener_->Reset();
323 }
324
325 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() {
326 return delegate_;
327 }
328
329 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) {
330 if (!urls->HasNextURL())
331 return;
332
333 listener_->Reset();
334
335 if (table_builder_) {
336 // A rebuild is in progress, save this deletion in the temporary list so
337 // it can be added once rebuild is complete.
338 while (urls->HasNextURL()) {
339 const GURL& url(urls->NextURL());
340 if (!url.is_valid())
341 continue;
342
343 Fingerprint fingerprint =
344 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_);
345 deleted_since_rebuild_.insert(fingerprint);
346
347 // If the URL was just added and now we're deleting it, it may be in the
348 // list of things added since the last rebuild. Delete it from that list.
349 std::set<Fingerprint>::iterator found =
350 added_since_rebuild_.find(fingerprint);
351 if (found != added_since_rebuild_.end())
352 added_since_rebuild_.erase(found);
353
354 // Delete the URLs from the in-memory table, but don't bother writing
355 // to disk since it will be replaced soon.
356 DeleteFingerprint(fingerprint, false);
357 }
358 return;
359 }
360
361 // Compute the deleted URLs' fingerprints and delete them
362 std::set<Fingerprint> deleted_fingerprints;
363 while (urls->HasNextURL()) {
364 const GURL& url(urls->NextURL());
365 if (!url.is_valid())
366 continue;
367 deleted_fingerprints.insert(
368 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_));
369 }
370 DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
371 }
372
373 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
374 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
375 Fingerprint fingerprint,
376 bool send_notifications) {
377 if (!hash_table_ || table_length_ == 0) {
378 NOTREACHED(); // Not initialized.
379 return null_hash_;
380 }
381
382 Hash cur_hash = HashFingerprint(fingerprint);
383 Hash first_hash = cur_hash;
384 while (true) {
385 Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
386 if (cur_fingerprint == fingerprint)
387 return null_hash_; // This fingerprint is already in there, do nothing.
388
389 if (cur_fingerprint == null_fingerprint_) {
390 // End of probe sequence found, insert here.
391 hash_table_[cur_hash] = fingerprint;
392 used_items_++;
393 // If allowed, notify listener that a new visited link was added.
394 if (send_notifications)
395 listener_->Add(fingerprint);
396 return cur_hash;
397 }
398
399 // Advance in the probe sequence.
400 cur_hash = IncrementHash(cur_hash);
401 if (cur_hash == first_hash) {
402 // This means that we've wrapped around and are about to go into an
403 // infinite loop. Something was wrong with the hashtable resizing
404 // logic, so stop here.
405 NOTREACHED();
406 return null_hash_;
407 }
408 }
409 }
410
411 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
412 const std::set<Fingerprint>& fingerprints) {
413 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
414
415 // Delete the URLs from the table.
416 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
417 i != fingerprints.end(); ++i)
418 DeleteFingerprint(*i, !bulk_write);
419
420 // These deleted fingerprints may make us shrink the table.
421 if (ResizeTableIfNecessary())
422 return; // The resize function wrote the new table to disk for us.
423
424 // Nobody wrote this out for us, write the full file to disk.
425 if (bulk_write)
426 WriteFullTable();
427 }
428
429 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
430 bool update_file) {
431 if (!hash_table_ || table_length_ == 0) {
432 NOTREACHED(); // Not initialized.
433 return false;
434 }
435 if (!IsVisited(fingerprint))
436 return false; // Not in the database to delete.
437
438 // First update the header used count.
439 used_items_--;
440 if (update_file)
441 WriteUsedItemCountToFile();
442
443 Hash deleted_hash = HashFingerprint(fingerprint);
444
445 // Find the range of "stuff" in the hash table that is adjacent to this
446 // fingerprint. These are things that could be affected by the change in
447 // the hash table. Since we use linear probing, anything after the deleted
448 // item up until an empty item could be affected.
449 Hash end_range = deleted_hash;
450 while (true) {
451 Hash next_hash = IncrementHash(end_range);
452 if (next_hash == deleted_hash)
453 break; // We wrapped around and the whole table is full.
454 if (!hash_table_[next_hash])
455 break; // Found the last spot.
456 end_range = next_hash;
457 }
458
459 // We could get all fancy and move the affected fingerprints around, but
460 // instead we just remove them all and re-add them (minus our deleted one).
461 // This will mean there's a small window of time where the affected links
462 // won't be marked visited.
463 base::StackVector<Fingerprint, 32> shuffled_fingerprints;
464 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive.
465 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
466 if (hash_table_[i] != fingerprint) {
467 // Don't save the one we're deleting!
468 shuffled_fingerprints->push_back(hash_table_[i]);
469
470 // This will balance the increment of this value in AddFingerprint below
471 // so there is no net change.
472 used_items_--;
473 }
474 hash_table_[i] = null_fingerprint_;
475 }
476
477 if (!shuffled_fingerprints->empty()) {
478 // Need to add the new items back.
479 for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
480 AddFingerprint(shuffled_fingerprints[i], false);
481 }
482
483 // Write the affected range to disk [deleted_hash, end_range].
484 if (update_file)
485 WriteHashRangeToFile(deleted_hash, end_range);
486
487 return true;
488 }
489
490 void VisitedLinkMaster::WriteFullTable() {
491 // This function can get called when the file is open, for example, when we
492 // resize the table. We must handle this case and not try to reopen the file,
493 // since there may be write operations pending on the file I/O thread.
494 //
495 // Note that once we start writing, we do not delete on error. This means
496 // there can be a partial file, but the short file will be detected next time
497 // we start, and will be replaced.
498 //
499 // This might possibly get corrupted if we crash in the middle of writing.
500 // We should pick up the most common types of these failures when we notice
501 // that the file size is different when we load it back in, and then we will
502 // regenerate the table.
503 if (!file_) {
504 file_ = static_cast<FILE**>(calloc(1, sizeof(*file_)));
505 FilePath filename;
506 GetDatabaseFileName(&filename);
507 PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename));
508 }
509
510 // Write the new header.
511 int32 header[4];
512 header[0] = kFileSignature;
513 header[1] = kFileCurrentVersion;
514 header[2] = table_length_;
515 header[3] = used_items_;
516 WriteToFile(file_, 0, header, sizeof(header));
517 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
518
519 // Write the hash data.
520 WriteToFile(file_, kFileHeaderSize,
521 hash_table_, table_length_ * sizeof(Fingerprint));
522
523 // The hash table may have shrunk, so make sure this is the end.
524 PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_));
525 }
526
527 bool VisitedLinkMaster::InitFromFile() {
528 DCHECK(file_ == NULL);
529
530 FilePath filename;
531 GetDatabaseFileName(&filename);
532 ScopedFILE file_closer(OpenFile(filename, "rb+"));
533 if (!file_closer.get())
534 return false;
535
536 int32 num_entries, used_count;
537 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
538 return false; // Header isn't valid.
539
540 // Allocate and read the table.
541 if (!CreateURLTable(num_entries, false))
542 return false;
543 if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
544 hash_table_, num_entries * sizeof(Fingerprint))) {
545 FreeURLTable();
546 return false;
547 }
548 used_items_ = used_count;
549
550 #ifndef NDEBUG
551 DebugValidate();
552 #endif
553
554 file_ = static_cast<FILE**>(malloc(sizeof(*file_)));
555 *file_ = file_closer.release();
556 return true;
557 }
558
559 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
560 int32 table_size = kDefaultTableSize;
561 if (table_size_override_)
562 table_size = table_size_override_;
563
564 // The salt must be generated before the table so that it can be copied to
565 // the shared memory.
566 GenerateSalt(salt_);
567 if (!CreateURLTable(table_size, true))
568 return false;
569
570 #ifndef NDEBUG
571 DebugValidate();
572 #endif
573
574 if (suppress_rebuild) {
575 // When we disallow rebuilds (normally just unit tests), just use the
576 // current empty table.
577 WriteFullTable();
578 return true;
579 }
580
581 // This will build the table from history. On the first run, history will
582 // be empty, so this will be correct. This will also write the new table
583 // to disk. We don't want to save explicitly here, since the rebuild may
584 // not complete, leaving us with an empty but valid visited link database.
585 // In the future, we won't know we need to try rebuilding again.
586 return RebuildTableFromDelegate();
587 }
588
589 bool VisitedLinkMaster::ReadFileHeader(FILE* file,
590 int32* num_entries,
591 int32* used_count,
592 uint8 salt[LINK_SALT_LENGTH]) {
593 // Get file size.
594 // Note that there is no need to seek back to the original location in the
595 // file since ReadFromFile() [which is the next call accessing the file]
596 // seeks before reading.
597 if (fseek(file, 0, SEEK_END) == -1)
598 return false;
599 size_t file_size = ftell(file);
600
601 if (file_size <= kFileHeaderSize)
602 return false;
603
604 uint8 header[kFileHeaderSize];
605 if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
606 return false;
607
608 // Verify the signature.
609 int32 signature;
610 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
611 if (signature != kFileSignature)
612 return false;
613
614 // Verify the version is up-to-date. As with other read errors, a version
615 // mistmatch will trigger a rebuild of the database from history, which will
616 // have the effect of migrating the database.
617 int32 version;
618 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
619 if (version != kFileCurrentVersion)
620 return false; // Bad version.
621
622 // Read the table size and make sure it matches the file size.
623 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
624 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
625 return false; // Bad size.
626
627 // Read the used item count.
628 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
629 if (*used_count > *num_entries)
630 return false; // Bad used item count;
631
632 // Read the salt.
633 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
634
635 // This file looks OK from the header's perspective.
636 return true;
637 }
638
639 bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) {
640 if (!database_name_override_.empty()) {
641 // use this filename, the directory must exist
642 *filename = database_name_override_;
643 return true;
644 }
645
646 if (!browser_context_ || browser_context_->GetPath().empty())
647 return false;
648
649 FilePath profile_dir = browser_context_->GetPath();
650 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
651 return true;
652 }
653
654 // Initializes the shared memory structure. The salt should already be filled
655 // in so that it can be written to the shared memory
656 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
657 // The table is the size of the table followed by the entries.
658 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
659
660 // Create the shared memory object.
661 shared_memory_ = new base::SharedMemory();
662 if (!shared_memory_)
663 return false;
664
665 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) {
666 delete shared_memory_;
667 shared_memory_ = NULL;
668 return false;
669 }
670
671 if (init_to_empty) {
672 memset(shared_memory_->memory(), 0, alloc_size);
673 used_items_ = 0;
674 }
675 table_length_ = num_entries;
676
677 // Save the header for other processes to read.
678 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
679 header->length = table_length_;
680 memcpy(header->salt, salt_, LINK_SALT_LENGTH);
681
682 // Our table pointer is just the data immediately following the size.
683 hash_table_ = reinterpret_cast<Fingerprint*>(
684 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
685
686 return true;
687 }
688
689 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
690 base::SharedMemory *old_shared_memory = shared_memory_;
691 Fingerprint* old_hash_table = hash_table_;
692 int32 old_table_length = table_length_;
693 if (!CreateURLTable(num_entries, true)) {
694 // Try to put back the old state.
695 shared_memory_ = old_shared_memory;
696 hash_table_ = old_hash_table;
697 table_length_ = old_table_length;
698 return false;
699 }
700
701 #ifndef NDEBUG
702 DebugValidate();
703 #endif
704
705 return true;
706 }
707
708 void VisitedLinkMaster::FreeURLTable() {
709 if (shared_memory_) {
710 delete shared_memory_;
711 shared_memory_ = NULL;
712 }
713 if (!file_)
714 return;
715 PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_));
716 // AsyncClose() will close the file and free the memory pointed by |file_|.
717 file_ = NULL;
718 }
719
720 bool VisitedLinkMaster::ResizeTableIfNecessary() {
721 DCHECK(table_length_ > 0) << "Must have a table";
722
723 // Load limits for good performance/space. We are pretty conservative about
724 // keeping the table not very full. This is because we use linear probing
725 // which increases the likelihood of clumps of entries which will reduce
726 // performance.
727 const float max_table_load = 0.5f; // Grow when we're > this full.
728 const float min_table_load = 0.2f; // Shrink when we're < this full.
729
730 float load = ComputeTableLoad();
731 if (load < max_table_load &&
732 (table_length_ <= static_cast<float>(kDefaultTableSize) ||
733 load > min_table_load))
734 return false;
735
736 // Table needs to grow or shrink.
737 int new_size = NewTableSizeForCount(used_items_);
738 DCHECK(new_size > used_items_);
739 DCHECK(load <= min_table_load || new_size > table_length_);
740 ResizeTable(new_size);
741 return true;
742 }
743
744 void VisitedLinkMaster::ResizeTable(int32 new_size) {
745 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
746 shared_memory_serial_++;
747
748 #ifndef NDEBUG
749 DebugValidate();
750 #endif
751
752 base::SharedMemory* old_shared_memory = shared_memory_;
753 Fingerprint* old_hash_table = hash_table_;
754 int32 old_table_length = table_length_;
755 if (!BeginReplaceURLTable(new_size))
756 return;
757
758 // Now we have two tables, our local copy which is the old one, and the new
759 // one loaded into this object where we need to copy the data.
760 for (int32 i = 0; i < old_table_length; i++) {
761 Fingerprint cur = old_hash_table[i];
762 if (cur)
763 AddFingerprint(cur, false);
764 }
765
766 // On error unmapping, just forget about it since we can't do anything
767 // else to release it.
768 delete old_shared_memory;
769
770 // Send an update notification to all child processes so they read the new
771 // table.
772 listener_->NewTable(shared_memory_);
773
774 #ifndef NDEBUG
775 DebugValidate();
776 #endif
777
778 // The new table needs to be written to disk.
779 WriteFullTable();
780 }
781
782 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
783 // These table sizes are selected to be the maximum prime number less than
784 // a "convenient" multiple of 1K.
785 static const int table_sizes[] = {
786 16381, // 16K = 16384 <- don't shrink below this table size
787 // (should be == default_table_size)
788 32767, // 32K = 32768
789 65521, // 64K = 65536
790 130051, // 128K = 131072
791 262127, // 256K = 262144
792 524269, // 512K = 524288
793 1048549, // 1M = 1048576
794 2097143, // 2M = 2097152
795 4194301, // 4M = 4194304
796 8388571, // 8M = 8388608
797 16777199, // 16M = 16777216
798 33554347}; // 32M = 33554432
799
800 // Try to leave the table 33% full.
801 int desired = item_count * 3;
802
803 // Find the closest prime.
804 for (size_t i = 0; i < arraysize(table_sizes); i ++) {
805 if (table_sizes[i] > desired)
806 return table_sizes[i];
807 }
808
809 // Growing very big, just approximate a "good" number, not growing as much
810 // as normal.
811 return item_count * 2 - 1;
812 }
813
814 // See the TableBuilder definition in the header file for how this works.
815 bool VisitedLinkMaster::RebuildTableFromDelegate() {
816 DCHECK(!table_builder_);
817
818 // TODO(brettw) make sure we have reasonable salt!
819 table_builder_ = new TableBuilder(this, salt_);
820 delegate_->RebuildTable(table_builder_);
821 return true;
822 }
823
824 // See the TableBuilder declaration above for how this works.
825 void VisitedLinkMaster::OnTableRebuildComplete(
826 bool success,
827 const std::vector<Fingerprint>& fingerprints) {
828 if (success) {
829 // Replace the old table with a new blank one.
830 shared_memory_serial_++;
831
832 // We are responsible for freeing it AFTER it has been replaced if
833 // replacement succeeds.
834 base::SharedMemory* old_shared_memory = shared_memory_;
835
836 int new_table_size = NewTableSizeForCount(
837 static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
838 if (BeginReplaceURLTable(new_table_size)) {
839 // Free the old table.
840 delete old_shared_memory;
841
842 // Add the stored fingerprints to the hash table.
843 for (size_t i = 0; i < fingerprints.size(); i++)
844 AddFingerprint(fingerprints[i], false);
845
846 // Also add anything that was added while we were asynchronously
847 // generating the new table.
848 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
849 i != added_since_rebuild_.end(); ++i)
850 AddFingerprint(*i, false);
851 added_since_rebuild_.clear();
852
853 // Now handle deletions.
854 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
855 deleted_since_rebuild_.clear();
856
857 // Send an update notification to all child processes.
858 listener_->NewTable(shared_memory_);
859
860 WriteFullTable();
861 }
862 }
863 table_builder_ = NULL; // Will release our reference to the builder.
864
865 // Notify the unit test that the rebuild is complete (will be NULL in prod.)
866 if (!rebuild_complete_task_.is_null()) {
867 rebuild_complete_task_.Run();
868 rebuild_complete_task_.Reset();
869 }
870 }
871
872 void VisitedLinkMaster::WriteToFile(FILE** file,
873 off_t offset,
874 void* data,
875 int32 data_size) {
876 #ifndef NDEBUG
877 posted_asynchronous_operation_ = true;
878 #endif
879 PostIOTask(FROM_HERE,
880 base::Bind(&AsyncWrite, file, offset,
881 std::string(static_cast<const char*>(data), data_size)));
882 }
883
884 void VisitedLinkMaster::WriteUsedItemCountToFile() {
885 if (!file_)
886 return; // See comment on the file_ variable for why this might happen.
887 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
888 }
889
890 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
891 if (!file_)
892 return; // See comment on the file_ variable for why this might happen.
893 if (last_hash < first_hash) {
894 // Handle wraparound at 0. This first write is first_hash->EOF
895 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
896 &hash_table_[first_hash],
897 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
898
899 // Now do 0->last_lash.
900 WriteToFile(file_, kFileHeaderSize, hash_table_,
901 (last_hash + 1) * sizeof(Fingerprint));
902 } else {
903 // Normal case, just write the range.
904 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
905 &hash_table_[first_hash],
906 (last_hash - first_hash + 1) * sizeof(Fingerprint));
907 }
908 }
909
910 bool VisitedLinkMaster::ReadFromFile(FILE* file,
911 off_t offset,
912 void* data,
913 size_t data_size) {
914 #ifndef NDEBUG
915 // Since this function is synchronous, we require that no asynchronous
916 // operations could possibly be pending.
917 DCHECK(!posted_asynchronous_operation_);
918 #endif
919
920 if (fseek(file, offset, SEEK_SET) != 0)
921 return false;
922
923 size_t num_read = fread(data, 1, data_size, file);
924 return num_read == data_size;
925 }
926
927 // VisitedLinkTableBuilder ----------------------------------------------------
928
929 VisitedLinkMaster::TableBuilder::TableBuilder(
930 VisitedLinkMaster* master,
931 const uint8 salt[LINK_SALT_LENGTH])
932 : master_(master),
933 success_(true) {
934 fingerprints_.reserve(4096);
935 memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8));
936 }
937
938 // TODO(brettw): Do we want to try to cancel the request if this happens? It
939 // could delay shutdown if there are a lot of URLs.
940 void VisitedLinkMaster::TableBuilder::DisownMaster() {
941 master_ = NULL;
942 }
943
944 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
945 if (!url.is_empty()) {
946 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
947 url.spec().data(), url.spec().length(), salt_));
948 }
949 }
950
951 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
952 success_ = success;
953 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
954
955 // Marshal to the main thread to notify the VisitedLinkMaster that the
956 // rebuild is complete.
957 BrowserThread::PostTask(
958 BrowserThread::UI, FROM_HERE,
959 base::Bind(&TableBuilder::OnCompleteMainThread, this));
960 }
961
962 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
963 if (master_)
964 master_->OnTableRebuildComplete(success_, fingerprints_);
965 }
OLDNEW
« no previous file with comments | « chrome/browser/visitedlink/visitedlink_master.h ('k') | chrome/browser/visitedlink/visitedlink_perftest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698