Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(174)

Side by Side Diff: chrome/browser/visitedlink_master.cc

Issue 5606002: Move:... (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src/
Patch Set: Created 10 years ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/browser/visitedlink_master.h ('k') | chrome/browser/visitedlink_perftest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/visitedlink_master.h"
6
7 #if defined(OS_WIN)
8 #include <windows.h>
9 #include <io.h>
10 #include <shlobj.h>
11 #endif // defined(OS_WIN)
12 #include <stdio.h>
13
14 #include <algorithm>
15
16 #include "base/file_util.h"
17 #include "base/logging.h"
18 #include "base/message_loop.h"
19 #include "base/path_service.h"
20 #include "base/process_util.h"
21 #include "base/rand_util.h"
22 #include "base/stack_container.h"
23 #include "base/string_util.h"
24 #include "base/thread_restrictions.h"
25 #include "chrome/browser/browser_process.h"
26 #include "chrome/browser/browser_thread.h"
27 #include "chrome/browser/history/history.h"
28 #include "chrome/browser/profile.h"
29
30 using file_util::ScopedFILE;
31 using file_util::OpenFile;
32 using file_util::TruncateFile;
33
34 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
35 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
36 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
37 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
38 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;
39
40 const int32 VisitedLinkMaster::kFileCurrentVersion = 2;
41
42 // the signature at the beginning of the URL table = "VLnk" (visited links)
43 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
44 const size_t VisitedLinkMaster::kFileHeaderSize =
45 kFileHeaderSaltOffset + LINK_SALT_LENGTH;
46
47 // This value should also be the same as the smallest size in the lookup
48 // table in NewTableSizeForCount (prime number).
49 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;
50
51 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
52
53 namespace {
54
55 // Fills the given salt structure with some quasi-random values
56 // It is not necessary to generate a cryptographically strong random string,
57 // only that it be reasonably different for different users.
58 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) {
59 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt";
60 uint64 randval = base::RandUint64();
61 memcpy(salt, &randval, 8);
62 }
63
64 // AsyncWriter ----------------------------------------------------------------
65
66 // This task executes on a background thread and executes a write. This
67 // prevents us from blocking the UI thread doing I/O.
68 class AsyncWriter : public Task {
69 public:
70 AsyncWriter(FILE* file, int32 offset, const void* data, size_t data_len)
71 : file_(file),
72 offset_(offset) {
73 data_->resize(data_len);
74 memcpy(&*data_->begin(), data, data_len);
75 }
76
77 virtual void Run() {
78 WriteToFile(file_, offset_,
79 &*data_->begin(), static_cast<int32>(data_->size()));
80 }
81
82 // Exposed as a static so it can be called directly from the Master to
83 // reduce the number of platform-specific I/O sites we have. Returns true if
84 // the write was complete.
85 static bool WriteToFile(FILE* file,
86 off_t offset,
87 const void* data,
88 size_t data_len) {
89 if (fseek(file, offset, SEEK_SET) != 0)
90 return false; // Don't write to an invalid part of the file.
91
92 size_t num_written = fwrite(data, 1, data_len, file);
93
94 // The write may not make it to the kernel (stdlib may buffer the write)
95 // until the next fseek/fclose call. If we crash, it's easy for our used
96 // item count to be out of sync with the number of hashes we write.
97 // Protect against this by calling fflush.
98 int ret = fflush(file);
99 DCHECK_EQ(0, ret);
100 return num_written == data_len;
101 }
102
103 private:
104 // The data to write and where to write it.
105 FILE* file_;
106 int32 offset_; // Offset from the beginning of the file.
107
108 // Most writes are just a single fingerprint, so we reserve that much in this
109 // object to avoid mallocs in that case.
110 StackVector<char, sizeof(VisitedLinkCommon::Fingerprint)> data_;
111
112 DISALLOW_COPY_AND_ASSIGN(AsyncWriter);
113 };
114
115 // Used to asynchronously set the end of the file. This must be done on the
116 // same thread as the writing to keep things synchronized.
117 class AsyncSetEndOfFile : public Task {
118 public:
119 explicit AsyncSetEndOfFile(FILE* file) : file_(file) {}
120
121 virtual void Run() {
122 TruncateFile(file_);
123 }
124
125 private:
126 FILE* file_;
127 DISALLOW_COPY_AND_ASSIGN(AsyncSetEndOfFile);
128 };
129
130 // Used to asynchronously close a file. This must be done on the same thread as
131 // the writing to keep things synchronized.
132 class AsyncCloseHandle : public Task {
133 public:
134 explicit AsyncCloseHandle(FILE* file) : file_(file) {}
135
136 virtual void Run() {
137 fclose(file_);
138 }
139
140 private:
141 FILE* file_;
142 DISALLOW_COPY_AND_ASSIGN(AsyncCloseHandle);
143 };
144
145 } // namespace
146
147 // TableBuilder ---------------------------------------------------------------
148
149 // How rebuilding from history works
150 // ---------------------------------
151 //
152 // We mark that we're rebuilding from history by setting the table_builder_
153 // member in VisitedLinkMaster to the TableBuilder we create. This builder
154 // will be called on the history thread by the history system for every URL
155 // in the database.
156 //
157 // The builder will store the fingerprints for those URLs, and then marshalls
158 // back to the main thread where the VisitedLinkMaster will be notified. The
159 // master then replaces its table with a new table containing the computed
160 // fingerprints.
161 //
162 // The builder must remain active while the history system is using it.
163 // Sometimes, the master will be deleted before the rebuild is complete, in
164 // which case it notifies the builder via DisownMaster(). The builder will
165 // delete itself once rebuilding is complete, and not execute any callback.
166 class VisitedLinkMaster::TableBuilder
167 : public HistoryService::URLEnumerator,
168 public base::RefCountedThreadSafe<TableBuilder> {
169 public:
170 TableBuilder(VisitedLinkMaster* master,
171 const uint8 salt[LINK_SALT_LENGTH]);
172
173 // Called on the main thread when the master is being destroyed. This will
174 // prevent a crash when the query completes and the master is no longer
175 // around. We can not actually do anything but mark this fact, since the
176 // table will be being rebuilt simultaneously on the other thread.
177 void DisownMaster();
178
179 // HistoryService::URLEnumerator
180 virtual void OnURL(const GURL& url);
181 virtual void OnComplete(bool succeed);
182
183 private:
184 friend class base::RefCountedThreadSafe<TableBuilder>;
185
186 ~TableBuilder() {}
187
188 // OnComplete mashals to this function on the main thread to do the
189 // notification.
190 void OnCompleteMainThread();
191
192 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
193 VisitedLinkMaster* master_;
194
195 // Indicates whether the operation has failed or not.
196 bool success_;
197
198 // Salt for this new table.
199 uint8 salt_[LINK_SALT_LENGTH];
200
201 // Stores the fingerprints we computed on the background thread.
202 VisitedLinkCommon::Fingerprints fingerprints_;
203 };
204
205 // VisitedLinkMaster ----------------------------------------------------------
206
207 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
208 Profile* profile) {
209 InitMembers(listener, profile);
210 }
211
212 VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
213 HistoryService* history_service,
214 bool suppress_rebuild,
215 const FilePath& filename,
216 int32 default_table_size) {
217 InitMembers(listener, NULL);
218
219 database_name_override_ = filename;
220 table_size_override_ = default_table_size;
221 history_service_override_ = history_service;
222 suppress_rebuild_ = suppress_rebuild;
223 }
224
225 VisitedLinkMaster::~VisitedLinkMaster() {
226 if (table_builder_.get()) {
227 // Prevent the table builder from calling us back now that we're being
228 // destroyed. Note that we DON'T delete the object, since the history
229 // system is still writing into it. When that is complete, the table
230 // builder will destroy itself when it finds we are gone.
231 table_builder_->DisownMaster();
232 }
233 FreeURLTable();
234 }
235
236 void VisitedLinkMaster::InitMembers(Listener* listener, Profile* profile) {
237 DCHECK(listener);
238
239 listener_ = listener;
240 file_ = NULL;
241 shared_memory_ = NULL;
242 shared_memory_serial_ = 0;
243 used_items_ = 0;
244 table_size_override_ = 0;
245 history_service_override_ = NULL;
246 suppress_rebuild_ = false;
247 profile_ = profile;
248
249 #ifndef NDEBUG
250 posted_asynchronous_operation_ = false;
251 #endif
252 }
253
254 bool VisitedLinkMaster::Init() {
255 // We probably shouldn't be loading this from the UI thread,
256 // but it does need to happen early on in startup.
257 // http://code.google.com/p/chromium/issues/detail?id=24163
258 base::ThreadRestrictions::ScopedAllowIO allow_io;
259 if (!InitFromFile())
260 return InitFromScratch(suppress_rebuild_);
261 return true;
262 }
263
264 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
265 // Extra check that we are not off the record. This should not happen.
266 if (profile_ && profile_->IsOffTheRecord()) {
267 NOTREACHED();
268 return null_hash_;
269 }
270
271 if (!url.is_valid())
272 return null_hash_; // Don't add invalid URLs.
273
274 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
275 url.spec().size(),
276 salt_);
277 if (table_builder_) {
278 // If we have a pending delete for this fingerprint, cancel it.
279 std::set<Fingerprint>::iterator found =
280 deleted_since_rebuild_.find(fingerprint);
281 if (found != deleted_since_rebuild_.end())
282 deleted_since_rebuild_.erase(found);
283
284 // A rebuild is in progress, save this addition in the temporary list so
285 // it can be added once rebuild is complete.
286 added_since_rebuild_.insert(fingerprint);
287 }
288
289 // If the table is "full", we don't add URLs and just drop them on the floor.
290 // This can happen if we get thousands of new URLs and something causes
291 // the table resizing to fail. This check prevents a hang in that case. Note
292 // that this is *not* the resize limit, this is just a sanity check.
293 if (used_items_ / 8 > table_length_ / 10)
294 return null_hash_; // Table is more than 80% full.
295
296 return AddFingerprint(fingerprint, true);
297 }
298
299 void VisitedLinkMaster::AddURL(const GURL& url) {
300 Hash index = TryToAddURL(url);
301 if (!table_builder_ && index != null_hash_) {
302 // Not rebuilding, so we want to keep the file on disk up-to-date.
303 WriteUsedItemCountToFile();
304 WriteHashRangeToFile(index, index);
305 ResizeTableIfNecessary();
306 }
307 }
308
309 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) {
310 for (std::vector<GURL>::const_iterator i = url.begin();
311 i != url.end(); ++i) {
312 Hash index = TryToAddURL(*i);
313 if (!table_builder_ && index != null_hash_)
314 ResizeTableIfNecessary();
315 }
316
317 // Keeps the file on disk up-to-date.
318 if (!table_builder_)
319 WriteFullTable();
320 }
321
322 void VisitedLinkMaster::DeleteAllURLs() {
323 // Any pending modifications are invalid.
324 added_since_rebuild_.clear();
325 deleted_since_rebuild_.clear();
326
327 // Clear the hash table.
328 used_items_ = 0;
329 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint));
330
331 // Resize it if it is now too empty. Resize may write the new table out for
332 // us, otherwise, schedule writing the new table to disk ourselves.
333 if (!ResizeTableIfNecessary())
334 WriteFullTable();
335
336 listener_->Reset();
337 }
338
339 void VisitedLinkMaster::DeleteURLs(const std::set<GURL>& urls) {
340 typedef std::set<GURL>::const_iterator SetIterator;
341
342 if (urls.empty())
343 return;
344
345 listener_->Reset();
346
347 if (table_builder_) {
348 // A rebuild is in progress, save this deletion in the temporary list so
349 // it can be added once rebuild is complete.
350 for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
351 if (!i->is_valid())
352 continue;
353
354 Fingerprint fingerprint =
355 ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_);
356 deleted_since_rebuild_.insert(fingerprint);
357
358 // If the URL was just added and now we're deleting it, it may be in the
359 // list of things added since the last rebuild. Delete it from that list.
360 std::set<Fingerprint>::iterator found =
361 added_since_rebuild_.find(fingerprint);
362 if (found != added_since_rebuild_.end())
363 added_since_rebuild_.erase(found);
364
365 // Delete the URLs from the in-memory table, but don't bother writing
366 // to disk since it will be replaced soon.
367 DeleteFingerprint(fingerprint, false);
368 }
369 return;
370 }
371
372 // Compute the deleted URLs' fingerprints and delete them
373 std::set<Fingerprint> deleted_fingerprints;
374 for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
375 if (!i->is_valid())
376 continue;
377 deleted_fingerprints.insert(
378 ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_));
379 }
380 DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
381 }
382
383 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
384 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
385 Fingerprint fingerprint,
386 bool send_notifications) {
387 if (!hash_table_ || table_length_ == 0) {
388 NOTREACHED(); // Not initialized.
389 return null_hash_;
390 }
391
392 Hash cur_hash = HashFingerprint(fingerprint);
393 Hash first_hash = cur_hash;
394 while (true) {
395 Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
396 if (cur_fingerprint == fingerprint)
397 return null_hash_; // This fingerprint is already in there, do nothing.
398
399 if (cur_fingerprint == null_fingerprint_) {
400 // End of probe sequence found, insert here.
401 hash_table_[cur_hash] = fingerprint;
402 used_items_++;
403 // If allowed, notify listener that a new visited link was added.
404 if (send_notifications)
405 listener_->Add(fingerprint);
406 return cur_hash;
407 }
408
409 // Advance in the probe sequence.
410 cur_hash = IncrementHash(cur_hash);
411 if (cur_hash == first_hash) {
412 // This means that we've wrapped around and are about to go into an
413 // infinite loop. Something was wrong with the hashtable resizing
414 // logic, so stop here.
415 NOTREACHED();
416 return null_hash_;
417 }
418 }
419 }
420
421 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable(
422 const std::set<Fingerprint>& fingerprints) {
423 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold);
424
425 // Delete the URLs from the table.
426 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin();
427 i != fingerprints.end(); ++i)
428 DeleteFingerprint(*i, !bulk_write);
429
430 // These deleted fingerprints may make us shrink the table.
431 if (ResizeTableIfNecessary())
432 return; // The resize function wrote the new table to disk for us.
433
434 // Nobody wrote this out for us, write the full file to disk.
435 if (bulk_write)
436 WriteFullTable();
437 }
438
439 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
440 bool update_file) {
441 if (!hash_table_ || table_length_ == 0) {
442 NOTREACHED(); // Not initialized.
443 return false;
444 }
445 if (!IsVisited(fingerprint))
446 return false; // Not in the database to delete.
447
448 // First update the header used count.
449 used_items_--;
450 if (update_file)
451 WriteUsedItemCountToFile();
452
453 Hash deleted_hash = HashFingerprint(fingerprint);
454
455 // Find the range of "stuff" in the hash table that is adjacent to this
456 // fingerprint. These are things that could be affected by the change in
457 // the hash table. Since we use linear probing, anything after the deleted
458 // item up until an empty item could be affected.
459 Hash end_range = deleted_hash;
460 while (true) {
461 Hash next_hash = IncrementHash(end_range);
462 if (next_hash == deleted_hash)
463 break; // We wrapped around and the whole table is full.
464 if (!hash_table_[next_hash])
465 break; // Found the last spot.
466 end_range = next_hash;
467 }
468
469 // We could get all fancy and move the affected fingerprints around, but
470 // instead we just remove them all and re-add them (minus our deleted one).
471 // This will mean there's a small window of time where the affected links
472 // won't be marked visited.
473 StackVector<Fingerprint, 32> shuffled_fingerprints;
474 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive.
475 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
476 if (hash_table_[i] != fingerprint) {
477 // Don't save the one we're deleting!
478 shuffled_fingerprints->push_back(hash_table_[i]);
479
480 // This will balance the increment of this value in AddFingerprint below
481 // so there is no net change.
482 used_items_--;
483 }
484 hash_table_[i] = null_fingerprint_;
485 }
486
487 if (!shuffled_fingerprints->empty()) {
488 // Need to add the new items back.
489 for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
490 AddFingerprint(shuffled_fingerprints[i], false);
491 }
492
493 // Write the affected range to disk [deleted_hash, end_range].
494 if (update_file)
495 WriteHashRangeToFile(deleted_hash, end_range);
496
497 return true;
498 }
499
500 bool VisitedLinkMaster::WriteFullTable() {
501 // This function can get called when the file is open, for example, when we
502 // resize the table. We must handle this case and not try to reopen the file,
503 // since there may be write operations pending on the file I/O thread.
504 //
505 // Note that once we start writing, we do not delete on error. This means
506 // there can be a partial file, but the short file will be detected next time
507 // we start, and will be replaced.
508 //
509 // This might possibly get corrupted if we crash in the middle of writing.
510 // We should pick up the most common types of these failures when we notice
511 // that the file size is different when we load it back in, and then we will
512 // regenerate the table.
513 if (!file_) {
514 FilePath filename;
515 GetDatabaseFileName(&filename);
516 file_ = OpenFile(filename, "wb+");
517 if (!file_) {
518 DLOG(ERROR) << "Failed to open file " << filename.value();
519 return false;
520 }
521 }
522
523 // Write the new header.
524 int32 header[4];
525 header[0] = kFileSignature;
526 header[1] = kFileCurrentVersion;
527 header[2] = table_length_;
528 header[3] = used_items_;
529 WriteToFile(file_, 0, header, sizeof(header));
530 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);
531
532 // Write the hash data.
533 WriteToFile(file_, kFileHeaderSize,
534 hash_table_, table_length_ * sizeof(Fingerprint));
535
536 // The hash table may have shrunk, so make sure this is the end.
537 BrowserThread::PostTask(
538 BrowserThread::FILE, FROM_HERE, new AsyncSetEndOfFile(file_));
539 return true;
540 }
541
542 bool VisitedLinkMaster::InitFromFile() {
543 DCHECK(file_ == NULL);
544
545 FilePath filename;
546 GetDatabaseFileName(&filename);
547 ScopedFILE file_closer(OpenFile(filename, "rb+"));
548 if (!file_closer.get())
549 return false;
550
551 int32 num_entries, used_count;
552 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
553 return false; // Header isn't valid.
554
555 // Allocate and read the table.
556 if (!CreateURLTable(num_entries, false))
557 return false;
558 if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
559 hash_table_, num_entries * sizeof(Fingerprint))) {
560 FreeURLTable();
561 return false;
562 }
563 used_items_ = used_count;
564
565 #ifndef NDEBUG
566 DebugValidate();
567 #endif
568
569 file_ = file_closer.release();
570 return true;
571 }
572
573 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) {
574 int32 table_size = kDefaultTableSize;
575 if (table_size_override_)
576 table_size = table_size_override_;
577
578 // The salt must be generated before the table so that it can be copied to
579 // the shared memory.
580 GenerateSalt(salt_);
581 if (!CreateURLTable(table_size, true))
582 return false;
583
584 #ifndef NDEBUG
585 DebugValidate();
586 #endif
587
588 if (suppress_rebuild) {
589 // When we disallow rebuilds (normally just unit tests), just use the
590 // current empty table.
591 return WriteFullTable();
592 }
593
594 // This will build the table from history. On the first run, history will
595 // be empty, so this will be correct. This will also write the new table
596 // to disk. We don't want to save explicitly here, since the rebuild may
597 // not complete, leaving us with an empty but valid visited link database.
598 // In the future, we won't know we need to try rebuilding again.
599 return RebuildTableFromHistory();
600 }
601
602 bool VisitedLinkMaster::ReadFileHeader(FILE* file,
603 int32* num_entries,
604 int32* used_count,
605 uint8 salt[LINK_SALT_LENGTH]) {
606 // Get file size.
607 // Note that there is no need to seek back to the original location in the
608 // file since ReadFromFile() [which is the next call accessing the file]
609 // seeks before reading.
610 if (fseek(file, 0, SEEK_END) == -1)
611 return false;
612 size_t file_size = ftell(file);
613
614 if (file_size <= kFileHeaderSize)
615 return false;
616
617 uint8 header[kFileHeaderSize];
618 if (!ReadFromFile(file, 0, &header, kFileHeaderSize))
619 return false;
620
621 // Verify the signature.
622 int32 signature;
623 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature));
624 if (signature != kFileSignature)
625 return false;
626
627 // Verify the version is up-to-date. As with other read errors, a version
628 // mistmatch will trigger a rebuild of the database from history, which will
629 // have the effect of migrating the database.
630 int32 version;
631 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version));
632 if (version != kFileCurrentVersion)
633 return false; // Bad version.
634
635 // Read the table size and make sure it matches the file size.
636 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries));
637 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size)
638 return false; // Bad size.
639
640 // Read the used item count.
641 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count));
642 if (*used_count > *num_entries)
643 return false; // Bad used item count;
644
645 // Read the salt.
646 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH);
647
648 // This file looks OK from the header's perspective.
649 return true;
650 }
651
652 bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) {
653 if (!database_name_override_.empty()) {
654 // use this filename, the directory must exist
655 *filename = database_name_override_;
656 return true;
657 }
658
659 if (!profile_ || profile_->GetPath().empty())
660 return false;
661
662 FilePath profile_dir = profile_->GetPath();
663 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links"));
664 return true;
665 }
666
667 // Initializes the shared memory structure. The salt should already be filled
668 // in so that it can be written to the shared memory
669 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) {
670 // The table is the size of the table followed by the entries.
671 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader);
672
673 // Create the shared memory object.
674 shared_memory_ = new base::SharedMemory();
675 if (!shared_memory_)
676 return false;
677
678 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) {
679 delete shared_memory_;
680 shared_memory_ = NULL;
681 return false;
682 }
683
684 if (init_to_empty) {
685 memset(shared_memory_->memory(), 0, alloc_size);
686 used_items_ = 0;
687 }
688 table_length_ = num_entries;
689
690 // Save the header for other processes to read.
691 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory());
692 header->length = table_length_;
693 memcpy(header->salt, salt_, LINK_SALT_LENGTH);
694
695 // Our table pointer is just the data immediately following the size.
696 hash_table_ = reinterpret_cast<Fingerprint*>(
697 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader));
698
699 return true;
700 }
701
702 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) {
703 base::SharedMemory *old_shared_memory = shared_memory_;
704 Fingerprint* old_hash_table = hash_table_;
705 int32 old_table_length = table_length_;
706 if (!CreateURLTable(num_entries, true)) {
707 // Try to put back the old state.
708 shared_memory_ = old_shared_memory;
709 hash_table_ = old_hash_table;
710 table_length_ = old_table_length;
711 return false;
712 }
713
714 #ifndef NDEBUG
715 DebugValidate();
716 #endif
717
718 return true;
719 }
720
721 void VisitedLinkMaster::FreeURLTable() {
722 if (shared_memory_) {
723 delete shared_memory_;
724 shared_memory_ = NULL;
725 }
726 if (!file_)
727 return;
728
729 BrowserThread::PostTask(
730 BrowserThread::FILE, FROM_HERE, new AsyncCloseHandle(file_));
731 }
732
733 bool VisitedLinkMaster::ResizeTableIfNecessary() {
734 DCHECK(table_length_ > 0) << "Must have a table";
735
736 // Load limits for good performance/space. We are pretty conservative about
737 // keeping the table not very full. This is because we use linear probing
738 // which increases the likelihood of clumps of entries which will reduce
739 // performance.
740 const float max_table_load = 0.5f; // Grow when we're > this full.
741 const float min_table_load = 0.2f; // Shrink when we're < this full.
742
743 float load = ComputeTableLoad();
744 if (load < max_table_load &&
745 (table_length_ <= static_cast<float>(kDefaultTableSize) ||
746 load > min_table_load))
747 return false;
748
749 // Table needs to grow or shrink.
750 int new_size = NewTableSizeForCount(used_items_);
751 DCHECK(new_size > used_items_);
752 DCHECK(load <= min_table_load || new_size > table_length_);
753 ResizeTable(new_size);
754 return true;
755 }
756
757 void VisitedLinkMaster::ResizeTable(int32 new_size) {
758 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
759 shared_memory_serial_++;
760
761 #ifndef NDEBUG
762 DebugValidate();
763 #endif
764
765 base::SharedMemory* old_shared_memory = shared_memory_;
766 Fingerprint* old_hash_table = hash_table_;
767 int32 old_table_length = table_length_;
768 if (!BeginReplaceURLTable(new_size))
769 return;
770
771 // Now we have two tables, our local copy which is the old one, and the new
772 // one loaded into this object where we need to copy the data.
773 for (int32 i = 0; i < old_table_length; i++) {
774 Fingerprint cur = old_hash_table[i];
775 if (cur)
776 AddFingerprint(cur, false);
777 }
778
779 // On error unmapping, just forget about it since we can't do anything
780 // else to release it.
781 delete old_shared_memory;
782
783 // Send an update notification to all child processes so they read the new
784 // table.
785 listener_->NewTable(shared_memory_);
786
787 #ifndef NDEBUG
788 DebugValidate();
789 #endif
790
791 // The new table needs to be written to disk.
792 WriteFullTable();
793 }
794
795 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const {
796 // These table sizes are selected to be the maximum prime number less than
797 // a "convenient" multiple of 1K.
798 static const int table_sizes[] = {
799 16381, // 16K = 16384 <- don't shrink below this table size
800 // (should be == default_table_size)
801 32767, // 32K = 32768
802 65521, // 64K = 65536
803 130051, // 128K = 131072
804 262127, // 256K = 262144
805 524269, // 512K = 524288
806 1048549, // 1M = 1048576
807 2097143, // 2M = 2097152
808 4194301, // 4M = 4194304
809 8388571, // 8M = 8388608
810 16777199, // 16M = 16777216
811 33554347}; // 32M = 33554432
812
813 // Try to leave the table 33% full.
814 int desired = item_count * 3;
815
816 // Find the closest prime.
817 for (size_t i = 0; i < arraysize(table_sizes); i ++) {
818 if (table_sizes[i] > desired)
819 return table_sizes[i];
820 }
821
822 // Growing very big, just approximate a "good" number, not growing as much
823 // as normal.
824 return item_count * 2 - 1;
825 }
826
827 // See the TableBuilder definition in the header file for how this works.
828 bool VisitedLinkMaster::RebuildTableFromHistory() {
829 DCHECK(!table_builder_);
830 if (table_builder_)
831 return false;
832
833 HistoryService* history_service = history_service_override_;
834 if (!history_service && profile_) {
835 history_service = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
836 }
837
838 if (!history_service) {
839 DLOG(WARNING) << "Attempted to rebuild visited link table, but couldn't "
840 "obtain a HistoryService.";
841 return false;
842 }
843
844 // TODO(brettw) make sure we have reasonable salt!
845 table_builder_ = new TableBuilder(this, salt_);
846
847 // Make sure the table builder stays live during the call, even if the
848 // master is deleted. This is balanced in TableBuilder::OnCompleteMainThread.
849 table_builder_->AddRef();
850 history_service->IterateURLs(table_builder_);
851 return true;
852 }
853
854 // See the TableBuilder declaration above for how this works.
855 void VisitedLinkMaster::OnTableRebuildComplete(
856 bool success,
857 const std::vector<Fingerprint>& fingerprints) {
858 if (success) {
859 // Replace the old table with a new blank one.
860 shared_memory_serial_++;
861
862 // We are responsible for freeing it AFTER it has been replaced if
863 // replacement succeeds.
864 base::SharedMemory* old_shared_memory = shared_memory_;
865
866 int new_table_size = NewTableSizeForCount(
867 static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
868 if (BeginReplaceURLTable(new_table_size)) {
869 // Free the old table.
870 delete old_shared_memory;
871
872 // Add the stored fingerprints to the hash table.
873 for (size_t i = 0; i < fingerprints.size(); i++)
874 AddFingerprint(fingerprints[i], false);
875
876 // Also add anything that was added while we were asynchronously
877 // generating the new table.
878 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
879 i != added_since_rebuild_.end(); ++i)
880 AddFingerprint(*i, false);
881 added_since_rebuild_.clear();
882
883 // Now handle deletions.
884 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
885 deleted_since_rebuild_.clear();
886
887 // Send an update notification to all child processes.
888 listener_->NewTable(shared_memory_);
889
890 // We shouldn't be writing the table from the main thread!
891 // http://code.google.com/p/chromium/issues/detail?id=24163
892 base::ThreadRestrictions::ScopedAllowIO allow_io;
893 WriteFullTable();
894 }
895 }
896 table_builder_ = NULL; // Will release our reference to the builder.
897
898 // Notify the unit test that the rebuild is complete (will be NULL in prod.)
899 if (rebuild_complete_task_.get()) {
900 rebuild_complete_task_->Run();
901 rebuild_complete_task_.reset(NULL);
902 }
903 }
904
905 void VisitedLinkMaster::WriteToFile(FILE* file,
906 off_t offset,
907 void* data,
908 int32 data_size) {
909 #ifndef NDEBUG
910 posted_asynchronous_operation_ = true;
911 #endif
912
913 BrowserThread::PostTask(
914 BrowserThread::FILE, FROM_HERE,
915 new AsyncWriter(file, offset, data, data_size));
916 }
917
918 void VisitedLinkMaster::WriteUsedItemCountToFile() {
919 if (!file_)
920 return; // See comment on the file_ variable for why this might happen.
921 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_));
922 }
923
924 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) {
925 if (!file_)
926 return; // See comment on the file_ variable for why this might happen.
927 if (last_hash < first_hash) {
928 // Handle wraparound at 0. This first write is first_hash->EOF
929 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
930 &hash_table_[first_hash],
931 (table_length_ - first_hash + 1) * sizeof(Fingerprint));
932
933 // Now do 0->last_lash.
934 WriteToFile(file_, kFileHeaderSize, hash_table_,
935 (last_hash + 1) * sizeof(Fingerprint));
936 } else {
937 // Normal case, just write the range.
938 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize,
939 &hash_table_[first_hash],
940 (last_hash - first_hash + 1) * sizeof(Fingerprint));
941 }
942 }
943
944 bool VisitedLinkMaster::ReadFromFile(FILE* file,
945 off_t offset,
946 void* data,
947 size_t data_size) {
948 #ifndef NDEBUG
949 // Since this function is synchronous, we require that no asynchronous
950 // operations could possibly be pending.
951 DCHECK(!posted_asynchronous_operation_);
952 #endif
953
954 fseek(file, offset, SEEK_SET);
955
956 size_t num_read = fread(data, 1, data_size, file);
957 return num_read == data_size;
958 }
959
960 // VisitedLinkTableBuilder ----------------------------------------------------
961
962 VisitedLinkMaster::TableBuilder::TableBuilder(
963 VisitedLinkMaster* master,
964 const uint8 salt[LINK_SALT_LENGTH])
965 : master_(master),
966 success_(true) {
967 fingerprints_.reserve(4096);
968 memcpy(salt_, salt, sizeof(salt));
969 }
970
971 // TODO(brettw): Do we want to try to cancel the request if this happens? It
972 // could delay shutdown if there are a lot of URLs.
973 void VisitedLinkMaster::TableBuilder::DisownMaster() {
974 master_ = NULL;
975 }
976
977 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) {
978 if (!url.is_empty()) {
979 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint(
980 url.spec().data(), url.spec().length(), salt_));
981 }
982 }
983
984 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) {
985 success_ = success;
986 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links";
987
988 // Marshal to the main thread to notify the VisitedLinkMaster that the
989 // rebuild is complete.
990 BrowserThread::PostTask(
991 BrowserThread::UI, FROM_HERE,
992 NewRunnableMethod(this, &TableBuilder::OnCompleteMainThread));
993 }
994
995 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
996 if (master_)
997 master_->OnTableRebuildComplete(success_, fingerprints_);
998
999 // WILL (generally) DELETE THIS! This balances the AddRef in
1000 // VisitedLinkMaster::RebuildTableFromHistory.
1001 Release();
1002 }
OLDNEW
« no previous file with comments | « chrome/browser/visitedlink_master.h ('k') | chrome/browser/visitedlink_perftest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698