Index: chrome/browser/visitedlink_master.cc |
=================================================================== |
--- chrome/browser/visitedlink_master.cc (revision 68044) |
+++ chrome/browser/visitedlink_master.cc (working copy) |
@@ -1,1002 +0,0 @@ |
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. |
-// Use of this source code is governed by a BSD-style license that can be |
-// found in the LICENSE file. |
- |
-#include "chrome/browser/visitedlink_master.h" |
- |
-#if defined(OS_WIN) |
-#include <windows.h> |
-#include <io.h> |
-#include <shlobj.h> |
-#endif // defined(OS_WIN) |
-#include <stdio.h> |
- |
-#include <algorithm> |
- |
-#include "base/file_util.h" |
-#include "base/logging.h" |
-#include "base/message_loop.h" |
-#include "base/path_service.h" |
-#include "base/process_util.h" |
-#include "base/rand_util.h" |
-#include "base/stack_container.h" |
-#include "base/string_util.h" |
-#include "base/thread_restrictions.h" |
-#include "chrome/browser/browser_process.h" |
-#include "chrome/browser/browser_thread.h" |
-#include "chrome/browser/history/history.h" |
-#include "chrome/browser/profile.h" |
- |
-using file_util::ScopedFILE; |
-using file_util::OpenFile; |
-using file_util::TruncateFile; |
- |
-const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0; |
-const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4; |
-const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8; |
-const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12; |
-const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16; |
- |
-const int32 VisitedLinkMaster::kFileCurrentVersion = 2; |
- |
-// the signature at the beginning of the URL table = "VLnk" (visited links) |
-const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56; |
-const size_t VisitedLinkMaster::kFileHeaderSize = |
- kFileHeaderSaltOffset + LINK_SALT_LENGTH; |
- |
-// This value should also be the same as the smallest size in the lookup |
-// table in NewTableSizeForCount (prime number). |
-const unsigned VisitedLinkMaster::kDefaultTableSize = 16381; |
- |
-const size_t VisitedLinkMaster::kBigDeleteThreshold = 64; |
- |
-namespace { |
- |
-// Fills the given salt structure with some quasi-random values |
-// It is not necessary to generate a cryptographically strong random string, |
-// only that it be reasonably different for different users. |
-void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { |
- DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; |
- uint64 randval = base::RandUint64(); |
- memcpy(salt, &randval, 8); |
-} |
- |
-// AsyncWriter ---------------------------------------------------------------- |
- |
-// This task executes on a background thread and executes a write. This |
-// prevents us from blocking the UI thread doing I/O. |
-class AsyncWriter : public Task { |
- public: |
- AsyncWriter(FILE* file, int32 offset, const void* data, size_t data_len) |
- : file_(file), |
- offset_(offset) { |
- data_->resize(data_len); |
- memcpy(&*data_->begin(), data, data_len); |
- } |
- |
- virtual void Run() { |
- WriteToFile(file_, offset_, |
- &*data_->begin(), static_cast<int32>(data_->size())); |
- } |
- |
- // Exposed as a static so it can be called directly from the Master to |
- // reduce the number of platform-specific I/O sites we have. Returns true if |
- // the write was complete. |
- static bool WriteToFile(FILE* file, |
- off_t offset, |
- const void* data, |
- size_t data_len) { |
- if (fseek(file, offset, SEEK_SET) != 0) |
- return false; // Don't write to an invalid part of the file. |
- |
- size_t num_written = fwrite(data, 1, data_len, file); |
- |
- // The write may not make it to the kernel (stdlib may buffer the write) |
- // until the next fseek/fclose call. If we crash, it's easy for our used |
- // item count to be out of sync with the number of hashes we write. |
- // Protect against this by calling fflush. |
- int ret = fflush(file); |
- DCHECK_EQ(0, ret); |
- return num_written == data_len; |
- } |
- |
- private: |
- // The data to write and where to write it. |
- FILE* file_; |
- int32 offset_; // Offset from the beginning of the file. |
- |
- // Most writes are just a single fingerprint, so we reserve that much in this |
- // object to avoid mallocs in that case. |
- StackVector<char, sizeof(VisitedLinkCommon::Fingerprint)> data_; |
- |
- DISALLOW_COPY_AND_ASSIGN(AsyncWriter); |
-}; |
- |
-// Used to asynchronously set the end of the file. This must be done on the |
-// same thread as the writing to keep things synchronized. |
-class AsyncSetEndOfFile : public Task { |
- public: |
- explicit AsyncSetEndOfFile(FILE* file) : file_(file) {} |
- |
- virtual void Run() { |
- TruncateFile(file_); |
- } |
- |
- private: |
- FILE* file_; |
- DISALLOW_COPY_AND_ASSIGN(AsyncSetEndOfFile); |
-}; |
- |
-// Used to asynchronously close a file. This must be done on the same thread as |
-// the writing to keep things synchronized. |
-class AsyncCloseHandle : public Task { |
- public: |
- explicit AsyncCloseHandle(FILE* file) : file_(file) {} |
- |
- virtual void Run() { |
- fclose(file_); |
- } |
- |
- private: |
- FILE* file_; |
- DISALLOW_COPY_AND_ASSIGN(AsyncCloseHandle); |
-}; |
- |
-} // namespace |
- |
-// TableBuilder --------------------------------------------------------------- |
- |
-// How rebuilding from history works |
-// --------------------------------- |
-// |
-// We mark that we're rebuilding from history by setting the table_builder_ |
-// member in VisitedLinkMaster to the TableBuilder we create. This builder |
-// will be called on the history thread by the history system for every URL |
-// in the database. |
-// |
-// The builder will store the fingerprints for those URLs, and then marshalls |
-// back to the main thread where the VisitedLinkMaster will be notified. The |
-// master then replaces its table with a new table containing the computed |
-// fingerprints. |
-// |
-// The builder must remain active while the history system is using it. |
-// Sometimes, the master will be deleted before the rebuild is complete, in |
-// which case it notifies the builder via DisownMaster(). The builder will |
-// delete itself once rebuilding is complete, and not execute any callback. |
-class VisitedLinkMaster::TableBuilder |
- : public HistoryService::URLEnumerator, |
- public base::RefCountedThreadSafe<TableBuilder> { |
- public: |
- TableBuilder(VisitedLinkMaster* master, |
- const uint8 salt[LINK_SALT_LENGTH]); |
- |
- // Called on the main thread when the master is being destroyed. This will |
- // prevent a crash when the query completes and the master is no longer |
- // around. We can not actually do anything but mark this fact, since the |
- // table will be being rebuilt simultaneously on the other thread. |
- void DisownMaster(); |
- |
- // HistoryService::URLEnumerator |
- virtual void OnURL(const GURL& url); |
- virtual void OnComplete(bool succeed); |
- |
- private: |
- friend class base::RefCountedThreadSafe<TableBuilder>; |
- |
- ~TableBuilder() {} |
- |
- // OnComplete mashals to this function on the main thread to do the |
- // notification. |
- void OnCompleteMainThread(); |
- |
- // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD! |
- VisitedLinkMaster* master_; |
- |
- // Indicates whether the operation has failed or not. |
- bool success_; |
- |
- // Salt for this new table. |
- uint8 salt_[LINK_SALT_LENGTH]; |
- |
- // Stores the fingerprints we computed on the background thread. |
- VisitedLinkCommon::Fingerprints fingerprints_; |
-}; |
- |
-// VisitedLinkMaster ---------------------------------------------------------- |
- |
-VisitedLinkMaster::VisitedLinkMaster(Listener* listener, |
- Profile* profile) { |
- InitMembers(listener, profile); |
-} |
- |
-VisitedLinkMaster::VisitedLinkMaster(Listener* listener, |
- HistoryService* history_service, |
- bool suppress_rebuild, |
- const FilePath& filename, |
- int32 default_table_size) { |
- InitMembers(listener, NULL); |
- |
- database_name_override_ = filename; |
- table_size_override_ = default_table_size; |
- history_service_override_ = history_service; |
- suppress_rebuild_ = suppress_rebuild; |
-} |
- |
-VisitedLinkMaster::~VisitedLinkMaster() { |
- if (table_builder_.get()) { |
- // Prevent the table builder from calling us back now that we're being |
- // destroyed. Note that we DON'T delete the object, since the history |
- // system is still writing into it. When that is complete, the table |
- // builder will destroy itself when it finds we are gone. |
- table_builder_->DisownMaster(); |
- } |
- FreeURLTable(); |
-} |
- |
-void VisitedLinkMaster::InitMembers(Listener* listener, Profile* profile) { |
- DCHECK(listener); |
- |
- listener_ = listener; |
- file_ = NULL; |
- shared_memory_ = NULL; |
- shared_memory_serial_ = 0; |
- used_items_ = 0; |
- table_size_override_ = 0; |
- history_service_override_ = NULL; |
- suppress_rebuild_ = false; |
- profile_ = profile; |
- |
-#ifndef NDEBUG |
- posted_asynchronous_operation_ = false; |
-#endif |
-} |
- |
-bool VisitedLinkMaster::Init() { |
- // We probably shouldn't be loading this from the UI thread, |
- // but it does need to happen early on in startup. |
- // http://code.google.com/p/chromium/issues/detail?id=24163 |
- base::ThreadRestrictions::ScopedAllowIO allow_io; |
- if (!InitFromFile()) |
- return InitFromScratch(suppress_rebuild_); |
- return true; |
-} |
- |
-VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) { |
- // Extra check that we are not off the record. This should not happen. |
- if (profile_ && profile_->IsOffTheRecord()) { |
- NOTREACHED(); |
- return null_hash_; |
- } |
- |
- if (!url.is_valid()) |
- return null_hash_; // Don't add invalid URLs. |
- |
- Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(), |
- url.spec().size(), |
- salt_); |
- if (table_builder_) { |
- // If we have a pending delete for this fingerprint, cancel it. |
- std::set<Fingerprint>::iterator found = |
- deleted_since_rebuild_.find(fingerprint); |
- if (found != deleted_since_rebuild_.end()) |
- deleted_since_rebuild_.erase(found); |
- |
- // A rebuild is in progress, save this addition in the temporary list so |
- // it can be added once rebuild is complete. |
- added_since_rebuild_.insert(fingerprint); |
- } |
- |
- // If the table is "full", we don't add URLs and just drop them on the floor. |
- // This can happen if we get thousands of new URLs and something causes |
- // the table resizing to fail. This check prevents a hang in that case. Note |
- // that this is *not* the resize limit, this is just a sanity check. |
- if (used_items_ / 8 > table_length_ / 10) |
- return null_hash_; // Table is more than 80% full. |
- |
- return AddFingerprint(fingerprint, true); |
-} |
- |
-void VisitedLinkMaster::AddURL(const GURL& url) { |
- Hash index = TryToAddURL(url); |
- if (!table_builder_ && index != null_hash_) { |
- // Not rebuilding, so we want to keep the file on disk up-to-date. |
- WriteUsedItemCountToFile(); |
- WriteHashRangeToFile(index, index); |
- ResizeTableIfNecessary(); |
- } |
-} |
- |
-void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { |
- for (std::vector<GURL>::const_iterator i = url.begin(); |
- i != url.end(); ++i) { |
- Hash index = TryToAddURL(*i); |
- if (!table_builder_ && index != null_hash_) |
- ResizeTableIfNecessary(); |
- } |
- |
- // Keeps the file on disk up-to-date. |
- if (!table_builder_) |
- WriteFullTable(); |
-} |
- |
-void VisitedLinkMaster::DeleteAllURLs() { |
- // Any pending modifications are invalid. |
- added_since_rebuild_.clear(); |
- deleted_since_rebuild_.clear(); |
- |
- // Clear the hash table. |
- used_items_ = 0; |
- memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); |
- |
- // Resize it if it is now too empty. Resize may write the new table out for |
- // us, otherwise, schedule writing the new table to disk ourselves. |
- if (!ResizeTableIfNecessary()) |
- WriteFullTable(); |
- |
- listener_->Reset(); |
-} |
- |
-void VisitedLinkMaster::DeleteURLs(const std::set<GURL>& urls) { |
- typedef std::set<GURL>::const_iterator SetIterator; |
- |
- if (urls.empty()) |
- return; |
- |
- listener_->Reset(); |
- |
- if (table_builder_) { |
- // A rebuild is in progress, save this deletion in the temporary list so |
- // it can be added once rebuild is complete. |
- for (SetIterator i = urls.begin(); i != urls.end(); ++i) { |
- if (!i->is_valid()) |
- continue; |
- |
- Fingerprint fingerprint = |
- ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_); |
- deleted_since_rebuild_.insert(fingerprint); |
- |
- // If the URL was just added and now we're deleting it, it may be in the |
- // list of things added since the last rebuild. Delete it from that list. |
- std::set<Fingerprint>::iterator found = |
- added_since_rebuild_.find(fingerprint); |
- if (found != added_since_rebuild_.end()) |
- added_since_rebuild_.erase(found); |
- |
- // Delete the URLs from the in-memory table, but don't bother writing |
- // to disk since it will be replaced soon. |
- DeleteFingerprint(fingerprint, false); |
- } |
- return; |
- } |
- |
- // Compute the deleted URLs' fingerprints and delete them |
- std::set<Fingerprint> deleted_fingerprints; |
- for (SetIterator i = urls.begin(); i != urls.end(); ++i) { |
- if (!i->is_valid()) |
- continue; |
- deleted_fingerprints.insert( |
- ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_)); |
- } |
- DeleteFingerprintsFromCurrentTable(deleted_fingerprints); |
-} |
- |
-// See VisitedLinkCommon::IsVisited which should be in sync with this algorithm |
-VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint( |
- Fingerprint fingerprint, |
- bool send_notifications) { |
- if (!hash_table_ || table_length_ == 0) { |
- NOTREACHED(); // Not initialized. |
- return null_hash_; |
- } |
- |
- Hash cur_hash = HashFingerprint(fingerprint); |
- Hash first_hash = cur_hash; |
- while (true) { |
- Fingerprint cur_fingerprint = FingerprintAt(cur_hash); |
- if (cur_fingerprint == fingerprint) |
- return null_hash_; // This fingerprint is already in there, do nothing. |
- |
- if (cur_fingerprint == null_fingerprint_) { |
- // End of probe sequence found, insert here. |
- hash_table_[cur_hash] = fingerprint; |
- used_items_++; |
- // If allowed, notify listener that a new visited link was added. |
- if (send_notifications) |
- listener_->Add(fingerprint); |
- return cur_hash; |
- } |
- |
- // Advance in the probe sequence. |
- cur_hash = IncrementHash(cur_hash); |
- if (cur_hash == first_hash) { |
- // This means that we've wrapped around and are about to go into an |
- // infinite loop. Something was wrong with the hashtable resizing |
- // logic, so stop here. |
- NOTREACHED(); |
- return null_hash_; |
- } |
- } |
-} |
- |
-void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( |
- const std::set<Fingerprint>& fingerprints) { |
- bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); |
- |
- // Delete the URLs from the table. |
- for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); |
- i != fingerprints.end(); ++i) |
- DeleteFingerprint(*i, !bulk_write); |
- |
- // These deleted fingerprints may make us shrink the table. |
- if (ResizeTableIfNecessary()) |
- return; // The resize function wrote the new table to disk for us. |
- |
- // Nobody wrote this out for us, write the full file to disk. |
- if (bulk_write) |
- WriteFullTable(); |
-} |
- |
-bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint, |
- bool update_file) { |
- if (!hash_table_ || table_length_ == 0) { |
- NOTREACHED(); // Not initialized. |
- return false; |
- } |
- if (!IsVisited(fingerprint)) |
- return false; // Not in the database to delete. |
- |
- // First update the header used count. |
- used_items_--; |
- if (update_file) |
- WriteUsedItemCountToFile(); |
- |
- Hash deleted_hash = HashFingerprint(fingerprint); |
- |
- // Find the range of "stuff" in the hash table that is adjacent to this |
- // fingerprint. These are things that could be affected by the change in |
- // the hash table. Since we use linear probing, anything after the deleted |
- // item up until an empty item could be affected. |
- Hash end_range = deleted_hash; |
- while (true) { |
- Hash next_hash = IncrementHash(end_range); |
- if (next_hash == deleted_hash) |
- break; // We wrapped around and the whole table is full. |
- if (!hash_table_[next_hash]) |
- break; // Found the last spot. |
- end_range = next_hash; |
- } |
- |
- // We could get all fancy and move the affected fingerprints around, but |
- // instead we just remove them all and re-add them (minus our deleted one). |
- // This will mean there's a small window of time where the affected links |
- // won't be marked visited. |
- StackVector<Fingerprint, 32> shuffled_fingerprints; |
- Hash stop_loop = IncrementHash(end_range); // The end range is inclusive. |
- for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) { |
- if (hash_table_[i] != fingerprint) { |
- // Don't save the one we're deleting! |
- shuffled_fingerprints->push_back(hash_table_[i]); |
- |
- // This will balance the increment of this value in AddFingerprint below |
- // so there is no net change. |
- used_items_--; |
- } |
- hash_table_[i] = null_fingerprint_; |
- } |
- |
- if (!shuffled_fingerprints->empty()) { |
- // Need to add the new items back. |
- for (size_t i = 0; i < shuffled_fingerprints->size(); i++) |
- AddFingerprint(shuffled_fingerprints[i], false); |
- } |
- |
- // Write the affected range to disk [deleted_hash, end_range]. |
- if (update_file) |
- WriteHashRangeToFile(deleted_hash, end_range); |
- |
- return true; |
-} |
- |
-bool VisitedLinkMaster::WriteFullTable() { |
- // This function can get called when the file is open, for example, when we |
- // resize the table. We must handle this case and not try to reopen the file, |
- // since there may be write operations pending on the file I/O thread. |
- // |
- // Note that once we start writing, we do not delete on error. This means |
- // there can be a partial file, but the short file will be detected next time |
- // we start, and will be replaced. |
- // |
- // This might possibly get corrupted if we crash in the middle of writing. |
- // We should pick up the most common types of these failures when we notice |
- // that the file size is different when we load it back in, and then we will |
- // regenerate the table. |
- if (!file_) { |
- FilePath filename; |
- GetDatabaseFileName(&filename); |
- file_ = OpenFile(filename, "wb+"); |
- if (!file_) { |
- DLOG(ERROR) << "Failed to open file " << filename.value(); |
- return false; |
- } |
- } |
- |
- // Write the new header. |
- int32 header[4]; |
- header[0] = kFileSignature; |
- header[1] = kFileCurrentVersion; |
- header[2] = table_length_; |
- header[3] = used_items_; |
- WriteToFile(file_, 0, header, sizeof(header)); |
- WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH); |
- |
- // Write the hash data. |
- WriteToFile(file_, kFileHeaderSize, |
- hash_table_, table_length_ * sizeof(Fingerprint)); |
- |
- // The hash table may have shrunk, so make sure this is the end. |
- BrowserThread::PostTask( |
- BrowserThread::FILE, FROM_HERE, new AsyncSetEndOfFile(file_)); |
- return true; |
-} |
- |
-bool VisitedLinkMaster::InitFromFile() { |
- DCHECK(file_ == NULL); |
- |
- FilePath filename; |
- GetDatabaseFileName(&filename); |
- ScopedFILE file_closer(OpenFile(filename, "rb+")); |
- if (!file_closer.get()) |
- return false; |
- |
- int32 num_entries, used_count; |
- if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_)) |
- return false; // Header isn't valid. |
- |
- // Allocate and read the table. |
- if (!CreateURLTable(num_entries, false)) |
- return false; |
- if (!ReadFromFile(file_closer.get(), kFileHeaderSize, |
- hash_table_, num_entries * sizeof(Fingerprint))) { |
- FreeURLTable(); |
- return false; |
- } |
- used_items_ = used_count; |
- |
-#ifndef NDEBUG |
- DebugValidate(); |
-#endif |
- |
- file_ = file_closer.release(); |
- return true; |
-} |
- |
-bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { |
- int32 table_size = kDefaultTableSize; |
- if (table_size_override_) |
- table_size = table_size_override_; |
- |
- // The salt must be generated before the table so that it can be copied to |
- // the shared memory. |
- GenerateSalt(salt_); |
- if (!CreateURLTable(table_size, true)) |
- return false; |
- |
-#ifndef NDEBUG |
- DebugValidate(); |
-#endif |
- |
- if (suppress_rebuild) { |
- // When we disallow rebuilds (normally just unit tests), just use the |
- // current empty table. |
- return WriteFullTable(); |
- } |
- |
- // This will build the table from history. On the first run, history will |
- // be empty, so this will be correct. This will also write the new table |
- // to disk. We don't want to save explicitly here, since the rebuild may |
- // not complete, leaving us with an empty but valid visited link database. |
- // In the future, we won't know we need to try rebuilding again. |
- return RebuildTableFromHistory(); |
-} |
- |
-bool VisitedLinkMaster::ReadFileHeader(FILE* file, |
- int32* num_entries, |
- int32* used_count, |
- uint8 salt[LINK_SALT_LENGTH]) { |
- // Get file size. |
- // Note that there is no need to seek back to the original location in the |
- // file since ReadFromFile() [which is the next call accessing the file] |
- // seeks before reading. |
- if (fseek(file, 0, SEEK_END) == -1) |
- return false; |
- size_t file_size = ftell(file); |
- |
- if (file_size <= kFileHeaderSize) |
- return false; |
- |
- uint8 header[kFileHeaderSize]; |
- if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) |
- return false; |
- |
- // Verify the signature. |
- int32 signature; |
- memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); |
- if (signature != kFileSignature) |
- return false; |
- |
- // Verify the version is up-to-date. As with other read errors, a version |
- // mistmatch will trigger a rebuild of the database from history, which will |
- // have the effect of migrating the database. |
- int32 version; |
- memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); |
- if (version != kFileCurrentVersion) |
- return false; // Bad version. |
- |
- // Read the table size and make sure it matches the file size. |
- memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); |
- if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) |
- return false; // Bad size. |
- |
- // Read the used item count. |
- memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); |
- if (*used_count > *num_entries) |
- return false; // Bad used item count; |
- |
- // Read the salt. |
- memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); |
- |
- // This file looks OK from the header's perspective. |
- return true; |
-} |
- |
-bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) { |
- if (!database_name_override_.empty()) { |
- // use this filename, the directory must exist |
- *filename = database_name_override_; |
- return true; |
- } |
- |
- if (!profile_ || profile_->GetPath().empty()) |
- return false; |
- |
- FilePath profile_dir = profile_->GetPath(); |
- *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); |
- return true; |
-} |
- |
-// Initializes the shared memory structure. The salt should already be filled |
-// in so that it can be written to the shared memory |
-bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { |
- // The table is the size of the table followed by the entries. |
- uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); |
- |
- // Create the shared memory object. |
- shared_memory_ = new base::SharedMemory(); |
- if (!shared_memory_) |
- return false; |
- |
- if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) { |
- delete shared_memory_; |
- shared_memory_ = NULL; |
- return false; |
- } |
- |
- if (init_to_empty) { |
- memset(shared_memory_->memory(), 0, alloc_size); |
- used_items_ = 0; |
- } |
- table_length_ = num_entries; |
- |
- // Save the header for other processes to read. |
- SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); |
- header->length = table_length_; |
- memcpy(header->salt, salt_, LINK_SALT_LENGTH); |
- |
- // Our table pointer is just the data immediately following the size. |
- hash_table_ = reinterpret_cast<Fingerprint*>( |
- static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); |
- |
- return true; |
-} |
- |
-bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { |
- base::SharedMemory *old_shared_memory = shared_memory_; |
- Fingerprint* old_hash_table = hash_table_; |
- int32 old_table_length = table_length_; |
- if (!CreateURLTable(num_entries, true)) { |
- // Try to put back the old state. |
- shared_memory_ = old_shared_memory; |
- hash_table_ = old_hash_table; |
- table_length_ = old_table_length; |
- return false; |
- } |
- |
-#ifndef NDEBUG |
- DebugValidate(); |
-#endif |
- |
- return true; |
-} |
- |
-void VisitedLinkMaster::FreeURLTable() { |
- if (shared_memory_) { |
- delete shared_memory_; |
- shared_memory_ = NULL; |
- } |
- if (!file_) |
- return; |
- |
- BrowserThread::PostTask( |
- BrowserThread::FILE, FROM_HERE, new AsyncCloseHandle(file_)); |
-} |
- |
-bool VisitedLinkMaster::ResizeTableIfNecessary() { |
- DCHECK(table_length_ > 0) << "Must have a table"; |
- |
- // Load limits for good performance/space. We are pretty conservative about |
- // keeping the table not very full. This is because we use linear probing |
- // which increases the likelihood of clumps of entries which will reduce |
- // performance. |
- const float max_table_load = 0.5f; // Grow when we're > this full. |
- const float min_table_load = 0.2f; // Shrink when we're < this full. |
- |
- float load = ComputeTableLoad(); |
- if (load < max_table_load && |
- (table_length_ <= static_cast<float>(kDefaultTableSize) || |
- load > min_table_load)) |
- return false; |
- |
- // Table needs to grow or shrink. |
- int new_size = NewTableSizeForCount(used_items_); |
- DCHECK(new_size > used_items_); |
- DCHECK(load <= min_table_load || new_size > table_length_); |
- ResizeTable(new_size); |
- return true; |
-} |
- |
-void VisitedLinkMaster::ResizeTable(int32 new_size) { |
- DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_); |
- shared_memory_serial_++; |
- |
-#ifndef NDEBUG |
- DebugValidate(); |
-#endif |
- |
- base::SharedMemory* old_shared_memory = shared_memory_; |
- Fingerprint* old_hash_table = hash_table_; |
- int32 old_table_length = table_length_; |
- if (!BeginReplaceURLTable(new_size)) |
- return; |
- |
- // Now we have two tables, our local copy which is the old one, and the new |
- // one loaded into this object where we need to copy the data. |
- for (int32 i = 0; i < old_table_length; i++) { |
- Fingerprint cur = old_hash_table[i]; |
- if (cur) |
- AddFingerprint(cur, false); |
- } |
- |
- // On error unmapping, just forget about it since we can't do anything |
- // else to release it. |
- delete old_shared_memory; |
- |
- // Send an update notification to all child processes so they read the new |
- // table. |
- listener_->NewTable(shared_memory_); |
- |
-#ifndef NDEBUG |
- DebugValidate(); |
-#endif |
- |
- // The new table needs to be written to disk. |
- WriteFullTable(); |
-} |
- |
-uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { |
- // These table sizes are selected to be the maximum prime number less than |
- // a "convenient" multiple of 1K. |
- static const int table_sizes[] = { |
- 16381, // 16K = 16384 <- don't shrink below this table size |
- // (should be == default_table_size) |
- 32767, // 32K = 32768 |
- 65521, // 64K = 65536 |
- 130051, // 128K = 131072 |
- 262127, // 256K = 262144 |
- 524269, // 512K = 524288 |
- 1048549, // 1M = 1048576 |
- 2097143, // 2M = 2097152 |
- 4194301, // 4M = 4194304 |
- 8388571, // 8M = 8388608 |
- 16777199, // 16M = 16777216 |
- 33554347}; // 32M = 33554432 |
- |
- // Try to leave the table 33% full. |
- int desired = item_count * 3; |
- |
- // Find the closest prime. |
- for (size_t i = 0; i < arraysize(table_sizes); i ++) { |
- if (table_sizes[i] > desired) |
- return table_sizes[i]; |
- } |
- |
- // Growing very big, just approximate a "good" number, not growing as much |
- // as normal. |
- return item_count * 2 - 1; |
-} |
- |
-// See the TableBuilder definition in the header file for how this works. |
-bool VisitedLinkMaster::RebuildTableFromHistory() { |
- DCHECK(!table_builder_); |
- if (table_builder_) |
- return false; |
- |
- HistoryService* history_service = history_service_override_; |
- if (!history_service && profile_) { |
- history_service = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); |
- } |
- |
- if (!history_service) { |
- DLOG(WARNING) << "Attempted to rebuild visited link table, but couldn't " |
- "obtain a HistoryService."; |
- return false; |
- } |
- |
- // TODO(brettw) make sure we have reasonable salt! |
- table_builder_ = new TableBuilder(this, salt_); |
- |
- // Make sure the table builder stays live during the call, even if the |
- // master is deleted. This is balanced in TableBuilder::OnCompleteMainThread. |
- table_builder_->AddRef(); |
- history_service->IterateURLs(table_builder_); |
- return true; |
-} |
- |
-// See the TableBuilder declaration above for how this works. |
-void VisitedLinkMaster::OnTableRebuildComplete( |
- bool success, |
- const std::vector<Fingerprint>& fingerprints) { |
- if (success) { |
- // Replace the old table with a new blank one. |
- shared_memory_serial_++; |
- |
- // We are responsible for freeing it AFTER it has been replaced if |
- // replacement succeeds. |
- base::SharedMemory* old_shared_memory = shared_memory_; |
- |
- int new_table_size = NewTableSizeForCount( |
- static_cast<int>(fingerprints.size() + added_since_rebuild_.size())); |
- if (BeginReplaceURLTable(new_table_size)) { |
- // Free the old table. |
- delete old_shared_memory; |
- |
- // Add the stored fingerprints to the hash table. |
- for (size_t i = 0; i < fingerprints.size(); i++) |
- AddFingerprint(fingerprints[i], false); |
- |
- // Also add anything that was added while we were asynchronously |
- // generating the new table. |
- for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin(); |
- i != added_since_rebuild_.end(); ++i) |
- AddFingerprint(*i, false); |
- added_since_rebuild_.clear(); |
- |
- // Now handle deletions. |
- DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_); |
- deleted_since_rebuild_.clear(); |
- |
- // Send an update notification to all child processes. |
- listener_->NewTable(shared_memory_); |
- |
- // We shouldn't be writing the table from the main thread! |
- // http://code.google.com/p/chromium/issues/detail?id=24163 |
- base::ThreadRestrictions::ScopedAllowIO allow_io; |
- WriteFullTable(); |
- } |
- } |
- table_builder_ = NULL; // Will release our reference to the builder. |
- |
- // Notify the unit test that the rebuild is complete (will be NULL in prod.) |
- if (rebuild_complete_task_.get()) { |
- rebuild_complete_task_->Run(); |
- rebuild_complete_task_.reset(NULL); |
- } |
-} |
- |
-void VisitedLinkMaster::WriteToFile(FILE* file, |
- off_t offset, |
- void* data, |
- int32 data_size) { |
-#ifndef NDEBUG |
- posted_asynchronous_operation_ = true; |
-#endif |
- |
- BrowserThread::PostTask( |
- BrowserThread::FILE, FROM_HERE, |
- new AsyncWriter(file, offset, data, data_size)); |
-} |
- |
-void VisitedLinkMaster::WriteUsedItemCountToFile() { |
- if (!file_) |
- return; // See comment on the file_ variable for why this might happen. |
- WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); |
-} |
- |
-void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { |
- if (!file_) |
- return; // See comment on the file_ variable for why this might happen. |
- if (last_hash < first_hash) { |
- // Handle wraparound at 0. This first write is first_hash->EOF |
- WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, |
- &hash_table_[first_hash], |
- (table_length_ - first_hash + 1) * sizeof(Fingerprint)); |
- |
- // Now do 0->last_lash. |
- WriteToFile(file_, kFileHeaderSize, hash_table_, |
- (last_hash + 1) * sizeof(Fingerprint)); |
- } else { |
- // Normal case, just write the range. |
- WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, |
- &hash_table_[first_hash], |
- (last_hash - first_hash + 1) * sizeof(Fingerprint)); |
- } |
-} |
- |
-bool VisitedLinkMaster::ReadFromFile(FILE* file, |
- off_t offset, |
- void* data, |
- size_t data_size) { |
-#ifndef NDEBUG |
- // Since this function is synchronous, we require that no asynchronous |
- // operations could possibly be pending. |
- DCHECK(!posted_asynchronous_operation_); |
-#endif |
- |
- fseek(file, offset, SEEK_SET); |
- |
- size_t num_read = fread(data, 1, data_size, file); |
- return num_read == data_size; |
-} |
- |
-// VisitedLinkTableBuilder ---------------------------------------------------- |
- |
-VisitedLinkMaster::TableBuilder::TableBuilder( |
- VisitedLinkMaster* master, |
- const uint8 salt[LINK_SALT_LENGTH]) |
- : master_(master), |
- success_(true) { |
- fingerprints_.reserve(4096); |
- memcpy(salt_, salt, sizeof(salt)); |
-} |
- |
-// TODO(brettw): Do we want to try to cancel the request if this happens? It |
-// could delay shutdown if there are a lot of URLs. |
-void VisitedLinkMaster::TableBuilder::DisownMaster() { |
- master_ = NULL; |
-} |
- |
-void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { |
- if (!url.is_empty()) { |
- fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( |
- url.spec().data(), url.spec().length(), salt_)); |
- } |
-} |
- |
-void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { |
- success_ = success; |
- DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; |
- |
- // Marshal to the main thread to notify the VisitedLinkMaster that the |
- // rebuild is complete. |
- BrowserThread::PostTask( |
- BrowserThread::UI, FROM_HERE, |
- NewRunnableMethod(this, &TableBuilder::OnCompleteMainThread)); |
-} |
- |
-void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() { |
- if (master_) |
- master_->OnTableRebuildComplete(success_, fingerprints_); |
- |
- // WILL (generally) DELETE THIS! This balances the AddRef in |
- // VisitedLinkMaster::RebuildTableFromHistory. |
- Release(); |
-} |