| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/visitedlink/visitedlink_master.h" | |
| 6 | |
| 7 #if defined(OS_WIN) | |
| 8 #include <windows.h> | |
| 9 #include <io.h> | |
| 10 #include <shlobj.h> | |
| 11 #endif // defined(OS_WIN) | |
| 12 #include <stdio.h> | |
| 13 | |
| 14 #include <algorithm> | |
| 15 | |
| 16 #include "base/bind.h" | |
| 17 #include "base/bind_helpers.h" | |
| 18 #include "base/containers/stack_container.h" | |
| 19 #include "base/file_util.h" | |
| 20 #include "base/logging.h" | |
| 21 #include "base/message_loop.h" | |
| 22 #include "base/path_service.h" | |
| 23 #include "base/process_util.h" | |
| 24 #include "base/rand_util.h" | |
| 25 #include "base/string_util.h" | |
| 26 #include "base/threading/thread_restrictions.h" | |
| 27 #include "chrome/browser/visitedlink/visitedlink_delegate.h" | |
| 28 #include "chrome/browser/visitedlink/visitedlink_event_listener.h" | |
| 29 #include "content/public/browser/browser_context.h" | |
| 30 #include "content/public/browser/browser_thread.h" | |
| 31 #include "googleurl/src/gurl.h" | |
| 32 | |
| 33 using content::BrowserThread; | |
| 34 using file_util::ScopedFILE; | |
| 35 using file_util::OpenFile; | |
| 36 using file_util::TruncateFile; | |
| 37 | |
| 38 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0; | |
| 39 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4; | |
| 40 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8; | |
| 41 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12; | |
| 42 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16; | |
| 43 | |
| 44 const int32 VisitedLinkMaster::kFileCurrentVersion = 3; | |
| 45 | |
| 46 // the signature at the beginning of the URL table = "VLnk" (visited links) | |
| 47 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56; | |
| 48 const size_t VisitedLinkMaster::kFileHeaderSize = | |
| 49 kFileHeaderSaltOffset + LINK_SALT_LENGTH; | |
| 50 | |
| 51 // This value should also be the same as the smallest size in the lookup | |
| 52 // table in NewTableSizeForCount (prime number). | |
| 53 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381; | |
| 54 | |
| 55 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64; | |
| 56 | |
| 57 namespace { | |
| 58 | |
| 59 // Fills the given salt structure with some quasi-random values | |
| 60 // It is not necessary to generate a cryptographically strong random string, | |
| 61 // only that it be reasonably different for different users. | |
| 62 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { | |
| 63 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; | |
| 64 uint64 randval = base::RandUint64(); | |
| 65 memcpy(salt, &randval, 8); | |
| 66 } | |
| 67 | |
| 68 // Opens file on a background thread to not block UI thread. | |
| 69 void AsyncOpen(FILE** file, const FilePath& filename) { | |
| 70 *file = OpenFile(filename, "wb+"); | |
| 71 DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value(); | |
| 72 } | |
| 73 | |
| 74 // Returns true if the write was complete. | |
| 75 static bool WriteToFile(FILE* file, | |
| 76 off_t offset, | |
| 77 const void* data, | |
| 78 size_t data_len) { | |
| 79 if (fseek(file, offset, SEEK_SET) != 0) | |
| 80 return false; // Don't write to an invalid part of the file. | |
| 81 | |
| 82 size_t num_written = fwrite(data, 1, data_len, file); | |
| 83 | |
| 84 // The write may not make it to the kernel (stdlib may buffer the write) | |
| 85 // until the next fseek/fclose call. If we crash, it's easy for our used | |
| 86 // item count to be out of sync with the number of hashes we write. | |
| 87 // Protect against this by calling fflush. | |
| 88 int ret = fflush(file); | |
| 89 DCHECK_EQ(0, ret); | |
| 90 return num_written == data_len; | |
| 91 } | |
| 92 | |
| 93 // This task executes on a background thread and executes a write. This | |
| 94 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE | |
| 95 // is used because file may still not be opened by the time of scheduling | |
| 96 // the task for execution. | |
| 97 void AsyncWrite(FILE** file, int32 offset, const std::string& data) { | |
| 98 if (*file) | |
| 99 WriteToFile(*file, offset, data.data(), data.size()); | |
| 100 } | |
| 101 | |
| 102 // Truncates the file to the current position asynchronously on a background | |
| 103 // thread. Double pointer to FILE is used because file may still not be opened | |
| 104 // by the time of scheduling the task for execution. | |
| 105 void AsyncTruncate(FILE** file) { | |
| 106 if (*file) | |
| 107 base::IgnoreResult(TruncateFile(*file)); | |
| 108 } | |
| 109 | |
| 110 // Closes the file on a background thread and releases memory used for storage | |
| 111 // of FILE* value. Double pointer to FILE is used because file may still not | |
| 112 // be opened by the time of scheduling the task for execution. | |
| 113 void AsyncClose(FILE** file) { | |
| 114 if (*file) | |
| 115 base::IgnoreResult(fclose(*file)); | |
| 116 free(file); | |
| 117 } | |
| 118 | |
| 119 } // namespace | |
| 120 | |
| 121 // TableBuilder --------------------------------------------------------------- | |
| 122 | |
| 123 // How rebuilding from history works | |
| 124 // --------------------------------- | |
| 125 // | |
| 126 // We mark that we're rebuilding from history by setting the table_builder_ | |
| 127 // member in VisitedLinkMaster to the TableBuilder we create. This builder | |
| 128 // will be called on the history thread by the history system for every URL | |
| 129 // in the database. | |
| 130 // | |
| 131 // The builder will store the fingerprints for those URLs, and then marshalls | |
| 132 // back to the main thread where the VisitedLinkMaster will be notified. The | |
| 133 // master then replaces its table with a new table containing the computed | |
| 134 // fingerprints. | |
| 135 // | |
| 136 // The builder must remain active while the history system is using it. | |
| 137 // Sometimes, the master will be deleted before the rebuild is complete, in | |
| 138 // which case it notifies the builder via DisownMaster(). The builder will | |
| 139 // delete itself once rebuilding is complete, and not execute any callback. | |
| 140 class VisitedLinkMaster::TableBuilder | |
| 141 : public VisitedLinkDelegate::URLEnumerator { | |
| 142 public: | |
| 143 TableBuilder(VisitedLinkMaster* master, | |
| 144 const uint8 salt[LINK_SALT_LENGTH]); | |
| 145 | |
| 146 // Called on the main thread when the master is being destroyed. This will | |
| 147 // prevent a crash when the query completes and the master is no longer | |
| 148 // around. We can not actually do anything but mark this fact, since the | |
| 149 // table will be being rebuilt simultaneously on the other thread. | |
| 150 void DisownMaster(); | |
| 151 | |
| 152 // VisitedLinkDelegate::URLEnumerator | |
| 153 virtual void OnURL(const GURL& url); | |
| 154 virtual void OnComplete(bool succeed); | |
| 155 | |
| 156 private: | |
| 157 virtual ~TableBuilder() {} | |
| 158 | |
| 159 // OnComplete mashals to this function on the main thread to do the | |
| 160 // notification. | |
| 161 void OnCompleteMainThread(); | |
| 162 | |
| 163 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD! | |
| 164 VisitedLinkMaster* master_; | |
| 165 | |
| 166 // Indicates whether the operation has failed or not. | |
| 167 bool success_; | |
| 168 | |
| 169 // Salt for this new table. | |
| 170 uint8 salt_[LINK_SALT_LENGTH]; | |
| 171 | |
| 172 // Stores the fingerprints we computed on the background thread. | |
| 173 VisitedLinkCommon::Fingerprints fingerprints_; | |
| 174 | |
| 175 DISALLOW_COPY_AND_ASSIGN(TableBuilder); | |
| 176 }; | |
| 177 | |
| 178 // VisitedLinkMaster ---------------------------------------------------------- | |
| 179 | |
| 180 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context, | |
| 181 VisitedLinkDelegate* delegate) | |
| 182 : browser_context_(browser_context), | |
| 183 delegate_(delegate), | |
| 184 listener_(new VisitedLinkEventListener( | |
| 185 ALLOW_THIS_IN_INITIALIZER_LIST(this), browser_context)) { | |
| 186 InitMembers(); | |
| 187 } | |
| 188 | |
| 189 VisitedLinkMaster::VisitedLinkMaster(Listener* listener, | |
| 190 VisitedLinkDelegate* delegate, | |
| 191 bool suppress_rebuild, | |
| 192 const FilePath& filename, | |
| 193 int32 default_table_size) | |
| 194 : browser_context_(NULL), | |
| 195 delegate_(delegate) { | |
| 196 listener_.reset(listener); | |
| 197 DCHECK(listener_.get()); | |
| 198 InitMembers(); | |
| 199 | |
| 200 database_name_override_ = filename; | |
| 201 table_size_override_ = default_table_size; | |
| 202 suppress_rebuild_ = suppress_rebuild; | |
| 203 } | |
| 204 | |
| 205 VisitedLinkMaster::~VisitedLinkMaster() { | |
| 206 if (table_builder_.get()) { | |
| 207 // Prevent the table builder from calling us back now that we're being | |
| 208 // destroyed. Note that we DON'T delete the object, since the history | |
| 209 // system is still writing into it. When that is complete, the table | |
| 210 // builder will destroy itself when it finds we are gone. | |
| 211 table_builder_->DisownMaster(); | |
| 212 } | |
| 213 FreeURLTable(); | |
| 214 // FreeURLTable() will schedule closing of the file and deletion of |file_|. | |
| 215 // So nothing should be done here. | |
| 216 } | |
| 217 | |
| 218 void VisitedLinkMaster::InitMembers() { | |
| 219 file_ = NULL; | |
| 220 shared_memory_ = NULL; | |
| 221 shared_memory_serial_ = 0; | |
| 222 used_items_ = 0; | |
| 223 table_size_override_ = 0; | |
| 224 suppress_rebuild_ = false; | |
| 225 sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken(); | |
| 226 | |
| 227 #ifndef NDEBUG | |
| 228 posted_asynchronous_operation_ = false; | |
| 229 #endif | |
| 230 } | |
| 231 | |
| 232 bool VisitedLinkMaster::Init() { | |
| 233 // We probably shouldn't be loading this from the UI thread, | |
| 234 // but it does need to happen early on in startup. | |
| 235 // http://code.google.com/p/chromium/issues/detail?id=24163 | |
| 236 base::ThreadRestrictions::ScopedAllowIO allow_io; | |
| 237 if (!InitFromFile()) | |
| 238 return InitFromScratch(suppress_rebuild_); | |
| 239 return true; | |
| 240 } | |
| 241 | |
| 242 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) { | |
| 243 // Extra check that we are not incognito. This should not happen. | |
| 244 // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is | |
| 245 // removed from BrowserContext. | |
| 246 if (browser_context_ && browser_context_->IsOffTheRecord()) { | |
| 247 NOTREACHED(); | |
| 248 return null_hash_; | |
| 249 } | |
| 250 | |
| 251 if (!url.is_valid()) | |
| 252 return null_hash_; // Don't add invalid URLs. | |
| 253 | |
| 254 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(), | |
| 255 url.spec().size(), | |
| 256 salt_); | |
| 257 if (table_builder_) { | |
| 258 // If we have a pending delete for this fingerprint, cancel it. | |
| 259 std::set<Fingerprint>::iterator found = | |
| 260 deleted_since_rebuild_.find(fingerprint); | |
| 261 if (found != deleted_since_rebuild_.end()) | |
| 262 deleted_since_rebuild_.erase(found); | |
| 263 | |
| 264 // A rebuild is in progress, save this addition in the temporary list so | |
| 265 // it can be added once rebuild is complete. | |
| 266 added_since_rebuild_.insert(fingerprint); | |
| 267 } | |
| 268 | |
| 269 // If the table is "full", we don't add URLs and just drop them on the floor. | |
| 270 // This can happen if we get thousands of new URLs and something causes | |
| 271 // the table resizing to fail. This check prevents a hang in that case. Note | |
| 272 // that this is *not* the resize limit, this is just a sanity check. | |
| 273 if (used_items_ / 8 > table_length_ / 10) | |
| 274 return null_hash_; // Table is more than 80% full. | |
| 275 | |
| 276 return AddFingerprint(fingerprint, true); | |
| 277 } | |
| 278 | |
| 279 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here, | |
| 280 const base::Closure& task) { | |
| 281 BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_, | |
| 282 from_here, task); | |
| 283 } | |
| 284 | |
| 285 void VisitedLinkMaster::AddURL(const GURL& url) { | |
| 286 Hash index = TryToAddURL(url); | |
| 287 if (!table_builder_ && index != null_hash_) { | |
| 288 // Not rebuilding, so we want to keep the file on disk up-to-date. | |
| 289 WriteUsedItemCountToFile(); | |
| 290 WriteHashRangeToFile(index, index); | |
| 291 ResizeTableIfNecessary(); | |
| 292 } | |
| 293 } | |
| 294 | |
| 295 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { | |
| 296 for (std::vector<GURL>::const_iterator i = url.begin(); | |
| 297 i != url.end(); ++i) { | |
| 298 Hash index = TryToAddURL(*i); | |
| 299 if (!table_builder_ && index != null_hash_) | |
| 300 ResizeTableIfNecessary(); | |
| 301 } | |
| 302 | |
| 303 // Keeps the file on disk up-to-date. | |
| 304 if (!table_builder_) | |
| 305 WriteFullTable(); | |
| 306 } | |
| 307 | |
| 308 void VisitedLinkMaster::DeleteAllURLs() { | |
| 309 // Any pending modifications are invalid. | |
| 310 added_since_rebuild_.clear(); | |
| 311 deleted_since_rebuild_.clear(); | |
| 312 | |
| 313 // Clear the hash table. | |
| 314 used_items_ = 0; | |
| 315 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); | |
| 316 | |
| 317 // Resize it if it is now too empty. Resize may write the new table out for | |
| 318 // us, otherwise, schedule writing the new table to disk ourselves. | |
| 319 if (!ResizeTableIfNecessary()) | |
| 320 WriteFullTable(); | |
| 321 | |
| 322 listener_->Reset(); | |
| 323 } | |
| 324 | |
| 325 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() { | |
| 326 return delegate_; | |
| 327 } | |
| 328 | |
| 329 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) { | |
| 330 if (!urls->HasNextURL()) | |
| 331 return; | |
| 332 | |
| 333 listener_->Reset(); | |
| 334 | |
| 335 if (table_builder_) { | |
| 336 // A rebuild is in progress, save this deletion in the temporary list so | |
| 337 // it can be added once rebuild is complete. | |
| 338 while (urls->HasNextURL()) { | |
| 339 const GURL& url(urls->NextURL()); | |
| 340 if (!url.is_valid()) | |
| 341 continue; | |
| 342 | |
| 343 Fingerprint fingerprint = | |
| 344 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_); | |
| 345 deleted_since_rebuild_.insert(fingerprint); | |
| 346 | |
| 347 // If the URL was just added and now we're deleting it, it may be in the | |
| 348 // list of things added since the last rebuild. Delete it from that list. | |
| 349 std::set<Fingerprint>::iterator found = | |
| 350 added_since_rebuild_.find(fingerprint); | |
| 351 if (found != added_since_rebuild_.end()) | |
| 352 added_since_rebuild_.erase(found); | |
| 353 | |
| 354 // Delete the URLs from the in-memory table, but don't bother writing | |
| 355 // to disk since it will be replaced soon. | |
| 356 DeleteFingerprint(fingerprint, false); | |
| 357 } | |
| 358 return; | |
| 359 } | |
| 360 | |
| 361 // Compute the deleted URLs' fingerprints and delete them | |
| 362 std::set<Fingerprint> deleted_fingerprints; | |
| 363 while (urls->HasNextURL()) { | |
| 364 const GURL& url(urls->NextURL()); | |
| 365 if (!url.is_valid()) | |
| 366 continue; | |
| 367 deleted_fingerprints.insert( | |
| 368 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_)); | |
| 369 } | |
| 370 DeleteFingerprintsFromCurrentTable(deleted_fingerprints); | |
| 371 } | |
| 372 | |
| 373 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm | |
| 374 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint( | |
| 375 Fingerprint fingerprint, | |
| 376 bool send_notifications) { | |
| 377 if (!hash_table_ || table_length_ == 0) { | |
| 378 NOTREACHED(); // Not initialized. | |
| 379 return null_hash_; | |
| 380 } | |
| 381 | |
| 382 Hash cur_hash = HashFingerprint(fingerprint); | |
| 383 Hash first_hash = cur_hash; | |
| 384 while (true) { | |
| 385 Fingerprint cur_fingerprint = FingerprintAt(cur_hash); | |
| 386 if (cur_fingerprint == fingerprint) | |
| 387 return null_hash_; // This fingerprint is already in there, do nothing. | |
| 388 | |
| 389 if (cur_fingerprint == null_fingerprint_) { | |
| 390 // End of probe sequence found, insert here. | |
| 391 hash_table_[cur_hash] = fingerprint; | |
| 392 used_items_++; | |
| 393 // If allowed, notify listener that a new visited link was added. | |
| 394 if (send_notifications) | |
| 395 listener_->Add(fingerprint); | |
| 396 return cur_hash; | |
| 397 } | |
| 398 | |
| 399 // Advance in the probe sequence. | |
| 400 cur_hash = IncrementHash(cur_hash); | |
| 401 if (cur_hash == first_hash) { | |
| 402 // This means that we've wrapped around and are about to go into an | |
| 403 // infinite loop. Something was wrong with the hashtable resizing | |
| 404 // logic, so stop here. | |
| 405 NOTREACHED(); | |
| 406 return null_hash_; | |
| 407 } | |
| 408 } | |
| 409 } | |
| 410 | |
| 411 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( | |
| 412 const std::set<Fingerprint>& fingerprints) { | |
| 413 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); | |
| 414 | |
| 415 // Delete the URLs from the table. | |
| 416 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); | |
| 417 i != fingerprints.end(); ++i) | |
| 418 DeleteFingerprint(*i, !bulk_write); | |
| 419 | |
| 420 // These deleted fingerprints may make us shrink the table. | |
| 421 if (ResizeTableIfNecessary()) | |
| 422 return; // The resize function wrote the new table to disk for us. | |
| 423 | |
| 424 // Nobody wrote this out for us, write the full file to disk. | |
| 425 if (bulk_write) | |
| 426 WriteFullTable(); | |
| 427 } | |
| 428 | |
| 429 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint, | |
| 430 bool update_file) { | |
| 431 if (!hash_table_ || table_length_ == 0) { | |
| 432 NOTREACHED(); // Not initialized. | |
| 433 return false; | |
| 434 } | |
| 435 if (!IsVisited(fingerprint)) | |
| 436 return false; // Not in the database to delete. | |
| 437 | |
| 438 // First update the header used count. | |
| 439 used_items_--; | |
| 440 if (update_file) | |
| 441 WriteUsedItemCountToFile(); | |
| 442 | |
| 443 Hash deleted_hash = HashFingerprint(fingerprint); | |
| 444 | |
| 445 // Find the range of "stuff" in the hash table that is adjacent to this | |
| 446 // fingerprint. These are things that could be affected by the change in | |
| 447 // the hash table. Since we use linear probing, anything after the deleted | |
| 448 // item up until an empty item could be affected. | |
| 449 Hash end_range = deleted_hash; | |
| 450 while (true) { | |
| 451 Hash next_hash = IncrementHash(end_range); | |
| 452 if (next_hash == deleted_hash) | |
| 453 break; // We wrapped around and the whole table is full. | |
| 454 if (!hash_table_[next_hash]) | |
| 455 break; // Found the last spot. | |
| 456 end_range = next_hash; | |
| 457 } | |
| 458 | |
| 459 // We could get all fancy and move the affected fingerprints around, but | |
| 460 // instead we just remove them all and re-add them (minus our deleted one). | |
| 461 // This will mean there's a small window of time where the affected links | |
| 462 // won't be marked visited. | |
| 463 base::StackVector<Fingerprint, 32> shuffled_fingerprints; | |
| 464 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive. | |
| 465 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) { | |
| 466 if (hash_table_[i] != fingerprint) { | |
| 467 // Don't save the one we're deleting! | |
| 468 shuffled_fingerprints->push_back(hash_table_[i]); | |
| 469 | |
| 470 // This will balance the increment of this value in AddFingerprint below | |
| 471 // so there is no net change. | |
| 472 used_items_--; | |
| 473 } | |
| 474 hash_table_[i] = null_fingerprint_; | |
| 475 } | |
| 476 | |
| 477 if (!shuffled_fingerprints->empty()) { | |
| 478 // Need to add the new items back. | |
| 479 for (size_t i = 0; i < shuffled_fingerprints->size(); i++) | |
| 480 AddFingerprint(shuffled_fingerprints[i], false); | |
| 481 } | |
| 482 | |
| 483 // Write the affected range to disk [deleted_hash, end_range]. | |
| 484 if (update_file) | |
| 485 WriteHashRangeToFile(deleted_hash, end_range); | |
| 486 | |
| 487 return true; | |
| 488 } | |
| 489 | |
| 490 void VisitedLinkMaster::WriteFullTable() { | |
| 491 // This function can get called when the file is open, for example, when we | |
| 492 // resize the table. We must handle this case and not try to reopen the file, | |
| 493 // since there may be write operations pending on the file I/O thread. | |
| 494 // | |
| 495 // Note that once we start writing, we do not delete on error. This means | |
| 496 // there can be a partial file, but the short file will be detected next time | |
| 497 // we start, and will be replaced. | |
| 498 // | |
| 499 // This might possibly get corrupted if we crash in the middle of writing. | |
| 500 // We should pick up the most common types of these failures when we notice | |
| 501 // that the file size is different when we load it back in, and then we will | |
| 502 // regenerate the table. | |
| 503 if (!file_) { | |
| 504 file_ = static_cast<FILE**>(calloc(1, sizeof(*file_))); | |
| 505 FilePath filename; | |
| 506 GetDatabaseFileName(&filename); | |
| 507 PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename)); | |
| 508 } | |
| 509 | |
| 510 // Write the new header. | |
| 511 int32 header[4]; | |
| 512 header[0] = kFileSignature; | |
| 513 header[1] = kFileCurrentVersion; | |
| 514 header[2] = table_length_; | |
| 515 header[3] = used_items_; | |
| 516 WriteToFile(file_, 0, header, sizeof(header)); | |
| 517 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH); | |
| 518 | |
| 519 // Write the hash data. | |
| 520 WriteToFile(file_, kFileHeaderSize, | |
| 521 hash_table_, table_length_ * sizeof(Fingerprint)); | |
| 522 | |
| 523 // The hash table may have shrunk, so make sure this is the end. | |
| 524 PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_)); | |
| 525 } | |
| 526 | |
| 527 bool VisitedLinkMaster::InitFromFile() { | |
| 528 DCHECK(file_ == NULL); | |
| 529 | |
| 530 FilePath filename; | |
| 531 GetDatabaseFileName(&filename); | |
| 532 ScopedFILE file_closer(OpenFile(filename, "rb+")); | |
| 533 if (!file_closer.get()) | |
| 534 return false; | |
| 535 | |
| 536 int32 num_entries, used_count; | |
| 537 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_)) | |
| 538 return false; // Header isn't valid. | |
| 539 | |
| 540 // Allocate and read the table. | |
| 541 if (!CreateURLTable(num_entries, false)) | |
| 542 return false; | |
| 543 if (!ReadFromFile(file_closer.get(), kFileHeaderSize, | |
| 544 hash_table_, num_entries * sizeof(Fingerprint))) { | |
| 545 FreeURLTable(); | |
| 546 return false; | |
| 547 } | |
| 548 used_items_ = used_count; | |
| 549 | |
| 550 #ifndef NDEBUG | |
| 551 DebugValidate(); | |
| 552 #endif | |
| 553 | |
| 554 file_ = static_cast<FILE**>(malloc(sizeof(*file_))); | |
| 555 *file_ = file_closer.release(); | |
| 556 return true; | |
| 557 } | |
| 558 | |
| 559 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { | |
| 560 int32 table_size = kDefaultTableSize; | |
| 561 if (table_size_override_) | |
| 562 table_size = table_size_override_; | |
| 563 | |
| 564 // The salt must be generated before the table so that it can be copied to | |
| 565 // the shared memory. | |
| 566 GenerateSalt(salt_); | |
| 567 if (!CreateURLTable(table_size, true)) | |
| 568 return false; | |
| 569 | |
| 570 #ifndef NDEBUG | |
| 571 DebugValidate(); | |
| 572 #endif | |
| 573 | |
| 574 if (suppress_rebuild) { | |
| 575 // When we disallow rebuilds (normally just unit tests), just use the | |
| 576 // current empty table. | |
| 577 WriteFullTable(); | |
| 578 return true; | |
| 579 } | |
| 580 | |
| 581 // This will build the table from history. On the first run, history will | |
| 582 // be empty, so this will be correct. This will also write the new table | |
| 583 // to disk. We don't want to save explicitly here, since the rebuild may | |
| 584 // not complete, leaving us with an empty but valid visited link database. | |
| 585 // In the future, we won't know we need to try rebuilding again. | |
| 586 return RebuildTableFromDelegate(); | |
| 587 } | |
| 588 | |
| 589 bool VisitedLinkMaster::ReadFileHeader(FILE* file, | |
| 590 int32* num_entries, | |
| 591 int32* used_count, | |
| 592 uint8 salt[LINK_SALT_LENGTH]) { | |
| 593 // Get file size. | |
| 594 // Note that there is no need to seek back to the original location in the | |
| 595 // file since ReadFromFile() [which is the next call accessing the file] | |
| 596 // seeks before reading. | |
| 597 if (fseek(file, 0, SEEK_END) == -1) | |
| 598 return false; | |
| 599 size_t file_size = ftell(file); | |
| 600 | |
| 601 if (file_size <= kFileHeaderSize) | |
| 602 return false; | |
| 603 | |
| 604 uint8 header[kFileHeaderSize]; | |
| 605 if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) | |
| 606 return false; | |
| 607 | |
| 608 // Verify the signature. | |
| 609 int32 signature; | |
| 610 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); | |
| 611 if (signature != kFileSignature) | |
| 612 return false; | |
| 613 | |
| 614 // Verify the version is up-to-date. As with other read errors, a version | |
| 615 // mistmatch will trigger a rebuild of the database from history, which will | |
| 616 // have the effect of migrating the database. | |
| 617 int32 version; | |
| 618 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); | |
| 619 if (version != kFileCurrentVersion) | |
| 620 return false; // Bad version. | |
| 621 | |
| 622 // Read the table size and make sure it matches the file size. | |
| 623 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); | |
| 624 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) | |
| 625 return false; // Bad size. | |
| 626 | |
| 627 // Read the used item count. | |
| 628 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); | |
| 629 if (*used_count > *num_entries) | |
| 630 return false; // Bad used item count; | |
| 631 | |
| 632 // Read the salt. | |
| 633 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); | |
| 634 | |
| 635 // This file looks OK from the header's perspective. | |
| 636 return true; | |
| 637 } | |
| 638 | |
| 639 bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) { | |
| 640 if (!database_name_override_.empty()) { | |
| 641 // use this filename, the directory must exist | |
| 642 *filename = database_name_override_; | |
| 643 return true; | |
| 644 } | |
| 645 | |
| 646 if (!browser_context_ || browser_context_->GetPath().empty()) | |
| 647 return false; | |
| 648 | |
| 649 FilePath profile_dir = browser_context_->GetPath(); | |
| 650 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); | |
| 651 return true; | |
| 652 } | |
| 653 | |
| 654 // Initializes the shared memory structure. The salt should already be filled | |
| 655 // in so that it can be written to the shared memory | |
| 656 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { | |
| 657 // The table is the size of the table followed by the entries. | |
| 658 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); | |
| 659 | |
| 660 // Create the shared memory object. | |
| 661 shared_memory_ = new base::SharedMemory(); | |
| 662 if (!shared_memory_) | |
| 663 return false; | |
| 664 | |
| 665 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) { | |
| 666 delete shared_memory_; | |
| 667 shared_memory_ = NULL; | |
| 668 return false; | |
| 669 } | |
| 670 | |
| 671 if (init_to_empty) { | |
| 672 memset(shared_memory_->memory(), 0, alloc_size); | |
| 673 used_items_ = 0; | |
| 674 } | |
| 675 table_length_ = num_entries; | |
| 676 | |
| 677 // Save the header for other processes to read. | |
| 678 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); | |
| 679 header->length = table_length_; | |
| 680 memcpy(header->salt, salt_, LINK_SALT_LENGTH); | |
| 681 | |
| 682 // Our table pointer is just the data immediately following the size. | |
| 683 hash_table_ = reinterpret_cast<Fingerprint*>( | |
| 684 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); | |
| 685 | |
| 686 return true; | |
| 687 } | |
| 688 | |
| 689 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { | |
| 690 base::SharedMemory *old_shared_memory = shared_memory_; | |
| 691 Fingerprint* old_hash_table = hash_table_; | |
| 692 int32 old_table_length = table_length_; | |
| 693 if (!CreateURLTable(num_entries, true)) { | |
| 694 // Try to put back the old state. | |
| 695 shared_memory_ = old_shared_memory; | |
| 696 hash_table_ = old_hash_table; | |
| 697 table_length_ = old_table_length; | |
| 698 return false; | |
| 699 } | |
| 700 | |
| 701 #ifndef NDEBUG | |
| 702 DebugValidate(); | |
| 703 #endif | |
| 704 | |
| 705 return true; | |
| 706 } | |
| 707 | |
| 708 void VisitedLinkMaster::FreeURLTable() { | |
| 709 if (shared_memory_) { | |
| 710 delete shared_memory_; | |
| 711 shared_memory_ = NULL; | |
| 712 } | |
| 713 if (!file_) | |
| 714 return; | |
| 715 PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_)); | |
| 716 // AsyncClose() will close the file and free the memory pointed by |file_|. | |
| 717 file_ = NULL; | |
| 718 } | |
| 719 | |
| 720 bool VisitedLinkMaster::ResizeTableIfNecessary() { | |
| 721 DCHECK(table_length_ > 0) << "Must have a table"; | |
| 722 | |
| 723 // Load limits for good performance/space. We are pretty conservative about | |
| 724 // keeping the table not very full. This is because we use linear probing | |
| 725 // which increases the likelihood of clumps of entries which will reduce | |
| 726 // performance. | |
| 727 const float max_table_load = 0.5f; // Grow when we're > this full. | |
| 728 const float min_table_load = 0.2f; // Shrink when we're < this full. | |
| 729 | |
| 730 float load = ComputeTableLoad(); | |
| 731 if (load < max_table_load && | |
| 732 (table_length_ <= static_cast<float>(kDefaultTableSize) || | |
| 733 load > min_table_load)) | |
| 734 return false; | |
| 735 | |
| 736 // Table needs to grow or shrink. | |
| 737 int new_size = NewTableSizeForCount(used_items_); | |
| 738 DCHECK(new_size > used_items_); | |
| 739 DCHECK(load <= min_table_load || new_size > table_length_); | |
| 740 ResizeTable(new_size); | |
| 741 return true; | |
| 742 } | |
| 743 | |
| 744 void VisitedLinkMaster::ResizeTable(int32 new_size) { | |
| 745 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_); | |
| 746 shared_memory_serial_++; | |
| 747 | |
| 748 #ifndef NDEBUG | |
| 749 DebugValidate(); | |
| 750 #endif | |
| 751 | |
| 752 base::SharedMemory* old_shared_memory = shared_memory_; | |
| 753 Fingerprint* old_hash_table = hash_table_; | |
| 754 int32 old_table_length = table_length_; | |
| 755 if (!BeginReplaceURLTable(new_size)) | |
| 756 return; | |
| 757 | |
| 758 // Now we have two tables, our local copy which is the old one, and the new | |
| 759 // one loaded into this object where we need to copy the data. | |
| 760 for (int32 i = 0; i < old_table_length; i++) { | |
| 761 Fingerprint cur = old_hash_table[i]; | |
| 762 if (cur) | |
| 763 AddFingerprint(cur, false); | |
| 764 } | |
| 765 | |
| 766 // On error unmapping, just forget about it since we can't do anything | |
| 767 // else to release it. | |
| 768 delete old_shared_memory; | |
| 769 | |
| 770 // Send an update notification to all child processes so they read the new | |
| 771 // table. | |
| 772 listener_->NewTable(shared_memory_); | |
| 773 | |
| 774 #ifndef NDEBUG | |
| 775 DebugValidate(); | |
| 776 #endif | |
| 777 | |
| 778 // The new table needs to be written to disk. | |
| 779 WriteFullTable(); | |
| 780 } | |
| 781 | |
| 782 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { | |
| 783 // These table sizes are selected to be the maximum prime number less than | |
| 784 // a "convenient" multiple of 1K. | |
| 785 static const int table_sizes[] = { | |
| 786 16381, // 16K = 16384 <- don't shrink below this table size | |
| 787 // (should be == default_table_size) | |
| 788 32767, // 32K = 32768 | |
| 789 65521, // 64K = 65536 | |
| 790 130051, // 128K = 131072 | |
| 791 262127, // 256K = 262144 | |
| 792 524269, // 512K = 524288 | |
| 793 1048549, // 1M = 1048576 | |
| 794 2097143, // 2M = 2097152 | |
| 795 4194301, // 4M = 4194304 | |
| 796 8388571, // 8M = 8388608 | |
| 797 16777199, // 16M = 16777216 | |
| 798 33554347}; // 32M = 33554432 | |
| 799 | |
| 800 // Try to leave the table 33% full. | |
| 801 int desired = item_count * 3; | |
| 802 | |
| 803 // Find the closest prime. | |
| 804 for (size_t i = 0; i < arraysize(table_sizes); i ++) { | |
| 805 if (table_sizes[i] > desired) | |
| 806 return table_sizes[i]; | |
| 807 } | |
| 808 | |
| 809 // Growing very big, just approximate a "good" number, not growing as much | |
| 810 // as normal. | |
| 811 return item_count * 2 - 1; | |
| 812 } | |
| 813 | |
| 814 // See the TableBuilder definition in the header file for how this works. | |
| 815 bool VisitedLinkMaster::RebuildTableFromDelegate() { | |
| 816 DCHECK(!table_builder_); | |
| 817 | |
| 818 // TODO(brettw) make sure we have reasonable salt! | |
| 819 table_builder_ = new TableBuilder(this, salt_); | |
| 820 delegate_->RebuildTable(table_builder_); | |
| 821 return true; | |
| 822 } | |
| 823 | |
| 824 // See the TableBuilder declaration above for how this works. | |
| 825 void VisitedLinkMaster::OnTableRebuildComplete( | |
| 826 bool success, | |
| 827 const std::vector<Fingerprint>& fingerprints) { | |
| 828 if (success) { | |
| 829 // Replace the old table with a new blank one. | |
| 830 shared_memory_serial_++; | |
| 831 | |
| 832 // We are responsible for freeing it AFTER it has been replaced if | |
| 833 // replacement succeeds. | |
| 834 base::SharedMemory* old_shared_memory = shared_memory_; | |
| 835 | |
| 836 int new_table_size = NewTableSizeForCount( | |
| 837 static_cast<int>(fingerprints.size() + added_since_rebuild_.size())); | |
| 838 if (BeginReplaceURLTable(new_table_size)) { | |
| 839 // Free the old table. | |
| 840 delete old_shared_memory; | |
| 841 | |
| 842 // Add the stored fingerprints to the hash table. | |
| 843 for (size_t i = 0; i < fingerprints.size(); i++) | |
| 844 AddFingerprint(fingerprints[i], false); | |
| 845 | |
| 846 // Also add anything that was added while we were asynchronously | |
| 847 // generating the new table. | |
| 848 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin(); | |
| 849 i != added_since_rebuild_.end(); ++i) | |
| 850 AddFingerprint(*i, false); | |
| 851 added_since_rebuild_.clear(); | |
| 852 | |
| 853 // Now handle deletions. | |
| 854 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_); | |
| 855 deleted_since_rebuild_.clear(); | |
| 856 | |
| 857 // Send an update notification to all child processes. | |
| 858 listener_->NewTable(shared_memory_); | |
| 859 | |
| 860 WriteFullTable(); | |
| 861 } | |
| 862 } | |
| 863 table_builder_ = NULL; // Will release our reference to the builder. | |
| 864 | |
| 865 // Notify the unit test that the rebuild is complete (will be NULL in prod.) | |
| 866 if (!rebuild_complete_task_.is_null()) { | |
| 867 rebuild_complete_task_.Run(); | |
| 868 rebuild_complete_task_.Reset(); | |
| 869 } | |
| 870 } | |
| 871 | |
| 872 void VisitedLinkMaster::WriteToFile(FILE** file, | |
| 873 off_t offset, | |
| 874 void* data, | |
| 875 int32 data_size) { | |
| 876 #ifndef NDEBUG | |
| 877 posted_asynchronous_operation_ = true; | |
| 878 #endif | |
| 879 PostIOTask(FROM_HERE, | |
| 880 base::Bind(&AsyncWrite, file, offset, | |
| 881 std::string(static_cast<const char*>(data), data_size))); | |
| 882 } | |
| 883 | |
| 884 void VisitedLinkMaster::WriteUsedItemCountToFile() { | |
| 885 if (!file_) | |
| 886 return; // See comment on the file_ variable for why this might happen. | |
| 887 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); | |
| 888 } | |
| 889 | |
| 890 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { | |
| 891 if (!file_) | |
| 892 return; // See comment on the file_ variable for why this might happen. | |
| 893 if (last_hash < first_hash) { | |
| 894 // Handle wraparound at 0. This first write is first_hash->EOF | |
| 895 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, | |
| 896 &hash_table_[first_hash], | |
| 897 (table_length_ - first_hash + 1) * sizeof(Fingerprint)); | |
| 898 | |
| 899 // Now do 0->last_lash. | |
| 900 WriteToFile(file_, kFileHeaderSize, hash_table_, | |
| 901 (last_hash + 1) * sizeof(Fingerprint)); | |
| 902 } else { | |
| 903 // Normal case, just write the range. | |
| 904 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, | |
| 905 &hash_table_[first_hash], | |
| 906 (last_hash - first_hash + 1) * sizeof(Fingerprint)); | |
| 907 } | |
| 908 } | |
| 909 | |
| 910 bool VisitedLinkMaster::ReadFromFile(FILE* file, | |
| 911 off_t offset, | |
| 912 void* data, | |
| 913 size_t data_size) { | |
| 914 #ifndef NDEBUG | |
| 915 // Since this function is synchronous, we require that no asynchronous | |
| 916 // operations could possibly be pending. | |
| 917 DCHECK(!posted_asynchronous_operation_); | |
| 918 #endif | |
| 919 | |
| 920 if (fseek(file, offset, SEEK_SET) != 0) | |
| 921 return false; | |
| 922 | |
| 923 size_t num_read = fread(data, 1, data_size, file); | |
| 924 return num_read == data_size; | |
| 925 } | |
| 926 | |
| 927 // VisitedLinkTableBuilder ---------------------------------------------------- | |
| 928 | |
| 929 VisitedLinkMaster::TableBuilder::TableBuilder( | |
| 930 VisitedLinkMaster* master, | |
| 931 const uint8 salt[LINK_SALT_LENGTH]) | |
| 932 : master_(master), | |
| 933 success_(true) { | |
| 934 fingerprints_.reserve(4096); | |
| 935 memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8)); | |
| 936 } | |
| 937 | |
| 938 // TODO(brettw): Do we want to try to cancel the request if this happens? It | |
| 939 // could delay shutdown if there are a lot of URLs. | |
| 940 void VisitedLinkMaster::TableBuilder::DisownMaster() { | |
| 941 master_ = NULL; | |
| 942 } | |
| 943 | |
| 944 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { | |
| 945 if (!url.is_empty()) { | |
| 946 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( | |
| 947 url.spec().data(), url.spec().length(), salt_)); | |
| 948 } | |
| 949 } | |
| 950 | |
| 951 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { | |
| 952 success_ = success; | |
| 953 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; | |
| 954 | |
| 955 // Marshal to the main thread to notify the VisitedLinkMaster that the | |
| 956 // rebuild is complete. | |
| 957 BrowserThread::PostTask( | |
| 958 BrowserThread::UI, FROM_HERE, | |
| 959 base::Bind(&TableBuilder::OnCompleteMainThread, this)); | |
| 960 } | |
| 961 | |
| 962 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() { | |
| 963 if (master_) | |
| 964 master_->OnTableRebuildComplete(success_, fingerprints_); | |
| 965 } | |
| OLD | NEW |