| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "chrome/browser/visitedlink_master.h" | |
| 6 | |
| 7 #if defined(OS_WIN) | |
| 8 #include <windows.h> | |
| 9 #include <io.h> | |
| 10 #include <shlobj.h> | |
| 11 #endif // defined(OS_WIN) | |
| 12 #include <stdio.h> | |
| 13 | |
| 14 #include <algorithm> | |
| 15 | |
| 16 #include "base/file_util.h" | |
| 17 #include "base/logging.h" | |
| 18 #include "base/message_loop.h" | |
| 19 #include "base/path_service.h" | |
| 20 #include "base/process_util.h" | |
| 21 #include "base/rand_util.h" | |
| 22 #include "base/stack_container.h" | |
| 23 #include "base/string_util.h" | |
| 24 #include "base/thread_restrictions.h" | |
| 25 #include "chrome/browser/browser_process.h" | |
| 26 #include "chrome/browser/browser_thread.h" | |
| 27 #include "chrome/browser/history/history.h" | |
| 28 #include "chrome/browser/profile.h" | |
| 29 | |
| 30 using file_util::ScopedFILE; | |
| 31 using file_util::OpenFile; | |
| 32 using file_util::TruncateFile; | |
| 33 | |
| 34 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0; | |
| 35 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4; | |
| 36 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8; | |
| 37 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12; | |
| 38 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16; | |
| 39 | |
| 40 const int32 VisitedLinkMaster::kFileCurrentVersion = 2; | |
| 41 | |
| 42 // the signature at the beginning of the URL table = "VLnk" (visited links) | |
| 43 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56; | |
| 44 const size_t VisitedLinkMaster::kFileHeaderSize = | |
| 45 kFileHeaderSaltOffset + LINK_SALT_LENGTH; | |
| 46 | |
| 47 // This value should also be the same as the smallest size in the lookup | |
| 48 // table in NewTableSizeForCount (prime number). | |
| 49 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381; | |
| 50 | |
| 51 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64; | |
| 52 | |
| 53 namespace { | |
| 54 | |
| 55 // Fills the given salt structure with some quasi-random values | |
| 56 // It is not necessary to generate a cryptographically strong random string, | |
| 57 // only that it be reasonably different for different users. | |
| 58 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { | |
| 59 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; | |
| 60 uint64 randval = base::RandUint64(); | |
| 61 memcpy(salt, &randval, 8); | |
| 62 } | |
| 63 | |
| 64 // AsyncWriter ---------------------------------------------------------------- | |
| 65 | |
| 66 // This task executes on a background thread and executes a write. This | |
| 67 // prevents us from blocking the UI thread doing I/O. | |
| 68 class AsyncWriter : public Task { | |
| 69 public: | |
| 70 AsyncWriter(FILE* file, int32 offset, const void* data, size_t data_len) | |
| 71 : file_(file), | |
| 72 offset_(offset) { | |
| 73 data_->resize(data_len); | |
| 74 memcpy(&*data_->begin(), data, data_len); | |
| 75 } | |
| 76 | |
| 77 virtual void Run() { | |
| 78 WriteToFile(file_, offset_, | |
| 79 &*data_->begin(), static_cast<int32>(data_->size())); | |
| 80 } | |
| 81 | |
| 82 // Exposed as a static so it can be called directly from the Master to | |
| 83 // reduce the number of platform-specific I/O sites we have. Returns true if | |
| 84 // the write was complete. | |
| 85 static bool WriteToFile(FILE* file, | |
| 86 off_t offset, | |
| 87 const void* data, | |
| 88 size_t data_len) { | |
| 89 if (fseek(file, offset, SEEK_SET) != 0) | |
| 90 return false; // Don't write to an invalid part of the file. | |
| 91 | |
| 92 size_t num_written = fwrite(data, 1, data_len, file); | |
| 93 | |
| 94 // The write may not make it to the kernel (stdlib may buffer the write) | |
| 95 // until the next fseek/fclose call. If we crash, it's easy for our used | |
| 96 // item count to be out of sync with the number of hashes we write. | |
| 97 // Protect against this by calling fflush. | |
| 98 int ret = fflush(file); | |
| 99 DCHECK_EQ(0, ret); | |
| 100 return num_written == data_len; | |
| 101 } | |
| 102 | |
| 103 private: | |
| 104 // The data to write and where to write it. | |
| 105 FILE* file_; | |
| 106 int32 offset_; // Offset from the beginning of the file. | |
| 107 | |
| 108 // Most writes are just a single fingerprint, so we reserve that much in this | |
| 109 // object to avoid mallocs in that case. | |
| 110 StackVector<char, sizeof(VisitedLinkCommon::Fingerprint)> data_; | |
| 111 | |
| 112 DISALLOW_COPY_AND_ASSIGN(AsyncWriter); | |
| 113 }; | |
| 114 | |
| 115 // Used to asynchronously set the end of the file. This must be done on the | |
| 116 // same thread as the writing to keep things synchronized. | |
| 117 class AsyncSetEndOfFile : public Task { | |
| 118 public: | |
| 119 explicit AsyncSetEndOfFile(FILE* file) : file_(file) {} | |
| 120 | |
| 121 virtual void Run() { | |
| 122 TruncateFile(file_); | |
| 123 } | |
| 124 | |
| 125 private: | |
| 126 FILE* file_; | |
| 127 DISALLOW_COPY_AND_ASSIGN(AsyncSetEndOfFile); | |
| 128 }; | |
| 129 | |
| 130 // Used to asynchronously close a file. This must be done on the same thread as | |
| 131 // the writing to keep things synchronized. | |
| 132 class AsyncCloseHandle : public Task { | |
| 133 public: | |
| 134 explicit AsyncCloseHandle(FILE* file) : file_(file) {} | |
| 135 | |
| 136 virtual void Run() { | |
| 137 fclose(file_); | |
| 138 } | |
| 139 | |
| 140 private: | |
| 141 FILE* file_; | |
| 142 DISALLOW_COPY_AND_ASSIGN(AsyncCloseHandle); | |
| 143 }; | |
| 144 | |
| 145 } // namespace | |
| 146 | |
| 147 // TableBuilder --------------------------------------------------------------- | |
| 148 | |
| 149 // How rebuilding from history works | |
| 150 // --------------------------------- | |
| 151 // | |
| 152 // We mark that we're rebuilding from history by setting the table_builder_ | |
| 153 // member in VisitedLinkMaster to the TableBuilder we create. This builder | |
| 154 // will be called on the history thread by the history system for every URL | |
| 155 // in the database. | |
| 156 // | |
| 157 // The builder will store the fingerprints for those URLs, and then marshalls | |
| 158 // back to the main thread where the VisitedLinkMaster will be notified. The | |
| 159 // master then replaces its table with a new table containing the computed | |
| 160 // fingerprints. | |
| 161 // | |
| 162 // The builder must remain active while the history system is using it. | |
| 163 // Sometimes, the master will be deleted before the rebuild is complete, in | |
| 164 // which case it notifies the builder via DisownMaster(). The builder will | |
| 165 // delete itself once rebuilding is complete, and not execute any callback. | |
| 166 class VisitedLinkMaster::TableBuilder | |
| 167 : public HistoryService::URLEnumerator, | |
| 168 public base::RefCountedThreadSafe<TableBuilder> { | |
| 169 public: | |
| 170 TableBuilder(VisitedLinkMaster* master, | |
| 171 const uint8 salt[LINK_SALT_LENGTH]); | |
| 172 | |
| 173 // Called on the main thread when the master is being destroyed. This will | |
| 174 // prevent a crash when the query completes and the master is no longer | |
| 175 // around. We can not actually do anything but mark this fact, since the | |
| 176 // table will be being rebuilt simultaneously on the other thread. | |
| 177 void DisownMaster(); | |
| 178 | |
| 179 // HistoryService::URLEnumerator | |
| 180 virtual void OnURL(const GURL& url); | |
| 181 virtual void OnComplete(bool succeed); | |
| 182 | |
| 183 private: | |
| 184 friend class base::RefCountedThreadSafe<TableBuilder>; | |
| 185 | |
| 186 ~TableBuilder() {} | |
| 187 | |
| 188 // OnComplete mashals to this function on the main thread to do the | |
| 189 // notification. | |
| 190 void OnCompleteMainThread(); | |
| 191 | |
| 192 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD! | |
| 193 VisitedLinkMaster* master_; | |
| 194 | |
| 195 // Indicates whether the operation has failed or not. | |
| 196 bool success_; | |
| 197 | |
| 198 // Salt for this new table. | |
| 199 uint8 salt_[LINK_SALT_LENGTH]; | |
| 200 | |
| 201 // Stores the fingerprints we computed on the background thread. | |
| 202 VisitedLinkCommon::Fingerprints fingerprints_; | |
| 203 }; | |
| 204 | |
| 205 // VisitedLinkMaster ---------------------------------------------------------- | |
| 206 | |
| 207 VisitedLinkMaster::VisitedLinkMaster(Listener* listener, | |
| 208 Profile* profile) { | |
| 209 InitMembers(listener, profile); | |
| 210 } | |
| 211 | |
| 212 VisitedLinkMaster::VisitedLinkMaster(Listener* listener, | |
| 213 HistoryService* history_service, | |
| 214 bool suppress_rebuild, | |
| 215 const FilePath& filename, | |
| 216 int32 default_table_size) { | |
| 217 InitMembers(listener, NULL); | |
| 218 | |
| 219 database_name_override_ = filename; | |
| 220 table_size_override_ = default_table_size; | |
| 221 history_service_override_ = history_service; | |
| 222 suppress_rebuild_ = suppress_rebuild; | |
| 223 } | |
| 224 | |
| 225 VisitedLinkMaster::~VisitedLinkMaster() { | |
| 226 if (table_builder_.get()) { | |
| 227 // Prevent the table builder from calling us back now that we're being | |
| 228 // destroyed. Note that we DON'T delete the object, since the history | |
| 229 // system is still writing into it. When that is complete, the table | |
| 230 // builder will destroy itself when it finds we are gone. | |
| 231 table_builder_->DisownMaster(); | |
| 232 } | |
| 233 FreeURLTable(); | |
| 234 } | |
| 235 | |
| 236 void VisitedLinkMaster::InitMembers(Listener* listener, Profile* profile) { | |
| 237 DCHECK(listener); | |
| 238 | |
| 239 listener_ = listener; | |
| 240 file_ = NULL; | |
| 241 shared_memory_ = NULL; | |
| 242 shared_memory_serial_ = 0; | |
| 243 used_items_ = 0; | |
| 244 table_size_override_ = 0; | |
| 245 history_service_override_ = NULL; | |
| 246 suppress_rebuild_ = false; | |
| 247 profile_ = profile; | |
| 248 | |
| 249 #ifndef NDEBUG | |
| 250 posted_asynchronous_operation_ = false; | |
| 251 #endif | |
| 252 } | |
| 253 | |
| 254 bool VisitedLinkMaster::Init() { | |
| 255 // We probably shouldn't be loading this from the UI thread, | |
| 256 // but it does need to happen early on in startup. | |
| 257 // http://code.google.com/p/chromium/issues/detail?id=24163 | |
| 258 base::ThreadRestrictions::ScopedAllowIO allow_io; | |
| 259 if (!InitFromFile()) | |
| 260 return InitFromScratch(suppress_rebuild_); | |
| 261 return true; | |
| 262 } | |
| 263 | |
| 264 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) { | |
| 265 // Extra check that we are not off the record. This should not happen. | |
| 266 if (profile_ && profile_->IsOffTheRecord()) { | |
| 267 NOTREACHED(); | |
| 268 return null_hash_; | |
| 269 } | |
| 270 | |
| 271 if (!url.is_valid()) | |
| 272 return null_hash_; // Don't add invalid URLs. | |
| 273 | |
| 274 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(), | |
| 275 url.spec().size(), | |
| 276 salt_); | |
| 277 if (table_builder_) { | |
| 278 // If we have a pending delete for this fingerprint, cancel it. | |
| 279 std::set<Fingerprint>::iterator found = | |
| 280 deleted_since_rebuild_.find(fingerprint); | |
| 281 if (found != deleted_since_rebuild_.end()) | |
| 282 deleted_since_rebuild_.erase(found); | |
| 283 | |
| 284 // A rebuild is in progress, save this addition in the temporary list so | |
| 285 // it can be added once rebuild is complete. | |
| 286 added_since_rebuild_.insert(fingerprint); | |
| 287 } | |
| 288 | |
| 289 // If the table is "full", we don't add URLs and just drop them on the floor. | |
| 290 // This can happen if we get thousands of new URLs and something causes | |
| 291 // the table resizing to fail. This check prevents a hang in that case. Note | |
| 292 // that this is *not* the resize limit, this is just a sanity check. | |
| 293 if (used_items_ / 8 > table_length_ / 10) | |
| 294 return null_hash_; // Table is more than 80% full. | |
| 295 | |
| 296 return AddFingerprint(fingerprint, true); | |
| 297 } | |
| 298 | |
| 299 void VisitedLinkMaster::AddURL(const GURL& url) { | |
| 300 Hash index = TryToAddURL(url); | |
| 301 if (!table_builder_ && index != null_hash_) { | |
| 302 // Not rebuilding, so we want to keep the file on disk up-to-date. | |
| 303 WriteUsedItemCountToFile(); | |
| 304 WriteHashRangeToFile(index, index); | |
| 305 ResizeTableIfNecessary(); | |
| 306 } | |
| 307 } | |
| 308 | |
| 309 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { | |
| 310 for (std::vector<GURL>::const_iterator i = url.begin(); | |
| 311 i != url.end(); ++i) { | |
| 312 Hash index = TryToAddURL(*i); | |
| 313 if (!table_builder_ && index != null_hash_) | |
| 314 ResizeTableIfNecessary(); | |
| 315 } | |
| 316 | |
| 317 // Keeps the file on disk up-to-date. | |
| 318 if (!table_builder_) | |
| 319 WriteFullTable(); | |
| 320 } | |
| 321 | |
| 322 void VisitedLinkMaster::DeleteAllURLs() { | |
| 323 // Any pending modifications are invalid. | |
| 324 added_since_rebuild_.clear(); | |
| 325 deleted_since_rebuild_.clear(); | |
| 326 | |
| 327 // Clear the hash table. | |
| 328 used_items_ = 0; | |
| 329 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); | |
| 330 | |
| 331 // Resize it if it is now too empty. Resize may write the new table out for | |
| 332 // us, otherwise, schedule writing the new table to disk ourselves. | |
| 333 if (!ResizeTableIfNecessary()) | |
| 334 WriteFullTable(); | |
| 335 | |
| 336 listener_->Reset(); | |
| 337 } | |
| 338 | |
| 339 void VisitedLinkMaster::DeleteURLs(const std::set<GURL>& urls) { | |
| 340 typedef std::set<GURL>::const_iterator SetIterator; | |
| 341 | |
| 342 if (urls.empty()) | |
| 343 return; | |
| 344 | |
| 345 listener_->Reset(); | |
| 346 | |
| 347 if (table_builder_) { | |
| 348 // A rebuild is in progress, save this deletion in the temporary list so | |
| 349 // it can be added once rebuild is complete. | |
| 350 for (SetIterator i = urls.begin(); i != urls.end(); ++i) { | |
| 351 if (!i->is_valid()) | |
| 352 continue; | |
| 353 | |
| 354 Fingerprint fingerprint = | |
| 355 ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_); | |
| 356 deleted_since_rebuild_.insert(fingerprint); | |
| 357 | |
| 358 // If the URL was just added and now we're deleting it, it may be in the | |
| 359 // list of things added since the last rebuild. Delete it from that list. | |
| 360 std::set<Fingerprint>::iterator found = | |
| 361 added_since_rebuild_.find(fingerprint); | |
| 362 if (found != added_since_rebuild_.end()) | |
| 363 added_since_rebuild_.erase(found); | |
| 364 | |
| 365 // Delete the URLs from the in-memory table, but don't bother writing | |
| 366 // to disk since it will be replaced soon. | |
| 367 DeleteFingerprint(fingerprint, false); | |
| 368 } | |
| 369 return; | |
| 370 } | |
| 371 | |
| 372 // Compute the deleted URLs' fingerprints and delete them | |
| 373 std::set<Fingerprint> deleted_fingerprints; | |
| 374 for (SetIterator i = urls.begin(); i != urls.end(); ++i) { | |
| 375 if (!i->is_valid()) | |
| 376 continue; | |
| 377 deleted_fingerprints.insert( | |
| 378 ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_)); | |
| 379 } | |
| 380 DeleteFingerprintsFromCurrentTable(deleted_fingerprints); | |
| 381 } | |
| 382 | |
| 383 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm | |
| 384 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint( | |
| 385 Fingerprint fingerprint, | |
| 386 bool send_notifications) { | |
| 387 if (!hash_table_ || table_length_ == 0) { | |
| 388 NOTREACHED(); // Not initialized. | |
| 389 return null_hash_; | |
| 390 } | |
| 391 | |
| 392 Hash cur_hash = HashFingerprint(fingerprint); | |
| 393 Hash first_hash = cur_hash; | |
| 394 while (true) { | |
| 395 Fingerprint cur_fingerprint = FingerprintAt(cur_hash); | |
| 396 if (cur_fingerprint == fingerprint) | |
| 397 return null_hash_; // This fingerprint is already in there, do nothing. | |
| 398 | |
| 399 if (cur_fingerprint == null_fingerprint_) { | |
| 400 // End of probe sequence found, insert here. | |
| 401 hash_table_[cur_hash] = fingerprint; | |
| 402 used_items_++; | |
| 403 // If allowed, notify listener that a new visited link was added. | |
| 404 if (send_notifications) | |
| 405 listener_->Add(fingerprint); | |
| 406 return cur_hash; | |
| 407 } | |
| 408 | |
| 409 // Advance in the probe sequence. | |
| 410 cur_hash = IncrementHash(cur_hash); | |
| 411 if (cur_hash == first_hash) { | |
| 412 // This means that we've wrapped around and are about to go into an | |
| 413 // infinite loop. Something was wrong with the hashtable resizing | |
| 414 // logic, so stop here. | |
| 415 NOTREACHED(); | |
| 416 return null_hash_; | |
| 417 } | |
| 418 } | |
| 419 } | |
| 420 | |
| 421 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( | |
| 422 const std::set<Fingerprint>& fingerprints) { | |
| 423 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); | |
| 424 | |
| 425 // Delete the URLs from the table. | |
| 426 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); | |
| 427 i != fingerprints.end(); ++i) | |
| 428 DeleteFingerprint(*i, !bulk_write); | |
| 429 | |
| 430 // These deleted fingerprints may make us shrink the table. | |
| 431 if (ResizeTableIfNecessary()) | |
| 432 return; // The resize function wrote the new table to disk for us. | |
| 433 | |
| 434 // Nobody wrote this out for us, write the full file to disk. | |
| 435 if (bulk_write) | |
| 436 WriteFullTable(); | |
| 437 } | |
| 438 | |
| 439 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint, | |
| 440 bool update_file) { | |
| 441 if (!hash_table_ || table_length_ == 0) { | |
| 442 NOTREACHED(); // Not initialized. | |
| 443 return false; | |
| 444 } | |
| 445 if (!IsVisited(fingerprint)) | |
| 446 return false; // Not in the database to delete. | |
| 447 | |
| 448 // First update the header used count. | |
| 449 used_items_--; | |
| 450 if (update_file) | |
| 451 WriteUsedItemCountToFile(); | |
| 452 | |
| 453 Hash deleted_hash = HashFingerprint(fingerprint); | |
| 454 | |
| 455 // Find the range of "stuff" in the hash table that is adjacent to this | |
| 456 // fingerprint. These are things that could be affected by the change in | |
| 457 // the hash table. Since we use linear probing, anything after the deleted | |
| 458 // item up until an empty item could be affected. | |
| 459 Hash end_range = deleted_hash; | |
| 460 while (true) { | |
| 461 Hash next_hash = IncrementHash(end_range); | |
| 462 if (next_hash == deleted_hash) | |
| 463 break; // We wrapped around and the whole table is full. | |
| 464 if (!hash_table_[next_hash]) | |
| 465 break; // Found the last spot. | |
| 466 end_range = next_hash; | |
| 467 } | |
| 468 | |
| 469 // We could get all fancy and move the affected fingerprints around, but | |
| 470 // instead we just remove them all and re-add them (minus our deleted one). | |
| 471 // This will mean there's a small window of time where the affected links | |
| 472 // won't be marked visited. | |
| 473 StackVector<Fingerprint, 32> shuffled_fingerprints; | |
| 474 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive. | |
| 475 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) { | |
| 476 if (hash_table_[i] != fingerprint) { | |
| 477 // Don't save the one we're deleting! | |
| 478 shuffled_fingerprints->push_back(hash_table_[i]); | |
| 479 | |
| 480 // This will balance the increment of this value in AddFingerprint below | |
| 481 // so there is no net change. | |
| 482 used_items_--; | |
| 483 } | |
| 484 hash_table_[i] = null_fingerprint_; | |
| 485 } | |
| 486 | |
| 487 if (!shuffled_fingerprints->empty()) { | |
| 488 // Need to add the new items back. | |
| 489 for (size_t i = 0; i < shuffled_fingerprints->size(); i++) | |
| 490 AddFingerprint(shuffled_fingerprints[i], false); | |
| 491 } | |
| 492 | |
| 493 // Write the affected range to disk [deleted_hash, end_range]. | |
| 494 if (update_file) | |
| 495 WriteHashRangeToFile(deleted_hash, end_range); | |
| 496 | |
| 497 return true; | |
| 498 } | |
| 499 | |
| 500 bool VisitedLinkMaster::WriteFullTable() { | |
| 501 // This function can get called when the file is open, for example, when we | |
| 502 // resize the table. We must handle this case and not try to reopen the file, | |
| 503 // since there may be write operations pending on the file I/O thread. | |
| 504 // | |
| 505 // Note that once we start writing, we do not delete on error. This means | |
| 506 // there can be a partial file, but the short file will be detected next time | |
| 507 // we start, and will be replaced. | |
| 508 // | |
| 509 // This might possibly get corrupted if we crash in the middle of writing. | |
| 510 // We should pick up the most common types of these failures when we notice | |
| 511 // that the file size is different when we load it back in, and then we will | |
| 512 // regenerate the table. | |
| 513 if (!file_) { | |
| 514 FilePath filename; | |
| 515 GetDatabaseFileName(&filename); | |
| 516 file_ = OpenFile(filename, "wb+"); | |
| 517 if (!file_) { | |
| 518 DLOG(ERROR) << "Failed to open file " << filename.value(); | |
| 519 return false; | |
| 520 } | |
| 521 } | |
| 522 | |
| 523 // Write the new header. | |
| 524 int32 header[4]; | |
| 525 header[0] = kFileSignature; | |
| 526 header[1] = kFileCurrentVersion; | |
| 527 header[2] = table_length_; | |
| 528 header[3] = used_items_; | |
| 529 WriteToFile(file_, 0, header, sizeof(header)); | |
| 530 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH); | |
| 531 | |
| 532 // Write the hash data. | |
| 533 WriteToFile(file_, kFileHeaderSize, | |
| 534 hash_table_, table_length_ * sizeof(Fingerprint)); | |
| 535 | |
| 536 // The hash table may have shrunk, so make sure this is the end. | |
| 537 BrowserThread::PostTask( | |
| 538 BrowserThread::FILE, FROM_HERE, new AsyncSetEndOfFile(file_)); | |
| 539 return true; | |
| 540 } | |
| 541 | |
| 542 bool VisitedLinkMaster::InitFromFile() { | |
| 543 DCHECK(file_ == NULL); | |
| 544 | |
| 545 FilePath filename; | |
| 546 GetDatabaseFileName(&filename); | |
| 547 ScopedFILE file_closer(OpenFile(filename, "rb+")); | |
| 548 if (!file_closer.get()) | |
| 549 return false; | |
| 550 | |
| 551 int32 num_entries, used_count; | |
| 552 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_)) | |
| 553 return false; // Header isn't valid. | |
| 554 | |
| 555 // Allocate and read the table. | |
| 556 if (!CreateURLTable(num_entries, false)) | |
| 557 return false; | |
| 558 if (!ReadFromFile(file_closer.get(), kFileHeaderSize, | |
| 559 hash_table_, num_entries * sizeof(Fingerprint))) { | |
| 560 FreeURLTable(); | |
| 561 return false; | |
| 562 } | |
| 563 used_items_ = used_count; | |
| 564 | |
| 565 #ifndef NDEBUG | |
| 566 DebugValidate(); | |
| 567 #endif | |
| 568 | |
| 569 file_ = file_closer.release(); | |
| 570 return true; | |
| 571 } | |
| 572 | |
| 573 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { | |
| 574 int32 table_size = kDefaultTableSize; | |
| 575 if (table_size_override_) | |
| 576 table_size = table_size_override_; | |
| 577 | |
| 578 // The salt must be generated before the table so that it can be copied to | |
| 579 // the shared memory. | |
| 580 GenerateSalt(salt_); | |
| 581 if (!CreateURLTable(table_size, true)) | |
| 582 return false; | |
| 583 | |
| 584 #ifndef NDEBUG | |
| 585 DebugValidate(); | |
| 586 #endif | |
| 587 | |
| 588 if (suppress_rebuild) { | |
| 589 // When we disallow rebuilds (normally just unit tests), just use the | |
| 590 // current empty table. | |
| 591 return WriteFullTable(); | |
| 592 } | |
| 593 | |
| 594 // This will build the table from history. On the first run, history will | |
| 595 // be empty, so this will be correct. This will also write the new table | |
| 596 // to disk. We don't want to save explicitly here, since the rebuild may | |
| 597 // not complete, leaving us with an empty but valid visited link database. | |
| 598 // In the future, we won't know we need to try rebuilding again. | |
| 599 return RebuildTableFromHistory(); | |
| 600 } | |
| 601 | |
| 602 bool VisitedLinkMaster::ReadFileHeader(FILE* file, | |
| 603 int32* num_entries, | |
| 604 int32* used_count, | |
| 605 uint8 salt[LINK_SALT_LENGTH]) { | |
| 606 // Get file size. | |
| 607 // Note that there is no need to seek back to the original location in the | |
| 608 // file since ReadFromFile() [which is the next call accessing the file] | |
| 609 // seeks before reading. | |
| 610 if (fseek(file, 0, SEEK_END) == -1) | |
| 611 return false; | |
| 612 size_t file_size = ftell(file); | |
| 613 | |
| 614 if (file_size <= kFileHeaderSize) | |
| 615 return false; | |
| 616 | |
| 617 uint8 header[kFileHeaderSize]; | |
| 618 if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) | |
| 619 return false; | |
| 620 | |
| 621 // Verify the signature. | |
| 622 int32 signature; | |
| 623 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); | |
| 624 if (signature != kFileSignature) | |
| 625 return false; | |
| 626 | |
| 627 // Verify the version is up-to-date. As with other read errors, a version | |
| 628 // mistmatch will trigger a rebuild of the database from history, which will | |
| 629 // have the effect of migrating the database. | |
| 630 int32 version; | |
| 631 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); | |
| 632 if (version != kFileCurrentVersion) | |
| 633 return false; // Bad version. | |
| 634 | |
| 635 // Read the table size and make sure it matches the file size. | |
| 636 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); | |
| 637 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) | |
| 638 return false; // Bad size. | |
| 639 | |
| 640 // Read the used item count. | |
| 641 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); | |
| 642 if (*used_count > *num_entries) | |
| 643 return false; // Bad used item count; | |
| 644 | |
| 645 // Read the salt. | |
| 646 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); | |
| 647 | |
| 648 // This file looks OK from the header's perspective. | |
| 649 return true; | |
| 650 } | |
| 651 | |
| 652 bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) { | |
| 653 if (!database_name_override_.empty()) { | |
| 654 // use this filename, the directory must exist | |
| 655 *filename = database_name_override_; | |
| 656 return true; | |
| 657 } | |
| 658 | |
| 659 if (!profile_ || profile_->GetPath().empty()) | |
| 660 return false; | |
| 661 | |
| 662 FilePath profile_dir = profile_->GetPath(); | |
| 663 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); | |
| 664 return true; | |
| 665 } | |
| 666 | |
| 667 // Initializes the shared memory structure. The salt should already be filled | |
| 668 // in so that it can be written to the shared memory | |
| 669 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { | |
| 670 // The table is the size of the table followed by the entries. | |
| 671 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); | |
| 672 | |
| 673 // Create the shared memory object. | |
| 674 shared_memory_ = new base::SharedMemory(); | |
| 675 if (!shared_memory_) | |
| 676 return false; | |
| 677 | |
| 678 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) { | |
| 679 delete shared_memory_; | |
| 680 shared_memory_ = NULL; | |
| 681 return false; | |
| 682 } | |
| 683 | |
| 684 if (init_to_empty) { | |
| 685 memset(shared_memory_->memory(), 0, alloc_size); | |
| 686 used_items_ = 0; | |
| 687 } | |
| 688 table_length_ = num_entries; | |
| 689 | |
| 690 // Save the header for other processes to read. | |
| 691 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); | |
| 692 header->length = table_length_; | |
| 693 memcpy(header->salt, salt_, LINK_SALT_LENGTH); | |
| 694 | |
| 695 // Our table pointer is just the data immediately following the size. | |
| 696 hash_table_ = reinterpret_cast<Fingerprint*>( | |
| 697 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); | |
| 698 | |
| 699 return true; | |
| 700 } | |
| 701 | |
| 702 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { | |
| 703 base::SharedMemory *old_shared_memory = shared_memory_; | |
| 704 Fingerprint* old_hash_table = hash_table_; | |
| 705 int32 old_table_length = table_length_; | |
| 706 if (!CreateURLTable(num_entries, true)) { | |
| 707 // Try to put back the old state. | |
| 708 shared_memory_ = old_shared_memory; | |
| 709 hash_table_ = old_hash_table; | |
| 710 table_length_ = old_table_length; | |
| 711 return false; | |
| 712 } | |
| 713 | |
| 714 #ifndef NDEBUG | |
| 715 DebugValidate(); | |
| 716 #endif | |
| 717 | |
| 718 return true; | |
| 719 } | |
| 720 | |
| 721 void VisitedLinkMaster::FreeURLTable() { | |
| 722 if (shared_memory_) { | |
| 723 delete shared_memory_; | |
| 724 shared_memory_ = NULL; | |
| 725 } | |
| 726 if (!file_) | |
| 727 return; | |
| 728 | |
| 729 BrowserThread::PostTask( | |
| 730 BrowserThread::FILE, FROM_HERE, new AsyncCloseHandle(file_)); | |
| 731 } | |
| 732 | |
| 733 bool VisitedLinkMaster::ResizeTableIfNecessary() { | |
| 734 DCHECK(table_length_ > 0) << "Must have a table"; | |
| 735 | |
| 736 // Load limits for good performance/space. We are pretty conservative about | |
| 737 // keeping the table not very full. This is because we use linear probing | |
| 738 // which increases the likelihood of clumps of entries which will reduce | |
| 739 // performance. | |
| 740 const float max_table_load = 0.5f; // Grow when we're > this full. | |
| 741 const float min_table_load = 0.2f; // Shrink when we're < this full. | |
| 742 | |
| 743 float load = ComputeTableLoad(); | |
| 744 if (load < max_table_load && | |
| 745 (table_length_ <= static_cast<float>(kDefaultTableSize) || | |
| 746 load > min_table_load)) | |
| 747 return false; | |
| 748 | |
| 749 // Table needs to grow or shrink. | |
| 750 int new_size = NewTableSizeForCount(used_items_); | |
| 751 DCHECK(new_size > used_items_); | |
| 752 DCHECK(load <= min_table_load || new_size > table_length_); | |
| 753 ResizeTable(new_size); | |
| 754 return true; | |
| 755 } | |
| 756 | |
| 757 void VisitedLinkMaster::ResizeTable(int32 new_size) { | |
| 758 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_); | |
| 759 shared_memory_serial_++; | |
| 760 | |
| 761 #ifndef NDEBUG | |
| 762 DebugValidate(); | |
| 763 #endif | |
| 764 | |
| 765 base::SharedMemory* old_shared_memory = shared_memory_; | |
| 766 Fingerprint* old_hash_table = hash_table_; | |
| 767 int32 old_table_length = table_length_; | |
| 768 if (!BeginReplaceURLTable(new_size)) | |
| 769 return; | |
| 770 | |
| 771 // Now we have two tables, our local copy which is the old one, and the new | |
| 772 // one loaded into this object where we need to copy the data. | |
| 773 for (int32 i = 0; i < old_table_length; i++) { | |
| 774 Fingerprint cur = old_hash_table[i]; | |
| 775 if (cur) | |
| 776 AddFingerprint(cur, false); | |
| 777 } | |
| 778 | |
| 779 // On error unmapping, just forget about it since we can't do anything | |
| 780 // else to release it. | |
| 781 delete old_shared_memory; | |
| 782 | |
| 783 // Send an update notification to all child processes so they read the new | |
| 784 // table. | |
| 785 listener_->NewTable(shared_memory_); | |
| 786 | |
| 787 #ifndef NDEBUG | |
| 788 DebugValidate(); | |
| 789 #endif | |
| 790 | |
| 791 // The new table needs to be written to disk. | |
| 792 WriteFullTable(); | |
| 793 } | |
| 794 | |
| 795 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { | |
| 796 // These table sizes are selected to be the maximum prime number less than | |
| 797 // a "convenient" multiple of 1K. | |
| 798 static const int table_sizes[] = { | |
| 799 16381, // 16K = 16384 <- don't shrink below this table size | |
| 800 // (should be == default_table_size) | |
| 801 32767, // 32K = 32768 | |
| 802 65521, // 64K = 65536 | |
| 803 130051, // 128K = 131072 | |
| 804 262127, // 256K = 262144 | |
| 805 524269, // 512K = 524288 | |
| 806 1048549, // 1M = 1048576 | |
| 807 2097143, // 2M = 2097152 | |
| 808 4194301, // 4M = 4194304 | |
| 809 8388571, // 8M = 8388608 | |
| 810 16777199, // 16M = 16777216 | |
| 811 33554347}; // 32M = 33554432 | |
| 812 | |
| 813 // Try to leave the table 33% full. | |
| 814 int desired = item_count * 3; | |
| 815 | |
| 816 // Find the closest prime. | |
| 817 for (size_t i = 0; i < arraysize(table_sizes); i ++) { | |
| 818 if (table_sizes[i] > desired) | |
| 819 return table_sizes[i]; | |
| 820 } | |
| 821 | |
| 822 // Growing very big, just approximate a "good" number, not growing as much | |
| 823 // as normal. | |
| 824 return item_count * 2 - 1; | |
| 825 } | |
| 826 | |
| 827 // See the TableBuilder definition in the header file for how this works. | |
| 828 bool VisitedLinkMaster::RebuildTableFromHistory() { | |
| 829 DCHECK(!table_builder_); | |
| 830 if (table_builder_) | |
| 831 return false; | |
| 832 | |
| 833 HistoryService* history_service = history_service_override_; | |
| 834 if (!history_service && profile_) { | |
| 835 history_service = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS); | |
| 836 } | |
| 837 | |
| 838 if (!history_service) { | |
| 839 DLOG(WARNING) << "Attempted to rebuild visited link table, but couldn't " | |
| 840 "obtain a HistoryService."; | |
| 841 return false; | |
| 842 } | |
| 843 | |
| 844 // TODO(brettw) make sure we have reasonable salt! | |
| 845 table_builder_ = new TableBuilder(this, salt_); | |
| 846 | |
| 847 // Make sure the table builder stays live during the call, even if the | |
| 848 // master is deleted. This is balanced in TableBuilder::OnCompleteMainThread. | |
| 849 table_builder_->AddRef(); | |
| 850 history_service->IterateURLs(table_builder_); | |
| 851 return true; | |
| 852 } | |
| 853 | |
| 854 // See the TableBuilder declaration above for how this works. | |
| 855 void VisitedLinkMaster::OnTableRebuildComplete( | |
| 856 bool success, | |
| 857 const std::vector<Fingerprint>& fingerprints) { | |
| 858 if (success) { | |
| 859 // Replace the old table with a new blank one. | |
| 860 shared_memory_serial_++; | |
| 861 | |
| 862 // We are responsible for freeing it AFTER it has been replaced if | |
| 863 // replacement succeeds. | |
| 864 base::SharedMemory* old_shared_memory = shared_memory_; | |
| 865 | |
| 866 int new_table_size = NewTableSizeForCount( | |
| 867 static_cast<int>(fingerprints.size() + added_since_rebuild_.size())); | |
| 868 if (BeginReplaceURLTable(new_table_size)) { | |
| 869 // Free the old table. | |
| 870 delete old_shared_memory; | |
| 871 | |
| 872 // Add the stored fingerprints to the hash table. | |
| 873 for (size_t i = 0; i < fingerprints.size(); i++) | |
| 874 AddFingerprint(fingerprints[i], false); | |
| 875 | |
| 876 // Also add anything that was added while we were asynchronously | |
| 877 // generating the new table. | |
| 878 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin(); | |
| 879 i != added_since_rebuild_.end(); ++i) | |
| 880 AddFingerprint(*i, false); | |
| 881 added_since_rebuild_.clear(); | |
| 882 | |
| 883 // Now handle deletions. | |
| 884 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_); | |
| 885 deleted_since_rebuild_.clear(); | |
| 886 | |
| 887 // Send an update notification to all child processes. | |
| 888 listener_->NewTable(shared_memory_); | |
| 889 | |
| 890 // We shouldn't be writing the table from the main thread! | |
| 891 // http://code.google.com/p/chromium/issues/detail?id=24163 | |
| 892 base::ThreadRestrictions::ScopedAllowIO allow_io; | |
| 893 WriteFullTable(); | |
| 894 } | |
| 895 } | |
| 896 table_builder_ = NULL; // Will release our reference to the builder. | |
| 897 | |
| 898 // Notify the unit test that the rebuild is complete (will be NULL in prod.) | |
| 899 if (rebuild_complete_task_.get()) { | |
| 900 rebuild_complete_task_->Run(); | |
| 901 rebuild_complete_task_.reset(NULL); | |
| 902 } | |
| 903 } | |
| 904 | |
| 905 void VisitedLinkMaster::WriteToFile(FILE* file, | |
| 906 off_t offset, | |
| 907 void* data, | |
| 908 int32 data_size) { | |
| 909 #ifndef NDEBUG | |
| 910 posted_asynchronous_operation_ = true; | |
| 911 #endif | |
| 912 | |
| 913 BrowserThread::PostTask( | |
| 914 BrowserThread::FILE, FROM_HERE, | |
| 915 new AsyncWriter(file, offset, data, data_size)); | |
| 916 } | |
| 917 | |
| 918 void VisitedLinkMaster::WriteUsedItemCountToFile() { | |
| 919 if (!file_) | |
| 920 return; // See comment on the file_ variable for why this might happen. | |
| 921 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); | |
| 922 } | |
| 923 | |
| 924 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { | |
| 925 if (!file_) | |
| 926 return; // See comment on the file_ variable for why this might happen. | |
| 927 if (last_hash < first_hash) { | |
| 928 // Handle wraparound at 0. This first write is first_hash->EOF | |
| 929 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, | |
| 930 &hash_table_[first_hash], | |
| 931 (table_length_ - first_hash + 1) * sizeof(Fingerprint)); | |
| 932 | |
| 933 // Now do 0->last_lash. | |
| 934 WriteToFile(file_, kFileHeaderSize, hash_table_, | |
| 935 (last_hash + 1) * sizeof(Fingerprint)); | |
| 936 } else { | |
| 937 // Normal case, just write the range. | |
| 938 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, | |
| 939 &hash_table_[first_hash], | |
| 940 (last_hash - first_hash + 1) * sizeof(Fingerprint)); | |
| 941 } | |
| 942 } | |
| 943 | |
| 944 bool VisitedLinkMaster::ReadFromFile(FILE* file, | |
| 945 off_t offset, | |
| 946 void* data, | |
| 947 size_t data_size) { | |
| 948 #ifndef NDEBUG | |
| 949 // Since this function is synchronous, we require that no asynchronous | |
| 950 // operations could possibly be pending. | |
| 951 DCHECK(!posted_asynchronous_operation_); | |
| 952 #endif | |
| 953 | |
| 954 fseek(file, offset, SEEK_SET); | |
| 955 | |
| 956 size_t num_read = fread(data, 1, data_size, file); | |
| 957 return num_read == data_size; | |
| 958 } | |
| 959 | |
| 960 // VisitedLinkTableBuilder ---------------------------------------------------- | |
| 961 | |
| 962 VisitedLinkMaster::TableBuilder::TableBuilder( | |
| 963 VisitedLinkMaster* master, | |
| 964 const uint8 salt[LINK_SALT_LENGTH]) | |
| 965 : master_(master), | |
| 966 success_(true) { | |
| 967 fingerprints_.reserve(4096); | |
| 968 memcpy(salt_, salt, sizeof(salt)); | |
| 969 } | |
| 970 | |
| 971 // TODO(brettw): Do we want to try to cancel the request if this happens? It | |
| 972 // could delay shutdown if there are a lot of URLs. | |
| 973 void VisitedLinkMaster::TableBuilder::DisownMaster() { | |
| 974 master_ = NULL; | |
| 975 } | |
| 976 | |
| 977 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { | |
| 978 if (!url.is_empty()) { | |
| 979 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( | |
| 980 url.spec().data(), url.spec().length(), salt_)); | |
| 981 } | |
| 982 } | |
| 983 | |
| 984 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { | |
| 985 success_ = success; | |
| 986 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; | |
| 987 | |
| 988 // Marshal to the main thread to notify the VisitedLinkMaster that the | |
| 989 // rebuild is complete. | |
| 990 BrowserThread::PostTask( | |
| 991 BrowserThread::UI, FROM_HERE, | |
| 992 NewRunnableMethod(this, &TableBuilder::OnCompleteMainThread)); | |
| 993 } | |
| 994 | |
| 995 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() { | |
| 996 if (master_) | |
| 997 master_->OnTableRebuildComplete(success_, fingerprints_); | |
| 998 | |
| 999 // WILL (generally) DELETE THIS! This balances the AddRef in | |
| 1000 // VisitedLinkMaster::RebuildTableFromHistory. | |
| 1001 Release(); | |
| 1002 } | |
| OLD | NEW |