OLD | NEW |
| (Empty) |
1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/visitedlink_master.h" | |
6 | |
7 #if defined(OS_WIN) | |
8 #include <windows.h> | |
9 #include <io.h> | |
10 #include <shlobj.h> | |
11 #endif // defined(OS_WIN) | |
12 #include <stdio.h> | |
13 | |
14 #include <algorithm> | |
15 | |
16 #include "base/file_util.h" | |
17 #include "base/logging.h" | |
18 #include "base/message_loop.h" | |
19 #include "base/path_service.h" | |
20 #include "base/process_util.h" | |
21 #include "base/rand_util.h" | |
22 #include "base/stack_container.h" | |
23 #include "base/string_util.h" | |
24 #include "base/thread_restrictions.h" | |
25 #include "chrome/browser/browser_process.h" | |
26 #include "chrome/browser/browser_thread.h" | |
27 #include "chrome/browser/history/history.h" | |
28 #include "chrome/browser/profile.h" | |
29 | |
30 using file_util::ScopedFILE; | |
31 using file_util::OpenFile; | |
32 using file_util::TruncateFile; | |
33 | |
// Byte offsets of the fields within the on-disk file header, relative to the
// start of the database file.
const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0;
const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4;
const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8;
const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12;
const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16;

// Current on-disk format version; files with any other version are rejected
// by ReadFileHeader() and rebuilt from history.
const int32 VisitedLinkMaster::kFileCurrentVersion = 2;

// the signature at the beginning of the URL table = "VLnk" (visited links)
const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56;
// Total header size: the fixed fields above plus the trailing salt bytes.
const size_t VisitedLinkMaster::kFileHeaderSize =
    kFileHeaderSaltOffset + LINK_SALT_LENGTH;

// This value should also be the same as the smallest size in the lookup
// table in NewTableSizeForCount (prime number).
const unsigned VisitedLinkMaster::kDefaultTableSize = 16381;

// Deletions larger than this are persisted with one full-table rewrite
// instead of one file patch per fingerprint.
const size_t VisitedLinkMaster::kBigDeleteThreshold = 64;
53 namespace { | |
54 | |
55 // Fills the given salt structure with some quasi-random values | |
56 // It is not necessary to generate a cryptographically strong random string, | |
57 // only that it be reasonably different for different users. | |
58 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { | |
59 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; | |
60 uint64 randval = base::RandUint64(); | |
61 memcpy(salt, &randval, 8); | |
62 } | |
63 | |
// AsyncWriter ----------------------------------------------------------------

// This task executes on a background thread and executes a write. This
// prevents us from blocking the UI thread doing I/O.
class AsyncWriter : public Task {
 public:
  // Copies |data_len| bytes out of |data| immediately, so the caller's buffer
  // does not need to outlive this task. |file| must remain open until the
  // task has run on the file thread.
  AsyncWriter(FILE* file, int32 offset, const void* data, size_t data_len)
      : file_(file),
        offset_(offset) {
    data_->resize(data_len);
    memcpy(&*data_->begin(), data, data_len);
  }

  virtual void Run() {
    WriteToFile(file_, offset_,
                &*data_->begin(), static_cast<int32>(data_->size()));
  }

  // Exposed as a static so it can be called directly from the Master to
  // reduce the number of platform-specific I/O sites we have. Returns true if
  // the write was complete.
  static bool WriteToFile(FILE* file,
                          off_t offset,
                          const void* data,
                          size_t data_len) {
    if (fseek(file, offset, SEEK_SET) != 0)
      return false;  // Don't write to an invalid part of the file.

    size_t num_written = fwrite(data, 1, data_len, file);

    // The write may not make it to the kernel (stdlib may buffer the write)
    // until the next fseek/fclose call. If we crash, it's easy for our used
    // item count to be out of sync with the number of hashes we write.
    // Protect against this by calling fflush.
    int ret = fflush(file);
    DCHECK_EQ(0, ret);
    return num_written == data_len;
  }

 private:
  // The data to write and where to write it.
  FILE* file_;
  int32 offset_;  // Offset from the beginning of the file.

  // Most writes are just a single fingerprint, so we reserve that much in this
  // object to avoid mallocs in that case.
  StackVector<char, sizeof(VisitedLinkCommon::Fingerprint)> data_;

  DISALLOW_COPY_AND_ASSIGN(AsyncWriter);
};
114 | |
115 // Used to asynchronously set the end of the file. This must be done on the | |
116 // same thread as the writing to keep things synchronized. | |
117 class AsyncSetEndOfFile : public Task { | |
118 public: | |
119 explicit AsyncSetEndOfFile(FILE* file) : file_(file) {} | |
120 | |
121 virtual void Run() { | |
122 TruncateFile(file_); | |
123 } | |
124 | |
125 private: | |
126 FILE* file_; | |
127 DISALLOW_COPY_AND_ASSIGN(AsyncSetEndOfFile); | |
128 }; | |
129 | |
130 // Used to asynchronously close a file. This must be done on the same thread as | |
131 // the writing to keep things synchronized. | |
132 class AsyncCloseHandle : public Task { | |
133 public: | |
134 explicit AsyncCloseHandle(FILE* file) : file_(file) {} | |
135 | |
136 virtual void Run() { | |
137 fclose(file_); | |
138 } | |
139 | |
140 private: | |
141 FILE* file_; | |
142 DISALLOW_COPY_AND_ASSIGN(AsyncCloseHandle); | |
143 }; | |
144 | |
145 } // namespace | |
146 | |
// TableBuilder ---------------------------------------------------------------

// How rebuilding from history works
// ---------------------------------
//
// We mark that we're rebuilding from history by setting the table_builder_
// member in VisitedLinkMaster to the TableBuilder we create. This builder
// will be called on the history thread by the history system for every URL
// in the database.
//
// The builder will store the fingerprints for those URLs, and then marshals
// back to the main thread where the VisitedLinkMaster will be notified. The
// master then replaces its table with a new table containing the computed
// fingerprints.
//
// The builder must remain active while the history system is using it.
// Sometimes, the master will be deleted before the rebuild is complete, in
// which case it notifies the builder via DisownMaster(). The builder will
// delete itself once rebuilding is complete, and not execute any callback.
class VisitedLinkMaster::TableBuilder
    : public HistoryService::URLEnumerator,
      public base::RefCountedThreadSafe<TableBuilder> {
 public:
  TableBuilder(VisitedLinkMaster* master,
               const uint8 salt[LINK_SALT_LENGTH]);

  // Called on the main thread when the master is being destroyed. This will
  // prevent a crash when the query completes and the master is no longer
  // around. We can not actually do anything but mark this fact, since the
  // table will be being rebuilt simultaneously on the other thread.
  void DisownMaster();

  // HistoryService::URLEnumerator
  virtual void OnURL(const GURL& url);
  virtual void OnComplete(bool succeed);

 private:
  friend class base::RefCountedThreadSafe<TableBuilder>;

  ~TableBuilder() {}

  // OnComplete marshals to this function on the main thread to do the
  // notification.
  void OnCompleteMainThread();

  // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD!
  VisitedLinkMaster* master_;

  // Indicates whether the operation has failed or not.
  bool success_;

  // Salt for this new table.
  uint8 salt_[LINK_SALT_LENGTH];

  // Stores the fingerprints we computed on the background thread.
  VisitedLinkCommon::Fingerprints fingerprints_;
};
204 | |
205 // VisitedLinkMaster ---------------------------------------------------------- | |
206 | |
// Production constructor: all state beyond |listener| and |profile| comes
// from the InitMembers() defaults.
VisitedLinkMaster::VisitedLinkMaster(Listener* listener,
                                     Profile* profile) {
  InitMembers(listener, profile);
}
211 | |
212 VisitedLinkMaster::VisitedLinkMaster(Listener* listener, | |
213 HistoryService* history_service, | |
214 bool suppress_rebuild, | |
215 const FilePath& filename, | |
216 int32 default_table_size) { | |
217 InitMembers(listener, NULL); | |
218 | |
219 database_name_override_ = filename; | |
220 table_size_override_ = default_table_size; | |
221 history_service_override_ = history_service; | |
222 suppress_rebuild_ = suppress_rebuild; | |
223 } | |
224 | |
VisitedLinkMaster::~VisitedLinkMaster() {
  if (table_builder_.get()) {
    // Prevent the table builder from calling us back now that we're being
    // destroyed. Note that we DON'T delete the object, since the history
    // system is still writing into it. When that is complete, the table
    // builder will destroy itself when it finds we are gone.
    table_builder_->DisownMaster();
  }
  // Releases the shared memory and posts an asynchronous close of file_.
  FreeURLTable();
}
235 | |
// Resets every member to its default state; shared by both constructors.
// |profile| may be NULL (used by the testing constructor).
void VisitedLinkMaster::InitMembers(Listener* listener, Profile* profile) {
  DCHECK(listener);

  listener_ = listener;
  file_ = NULL;
  shared_memory_ = NULL;
  shared_memory_serial_ = 0;
  used_items_ = 0;
  table_size_override_ = 0;
  history_service_override_ = NULL;
  suppress_rebuild_ = false;
  profile_ = profile;

#ifndef NDEBUG
  // NOTE(review): debug-only bookkeeping flag; presumably guards against
  // interleaving synchronous I/O with posted asynchronous file operations —
  // confirm at its use sites (not visible in this chunk).
  posted_asynchronous_operation_ = false;
#endif
}
253 | |
254 bool VisitedLinkMaster::Init() { | |
255 // We probably shouldn't be loading this from the UI thread, | |
256 // but it does need to happen early on in startup. | |
257 // http://code.google.com/p/chromium/issues/detail?id=24163 | |
258 base::ThreadRestrictions::ScopedAllowIO allow_io; | |
259 if (!InitFromFile()) | |
260 return InitFromScratch(suppress_rebuild_); | |
261 return true; | |
262 } | |
263 | |
// Computes |url|'s fingerprint and inserts it into the in-memory hash table.
// Returns the slot the fingerprint landed in, or null_hash_ if nothing was
// added (invalid URL, off-the-record profile, or a nearly-full table). While
// a rebuild is in progress the fingerprint is additionally recorded in
// added_since_rebuild_ so it survives the upcoming table swap. Does not
// touch the on-disk file; callers persist the change themselves.
VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) {
  // Extra check that we are not off the record. This should not happen.
  if (profile_ && profile_->IsOffTheRecord()) {
    NOTREACHED();
    return null_hash_;
  }

  if (!url.is_valid())
    return null_hash_;  // Don't add invalid URLs.

  Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(),
                                                  url.spec().size(),
                                                  salt_);
  if (table_builder_) {
    // If we have a pending delete for this fingerprint, cancel it.
    std::set<Fingerprint>::iterator found =
        deleted_since_rebuild_.find(fingerprint);
    if (found != deleted_since_rebuild_.end())
      deleted_since_rebuild_.erase(found);

    // A rebuild is in progress, save this addition in the temporary list so
    // it can be added once rebuild is complete.
    added_since_rebuild_.insert(fingerprint);
  }

  // If the table is "full", we don't add URLs and just drop them on the floor.
  // This can happen if we get thousands of new URLs and something causes
  // the table resizing to fail. This check prevents a hang in that case. Note
  // that this is *not* the resize limit, this is just a sanity check.
  if (used_items_ / 8 > table_length_ / 10)
    return null_hash_;  // Table is more than 80% full.

  return AddFingerprint(fingerprint, true);
}
298 | |
299 void VisitedLinkMaster::AddURL(const GURL& url) { | |
300 Hash index = TryToAddURL(url); | |
301 if (!table_builder_ && index != null_hash_) { | |
302 // Not rebuilding, so we want to keep the file on disk up-to-date. | |
303 WriteUsedItemCountToFile(); | |
304 WriteHashRangeToFile(index, index); | |
305 ResizeTableIfNecessary(); | |
306 } | |
307 } | |
308 | |
309 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { | |
310 for (std::vector<GURL>::const_iterator i = url.begin(); | |
311 i != url.end(); ++i) { | |
312 Hash index = TryToAddURL(*i); | |
313 if (!table_builder_ && index != null_hash_) | |
314 ResizeTableIfNecessary(); | |
315 } | |
316 | |
317 // Keeps the file on disk up-to-date. | |
318 if (!table_builder_) | |
319 WriteFullTable(); | |
320 } | |
321 | |
322 void VisitedLinkMaster::DeleteAllURLs() { | |
323 // Any pending modifications are invalid. | |
324 added_since_rebuild_.clear(); | |
325 deleted_since_rebuild_.clear(); | |
326 | |
327 // Clear the hash table. | |
328 used_items_ = 0; | |
329 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); | |
330 | |
331 // Resize it if it is now too empty. Resize may write the new table out for | |
332 // us, otherwise, schedule writing the new table to disk ourselves. | |
333 if (!ResizeTableIfNecessary()) | |
334 WriteFullTable(); | |
335 | |
336 listener_->Reset(); | |
337 } | |
338 | |
// Removes every valid URL in |urls| from the table. During a rebuild the
// deletions are recorded in deleted_since_rebuild_ (and removed from the
// pending-add list) so they can be re-applied to the new table; otherwise
// they are applied to the current table and persisted.
void VisitedLinkMaster::DeleteURLs(const std::set<GURL>& urls) {
  typedef std::set<GURL>::const_iterator SetIterator;

  if (urls.empty())
    return;

  listener_->Reset();

  if (table_builder_) {
    // A rebuild is in progress, save this deletion in the temporary list so
    // it can be added once rebuild is complete.
    for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
      if (!i->is_valid())
        continue;

      Fingerprint fingerprint =
          ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_);
      deleted_since_rebuild_.insert(fingerprint);

      // If the URL was just added and now we're deleting it, it may be in the
      // list of things added since the last rebuild. Delete it from that list.
      std::set<Fingerprint>::iterator found =
          added_since_rebuild_.find(fingerprint);
      if (found != added_since_rebuild_.end())
        added_since_rebuild_.erase(found);

      // Delete the URLs from the in-memory table, but don't bother writing
      // to disk since it will be replaced soon.
      DeleteFingerprint(fingerprint, false);
    }
    return;
  }

  // Compute the deleted URLs' fingerprints and delete them
  std::set<Fingerprint> deleted_fingerprints;
  for (SetIterator i = urls.begin(); i != urls.end(); ++i) {
    if (!i->is_valid())
      continue;
    deleted_fingerprints.insert(
        ComputeURLFingerprint(i->spec().data(), i->spec().size(), salt_));
  }
  DeleteFingerprintsFromCurrentTable(deleted_fingerprints);
}
382 | |
// See VisitedLinkCommon::IsVisited which should be in sync with this algorithm
//
// Inserts |fingerprint| into the open-addressed table using linear probing
// (see the load-factor comment in ResizeTableIfNecessary). Returns the slot
// used, or null_hash_ if the fingerprint was already present or the table is
// unusable. Does NOT write the on-disk file; callers persist the change.
VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint(
    Fingerprint fingerprint,
    bool send_notifications) {
  if (!hash_table_ || table_length_ == 0) {
    NOTREACHED();  // Not initialized.
    return null_hash_;
  }

  Hash cur_hash = HashFingerprint(fingerprint);
  Hash first_hash = cur_hash;
  while (true) {
    Fingerprint cur_fingerprint = FingerprintAt(cur_hash);
    if (cur_fingerprint == fingerprint)
      return null_hash_;  // This fingerprint is already in there, do nothing.

    if (cur_fingerprint == null_fingerprint_) {
      // End of probe sequence found, insert here.
      hash_table_[cur_hash] = fingerprint;
      used_items_++;
      // If allowed, notify listener that a new visited link was added.
      if (send_notifications)
        listener_->Add(fingerprint);
      return cur_hash;
    }

    // Advance in the probe sequence.
    cur_hash = IncrementHash(cur_hash);
    if (cur_hash == first_hash) {
      // This means that we've wrapped around and are about to go into an
      // infinite loop. Something was wrong with the hashtable resizing
      // logic, so stop here.
      NOTREACHED();
      return null_hash_;
    }
  }
}
420 | |
421 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( | |
422 const std::set<Fingerprint>& fingerprints) { | |
423 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); | |
424 | |
425 // Delete the URLs from the table. | |
426 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); | |
427 i != fingerprints.end(); ++i) | |
428 DeleteFingerprint(*i, !bulk_write); | |
429 | |
430 // These deleted fingerprints may make us shrink the table. | |
431 if (ResizeTableIfNecessary()) | |
432 return; // The resize function wrote the new table to disk for us. | |
433 | |
434 // Nobody wrote this out for us, write the full file to disk. | |
435 if (bulk_write) | |
436 WriteFullTable(); | |
437 } | |
438 | |
// Removes |fingerprint| from the open-addressed table, then re-inserts every
// other fingerprint in the same probe cluster so linear-probe lookups keep
// working. Returns true if the fingerprint was present. When |update_file|
// is set, the used-count header field and the affected hash range are also
// written to disk.
bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint,
                                          bool update_file) {
  if (!hash_table_ || table_length_ == 0) {
    NOTREACHED();  // Not initialized.
    return false;
  }
  if (!IsVisited(fingerprint))
    return false;  // Not in the database to delete.

  // First update the header used count.
  used_items_--;
  if (update_file)
    WriteUsedItemCountToFile();

  Hash deleted_hash = HashFingerprint(fingerprint);

  // Find the range of "stuff" in the hash table that is adjacent to this
  // fingerprint. These are things that could be affected by the change in
  // the hash table. Since we use linear probing, anything after the deleted
  // item up until an empty item could be affected.
  Hash end_range = deleted_hash;
  while (true) {
    Hash next_hash = IncrementHash(end_range);
    if (next_hash == deleted_hash)
      break;  // We wrapped around and the whole table is full.
    if (!hash_table_[next_hash])
      break;  // Found the last spot.
    end_range = next_hash;
  }

  // We could get all fancy and move the affected fingerprints around, but
  // instead we just remove them all and re-add them (minus our deleted one).
  // This will mean there's a small window of time where the affected links
  // won't be marked visited.
  StackVector<Fingerprint, 32> shuffled_fingerprints;
  Hash stop_loop = IncrementHash(end_range);  // The end range is inclusive.
  for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) {
    if (hash_table_[i] != fingerprint) {
      // Don't save the one we're deleting!
      shuffled_fingerprints->push_back(hash_table_[i]);

      // This will balance the increment of this value in AddFingerprint below
      // so there is no net change.
      used_items_--;
    }
    hash_table_[i] = null_fingerprint_;
  }

  if (!shuffled_fingerprints->empty()) {
    // Need to add the new items back.
    for (size_t i = 0; i < shuffled_fingerprints->size(); i++)
      AddFingerprint(shuffled_fingerprints[i], false);
  }

  // Write the affected range to disk [deleted_hash, end_range].
  if (update_file)
    WriteHashRangeToFile(deleted_hash, end_range);

  return true;
}
499 | |
// Writes the complete header and hash table through file_, opening the
// database file first if it is not already open. The end-of-file truncation
// happens asynchronously on the file thread.
bool VisitedLinkMaster::WriteFullTable() {
  // This function can get called when the file is open, for example, when we
  // resize the table. We must handle this case and not try to reopen the file,
  // since there may be write operations pending on the file I/O thread.
  //
  // Note that once we start writing, we do not delete on error. This means
  // there can be a partial file, but the short file will be detected next time
  // we start, and will be replaced.
  //
  // This might possibly get corrupted if we crash in the middle of writing.
  // We should pick up the most common types of these failures when we notice
  // that the file size is different when we load it back in, and then we will
  // regenerate the table.
  if (!file_) {
    FilePath filename;
    GetDatabaseFileName(&filename);
    file_ = OpenFile(filename, "wb+");
    if (!file_) {
      DLOG(ERROR) << "Failed to open file " << filename.value();
      return false;
    }
  }

  // Write the new header.
  int32 header[4];
  header[0] = kFileSignature;
  header[1] = kFileCurrentVersion;
  header[2] = table_length_;
  header[3] = used_items_;
  WriteToFile(file_, 0, header, sizeof(header));
  WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH);

  // Write the hash data.
  WriteToFile(file_, kFileHeaderSize,
              hash_table_, table_length_ * sizeof(Fingerprint));

  // The hash table may have shrunk, so make sure this is the end.
  BrowserThread::PostTask(
      BrowserThread::FILE, FROM_HERE, new AsyncSetEndOfFile(file_));
  return true;
}
541 | |
// Attempts to load the table from the database file. On success, file_ takes
// ownership of the open FILE for later incremental writes. Returns false if
// the file is missing or fails any header/size validation.
bool VisitedLinkMaster::InitFromFile() {
  DCHECK(file_ == NULL);

  FilePath filename;
  GetDatabaseFileName(&filename);
  ScopedFILE file_closer(OpenFile(filename, "rb+"));
  if (!file_closer.get())
    return false;

  int32 num_entries, used_count;
  if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_))
    return false;  // Header isn't valid.

  // Allocate and read the table.
  if (!CreateURLTable(num_entries, false))
    return false;
  if (!ReadFromFile(file_closer.get(), kFileHeaderSize,
                    hash_table_, num_entries * sizeof(Fingerprint))) {
    FreeURLTable();
    return false;
  }
  used_items_ = used_count;

#ifndef NDEBUG
  DebugValidate();
#endif

  file_ = file_closer.release();
  return true;
}
572 | |
573 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { | |
574 int32 table_size = kDefaultTableSize; | |
575 if (table_size_override_) | |
576 table_size = table_size_override_; | |
577 | |
578 // The salt must be generated before the table so that it can be copied to | |
579 // the shared memory. | |
580 GenerateSalt(salt_); | |
581 if (!CreateURLTable(table_size, true)) | |
582 return false; | |
583 | |
584 #ifndef NDEBUG | |
585 DebugValidate(); | |
586 #endif | |
587 | |
588 if (suppress_rebuild) { | |
589 // When we disallow rebuilds (normally just unit tests), just use the | |
590 // current empty table. | |
591 return WriteFullTable(); | |
592 } | |
593 | |
594 // This will build the table from history. On the first run, history will | |
595 // be empty, so this will be correct. This will also write the new table | |
596 // to disk. We don't want to save explicitly here, since the rebuild may | |
597 // not complete, leaving us with an empty but valid visited link database. | |
598 // In the future, we won't know we need to try rebuilding again. | |
599 return RebuildTableFromHistory(); | |
600 } | |
601 | |
602 bool VisitedLinkMaster::ReadFileHeader(FILE* file, | |
603 int32* num_entries, | |
604 int32* used_count, | |
605 uint8 salt[LINK_SALT_LENGTH]) { | |
606 // Get file size. | |
607 // Note that there is no need to seek back to the original location in the | |
608 // file since ReadFromFile() [which is the next call accessing the file] | |
609 // seeks before reading. | |
610 if (fseek(file, 0, SEEK_END) == -1) | |
611 return false; | |
612 size_t file_size = ftell(file); | |
613 | |
614 if (file_size <= kFileHeaderSize) | |
615 return false; | |
616 | |
617 uint8 header[kFileHeaderSize]; | |
618 if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) | |
619 return false; | |
620 | |
621 // Verify the signature. | |
622 int32 signature; | |
623 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); | |
624 if (signature != kFileSignature) | |
625 return false; | |
626 | |
627 // Verify the version is up-to-date. As with other read errors, a version | |
628 // mistmatch will trigger a rebuild of the database from history, which will | |
629 // have the effect of migrating the database. | |
630 int32 version; | |
631 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); | |
632 if (version != kFileCurrentVersion) | |
633 return false; // Bad version. | |
634 | |
635 // Read the table size and make sure it matches the file size. | |
636 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); | |
637 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) | |
638 return false; // Bad size. | |
639 | |
640 // Read the used item count. | |
641 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); | |
642 if (*used_count > *num_entries) | |
643 return false; // Bad used item count; | |
644 | |
645 // Read the salt. | |
646 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); | |
647 | |
648 // This file looks OK from the header's perspective. | |
649 return true; | |
650 } | |
651 | |
652 bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) { | |
653 if (!database_name_override_.empty()) { | |
654 // use this filename, the directory must exist | |
655 *filename = database_name_override_; | |
656 return true; | |
657 } | |
658 | |
659 if (!profile_ || profile_->GetPath().empty()) | |
660 return false; | |
661 | |
662 FilePath profile_dir = profile_->GetPath(); | |
663 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); | |
664 return true; | |
665 } | |
666 | |
667 // Initializes the shared memory structure. The salt should already be filled | |
668 // in so that it can be written to the shared memory | |
669 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { | |
670 // The table is the size of the table followed by the entries. | |
671 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); | |
672 | |
673 // Create the shared memory object. | |
674 shared_memory_ = new base::SharedMemory(); | |
675 if (!shared_memory_) | |
676 return false; | |
677 | |
678 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) { | |
679 delete shared_memory_; | |
680 shared_memory_ = NULL; | |
681 return false; | |
682 } | |
683 | |
684 if (init_to_empty) { | |
685 memset(shared_memory_->memory(), 0, alloc_size); | |
686 used_items_ = 0; | |
687 } | |
688 table_length_ = num_entries; | |
689 | |
690 // Save the header for other processes to read. | |
691 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); | |
692 header->length = table_length_; | |
693 memcpy(header->salt, salt_, LINK_SALT_LENGTH); | |
694 | |
695 // Our table pointer is just the data immediately following the size. | |
696 hash_table_ = reinterpret_cast<Fingerprint*>( | |
697 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); | |
698 | |
699 return true; | |
700 } | |
701 | |
702 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { | |
703 base::SharedMemory *old_shared_memory = shared_memory_; | |
704 Fingerprint* old_hash_table = hash_table_; | |
705 int32 old_table_length = table_length_; | |
706 if (!CreateURLTable(num_entries, true)) { | |
707 // Try to put back the old state. | |
708 shared_memory_ = old_shared_memory; | |
709 hash_table_ = old_hash_table; | |
710 table_length_ = old_table_length; | |
711 return false; | |
712 } | |
713 | |
714 #ifndef NDEBUG | |
715 DebugValidate(); | |
716 #endif | |
717 | |
718 return true; | |
719 } | |
720 | |
721 void VisitedLinkMaster::FreeURLTable() { | |
722 if (shared_memory_) { | |
723 delete shared_memory_; | |
724 shared_memory_ = NULL; | |
725 } | |
726 if (!file_) | |
727 return; | |
728 | |
729 BrowserThread::PostTask( | |
730 BrowserThread::FILE, FROM_HERE, new AsyncCloseHandle(file_)); | |
731 } | |
732 | |
// Grows the table when its load exceeds max_table_load, and shrinks it when
// the load drops below min_table_load (never shrinking at or below the
// default size). Returns true if a resize happened, in which case
// ResizeTable() has already written the new table to disk.
bool VisitedLinkMaster::ResizeTableIfNecessary() {
  DCHECK(table_length_ > 0) << "Must have a table";

  // Load limits for good performance/space. We are pretty conservative about
  // keeping the table not very full. This is because we use linear probing
  // which increases the likelihood of clumps of entries which will reduce
  // performance.
  const float max_table_load = 0.5f;  // Grow when we're > this full.
  const float min_table_load = 0.2f;  // Shrink when we're < this full.

  float load = ComputeTableLoad();
  if (load < max_table_load &&
      (table_length_ <= static_cast<float>(kDefaultTableSize) ||
       load > min_table_load))
    return false;

  // Table needs to grow or shrink.
  int new_size = NewTableSizeForCount(used_items_);
  DCHECK(new_size > used_items_);
  DCHECK(load <= min_table_load || new_size > table_length_);
  ResizeTable(new_size);
  return true;
}
756 | |
// Replaces the table with one of |new_size| slots, re-inserting every
// existing fingerprint, then notifies child processes of the new shared
// memory and writes the new table to disk. On allocation failure the old
// table is kept untouched.
void VisitedLinkMaster::ResizeTable(int32 new_size) {
  DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_);
  shared_memory_serial_++;

#ifndef NDEBUG
  DebugValidate();
#endif

  base::SharedMemory* old_shared_memory = shared_memory_;
  Fingerprint* old_hash_table = hash_table_;
  int32 old_table_length = table_length_;
  if (!BeginReplaceURLTable(new_size))
    return;

  // Now we have two tables, our local copy which is the old one, and the new
  // one loaded into this object where we need to copy the data.
  for (int32 i = 0; i < old_table_length; i++) {
    Fingerprint cur = old_hash_table[i];
    if (cur)
      AddFingerprint(cur, false);
  }

  // On error unmapping, just forget about it since we can't do anything
  // else to release it.
  delete old_shared_memory;

  // Send an update notification to all child processes so they read the new
  // table.
  listener_->NewTable(shared_memory_);

#ifndef NDEBUG
  DebugValidate();
#endif

  // The new table needs to be written to disk.
  WriteFullTable();
}
794 | |
795 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { | |
796 // These table sizes are selected to be the maximum prime number less than | |
797 // a "convenient" multiple of 1K. | |
798 static const int table_sizes[] = { | |
799 16381, // 16K = 16384 <- don't shrink below this table size | |
800 // (should be == default_table_size) | |
801 32767, // 32K = 32768 | |
802 65521, // 64K = 65536 | |
803 130051, // 128K = 131072 | |
804 262127, // 256K = 262144 | |
805 524269, // 512K = 524288 | |
806 1048549, // 1M = 1048576 | |
807 2097143, // 2M = 2097152 | |
808 4194301, // 4M = 4194304 | |
809 8388571, // 8M = 8388608 | |
810 16777199, // 16M = 16777216 | |
811 33554347}; // 32M = 33554432 | |
812 | |
813 // Try to leave the table 33% full. | |
814 int desired = item_count * 3; | |
815 | |
816 // Find the closest prime. | |
817 for (size_t i = 0; i < arraysize(table_sizes); i ++) { | |
818 if (table_sizes[i] > desired) | |
819 return table_sizes[i]; | |
820 } | |
821 | |
822 // Growing very big, just approximate a "good" number, not growing as much | |
823 // as normal. | |
824 return item_count * 2 - 1; | |
825 } | |
826 | |
// See the TableBuilder definition in the header file for how this works.
//
// Kicks off an asynchronous rebuild of the table by iterating the history
// database. Returns true if the iteration request was issued; the actual
// table swap happens later in OnTableRebuildComplete().
bool VisitedLinkMaster::RebuildTableFromHistory() {
  DCHECK(!table_builder_);
  if (table_builder_)
    return false;

  // Tests can inject a history service; otherwise get it from the profile.
  HistoryService* history_service = history_service_override_;
  if (!history_service && profile_) {
    history_service = profile_->GetHistoryService(Profile::EXPLICIT_ACCESS);
  }

  if (!history_service) {
    DLOG(WARNING) << "Attempted to rebuild visited link table, but couldn't "
                     "obtain a HistoryService.";
    return false;
  }

  // TODO(brettw) make sure we have reasonable salt!
  table_builder_ = new TableBuilder(this, salt_);

  // Make sure the table builder stays live during the call, even if the
  // master is deleted. This is balanced in TableBuilder::OnCompleteMainThread.
  table_builder_->AddRef();
  history_service->IterateURLs(table_builder_);
  return true;
}
853 | |
// See the TableBuilder declaration above for how this works.
//
// Runs on the main (UI) thread once TableBuilder has finished iterating
// history (posted from TableBuilder::OnComplete). On success, swaps in a
// freshly sized table containing |fingerprints| plus any adds/deletes that
// arrived while the rebuild was in flight, notifies child processes, and
// persists the new table. On failure the existing table is left untouched.
// In all cases the builder reference is dropped and any waiting unit test
// is signaled.
void VisitedLinkMaster::OnTableRebuildComplete(
    bool success,
    const std::vector<Fingerprint>& fingerprints) {
  if (success) {
    // Replace the old table with a new blank one.
    shared_memory_serial_++;

    // We are responsible for freeing it AFTER it has been replaced if
    // replacement succeeds.
    base::SharedMemory* old_shared_memory = shared_memory_;

    // Size the new table for everything we know about: history's
    // fingerprints plus links added during the asynchronous rebuild.
    int new_table_size = NewTableSizeForCount(
        static_cast<int>(fingerprints.size() + added_since_rebuild_.size()));
    if (BeginReplaceURLTable(new_table_size)) {
      // BeginReplaceURLTable succeeded, so shared_memory_ now points at the
      // new table and the old mapping can be freed.
      delete old_shared_memory;

      // Add the stored fingerprints to the hash table.
      for (size_t i = 0; i < fingerprints.size(); i++)
        AddFingerprint(fingerprints[i], false);

      // Also add anything that was added while we were asynchronously
      // generating the new table.
      for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin();
           i != added_since_rebuild_.end(); ++i)
        AddFingerprint(*i, false);
      added_since_rebuild_.clear();

      // Now handle deletions.
      DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_);
      deleted_since_rebuild_.clear();

      // Send an update notification to all child processes.
      listener_->NewTable(shared_memory_);

      // We shouldn't be writing the table from the main thread!
      // http://code.google.com/p/chromium/issues/detail?id=24163
      base::ThreadRestrictions::ScopedAllowIO allow_io;
      WriteFullTable();
    }
  }
  table_builder_ = NULL;  // Will release our reference to the builder.

  // Notify the unit test that the rebuild is complete (will be NULL in prod.)
  if (rebuild_complete_task_.get()) {
    rebuild_complete_task_->Run();
    rebuild_complete_task_.reset(NULL);
  }
}
904 | |
905 void VisitedLinkMaster::WriteToFile(FILE* file, | |
906 off_t offset, | |
907 void* data, | |
908 int32 data_size) { | |
909 #ifndef NDEBUG | |
910 posted_asynchronous_operation_ = true; | |
911 #endif | |
912 | |
913 BrowserThread::PostTask( | |
914 BrowserThread::FILE, FROM_HERE, | |
915 new AsyncWriter(file, offset, data, data_size)); | |
916 } | |
917 | |
918 void VisitedLinkMaster::WriteUsedItemCountToFile() { | |
919 if (!file_) | |
920 return; // See comment on the file_ variable for why this might happen. | |
921 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); | |
922 } | |
923 | |
924 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { | |
925 if (!file_) | |
926 return; // See comment on the file_ variable for why this might happen. | |
927 if (last_hash < first_hash) { | |
928 // Handle wraparound at 0. This first write is first_hash->EOF | |
929 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, | |
930 &hash_table_[first_hash], | |
931 (table_length_ - first_hash + 1) * sizeof(Fingerprint)); | |
932 | |
933 // Now do 0->last_lash. | |
934 WriteToFile(file_, kFileHeaderSize, hash_table_, | |
935 (last_hash + 1) * sizeof(Fingerprint)); | |
936 } else { | |
937 // Normal case, just write the range. | |
938 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, | |
939 &hash_table_[first_hash], | |
940 (last_hash - first_hash + 1) * sizeof(Fingerprint)); | |
941 } | |
942 } | |
943 | |
944 bool VisitedLinkMaster::ReadFromFile(FILE* file, | |
945 off_t offset, | |
946 void* data, | |
947 size_t data_size) { | |
948 #ifndef NDEBUG | |
949 // Since this function is synchronous, we require that no asynchronous | |
950 // operations could possibly be pending. | |
951 DCHECK(!posted_asynchronous_operation_); | |
952 #endif | |
953 | |
954 fseek(file, offset, SEEK_SET); | |
955 | |
956 size_t num_read = fread(data, 1, data_size, file); | |
957 return num_read == data_size; | |
958 } | |
959 | |
960 // VisitedLinkTableBuilder ---------------------------------------------------- | |
961 | |
962 VisitedLinkMaster::TableBuilder::TableBuilder( | |
963 VisitedLinkMaster* master, | |
964 const uint8 salt[LINK_SALT_LENGTH]) | |
965 : master_(master), | |
966 success_(true) { | |
967 fingerprints_.reserve(4096); | |
968 memcpy(salt_, salt, sizeof(salt)); | |
969 } | |
970 | |
// TODO(brettw): Do we want to try to cancel the request if this happens? It
// could delay shutdown if there are a lot of URLs.
//
// Severs the back-pointer to the VisitedLinkMaster so that
// OnCompleteMainThread() will skip notifying it. Presumably called when the
// master is destroyed while a rebuild is still in flight — confirm against
// the master's destructor.
void VisitedLinkMaster::TableBuilder::DisownMaster() {
  master_ = NULL;
}
976 | |
977 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { | |
978 if (!url.is_empty()) { | |
979 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( | |
980 url.spec().data(), url.spec().length(), salt_)); | |
981 } | |
982 } | |
983 | |
984 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { | |
985 success_ = success; | |
986 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; | |
987 | |
988 // Marshal to the main thread to notify the VisitedLinkMaster that the | |
989 // rebuild is complete. | |
990 BrowserThread::PostTask( | |
991 BrowserThread::UI, FROM_HERE, | |
992 NewRunnableMethod(this, &TableBuilder::OnCompleteMainThread)); | |
993 } | |
994 | |
// Runs on the UI thread (posted by OnComplete) to hand the rebuild result
// to the master. master_ is NULL if DisownMaster() ran first. Release()
// MUST be the last statement: it may destroy this object.
void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() {
  if (master_)
    master_->OnTableRebuildComplete(success_, fingerprints_);

  // WILL (generally) DELETE THIS! This balances the AddRef in
  // VisitedLinkMaster::RebuildTableFromHistory.
  Release();
}
OLD | NEW |