OLD | NEW |
| (Empty) |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/browser/visitedlink/visitedlink_master.h" | |
6 | |
7 #if defined(OS_WIN) | |
8 #include <windows.h> | |
9 #include <io.h> | |
10 #include <shlobj.h> | |
11 #endif // defined(OS_WIN) | |
12 #include <stdio.h> | |
13 | |
14 #include <algorithm> | |
15 | |
16 #include "base/bind.h" | |
17 #include "base/bind_helpers.h" | |
18 #include "base/containers/stack_container.h" | |
19 #include "base/file_util.h" | |
20 #include "base/logging.h" | |
21 #include "base/message_loop.h" | |
22 #include "base/path_service.h" | |
23 #include "base/process_util.h" | |
24 #include "base/rand_util.h" | |
25 #include "base/string_util.h" | |
26 #include "base/threading/thread_restrictions.h" | |
27 #include "chrome/browser/visitedlink/visitedlink_delegate.h" | |
28 #include "chrome/browser/visitedlink/visitedlink_event_listener.h" | |
29 #include "content/public/browser/browser_context.h" | |
30 #include "content/public/browser/browser_thread.h" | |
31 #include "googleurl/src/gurl.h" | |
32 | |
33 using content::BrowserThread; | |
34 using file_util::ScopedFILE; | |
35 using file_util::OpenFile; | |
36 using file_util::TruncateFile; | |
37 | |
38 const int32 VisitedLinkMaster::kFileHeaderSignatureOffset = 0; | |
39 const int32 VisitedLinkMaster::kFileHeaderVersionOffset = 4; | |
40 const int32 VisitedLinkMaster::kFileHeaderLengthOffset = 8; | |
41 const int32 VisitedLinkMaster::kFileHeaderUsedOffset = 12; | |
42 const int32 VisitedLinkMaster::kFileHeaderSaltOffset = 16; | |
43 | |
44 const int32 VisitedLinkMaster::kFileCurrentVersion = 3; | |
45 | |
46 // the signature at the beginning of the URL table = "VLnk" (visited links) | |
47 const int32 VisitedLinkMaster::kFileSignature = 0x6b6e4c56; | |
48 const size_t VisitedLinkMaster::kFileHeaderSize = | |
49 kFileHeaderSaltOffset + LINK_SALT_LENGTH; | |
50 | |
51 // This value should also be the same as the smallest size in the lookup | |
52 // table in NewTableSizeForCount (prime number). | |
53 const unsigned VisitedLinkMaster::kDefaultTableSize = 16381; | |
54 | |
55 const size_t VisitedLinkMaster::kBigDeleteThreshold = 64; | |
56 | |
57 namespace { | |
58 | |
59 // Fills the given salt structure with some quasi-random values | |
60 // It is not necessary to generate a cryptographically strong random string, | |
61 // only that it be reasonably different for different users. | |
62 void GenerateSalt(uint8 salt[LINK_SALT_LENGTH]) { | |
63 DCHECK_EQ(LINK_SALT_LENGTH, 8) << "This code assumes the length of the salt"; | |
64 uint64 randval = base::RandUint64(); | |
65 memcpy(salt, &randval, 8); | |
66 } | |
67 | |
68 // Opens file on a background thread to not block UI thread. | |
69 void AsyncOpen(FILE** file, const FilePath& filename) { | |
70 *file = OpenFile(filename, "wb+"); | |
71 DLOG_IF(ERROR, !(*file)) << "Failed to open file " << filename.value(); | |
72 } | |
73 | |
74 // Returns true if the write was complete. | |
75 static bool WriteToFile(FILE* file, | |
76 off_t offset, | |
77 const void* data, | |
78 size_t data_len) { | |
79 if (fseek(file, offset, SEEK_SET) != 0) | |
80 return false; // Don't write to an invalid part of the file. | |
81 | |
82 size_t num_written = fwrite(data, 1, data_len, file); | |
83 | |
84 // The write may not make it to the kernel (stdlib may buffer the write) | |
85 // until the next fseek/fclose call. If we crash, it's easy for our used | |
86 // item count to be out of sync with the number of hashes we write. | |
87 // Protect against this by calling fflush. | |
88 int ret = fflush(file); | |
89 DCHECK_EQ(0, ret); | |
90 return num_written == data_len; | |
91 } | |
92 | |
93 // This task executes on a background thread and executes a write. This | |
94 // prevents us from blocking the UI thread doing I/O. Double pointer to FILE | |
95 // is used because file may still not be opened by the time of scheduling | |
96 // the task for execution. | |
97 void AsyncWrite(FILE** file, int32 offset, const std::string& data) { | |
98 if (*file) | |
99 WriteToFile(*file, offset, data.data(), data.size()); | |
100 } | |
101 | |
102 // Truncates the file to the current position asynchronously on a background | |
103 // thread. Double pointer to FILE is used because file may still not be opened | |
104 // by the time of scheduling the task for execution. | |
105 void AsyncTruncate(FILE** file) { | |
106 if (*file) | |
107 base::IgnoreResult(TruncateFile(*file)); | |
108 } | |
109 | |
110 // Closes the file on a background thread and releases memory used for storage | |
111 // of FILE* value. Double pointer to FILE is used because file may still not | |
112 // be opened by the time of scheduling the task for execution. | |
113 void AsyncClose(FILE** file) { | |
114 if (*file) | |
115 base::IgnoreResult(fclose(*file)); | |
116 free(file); | |
117 } | |
118 | |
119 } // namespace | |
120 | |
121 // TableBuilder --------------------------------------------------------------- | |
122 | |
123 // How rebuilding from history works | |
124 // --------------------------------- | |
125 // | |
126 // We mark that we're rebuilding from history by setting the table_builder_ | |
127 // member in VisitedLinkMaster to the TableBuilder we create. This builder | |
128 // will be called on the history thread by the history system for every URL | |
129 // in the database. | |
130 // | |
131 // The builder will store the fingerprints for those URLs, and then marshalls | |
132 // back to the main thread where the VisitedLinkMaster will be notified. The | |
133 // master then replaces its table with a new table containing the computed | |
134 // fingerprints. | |
135 // | |
136 // The builder must remain active while the history system is using it. | |
137 // Sometimes, the master will be deleted before the rebuild is complete, in | |
138 // which case it notifies the builder via DisownMaster(). The builder will | |
139 // delete itself once rebuilding is complete, and not execute any callback. | |
140 class VisitedLinkMaster::TableBuilder | |
141 : public VisitedLinkDelegate::URLEnumerator { | |
142 public: | |
143 TableBuilder(VisitedLinkMaster* master, | |
144 const uint8 salt[LINK_SALT_LENGTH]); | |
145 | |
146 // Called on the main thread when the master is being destroyed. This will | |
147 // prevent a crash when the query completes and the master is no longer | |
148 // around. We can not actually do anything but mark this fact, since the | |
149 // table will be being rebuilt simultaneously on the other thread. | |
150 void DisownMaster(); | |
151 | |
152 // VisitedLinkDelegate::URLEnumerator | |
153 virtual void OnURL(const GURL& url); | |
154 virtual void OnComplete(bool succeed); | |
155 | |
156 private: | |
157 virtual ~TableBuilder() {} | |
158 | |
159 // OnComplete mashals to this function on the main thread to do the | |
160 // notification. | |
161 void OnCompleteMainThread(); | |
162 | |
163 // Owner of this object. MAY ONLY BE ACCESSED ON THE MAIN THREAD! | |
164 VisitedLinkMaster* master_; | |
165 | |
166 // Indicates whether the operation has failed or not. | |
167 bool success_; | |
168 | |
169 // Salt for this new table. | |
170 uint8 salt_[LINK_SALT_LENGTH]; | |
171 | |
172 // Stores the fingerprints we computed on the background thread. | |
173 VisitedLinkCommon::Fingerprints fingerprints_; | |
174 | |
175 DISALLOW_COPY_AND_ASSIGN(TableBuilder); | |
176 }; | |
177 | |
178 // VisitedLinkMaster ---------------------------------------------------------- | |
179 | |
180 VisitedLinkMaster::VisitedLinkMaster(content::BrowserContext* browser_context, | |
181 VisitedLinkDelegate* delegate) | |
182 : browser_context_(browser_context), | |
183 delegate_(delegate), | |
184 listener_(new VisitedLinkEventListener( | |
185 ALLOW_THIS_IN_INITIALIZER_LIST(this), browser_context)) { | |
186 InitMembers(); | |
187 } | |
188 | |
189 VisitedLinkMaster::VisitedLinkMaster(Listener* listener, | |
190 VisitedLinkDelegate* delegate, | |
191 bool suppress_rebuild, | |
192 const FilePath& filename, | |
193 int32 default_table_size) | |
194 : browser_context_(NULL), | |
195 delegate_(delegate) { | |
196 listener_.reset(listener); | |
197 DCHECK(listener_.get()); | |
198 InitMembers(); | |
199 | |
200 database_name_override_ = filename; | |
201 table_size_override_ = default_table_size; | |
202 suppress_rebuild_ = suppress_rebuild; | |
203 } | |
204 | |
205 VisitedLinkMaster::~VisitedLinkMaster() { | |
206 if (table_builder_.get()) { | |
207 // Prevent the table builder from calling us back now that we're being | |
208 // destroyed. Note that we DON'T delete the object, since the history | |
209 // system is still writing into it. When that is complete, the table | |
210 // builder will destroy itself when it finds we are gone. | |
211 table_builder_->DisownMaster(); | |
212 } | |
213 FreeURLTable(); | |
214 // FreeURLTable() will schedule closing of the file and deletion of |file_|. | |
215 // So nothing should be done here. | |
216 } | |
217 | |
218 void VisitedLinkMaster::InitMembers() { | |
219 file_ = NULL; | |
220 shared_memory_ = NULL; | |
221 shared_memory_serial_ = 0; | |
222 used_items_ = 0; | |
223 table_size_override_ = 0; | |
224 suppress_rebuild_ = false; | |
225 sequence_token_ = BrowserThread::GetBlockingPool()->GetSequenceToken(); | |
226 | |
227 #ifndef NDEBUG | |
228 posted_asynchronous_operation_ = false; | |
229 #endif | |
230 } | |
231 | |
232 bool VisitedLinkMaster::Init() { | |
233 // We probably shouldn't be loading this from the UI thread, | |
234 // but it does need to happen early on in startup. | |
235 // http://code.google.com/p/chromium/issues/detail?id=24163 | |
236 base::ThreadRestrictions::ScopedAllowIO allow_io; | |
237 if (!InitFromFile()) | |
238 return InitFromScratch(suppress_rebuild_); | |
239 return true; | |
240 } | |
241 | |
242 VisitedLinkMaster::Hash VisitedLinkMaster::TryToAddURL(const GURL& url) { | |
243 // Extra check that we are not incognito. This should not happen. | |
244 // TODO(boliu): Move this check to HistoryService when IsOffTheRecord is | |
245 // removed from BrowserContext. | |
246 if (browser_context_ && browser_context_->IsOffTheRecord()) { | |
247 NOTREACHED(); | |
248 return null_hash_; | |
249 } | |
250 | |
251 if (!url.is_valid()) | |
252 return null_hash_; // Don't add invalid URLs. | |
253 | |
254 Fingerprint fingerprint = ComputeURLFingerprint(url.spec().data(), | |
255 url.spec().size(), | |
256 salt_); | |
257 if (table_builder_) { | |
258 // If we have a pending delete for this fingerprint, cancel it. | |
259 std::set<Fingerprint>::iterator found = | |
260 deleted_since_rebuild_.find(fingerprint); | |
261 if (found != deleted_since_rebuild_.end()) | |
262 deleted_since_rebuild_.erase(found); | |
263 | |
264 // A rebuild is in progress, save this addition in the temporary list so | |
265 // it can be added once rebuild is complete. | |
266 added_since_rebuild_.insert(fingerprint); | |
267 } | |
268 | |
269 // If the table is "full", we don't add URLs and just drop them on the floor. | |
270 // This can happen if we get thousands of new URLs and something causes | |
271 // the table resizing to fail. This check prevents a hang in that case. Note | |
272 // that this is *not* the resize limit, this is just a sanity check. | |
273 if (used_items_ / 8 > table_length_ / 10) | |
274 return null_hash_; // Table is more than 80% full. | |
275 | |
276 return AddFingerprint(fingerprint, true); | |
277 } | |
278 | |
279 void VisitedLinkMaster::PostIOTask(const tracked_objects::Location& from_here, | |
280 const base::Closure& task) { | |
281 BrowserThread::GetBlockingPool()->PostSequencedWorkerTask(sequence_token_, | |
282 from_here, task); | |
283 } | |
284 | |
285 void VisitedLinkMaster::AddURL(const GURL& url) { | |
286 Hash index = TryToAddURL(url); | |
287 if (!table_builder_ && index != null_hash_) { | |
288 // Not rebuilding, so we want to keep the file on disk up-to-date. | |
289 WriteUsedItemCountToFile(); | |
290 WriteHashRangeToFile(index, index); | |
291 ResizeTableIfNecessary(); | |
292 } | |
293 } | |
294 | |
295 void VisitedLinkMaster::AddURLs(const std::vector<GURL>& url) { | |
296 for (std::vector<GURL>::const_iterator i = url.begin(); | |
297 i != url.end(); ++i) { | |
298 Hash index = TryToAddURL(*i); | |
299 if (!table_builder_ && index != null_hash_) | |
300 ResizeTableIfNecessary(); | |
301 } | |
302 | |
303 // Keeps the file on disk up-to-date. | |
304 if (!table_builder_) | |
305 WriteFullTable(); | |
306 } | |
307 | |
308 void VisitedLinkMaster::DeleteAllURLs() { | |
309 // Any pending modifications are invalid. | |
310 added_since_rebuild_.clear(); | |
311 deleted_since_rebuild_.clear(); | |
312 | |
313 // Clear the hash table. | |
314 used_items_ = 0; | |
315 memset(hash_table_, 0, this->table_length_ * sizeof(Fingerprint)); | |
316 | |
317 // Resize it if it is now too empty. Resize may write the new table out for | |
318 // us, otherwise, schedule writing the new table to disk ourselves. | |
319 if (!ResizeTableIfNecessary()) | |
320 WriteFullTable(); | |
321 | |
322 listener_->Reset(); | |
323 } | |
324 | |
325 VisitedLinkDelegate* VisitedLinkMaster::GetDelegate() { | |
326 return delegate_; | |
327 } | |
328 | |
329 void VisitedLinkMaster::DeleteURLs(URLIterator* urls) { | |
330 if (!urls->HasNextURL()) | |
331 return; | |
332 | |
333 listener_->Reset(); | |
334 | |
335 if (table_builder_) { | |
336 // A rebuild is in progress, save this deletion in the temporary list so | |
337 // it can be added once rebuild is complete. | |
338 while (urls->HasNextURL()) { | |
339 const GURL& url(urls->NextURL()); | |
340 if (!url.is_valid()) | |
341 continue; | |
342 | |
343 Fingerprint fingerprint = | |
344 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_); | |
345 deleted_since_rebuild_.insert(fingerprint); | |
346 | |
347 // If the URL was just added and now we're deleting it, it may be in the | |
348 // list of things added since the last rebuild. Delete it from that list. | |
349 std::set<Fingerprint>::iterator found = | |
350 added_since_rebuild_.find(fingerprint); | |
351 if (found != added_since_rebuild_.end()) | |
352 added_since_rebuild_.erase(found); | |
353 | |
354 // Delete the URLs from the in-memory table, but don't bother writing | |
355 // to disk since it will be replaced soon. | |
356 DeleteFingerprint(fingerprint, false); | |
357 } | |
358 return; | |
359 } | |
360 | |
361 // Compute the deleted URLs' fingerprints and delete them | |
362 std::set<Fingerprint> deleted_fingerprints; | |
363 while (urls->HasNextURL()) { | |
364 const GURL& url(urls->NextURL()); | |
365 if (!url.is_valid()) | |
366 continue; | |
367 deleted_fingerprints.insert( | |
368 ComputeURLFingerprint(url.spec().data(), url.spec().size(), salt_)); | |
369 } | |
370 DeleteFingerprintsFromCurrentTable(deleted_fingerprints); | |
371 } | |
372 | |
373 // See VisitedLinkCommon::IsVisited which should be in sync with this algorithm | |
374 VisitedLinkMaster::Hash VisitedLinkMaster::AddFingerprint( | |
375 Fingerprint fingerprint, | |
376 bool send_notifications) { | |
377 if (!hash_table_ || table_length_ == 0) { | |
378 NOTREACHED(); // Not initialized. | |
379 return null_hash_; | |
380 } | |
381 | |
382 Hash cur_hash = HashFingerprint(fingerprint); | |
383 Hash first_hash = cur_hash; | |
384 while (true) { | |
385 Fingerprint cur_fingerprint = FingerprintAt(cur_hash); | |
386 if (cur_fingerprint == fingerprint) | |
387 return null_hash_; // This fingerprint is already in there, do nothing. | |
388 | |
389 if (cur_fingerprint == null_fingerprint_) { | |
390 // End of probe sequence found, insert here. | |
391 hash_table_[cur_hash] = fingerprint; | |
392 used_items_++; | |
393 // If allowed, notify listener that a new visited link was added. | |
394 if (send_notifications) | |
395 listener_->Add(fingerprint); | |
396 return cur_hash; | |
397 } | |
398 | |
399 // Advance in the probe sequence. | |
400 cur_hash = IncrementHash(cur_hash); | |
401 if (cur_hash == first_hash) { | |
402 // This means that we've wrapped around and are about to go into an | |
403 // infinite loop. Something was wrong with the hashtable resizing | |
404 // logic, so stop here. | |
405 NOTREACHED(); | |
406 return null_hash_; | |
407 } | |
408 } | |
409 } | |
410 | |
411 void VisitedLinkMaster::DeleteFingerprintsFromCurrentTable( | |
412 const std::set<Fingerprint>& fingerprints) { | |
413 bool bulk_write = (fingerprints.size() > kBigDeleteThreshold); | |
414 | |
415 // Delete the URLs from the table. | |
416 for (std::set<Fingerprint>::const_iterator i = fingerprints.begin(); | |
417 i != fingerprints.end(); ++i) | |
418 DeleteFingerprint(*i, !bulk_write); | |
419 | |
420 // These deleted fingerprints may make us shrink the table. | |
421 if (ResizeTableIfNecessary()) | |
422 return; // The resize function wrote the new table to disk for us. | |
423 | |
424 // Nobody wrote this out for us, write the full file to disk. | |
425 if (bulk_write) | |
426 WriteFullTable(); | |
427 } | |
428 | |
429 bool VisitedLinkMaster::DeleteFingerprint(Fingerprint fingerprint, | |
430 bool update_file) { | |
431 if (!hash_table_ || table_length_ == 0) { | |
432 NOTREACHED(); // Not initialized. | |
433 return false; | |
434 } | |
435 if (!IsVisited(fingerprint)) | |
436 return false; // Not in the database to delete. | |
437 | |
438 // First update the header used count. | |
439 used_items_--; | |
440 if (update_file) | |
441 WriteUsedItemCountToFile(); | |
442 | |
443 Hash deleted_hash = HashFingerprint(fingerprint); | |
444 | |
445 // Find the range of "stuff" in the hash table that is adjacent to this | |
446 // fingerprint. These are things that could be affected by the change in | |
447 // the hash table. Since we use linear probing, anything after the deleted | |
448 // item up until an empty item could be affected. | |
449 Hash end_range = deleted_hash; | |
450 while (true) { | |
451 Hash next_hash = IncrementHash(end_range); | |
452 if (next_hash == deleted_hash) | |
453 break; // We wrapped around and the whole table is full. | |
454 if (!hash_table_[next_hash]) | |
455 break; // Found the last spot. | |
456 end_range = next_hash; | |
457 } | |
458 | |
459 // We could get all fancy and move the affected fingerprints around, but | |
460 // instead we just remove them all and re-add them (minus our deleted one). | |
461 // This will mean there's a small window of time where the affected links | |
462 // won't be marked visited. | |
463 base::StackVector<Fingerprint, 32> shuffled_fingerprints; | |
464 Hash stop_loop = IncrementHash(end_range); // The end range is inclusive. | |
465 for (Hash i = deleted_hash; i != stop_loop; i = IncrementHash(i)) { | |
466 if (hash_table_[i] != fingerprint) { | |
467 // Don't save the one we're deleting! | |
468 shuffled_fingerprints->push_back(hash_table_[i]); | |
469 | |
470 // This will balance the increment of this value in AddFingerprint below | |
471 // so there is no net change. | |
472 used_items_--; | |
473 } | |
474 hash_table_[i] = null_fingerprint_; | |
475 } | |
476 | |
477 if (!shuffled_fingerprints->empty()) { | |
478 // Need to add the new items back. | |
479 for (size_t i = 0; i < shuffled_fingerprints->size(); i++) | |
480 AddFingerprint(shuffled_fingerprints[i], false); | |
481 } | |
482 | |
483 // Write the affected range to disk [deleted_hash, end_range]. | |
484 if (update_file) | |
485 WriteHashRangeToFile(deleted_hash, end_range); | |
486 | |
487 return true; | |
488 } | |
489 | |
490 void VisitedLinkMaster::WriteFullTable() { | |
491 // This function can get called when the file is open, for example, when we | |
492 // resize the table. We must handle this case and not try to reopen the file, | |
493 // since there may be write operations pending on the file I/O thread. | |
494 // | |
495 // Note that once we start writing, we do not delete on error. This means | |
496 // there can be a partial file, but the short file will be detected next time | |
497 // we start, and will be replaced. | |
498 // | |
499 // This might possibly get corrupted if we crash in the middle of writing. | |
500 // We should pick up the most common types of these failures when we notice | |
501 // that the file size is different when we load it back in, and then we will | |
502 // regenerate the table. | |
503 if (!file_) { | |
504 file_ = static_cast<FILE**>(calloc(1, sizeof(*file_))); | |
505 FilePath filename; | |
506 GetDatabaseFileName(&filename); | |
507 PostIOTask(FROM_HERE, base::Bind(&AsyncOpen, file_, filename)); | |
508 } | |
509 | |
510 // Write the new header. | |
511 int32 header[4]; | |
512 header[0] = kFileSignature; | |
513 header[1] = kFileCurrentVersion; | |
514 header[2] = table_length_; | |
515 header[3] = used_items_; | |
516 WriteToFile(file_, 0, header, sizeof(header)); | |
517 WriteToFile(file_, sizeof(header), salt_, LINK_SALT_LENGTH); | |
518 | |
519 // Write the hash data. | |
520 WriteToFile(file_, kFileHeaderSize, | |
521 hash_table_, table_length_ * sizeof(Fingerprint)); | |
522 | |
523 // The hash table may have shrunk, so make sure this is the end. | |
524 PostIOTask(FROM_HERE, base::Bind(&AsyncTruncate, file_)); | |
525 } | |
526 | |
527 bool VisitedLinkMaster::InitFromFile() { | |
528 DCHECK(file_ == NULL); | |
529 | |
530 FilePath filename; | |
531 GetDatabaseFileName(&filename); | |
532 ScopedFILE file_closer(OpenFile(filename, "rb+")); | |
533 if (!file_closer.get()) | |
534 return false; | |
535 | |
536 int32 num_entries, used_count; | |
537 if (!ReadFileHeader(file_closer.get(), &num_entries, &used_count, salt_)) | |
538 return false; // Header isn't valid. | |
539 | |
540 // Allocate and read the table. | |
541 if (!CreateURLTable(num_entries, false)) | |
542 return false; | |
543 if (!ReadFromFile(file_closer.get(), kFileHeaderSize, | |
544 hash_table_, num_entries * sizeof(Fingerprint))) { | |
545 FreeURLTable(); | |
546 return false; | |
547 } | |
548 used_items_ = used_count; | |
549 | |
550 #ifndef NDEBUG | |
551 DebugValidate(); | |
552 #endif | |
553 | |
554 file_ = static_cast<FILE**>(malloc(sizeof(*file_))); | |
555 *file_ = file_closer.release(); | |
556 return true; | |
557 } | |
558 | |
559 bool VisitedLinkMaster::InitFromScratch(bool suppress_rebuild) { | |
560 int32 table_size = kDefaultTableSize; | |
561 if (table_size_override_) | |
562 table_size = table_size_override_; | |
563 | |
564 // The salt must be generated before the table so that it can be copied to | |
565 // the shared memory. | |
566 GenerateSalt(salt_); | |
567 if (!CreateURLTable(table_size, true)) | |
568 return false; | |
569 | |
570 #ifndef NDEBUG | |
571 DebugValidate(); | |
572 #endif | |
573 | |
574 if (suppress_rebuild) { | |
575 // When we disallow rebuilds (normally just unit tests), just use the | |
576 // current empty table. | |
577 WriteFullTable(); | |
578 return true; | |
579 } | |
580 | |
581 // This will build the table from history. On the first run, history will | |
582 // be empty, so this will be correct. This will also write the new table | |
583 // to disk. We don't want to save explicitly here, since the rebuild may | |
584 // not complete, leaving us with an empty but valid visited link database. | |
585 // In the future, we won't know we need to try rebuilding again. | |
586 return RebuildTableFromDelegate(); | |
587 } | |
588 | |
589 bool VisitedLinkMaster::ReadFileHeader(FILE* file, | |
590 int32* num_entries, | |
591 int32* used_count, | |
592 uint8 salt[LINK_SALT_LENGTH]) { | |
593 // Get file size. | |
594 // Note that there is no need to seek back to the original location in the | |
595 // file since ReadFromFile() [which is the next call accessing the file] | |
596 // seeks before reading. | |
597 if (fseek(file, 0, SEEK_END) == -1) | |
598 return false; | |
599 size_t file_size = ftell(file); | |
600 | |
601 if (file_size <= kFileHeaderSize) | |
602 return false; | |
603 | |
604 uint8 header[kFileHeaderSize]; | |
605 if (!ReadFromFile(file, 0, &header, kFileHeaderSize)) | |
606 return false; | |
607 | |
608 // Verify the signature. | |
609 int32 signature; | |
610 memcpy(&signature, &header[kFileHeaderSignatureOffset], sizeof(signature)); | |
611 if (signature != kFileSignature) | |
612 return false; | |
613 | |
614 // Verify the version is up-to-date. As with other read errors, a version | |
615 // mistmatch will trigger a rebuild of the database from history, which will | |
616 // have the effect of migrating the database. | |
617 int32 version; | |
618 memcpy(&version, &header[kFileHeaderVersionOffset], sizeof(version)); | |
619 if (version != kFileCurrentVersion) | |
620 return false; // Bad version. | |
621 | |
622 // Read the table size and make sure it matches the file size. | |
623 memcpy(num_entries, &header[kFileHeaderLengthOffset], sizeof(*num_entries)); | |
624 if (*num_entries * sizeof(Fingerprint) + kFileHeaderSize != file_size) | |
625 return false; // Bad size. | |
626 | |
627 // Read the used item count. | |
628 memcpy(used_count, &header[kFileHeaderUsedOffset], sizeof(*used_count)); | |
629 if (*used_count > *num_entries) | |
630 return false; // Bad used item count; | |
631 | |
632 // Read the salt. | |
633 memcpy(salt, &header[kFileHeaderSaltOffset], LINK_SALT_LENGTH); | |
634 | |
635 // This file looks OK from the header's perspective. | |
636 return true; | |
637 } | |
638 | |
639 bool VisitedLinkMaster::GetDatabaseFileName(FilePath* filename) { | |
640 if (!database_name_override_.empty()) { | |
641 // use this filename, the directory must exist | |
642 *filename = database_name_override_; | |
643 return true; | |
644 } | |
645 | |
646 if (!browser_context_ || browser_context_->GetPath().empty()) | |
647 return false; | |
648 | |
649 FilePath profile_dir = browser_context_->GetPath(); | |
650 *filename = profile_dir.Append(FILE_PATH_LITERAL("Visited Links")); | |
651 return true; | |
652 } | |
653 | |
654 // Initializes the shared memory structure. The salt should already be filled | |
655 // in so that it can be written to the shared memory | |
656 bool VisitedLinkMaster::CreateURLTable(int32 num_entries, bool init_to_empty) { | |
657 // The table is the size of the table followed by the entries. | |
658 uint32 alloc_size = num_entries * sizeof(Fingerprint) + sizeof(SharedHeader); | |
659 | |
660 // Create the shared memory object. | |
661 shared_memory_ = new base::SharedMemory(); | |
662 if (!shared_memory_) | |
663 return false; | |
664 | |
665 if (!shared_memory_->CreateAndMapAnonymous(alloc_size)) { | |
666 delete shared_memory_; | |
667 shared_memory_ = NULL; | |
668 return false; | |
669 } | |
670 | |
671 if (init_to_empty) { | |
672 memset(shared_memory_->memory(), 0, alloc_size); | |
673 used_items_ = 0; | |
674 } | |
675 table_length_ = num_entries; | |
676 | |
677 // Save the header for other processes to read. | |
678 SharedHeader* header = static_cast<SharedHeader*>(shared_memory_->memory()); | |
679 header->length = table_length_; | |
680 memcpy(header->salt, salt_, LINK_SALT_LENGTH); | |
681 | |
682 // Our table pointer is just the data immediately following the size. | |
683 hash_table_ = reinterpret_cast<Fingerprint*>( | |
684 static_cast<char*>(shared_memory_->memory()) + sizeof(SharedHeader)); | |
685 | |
686 return true; | |
687 } | |
688 | |
689 bool VisitedLinkMaster::BeginReplaceURLTable(int32 num_entries) { | |
690 base::SharedMemory *old_shared_memory = shared_memory_; | |
691 Fingerprint* old_hash_table = hash_table_; | |
692 int32 old_table_length = table_length_; | |
693 if (!CreateURLTable(num_entries, true)) { | |
694 // Try to put back the old state. | |
695 shared_memory_ = old_shared_memory; | |
696 hash_table_ = old_hash_table; | |
697 table_length_ = old_table_length; | |
698 return false; | |
699 } | |
700 | |
701 #ifndef NDEBUG | |
702 DebugValidate(); | |
703 #endif | |
704 | |
705 return true; | |
706 } | |
707 | |
708 void VisitedLinkMaster::FreeURLTable() { | |
709 if (shared_memory_) { | |
710 delete shared_memory_; | |
711 shared_memory_ = NULL; | |
712 } | |
713 if (!file_) | |
714 return; | |
715 PostIOTask(FROM_HERE, base::Bind(&AsyncClose, file_)); | |
716 // AsyncClose() will close the file and free the memory pointed by |file_|. | |
717 file_ = NULL; | |
718 } | |
719 | |
720 bool VisitedLinkMaster::ResizeTableIfNecessary() { | |
721 DCHECK(table_length_ > 0) << "Must have a table"; | |
722 | |
723 // Load limits for good performance/space. We are pretty conservative about | |
724 // keeping the table not very full. This is because we use linear probing | |
725 // which increases the likelihood of clumps of entries which will reduce | |
726 // performance. | |
727 const float max_table_load = 0.5f; // Grow when we're > this full. | |
728 const float min_table_load = 0.2f; // Shrink when we're < this full. | |
729 | |
730 float load = ComputeTableLoad(); | |
731 if (load < max_table_load && | |
732 (table_length_ <= static_cast<float>(kDefaultTableSize) || | |
733 load > min_table_load)) | |
734 return false; | |
735 | |
736 // Table needs to grow or shrink. | |
737 int new_size = NewTableSizeForCount(used_items_); | |
738 DCHECK(new_size > used_items_); | |
739 DCHECK(load <= min_table_load || new_size > table_length_); | |
740 ResizeTable(new_size); | |
741 return true; | |
742 } | |
743 | |
744 void VisitedLinkMaster::ResizeTable(int32 new_size) { | |
745 DCHECK(shared_memory_ && shared_memory_->memory() && hash_table_); | |
746 shared_memory_serial_++; | |
747 | |
748 #ifndef NDEBUG | |
749 DebugValidate(); | |
750 #endif | |
751 | |
752 base::SharedMemory* old_shared_memory = shared_memory_; | |
753 Fingerprint* old_hash_table = hash_table_; | |
754 int32 old_table_length = table_length_; | |
755 if (!BeginReplaceURLTable(new_size)) | |
756 return; | |
757 | |
758 // Now we have two tables, our local copy which is the old one, and the new | |
759 // one loaded into this object where we need to copy the data. | |
760 for (int32 i = 0; i < old_table_length; i++) { | |
761 Fingerprint cur = old_hash_table[i]; | |
762 if (cur) | |
763 AddFingerprint(cur, false); | |
764 } | |
765 | |
766 // On error unmapping, just forget about it since we can't do anything | |
767 // else to release it. | |
768 delete old_shared_memory; | |
769 | |
770 // Send an update notification to all child processes so they read the new | |
771 // table. | |
772 listener_->NewTable(shared_memory_); | |
773 | |
774 #ifndef NDEBUG | |
775 DebugValidate(); | |
776 #endif | |
777 | |
778 // The new table needs to be written to disk. | |
779 WriteFullTable(); | |
780 } | |
781 | |
782 uint32 VisitedLinkMaster::NewTableSizeForCount(int32 item_count) const { | |
783 // These table sizes are selected to be the maximum prime number less than | |
784 // a "convenient" multiple of 1K. | |
785 static const int table_sizes[] = { | |
786 16381, // 16K = 16384 <- don't shrink below this table size | |
787 // (should be == default_table_size) | |
788 32767, // 32K = 32768 | |
789 65521, // 64K = 65536 | |
790 130051, // 128K = 131072 | |
791 262127, // 256K = 262144 | |
792 524269, // 512K = 524288 | |
793 1048549, // 1M = 1048576 | |
794 2097143, // 2M = 2097152 | |
795 4194301, // 4M = 4194304 | |
796 8388571, // 8M = 8388608 | |
797 16777199, // 16M = 16777216 | |
798 33554347}; // 32M = 33554432 | |
799 | |
800 // Try to leave the table 33% full. | |
801 int desired = item_count * 3; | |
802 | |
803 // Find the closest prime. | |
804 for (size_t i = 0; i < arraysize(table_sizes); i ++) { | |
805 if (table_sizes[i] > desired) | |
806 return table_sizes[i]; | |
807 } | |
808 | |
809 // Growing very big, just approximate a "good" number, not growing as much | |
810 // as normal. | |
811 return item_count * 2 - 1; | |
812 } | |
813 | |
814 // See the TableBuilder definition in the header file for how this works. | |
815 bool VisitedLinkMaster::RebuildTableFromDelegate() { | |
816 DCHECK(!table_builder_); | |
817 | |
818 // TODO(brettw) make sure we have reasonable salt! | |
819 table_builder_ = new TableBuilder(this, salt_); | |
820 delegate_->RebuildTable(table_builder_); | |
821 return true; | |
822 } | |
823 | |
824 // See the TableBuilder declaration above for how this works. | |
825 void VisitedLinkMaster::OnTableRebuildComplete( | |
826 bool success, | |
827 const std::vector<Fingerprint>& fingerprints) { | |
828 if (success) { | |
829 // Replace the old table with a new blank one. | |
830 shared_memory_serial_++; | |
831 | |
832 // We are responsible for freeing it AFTER it has been replaced if | |
833 // replacement succeeds. | |
834 base::SharedMemory* old_shared_memory = shared_memory_; | |
835 | |
836 int new_table_size = NewTableSizeForCount( | |
837 static_cast<int>(fingerprints.size() + added_since_rebuild_.size())); | |
838 if (BeginReplaceURLTable(new_table_size)) { | |
839 // Free the old table. | |
840 delete old_shared_memory; | |
841 | |
842 // Add the stored fingerprints to the hash table. | |
843 for (size_t i = 0; i < fingerprints.size(); i++) | |
844 AddFingerprint(fingerprints[i], false); | |
845 | |
846 // Also add anything that was added while we were asynchronously | |
847 // generating the new table. | |
848 for (std::set<Fingerprint>::iterator i = added_since_rebuild_.begin(); | |
849 i != added_since_rebuild_.end(); ++i) | |
850 AddFingerprint(*i, false); | |
851 added_since_rebuild_.clear(); | |
852 | |
853 // Now handle deletions. | |
854 DeleteFingerprintsFromCurrentTable(deleted_since_rebuild_); | |
855 deleted_since_rebuild_.clear(); | |
856 | |
857 // Send an update notification to all child processes. | |
858 listener_->NewTable(shared_memory_); | |
859 | |
860 WriteFullTable(); | |
861 } | |
862 } | |
863 table_builder_ = NULL; // Will release our reference to the builder. | |
864 | |
865 // Notify the unit test that the rebuild is complete (will be NULL in prod.) | |
866 if (!rebuild_complete_task_.is_null()) { | |
867 rebuild_complete_task_.Run(); | |
868 rebuild_complete_task_.Reset(); | |
869 } | |
870 } | |
871 | |
872 void VisitedLinkMaster::WriteToFile(FILE** file, | |
873 off_t offset, | |
874 void* data, | |
875 int32 data_size) { | |
876 #ifndef NDEBUG | |
877 posted_asynchronous_operation_ = true; | |
878 #endif | |
879 PostIOTask(FROM_HERE, | |
880 base::Bind(&AsyncWrite, file, offset, | |
881 std::string(static_cast<const char*>(data), data_size))); | |
882 } | |
883 | |
884 void VisitedLinkMaster::WriteUsedItemCountToFile() { | |
885 if (!file_) | |
886 return; // See comment on the file_ variable for why this might happen. | |
887 WriteToFile(file_, kFileHeaderUsedOffset, &used_items_, sizeof(used_items_)); | |
888 } | |
889 | |
890 void VisitedLinkMaster::WriteHashRangeToFile(Hash first_hash, Hash last_hash) { | |
891 if (!file_) | |
892 return; // See comment on the file_ variable for why this might happen. | |
893 if (last_hash < first_hash) { | |
894 // Handle wraparound at 0. This first write is first_hash->EOF | |
895 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, | |
896 &hash_table_[first_hash], | |
897 (table_length_ - first_hash + 1) * sizeof(Fingerprint)); | |
898 | |
899 // Now do 0->last_lash. | |
900 WriteToFile(file_, kFileHeaderSize, hash_table_, | |
901 (last_hash + 1) * sizeof(Fingerprint)); | |
902 } else { | |
903 // Normal case, just write the range. | |
904 WriteToFile(file_, first_hash * sizeof(Fingerprint) + kFileHeaderSize, | |
905 &hash_table_[first_hash], | |
906 (last_hash - first_hash + 1) * sizeof(Fingerprint)); | |
907 } | |
908 } | |
909 | |
910 bool VisitedLinkMaster::ReadFromFile(FILE* file, | |
911 off_t offset, | |
912 void* data, | |
913 size_t data_size) { | |
914 #ifndef NDEBUG | |
915 // Since this function is synchronous, we require that no asynchronous | |
916 // operations could possibly be pending. | |
917 DCHECK(!posted_asynchronous_operation_); | |
918 #endif | |
919 | |
920 if (fseek(file, offset, SEEK_SET) != 0) | |
921 return false; | |
922 | |
923 size_t num_read = fread(data, 1, data_size, file); | |
924 return num_read == data_size; | |
925 } | |
926 | |
927 // VisitedLinkTableBuilder ---------------------------------------------------- | |
928 | |
929 VisitedLinkMaster::TableBuilder::TableBuilder( | |
930 VisitedLinkMaster* master, | |
931 const uint8 salt[LINK_SALT_LENGTH]) | |
932 : master_(master), | |
933 success_(true) { | |
934 fingerprints_.reserve(4096); | |
935 memcpy(salt_, salt, LINK_SALT_LENGTH * sizeof(uint8)); | |
936 } | |
937 | |
938 // TODO(brettw): Do we want to try to cancel the request if this happens? It | |
939 // could delay shutdown if there are a lot of URLs. | |
940 void VisitedLinkMaster::TableBuilder::DisownMaster() { | |
941 master_ = NULL; | |
942 } | |
943 | |
944 void VisitedLinkMaster::TableBuilder::OnURL(const GURL& url) { | |
945 if (!url.is_empty()) { | |
946 fingerprints_.push_back(VisitedLinkMaster::ComputeURLFingerprint( | |
947 url.spec().data(), url.spec().length(), salt_)); | |
948 } | |
949 } | |
950 | |
951 void VisitedLinkMaster::TableBuilder::OnComplete(bool success) { | |
952 success_ = success; | |
953 DLOG_IF(WARNING, !success) << "Unable to rebuild visited links"; | |
954 | |
955 // Marshal to the main thread to notify the VisitedLinkMaster that the | |
956 // rebuild is complete. | |
957 BrowserThread::PostTask( | |
958 BrowserThread::UI, FROM_HERE, | |
959 base::Bind(&TableBuilder::OnCompleteMainThread, this)); | |
960 } | |
961 | |
962 void VisitedLinkMaster::TableBuilder::OnCompleteMainThread() { | |
963 if (master_) | |
964 master_->OnTableRebuildComplete(success_, fingerprints_); | |
965 } | |
OLD | NEW |