| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" | 5 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h" |
| 6 | 6 |
| 7 #include <stddef.h> |
| 8 |
| 7 #include "base/files/file_util.h" | 9 #include "base/files/file_util.h" |
| 8 #include "base/files/scoped_file.h" | 10 #include "base/files/scoped_file.h" |
| 9 #include "base/md5.h" | 11 #include "base/md5.h" |
| 10 #include "base/metrics/histogram.h" | 12 #include "base/metrics/histogram.h" |
| 11 #include "base/metrics/sparse_histogram.h" | 13 #include "base/metrics/sparse_histogram.h" |
| 12 #include "components/safe_browsing_db/prefix_set.h" | 14 #include "components/safe_browsing_db/prefix_set.h" |
| 13 | 15 |
| 14 namespace safe_browsing { | 16 namespace safe_browsing { |
| 15 | 17 |
| 16 namespace { | 18 namespace { |
| 17 | 19 |
| 18 // NOTE(shess): kFileMagic should not be a byte-wise palindrome, so | 20 // NOTE(shess): kFileMagic should not be a byte-wise palindrome, so |
| 19 // that byte-order changes force corruption. | 21 // that byte-order changes force corruption. |
| 20 const int32 kFileMagic = 0x600D71FE; | 22 const int32_t kFileMagic = 0x600D71FE; |
| 21 | 23 |
| 22 // Version history: | 24 // Version history: |
| 23 // Version 6: aad08754/r2814 by erikkay@google.com on 2008-10-02 (sqlite) | 25 // Version 6: aad08754/r2814 by erikkay@google.com on 2008-10-02 (sqlite) |
| 24 // Version 7: 6afe28a5/r37435 by shess@chromium.org on 2010-01-28 | 26 // Version 7: 6afe28a5/r37435 by shess@chromium.org on 2010-01-28 |
| 25 // Version 8: d3dd0715/r259791 by shess@chromium.org on 2014-03-27 | 27 // Version 8: d3dd0715/r259791 by shess@chromium.org on 2014-03-27 |
| 26 const int32 kFileVersion = 8; | 28 const int32_t kFileVersion = 8; |
| 27 | 29 |
| 28 // ReadAndVerifyHeader() returns this in case of error. | 30 // ReadAndVerifyHeader() returns this in case of error. |
| 29 const int32 kInvalidVersion = -1; | 31 const int32_t kInvalidVersion = -1; |
| 30 | 32 |
| 31 // Starting with version 8, the storage is sorted and can be sharded to allow | 33 // Starting with version 8, the storage is sorted and can be sharded to allow |
| 32 // updates to be done with lower memory requirements. Newly written files will | 34 // updates to be done with lower memory requirements. Newly written files will |
| 33 // be sharded to need less than this amount of memory during update. Larger | 35 // be sharded to need less than this amount of memory during update. Larger |
| 34 // values are preferred to minimize looping overhead during processing. | 36 // values are preferred to minimize looping overhead during processing. |
| 35 const int64 kUpdateStorageBytes = 100 * 1024; | 37 const int64_t kUpdateStorageBytes = 100 * 1024; |
| 36 | 38 |
| 37 // Prevent excessive sharding by setting a lower limit on the shard stride. | 39 // Prevent excessive sharding by setting a lower limit on the shard stride. |
| 38 // Smaller values should work fine, but very small values will probably lead to | 40 // Smaller values should work fine, but very small values will probably lead to |
| 39 // poor performance. Shard stride is indirectly related to | 41 // poor performance. Shard stride is indirectly related to |
| 40 // |kUpdateStorageBytes|, setting that very small will bump against this. | 42 // |kUpdateStorageBytes|, setting that very small will bump against this. |
| 41 const uint32 kMinShardStride = 1 << 24; | 43 const uint32_t kMinShardStride = 1 << 24; |
| 42 | 44 |
| 43 // Strides over the entire SBPrefix space. | 45 // Strides over the entire SBPrefix space. |
| 44 const uint64 kMaxShardStride = 1ULL << 32; | 46 const uint64_t kMaxShardStride = 1ULL << 32; |
| 45 | 47 |
| 46 // Maximum SBPrefix value. | 48 // Maximum SBPrefix value. |
| 47 const SBPrefix kMaxSBPrefix = 0xFFFFFFFF; | 49 const SBPrefix kMaxSBPrefix = 0xFFFFFFFF; |
| 48 | 50 |
| 49 // Header at the front of the main database file. | 51 // Header at the front of the main database file. |
| 50 struct FileHeader { | 52 struct FileHeader { |
| 51 int32 magic, version; | 53 int32_t magic, version; |
| 52 uint32 add_chunk_count, sub_chunk_count; | 54 uint32_t add_chunk_count, sub_chunk_count; |
| 53 uint32 shard_stride; | 55 uint32_t shard_stride; |
| 54 // TODO(shess): Is this where 64-bit will bite me? Perhaps write a | 56 // TODO(shess): Is this where 64-bit will bite me? Perhaps write a |
| 55 // specialized read/write? | 57 // specialized read/write? |
| 56 }; | 58 }; |
| 57 | 59 |
| 58 // Header for each chunk in the chunk-accumulation file. | 60 // Header for each chunk in the chunk-accumulation file. |
| 59 struct ChunkHeader { | 61 struct ChunkHeader { |
| 60 uint32 add_prefix_count, sub_prefix_count; | 62 uint32_t add_prefix_count, sub_prefix_count; |
| 61 uint32 add_hash_count, sub_hash_count; | 63 uint32_t add_hash_count, sub_hash_count; |
| 62 }; | 64 }; |
| 63 | 65 |
| 64 // Header for each shard of data in the main database file. | 66 // Header for each shard of data in the main database file. |
| 65 struct ShardHeader { | 67 struct ShardHeader { |
| 66 uint32 add_prefix_count, sub_prefix_count; | 68 uint32_t add_prefix_count, sub_prefix_count; |
| 67 uint32 add_hash_count, sub_hash_count; | 69 uint32_t add_hash_count, sub_hash_count; |
| 68 }; | 70 }; |
| 69 | 71 |
| 70 // Enumerate different format-change events for histogramming | 72 // Enumerate different format-change events for histogramming |
| 71 // purposes. DO NOT CHANGE THE ORDERING OF THESE VALUES. | 73 // purposes. DO NOT CHANGE THE ORDERING OF THESE VALUES. |
| 72 enum FormatEventType { | 74 enum FormatEventType { |
| 73 // Corruption detected, broken down by file format. | 75 // Corruption detected, broken down by file format. |
| 74 FORMAT_EVENT_FILE_CORRUPT, | 76 FORMAT_EVENT_FILE_CORRUPT, |
| 75 FORMAT_EVENT_SQLITE_CORRUPT, // Obsolete | 77 FORMAT_EVENT_SQLITE_CORRUPT, // Obsolete |
| 76 | 78 |
| 77 // The type of format found in the file. The expected case (new | 79 // The type of format found in the file. The expected case (new |
| (...skipping 109 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 187 | 189 |
| 188 // Write all of |values| to |fp|, and fold the data into the checksum | 190 // Write all of |values| to |fp|, and fold the data into the checksum |
| 189 // in |context|, if non-NULL. Returns true if all items successful. | 191 // in |context|, if non-NULL. Returns true if all items successful. |
| 190 template <typename CT> | 192 template <typename CT> |
| 191 bool WriteContainer(const CT& values, FILE* fp, | 193 bool WriteContainer(const CT& values, FILE* fp, |
| 192 base::MD5Context* context) { | 194 base::MD5Context* context) { |
| 193 return WriteRange(values.begin(), values.end(), fp, context); | 195 return WriteRange(values.begin(), values.end(), fp, context); |
| 194 } | 196 } |
| 195 | 197 |
| 196 // Delete the chunks in |deleted| from |chunks|. | 198 // Delete the chunks in |deleted| from |chunks|. |
| 197 void DeleteChunksFromSet(const base::hash_set<int32>& deleted, | 199 void DeleteChunksFromSet(const base::hash_set<int32_t>& deleted, |
| 198 std::set<int32>* chunks) { | 200 std::set<int32_t>* chunks) { |
| 199 for (std::set<int32>::iterator iter = chunks->begin(); | 201 for (std::set<int32_t>::iterator iter = chunks->begin(); |
| 200 iter != chunks->end();) { | 202 iter != chunks->end();) { |
| 201 std::set<int32>::iterator prev = iter++; | 203 std::set<int32_t>::iterator prev = iter++; |
| 202 if (deleted.count(*prev) > 0) | 204 if (deleted.count(*prev) > 0) |
| 203 chunks->erase(prev); | 205 chunks->erase(prev); |
| 204 } | 206 } |
| 205 } | 207 } |
| 206 | 208 |
| 207 bool ReadAndVerifyChecksum(FILE* fp, base::MD5Context* context) { | 209 bool ReadAndVerifyChecksum(FILE* fp, base::MD5Context* context) { |
| 208 base::MD5Digest calculated_digest; | 210 base::MD5Digest calculated_digest; |
| 209 base::MD5IntermediateFinal(&calculated_digest, context); | 211 base::MD5IntermediateFinal(&calculated_digest, context); |
| 210 | 212 |
| 211 base::MD5Digest file_digest; | 213 base::MD5Digest file_digest; |
| 212 if (!ReadItem(&file_digest, fp, context)) | 214 if (!ReadItem(&file_digest, fp, context)) |
| 213 return false; | 215 return false; |
| 214 | 216 |
| 215 return memcmp(&file_digest, &calculated_digest, sizeof(file_digest)) == 0; | 217 return memcmp(&file_digest, &calculated_digest, sizeof(file_digest)) == 0; |
| 216 } | 218 } |
| 217 | 219 |
| 218 // Helper function to read the file header and chunk TOC. Rewinds |fp| and | 220 // Helper function to read the file header and chunk TOC. Rewinds |fp| and |
| 219 // initializes |context|. The header is left in |header|, with the version | 221 // initializes |context|. The header is left in |header|, with the version |
| 220 // returned. kInvalidVersion is returned for sanity check or checksum failure. | 222 // returned. kInvalidVersion is returned for sanity check or checksum failure. |
| 221 int ReadAndVerifyHeader(const base::FilePath& filename, | 223 int ReadAndVerifyHeader(const base::FilePath& filename, |
| 222 FileHeader* header, | 224 FileHeader* header, |
| 223 std::set<int32>* add_chunks, | 225 std::set<int32_t>* add_chunks, |
| 224 std::set<int32>* sub_chunks, | 226 std::set<int32_t>* sub_chunks, |
| 225 FILE* fp, | 227 FILE* fp, |
| 226 base::MD5Context* context) { | 228 base::MD5Context* context) { |
| 227 DCHECK(header); | 229 DCHECK(header); |
| 228 DCHECK(add_chunks); | 230 DCHECK(add_chunks); |
| 229 DCHECK(sub_chunks); | 231 DCHECK(sub_chunks); |
| 230 DCHECK(fp); | 232 DCHECK(fp); |
| 231 DCHECK(context); | 233 DCHECK(context); |
| 232 | 234 |
| 233 base::MD5Init(context); | 235 base::MD5Init(context); |
| 234 if (!FileRewind(fp)) | 236 if (!FileRewind(fp)) |
| (...skipping 19 matching lines...) Expand all Loading... |
| 254 RecordFormatEvent(FORMAT_EVENT_HEADER_CHECKSUM_FAILURE); | 256 RecordFormatEvent(FORMAT_EVENT_HEADER_CHECKSUM_FAILURE); |
| 255 return kInvalidVersion; | 257 return kInvalidVersion; |
| 256 } | 258 } |
| 257 | 259 |
| 258 return kFileVersion; | 260 return kFileVersion; |
| 259 } | 261 } |
| 260 | 262 |
| 261 // Helper function to write out the initial header and chunks-contained data. | 263 // Helper function to write out the initial header and chunks-contained data. |
| 262 // Rewinds |fp|, initializes |context|, then writes a file header and | 264 // Rewinds |fp|, initializes |context|, then writes a file header and |
| 263 // |add_chunks| and |sub_chunks|. | 265 // |add_chunks| and |sub_chunks|. |
| 264 bool WriteHeader(uint32 out_stride, | 266 bool WriteHeader(uint32_t out_stride, |
| 265 const std::set<int32>& add_chunks, | 267 const std::set<int32_t>& add_chunks, |
| 266 const std::set<int32>& sub_chunks, | 268 const std::set<int32_t>& sub_chunks, |
| 267 FILE* fp, | 269 FILE* fp, |
| 268 base::MD5Context* context) { | 270 base::MD5Context* context) { |
| 269 if (!FileRewind(fp)) | 271 if (!FileRewind(fp)) |
| 270 return false; | 272 return false; |
| 271 | 273 |
| 272 base::MD5Init(context); | 274 base::MD5Init(context); |
| 273 FileHeader header; | 275 FileHeader header; |
| 274 header.magic = kFileMagic; | 276 header.magic = kFileMagic; |
| 275 header.version = kFileVersion; | 277 header.version = kFileVersion; |
| 276 header.add_chunk_count = add_chunks.size(); | 278 header.add_chunk_count = add_chunks.size(); |
| (...skipping 116 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 393 sub_full_hashes_.clear(); | 395 sub_full_hashes_.clear(); |
| 394 } | 396 } |
| 395 | 397 |
| 396 // Merge data from |beg|..|end| into receiver's state, then process the state. | 398 // Merge data from |beg|..|end| into receiver's state, then process the state. |
| 397 // The current state and the range given should correspond to the same sorted | 399 // The current state and the range given should correspond to the same sorted |
| 398 // shard of data from different sources. |add_del_cache| and |sub_del_cache| | 400 // shard of data from different sources. |add_del_cache| and |sub_del_cache| |
| 399 // indicate the chunk ids which should be deleted during processing (see | 401 // indicate the chunk ids which should be deleted during processing (see |
| 400 // SBProcessSubs). | 402 // SBProcessSubs). |
| 401 void MergeDataAndProcess(const StateInternalPos& beg, | 403 void MergeDataAndProcess(const StateInternalPos& beg, |
| 402 const StateInternalPos& end, | 404 const StateInternalPos& end, |
| 403 const base::hash_set<int32>& add_del_cache, | 405 const base::hash_set<int32_t>& add_del_cache, |
| 404 const base::hash_set<int32>& sub_del_cache) { | 406 const base::hash_set<int32_t>& sub_del_cache) { |
| 405 container_merge(&add_prefixes_, | 407 container_merge(&add_prefixes_, |
| 406 beg.add_prefixes_iter_, | 408 beg.add_prefixes_iter_, |
| 407 end.add_prefixes_iter_, | 409 end.add_prefixes_iter_, |
| 408 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>); | 410 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>); |
| 409 | 411 |
| 410 container_merge(&sub_prefixes_, | 412 container_merge(&sub_prefixes_, |
| 411 beg.sub_prefixes_iter_, | 413 beg.sub_prefixes_iter_, |
| 412 end.sub_prefixes_iter_, | 414 end.sub_prefixes_iter_, |
| 413 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>); | 415 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>); |
| 414 | 416 |
| (...skipping 89 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 504 | 506 |
| 505 // Helper to read the entire database state, used by GetAddPrefixes() and | 507 // Helper to read the entire database state, used by GetAddPrefixes() and |
| 506 // GetAddFullHashes(). Those functions are generally used only for smaller | 508 // GetAddFullHashes(). Those functions are generally used only for smaller |
| 507 // files. Returns false in case of errors reading the data. | 509 // files. Returns false in case of errors reading the data. |
| 508 bool ReadDbStateHelper(const base::FilePath& filename, | 510 bool ReadDbStateHelper(const base::FilePath& filename, |
| 509 StateInternal* db_state) { | 511 StateInternal* db_state) { |
| 510 base::ScopedFILE file(base::OpenFile(filename, "rb")); | 512 base::ScopedFILE file(base::OpenFile(filename, "rb")); |
| 511 if (file.get() == NULL) | 513 if (file.get() == NULL) |
| 512 return false; | 514 return false; |
| 513 | 515 |
| 514 std::set<int32> add_chunks; | 516 std::set<int32_t> add_chunks; |
| 515 std::set<int32> sub_chunks; | 517 std::set<int32_t> sub_chunks; |
| 516 | 518 |
| 517 base::MD5Context context; | 519 base::MD5Context context; |
| 518 FileHeader header; | 520 FileHeader header; |
| 519 const int version = | 521 const int version = |
| 520 ReadAndVerifyHeader(filename, &header, &add_chunks, &sub_chunks, | 522 ReadAndVerifyHeader(filename, &header, &add_chunks, &sub_chunks, |
| 521 file.get(), &context); | 523 file.get(), &context); |
| 522 if (version == kInvalidVersion) | 524 if (version == kInvalidVersion) |
| 523 return false; | 525 return false; |
| 524 | 526 |
| 525 uint64 in_min = 0; | 527 uint64_t in_min = 0; |
| 526 uint64 in_stride = header.shard_stride; | 528 uint64_t in_stride = header.shard_stride; |
| 527 if (!in_stride) | 529 if (!in_stride) |
| 528 in_stride = kMaxShardStride; | 530 in_stride = kMaxShardStride; |
| 529 if (!IsPowerOfTwo(in_stride)) | 531 if (!IsPowerOfTwo(in_stride)) |
| 530 return false; | 532 return false; |
| 531 | 533 |
| 532 do { | 534 do { |
| 533 ShardHeader shard_header; | 535 ShardHeader shard_header; |
| 534 if (!ReadItem(&shard_header, file.get(), &context)) | 536 if (!ReadItem(&shard_header, file.get(), &context)) |
| 535 return false; | 537 return false; |
| 536 | 538 |
| 537 if (!db_state->AppendData(shard_header.add_prefix_count, | 539 if (!db_state->AppendData(shard_header.add_prefix_count, |
| 538 shard_header.sub_prefix_count, | 540 shard_header.sub_prefix_count, |
| 539 shard_header.add_hash_count, | 541 shard_header.add_hash_count, |
| 540 shard_header.sub_hash_count, | 542 shard_header.sub_hash_count, |
| 541 file.get(), &context)) { | 543 file.get(), &context)) { |
| 542 return false; | 544 return false; |
| 543 } | 545 } |
| 544 | 546 |
| 545 in_min += in_stride; | 547 in_min += in_stride; |
| 546 } while (in_min <= kMaxSBPrefix); | 548 } while (in_min <= kMaxSBPrefix); |
| 547 | 549 |
| 548 if (!ReadAndVerifyChecksum(file.get(), &context)) | 550 if (!ReadAndVerifyChecksum(file.get(), &context)) |
| 549 return false; | 551 return false; |
| 550 | 552 |
| 551 int64 size = 0; | 553 int64_t size = 0; |
| 552 if (!base::GetFileSize(filename, &size)) | 554 if (!base::GetFileSize(filename, &size)) |
| 553 return false; | 555 return false; |
| 554 | 556 |
| 555 return static_cast<int64>(ftell(file.get())) == size; | 557 return static_cast<int64_t>(ftell(file.get())) == size; |
| 556 } | 558 } |
| 557 | 559 |
| 558 } // namespace | 560 } // namespace |
| 559 | 561 |
| 560 SafeBrowsingStoreFile::SafeBrowsingStoreFile( | 562 SafeBrowsingStoreFile::SafeBrowsingStoreFile( |
| 561 const scoped_refptr<const base::SequencedTaskRunner>& task_runner) | 563 const scoped_refptr<const base::SequencedTaskRunner>& task_runner) |
| 562 : task_runner_(task_runner), | 564 : task_runner_(task_runner), |
| 563 chunks_written_(0), | 565 chunks_written_(0), |
| 564 empty_(false), | 566 empty_(false), |
| 565 corruption_seen_(false) { | 567 corruption_seen_(false) { |
| (...skipping 29 matching lines...) Expand all Loading... |
| 595 // The file was either empty or never opened. The empty case is | 597 // The file was either empty or never opened. The empty case is |
| 596 // presumed not to be invalid. The never-opened case can happen if | 598 // presumed not to be invalid. The never-opened case can happen if |
| 597 // BeginUpdate() fails for any databases, and should already have | 599 // BeginUpdate() fails for any databases, and should already have |
| 598 // caused the corruption callback to fire. | 600 // caused the corruption callback to fire. |
| 599 if (!file_.get()) | 601 if (!file_.get()) |
| 600 return true; | 602 return true; |
| 601 | 603 |
| 602 if (!FileRewind(file_.get())) | 604 if (!FileRewind(file_.get())) |
| 603 return OnCorruptDatabase(); | 605 return OnCorruptDatabase(); |
| 604 | 606 |
| 605 int64 size = 0; | 607 int64_t size = 0; |
| 606 if (!base::GetFileSize(filename_, &size)) | 608 if (!base::GetFileSize(filename_, &size)) |
| 607 return OnCorruptDatabase(); | 609 return OnCorruptDatabase(); |
| 608 | 610 |
| 609 base::MD5Context context; | 611 base::MD5Context context; |
| 610 base::MD5Init(&context); | 612 base::MD5Init(&context); |
| 611 | 613 |
| 612 // Read everything except the final digest. | 614 // Read everything except the final digest. |
| 613 size_t bytes_left = static_cast<size_t>(size); | 615 size_t bytes_left = static_cast<size_t>(size); |
| 614 CHECK(size == static_cast<int64>(bytes_left)); | 616 CHECK(size == static_cast<int64_t>(bytes_left)); |
| 615 if (bytes_left < sizeof(base::MD5Digest)) | 617 if (bytes_left < sizeof(base::MD5Digest)) |
| 616 return OnCorruptDatabase(); | 618 return OnCorruptDatabase(); |
| 617 bytes_left -= sizeof(base::MD5Digest); | 619 bytes_left -= sizeof(base::MD5Digest); |
| 618 | 620 |
| 619 // Fold the contents of the file into the checksum. | 621 // Fold the contents of the file into the checksum. |
| 620 while (bytes_left > 0) { | 622 while (bytes_left > 0) { |
| 621 char buf[4096]; | 623 char buf[4096]; |
| 622 const size_t c = std::min(sizeof(buf), bytes_left); | 624 const size_t c = std::min(sizeof(buf), bytes_left); |
| 623 const size_t ret = fread(buf, 1, c, file_.get()); | 625 const size_t ret = fread(buf, 1, c, file_.get()); |
| 624 | 626 |
| (...skipping 17 matching lines...) Expand all Loading... |
| 642 DCHECK(CalledOnValidThread()); | 644 DCHECK(CalledOnValidThread()); |
| 643 filename_ = filename; | 645 filename_ = filename; |
| 644 corruption_callback_ = corruption_callback; | 646 corruption_callback_ = corruption_callback; |
| 645 } | 647 } |
| 646 | 648 |
| 647 bool SafeBrowsingStoreFile::BeginChunk() { | 649 bool SafeBrowsingStoreFile::BeginChunk() { |
| 648 DCHECK(CalledOnValidThread()); | 650 DCHECK(CalledOnValidThread()); |
| 649 return ClearChunkBuffers(); | 651 return ClearChunkBuffers(); |
| 650 } | 652 } |
| 651 | 653 |
| 652 bool SafeBrowsingStoreFile::WriteAddPrefix(int32 chunk_id, SBPrefix prefix) { | 654 bool SafeBrowsingStoreFile::WriteAddPrefix(int32_t chunk_id, SBPrefix prefix) { |
| 653 DCHECK(CalledOnValidThread()); | 655 DCHECK(CalledOnValidThread()); |
| 654 add_prefixes_.push_back(SBAddPrefix(chunk_id, prefix)); | 656 add_prefixes_.push_back(SBAddPrefix(chunk_id, prefix)); |
| 655 return true; | 657 return true; |
| 656 } | 658 } |
| 657 | 659 |
| 658 bool SafeBrowsingStoreFile::GetAddPrefixes(SBAddPrefixes* add_prefixes) { | 660 bool SafeBrowsingStoreFile::GetAddPrefixes(SBAddPrefixes* add_prefixes) { |
| 659 DCHECK(CalledOnValidThread()); | 661 DCHECK(CalledOnValidThread()); |
| 660 | 662 |
| 661 add_prefixes->clear(); | 663 add_prefixes->clear(); |
| 662 if (!base::PathExists(filename_)) | 664 if (!base::PathExists(filename_)) |
| (...skipping 16 matching lines...) Expand all Loading... |
| 679 return true; | 681 return true; |
| 680 | 682 |
| 681 StateInternal db_state; | 683 StateInternal db_state; |
| 682 if (!ReadDbStateHelper(filename_, &db_state)) | 684 if (!ReadDbStateHelper(filename_, &db_state)) |
| 683 return OnCorruptDatabase(); | 685 return OnCorruptDatabase(); |
| 684 | 686 |
| 685 add_full_hashes->swap(db_state.add_full_hashes_); | 687 add_full_hashes->swap(db_state.add_full_hashes_); |
| 686 return true; | 688 return true; |
| 687 } | 689 } |
| 688 | 690 |
| 689 bool SafeBrowsingStoreFile::WriteAddHash(int32 chunk_id, | 691 bool SafeBrowsingStoreFile::WriteAddHash(int32_t chunk_id, |
| 690 const SBFullHash& full_hash) { | 692 const SBFullHash& full_hash) { |
| 691 DCHECK(CalledOnValidThread()); | 693 DCHECK(CalledOnValidThread()); |
| 692 add_hashes_.push_back(SBAddFullHash(chunk_id, full_hash)); | 694 add_hashes_.push_back(SBAddFullHash(chunk_id, full_hash)); |
| 693 return true; | 695 return true; |
| 694 } | 696 } |
| 695 | 697 |
| 696 bool SafeBrowsingStoreFile::WriteSubPrefix(int32 chunk_id, | 698 bool SafeBrowsingStoreFile::WriteSubPrefix(int32_t chunk_id, |
| 697 int32 add_chunk_id, | 699 int32_t add_chunk_id, |
| 698 SBPrefix prefix) { | 700 SBPrefix prefix) { |
| 699 DCHECK(CalledOnValidThread()); | 701 DCHECK(CalledOnValidThread()); |
| 700 sub_prefixes_.push_back(SBSubPrefix(chunk_id, add_chunk_id, prefix)); | 702 sub_prefixes_.push_back(SBSubPrefix(chunk_id, add_chunk_id, prefix)); |
| 701 return true; | 703 return true; |
| 702 } | 704 } |
| 703 | 705 |
| 704 bool SafeBrowsingStoreFile::WriteSubHash(int32 chunk_id, int32 add_chunk_id, | 706 bool SafeBrowsingStoreFile::WriteSubHash(int32_t chunk_id, |
| 707 int32_t add_chunk_id, |
| 705 const SBFullHash& full_hash) { | 708 const SBFullHash& full_hash) { |
| 706 DCHECK(CalledOnValidThread()); | 709 DCHECK(CalledOnValidThread()); |
| 707 sub_hashes_.push_back(SBSubFullHash(chunk_id, add_chunk_id, full_hash)); | 710 sub_hashes_.push_back(SBSubFullHash(chunk_id, add_chunk_id, full_hash)); |
| 708 return true; | 711 return true; |
| 709 } | 712 } |
| 710 | 713 |
| 711 bool SafeBrowsingStoreFile::OnCorruptDatabase() { | 714 bool SafeBrowsingStoreFile::OnCorruptDatabase() { |
| 712 DCHECK(CalledOnValidThread()); | 715 DCHECK(CalledOnValidThread()); |
| 713 | 716 |
| 714 if (!corruption_seen_) | 717 if (!corruption_seen_) |
| (...skipping 113 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 828 DCHECK(file_.get() || empty_); | 831 DCHECK(file_.get() || empty_); |
| 829 DCHECK(new_file_.get()); | 832 DCHECK(new_file_.get()); |
| 830 CHECK(builder); | 833 CHECK(builder); |
| 831 CHECK(add_full_hashes_result); | 834 CHECK(add_full_hashes_result); |
| 832 | 835 |
| 833 // Rewind the temporary storage. | 836 // Rewind the temporary storage. |
| 834 if (!FileRewind(new_file_.get())) | 837 if (!FileRewind(new_file_.get())) |
| 835 return false; | 838 return false; |
| 836 | 839 |
| 837 // Get chunk file's size for validating counts. | 840 // Get chunk file's size for validating counts. |
| 838 int64 update_size = 0; | 841 int64_t update_size = 0; |
| 839 if (!base::GetFileSize(TemporaryFileForFilename(filename_), &update_size)) | 842 if (!base::GetFileSize(TemporaryFileForFilename(filename_), &update_size)) |
| 840 return OnCorruptDatabase(); | 843 return OnCorruptDatabase(); |
| 841 | 844 |
| 842 // Track update size to answer questions at http://crbug.com/72216 . | 845 // Track update size to answer questions at http://crbug.com/72216 . |
| 843 // Log small updates as 1k so that the 0 (underflow) bucket can be | 846 // Log small updates as 1k so that the 0 (underflow) bucket can be |
| 844 // used for "empty" in SafeBrowsingDatabase. | 847 // used for "empty" in SafeBrowsingDatabase. |
| 845 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", | 848 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", |
| 846 std::max(static_cast<int>(update_size / 1024), 1)); | 849 std::max(static_cast<int>(update_size / 1024), 1)); |
| 847 | 850 |
| 848 // Chunk updates to integrate. | 851 // Chunk updates to integrate. |
| 849 StateInternal new_state; | 852 StateInternal new_state; |
| 850 | 853 |
| 851 // Read update chunks. | 854 // Read update chunks. |
| 852 for (int i = 0; i < chunks_written_; ++i) { | 855 for (int i = 0; i < chunks_written_; ++i) { |
| 853 ChunkHeader header; | 856 ChunkHeader header; |
| 854 | 857 |
| 855 int64 ofs = ftell(new_file_.get()); | 858 int64_t ofs = ftell(new_file_.get()); |
| 856 if (ofs == -1) | 859 if (ofs == -1) |
| 857 return false; | 860 return false; |
| 858 | 861 |
| 859 if (!ReadItem(&header, new_file_.get(), NULL)) | 862 if (!ReadItem(&header, new_file_.get(), NULL)) |
| 860 return false; | 863 return false; |
| 861 | 864 |
| 862 // As a safety measure, make sure that the header describes a sane | 865 // As a safety measure, make sure that the header describes a sane |
| 863 // chunk, given the remaining file size. | 866 // chunk, given the remaining file size. |
| 864 int64 expected_size = ofs + sizeof(ChunkHeader); | 867 int64_t expected_size = ofs + sizeof(ChunkHeader); |
| 865 expected_size += header.add_prefix_count * sizeof(SBAddPrefix); | 868 expected_size += header.add_prefix_count * sizeof(SBAddPrefix); |
| 866 expected_size += header.sub_prefix_count * sizeof(SBSubPrefix); | 869 expected_size += header.sub_prefix_count * sizeof(SBSubPrefix); |
| 867 expected_size += header.add_hash_count * sizeof(SBAddFullHash); | 870 expected_size += header.add_hash_count * sizeof(SBAddFullHash); |
| 868 expected_size += header.sub_hash_count * sizeof(SBSubFullHash); | 871 expected_size += header.sub_hash_count * sizeof(SBSubFullHash); |
| 869 if (expected_size > update_size) | 872 if (expected_size > update_size) |
| 870 return false; | 873 return false; |
| 871 | 874 |
| 872 if (!new_state.AppendData(header.add_prefix_count, header.sub_prefix_count, | 875 if (!new_state.AppendData(header.add_prefix_count, header.sub_prefix_count, |
| 873 header.add_hash_count, header.sub_hash_count, | 876 header.add_hash_count, header.sub_hash_count, |
| 874 new_file_.get(), NULL)) { | 877 new_file_.get(), NULL)) { |
| 875 return false; | 878 return false; |
| 876 } | 879 } |
| 877 } | 880 } |
| 878 | 881 |
| 879 // The state was accumulated by chunk, sort by prefix. | 882 // The state was accumulated by chunk, sort by prefix. |
| 880 new_state.SortData(); | 883 new_state.SortData(); |
| 881 | 884 |
| 882 // These strides control how much data is loaded into memory per pass. | 885 // These strides control how much data is loaded into memory per pass. |
| 883 // Strides must be an even power of two. |in_stride| will be derived from the | 886 // Strides must be an even power of two. |in_stride| will be derived from the |
| 884 // input file. |out_stride| will be derived from an estimate of the resulting | 887 // input file. |out_stride| will be derived from an estimate of the resulting |
| 885 // file's size. |process_stride| will be the max of both. | 888 // file's size. |process_stride| will be the max of both. |
| 886 uint64 in_stride = kMaxShardStride; | 889 uint64_t in_stride = kMaxShardStride; |
| 887 uint64 out_stride = kMaxShardStride; | 890 uint64_t out_stride = kMaxShardStride; |
| 888 uint64 process_stride = 0; | 891 uint64_t process_stride = 0; |
| 889 | 892 |
| 890 // Used to verify the input's checksum if |!empty_|. | 893 // Used to verify the input's checksum if |!empty_|. |
| 891 base::MD5Context in_context; | 894 base::MD5Context in_context; |
| 892 | 895 |
| 893 if (!empty_) { | 896 if (!empty_) { |
| 894 DCHECK(file_.get()); | 897 DCHECK(file_.get()); |
| 895 | 898 |
| 896 FileHeader header = {0}; | 899 FileHeader header = {0}; |
| 897 int version = ReadAndVerifyHeader(filename_, &header, | 900 int version = ReadAndVerifyHeader(filename_, &header, |
| 898 &add_chunks_cache_, &sub_chunks_cache_, | 901 &add_chunks_cache_, &sub_chunks_cache_, |
| 899 file_.get(), &in_context); | 902 file_.get(), &in_context); |
| 900 if (version == kInvalidVersion) | 903 if (version == kInvalidVersion) |
| 901 return OnCorruptDatabase(); | 904 return OnCorruptDatabase(); |
| 902 | 905 |
| 903 if (header.shard_stride) | 906 if (header.shard_stride) |
| 904 in_stride = header.shard_stride; | 907 in_stride = header.shard_stride; |
| 905 | 908 |
| 906 // The header checksum should have prevented this case, but the code will be | 909 // The header checksum should have prevented this case, but the code will be |
| 907 // broken if this is not correct. | 910 // broken if this is not correct. |
| 908 if (!IsPowerOfTwo(in_stride)) | 911 if (!IsPowerOfTwo(in_stride)) |
| 909 return OnCorruptDatabase(); | 912 return OnCorruptDatabase(); |
| 910 } | 913 } |
| 911 | 914 |
| 912 // We no longer need to track deleted chunks. | 915 // We no longer need to track deleted chunks. |
| 913 DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_); | 916 DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_); |
| 914 DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_); | 917 DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_); |
| 915 | 918 |
| 916 // Calculate |out_stride| to break the file down into reasonable shards. | 919 // Calculate |out_stride| to break the file down into reasonable shards. |
| 917 { | 920 { |
| 918 int64 original_size = 0; | 921 int64_t original_size = 0; |
| 919 if (!empty_ && !base::GetFileSize(filename_, &original_size)) | 922 if (!empty_ && !base::GetFileSize(filename_, &original_size)) |
| 920 return OnCorruptDatabase(); | 923 return OnCorruptDatabase(); |
| 921 | 924 |
| 922 // Approximate the final size as everything. Subs and deletes will reduce | 925 // Approximate the final size as everything. Subs and deletes will reduce |
| 923 // the size, but modest over-sharding won't hurt much. | 926 // the size, but modest over-sharding won't hurt much. |
| 924 int64 shard_size = original_size + update_size; | 927 int64_t shard_size = original_size + update_size; |
| 925 | 928 |
| 926 // Keep splitting until a single stride of data fits the target. | 929 // Keep splitting until a single stride of data fits the target. |
| 927 size_t shifts = 0; | 930 size_t shifts = 0; |
| 928 while (out_stride > kMinShardStride && shard_size > kUpdateStorageBytes) { | 931 while (out_stride > kMinShardStride && shard_size > kUpdateStorageBytes) { |
| 929 out_stride >>= 1; | 932 out_stride >>= 1; |
| 930 shard_size >>= 1; | 933 shard_size >>= 1; |
| 931 ++shifts; | 934 ++shifts; |
| 932 } | 935 } |
| 933 UMA_HISTOGRAM_COUNTS("SB2.OutShardShifts", shifts); | 936 UMA_HISTOGRAM_COUNTS("SB2.OutShardShifts", shifts); |
| 934 | 937 |
| 935 DCHECK(IsPowerOfTwo(out_stride)); | 938 DCHECK(IsPowerOfTwo(out_stride)); |
| 936 } | 939 } |
| 937 | 940 |
| 938 // Outer loop strides by the max of the input stride (to read integral shards) | 941 // Outer loop strides by the max of the input stride (to read integral shards) |
| 939 // and the output stride (to write integral shards). | 942 // and the output stride (to write integral shards). |
| 940 process_stride = std::max(in_stride, out_stride); | 943 process_stride = std::max(in_stride, out_stride); |
| 941 DCHECK(IsPowerOfTwo(process_stride)); | 944 DCHECK(IsPowerOfTwo(process_stride)); |
| 942 DCHECK_EQ(0u, process_stride % in_stride); | 945 DCHECK_EQ(0u, process_stride % in_stride); |
| 943 DCHECK_EQ(0u, process_stride % out_stride); | 946 DCHECK_EQ(0u, process_stride % out_stride); |
| 944 | 947 |
| 945 // Start writing the new data to |new_file_|. | 948 // Start writing the new data to |new_file_|. |
| 946 base::MD5Context out_context; | 949 base::MD5Context out_context; |
| 947 if (!WriteHeader(out_stride, add_chunks_cache_, sub_chunks_cache_, | 950 if (!WriteHeader(out_stride, add_chunks_cache_, sub_chunks_cache_, |
| 948 new_file_.get(), &out_context)) { | 951 new_file_.get(), &out_context)) { |
| 949 return false; | 952 return false; |
| 950 } | 953 } |
| 951 | 954 |
| 952 // Start at the beginning of the SBPrefix space. | 955 // Start at the beginning of the SBPrefix space. |
| 953 uint64 in_min = 0; | 956 uint64_t in_min = 0; |
| 954 uint64 out_min = 0; | 957 uint64_t out_min = 0; |
| 955 uint64 process_min = 0; | 958 uint64_t process_min = 0; |
| 956 | 959 |
| 957 // Start at the beginning of the updates. | 960 // Start at the beginning of the updates. |
| 958 StateInternalPos new_pos = new_state.StateBegin(); | 961 StateInternalPos new_pos = new_state.StateBegin(); |
| 959 | 962 |
| 960 // Re-usable container for shard processing. | 963 // Re-usable container for shard processing. |
| 961 StateInternal db_state; | 964 StateInternal db_state; |
| 962 | 965 |
| 963 // Track aggregate counts for histograms. | 966 // Track aggregate counts for histograms. |
| 964 size_t add_prefix_count = 0; | 967 size_t add_prefix_count = 0; |
| 965 size_t sub_prefix_count = 0; | 968 size_t sub_prefix_count = 0; |
| (...skipping 125 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1091 DCHECK(CalledOnValidThread()); | 1094 DCHECK(CalledOnValidThread()); |
| 1092 bool ret = Close(); | 1095 bool ret = Close(); |
| 1093 | 1096 |
| 1094 // Delete stale staging file. | 1097 // Delete stale staging file. |
| 1095 const base::FilePath new_filename = TemporaryFileForFilename(filename_); | 1098 const base::FilePath new_filename = TemporaryFileForFilename(filename_); |
| 1096 base::DeleteFile(new_filename, false); | 1099 base::DeleteFile(new_filename, false); |
| 1097 | 1100 |
| 1098 return ret; | 1101 return ret; |
| 1099 } | 1102 } |
| 1100 | 1103 |
| 1101 void SafeBrowsingStoreFile::SetAddChunk(int32 chunk_id) { | 1104 void SafeBrowsingStoreFile::SetAddChunk(int32_t chunk_id) { |
| 1102 DCHECK(CalledOnValidThread()); | 1105 DCHECK(CalledOnValidThread()); |
| 1103 add_chunks_cache_.insert(chunk_id); | 1106 add_chunks_cache_.insert(chunk_id); |
| 1104 } | 1107 } |
| 1105 | 1108 |
| 1106 bool SafeBrowsingStoreFile::CheckAddChunk(int32 chunk_id) { | 1109 bool SafeBrowsingStoreFile::CheckAddChunk(int32_t chunk_id) { |
| 1107 DCHECK(CalledOnValidThread()); | 1110 DCHECK(CalledOnValidThread()); |
| 1108 return add_chunks_cache_.count(chunk_id) > 0; | 1111 return add_chunks_cache_.count(chunk_id) > 0; |
| 1109 } | 1112 } |
| 1110 | 1113 |
| 1111 void SafeBrowsingStoreFile::GetAddChunks(std::vector<int32>* out) { | 1114 void SafeBrowsingStoreFile::GetAddChunks(std::vector<int32_t>* out) { |
| 1112 DCHECK(CalledOnValidThread()); | 1115 DCHECK(CalledOnValidThread()); |
| 1113 out->clear(); | 1116 out->clear(); |
| 1114 out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end()); | 1117 out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end()); |
| 1115 } | 1118 } |
| 1116 | 1119 |
| 1117 void SafeBrowsingStoreFile::SetSubChunk(int32 chunk_id) { | 1120 void SafeBrowsingStoreFile::SetSubChunk(int32_t chunk_id) { |
| 1118 DCHECK(CalledOnValidThread()); | 1121 DCHECK(CalledOnValidThread()); |
| 1119 sub_chunks_cache_.insert(chunk_id); | 1122 sub_chunks_cache_.insert(chunk_id); |
| 1120 } | 1123 } |
| 1121 | 1124 |
| 1122 bool SafeBrowsingStoreFile::CheckSubChunk(int32 chunk_id) { | 1125 bool SafeBrowsingStoreFile::CheckSubChunk(int32_t chunk_id) { |
| 1123 DCHECK(CalledOnValidThread()); | 1126 DCHECK(CalledOnValidThread()); |
| 1124 return sub_chunks_cache_.count(chunk_id) > 0; | 1127 return sub_chunks_cache_.count(chunk_id) > 0; |
| 1125 } | 1128 } |
| 1126 | 1129 |
| 1127 void SafeBrowsingStoreFile::GetSubChunks(std::vector<int32>* out) { | 1130 void SafeBrowsingStoreFile::GetSubChunks(std::vector<int32_t>* out) { |
| 1128 DCHECK(CalledOnValidThread()); | 1131 DCHECK(CalledOnValidThread()); |
| 1129 out->clear(); | 1132 out->clear(); |
| 1130 out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end()); | 1133 out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end()); |
| 1131 } | 1134 } |
| 1132 | 1135 |
| 1133 void SafeBrowsingStoreFile::DeleteAddChunk(int32 chunk_id) { | 1136 void SafeBrowsingStoreFile::DeleteAddChunk(int32_t chunk_id) { |
| 1134 DCHECK(CalledOnValidThread()); | 1137 DCHECK(CalledOnValidThread()); |
| 1135 add_del_cache_.insert(chunk_id); | 1138 add_del_cache_.insert(chunk_id); |
| 1136 } | 1139 } |
| 1137 | 1140 |
| 1138 void SafeBrowsingStoreFile::DeleteSubChunk(int32 chunk_id) { | 1141 void SafeBrowsingStoreFile::DeleteSubChunk(int32_t chunk_id) { |
| 1139 DCHECK(CalledOnValidThread()); | 1142 DCHECK(CalledOnValidThread()); |
| 1140 sub_del_cache_.insert(chunk_id); | 1143 sub_del_cache_.insert(chunk_id); |
| 1141 } | 1144 } |
| 1142 | 1145 |
| 1143 // static | 1146 // static |
| 1144 bool SafeBrowsingStoreFile::DeleteStore(const base::FilePath& basename) { | 1147 bool SafeBrowsingStoreFile::DeleteStore(const base::FilePath& basename) { |
| 1145 if (!base::DeleteFile(basename, false) && | 1148 if (!base::DeleteFile(basename, false) && |
| 1146 base::PathExists(basename)) { | 1149 base::PathExists(basename)) { |
| 1147 NOTREACHED(); | 1150 NOTREACHED(); |
| 1148 return false; | 1151 return false; |
| (...skipping 11 matching lines...) Expand all Loading... |
| 1160 // also removed. | 1163 // also removed. |
| 1161 const base::FilePath journal_filename( | 1164 const base::FilePath journal_filename( |
| 1162 basename.value() + FILE_PATH_LITERAL("-journal")); | 1165 basename.value() + FILE_PATH_LITERAL("-journal")); |
| 1163 if (base::PathExists(journal_filename)) | 1166 if (base::PathExists(journal_filename)) |
| 1164 base::DeleteFile(journal_filename, false); | 1167 base::DeleteFile(journal_filename, false); |
| 1165 | 1168 |
| 1166 return true; | 1169 return true; |
| 1167 } | 1170 } |
| 1168 | 1171 |
| 1169 } // namespace safe_browsing | 1172 } // namespace safe_browsing |
| OLD | NEW |