chrome/browser/safe_browsing/safe_browsing_store_file.cc - Issue 1548133002: Switch to standard integer types in chrome/browser/, part 3 of 4.

Side by Side Diff: chrome/browser/safe_browsing/safe_browsing_store_file.cc

Issue 1548133002: Switch to standard integer types in chrome/browser/, part 3 of 4. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 12 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

« no previous file with comments | « chrome/browser/safe_browsing/safe_browsing_store_file.h ('k') | chrome/browser/safe_browsing/safe_browsing_store_file_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"	5 #include "chrome/browser/safe_browsing/safe_browsing_store_file.h"

6	6

	7 #include <stddef.h>

	8

7 #include "base/files/file_util.h"	9 #include "base/files/file_util.h"

8 #include "base/files/scoped_file.h"	10 #include "base/files/scoped_file.h"

9 #include "base/md5.h"	11 #include "base/md5.h"

10 #include "base/metrics/histogram.h"	12 #include "base/metrics/histogram.h"

11 #include "base/metrics/sparse_histogram.h"	13 #include "base/metrics/sparse_histogram.h"

12 #include "components/safe_browsing_db/prefix_set.h"	14 #include "components/safe_browsing_db/prefix_set.h"

13	15

14 namespace safe_browsing {	16 namespace safe_browsing {

15	17

16 namespace {	18 namespace {

17	19

18 // NOTE(shess): kFileMagic should not be a byte-wise palindrome, so	20 // NOTE(shess): kFileMagic should not be a byte-wise palindrome, so

19 // that byte-order changes force corruption.	21 // that byte-order changes force corruption.

20 const int32 kFileMagic = 0x600D71FE;	22 const int32_t kFileMagic = 0x600D71FE;

21	23

22 // Version history:	24 // Version history:

23 // Version 6: aad08754/r2814 by erikkay@google.com on 2008-10-02 (sqlite)	25 // Version 6: aad08754/r2814 by erikkay@google.com on 2008-10-02 (sqlite)

24 // Version 7: 6afe28a5/r37435 by shess@chromium.org on 2010-01-28	26 // Version 7: 6afe28a5/r37435 by shess@chromium.org on 2010-01-28

25 // Version 8: d3dd0715/r259791 by shess@chromium.org on 2014-03-27	27 // Version 8: d3dd0715/r259791 by shess@chromium.org on 2014-03-27

26 const int32 kFileVersion = 8;	28 const int32_t kFileVersion = 8;

27	29

28 // ReadAndVerifyHeader() returns this in case of error.	30 // ReadAndVerifyHeader() returns this in case of error.

29 const int32 kInvalidVersion = -1;	31 const int32_t kInvalidVersion = -1;

30	32

31 // Starting with version 8, the storage is sorted and can be sharded to allow	33 // Starting with version 8, the storage is sorted and can be sharded to allow

32 // updates to be done with lower memory requirements. Newly written files will	34 // updates to be done with lower memory requirements. Newly written files will

33 // be sharded to need less than this amount of memory during update. Larger	35 // be sharded to need less than this amount of memory during update. Larger

34 // values are preferred to minimize looping overhead during processing.	36 // values are preferred to minimize looping overhead during processing.

35 const int64 kUpdateStorageBytes = 100 * 1024;	37 const int64_t kUpdateStorageBytes = 100 * 1024;

36	38

37 // Prevent excessive sharding by setting a lower limit on the shard stride.	39 // Prevent excessive sharding by setting a lower limit on the shard stride.

38 // Smaller values should work fine, but very small values will probably lead to	40 // Smaller values should work fine, but very small values will probably lead to

39 // poor performance. Shard stride is indirectly related to	41 // poor performance. Shard stride is indirectly related to

40 // |kUpdateStorageBytes|, setting that very small will bump against this.	42 // |kUpdateStorageBytes|, setting that very small will bump against this.

41 const uint32 kMinShardStride = 1 << 24;	43 const uint32_t kMinShardStride = 1 << 24;

42	44

43 // Strides over the entire SBPrefix space.	45 // Strides over the entire SBPrefix space.

44 const uint64 kMaxShardStride = 1ULL << 32;	46 const uint64_t kMaxShardStride = 1ULL << 32;

45	47

46 // Maximum SBPrefix value.	48 // Maximum SBPrefix value.

47 const SBPrefix kMaxSBPrefix = 0xFFFFFFFF;	49 const SBPrefix kMaxSBPrefix = 0xFFFFFFFF;

48	50

49 // Header at the front of the main database file.	51 // Header at the front of the main database file.

50 struct FileHeader {	52 struct FileHeader {

51 int32 magic, version;	53 int32_t magic, version;

52 uint32 add_chunk_count, sub_chunk_count;	54 uint32_t add_chunk_count, sub_chunk_count;

53 uint32 shard_stride;	55 uint32_t shard_stride;

54 // TODO(shess): Is this where 64-bit will bite me? Perhaps write a	56 // TODO(shess): Is this where 64-bit will bite me? Perhaps write a

55 // specialized read/write?	57 // specialized read/write?

56 };	58 };

57	59

58 // Header for each chunk in the chunk-accumulation file.	60 // Header for each chunk in the chunk-accumulation file.

59 struct ChunkHeader {	61 struct ChunkHeader {

60 uint32 add_prefix_count, sub_prefix_count;	62 uint32_t add_prefix_count, sub_prefix_count;

61 uint32 add_hash_count, sub_hash_count;	63 uint32_t add_hash_count, sub_hash_count;

62 };	64 };

63	65

64 // Header for each shard of data in the main database file.	66 // Header for each shard of data in the main database file.

65 struct ShardHeader {	67 struct ShardHeader {

66 uint32 add_prefix_count, sub_prefix_count;	68 uint32_t add_prefix_count, sub_prefix_count;

67 uint32 add_hash_count, sub_hash_count;	69 uint32_t add_hash_count, sub_hash_count;

68 };	70 };

69	71

70 // Enumerate different format-change events for histogramming	72 // Enumerate different format-change events for histogramming

71 // purposes. DO NOT CHANGE THE ORDERING OF THESE VALUES.	73 // purposes. DO NOT CHANGE THE ORDERING OF THESE VALUES.

72 enum FormatEventType {	74 enum FormatEventType {

73 // Corruption detected, broken down by file format.	75 // Corruption detected, broken down by file format.

74 FORMAT_EVENT_FILE_CORRUPT,	76 FORMAT_EVENT_FILE_CORRUPT,

75 FORMAT_EVENT_SQLITE_CORRUPT, // Obsolete	77 FORMAT_EVENT_SQLITE_CORRUPT, // Obsolete

76	78

77 // The type of format found in the file. The expected case (new	79 // The type of format found in the file. The expected case (new

(...skipping 109 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
187	189

188 // Write all of |values| to |fp|, and fold the data into the checksum	190 // Write all of |values| to |fp|, and fold the data into the checksum

189 // in |context|, if non-NULL. Returns true if all items were written successfully.	191 // in |context|, if non-NULL. Returns true if all items were written successfully.

190 template <typename CT>	192 template <typename CT>

191 bool WriteContainer(const CT& values, FILE* fp,	193 bool WriteContainer(const CT& values, FILE* fp,

192 base::MD5Context* context) {	194 base::MD5Context* context) {

193 return WriteRange(values.begin(), values.end(), fp, context);	195 return WriteRange(values.begin(), values.end(), fp, context);

194 }	196 }

195	197

196 // Delete the chunks in |deleted| from |chunks|.	198 // Delete the chunks in |deleted| from |chunks|.

197 void DeleteChunksFromSet(const base::hash_set<int32>& deleted,	199 void DeleteChunksFromSet(const base::hash_set<int32_t>& deleted,

198 std::set<int32>* chunks) {	200 std::set<int32_t>* chunks) {

199 for (std::set<int32>::iterator iter = chunks->begin();	201 for (std::set<int32_t>::iterator iter = chunks->begin();

200 iter != chunks->end();) {	202 iter != chunks->end();) {

201 std::set<int32>::iterator prev = iter++;	203 std::set<int32_t>::iterator prev = iter++;

202 if (deleted.count(*prev) > 0)	204 if (deleted.count(*prev) > 0)

203 chunks->erase(prev);	205 chunks->erase(prev);

204 }	206 }

205 }	207 }

206	208

207 bool ReadAndVerifyChecksum(FILE* fp, base::MD5Context* context) {	209 bool ReadAndVerifyChecksum(FILE* fp, base::MD5Context* context) {

208 base::MD5Digest calculated_digest;	210 base::MD5Digest calculated_digest;

209 base::MD5IntermediateFinal(&calculated_digest, context);	211 base::MD5IntermediateFinal(&calculated_digest, context);

210	212

211 base::MD5Digest file_digest;	213 base::MD5Digest file_digest;

212 if (!ReadItem(&file_digest, fp, context))	214 if (!ReadItem(&file_digest, fp, context))

213 return false;	215 return false;

214	216

215 return memcmp(&file_digest, &calculated_digest, sizeof(file_digest)) == 0;	217 return memcmp(&file_digest, &calculated_digest, sizeof(file_digest)) == 0;

216 }	218 }

217	219

218 // Helper function to read the file header and chunk TOC. Rewinds |fp| and	220 // Helper function to read the file header and chunk TOC. Rewinds |fp| and

219 // initializes |context|. The header is left in |header|, with the version	221 // initializes |context|. The header is left in |header|, with the version

220 // returned. kInvalidVersion is returned for sanity check or checksum failure.	222 // returned. kInvalidVersion is returned for sanity check or checksum failure.

221 int ReadAndVerifyHeader(const base::FilePath& filename,	223 int ReadAndVerifyHeader(const base::FilePath& filename,

222 FileHeader* header,	224 FileHeader* header,

223 std::set<int32>* add_chunks,	225 std::set<int32_t>* add_chunks,

224 std::set<int32>* sub_chunks,	226 std::set<int32_t>* sub_chunks,

225 FILE* fp,	227 FILE* fp,

226 base::MD5Context* context) {	228 base::MD5Context* context) {

227 DCHECK(header);	229 DCHECK(header);

228 DCHECK(add_chunks);	230 DCHECK(add_chunks);

229 DCHECK(sub_chunks);	231 DCHECK(sub_chunks);

230 DCHECK(fp);	232 DCHECK(fp);

231 DCHECK(context);	233 DCHECK(context);

232	234

233 base::MD5Init(context);	235 base::MD5Init(context);

234 if (!FileRewind(fp))	236 if (!FileRewind(fp))

(...skipping 19 matching lines...) Expand all Loading...
254 RecordFormatEvent(FORMAT_EVENT_HEADER_CHECKSUM_FAILURE);	256 RecordFormatEvent(FORMAT_EVENT_HEADER_CHECKSUM_FAILURE);

255 return kInvalidVersion;	257 return kInvalidVersion;

256 }	258 }

257	259

258 return kFileVersion;	260 return kFileVersion;

259 }	261 }

260	262

261 // Helper function to write out the initial header and chunks-contained data.	263 // Helper function to write out the initial header and chunks-contained data.

262 // Rewinds |fp|, initializes |context|, then writes a file header and	264 // Rewinds |fp|, initializes |context|, then writes a file header and

263 // |add_chunks| and |sub_chunks|.	265 // |add_chunks| and |sub_chunks|.

264 bool WriteHeader(uint32 out_stride,	266 bool WriteHeader(uint32_t out_stride,

265 const std::set<int32>& add_chunks,	267 const std::set<int32_t>& add_chunks,

266 const std::set<int32>& sub_chunks,	268 const std::set<int32_t>& sub_chunks,

267 FILE* fp,	269 FILE* fp,

268 base::MD5Context* context) {	270 base::MD5Context* context) {

269 if (!FileRewind(fp))	271 if (!FileRewind(fp))

270 return false;	272 return false;

271	273

272 base::MD5Init(context);	274 base::MD5Init(context);

273 FileHeader header;	275 FileHeader header;

274 header.magic = kFileMagic;	276 header.magic = kFileMagic;

275 header.version = kFileVersion;	277 header.version = kFileVersion;

276 header.add_chunk_count = add_chunks.size();	278 header.add_chunk_count = add_chunks.size();

(...skipping 116 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
393 sub_full_hashes_.clear();	395 sub_full_hashes_.clear();

394 }	396 }

395	397

396 // Merge data from |beg|..|end| into receiver's state, then process the state.	398 // Merge data from |beg|..|end| into receiver's state, then process the state.

397 // The current state and the range given should correspond to the same sorted	399 // The current state and the range given should correspond to the same sorted

398 // shard of data from different sources. |add_del_cache| and |sub_del_cache|	400 // shard of data from different sources. |add_del_cache| and |sub_del_cache|

399 // indicate the chunk ids which should be deleted during processing (see	401 // indicate the chunk ids which should be deleted during processing (see

400 // SBProcessSubs).	402 // SBProcessSubs).

401 void MergeDataAndProcess(const StateInternalPos& beg,	403 void MergeDataAndProcess(const StateInternalPos& beg,

402 const StateInternalPos& end,	404 const StateInternalPos& end,

403 const base::hash_set<int32>& add_del_cache,	405 const base::hash_set<int32_t>& add_del_cache,

404 const base::hash_set<int32>& sub_del_cache) {	406 const base::hash_set<int32_t>& sub_del_cache) {

405 container_merge(&add_prefixes_,	407 container_merge(&add_prefixes_,

406 beg.add_prefixes_iter_,	408 beg.add_prefixes_iter_,

407 end.add_prefixes_iter_,	409 end.add_prefixes_iter_,

408 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);	410 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>);

409	411

410 container_merge(&sub_prefixes_,	412 container_merge(&sub_prefixes_,

411 beg.sub_prefixes_iter_,	413 beg.sub_prefixes_iter_,

412 end.sub_prefixes_iter_,	414 end.sub_prefixes_iter_,

413 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);	415 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>);

414	416

(...skipping 89 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
504	506

505 // Helper to read the entire database state, used by GetAddPrefixes() and	507 // Helper to read the entire database state, used by GetAddPrefixes() and

506 // GetAddFullHashes(). Those functions are generally used only for smaller	508 // GetAddFullHashes(). Those functions are generally used only for smaller

507 // files. Returns false in case of errors reading the data.	509 // files. Returns false in case of errors reading the data.

508 bool ReadDbStateHelper(const base::FilePath& filename,	510 bool ReadDbStateHelper(const base::FilePath& filename,

509 StateInternal* db_state) {	511 StateInternal* db_state) {

510 base::ScopedFILE file(base::OpenFile(filename, "rb"));	512 base::ScopedFILE file(base::OpenFile(filename, "rb"));

511 if (file.get() == NULL)	513 if (file.get() == NULL)

512 return false;	514 return false;

513	515

514 std::set<int32> add_chunks;	516 std::set<int32_t> add_chunks;

515 std::set<int32> sub_chunks;	517 std::set<int32_t> sub_chunks;

516	518

517 base::MD5Context context;	519 base::MD5Context context;

518 FileHeader header;	520 FileHeader header;

519 const int version =	521 const int version =

520 ReadAndVerifyHeader(filename, &header, &add_chunks, &sub_chunks,	522 ReadAndVerifyHeader(filename, &header, &add_chunks, &sub_chunks,

521 file.get(), &context);	523 file.get(), &context);

522 if (version == kInvalidVersion)	524 if (version == kInvalidVersion)

523 return false;	525 return false;

524	526

525 uint64 in_min = 0;	527 uint64_t in_min = 0;

526 uint64 in_stride = header.shard_stride;	528 uint64_t in_stride = header.shard_stride;

527 if (!in_stride)	529 if (!in_stride)

528 in_stride = kMaxShardStride;	530 in_stride = kMaxShardStride;

529 if (!IsPowerOfTwo(in_stride))	531 if (!IsPowerOfTwo(in_stride))

530 return false;	532 return false;

531	533

532 do {	534 do {

533 ShardHeader shard_header;	535 ShardHeader shard_header;

534 if (!ReadItem(&shard_header, file.get(), &context))	536 if (!ReadItem(&shard_header, file.get(), &context))

535 return false;	537 return false;

536	538

537 if (!db_state->AppendData(shard_header.add_prefix_count,	539 if (!db_state->AppendData(shard_header.add_prefix_count,

538 shard_header.sub_prefix_count,	540 shard_header.sub_prefix_count,

539 shard_header.add_hash_count,	541 shard_header.add_hash_count,

540 shard_header.sub_hash_count,	542 shard_header.sub_hash_count,

541 file.get(), &context)) {	543 file.get(), &context)) {

542 return false;	544 return false;

543 }	545 }

544	546

545 in_min += in_stride;	547 in_min += in_stride;

546 } while (in_min <= kMaxSBPrefix);	548 } while (in_min <= kMaxSBPrefix);

547	549

548 if (!ReadAndVerifyChecksum(file.get(), &context))	550 if (!ReadAndVerifyChecksum(file.get(), &context))

549 return false;	551 return false;

550	552

551 int64 size = 0;	553 int64_t size = 0;

552 if (!base::GetFileSize(filename, &size))	554 if (!base::GetFileSize(filename, &size))

553 return false;	555 return false;

554	556

555 return static_cast<int64>(ftell(file.get())) == size;	557 return static_cast<int64_t>(ftell(file.get())) == size;

556 }	558 }

557	559

558 } // namespace	560 } // namespace

559	561

560 SafeBrowsingStoreFile::SafeBrowsingStoreFile(	562 SafeBrowsingStoreFile::SafeBrowsingStoreFile(

561 const scoped_refptr<const base::SequencedTaskRunner>& task_runner)	563 const scoped_refptr<const base::SequencedTaskRunner>& task_runner)

562 : task_runner_(task_runner),	564 : task_runner_(task_runner),

563 chunks_written_(0),	565 chunks_written_(0),

564 empty_(false),	566 empty_(false),

565 corruption_seen_(false) {	567 corruption_seen_(false) {

(...skipping 29 matching lines...) Expand all Loading...
595 // The file was either empty or never opened. The empty case is	597 // The file was either empty or never opened. The empty case is

596 // presumed not to be invalid. The never-opened case can happen if	598 // presumed not to be invalid. The never-opened case can happen if

597 // BeginUpdate() fails for any databases, and should already have	599 // BeginUpdate() fails for any databases, and should already have

598 // caused the corruption callback to fire.	600 // caused the corruption callback to fire.

599 if (!file_.get())	601 if (!file_.get())

600 return true;	602 return true;

601	603

602 if (!FileRewind(file_.get()))	604 if (!FileRewind(file_.get()))

603 return OnCorruptDatabase();	605 return OnCorruptDatabase();

604	606

605 int64 size = 0;	607 int64_t size = 0;

606 if (!base::GetFileSize(filename_, &size))	608 if (!base::GetFileSize(filename_, &size))

607 return OnCorruptDatabase();	609 return OnCorruptDatabase();

608	610

609 base::MD5Context context;	611 base::MD5Context context;

610 base::MD5Init(&context);	612 base::MD5Init(&context);

611	613

612 // Read everything except the final digest.	614 // Read everything except the final digest.

613 size_t bytes_left = static_cast<size_t>(size);	615 size_t bytes_left = static_cast<size_t>(size);

614 CHECK(size == static_cast<int64>(bytes_left));	616 CHECK(size == static_cast<int64_t>(bytes_left));

615 if (bytes_left < sizeof(base::MD5Digest))	617 if (bytes_left < sizeof(base::MD5Digest))

616 return OnCorruptDatabase();	618 return OnCorruptDatabase();

617 bytes_left -= sizeof(base::MD5Digest);	619 bytes_left -= sizeof(base::MD5Digest);

618	620

619 // Fold the contents of the file into the checksum.	621 // Fold the contents of the file into the checksum.

620 while (bytes_left > 0) {	622 while (bytes_left > 0) {

621 char buf[4096];	623 char buf[4096];

622 const size_t c = std::min(sizeof(buf), bytes_left);	624 const size_t c = std::min(sizeof(buf), bytes_left);

623 const size_t ret = fread(buf, 1, c, file_.get());	625 const size_t ret = fread(buf, 1, c, file_.get());

624	626

(...skipping 17 matching lines...) Expand all Loading...
642 DCHECK(CalledOnValidThread());	644 DCHECK(CalledOnValidThread());

643 filename_ = filename;	645 filename_ = filename;

644 corruption_callback_ = corruption_callback;	646 corruption_callback_ = corruption_callback;

645 }	647 }

646	648

647 bool SafeBrowsingStoreFile::BeginChunk() {	649 bool SafeBrowsingStoreFile::BeginChunk() {

648 DCHECK(CalledOnValidThread());	650 DCHECK(CalledOnValidThread());

649 return ClearChunkBuffers();	651 return ClearChunkBuffers();

650 }	652 }

651	653

652 bool SafeBrowsingStoreFile::WriteAddPrefix(int32 chunk_id, SBPrefix prefix) {	654 bool SafeBrowsingStoreFile::WriteAddPrefix(int32_t chunk_id, SBPrefix prefix) {

653 DCHECK(CalledOnValidThread());	655 DCHECK(CalledOnValidThread());

654 add_prefixes_.push_back(SBAddPrefix(chunk_id, prefix));	656 add_prefixes_.push_back(SBAddPrefix(chunk_id, prefix));

655 return true;	657 return true;

656 }	658 }

657	659

658 bool SafeBrowsingStoreFile::GetAddPrefixes(SBAddPrefixes* add_prefixes) {	660 bool SafeBrowsingStoreFile::GetAddPrefixes(SBAddPrefixes* add_prefixes) {

659 DCHECK(CalledOnValidThread());	661 DCHECK(CalledOnValidThread());

660	662

661 add_prefixes->clear();	663 add_prefixes->clear();

662 if (!base::PathExists(filename_))	664 if (!base::PathExists(filename_))

(...skipping 16 matching lines...) Expand all Loading...
679 return true;	681 return true;

680	682

681 StateInternal db_state;	683 StateInternal db_state;

682 if (!ReadDbStateHelper(filename_, &db_state))	684 if (!ReadDbStateHelper(filename_, &db_state))

683 return OnCorruptDatabase();	685 return OnCorruptDatabase();

684	686

685 add_full_hashes->swap(db_state.add_full_hashes_);	687 add_full_hashes->swap(db_state.add_full_hashes_);

686 return true;	688 return true;

687 }	689 }

688	690

689 bool SafeBrowsingStoreFile::WriteAddHash(int32 chunk_id,	691 bool SafeBrowsingStoreFile::WriteAddHash(int32_t chunk_id,

690 const SBFullHash& full_hash) {	692 const SBFullHash& full_hash) {

691 DCHECK(CalledOnValidThread());	693 DCHECK(CalledOnValidThread());

692 add_hashes_.push_back(SBAddFullHash(chunk_id, full_hash));	694 add_hashes_.push_back(SBAddFullHash(chunk_id, full_hash));

693 return true;	695 return true;

694 }	696 }

695	697

696 bool SafeBrowsingStoreFile::WriteSubPrefix(int32 chunk_id,	698 bool SafeBrowsingStoreFile::WriteSubPrefix(int32_t chunk_id,

697 int32 add_chunk_id,	699 int32_t add_chunk_id,

698 SBPrefix prefix) {	700 SBPrefix prefix) {

699 DCHECK(CalledOnValidThread());	701 DCHECK(CalledOnValidThread());

700 sub_prefixes_.push_back(SBSubPrefix(chunk_id, add_chunk_id, prefix));	702 sub_prefixes_.push_back(SBSubPrefix(chunk_id, add_chunk_id, prefix));

701 return true;	703 return true;

702 }	704 }

703	705

704 bool SafeBrowsingStoreFile::WriteSubHash(int32 chunk_id, int32 add_chunk_id,	706 bool SafeBrowsingStoreFile::WriteSubHash(int32_t chunk_id,

	707 int32_t add_chunk_id,

705 const SBFullHash& full_hash) {	708 const SBFullHash& full_hash) {

706 DCHECK(CalledOnValidThread());	709 DCHECK(CalledOnValidThread());

707 sub_hashes_.push_back(SBSubFullHash(chunk_id, add_chunk_id, full_hash));	710 sub_hashes_.push_back(SBSubFullHash(chunk_id, add_chunk_id, full_hash));

708 return true;	711 return true;

709 }	712 }

710	713

711 bool SafeBrowsingStoreFile::OnCorruptDatabase() {	714 bool SafeBrowsingStoreFile::OnCorruptDatabase() {

712 DCHECK(CalledOnValidThread());	715 DCHECK(CalledOnValidThread());

713	716

714 if (!corruption_seen_)	717 if (!corruption_seen_)

(...skipping 113 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
828 DCHECK(file_.get() \|\| empty_);	831 DCHECK(file_.get() \|\| empty_);

829 DCHECK(new_file_.get());	832 DCHECK(new_file_.get());

830 CHECK(builder);	833 CHECK(builder);

831 CHECK(add_full_hashes_result);	834 CHECK(add_full_hashes_result);

832	835

833 // Rewind the temporary storage.	836 // Rewind the temporary storage.

834 if (!FileRewind(new_file_.get()))	837 if (!FileRewind(new_file_.get()))

835 return false;	838 return false;

836	839

837 // Get chunk file's size for validating counts.	840 // Get chunk file's size for validating counts.

838 int64 update_size = 0;	841 int64_t update_size = 0;

839 if (!base::GetFileSize(TemporaryFileForFilename(filename_), &update_size))	842 if (!base::GetFileSize(TemporaryFileForFilename(filename_), &update_size))

840 return OnCorruptDatabase();	843 return OnCorruptDatabase();

841	844

842 // Track update size to answer questions at http://crbug.com/72216 .	845 // Track update size to answer questions at http://crbug.com/72216 .

843 // Log small updates as 1k so that the 0 (underflow) bucket can be	846 // Log small updates as 1k so that the 0 (underflow) bucket can be

844 // used for "empty" in SafeBrowsingDatabase.	847 // used for "empty" in SafeBrowsingDatabase.

845 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes",	848 UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes",

846 std::max(static_cast<int>(update_size / 1024), 1));	849 std::max(static_cast<int>(update_size / 1024), 1));

847	850

848 // Chunk updates to integrate.	851 // Chunk updates to integrate.

849 StateInternal new_state;	852 StateInternal new_state;

850	853

851 // Read update chunks.	854 // Read update chunks.

852 for (int i = 0; i < chunks_written_; ++i) {	855 for (int i = 0; i < chunks_written_; ++i) {

853 ChunkHeader header;	856 ChunkHeader header;

854	857

855 int64 ofs = ftell(new_file_.get());	858 int64_t ofs = ftell(new_file_.get());

856 if (ofs == -1)	859 if (ofs == -1)

857 return false;	860 return false;

858	861

859 if (!ReadItem(&header, new_file_.get(), NULL))	862 if (!ReadItem(&header, new_file_.get(), NULL))

860 return false;	863 return false;

861	864

862 // As a safety measure, make sure that the header describes a sane	865 // As a safety measure, make sure that the header describes a sane

863 // chunk, given the remaining file size.	866 // chunk, given the remaining file size.

864 int64 expected_size = ofs + sizeof(ChunkHeader);	867 int64_t expected_size = ofs + sizeof(ChunkHeader);

865 expected_size += header.add_prefix_count * sizeof(SBAddPrefix);	868 expected_size += header.add_prefix_count * sizeof(SBAddPrefix);

866 expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);	869 expected_size += header.sub_prefix_count * sizeof(SBSubPrefix);

867 expected_size += header.add_hash_count * sizeof(SBAddFullHash);	870 expected_size += header.add_hash_count * sizeof(SBAddFullHash);

868 expected_size += header.sub_hash_count * sizeof(SBSubFullHash);	871 expected_size += header.sub_hash_count * sizeof(SBSubFullHash);

869 if (expected_size > update_size)	872 if (expected_size > update_size)

870 return false;	873 return false;

871	874

872 if (!new_state.AppendData(header.add_prefix_count, header.sub_prefix_count,	875 if (!new_state.AppendData(header.add_prefix_count, header.sub_prefix_count,

873 header.add_hash_count, header.sub_hash_count,	876 header.add_hash_count, header.sub_hash_count,

874 new_file_.get(), NULL)) {	877 new_file_.get(), NULL)) {

875 return false;	878 return false;

876 }	879 }

877 }	880 }

878	881

879 // The state was accumulated by chunk, sort by prefix.	882 // The state was accumulated by chunk, sort by prefix.

880 new_state.SortData();	883 new_state.SortData();

881	884

882 // These strides control how much data is loaded into memory per pass.	885 // These strides control how much data is loaded into memory per pass.

883 // Strides must be an exact power of two. |in_stride| will be derived from the	886 // Strides must be an exact power of two. |in_stride| will be derived from the

884 // input file. |out_stride| will be derived from an estimate of the resulting	887 // input file. |out_stride| will be derived from an estimate of the resulting

885 // file's size. |process_stride| will be the max of both.	888 // file's size. |process_stride| will be the max of both.

886 uint64 in_stride = kMaxShardStride;	889 uint64_t in_stride = kMaxShardStride;

887 uint64 out_stride = kMaxShardStride;	890 uint64_t out_stride = kMaxShardStride;

888 uint64 process_stride = 0;	891 uint64_t process_stride = 0;

889	892

890 // Used to verify the input's checksum if |!empty_|.	893 // Used to verify the input's checksum if |!empty_|.

891 base::MD5Context in_context;	894 base::MD5Context in_context;

892	895

893 if (!empty_) {	896 if (!empty_) {

894 DCHECK(file_.get());	897 DCHECK(file_.get());

895	898

896 FileHeader header = {0};	899 FileHeader header = {0};

897 int version = ReadAndVerifyHeader(filename_, &header,	900 int version = ReadAndVerifyHeader(filename_, &header,

898 &add_chunks_cache_, &sub_chunks_cache_,	901 &add_chunks_cache_, &sub_chunks_cache_,

899 file_.get(), &in_context);	902 file_.get(), &in_context);

900 if (version == kInvalidVersion)	903 if (version == kInvalidVersion)

901 return OnCorruptDatabase();	904 return OnCorruptDatabase();

902	905

903 if (header.shard_stride)	906 if (header.shard_stride)

904 in_stride = header.shard_stride;	907 in_stride = header.shard_stride;

905	908

906 // The header checksum should have prevented this case, but the code will be	909 // The header checksum should have prevented this case, but the code will be

907 // broken if this is not correct.	910 // broken if this is not correct.

908 if (!IsPowerOfTwo(in_stride))	911 if (!IsPowerOfTwo(in_stride))

909 return OnCorruptDatabase();	912 return OnCorruptDatabase();

910 }	913 }

911	914

912 // We no longer need to track deleted chunks.	915 // We no longer need to track deleted chunks.

913 DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);	916 DeleteChunksFromSet(add_del_cache_, &add_chunks_cache_);

914 DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);	917 DeleteChunksFromSet(sub_del_cache_, &sub_chunks_cache_);

915	918

916 // Calculate |out_stride| to break the file down into reasonable shards.	919 // Calculate |out_stride| to break the file down into reasonable shards.

917 {	920 {

918 int64 original_size = 0;	921 int64_t original_size = 0;

919 if (!empty_ && !base::GetFileSize(filename_, &original_size))	922 if (!empty_ && !base::GetFileSize(filename_, &original_size))

920 return OnCorruptDatabase();	923 return OnCorruptDatabase();

921	924

922 // Approximate the final size as everything. Subs and deletes will reduce	925 // Approximate the final size as everything. Subs and deletes will reduce

923 // the size, but modest over-sharding won't hurt much.	926 // the size, but modest over-sharding won't hurt much.

924 int64 shard_size = original_size + update_size;	927 int64_t shard_size = original_size + update_size;

925	928

926 // Keep splitting until a single stride of data fits the target.	929 // Keep splitting until a single stride of data fits the target.

927 size_t shifts = 0;	930 size_t shifts = 0;

928 while (out_stride > kMinShardStride && shard_size > kUpdateStorageBytes) {	931 while (out_stride > kMinShardStride && shard_size > kUpdateStorageBytes) {

929 out_stride >>= 1;	932 out_stride >>= 1;

930 shard_size >>= 1;	933 shard_size >>= 1;

931 ++shifts;	934 ++shifts;

932 }	935 }

933 UMA_HISTOGRAM_COUNTS("SB2.OutShardShifts", shifts);	936 UMA_HISTOGRAM_COUNTS("SB2.OutShardShifts", shifts);

934	937

935 DCHECK(IsPowerOfTwo(out_stride));	938 DCHECK(IsPowerOfTwo(out_stride));

936 }	939 }

937	940

938 // Outer loop strides by the max of the input stride (to read integral shards)	941 // Outer loop strides by the max of the input stride (to read integral shards)

939 // and the output stride (to write integral shards).	942 // and the output stride (to write integral shards).

940 process_stride = std::max(in_stride, out_stride);	943 process_stride = std::max(in_stride, out_stride);

941 DCHECK(IsPowerOfTwo(process_stride));	944 DCHECK(IsPowerOfTwo(process_stride));

942 DCHECK_EQ(0u, process_stride % in_stride);	945 DCHECK_EQ(0u, process_stride % in_stride);

943 DCHECK_EQ(0u, process_stride % out_stride);	946 DCHECK_EQ(0u, process_stride % out_stride);

944	947

945 // Start writing the new data to |new_file_|.	948 // Start writing the new data to |new_file_|.

946 base::MD5Context out_context;	949 base::MD5Context out_context;

947 if (!WriteHeader(out_stride, add_chunks_cache_, sub_chunks_cache_,	950 if (!WriteHeader(out_stride, add_chunks_cache_, sub_chunks_cache_,

948 new_file_.get(), &out_context)) {	951 new_file_.get(), &out_context)) {

949 return false;	952 return false;

950 }	953 }

951	954

952 // Start at the beginning of the SBPrefix space.	955 // Start at the beginning of the SBPrefix space.

953 uint64 in_min = 0;	956 uint64_t in_min = 0;

954 uint64 out_min = 0;	957 uint64_t out_min = 0;

955 uint64 process_min = 0;	958 uint64_t process_min = 0;

956	959

957 // Start at the beginning of the updates.	960 // Start at the beginning of the updates.

958 StateInternalPos new_pos = new_state.StateBegin();	961 StateInternalPos new_pos = new_state.StateBegin();

959	962

960 // Re-usable container for shard processing.	963 // Re-usable container for shard processing.

961 StateInternal db_state;	964 StateInternal db_state;

962	965

963 // Track aggregate counts for histograms.	966 // Track aggregate counts for histograms.

964 size_t add_prefix_count = 0;	967 size_t add_prefix_count = 0;

965 size_t sub_prefix_count = 0;	968 size_t sub_prefix_count = 0;

(...skipping 125 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
1091 DCHECK(CalledOnValidThread());	1094 DCHECK(CalledOnValidThread());

1092 bool ret = Close();	1095 bool ret = Close();

1093	1096

1094 // Delete stale staging file.	1097 // Delete stale staging file.

1095 const base::FilePath new_filename = TemporaryFileForFilename(filename_);	1098 const base::FilePath new_filename = TemporaryFileForFilename(filename_);

1096 base::DeleteFile(new_filename, false);	1099 base::DeleteFile(new_filename, false);

1097	1100

1098 return ret;	1101 return ret;

1099 }	1102 }

1100	1103

1101 void SafeBrowsingStoreFile::SetAddChunk(int32 chunk_id) {	1104 void SafeBrowsingStoreFile::SetAddChunk(int32_t chunk_id) {

1102 DCHECK(CalledOnValidThread());	1105 DCHECK(CalledOnValidThread());

1103 add_chunks_cache_.insert(chunk_id);	1106 add_chunks_cache_.insert(chunk_id);

1104 }	1107 }

1105	1108

1106 bool SafeBrowsingStoreFile::CheckAddChunk(int32 chunk_id) {	1109 bool SafeBrowsingStoreFile::CheckAddChunk(int32_t chunk_id) {

1107 DCHECK(CalledOnValidThread());	1110 DCHECK(CalledOnValidThread());

1108 return add_chunks_cache_.count(chunk_id) > 0;	1111 return add_chunks_cache_.count(chunk_id) > 0;

1109 }	1112 }

1110	1113

1111 void SafeBrowsingStoreFile::GetAddChunks(std::vector<int32>* out) {	1114 void SafeBrowsingStoreFile::GetAddChunks(std::vector<int32_t>* out) {

1112 DCHECK(CalledOnValidThread());	1115 DCHECK(CalledOnValidThread());

1113 out->clear();	1116 out->clear();

1114 out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end());	1117 out->insert(out->end(), add_chunks_cache_.begin(), add_chunks_cache_.end());

1115 }	1118 }

1116	1119

1117 void SafeBrowsingStoreFile::SetSubChunk(int32 chunk_id) {	1120 void SafeBrowsingStoreFile::SetSubChunk(int32_t chunk_id) {

1118 DCHECK(CalledOnValidThread());	1121 DCHECK(CalledOnValidThread());

1119 sub_chunks_cache_.insert(chunk_id);	1122 sub_chunks_cache_.insert(chunk_id);

1120 }	1123 }

1121	1124

1122 bool SafeBrowsingStoreFile::CheckSubChunk(int32 chunk_id) {	1125 bool SafeBrowsingStoreFile::CheckSubChunk(int32_t chunk_id) {

1123 DCHECK(CalledOnValidThread());	1126 DCHECK(CalledOnValidThread());

1124 return sub_chunks_cache_.count(chunk_id) > 0;	1127 return sub_chunks_cache_.count(chunk_id) > 0;

1125 }	1128 }

1126	1129

1127 void SafeBrowsingStoreFile::GetSubChunks(std::vector<int32>* out) {	1130 void SafeBrowsingStoreFile::GetSubChunks(std::vector<int32_t>* out) {

1128 DCHECK(CalledOnValidThread());	1131 DCHECK(CalledOnValidThread());

1129 out->clear();	1132 out->clear();

1130 out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end());	1133 out->insert(out->end(), sub_chunks_cache_.begin(), sub_chunks_cache_.end());

1131 }	1134 }

1132	1135

1133 void SafeBrowsingStoreFile::DeleteAddChunk(int32 chunk_id) {	1136 void SafeBrowsingStoreFile::DeleteAddChunk(int32_t chunk_id) {

1134 DCHECK(CalledOnValidThread());	1137 DCHECK(CalledOnValidThread());

1135 add_del_cache_.insert(chunk_id);	1138 add_del_cache_.insert(chunk_id);

1136 }	1139 }

1137	1140

1138 void SafeBrowsingStoreFile::DeleteSubChunk(int32 chunk_id) {	1141 void SafeBrowsingStoreFile::DeleteSubChunk(int32_t chunk_id) {

1139 DCHECK(CalledOnValidThread());	1142 DCHECK(CalledOnValidThread());

1140 sub_del_cache_.insert(chunk_id);	1143 sub_del_cache_.insert(chunk_id);

1141 }	1144 }

1142	1145

1143 // static	1146 // static

1144 bool SafeBrowsingStoreFile::DeleteStore(const base::FilePath& basename) {	1147 bool SafeBrowsingStoreFile::DeleteStore(const base::FilePath& basename) {

1145 if (!base::DeleteFile(basename, false) &&	1148 if (!base::DeleteFile(basename, false) &&

1146 base::PathExists(basename)) {	1149 base::PathExists(basename)) {

1147 NOTREACHED();	1150 NOTREACHED();

1148 return false;	1151 return false;

(...skipping 11 matching lines...) Expand all Loading...
1160 // also removed.	1163 // also removed.

1161 const base::FilePath journal_filename(	1164 const base::FilePath journal_filename(

1162 basename.value() + FILE_PATH_LITERAL("-journal"));	1165 basename.value() + FILE_PATH_LITERAL("-journal"));

1163 if (base::PathExists(journal_filename))	1166 if (base::PathExists(journal_filename))

1164 base::DeleteFile(journal_filename, false);	1167 base::DeleteFile(journal_filename, false);

1165	1168

1166 return true;	1169 return true;

1167 }	1170 }

1168	1171

1169 } // namespace safe_browsing	1172 } // namespace safe_browsing

OLD	NEW