chrome/browser/safe_browsing/safe_browsing_store_file.h - Issue 1548133002: Switch to standard integer types in chrome/browser/, part 3 of 4.

Side by Side Diff: chrome/browser/safe_browsing/safe_browsing_store_file.h

Issue 1548133002: Switch to standard integer types in chrome/browser/, part 3 of 4. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 4 years, 12 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_	5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_

6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_	6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_

7	7

	8 #include <stdint.h>

	9

8 #include <set>	10 #include <set>

9 #include <vector>	11 #include <vector>

10	12

11 #include "chrome/browser/safe_browsing/safe_browsing_store.h"	13 #include "chrome/browser/safe_browsing/safe_browsing_store.h"

12	14

13 #include "base/callback.h"	15 #include "base/callback.h"

14 #include "base/files/file_path.h"	16 #include "base/files/file_path.h"

15 #include "base/files/scoped_file.h"	17 #include "base/files/scoped_file.h"

	18 #include "base/macros.h"

16 #include "base/sequenced_task_runner.h"	19 #include "base/sequenced_task_runner.h"

17	20

18 namespace safe_browsing {	21 namespace safe_browsing {

19	22

20 // Implement SafeBrowsingStore in terms of a flat file. The file	23 // Implement SafeBrowsingStore in terms of a flat file. The file

21 // format is pretty literal:	24 // format is pretty literal:

22 //	25 //

23 // int32 magic; // magic number "validating" file	26 // int32_t magic; // magic number "validating" file

24 // int32 version; // format version	27 // int32_t version; // format version

25 //	28 //

26 // // Counts for the various data which follows the header.	29 // // Counts for the various data which follows the header.

27 // uint32 add_chunk_count; // Chunks seen, including empties.	30 // uint32_t add_chunk_count; // Chunks seen, including empties.

28 // uint32 sub_chunk_count; // Ditto.	31 // uint32_t sub_chunk_count; // Ditto.

29 // uint32 shard_stride; // SBPrefix space covered per shard.	32 // uint32_t shard_stride; // SBPrefix space covered per shard.

30 // // 0==entire space in one shard.	33 // // 0==entire space in one shard.

31 // // Sorted by chunk_id.	34 // // Sorted by chunk_id.

32 // array[add_chunk_count] {	35 // array[add_chunk_count] {

33 // int32 chunk_id;	36 // int32_t chunk_id;

34 // }	37 // }

35 // // Sorted by chunk_id.	38 // // Sorted by chunk_id.

36 // array[sub_chunk_count] {	39 // array[sub_chunk_count] {

37 // int32 chunk_id;	40 // int32_t chunk_id;

38 // }	41 // }

39 // MD5Digest header_checksum; // Checksum over preceding data.	42 // MD5Digest header_checksum; // Checksum over preceding data.

40 //	43 //

41 // // Sorted by prefix, then add chunk_id, then hash, both within shards and	44 // // Sorted by prefix, then add chunk_id, then hash, both within shards and

42 // // overall.	45 // // overall.

43 // array[from 0 to wraparound to 0 by shard_stride] {	46 // array[from 0 to wraparound to 0 by shard_stride] {

44 // uint32 add_prefix_count;	47 // uint32_t add_prefix_count;

45 // uint32 sub_prefix_count;	48 // uint32_t sub_prefix_count;

46 // uint32 add_hash_count;	49 // uint32_t add_hash_count;

47 // uint32 sub_hash_count;	50 // uint32_t sub_hash_count;

48 // array[add_prefix_count] {	51 // array[add_prefix_count] {

49 // int32 chunk_id;	52 // int32_t chunk_id;

50 // uint32 prefix;	53 // uint32_t prefix;

51 // }	54 // }

52 // array[sub_prefix_count] {	55 // array[sub_prefix_count] {

53 // int32 chunk_id;	56 // int32_t chunk_id;

54 // int32 add_chunk_id;	57 // int32_t add_chunk_id;

55 // uint32 add_prefix;	58 // uint32_t add_prefix;

56 // }	59 // }

57 // array[add_hash_count] {	60 // array[add_hash_count] {

58 // int32 chunk_id;	61 // int32_t chunk_id;

59 // int32 received_time; // From base::Time::ToTimeT().	62 // int32_t received_time; // From base::Time::ToTimeT().

60 // char[32] full_hash;	63 // char[32] full_hash;

61 // }	64 // }

62 // array[sub_hash_count] {	65 // array[sub_hash_count] {

63 // int32 chunk_id;	66 // int32_t chunk_id;

64 // int32 add_chunk_id;	67 // int32_t add_chunk_id;

65 // char[32] add_full_hash;	68 // char[32] add_full_hash;

66 // }	69 // }

67 // }	70 // }

68 // MD5Digest checksum; // Checksum over entire file.	71 // MD5Digest checksum; // Checksum over entire file.

69 //	72 //

70 // The checksums are used to allow writing the file without doing an expensive	73 // The checksums are used to allow writing the file without doing an expensive

71 // fsync(). Since the data can be re-fetched, failing the checksum is not	74 // fsync(). Since the data can be re-fetched, failing the checksum is not

72 // catastrophic. Histograms indicate that file corruption here is pretty	75 // catastrophic. Histograms indicate that file corruption here is pretty

73 // uncommon.	76 // uncommon.

74 //	77 //

75 // The |header_checksum| is present to guarantee valid header and chunk data for	78 // The |header_checksum| is present to guarantee valid header and chunk data for

76 // updates. Only that part of the file needs to be read to post the update.	79 // updates. Only that part of the file needs to be read to post the update.

77 //	80 //

78 // |shard_stride| breaks the file into approximately-equal portions, allowing	81 // |shard_stride| breaks the file into approximately-equal portions, allowing

79 // updates to stream from one file to another with modest memory usage. It is	82 // updates to stream from one file to another with modest memory usage. It is

80 // dynamic to adjust to different file sizes without adding excessive overhead.	83 // dynamic to adjust to different file sizes without adding excessive overhead.

81 //	84 //

82 // During the course of an update, uncommitted data is stored in a	85 // During the course of an update, uncommitted data is stored in a

83 // temporary file (which is later re-used to commit). This is an	86 // temporary file (which is later re-used to commit). This is an

84 // array of chunks, with the count kept in memory until the end of the	87 // array of chunks, with the count kept in memory until the end of the

85 // transaction. The format of this file is like the main file, with	88 // transaction. The format of this file is like the main file, with

86 // the list of chunks seen omitted, as that data is tracked in-memory:	89 // the list of chunks seen omitted, as that data is tracked in-memory:

87 //	90 //

88 // array[] {	91 // array[] {

89 // uint32 add_prefix_count;	92 // uint32_t add_prefix_count;

90 // uint32 sub_prefix_count;	93 // uint32_t sub_prefix_count;

91 // uint32 add_hash_count;	94 // uint32_t add_hash_count;

92 // uint32 sub_hash_count;	95 // uint32_t sub_hash_count;

93 // array[add_prefix_count] {	96 // array[add_prefix_count] {

94 // int32 chunk_id;	97 // int32_t chunk_id;

95 // uint32 prefix;	98 // uint32_t prefix;

96 // }	99 // }

97 // array[sub_prefix_count] {	100 // array[sub_prefix_count] {

98 // int32 chunk_id;	101 // int32_t chunk_id;

99 // int32 add_chunk_id;	102 // int32_t add_chunk_id;

100 // uint32 add_prefix;	103 // uint32_t add_prefix;

101 // }	104 // }

102 // array[add_hash_count] {	105 // array[add_hash_count] {

103 // int32 chunk_id;	106 // int32_t chunk_id;

104 // int32 received_time; // From base::Time::ToTimeT().	107 // int32_t received_time; // From base::Time::ToTimeT().

105 // char[32] full_hash;	108 // char[32] full_hash;

106 // }	109 // }

107 // array[sub_hash_count] {	110 // array[sub_hash_count] {

108 // int32 chunk_id;	111 // int32_t chunk_id;

109 // int32 add_chunk_id;	112 // int32_t add_chunk_id;

110 // char[32] add_full_hash;	113 // char[32] add_full_hash;

111 // }	114 // }

112 // }	115 // }

113 //	116 //

114 // The overall transaction works like this:	117 // The overall transaction works like this:

115 // - Open the original file to get the chunks-seen data.	118 // - Open the original file to get the chunks-seen data.

116 // - Open a temp file for storing new chunk info.	119 // - Open a temp file for storing new chunk info.

117 // - Write new chunks to the temp file.	120 // - Write new chunks to the temp file.

118 // - When the transaction is finished:	121 // - When the transaction is finished:

119 // - Read the update data from the temp file into memory.	122 // - Read the update data from the temp file into memory.

(...skipping 17 matching lines...) Expand all Loading...
137 // Delete any on-disk files, including the permanent storage.	140 // Delete any on-disk files, including the permanent storage.

138 bool Delete() override;	141 bool Delete() override;

139	142

140 // Get all add hash prefixes and full-length hashes, respectively, from	143 // Get all add hash prefixes and full-length hashes, respectively, from

141 // the store.	144 // the store.

142 bool GetAddPrefixes(SBAddPrefixes* add_prefixes) override;	145 bool GetAddPrefixes(SBAddPrefixes* add_prefixes) override;

143 bool GetAddFullHashes(std::vector<SBAddFullHash>* add_full_hashes) override;	146 bool GetAddFullHashes(std::vector<SBAddFullHash>* add_full_hashes) override;

144	147

145 bool BeginChunk() override;	148 bool BeginChunk() override;

146	149

147 bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) override;	150 bool WriteAddPrefix(int32_t chunk_id, SBPrefix prefix) override;

148 bool WriteAddHash(int32 chunk_id, const SBFullHash& full_hash) override;	151 bool WriteAddHash(int32_t chunk_id, const SBFullHash& full_hash) override;

149 bool WriteSubPrefix(int32 chunk_id,	152 bool WriteSubPrefix(int32_t chunk_id,

150 int32 add_chunk_id,	153 int32_t add_chunk_id,

151 SBPrefix prefix) override;	154 SBPrefix prefix) override;

152 bool WriteSubHash(int32 chunk_id,	155 bool WriteSubHash(int32_t chunk_id,

153 int32 add_chunk_id,	156 int32_t add_chunk_id,

154 const SBFullHash& full_hash) override;	157 const SBFullHash& full_hash) override;

155 bool FinishChunk() override;	158 bool FinishChunk() override;

156	159

157 bool BeginUpdate() override;	160 bool BeginUpdate() override;

158 bool FinishUpdate(	161 bool FinishUpdate(

159 PrefixSetBuilder* builder,	162 PrefixSetBuilder* builder,

160 std::vector<SBAddFullHash>* add_full_hashes_result) override;	163 std::vector<SBAddFullHash>* add_full_hashes_result) override;

161 bool CancelUpdate() override;	164 bool CancelUpdate() override;

162	165

163 void SetAddChunk(int32 chunk_id) override;	166 void SetAddChunk(int32_t chunk_id) override;

164 bool CheckAddChunk(int32 chunk_id) override;	167 bool CheckAddChunk(int32_t chunk_id) override;

165 void GetAddChunks(std::vector<int32>* out) override;	168 void GetAddChunks(std::vector<int32_t>* out) override;

166 void SetSubChunk(int32 chunk_id) override;	169 void SetSubChunk(int32_t chunk_id) override;

167 bool CheckSubChunk(int32 chunk_id) override;	170 bool CheckSubChunk(int32_t chunk_id) override;

168 void GetSubChunks(std::vector<int32>* out) override;	171 void GetSubChunks(std::vector<int32_t>* out) override;

169	172

170 void DeleteAddChunk(int32 chunk_id) override;	173 void DeleteAddChunk(int32_t chunk_id) override;

171 void DeleteSubChunk(int32 chunk_id) override;	174 void DeleteSubChunk(int32_t chunk_id) override;

172	175

173 // Verify |file_|'s checksum, calling the corruption callback if it	176 // Verify |file_|'s checksum, calling the corruption callback if it

174 // does not check out. Empty input is considered valid.	177 // does not check out. Empty input is considered valid.

175 bool CheckValidity() override;	178 bool CheckValidity() override;

176	179

177 // Returns the name of the temporary file used to buffer data for	180 // Returns the name of the temporary file used to buffer data for

178 // |filename|. Exported for unit tests.	181 // |filename|. Exported for unit tests.

179 static const base::FilePath TemporaryFileForFilename(	182 static const base::FilePath TemporaryFileForFilename(

180 const base::FilePath& filename) {	183 const base::FilePath& filename) {

181 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new"));	184 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new"));

(...skipping 40 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
222 SBSubPrefixes().swap(sub_prefixes_);	225 SBSubPrefixes().swap(sub_prefixes_);

223 std::vector<SBAddFullHash>().swap(add_hashes_);	226 std::vector<SBAddFullHash>().swap(add_hashes_);

224 std::vector<SBSubFullHash>().swap(sub_hashes_);	227 std::vector<SBSubFullHash>().swap(sub_hashes_);

225 return true;	228 return true;

226 }	229 }

227	230

228 // Clear all buffers used during update.	231 // Clear all buffers used during update.

229 void ClearUpdateBuffers() {	232 void ClearUpdateBuffers() {

230 ClearChunkBuffers();	233 ClearChunkBuffers();

231 chunks_written_ = 0;	234 chunks_written_ = 0;

232 std::set<int32>().swap(add_chunks_cache_);	235 std::set<int32_t>().swap(add_chunks_cache_);

233 std::set<int32>().swap(sub_chunks_cache_);	236 std::set<int32_t>().swap(sub_chunks_cache_);

234 base::hash_set<int32>().swap(add_del_cache_);	237 base::hash_set<int32_t>().swap(add_del_cache_);

235 base::hash_set<int32>().swap(sub_del_cache_);	238 base::hash_set<int32_t>().swap(sub_del_cache_);

236 }	239 }

237	240

238 // The sequenced task runner for this object, used to verify that its state	241 // The sequenced task runner for this object, used to verify that its state

239 // is only ever accessed from the runner.	242 // is only ever accessed from the runner.

240 scoped_refptr<const base::SequencedTaskRunner> task_runner_;	243 scoped_refptr<const base::SequencedTaskRunner> task_runner_;

241	244

242 // Buffers for collecting data between BeginChunk() and	245 // Buffers for collecting data between BeginChunk() and

243 // FinishChunk().	246 // FinishChunk().

244 SBAddPrefixes add_prefixes_;	247 SBAddPrefixes add_prefixes_;

245 SBSubPrefixes sub_prefixes_;	248 SBSubPrefixes sub_prefixes_;

246 std::vector<SBAddFullHash> add_hashes_;	249 std::vector<SBAddFullHash> add_hashes_;

247 std::vector<SBSubFullHash> sub_hashes_;	250 std::vector<SBSubFullHash> sub_hashes_;

248	251

249 // Count of chunks collected in |new_file_|.	252 // Count of chunks collected in |new_file_|.

250 int chunks_written_;	253 int chunks_written_;

251	254

252 // Name of the main database file.	255 // Name of the main database file.

253 base::FilePath filename_;	256 base::FilePath filename_;

254	257

255 // Handles to the main and scratch files. |empty_| is true if the	258 // Handles to the main and scratch files. |empty_| is true if the

256 // main file didn't exist when the update was started.	259 // main file didn't exist when the update was started.

257 base::ScopedFILE file_;	260 base::ScopedFILE file_;

258 base::ScopedFILE new_file_;	261 base::ScopedFILE new_file_;

259 bool empty_;	262 bool empty_;

260	263

261 // Cache of chunks which have been seen. Loaded from the database	264 // Cache of chunks which have been seen. Loaded from the database

262 // on BeginUpdate() so that it can be queried during the	265 // on BeginUpdate() so that it can be queried during the

263 // transaction.	266 // transaction.

264 std::set<int32> add_chunks_cache_;	267 std::set<int32_t> add_chunks_cache_;

265 std::set<int32> sub_chunks_cache_;	268 std::set<int32_t> sub_chunks_cache_;

266	269

267 // Cache the set of deleted chunks during a transaction, applied on	270 // Cache the set of deleted chunks during a transaction, applied on

268 // FinishUpdate().	271 // FinishUpdate().

269 // TODO(shess): If the set is small enough, hash_set<> might be	272 // TODO(shess): If the set is small enough, hash_set<> might be

270 // slower than plain set<>.	273 // slower than plain set<>.

271 base::hash_set<int32> add_del_cache_;	274 base::hash_set<int32_t> add_del_cache_;

272 base::hash_set<int32> sub_del_cache_;	275 base::hash_set<int32_t> sub_del_cache_;

273	276

274 base::Closure corruption_callback_;	277 base::Closure corruption_callback_;

275	278

276 // Tracks whether corruption has already been seen in the current	279 // Tracks whether corruption has already been seen in the current

277 // update, so that only one instance is recorded in the stats.	280 // update, so that only one instance is recorded in the stats.

278 // TODO(shess): Remove with format-migration support.	281 // TODO(shess): Remove with format-migration support.

279 bool corruption_seen_;	282 bool corruption_seen_;

280	283

281 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreFile);	284 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreFile);

282 };	285 };

283	286

284 } // namespace safe_browsing	287 } // namespace safe_browsing

285	288

286 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_	289 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_

OLD	NEW

« no previous file with comments | « chrome/browser/safe_browsing/safe_browsing_store.cc ('k') | chrome/browser/safe_browsing/safe_browsing_store_file.cc » ('j') | no next file with comments »