OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ | 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ |
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ | 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ |
7 | 7 |
| 8 #include <stdint.h> |
| 9 |
8 #include <set> | 10 #include <set> |
9 #include <vector> | 11 #include <vector> |
10 | 12 |
11 #include "chrome/browser/safe_browsing/safe_browsing_store.h" | 13 #include "chrome/browser/safe_browsing/safe_browsing_store.h" |
12 | 14 |
13 #include "base/callback.h" | 15 #include "base/callback.h" |
14 #include "base/files/file_path.h" | 16 #include "base/files/file_path.h" |
15 #include "base/files/scoped_file.h" | 17 #include "base/files/scoped_file.h" |
| 18 #include "base/macros.h" |
16 #include "base/sequenced_task_runner.h" | 19 #include "base/sequenced_task_runner.h" |
17 | 20 |
18 namespace safe_browsing { | 21 namespace safe_browsing { |
19 | 22 |
20 // Implement SafeBrowsingStore in terms of a flat file. The file | 23 // Implement SafeBrowsingStore in terms of a flat file. The file |
21 // format is pretty literal: | 24 // format is pretty literal: |
22 // | 25 // |
23 // int32 magic; // magic number "validating" file | 26 // int32_t magic; // magic number "validating" file |
24 // int32 version; // format version | 27 // int32_t version; // format version |
25 // | 28 // |
26 // // Counts for the various data which follows the header. | 29 // // Counts for the various data which follows the header. |
27 // uint32 add_chunk_count; // Chunks seen, including empties. | 30 // uint32_t add_chunk_count; // Chunks seen, including empties. |
28 // uint32 sub_chunk_count; // Ditto. | 31 // uint32_t sub_chunk_count; // Ditto. |
29 // uint32 shard_stride; // SBPrefix space covered per shard. | 32 // uint32_t shard_stride; // SBPrefix space covered per shard. |
30 // // 0==entire space in one shard. | 33 // // 0==entire space in one shard. |
31 // // Sorted by chunk_id. | 34 // // Sorted by chunk_id. |
32 // array[add_chunk_count] { | 35 // array[add_chunk_count] { |
33 // int32 chunk_id; | 36 // int32_t chunk_id; |
34 // } | 37 // } |
35 // // Sorted by chunk_id. | 38 // // Sorted by chunk_id. |
36 // array[sub_chunk_count] { | 39 // array[sub_chunk_count] { |
37 // int32 chunk_id; | 40 // int32_t chunk_id; |
38 // } | 41 // } |
39 // MD5Digest header_checksum; // Checksum over preceding data. | 42 // MD5Digest header_checksum; // Checksum over preceding data. |
40 // | 43 // |
41 // // Sorted by prefix, then add chunk_id, then hash, both within shards and | 44 // // Sorted by prefix, then add chunk_id, then hash, both within shards and |
42 // // overall. | 45 // // overall. |
43 // array[from 0 to wraparound to 0 by shard_stride] { | 46 // array[from 0 to wraparound to 0 by shard_stride] { |
44 // uint32 add_prefix_count; | 47 // uint32_t add_prefix_count; |
45 // uint32 sub_prefix_count; | 48 // uint32_t sub_prefix_count; |
46 // uint32 add_hash_count; | 49 // uint32_t add_hash_count; |
47 // uint32 sub_hash_count; | 50 // uint32_t sub_hash_count; |
48 // array[add_prefix_count] { | 51 // array[add_prefix_count] { |
49 // int32 chunk_id; | 52 // int32_t chunk_id; |
50 // uint32 prefix; | 53 // uint32_t prefix; |
51 // } | 54 // } |
52 // array[sub_prefix_count] { | 55 // array[sub_prefix_count] { |
53 // int32 chunk_id; | 56 // int32_t chunk_id; |
54 // int32 add_chunk_id; | 57 // int32_t add_chunk_id; |
55 // uint32 add_prefix; | 58 // uint32_t add_prefix; |
56 // } | 59 // } |
57 // array[add_hash_count] { | 60 // array[add_hash_count] { |
58 // int32 chunk_id; | 61 // int32_t chunk_id; |
59 // int32 received_time; // From base::Time::ToTimeT(). | 62 // int32_t received_time; // From base::Time::ToTimeT(). |
60 // char[32] full_hash; | 63 // char[32] full_hash; |
61 // } | 64 // } |
62 // array[sub_hash_count] { | 65 // array[sub_hash_count] { |
63 // int32 chunk_id; | 66 // int32_t chunk_id; |
64 // int32 add_chunk_id; | 67 // int32_t add_chunk_id; |
65 // char[32] add_full_hash; | 68 // char[32] add_full_hash; |
66 // } | 69 // } |
67 // } | 70 // } |
68 // MD5Digest checksum; // Checksum over entire file. | 71 // MD5Digest checksum; // Checksum over entire file. |
69 // | 72 // |
70 // The checksums are used to allow writing the file without doing an expensive | 73 // The checksums are used to allow writing the file without doing an expensive |
71 // fsync(). Since the data can be re-fetched, failing the checksum is not | 74 // fsync(). Since the data can be re-fetched, failing the checksum is not |
72 // catastrophic. Histograms indicate that file corruption here is pretty | 75 // catastrophic. Histograms indicate that file corruption here is pretty |
73 // uncommon. | 76 // uncommon. |
74 // | 77 // |
75 // The |header_checksum| is present to guarantee valid header and chunk data for | 78 // The |header_checksum| is present to guarantee valid header and chunk data for |
76 // updates. Only that part of the file needs to be read to post the update. | 79 // updates. Only that part of the file needs to be read to post the update. |
77 // | 80 // |
78 // |shard_stride| breaks the file into approximately-equal portions, allowing | 81 // |shard_stride| breaks the file into approximately-equal portions, allowing |
79 // updates to stream from one file to another with modest memory usage. It is | 82 // updates to stream from one file to another with modest memory usage. It is |
80 // dynamic to adjust to different file sizes without adding excessive overhead. | 83 // dynamic to adjust to different file sizes without adding excessive overhead. |
81 // | 84 // |
82 // During the course of an update, uncommitted data is stored in a | 85 // During the course of an update, uncommitted data is stored in a |
83 // temporary file (which is later re-used to commit). This is an | 86 // temporary file (which is later re-used to commit). This is an |
84 // array of chunks, with the count kept in memory until the end of the | 87 // array of chunks, with the count kept in memory until the end of the |
85 // transaction. The format of this file is like the main file, with | 88 // transaction. The format of this file is like the main file, with |
86 // the list of chunks seen omitted, as that data is tracked in-memory: | 89 // the list of chunks seen omitted, as that data is tracked in-memory: |
87 // | 90 // |
88 // array[] { | 91 // array[] { |
89 // uint32 add_prefix_count; | 92 // uint32_t add_prefix_count; |
90 // uint32 sub_prefix_count; | 93 // uint32_t sub_prefix_count; |
91 // uint32 add_hash_count; | 94 // uint32_t add_hash_count; |
92 // uint32 sub_hash_count; | 95 // uint32_t sub_hash_count; |
93 // array[add_prefix_count] { | 96 // array[add_prefix_count] { |
94 // int32 chunk_id; | 97 // int32_t chunk_id; |
95 // uint32 prefix; | 98 // uint32_t prefix; |
96 // } | 99 // } |
97 // array[sub_prefix_count] { | 100 // array[sub_prefix_count] { |
98 // int32 chunk_id; | 101 // int32_t chunk_id; |
99 // int32 add_chunk_id; | 102 // int32_t add_chunk_id; |
100 // uint32 add_prefix; | 103 // uint32_t add_prefix; |
101 // } | 104 // } |
102 // array[add_hash_count] { | 105 // array[add_hash_count] { |
103 // int32 chunk_id; | 106 // int32_t chunk_id; |
104 // int32 received_time; // From base::Time::ToTimeT(). | 107 // int32_t received_time; // From base::Time::ToTimeT(). |
105 // char[32] full_hash; | 108 // char[32] full_hash; |
106 // } | 109 // } |
107 // array[sub_hash_count] { | 110 // array[sub_hash_count] { |
108 // int32 chunk_id; | 111 // int32_t chunk_id; |
109 // int32 add_chunk_id; | 112 // int32_t add_chunk_id; |
110 // char[32] add_full_hash; | 113 // char[32] add_full_hash; |
111 // } | 114 // } |
112 // } | 115 // } |
113 // | 116 // |
114 // The overall transaction works like this: | 117 // The overall transaction works like this: |
115 // - Open the original file to get the chunks-seen data. | 118 // - Open the original file to get the chunks-seen data. |
116 // - Open a temp file for storing new chunk info. | 119 // - Open a temp file for storing new chunk info. |
117 // - Write new chunks to the temp file. | 120 // - Write new chunks to the temp file. |
118 // - When the transaction is finished: | 121 // - When the transaction is finished: |
119 // - Read the update data from the temp file into memory. | 122 // - Read the update data from the temp file into memory. |
(...skipping 17 matching lines...) Expand all Loading... |
137 // Delete any on-disk files, including the permanent storage. | 140 // Delete any on-disk files, including the permanent storage. |
138 bool Delete() override; | 141 bool Delete() override; |
139 | 142 |
140 // Get all add hash prefixes and full-length hashes, respectively, from | 143 // Get all add hash prefixes and full-length hashes, respectively, from |
141 // the store. | 144 // the store. |
142 bool GetAddPrefixes(SBAddPrefixes* add_prefixes) override; | 145 bool GetAddPrefixes(SBAddPrefixes* add_prefixes) override; |
143 bool GetAddFullHashes(std::vector<SBAddFullHash>* add_full_hashes) override; | 146 bool GetAddFullHashes(std::vector<SBAddFullHash>* add_full_hashes) override; |
144 | 147 |
145 bool BeginChunk() override; | 148 bool BeginChunk() override; |
146 | 149 |
147 bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) override; | 150 bool WriteAddPrefix(int32_t chunk_id, SBPrefix prefix) override; |
148 bool WriteAddHash(int32 chunk_id, const SBFullHash& full_hash) override; | 151 bool WriteAddHash(int32_t chunk_id, const SBFullHash& full_hash) override; |
149 bool WriteSubPrefix(int32 chunk_id, | 152 bool WriteSubPrefix(int32_t chunk_id, |
150 int32 add_chunk_id, | 153 int32_t add_chunk_id, |
151 SBPrefix prefix) override; | 154 SBPrefix prefix) override; |
152 bool WriteSubHash(int32 chunk_id, | 155 bool WriteSubHash(int32_t chunk_id, |
153 int32 add_chunk_id, | 156 int32_t add_chunk_id, |
154 const SBFullHash& full_hash) override; | 157 const SBFullHash& full_hash) override; |
155 bool FinishChunk() override; | 158 bool FinishChunk() override; |
156 | 159 |
157 bool BeginUpdate() override; | 160 bool BeginUpdate() override; |
158 bool FinishUpdate( | 161 bool FinishUpdate( |
159 PrefixSetBuilder* builder, | 162 PrefixSetBuilder* builder, |
160 std::vector<SBAddFullHash>* add_full_hashes_result) override; | 163 std::vector<SBAddFullHash>* add_full_hashes_result) override; |
161 bool CancelUpdate() override; | 164 bool CancelUpdate() override; |
162 | 165 |
163 void SetAddChunk(int32 chunk_id) override; | 166 void SetAddChunk(int32_t chunk_id) override; |
164 bool CheckAddChunk(int32 chunk_id) override; | 167 bool CheckAddChunk(int32_t chunk_id) override; |
165 void GetAddChunks(std::vector<int32>* out) override; | 168 void GetAddChunks(std::vector<int32_t>* out) override; |
166 void SetSubChunk(int32 chunk_id) override; | 169 void SetSubChunk(int32_t chunk_id) override; |
167 bool CheckSubChunk(int32 chunk_id) override; | 170 bool CheckSubChunk(int32_t chunk_id) override; |
168 void GetSubChunks(std::vector<int32>* out) override; | 171 void GetSubChunks(std::vector<int32_t>* out) override; |
169 | 172 |
170 void DeleteAddChunk(int32 chunk_id) override; | 173 void DeleteAddChunk(int32_t chunk_id) override; |
171 void DeleteSubChunk(int32 chunk_id) override; | 174 void DeleteSubChunk(int32_t chunk_id) override; |
172 | 175 |
173 // Verify |file_|'s checksum, calling the corruption callback if it | 176 // Verify |file_|'s checksum, calling the corruption callback if it |
174 // does not check out. Empty input is considered valid. | 177 // does not check out. Empty input is considered valid. |
175 bool CheckValidity() override; | 178 bool CheckValidity() override; |
176 | 179 |
177 // Returns the name of the temporary file used to buffer data for | 180 // Returns the name of the temporary file used to buffer data for |
178 // |filename|. Exported for unit tests. | 181 // |filename|. Exported for unit tests. |
179 static const base::FilePath TemporaryFileForFilename( | 182 static const base::FilePath TemporaryFileForFilename( |
180 const base::FilePath& filename) { | 183 const base::FilePath& filename) { |
181 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new")); | 184 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new")); |
(...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
222 SBSubPrefixes().swap(sub_prefixes_); | 225 SBSubPrefixes().swap(sub_prefixes_); |
223 std::vector<SBAddFullHash>().swap(add_hashes_); | 226 std::vector<SBAddFullHash>().swap(add_hashes_); |
224 std::vector<SBSubFullHash>().swap(sub_hashes_); | 227 std::vector<SBSubFullHash>().swap(sub_hashes_); |
225 return true; | 228 return true; |
226 } | 229 } |
227 | 230 |
228 // Clear all buffers used during update. | 231 // Clear all buffers used during update. |
229 void ClearUpdateBuffers() { | 232 void ClearUpdateBuffers() { |
230 ClearChunkBuffers(); | 233 ClearChunkBuffers(); |
231 chunks_written_ = 0; | 234 chunks_written_ = 0; |
232 std::set<int32>().swap(add_chunks_cache_); | 235 std::set<int32_t>().swap(add_chunks_cache_); |
233 std::set<int32>().swap(sub_chunks_cache_); | 236 std::set<int32_t>().swap(sub_chunks_cache_); |
234 base::hash_set<int32>().swap(add_del_cache_); | 237 base::hash_set<int32_t>().swap(add_del_cache_); |
235 base::hash_set<int32>().swap(sub_del_cache_); | 238 base::hash_set<int32_t>().swap(sub_del_cache_); |
236 } | 239 } |
237 | 240 |
238 // The sequenced task runner for this object, used to verify that its state | 241 // The sequenced task runner for this object, used to verify that its state |
239 // is only ever accessed from the runner. | 242 // is only ever accessed from the runner. |
240 scoped_refptr<const base::SequencedTaskRunner> task_runner_; | 243 scoped_refptr<const base::SequencedTaskRunner> task_runner_; |
241 | 244 |
242 // Buffers for collecting data between BeginChunk() and | 245 // Buffers for collecting data between BeginChunk() and |
243 // FinishChunk(). | 246 // FinishChunk(). |
244 SBAddPrefixes add_prefixes_; | 247 SBAddPrefixes add_prefixes_; |
245 SBSubPrefixes sub_prefixes_; | 248 SBSubPrefixes sub_prefixes_; |
246 std::vector<SBAddFullHash> add_hashes_; | 249 std::vector<SBAddFullHash> add_hashes_; |
247 std::vector<SBSubFullHash> sub_hashes_; | 250 std::vector<SBSubFullHash> sub_hashes_; |
248 | 251 |
249 // Count of chunks collected in |new_file_|. | 252 // Count of chunks collected in |new_file_|. |
250 int chunks_written_; | 253 int chunks_written_; |
251 | 254 |
252 // Name of the main database file. | 255 // Name of the main database file. |
253 base::FilePath filename_; | 256 base::FilePath filename_; |
254 | 257 |
255 // Handles to the main and scratch files. |empty_| is true if the | 258 // Handles to the main and scratch files. |empty_| is true if the |
256 // main file didn't exist when the update was started. | 259 // main file didn't exist when the update was started. |
257 base::ScopedFILE file_; | 260 base::ScopedFILE file_; |
258 base::ScopedFILE new_file_; | 261 base::ScopedFILE new_file_; |
259 bool empty_; | 262 bool empty_; |
260 | 263 |
261 // Cache of chunks which have been seen. Loaded from the database | 264 // Cache of chunks which have been seen. Loaded from the database |
262 // on BeginUpdate() so that it can be queried during the | 265 // on BeginUpdate() so that it can be queried during the |
263 // transaction. | 266 // transaction. |
264 std::set<int32> add_chunks_cache_; | 267 std::set<int32_t> add_chunks_cache_; |
265 std::set<int32> sub_chunks_cache_; | 268 std::set<int32_t> sub_chunks_cache_; |
266 | 269 |
267 // Cache the set of deleted chunks during a transaction, applied on | 270 // Cache the set of deleted chunks during a transaction, applied on |
268 // FinishUpdate(). | 271 // FinishUpdate(). |
269 // TODO(shess): If the set is small enough, hash_set<> might be | 272 // TODO(shess): If the set is small enough, hash_set<> might be |
270 // slower than plain set<>. | 273 // slower than plain set<>. |
271 base::hash_set<int32> add_del_cache_; | 274 base::hash_set<int32_t> add_del_cache_; |
272 base::hash_set<int32> sub_del_cache_; | 275 base::hash_set<int32_t> sub_del_cache_; |
273 | 276 |
274 base::Closure corruption_callback_; | 277 base::Closure corruption_callback_; |
275 | 278 |
276 // Tracks whether corruption has already been seen in the current | 279 // Tracks whether corruption has already been seen in the current |
277 // update, so that only one instance is recorded in the stats. | 280 // update, so that only one instance is recorded in the stats. |
278 // TODO(shess): Remove with format-migration support. | 281 // TODO(shess): Remove with format-migration support. |
279 bool corruption_seen_; | 282 bool corruption_seen_; |
280 | 283 |
281 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreFile); | 284 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreFile); |
282 }; | 285 }; |
283 | 286 |
284 } // namespace safe_browsing | 287 } // namespace safe_browsing |
285 | 288 |
286 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ | 289 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ |
OLD | NEW |