| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ | 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ |
| 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ | 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ |
| 7 | 7 |
| 8 #include <stdint.h> |
| 9 |
| 8 #include <set> | 10 #include <set> |
| 9 #include <vector> | 11 #include <vector> |
| 10 | 12 |
| 11 #include "chrome/browser/safe_browsing/safe_browsing_store.h" | 13 #include "chrome/browser/safe_browsing/safe_browsing_store.h" |
| 12 | 14 |
| 13 #include "base/callback.h" | 15 #include "base/callback.h" |
| 14 #include "base/files/file_path.h" | 16 #include "base/files/file_path.h" |
| 15 #include "base/files/scoped_file.h" | 17 #include "base/files/scoped_file.h" |
| 18 #include "base/macros.h" |
| 16 #include "base/sequenced_task_runner.h" | 19 #include "base/sequenced_task_runner.h" |
| 17 | 20 |
| 18 namespace safe_browsing { | 21 namespace safe_browsing { |
| 19 | 22 |
| 20 // Implement SafeBrowsingStore in terms of a flat file. The file | 23 // Implement SafeBrowsingStore in terms of a flat file. The file |
| 21 // format is pretty literal: | 24 // format is pretty literal: |
| 22 // | 25 // |
| 23 // int32 magic; // magic number "validating" file | 26 // int32_t magic; // magic number "validating" file |
| 24 // int32 version; // format version | 27 // int32_t version; // format version |
| 25 // | 28 // |
| 26 // // Counts for the various data which follows the header. | 29 // // Counts for the various data which follows the header. |
| 27 // uint32 add_chunk_count; // Chunks seen, including empties. | 30 // uint32_t add_chunk_count; // Chunks seen, including empties. |
| 28 // uint32 sub_chunk_count; // Ditto. | 31 // uint32_t sub_chunk_count; // Ditto. |
| 29 // uint32 shard_stride; // SBPrefix space covered per shard. | 32 // uint32_t shard_stride; // SBPrefix space covered per shard. |
| 30 // // 0==entire space in one shard. | 33 // // 0==entire space in one shard. |
| 31 // // Sorted by chunk_id. | 34 // // Sorted by chunk_id. |
| 32 // array[add_chunk_count] { | 35 // array[add_chunk_count] { |
| 33 // int32 chunk_id; | 36 // int32_t chunk_id; |
| 34 // } | 37 // } |
| 35 // // Sorted by chunk_id. | 38 // // Sorted by chunk_id. |
| 36 // array[sub_chunk_count] { | 39 // array[sub_chunk_count] { |
| 37 // int32 chunk_id; | 40 // int32_t chunk_id; |
| 38 // } | 41 // } |
| 39 // MD5Digest header_checksum; // Checksum over preceding data. | 42 // MD5Digest header_checksum; // Checksum over preceding data. |
| 40 // | 43 // |
| 41 // // Sorted by prefix, then add chunk_id, then hash, both within shards and | 44 // // Sorted by prefix, then add chunk_id, then hash, both within shards and |
| 42 // // overall. | 45 // // overall. |
| 43 // array[from 0 to wraparound to 0 by shard_stride] { | 46 // array[from 0 to wraparound to 0 by shard_stride] { |
| 44 // uint32 add_prefix_count; | 47 // uint32_t add_prefix_count; |
| 45 // uint32 sub_prefix_count; | 48 // uint32_t sub_prefix_count; |
| 46 // uint32 add_hash_count; | 49 // uint32_t add_hash_count; |
| 47 // uint32 sub_hash_count; | 50 // uint32_t sub_hash_count; |
| 48 // array[add_prefix_count] { | 51 // array[add_prefix_count] { |
| 49 // int32 chunk_id; | 52 // int32_t chunk_id; |
| 50 // uint32 prefix; | 53 // uint32_t prefix; |
| 51 // } | 54 // } |
| 52 // array[sub_prefix_count] { | 55 // array[sub_prefix_count] { |
| 53 // int32 chunk_id; | 56 // int32_t chunk_id; |
| 54 // int32 add_chunk_id; | 57 // int32_t add_chunk_id; |
| 55 // uint32 add_prefix; | 58 // uint32_t add_prefix; |
| 56 // } | 59 // } |
| 57 // array[add_hash_count] { | 60 // array[add_hash_count] { |
| 58 // int32 chunk_id; | 61 // int32_t chunk_id; |
| 59 // int32 received_time; // From base::Time::ToTimeT(). | 62 // int32_t received_time; // From base::Time::ToTimeT(). |
| 60 // char[32] full_hash; | 63 // char[32] full_hash; |
| 61 // } | 64 // } |
| 62 // array[sub_hash_count] { | 65 // array[sub_hash_count] { |
| 63 // int32 chunk_id; | 66 // int32_t chunk_id; |
| 64 // int32 add_chunk_id; | 67 // int32_t add_chunk_id; |
| 65 // char[32] add_full_hash; | 68 // char[32] add_full_hash; |
| 66 // } | 69 // } |
| 67 // } | 70 // } |
| 68 // MD5Digest checksum; // Checksum over entire file. | 71 // MD5Digest checksum; // Checksum over entire file. |
| 69 // | 72 // |
| 70 // The checksums are used to allow writing the file without doing an expensive | 73 // The checksums are used to allow writing the file without doing an expensive |
| 71 // fsync(). Since the data can be re-fetched, failing the checksum is not | 74 // fsync(). Since the data can be re-fetched, failing the checksum is not |
| 72 // catastrophic. Histograms indicate that file corruption here is pretty | 75 // catastrophic. Histograms indicate that file corruption here is pretty |
| 73 // uncommon. | 76 // uncommon. |
| 74 // | 77 // |
| 75 // The |header_checksum| is present to guarantee valid header and chunk data for | 78 // The |header_checksum| is present to guarantee valid header and chunk data for |
| 76 // updates. Only that part of the file needs to be read to post the update. | 79 // updates. Only that part of the file needs to be read to post the update. |
| 77 // | 80 // |
| 78 // |shard_stride| breaks the file into approximately-equal portions, allowing | 81 // |shard_stride| breaks the file into approximately-equal portions, allowing |
| 79 // updates to stream from one file to another with modest memory usage. It is | 82 // updates to stream from one file to another with modest memory usage. It is |
| 80 // dynamic to adjust to different file sizes without adding excessive overhead. | 83 // dynamic to adjust to different file sizes without adding excessive overhead. |
| 81 // | 84 // |
| 82 // During the course of an update, uncommitted data is stored in a | 85 // During the course of an update, uncommitted data is stored in a |
| 83 // temporary file (which is later re-used to commit). This is an | 86 // temporary file (which is later re-used to commit). This is an |
| 84 // array of chunks, with the count kept in memory until the end of the | 87 // array of chunks, with the count kept in memory until the end of the |
| 85 // transaction. The format of this file is like the main file, with | 88 // transaction. The format of this file is like the main file, with |
| 86 // the list of chunks seen omitted, as that data is tracked in-memory: | 89 // the list of chunks seen omitted, as that data is tracked in-memory: |
| 87 // | 90 // |
| 88 // array[] { | 91 // array[] { |
| 89 // uint32 add_prefix_count; | 92 // uint32_t add_prefix_count; |
| 90 // uint32 sub_prefix_count; | 93 // uint32_t sub_prefix_count; |
| 91 // uint32 add_hash_count; | 94 // uint32_t add_hash_count; |
| 92 // uint32 sub_hash_count; | 95 // uint32_t sub_hash_count; |
| 93 // array[add_prefix_count] { | 96 // array[add_prefix_count] { |
| 94 // int32 chunk_id; | 97 // int32_t chunk_id; |
| 95 // uint32 prefix; | 98 // uint32_t prefix; |
| 96 // } | 99 // } |
| 97 // array[sub_prefix_count] { | 100 // array[sub_prefix_count] { |
| 98 // int32 chunk_id; | 101 // int32_t chunk_id; |
| 99 // int32 add_chunk_id; | 102 // int32_t add_chunk_id; |
| 100 // uint32 add_prefix; | 103 // uint32_t add_prefix; |
| 101 // } | 104 // } |
| 102 // array[add_hash_count] { | 105 // array[add_hash_count] { |
| 103 // int32 chunk_id; | 106 // int32_t chunk_id; |
| 104 // int32 received_time; // From base::Time::ToTimeT(). | 107 // int32_t received_time; // From base::Time::ToTimeT(). |
| 105 // char[32] full_hash; | 108 // char[32] full_hash; |
| 106 // } | 109 // } |
| 107 // array[sub_hash_count] { | 110 // array[sub_hash_count] { |
| 108 // int32 chunk_id; | 111 // int32_t chunk_id; |
| 109 // int32 add_chunk_id; | 112 // int32_t add_chunk_id; |
| 110 // char[32] add_full_hash; | 113 // char[32] add_full_hash; |
| 111 // } | 114 // } |
| 112 // } | 115 // } |
| 113 // | 116 // |
| 114 // The overall transaction works like this: | 117 // The overall transaction works like this: |
| 115 // - Open the original file to get the chunks-seen data. | 118 // - Open the original file to get the chunks-seen data. |
| 116 // - Open a temp file for storing new chunk info. | 119 // - Open a temp file for storing new chunk info. |
| 117 // - Write new chunks to the temp file. | 120 // - Write new chunks to the temp file. |
| 118 // - When the transaction is finished: | 121 // - When the transaction is finished: |
| 119 // - Read the update data from the temp file into memory. | 122 // - Read the update data from the temp file into memory. |
| (...skipping 17 matching lines...) Expand all Loading... |
| 137 // Delete any on-disk files, including the permanent storage. | 140 // Delete any on-disk files, including the permanent storage. |
| 138 bool Delete() override; | 141 bool Delete() override; |
| 139 | 142 |
| 140 // Get all add hash prefixes and full-length hashes, respectively, from | 143 // Get all add hash prefixes and full-length hashes, respectively, from |
| 141 // the store. | 144 // the store. |
| 142 bool GetAddPrefixes(SBAddPrefixes* add_prefixes) override; | 145 bool GetAddPrefixes(SBAddPrefixes* add_prefixes) override; |
| 143 bool GetAddFullHashes(std::vector<SBAddFullHash>* add_full_hashes) override; | 146 bool GetAddFullHashes(std::vector<SBAddFullHash>* add_full_hashes) override; |
| 144 | 147 |
| 145 bool BeginChunk() override; | 148 bool BeginChunk() override; |
| 146 | 149 |
| 147 bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) override; | 150 bool WriteAddPrefix(int32_t chunk_id, SBPrefix prefix) override; |
| 148 bool WriteAddHash(int32 chunk_id, const SBFullHash& full_hash) override; | 151 bool WriteAddHash(int32_t chunk_id, const SBFullHash& full_hash) override; |
| 149 bool WriteSubPrefix(int32 chunk_id, | 152 bool WriteSubPrefix(int32_t chunk_id, |
| 150 int32 add_chunk_id, | 153 int32_t add_chunk_id, |
| 151 SBPrefix prefix) override; | 154 SBPrefix prefix) override; |
| 152 bool WriteSubHash(int32 chunk_id, | 155 bool WriteSubHash(int32_t chunk_id, |
| 153 int32 add_chunk_id, | 156 int32_t add_chunk_id, |
| 154 const SBFullHash& full_hash) override; | 157 const SBFullHash& full_hash) override; |
| 155 bool FinishChunk() override; | 158 bool FinishChunk() override; |
| 156 | 159 |
| 157 bool BeginUpdate() override; | 160 bool BeginUpdate() override; |
| 158 bool FinishUpdate( | 161 bool FinishUpdate( |
| 159 PrefixSetBuilder* builder, | 162 PrefixSetBuilder* builder, |
| 160 std::vector<SBAddFullHash>* add_full_hashes_result) override; | 163 std::vector<SBAddFullHash>* add_full_hashes_result) override; |
| 161 bool CancelUpdate() override; | 164 bool CancelUpdate() override; |
| 162 | 165 |
| 163 void SetAddChunk(int32 chunk_id) override; | 166 void SetAddChunk(int32_t chunk_id) override; |
| 164 bool CheckAddChunk(int32 chunk_id) override; | 167 bool CheckAddChunk(int32_t chunk_id) override; |
| 165 void GetAddChunks(std::vector<int32>* out) override; | 168 void GetAddChunks(std::vector<int32_t>* out) override; |
| 166 void SetSubChunk(int32 chunk_id) override; | 169 void SetSubChunk(int32_t chunk_id) override; |
| 167 bool CheckSubChunk(int32 chunk_id) override; | 170 bool CheckSubChunk(int32_t chunk_id) override; |
| 168 void GetSubChunks(std::vector<int32>* out) override; | 171 void GetSubChunks(std::vector<int32_t>* out) override; |
| 169 | 172 |
| 170 void DeleteAddChunk(int32 chunk_id) override; | 173 void DeleteAddChunk(int32_t chunk_id) override; |
| 171 void DeleteSubChunk(int32 chunk_id) override; | 174 void DeleteSubChunk(int32_t chunk_id) override; |
| 172 | 175 |
| 173 // Verify |file_|'s checksum, calling the corruption callback if it | 176 // Verify |file_|'s checksum, calling the corruption callback if it |
| 174 // does not check out. Empty input is considered valid. | 177 // does not check out. Empty input is considered valid. |
| 175 bool CheckValidity() override; | 178 bool CheckValidity() override; |
| 176 | 179 |
| 177 // Returns the name of the temporary file used to buffer data for | 180 // Returns the name of the temporary file used to buffer data for |
| 178 // |filename|. Exported for unit tests. | 181 // |filename|. Exported for unit tests. |
| 179 static const base::FilePath TemporaryFileForFilename( | 182 static const base::FilePath TemporaryFileForFilename( |
| 180 const base::FilePath& filename) { | 183 const base::FilePath& filename) { |
| 181 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new")); | 184 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new")); |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 222 SBSubPrefixes().swap(sub_prefixes_); | 225 SBSubPrefixes().swap(sub_prefixes_); |
| 223 std::vector<SBAddFullHash>().swap(add_hashes_); | 226 std::vector<SBAddFullHash>().swap(add_hashes_); |
| 224 std::vector<SBSubFullHash>().swap(sub_hashes_); | 227 std::vector<SBSubFullHash>().swap(sub_hashes_); |
| 225 return true; | 228 return true; |
| 226 } | 229 } |
| 227 | 230 |
| 228 // Clear all buffers used during update: resets the per-chunk buffers via ClearChunkBuffers(), zeroes |chunks_written_|, and swaps the chunk-seen and chunk-deleted caches with empty containers. | 231 // Clear all buffers used during update: resets the per-chunk buffers via ClearChunkBuffers(), zeroes |chunks_written_|, and swaps the chunk-seen and chunk-deleted caches with empty containers. |
| 229 void ClearUpdateBuffers() { | 232 void ClearUpdateBuffers() { |
| 230 ClearChunkBuffers(); | 233 ClearChunkBuffers(); |
| 231 chunks_written_ = 0; | 234 chunks_written_ = 0; |
| 232 std::set<int32>().swap(add_chunks_cache_); | 235 std::set<int32_t>().swap(add_chunks_cache_); |
| 233 std::set<int32>().swap(sub_chunks_cache_); | 236 std::set<int32_t>().swap(sub_chunks_cache_); |
| 234 base::hash_set<int32>().swap(add_del_cache_); | 237 base::hash_set<int32_t>().swap(add_del_cache_); |
| 235 base::hash_set<int32>().swap(sub_del_cache_); | 238 base::hash_set<int32_t>().swap(sub_del_cache_); |
| 236 } | 239 } |
| 237 | 240 |
| 238 // The sequenced task runner for this object, used to verify that its state | 241 // The sequenced task runner for this object, used to verify that its state |
| 239 // is only ever accessed from the runner. | 242 // is only ever accessed from the runner. |
| 240 scoped_refptr<const base::SequencedTaskRunner> task_runner_; | 243 scoped_refptr<const base::SequencedTaskRunner> task_runner_; |
| 241 | 244 |
| 242 // Buffers for collecting data between BeginChunk() and | 245 // Buffers for collecting data between BeginChunk() and |
| 243 // FinishChunk(). | 246 // FinishChunk(). |
| 244 SBAddPrefixes add_prefixes_; | 247 SBAddPrefixes add_prefixes_; |
| 245 SBSubPrefixes sub_prefixes_; | 248 SBSubPrefixes sub_prefixes_; |
| 246 std::vector<SBAddFullHash> add_hashes_; | 249 std::vector<SBAddFullHash> add_hashes_; |
| 247 std::vector<SBSubFullHash> sub_hashes_; | 250 std::vector<SBSubFullHash> sub_hashes_; |
| 248 | 251 |
| 249 // Count of chunks collected in |new_file_|. | 252 // Count of chunks collected in |new_file_|. |
| 250 int chunks_written_; | 253 int chunks_written_; |
| 251 | 254 |
| 252 // Name of the main database file. | 255 // Name of the main database file. |
| 253 base::FilePath filename_; | 256 base::FilePath filename_; |
| 254 | 257 |
| 255 // Handles to the main and scratch files. |empty_| is true if the | 258 // Handles to the main and scratch files. |empty_| is true if the |
| 256 // main file didn't exist when the update was started. | 259 // main file didn't exist when the update was started. |
| 257 base::ScopedFILE file_; | 260 base::ScopedFILE file_; |
| 258 base::ScopedFILE new_file_; | 261 base::ScopedFILE new_file_; |
| 259 bool empty_; | 262 bool empty_; |
| 260 | 263 |
| 261 // Cache of chunks which have been seen. Loaded from the database | 264 // Cache of chunks which have been seen. Loaded from the database |
| 262 // on BeginUpdate() so that it can be queried during the | 265 // on BeginUpdate() so that it can be queried during the |
| 263 // transaction. | 266 // transaction. |
| 264 std::set<int32> add_chunks_cache_; | 267 std::set<int32_t> add_chunks_cache_; |
| 265 std::set<int32> sub_chunks_cache_; | 268 std::set<int32_t> sub_chunks_cache_; |
| 266 | 269 |
| 267 // Cache the set of deleted chunks during a transaction, applied on | 270 // Cache the set of deleted chunks during a transaction, applied on |
| 268 // FinishUpdate(). | 271 // FinishUpdate(). |
| 269 // TODO(shess): If the set is small enough, hash_set<> might be | 272 // TODO(shess): If the set is small enough, hash_set<> might be |
| 270 // slower than plain set<>. | 273 // slower than plain set<>. |
| 271 base::hash_set<int32> add_del_cache_; | 274 base::hash_set<int32_t> add_del_cache_; |
| 272 base::hash_set<int32> sub_del_cache_; | 275 base::hash_set<int32_t> sub_del_cache_; |
| 273 | 276 |
| 274 base::Closure corruption_callback_; | 277 base::Closure corruption_callback_; |
| 275 | 278 |
| 276 // Tracks whether corruption has already been seen in the current | 279 // Tracks whether corruption has already been seen in the current |
| 277 // update, so that only one instance is recorded in the stats. | 280 // update, so that only one instance is recorded in the stats. |
| 278 // TODO(shess): Remove with format-migration support. | 281 // TODO(shess): Remove with format-migration support. |
| 279 bool corruption_seen_; | 282 bool corruption_seen_; |
| 280 | 283 |
| 281 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreFile); | 284 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStoreFile); |
| 282 }; | 285 }; |
| 283 | 286 |
| 284 } // namespace safe_browsing | 287 } // namespace safe_browsing |
| 285 | 288 |
| 286 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ | 289 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_FILE_H_ |
| OLD | NEW |