Chromium Code Reviews| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/safe_browsing/prefix_set.h" | 5 #include "chrome/browser/safe_browsing/prefix_set.h" |
| 6 | 6 |
| 7 #include <algorithm> | 7 #include <algorithm> |
| 8 #include <math.h> | 8 #include <math.h> |
| 9 | 9 |
| 10 #include "base/file_util.h" | 10 #include "base/file_util.h" |
| 11 #include "base/files/scoped_file.h" | 11 #include "base/files/scoped_file.h" |
| 12 #include "base/logging.h" | 12 #include "base/logging.h" |
| 13 #include "base/md5.h" | 13 #include "base/md5.h" |
| 14 #include "base/metrics/histogram.h" | 14 #include "base/metrics/histogram.h" |
| 15 #include "base/metrics/sparse_histogram.h" | 15 #include "base/metrics/sparse_histogram.h" |
| 16 | 16 |
| 17 namespace { | 17 namespace { |
| 18 | 18 |
| 19 // |kMagic| should be reasonably unique, and not match itself across | 19 // |kMagic| should be reasonably unique, and not match itself across |
| 20 // endianness changes. I generated this value with: | 20 // endianness changes. I generated this value with: |
| 21 // md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9 | 21 // md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9 |
| 22 static uint32 kMagic = 0x864088dd; | 22 static uint32 kMagic = 0x864088dd; |
| 23 | 23 |
| 24 // Version history: | 24 // Version history: |
| 25 // Version 1: b6cb7cfe/r74487 by shess@chromium.org on 2011-02-10 | 25 // Version 1: b6cb7cfe/r74487 by shess@chromium.org on 2011-02-10 |
| 26 // Version 2: 2b59b0a6/r253924 by shess@chromium.org on 2014-02-27 | 26 // Version 2: 2b59b0a6/r253924 by shess@chromium.org on 2014-02-27 |
| 27 // Version 3: ????????/r?????? by shess@chromium.org on 2014-04-?? | |
| 27 | 28 |
| 28 // Version 2 layout is identical to version 1. The sort order of |index_| | 29 // Version 2 layout is identical to version 1. The sort order of |index_| |
| 29 // changed from |int32| to |uint32| to match the change of |SBPrefix|. | 30 // changed from |int32| to |uint32| to match the change of |SBPrefix|. |
| 30 static uint32 kVersion = 0x2; | 31 // Version 3 adds storage for full hashes. |
| 32 static uint32 kVersion = 0x3; | |
| 31 | 33 |
| 32 typedef struct { | 34 typedef struct { |
| 33 uint32 magic; | 35 uint32 magic; |
| 34 uint32 version; | 36 uint32 version; |
| 35 uint32 index_size; | 37 uint32 index_size; |
| 36 uint32 deltas_size; | 38 uint32 deltas_size; |
| 39 } FileHeader_v2; | |
| 40 | |
| 41 typedef struct { | |
| 42 uint32 magic; | |
| 43 uint32 version; | |
| 44 uint32 index_size; | |
| 45 uint32 deltas_size; | |
| 46 uint32 full_hashes_size; | |
| 37 } FileHeader; | 47 } FileHeader; |
| 38 | 48 |
| 39 // Common std::vector<> implementations add capacity by multiplying from the | 49 // Common std::vector<> implementations add capacity by multiplying from the |
| 40 // current size (usually either by 2 or 1.5) to satisfy push_back() running in | 50 // current size (usually either by 2 or 1.5) to satisfy push_back() running in |
| 41 // amortized constant time. This is not necessary for insert() at end(), but | 51 // amortized constant time. This is not necessary for insert() at end(), but |
| 42 // AFAICT it seems true for some implementations. SBPrefix values should | 52 // AFAICT it seems true for some implementations. SBPrefix values should |
| 43 // uniformly cover the 32-bit space, so the final size can be estimated given a | 53 // uniformly cover the 32-bit space, so the final size can be estimated given a |
| 44 // subset of the input. | 54 // subset of the input. |
| 45 // | 55 // |
| 46 // |kEstimateThreshold| is when estimates start converging. Results are strong | 56 // |kEstimateThreshold| is when estimates start converging. Results are strong |
| (...skipping 30 matching lines...) Expand all Loading... | |
| 77 | 87 |
| 78 // For |std::upper_bound()| to find a prefix w/in a vector of pairs. | 88 // For |std::upper_bound()| to find a prefix w/in a vector of pairs. |
| 79 // static | 89 // static |
| 80 bool PrefixSet::PrefixLess(const IndexPair& a, const IndexPair& b) { | 90 bool PrefixSet::PrefixLess(const IndexPair& a, const IndexPair& b) { |
| 81 return a.first < b.first; | 91 return a.first < b.first; |
| 82 } | 92 } |
| 83 | 93 |
| 84 PrefixSet::PrefixSet() { | 94 PrefixSet::PrefixSet() { |
| 85 } | 95 } |
| 86 | 96 |
| 87 PrefixSet::PrefixSet(IndexVector* index, std::vector<uint16>* deltas) { | 97 PrefixSet::PrefixSet(IndexVector* index, |
| 88 DCHECK(index && deltas); | 98 std::vector<uint16>* deltas, |
| 99 std::vector<SBFullHash>* full_hashes) { | |
| 100 DCHECK(index && deltas && full_hashes); | |
| 89 index_.swap(*index); | 101 index_.swap(*index); |
| 90 deltas_.swap(*deltas); | 102 deltas_.swap(*deltas); |
| 103 full_hashes_.swap(*full_hashes); | |
| 91 } | 104 } |
| 92 | 105 |
| 93 PrefixSet::~PrefixSet() {} | 106 PrefixSet::~PrefixSet() {} |
| 94 | 107 |
| 95 bool PrefixSet::PrefixExists(SBPrefix prefix) const { | 108 bool PrefixSet::PrefixExists(SBPrefix prefix) const { |
| 96 if (index_.empty()) | 109 if (index_.empty()) |
| 97 return false; | 110 return false; |
| 98 | 111 |
| 99 // Find the first position after |prefix| in |index_|. | 112 // Find the first position after |prefix| in |index_|. |
| 100 IndexVector::const_iterator iter = | 113 IndexVector::const_iterator iter = |
| (...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 149 } | 162 } |
| 150 } | 163 } |
| 151 } | 164 } |
| 152 | 165 |
| 153 // static | 166 // static |
| 154 scoped_ptr<PrefixSet> PrefixSet::LoadFile(const base::FilePath& filter_name) { | 167 scoped_ptr<PrefixSet> PrefixSet::LoadFile(const base::FilePath& filter_name) { |
| 155 int64 size_64; | 168 int64 size_64; |
| 156 if (!base::GetFileSize(filter_name, &size_64)) | 169 if (!base::GetFileSize(filter_name, &size_64)) |
| 157 return scoped_ptr<PrefixSet>(); | 170 return scoped_ptr<PrefixSet>(); |
| 158 using base::MD5Digest; | 171 using base::MD5Digest; |
| 159 if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest))) | 172 // TODO(shess): Revert to sizeof(FileHeader) for sanity check once v2 is |
| 173 // deprecated. | |
| 174 if (size_64 < static_cast<int64>(sizeof(FileHeader_v2) + sizeof(MD5Digest))) | |
| 160 return scoped_ptr<PrefixSet>(); | 175 return scoped_ptr<PrefixSet>(); |
| 161 | 176 |
| 162 base::ScopedFILE file(base::OpenFile(filter_name, "rb")); | 177 base::ScopedFILE file(base::OpenFile(filter_name, "rb")); |
| 163 if (!file.get()) | 178 if (!file.get()) |
| 164 return scoped_ptr<PrefixSet>(); | 179 return scoped_ptr<PrefixSet>(); |
| 165 | 180 |
| 166 FileHeader header; | 181 FileHeader header; |
| 167 size_t read = fread(&header, sizeof(header), 1, file.get()); | 182 size_t read = fread(&header, sizeof(header), 1, file.get()); |
| 168 if (read != 1) | 183 if (read != 1) |
| 169 return scoped_ptr<PrefixSet>(); | 184 return scoped_ptr<PrefixSet>(); |
| 170 | 185 |
| 186 // The file looks valid, start building the digest. | |
| 187 base::MD5Context context; | |
| 188 base::MD5Init(&context); | |
| 189 base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header), | |
| 190 sizeof(header))); | |
| 191 | |
| 171 if (header.magic != kMagic) | 192 if (header.magic != kMagic) |
| 172 return scoped_ptr<PrefixSet>(); | 193 return scoped_ptr<PrefixSet>(); |
| 173 | 194 |
| 174 // Track version read to inform removal of support for older versions. | 195 // Track version read to inform removal of support for older versions. |
| 175 UMA_HISTOGRAM_SPARSE_SLOWLY("SB2.PrefixSetVersionRead", header.version); | 196 UMA_HISTOGRAM_SPARSE_SLOWLY("SB2.PrefixSetVersionRead", header.version); |
| 176 | 197 |
| 177 // TODO(shess): Version 1 and 2 use the same file structure, with version 1 | 198 // TODO(shess): Version 1 and 2 use the same file structure, with version 1 |
| 178 // data using a signed sort. For M-35, the data is re-sorted before return. | 199 // data using a signed sort. For M-35, the data is re-sorted before return. |
| 179 // After M-35, just drop v1 support. <http://crbug.com/346405> | 200 // After M-36, just drop v1 support. <http://crbug.com/346405> |
| 180 if (header.version != kVersion && header.version != 1) | 201 // TODO(shess): <http://crbug.com/368044> for removing v2 support. |
| 202 size_t header_size = sizeof(header); | |
| 203 if (header.version == 2 || header.version == 1) { | |
| 204 // Rewind the file and restart building the digest with the old header | |
| 205 // structure. | |
| 206 FileHeader_v2 v2_header; | |
| 207 if (0 != fseek(file.get(), 0, SEEK_SET)) | |
| 208 return scoped_ptr<PrefixSet>(); | |
| 209 | |
| 210 size_t read = fread(&v2_header, sizeof(v2_header), 1, file.get()); | |
| 211 if (read != 1) | |
| 212 return scoped_ptr<PrefixSet>(); | |
| 213 | |
| 214 base::MD5Init(&context); | |
| 215 base::MD5Update(&context, | |
| 216 base::StringPiece(reinterpret_cast<char*>(&v2_header), | |
| 217 sizeof(v2_header))); | |
| 218 | |
| 219 // The current header is a superset of the old header, fill it in with the | |
| 220 // information read. | |
| 221 header.magic = v2_header.magic; | |
| 222 header.version = v2_header.version; | |
| 223 header.index_size = v2_header.index_size; | |
| 224 header.deltas_size = v2_header.deltas_size; | |
| 225 header.full_hashes_size = 0; | |
| 226 header_size = sizeof(v2_header); | |
| 227 } else if (header.version != kVersion) { | |
| 181 return scoped_ptr<PrefixSet>(); | 228 return scoped_ptr<PrefixSet>(); |
| 229 } | |
| 182 | 230 |
| 183 IndexVector index; | 231 IndexVector index; |
| 184 const size_t index_bytes = sizeof(index[0]) * header.index_size; | 232 const size_t index_bytes = sizeof(index[0]) * header.index_size; |
| 185 | 233 |
| 186 std::vector<uint16> deltas; | 234 std::vector<uint16> deltas; |
| 187 const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size; | 235 const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size; |
| 188 | 236 |
| 237 std::vector<SBFullHash> full_hashes; | |
| 238 const size_t full_hashes_bytes = | |
| 239 sizeof(full_hashes[0]) * header.full_hashes_size; | |
| 240 | |
| 189 // Check for bogus sizes before allocating any space. | 241 // Check for bogus sizes before allocating any space. |
| 190 const size_t expected_bytes = | 242 const size_t expected_bytes = header_size + |
| 191 sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest); | 243 index_bytes + deltas_bytes + full_hashes_bytes + sizeof(MD5Digest); |
| 192 if (static_cast<int64>(expected_bytes) != size_64) | 244 if (static_cast<int64>(expected_bytes) != size_64) |
| 193 return scoped_ptr<PrefixSet>(); | 245 return scoped_ptr<PrefixSet>(); |
| 194 | 246 |
| 195 // The file looks valid, start building the digest. | |
| 196 base::MD5Context context; | |
| 197 base::MD5Init(&context); | |
| 198 base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header), | |
| 199 sizeof(header))); | |
| 200 | |
| 201 // Read the index vector. Herb Sutter indicates that vectors are | 247 // Read the index vector. Herb Sutter indicates that vectors are |
| 202 // guaranteed to be contiuguous, so reading to where element 0 lives | 248 // guaranteed to be contiuguous, so reading to where element 0 lives |
| 203 // is valid. | 249 // is valid. |
| 204 if (header.index_size) { | 250 if (header.index_size) { |
| 205 index.resize(header.index_size); | 251 index.resize(header.index_size); |
| 206 read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); | 252 read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); |
| 207 if (read != index.size()) | 253 if (read != index.size()) |
| 208 return scoped_ptr<PrefixSet>(); | 254 return scoped_ptr<PrefixSet>(); |
| 209 base::MD5Update(&context, | 255 base::MD5Update(&context, |
| 210 base::StringPiece(reinterpret_cast<char*>(&(index[0])), | 256 base::StringPiece(reinterpret_cast<char*>(&(index[0])), |
| 211 index_bytes)); | 257 index_bytes)); |
| 212 } | 258 } |
| 213 | 259 |
| 214 // Read vector of deltas. | 260 // Read vector of deltas. |
| 215 if (header.deltas_size) { | 261 if (header.deltas_size) { |
| 216 deltas.resize(header.deltas_size); | 262 deltas.resize(header.deltas_size); |
| 217 read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); | 263 read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); |
| 218 if (read != deltas.size()) | 264 if (read != deltas.size()) |
| 219 return scoped_ptr<PrefixSet>(); | 265 return scoped_ptr<PrefixSet>(); |
| 220 base::MD5Update(&context, | 266 base::MD5Update(&context, |
| 221 base::StringPiece(reinterpret_cast<char*>(&(deltas[0])), | 267 base::StringPiece(reinterpret_cast<char*>(&(deltas[0])), |
| 222 deltas_bytes)); | 268 deltas_bytes)); |
| 223 } | 269 } |
| 224 | 270 |
| 271 // Read vector of full hashes. | |
| 272 if (header.full_hashes_size) { | |
| 273 full_hashes.resize(header.full_hashes_size); | |
| 274 read = fread(&(full_hashes[0]), sizeof(full_hashes[0]), full_hashes.size(), | |
| 275 file.get()); | |
| 276 if (read != full_hashes.size()) | |
| 277 return scoped_ptr<PrefixSet>(); | |
| 278 base::MD5Update(&context, | |
| 279 base::StringPiece( | |
| 280 reinterpret_cast<char*>(&(full_hashes[0])), | |
| 281 full_hashes_bytes)); | |
| 282 } | |
| 283 | |
| 225 base::MD5Digest calculated_digest; | 284 base::MD5Digest calculated_digest; |
| 226 base::MD5Final(&calculated_digest, &context); | 285 base::MD5Final(&calculated_digest, &context); |
| 227 | 286 |
| 228 base::MD5Digest file_digest; | 287 base::MD5Digest file_digest; |
| 229 read = fread(&file_digest, sizeof(file_digest), 1, file.get()); | 288 read = fread(&file_digest, sizeof(file_digest), 1, file.get()); |
| 230 if (read != 1) | 289 if (read != 1) |
| 231 return scoped_ptr<PrefixSet>(); | 290 return scoped_ptr<PrefixSet>(); |
| 232 | 291 |
| 233 if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) | 292 if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) |
| 234 return scoped_ptr<PrefixSet>(); | 293 return scoped_ptr<PrefixSet>(); |
| 235 | 294 |
| 236 // For version 1, fetch the prefixes and re-sort. | 295 // For version 1, fetch the prefixes and re-sort. |
| 237 if (header.version == 1) { | 296 if (header.version == 1) { |
| 238 std::vector<SBPrefix> prefixes; | 297 std::vector<SBPrefix> prefixes; |
| 239 PrefixSet(&index, &deltas).GetPrefixes(&prefixes); | 298 PrefixSet(&index, &deltas, &full_hashes).GetPrefixes(&prefixes); |
| 240 std::sort(prefixes.begin(), prefixes.end()); | 299 std::sort(prefixes.begin(), prefixes.end()); |
| 300 | |
| 301 // v1 cannot have full hashes, so no need to propagate a copy here. | |
| 241 return PrefixSetBuilder(prefixes).GetPrefixSetNoHashes().Pass(); | 302 return PrefixSetBuilder(prefixes).GetPrefixSetNoHashes().Pass(); |
| 242 } | 303 } |
| 243 | 304 |
| 244 // Steals contents of |index| and |deltas| via swap(). | 305 // Steals contents of |index| and |deltas| via swap(). |
|
mattm
2014/05/01 22:26:51
update comment
Scott Hess - ex-Googler
2014/05/05 03:47:07
Done.
| |
| 245 return scoped_ptr<PrefixSet>(new PrefixSet(&index, &deltas)); | 306 return scoped_ptr<PrefixSet>(new PrefixSet(&index, &deltas, &full_hashes)); |
| 246 } | 307 } |
| 247 | 308 |
| 248 bool PrefixSet::WriteFile(const base::FilePath& filter_name) const { | 309 bool PrefixSet::WriteFile(const base::FilePath& filter_name) const { |
| 249 FileHeader header; | 310 FileHeader header; |
| 250 header.magic = kMagic; | 311 header.magic = kMagic; |
| 251 header.version = kVersion; | 312 header.version = kVersion; |
| 252 header.index_size = static_cast<uint32>(index_.size()); | 313 header.index_size = static_cast<uint32>(index_.size()); |
| 253 header.deltas_size = static_cast<uint32>(deltas_.size()); | 314 header.deltas_size = static_cast<uint32>(deltas_.size()); |
| 315 header.full_hashes_size = static_cast<uint32>(full_hashes_.size()); | |
| 254 | 316 |
| 255 // Sanity check that the 32-bit values never mess things up. | 317 // Sanity check that the 32-bit values never mess things up. |
| 256 if (static_cast<size_t>(header.index_size) != index_.size() || | 318 if (static_cast<size_t>(header.index_size) != index_.size() || |
| 257 static_cast<size_t>(header.deltas_size) != deltas_.size()) { | 319 static_cast<size_t>(header.deltas_size) != deltas_.size() || |
| 320 static_cast<size_t>(header.full_hashes_size) != full_hashes_.size()) { | |
| 258 NOTREACHED(); | 321 NOTREACHED(); |
| 259 return false; | 322 return false; |
| 260 } | 323 } |
| 261 | 324 |
| 262 base::ScopedFILE file(base::OpenFile(filter_name, "wb")); | 325 base::ScopedFILE file(base::OpenFile(filter_name, "wb")); |
| 263 if (!file.get()) | 326 if (!file.get()) |
| 264 return false; | 327 return false; |
| 265 | 328 |
| 266 base::MD5Context context; | 329 base::MD5Context context; |
| 267 base::MD5Init(&context); | 330 base::MD5Init(&context); |
| (...skipping 25 matching lines...) Expand all Loading... | |
| 293 written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(), | 356 written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(), |
| 294 file.get()); | 357 file.get()); |
| 295 if (written != deltas_.size()) | 358 if (written != deltas_.size()) |
| 296 return false; | 359 return false; |
| 297 base::MD5Update(&context, | 360 base::MD5Update(&context, |
| 298 base::StringPiece( | 361 base::StringPiece( |
| 299 reinterpret_cast<const char*>(&(deltas_[0])), | 362 reinterpret_cast<const char*>(&(deltas_[0])), |
| 300 deltas_bytes)); | 363 deltas_bytes)); |
| 301 } | 364 } |
| 302 | 365 |
| 366 if (full_hashes_.size()) { | |
| 367 const size_t elt_size = sizeof(full_hashes_[0]); | |
| 368 const size_t elts = full_hashes_.size(); | |
| 369 const size_t full_hashes_bytes = elt_size * elts; | |
| 370 written = fwrite(&(full_hashes_[0]), elt_size, elts, file.get()); | |
| 371 if (written != elts) | |
| 372 return false; | |
| 373 base::MD5Update(&context, | |
| 374 base::StringPiece( | |
| 375 reinterpret_cast<const char*>(&(full_hashes_[0])), | |
| 376 full_hashes_bytes)); | |
| 377 } | |
| 378 | |
| 303 base::MD5Digest digest; | 379 base::MD5Digest digest; |
| 304 base::MD5Final(&digest, &context); | 380 base::MD5Final(&digest, &context); |
| 305 written = fwrite(&digest, sizeof(digest), 1, file.get()); | 381 written = fwrite(&digest, sizeof(digest), 1, file.get()); |
| 306 if (written != 1) | 382 if (written != 1) |
| 307 return false; | 383 return false; |
| 308 | 384 |
| 309 // TODO(shess): Can this code check that the close was successful? | 385 // TODO(shess): Can this code check that the close was successful? |
| 310 file.reset(); | 386 file.reset(); |
| 311 | 387 |
| 312 return true; | 388 return true; |
| (...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
| 407 } | 483 } |
| 408 buffer_.push_back(prefix); | 484 buffer_.push_back(prefix); |
| 409 | 485 |
| 410 // Flush buffer when a run can be constructed. +1 for the index item, and +1 | 486 // Flush buffer when a run can be constructed. +1 for the index item, and +1 |
| 411 // to leave at least one item in the buffer for dropping duplicates. | 487 // to leave at least one item in the buffer for dropping duplicates. |
| 412 if (buffer_.size() > PrefixSet::kMaxRun + 2) | 488 if (buffer_.size() > PrefixSet::kMaxRun + 2) |
| 413 EmitRun(); | 489 EmitRun(); |
| 414 } | 490 } |
| 415 | 491 |
| 416 } // namespace safe_browsing | 492 } // namespace safe_browsing |
| OLD | NEW |