Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(321)

Side by Side Diff: chrome/browser/safe_browsing/prefix_set.cc

Issue 266793002: [safe browsing] Serialize full hashes with prefix set. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Add a v3 golden test. Created 6 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/safe_browsing/prefix_set.h" 5 #include "chrome/browser/safe_browsing/prefix_set.h"
6 6
7 #include <algorithm> 7 #include <algorithm>
8 #include <math.h> 8 #include <math.h>
9 9
10 #include "base/file_util.h" 10 #include "base/file_util.h"
11 #include "base/files/scoped_file.h" 11 #include "base/files/scoped_file.h"
12 #include "base/logging.h" 12 #include "base/logging.h"
13 #include "base/md5.h" 13 #include "base/md5.h"
14 #include "base/metrics/histogram.h" 14 #include "base/metrics/histogram.h"
15 #include "base/metrics/sparse_histogram.h" 15 #include "base/metrics/sparse_histogram.h"
16 16
17 namespace { 17 namespace {
18 18
19 // |kMagic| should be reasonably unique, and not match itself across 19 // |kMagic| should be reasonably unique, and not match itself across
20 // endianness changes. I generated this value with: 20 // endianness changes. I generated this value with:
21 // md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9 21 // md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9
22 static uint32 kMagic = 0x864088dd; 22 static uint32 kMagic = 0x864088dd;
23 23
24 // Version history: 24 // Version history:
25 // Version 1: b6cb7cfe/r74487 by shess@chromium.org on 2011-02-10 25 // Version 1: b6cb7cfe/r74487 by shess@chromium.org on 2011-02-10
26 // Version 2: 2b59b0a6/r253924 by shess@chromium.org on 2014-02-27 26 // Version 2: 2b59b0a6/r253924 by shess@chromium.org on 2014-02-27
27 // Version 3: ????????/r?????? by shess@chromium.org on 2014-04-??
27 28
28 // Version 2 layout is identical to version 1. The sort order of |index_| 29 // Version 2 layout is identical to version 1. The sort order of |index_|
29 // changed from |int32| to |uint32| to match the change of |SBPrefix|. 30 // changed from |int32| to |uint32| to match the change of |SBPrefix|.
30 static uint32 kVersion = 0x2; 31 // Version 3 adds storage for full hashes.
32 static uint32 kVersion = 0x3;
31 33
32 typedef struct { 34 typedef struct {
33 uint32 magic; 35 uint32 magic;
34 uint32 version; 36 uint32 version;
35 uint32 index_size; 37 uint32 index_size;
36 uint32 deltas_size; 38 uint32 deltas_size;
39 } FileHeader_v2;
40
41 typedef struct {
42 uint32 magic;
43 uint32 version;
44 uint32 index_size;
45 uint32 deltas_size;
46 uint32 full_hashes_size;
37 } FileHeader; 47 } FileHeader;
38 48
39 // Common std::vector<> implementations add capacity by multiplying from the 49 // Common std::vector<> implementations add capacity by multiplying from the
40 // current size (usually either by 2 or 1.5) to satisfy push_back() running in 50 // current size (usually either by 2 or 1.5) to satisfy push_back() running in
41 // amortized constant time. This is not necessary for insert() at end(), but 51 // amortized constant time. This is not necessary for insert() at end(), but
42 // AFAICT it seems true for some implementations. SBPrefix values should 52 // AFAICT it seems true for some implementations. SBPrefix values should
43 // uniformly cover the 32-bit space, so the final size can be estimated given a 53 // uniformly cover the 32-bit space, so the final size can be estimated given a
44 // subset of the input. 54 // subset of the input.
45 // 55 //
46 // |kEstimateThreshold| is when estimates start converging. Results are strong 56 // |kEstimateThreshold| is when estimates start converging. Results are strong
(...skipping 30 matching lines...) Expand all
77 87
78 // For |std::upper_bound()| to find a prefix w/in a vector of pairs. 88 // For |std::upper_bound()| to find a prefix w/in a vector of pairs.
79 // static 89 // static
80 bool PrefixSet::PrefixLess(const IndexPair& a, const IndexPair& b) { 90 bool PrefixSet::PrefixLess(const IndexPair& a, const IndexPair& b) {
81 return a.first < b.first; 91 return a.first < b.first;
82 } 92 }
83 93
84 PrefixSet::PrefixSet() { 94 PrefixSet::PrefixSet() {
85 } 95 }
86 96
87 PrefixSet::PrefixSet(IndexVector* index, std::vector<uint16>* deltas) { 97 PrefixSet::PrefixSet(IndexVector* index,
88 DCHECK(index && deltas); 98 std::vector<uint16>* deltas,
99 std::vector<SBFullHash>* full_hashes) {
100 DCHECK(index && deltas && full_hashes);
89 index_.swap(*index); 101 index_.swap(*index);
90 deltas_.swap(*deltas); 102 deltas_.swap(*deltas);
103 full_hashes_.swap(*full_hashes);
91 } 104 }
92 105
93 PrefixSet::~PrefixSet() {} 106 PrefixSet::~PrefixSet() {}
94 107
95 bool PrefixSet::PrefixExists(SBPrefix prefix) const { 108 bool PrefixSet::PrefixExists(SBPrefix prefix) const {
96 if (index_.empty()) 109 if (index_.empty())
97 return false; 110 return false;
98 111
99 // Find the first position after |prefix| in |index_|. 112 // Find the first position after |prefix| in |index_|.
100 IndexVector::const_iterator iter = 113 IndexVector::const_iterator iter =
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after
149 } 162 }
150 } 163 }
151 } 164 }
152 165
153 // static 166 // static
154 scoped_ptr<PrefixSet> PrefixSet::LoadFile(const base::FilePath& filter_name) { 167 scoped_ptr<PrefixSet> PrefixSet::LoadFile(const base::FilePath& filter_name) {
155 int64 size_64; 168 int64 size_64;
156 if (!base::GetFileSize(filter_name, &size_64)) 169 if (!base::GetFileSize(filter_name, &size_64))
157 return scoped_ptr<PrefixSet>(); 170 return scoped_ptr<PrefixSet>();
158 using base::MD5Digest; 171 using base::MD5Digest;
159 if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest))) 172 // TODO(shess): Revert to sizeof(FileHeader) for sanity check once v2 is
173 // deprecated.
174 if (size_64 < static_cast<int64>(sizeof(FileHeader_v2) + sizeof(MD5Digest)))
160 return scoped_ptr<PrefixSet>(); 175 return scoped_ptr<PrefixSet>();
161 176
162 base::ScopedFILE file(base::OpenFile(filter_name, "rb")); 177 base::ScopedFILE file(base::OpenFile(filter_name, "rb"));
163 if (!file.get()) 178 if (!file.get())
164 return scoped_ptr<PrefixSet>(); 179 return scoped_ptr<PrefixSet>();
165 180
166 FileHeader header; 181 FileHeader header;
167 size_t read = fread(&header, sizeof(header), 1, file.get()); 182 size_t read = fread(&header, sizeof(header), 1, file.get());
168 if (read != 1) 183 if (read != 1)
169 return scoped_ptr<PrefixSet>(); 184 return scoped_ptr<PrefixSet>();
170 185
186 // The file looks valid, start building the digest.
187 base::MD5Context context;
188 base::MD5Init(&context);
189 base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header),
190 sizeof(header)));
191
171 if (header.magic != kMagic) 192 if (header.magic != kMagic)
172 return scoped_ptr<PrefixSet>(); 193 return scoped_ptr<PrefixSet>();
173 194
174 // Track version read to inform removal of support for older versions. 195 // Track version read to inform removal of support for older versions.
175 UMA_HISTOGRAM_SPARSE_SLOWLY("SB2.PrefixSetVersionRead", header.version); 196 UMA_HISTOGRAM_SPARSE_SLOWLY("SB2.PrefixSetVersionRead", header.version);
176 197
177 // TODO(shess): Version 1 and 2 use the same file structure, with version 1 198 // TODO(shess): Version 1 and 2 use the same file structure, with version 1
178 // data using a signed sort. For M-35, the data is re-sorted before return. 199 // data using a signed sort. For M-35, the data is re-sorted before return.
179 // After M-35, just drop v1 support. <http://crbug.com/346405> 200 // After M-36, just drop v1 support. <http://crbug.com/346405>
180 if (header.version != kVersion && header.version != 1) 201 // TODO(shess): <http://crbug.com/368044> for removing v2 support.
202 size_t header_size = sizeof(header);
203 if (header.version == 2 || header.version == 1) {
204 // Rewind the file and restart building the digest with the old header
205 // structure.
206 FileHeader_v2 v2_header;
207 if (0 != fseek(file.get(), 0, SEEK_SET))
208 return scoped_ptr<PrefixSet>();
209
210 size_t read = fread(&v2_header, sizeof(v2_header), 1, file.get());
211 if (read != 1)
212 return scoped_ptr<PrefixSet>();
213
214 base::MD5Init(&context);
215 base::MD5Update(&context,
216 base::StringPiece(reinterpret_cast<char*>(&v2_header),
217 sizeof(v2_header)));
218
219 // The current header is a superset of the old header, fill it in with the
220 // information read.
221 header.magic = v2_header.magic;
222 header.version = v2_header.version;
223 header.index_size = v2_header.index_size;
224 header.deltas_size = v2_header.deltas_size;
225 header.full_hashes_size = 0;
226 header_size = sizeof(v2_header);
227 } else if (header.version != kVersion) {
181 return scoped_ptr<PrefixSet>(); 228 return scoped_ptr<PrefixSet>();
229 }
182 230
183 IndexVector index; 231 IndexVector index;
184 const size_t index_bytes = sizeof(index[0]) * header.index_size; 232 const size_t index_bytes = sizeof(index[0]) * header.index_size;
185 233
186 std::vector<uint16> deltas; 234 std::vector<uint16> deltas;
187 const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size; 235 const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size;
188 236
237 std::vector<SBFullHash> full_hashes;
238 const size_t full_hashes_bytes =
239 sizeof(full_hashes[0]) * header.full_hashes_size;
240
189 // Check for bogus sizes before allocating any space. 241 // Check for bogus sizes before allocating any space.
190 const size_t expected_bytes = 242 const size_t expected_bytes = header_size +
191 sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest); 243 index_bytes + deltas_bytes + full_hashes_bytes + sizeof(MD5Digest);
192 if (static_cast<int64>(expected_bytes) != size_64) 244 if (static_cast<int64>(expected_bytes) != size_64)
193 return scoped_ptr<PrefixSet>(); 245 return scoped_ptr<PrefixSet>();
194 246
195 // The file looks valid, start building the digest.
196 base::MD5Context context;
197 base::MD5Init(&context);
198 base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header),
199 sizeof(header)));
200
201 // Read the index vector. Herb Sutter indicates that vectors are 247 // Read the index vector. Herb Sutter indicates that vectors are
202 // guaranteed to be contiuguous, so reading to where element 0 lives 248 // guaranteed to be contiuguous, so reading to where element 0 lives
203 // is valid. 249 // is valid.
204 if (header.index_size) { 250 if (header.index_size) {
205 index.resize(header.index_size); 251 index.resize(header.index_size);
206 read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); 252 read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get());
207 if (read != index.size()) 253 if (read != index.size())
208 return scoped_ptr<PrefixSet>(); 254 return scoped_ptr<PrefixSet>();
209 base::MD5Update(&context, 255 base::MD5Update(&context,
210 base::StringPiece(reinterpret_cast<char*>(&(index[0])), 256 base::StringPiece(reinterpret_cast<char*>(&(index[0])),
211 index_bytes)); 257 index_bytes));
212 } 258 }
213 259
214 // Read vector of deltas. 260 // Read vector of deltas.
215 if (header.deltas_size) { 261 if (header.deltas_size) {
216 deltas.resize(header.deltas_size); 262 deltas.resize(header.deltas_size);
217 read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); 263 read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get());
218 if (read != deltas.size()) 264 if (read != deltas.size())
219 return scoped_ptr<PrefixSet>(); 265 return scoped_ptr<PrefixSet>();
220 base::MD5Update(&context, 266 base::MD5Update(&context,
221 base::StringPiece(reinterpret_cast<char*>(&(deltas[0])), 267 base::StringPiece(reinterpret_cast<char*>(&(deltas[0])),
222 deltas_bytes)); 268 deltas_bytes));
223 } 269 }
224 270
271 // Read vector of full hashes.
272 if (header.full_hashes_size) {
273 full_hashes.resize(header.full_hashes_size);
274 read = fread(&(full_hashes[0]), sizeof(full_hashes[0]), full_hashes.size(),
275 file.get());
276 if (read != full_hashes.size())
277 return scoped_ptr<PrefixSet>();
278 base::MD5Update(&context,
279 base::StringPiece(
280 reinterpret_cast<char*>(&(full_hashes[0])),
281 full_hashes_bytes));
282 }
283
225 base::MD5Digest calculated_digest; 284 base::MD5Digest calculated_digest;
226 base::MD5Final(&calculated_digest, &context); 285 base::MD5Final(&calculated_digest, &context);
227 286
228 base::MD5Digest file_digest; 287 base::MD5Digest file_digest;
229 read = fread(&file_digest, sizeof(file_digest), 1, file.get()); 288 read = fread(&file_digest, sizeof(file_digest), 1, file.get());
230 if (read != 1) 289 if (read != 1)
231 return scoped_ptr<PrefixSet>(); 290 return scoped_ptr<PrefixSet>();
232 291
233 if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) 292 if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest)))
234 return scoped_ptr<PrefixSet>(); 293 return scoped_ptr<PrefixSet>();
235 294
236 // For version 1, fetch the prefixes and re-sort. 295 // For version 1, fetch the prefixes and re-sort.
237 if (header.version == 1) { 296 if (header.version == 1) {
238 std::vector<SBPrefix> prefixes; 297 std::vector<SBPrefix> prefixes;
239 PrefixSet(&index, &deltas).GetPrefixes(&prefixes); 298 PrefixSet(&index, &deltas, &full_hashes).GetPrefixes(&prefixes);
240 std::sort(prefixes.begin(), prefixes.end()); 299 std::sort(prefixes.begin(), prefixes.end());
300
301 // v1 cannot have full hashes, so no need to propagate a copy here.
241 return PrefixSetBuilder(prefixes).GetPrefixSetNoHashes().Pass(); 302 return PrefixSetBuilder(prefixes).GetPrefixSetNoHashes().Pass();
242 } 303 }
243 304
244 // Steals contents of |index| and |deltas| via swap(). 305 // Steals contents of |index| and |deltas| via swap().
mattm 2014/05/01 22:26:51 update comment
Scott Hess - ex-Googler 2014/05/05 03:47:07 Done.
245 return scoped_ptr<PrefixSet>(new PrefixSet(&index, &deltas)); 306 return scoped_ptr<PrefixSet>(new PrefixSet(&index, &deltas, &full_hashes));
246 } 307 }
247 308
248 bool PrefixSet::WriteFile(const base::FilePath& filter_name) const { 309 bool PrefixSet::WriteFile(const base::FilePath& filter_name) const {
249 FileHeader header; 310 FileHeader header;
250 header.magic = kMagic; 311 header.magic = kMagic;
251 header.version = kVersion; 312 header.version = kVersion;
252 header.index_size = static_cast<uint32>(index_.size()); 313 header.index_size = static_cast<uint32>(index_.size());
253 header.deltas_size = static_cast<uint32>(deltas_.size()); 314 header.deltas_size = static_cast<uint32>(deltas_.size());
315 header.full_hashes_size = static_cast<uint32>(full_hashes_.size());
254 316
255 // Sanity check that the 32-bit values never mess things up. 317 // Sanity check that the 32-bit values never mess things up.
256 if (static_cast<size_t>(header.index_size) != index_.size() || 318 if (static_cast<size_t>(header.index_size) != index_.size() ||
257 static_cast<size_t>(header.deltas_size) != deltas_.size()) { 319 static_cast<size_t>(header.deltas_size) != deltas_.size() ||
320 static_cast<size_t>(header.full_hashes_size) != full_hashes_.size()) {
258 NOTREACHED(); 321 NOTREACHED();
259 return false; 322 return false;
260 } 323 }
261 324
262 base::ScopedFILE file(base::OpenFile(filter_name, "wb")); 325 base::ScopedFILE file(base::OpenFile(filter_name, "wb"));
263 if (!file.get()) 326 if (!file.get())
264 return false; 327 return false;
265 328
266 base::MD5Context context; 329 base::MD5Context context;
267 base::MD5Init(&context); 330 base::MD5Init(&context);
(...skipping 25 matching lines...) Expand all
293 written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(), 356 written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(),
294 file.get()); 357 file.get());
295 if (written != deltas_.size()) 358 if (written != deltas_.size())
296 return false; 359 return false;
297 base::MD5Update(&context, 360 base::MD5Update(&context,
298 base::StringPiece( 361 base::StringPiece(
299 reinterpret_cast<const char*>(&(deltas_[0])), 362 reinterpret_cast<const char*>(&(deltas_[0])),
300 deltas_bytes)); 363 deltas_bytes));
301 } 364 }
302 365
366 if (full_hashes_.size()) {
367 const size_t elt_size = sizeof(full_hashes_[0]);
368 const size_t elts = full_hashes_.size();
369 const size_t full_hashes_bytes = elt_size * elts;
370 written = fwrite(&(full_hashes_[0]), elt_size, elts, file.get());
371 if (written != elts)
372 return false;
373 base::MD5Update(&context,
374 base::StringPiece(
375 reinterpret_cast<const char*>(&(full_hashes_[0])),
376 full_hashes_bytes));
377 }
378
303 base::MD5Digest digest; 379 base::MD5Digest digest;
304 base::MD5Final(&digest, &context); 380 base::MD5Final(&digest, &context);
305 written = fwrite(&digest, sizeof(digest), 1, file.get()); 381 written = fwrite(&digest, sizeof(digest), 1, file.get());
306 if (written != 1) 382 if (written != 1)
307 return false; 383 return false;
308 384
309 // TODO(shess): Can this code check that the close was successful? 385 // TODO(shess): Can this code check that the close was successful?
310 file.reset(); 386 file.reset();
311 387
312 return true; 388 return true;
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
407 } 483 }
408 buffer_.push_back(prefix); 484 buffer_.push_back(prefix);
409 485
410 // Flush buffer when a run can be constructed. +1 for the index item, and +1 486 // Flush buffer when a run can be constructed. +1 for the index item, and +1
411 // to leave at least one item in the buffer for dropping duplicates. 487 // to leave at least one item in the buffer for dropping duplicates.
412 if (buffer_.size() > PrefixSet::kMaxRun + 2) 488 if (buffer_.size() > PrefixSet::kMaxRun + 2)
413 EmitRun(); 489 EmitRun();
414 } 490 }
415 491
416 } // namespace safe_browsing 492 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698