OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/safe_browsing/prefix_set.h" | 5 #include "chrome/browser/safe_browsing/prefix_set.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 #include <math.h> | 8 #include <math.h> |
9 | 9 |
10 #include "base/file_util.h" | 10 #include "base/file_util.h" |
11 #include "base/files/scoped_file.h" | 11 #include "base/files/scoped_file.h" |
12 #include "base/logging.h" | 12 #include "base/logging.h" |
13 #include "base/md5.h" | 13 #include "base/md5.h" |
14 #include "base/metrics/histogram.h" | 14 #include "base/metrics/histogram.h" |
15 #include "base/metrics/sparse_histogram.h" | 15 #include "base/metrics/sparse_histogram.h" |
16 | 16 |
17 namespace { | 17 namespace { |
18 | 18 |
19 // |kMagic| should be reasonably unique, and not match itself across | 19 // |kMagic| should be reasonably unique, and not match itself across |
20 // endianness changes. I generated this value with: | 20 // endianness changes. I generated this value with: |
21 // md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9 | 21 // md5 -qs chrome/browser/safe_browsing/prefix_set.cc | colrm 9 |
22 static uint32 kMagic = 0x864088dd; | 22 static uint32 kMagic = 0x864088dd; |
23 | 23 |
24 // Version history: | 24 // Version history: |
25 // Version 1: b6cb7cfe/r74487 by shess@chromium.org on 2011-02-10 | 25 // Version 1: b6cb7cfe/r74487 by shess@chromium.org on 2011-02-10 |
26 // Version 2: 2b59b0a6/r253924 by shess@chromium.org on 2014-02-27 | 26 // Version 2: 2b59b0a6/r253924 by shess@chromium.org on 2014-02-27 |
27 // Version 3: ????????/r?????? by shess@chromium.org on 2014-04-?? | |
27 | 28 |
28 // Version 2 layout is identical to version 1. The sort order of |index_| | 29 // Version 2 layout is identical to version 1. The sort order of |index_| |
29 // changed from |int32| to |uint32| to match the change of |SBPrefix|. | 30 // changed from |int32| to |uint32| to match the change of |SBPrefix|. |
30 static uint32 kVersion = 0x2; | 31 // Version 3 adds storage for full hashes. |
32 static uint32 kVersion = 0x3; | |
31 | 33 |
32 typedef struct { | 34 typedef struct { |
33 uint32 magic; | 35 uint32 magic; |
34 uint32 version; | 36 uint32 version; |
35 uint32 index_size; | 37 uint32 index_size; |
36 uint32 deltas_size; | 38 uint32 deltas_size; |
39 } FileHeader_v2; | |
40 | |
41 typedef struct { | |
42 uint32 magic; | |
43 uint32 version; | |
44 uint32 index_size; | |
45 uint32 deltas_size; | |
46 uint32 full_hashes_size; | |
37 } FileHeader; | 47 } FileHeader; |
38 | 48 |
39 // Common std::vector<> implementations add capacity by multiplying from the | 49 // Common std::vector<> implementations add capacity by multiplying from the |
40 // current size (usually either by 2 or 1.5) to satisfy push_back() running in | 50 // current size (usually either by 2 or 1.5) to satisfy push_back() running in |
41 // amortized constant time. This is not necessary for insert() at end(), but | 51 // amortized constant time. This is not necessary for insert() at end(), but |
42 // AFAICT it seems true for some implementations. SBPrefix values should | 52 // AFAICT it seems true for some implementations. SBPrefix values should |
43 // uniformly cover the 32-bit space, so the final size can be estimated given a | 53 // uniformly cover the 32-bit space, so the final size can be estimated given a |
44 // subset of the input. | 54 // subset of the input. |
45 // | 55 // |
46 // |kEstimateThreshold| is when estimates start converging. Results are strong | 56 // |kEstimateThreshold| is when estimates start converging. Results are strong |
(...skipping 30 matching lines...) Expand all Loading... | |
77 | 87 |
78 // For |std::upper_bound()| to find a prefix w/in a vector of pairs. | 88 // For |std::upper_bound()| to find a prefix w/in a vector of pairs. |
79 // static | 89 // static |
80 bool PrefixSet::PrefixLess(const IndexPair& a, const IndexPair& b) { | 90 bool PrefixSet::PrefixLess(const IndexPair& a, const IndexPair& b) { |
81 return a.first < b.first; | 91 return a.first < b.first; |
82 } | 92 } |
83 | 93 |
84 PrefixSet::PrefixSet() { | 94 PrefixSet::PrefixSet() { |
85 } | 95 } |
86 | 96 |
87 PrefixSet::PrefixSet(IndexVector* index, std::vector<uint16>* deltas) { | 97 PrefixSet::PrefixSet(IndexVector* index, |
88 DCHECK(index && deltas); | 98 std::vector<uint16>* deltas, |
99 std::vector<SBFullHash>* full_hashes) { | |
100 DCHECK(index && deltas && full_hashes); | |
89 index_.swap(*index); | 101 index_.swap(*index); |
90 deltas_.swap(*deltas); | 102 deltas_.swap(*deltas); |
103 full_hashes_.swap(*full_hashes); | |
91 } | 104 } |
92 | 105 |
93 PrefixSet::~PrefixSet() {} | 106 PrefixSet::~PrefixSet() {} |
94 | 107 |
95 bool PrefixSet::PrefixExists(SBPrefix prefix) const { | 108 bool PrefixSet::PrefixExists(SBPrefix prefix) const { |
96 if (index_.empty()) | 109 if (index_.empty()) |
97 return false; | 110 return false; |
98 | 111 |
99 // Find the first position after |prefix| in |index_|. | 112 // Find the first position after |prefix| in |index_|. |
100 IndexVector::const_iterator iter = | 113 IndexVector::const_iterator iter = |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
149 } | 162 } |
150 } | 163 } |
151 } | 164 } |
152 | 165 |
153 // static | 166 // static |
154 scoped_ptr<PrefixSet> PrefixSet::LoadFile(const base::FilePath& filter_name) { | 167 scoped_ptr<PrefixSet> PrefixSet::LoadFile(const base::FilePath& filter_name) { |
155 int64 size_64; | 168 int64 size_64; |
156 if (!base::GetFileSize(filter_name, &size_64)) | 169 if (!base::GetFileSize(filter_name, &size_64)) |
157 return scoped_ptr<PrefixSet>(); | 170 return scoped_ptr<PrefixSet>(); |
158 using base::MD5Digest; | 171 using base::MD5Digest; |
159 if (size_64 < static_cast<int64>(sizeof(FileHeader) + sizeof(MD5Digest))) | 172 // TODO(shess): Revert to sizeof(FileHeader) for sanity check once v2 is |
173 // deprecated. | |
174 if (size_64 < static_cast<int64>(sizeof(FileHeader_v2) + sizeof(MD5Digest))) | |
160 return scoped_ptr<PrefixSet>(); | 175 return scoped_ptr<PrefixSet>(); |
161 | 176 |
162 base::ScopedFILE file(base::OpenFile(filter_name, "rb")); | 177 base::ScopedFILE file(base::OpenFile(filter_name, "rb")); |
163 if (!file.get()) | 178 if (!file.get()) |
164 return scoped_ptr<PrefixSet>(); | 179 return scoped_ptr<PrefixSet>(); |
165 | 180 |
166 FileHeader header; | 181 FileHeader header; |
167 size_t read = fread(&header, sizeof(header), 1, file.get()); | 182 size_t read = fread(&header, sizeof(header), 1, file.get()); |
168 if (read != 1) | 183 if (read != 1) |
169 return scoped_ptr<PrefixSet>(); | 184 return scoped_ptr<PrefixSet>(); |
170 | 185 |
186 // The file looks valid, start building the digest. | |
187 base::MD5Context context; | |
188 base::MD5Init(&context); | |
189 base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header), | |
190 sizeof(header))); | |
191 | |
171 if (header.magic != kMagic) | 192 if (header.magic != kMagic) |
172 return scoped_ptr<PrefixSet>(); | 193 return scoped_ptr<PrefixSet>(); |
173 | 194 |
174 // Track version read to inform removal of support for older versions. | 195 // Track version read to inform removal of support for older versions. |
175 UMA_HISTOGRAM_SPARSE_SLOWLY("SB2.PrefixSetVersionRead", header.version); | 196 UMA_HISTOGRAM_SPARSE_SLOWLY("SB2.PrefixSetVersionRead", header.version); |
176 | 197 |
177 // TODO(shess): Version 1 and 2 use the same file structure, with version 1 | 198 // TODO(shess): Version 1 and 2 use the same file structure, with version 1 |
178 // data using a signed sort. For M-35, the data is re-sorted before return. | 199 // data using a signed sort. For M-35, the data is re-sorted before return. |
179 // After M-35, just drop v1 support. <http://crbug.com/346405> | 200 // After M-36, just drop v1 support. <http://crbug.com/346405> |
180 if (header.version != kVersion && header.version != 1) | 201 // TODO(shess): <http://crbug.com/368044> for removing v2 support. |
202 size_t header_size = sizeof(header); | |
203 if (header.version == 2 || header.version == 1) { | |
204 // Rewind the file and restart building the digest with the old header | |
205 // structure. | |
206 FileHeader_v2 v2_header; | |
207 if (0 != fseek(file.get(), 0, SEEK_SET)) | |
208 return scoped_ptr<PrefixSet>(); | |
209 | |
210 size_t read = fread(&v2_header, sizeof(v2_header), 1, file.get()); | |
211 if (read != 1) | |
212 return scoped_ptr<PrefixSet>(); | |
213 | |
214 base::MD5Init(&context); | |
215 base::MD5Update(&context, | |
216 base::StringPiece(reinterpret_cast<char*>(&v2_header), | |
217 sizeof(v2_header))); | |
218 | |
219 // The current header is a superset of the old header, fill it in with the | |
220 // information read. | |
221 header.magic = v2_header.magic; | |
222 header.version = v2_header.version; | |
223 header.index_size = v2_header.index_size; | |
224 header.deltas_size = v2_header.deltas_size; | |
225 header.full_hashes_size = 0; | |
226 header_size = sizeof(v2_header); | |
227 } else if (header.version != kVersion) { | |
181 return scoped_ptr<PrefixSet>(); | 228 return scoped_ptr<PrefixSet>(); |
229 } | |
182 | 230 |
183 IndexVector index; | 231 IndexVector index; |
184 const size_t index_bytes = sizeof(index[0]) * header.index_size; | 232 const size_t index_bytes = sizeof(index[0]) * header.index_size; |
185 | 233 |
186 std::vector<uint16> deltas; | 234 std::vector<uint16> deltas; |
187 const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size; | 235 const size_t deltas_bytes = sizeof(deltas[0]) * header.deltas_size; |
188 | 236 |
237 std::vector<SBFullHash> full_hashes; | |
238 const size_t full_hashes_bytes = | |
239 sizeof(full_hashes[0]) * header.full_hashes_size; | |
240 | |
189 // Check for bogus sizes before allocating any space. | 241 // Check for bogus sizes before allocating any space. |
190 const size_t expected_bytes = | 242 const size_t expected_bytes = header_size + |
191 sizeof(header) + index_bytes + deltas_bytes + sizeof(MD5Digest); | 243 index_bytes + deltas_bytes + full_hashes_bytes + sizeof(MD5Digest); |
192 if (static_cast<int64>(expected_bytes) != size_64) | 244 if (static_cast<int64>(expected_bytes) != size_64) |
193 return scoped_ptr<PrefixSet>(); | 245 return scoped_ptr<PrefixSet>(); |
194 | 246 |
195 // The file looks valid, start building the digest. | |
196 base::MD5Context context; | |
197 base::MD5Init(&context); | |
198 base::MD5Update(&context, base::StringPiece(reinterpret_cast<char*>(&header), | |
199 sizeof(header))); | |
200 | |
201 // Read the index vector. Herb Sutter indicates that vectors are | 247 // Read the index vector. Herb Sutter indicates that vectors are |
202 // guaranteed to be contiuguous, so reading to where element 0 lives | 248 // guaranteed to be contiuguous, so reading to where element 0 lives |
203 // is valid. | 249 // is valid. |
204 if (header.index_size) { | 250 if (header.index_size) { |
205 index.resize(header.index_size); | 251 index.resize(header.index_size); |
206 read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); | 252 read = fread(&(index[0]), sizeof(index[0]), index.size(), file.get()); |
207 if (read != index.size()) | 253 if (read != index.size()) |
208 return scoped_ptr<PrefixSet>(); | 254 return scoped_ptr<PrefixSet>(); |
209 base::MD5Update(&context, | 255 base::MD5Update(&context, |
210 base::StringPiece(reinterpret_cast<char*>(&(index[0])), | 256 base::StringPiece(reinterpret_cast<char*>(&(index[0])), |
211 index_bytes)); | 257 index_bytes)); |
212 } | 258 } |
213 | 259 |
214 // Read vector of deltas. | 260 // Read vector of deltas. |
215 if (header.deltas_size) { | 261 if (header.deltas_size) { |
216 deltas.resize(header.deltas_size); | 262 deltas.resize(header.deltas_size); |
217 read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); | 263 read = fread(&(deltas[0]), sizeof(deltas[0]), deltas.size(), file.get()); |
218 if (read != deltas.size()) | 264 if (read != deltas.size()) |
219 return scoped_ptr<PrefixSet>(); | 265 return scoped_ptr<PrefixSet>(); |
220 base::MD5Update(&context, | 266 base::MD5Update(&context, |
221 base::StringPiece(reinterpret_cast<char*>(&(deltas[0])), | 267 base::StringPiece(reinterpret_cast<char*>(&(deltas[0])), |
222 deltas_bytes)); | 268 deltas_bytes)); |
223 } | 269 } |
224 | 270 |
271 // Read vector of full hashes. | |
272 if (header.full_hashes_size) { | |
273 full_hashes.resize(header.full_hashes_size); | |
274 read = fread(&(full_hashes[0]), sizeof(full_hashes[0]), full_hashes.size(), | |
275 file.get()); | |
276 if (read != full_hashes.size()) | |
277 return scoped_ptr<PrefixSet>(); | |
278 base::MD5Update(&context, | |
279 base::StringPiece( | |
280 reinterpret_cast<char*>(&(full_hashes[0])), | |
281 full_hashes_bytes)); | |
282 } | |
283 | |
225 base::MD5Digest calculated_digest; | 284 base::MD5Digest calculated_digest; |
226 base::MD5Final(&calculated_digest, &context); | 285 base::MD5Final(&calculated_digest, &context); |
227 | 286 |
228 base::MD5Digest file_digest; | 287 base::MD5Digest file_digest; |
229 read = fread(&file_digest, sizeof(file_digest), 1, file.get()); | 288 read = fread(&file_digest, sizeof(file_digest), 1, file.get()); |
230 if (read != 1) | 289 if (read != 1) |
231 return scoped_ptr<PrefixSet>(); | 290 return scoped_ptr<PrefixSet>(); |
232 | 291 |
233 if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) | 292 if (0 != memcmp(&file_digest, &calculated_digest, sizeof(file_digest))) |
234 return scoped_ptr<PrefixSet>(); | 293 return scoped_ptr<PrefixSet>(); |
235 | 294 |
236 // For version 1, fetch the prefixes and re-sort. | 295 // For version 1, fetch the prefixes and re-sort. |
237 if (header.version == 1) { | 296 if (header.version == 1) { |
238 std::vector<SBPrefix> prefixes; | 297 std::vector<SBPrefix> prefixes; |
239 PrefixSet(&index, &deltas).GetPrefixes(&prefixes); | 298 PrefixSet(&index, &deltas, &full_hashes).GetPrefixes(&prefixes); |
240 std::sort(prefixes.begin(), prefixes.end()); | 299 std::sort(prefixes.begin(), prefixes.end()); |
300 | |
301 // v1 cannot have full hashes, so no need to propagate a copy here. | |
241 return PrefixSetBuilder(prefixes).GetPrefixSetNoHashes().Pass(); | 302 return PrefixSetBuilder(prefixes).GetPrefixSetNoHashes().Pass(); |
242 } | 303 } |
243 | 304 |
244 // Steals contents of |index| and |deltas| via swap(). | 305 // Steals contents of |index| and |deltas| via swap(). |
mattm
2014/05/01 22:26:51
update comment
Scott Hess - ex-Googler
2014/05/05 03:47:07
Done.
| |
245 return scoped_ptr<PrefixSet>(new PrefixSet(&index, &deltas)); | 306 return scoped_ptr<PrefixSet>(new PrefixSet(&index, &deltas, &full_hashes)); |
246 } | 307 } |
247 | 308 |
248 bool PrefixSet::WriteFile(const base::FilePath& filter_name) const { | 309 bool PrefixSet::WriteFile(const base::FilePath& filter_name) const { |
249 FileHeader header; | 310 FileHeader header; |
250 header.magic = kMagic; | 311 header.magic = kMagic; |
251 header.version = kVersion; | 312 header.version = kVersion; |
252 header.index_size = static_cast<uint32>(index_.size()); | 313 header.index_size = static_cast<uint32>(index_.size()); |
253 header.deltas_size = static_cast<uint32>(deltas_.size()); | 314 header.deltas_size = static_cast<uint32>(deltas_.size()); |
315 header.full_hashes_size = static_cast<uint32>(full_hashes_.size()); | |
254 | 316 |
255 // Sanity check that the 32-bit values never mess things up. | 317 // Sanity check that the 32-bit values never mess things up. |
256 if (static_cast<size_t>(header.index_size) != index_.size() || | 318 if (static_cast<size_t>(header.index_size) != index_.size() || |
257 static_cast<size_t>(header.deltas_size) != deltas_.size()) { | 319 static_cast<size_t>(header.deltas_size) != deltas_.size() || |
320 static_cast<size_t>(header.full_hashes_size) != full_hashes_.size()) { | |
258 NOTREACHED(); | 321 NOTREACHED(); |
259 return false; | 322 return false; |
260 } | 323 } |
261 | 324 |
262 base::ScopedFILE file(base::OpenFile(filter_name, "wb")); | 325 base::ScopedFILE file(base::OpenFile(filter_name, "wb")); |
263 if (!file.get()) | 326 if (!file.get()) |
264 return false; | 327 return false; |
265 | 328 |
266 base::MD5Context context; | 329 base::MD5Context context; |
267 base::MD5Init(&context); | 330 base::MD5Init(&context); |
(...skipping 25 matching lines...) Expand all Loading... | |
293 written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(), | 356 written = fwrite(&(deltas_[0]), sizeof(deltas_[0]), deltas_.size(), |
294 file.get()); | 357 file.get()); |
295 if (written != deltas_.size()) | 358 if (written != deltas_.size()) |
296 return false; | 359 return false; |
297 base::MD5Update(&context, | 360 base::MD5Update(&context, |
298 base::StringPiece( | 361 base::StringPiece( |
299 reinterpret_cast<const char*>(&(deltas_[0])), | 362 reinterpret_cast<const char*>(&(deltas_[0])), |
300 deltas_bytes)); | 363 deltas_bytes)); |
301 } | 364 } |
302 | 365 |
366 if (full_hashes_.size()) { | |
367 const size_t elt_size = sizeof(full_hashes_[0]); | |
368 const size_t elts = full_hashes_.size(); | |
369 const size_t full_hashes_bytes = elt_size * elts; | |
370 written = fwrite(&(full_hashes_[0]), elt_size, elts, file.get()); | |
371 if (written != elts) | |
372 return false; | |
373 base::MD5Update(&context, | |
374 base::StringPiece( | |
375 reinterpret_cast<const char*>(&(full_hashes_[0])), | |
376 full_hashes_bytes)); | |
377 } | |
378 | |
303 base::MD5Digest digest; | 379 base::MD5Digest digest; |
304 base::MD5Final(&digest, &context); | 380 base::MD5Final(&digest, &context); |
305 written = fwrite(&digest, sizeof(digest), 1, file.get()); | 381 written = fwrite(&digest, sizeof(digest), 1, file.get()); |
306 if (written != 1) | 382 if (written != 1) |
307 return false; | 383 return false; |
308 | 384 |
309 // TODO(shess): Can this code check that the close was successful? | 385 // TODO(shess): Can this code check that the close was successful? |
310 file.reset(); | 386 file.reset(); |
311 | 387 |
312 return true; | 388 return true; |
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
407 } | 483 } |
408 buffer_.push_back(prefix); | 484 buffer_.push_back(prefix); |
409 | 485 |
410 // Flush buffer when a run can be constructed. +1 for the index item, and +1 | 486 // Flush buffer when a run can be constructed. +1 for the index item, and +1 |
411 // to leave at least one item in the buffer for dropping duplicates. | 487 // to leave at least one item in the buffer for dropping duplicates. |
412 if (buffer_.size() > PrefixSet::kMaxRun + 2) | 488 if (buffer_.size() > PrefixSet::kMaxRun + 2) |
413 EmitRun(); | 489 EmitRun(); |
414 } | 490 } |
415 | 491 |
416 } // namespace safe_browsing | 492 } // namespace safe_browsing |
OLD | NEW |