Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(657)

Side by Side Diff: components/safe_browsing_db/v4_store.cc

Issue 2151953003: PVer4: Drop hash prefixes based on the removals field in the response (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@02_iterators
Patch Set: Use const& Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "base/base64.h" 5 #include "base/base64.h"
6 #include "base/bind.h" 6 #include "base/bind.h"
7 #include "base/files/file_util.h" 7 #include "base/files/file_util.h"
8 #include "base/metrics/histogram_macros.h" 8 #include "base/metrics/histogram_macros.h"
9 #include "base/metrics/sparse_histogram.h" 9 #include "base/metrics/sparse_histogram.h"
10 #include "base/strings/stringprintf.h" 10 #include "base/strings/stringprintf.h"
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
47 } 47 }
48 48
49 // Returns the name of the temporary file used to buffer data for 49 // Returns the name of the temporary file used to buffer data for
50 // |filename|. Exported for unit tests. 50 // |filename|. Exported for unit tests.
51 const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) { 51 const base::FilePath TemporaryFileForFilename(const base::FilePath& filename) {
52 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new")); 52 return base::FilePath(filename.value() + FILE_PATH_LITERAL("_new"));
53 } 53 }
54 54
55 } // namespace 55 } // namespace
56 56
57 using ::google::protobuf::RepeatedField;
58 using ::google::protobuf::RepeatedPtrField;
59 using ::google::protobuf::int32;
Nathan Parker 2016/07/15 16:49:38 seems like this might conflict with an existing in
vakh (use Gerrit instead) 2016/07/18 18:43:17 Can't find any definitions: https://cs.chromium.or
60
57 std::ostream& operator<<(std::ostream& os, const V4Store& store) { 61 std::ostream& operator<<(std::ostream& os, const V4Store& store) {
58 os << store.DebugString(); 62 os << store.DebugString();
59 return os; 63 return os;
60 } 64 }
61 65
62 V4Store* V4StoreFactory::CreateV4Store( 66 V4Store* V4StoreFactory::CreateV4Store(
63 const scoped_refptr<base::SequencedTaskRunner>& task_runner, 67 const scoped_refptr<base::SequencedTaskRunner>& task_runner,
64 const base::FilePath& store_path) { 68 const base::FilePath& store_path) {
65 V4Store* new_store = new V4Store(task_runner, store_path); 69 V4Store* new_store = new V4Store(task_runner, store_path);
66 new_store->Initialize(); 70 new_store->Initialize();
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
103 new V4Store(this->task_runner_, this->store_path_)); 107 new V4Store(this->task_runner_, this->store_path_));
104 new_store->state_ = response->new_client_state(); 108 new_store->state_ = response->new_client_state();
105 // TODO(vakh): 109 // TODO(vakh):
106 // 1. Merge the old store and the new update in new_store. 110 // 1. Merge the old store and the new update in new_store.
107 // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and 111 // 2. Create a ListUpdateResponse containing RICE encoded hash-prefixes and
108 // response_type as FULL_UPDATE, and write that to disk. 112 // response_type as FULL_UPDATE, and write that to disk.
109 // 3. Remove this if condition after completing 1. and 2. 113 // 3. Remove this if condition after completing 1. and 2.
110 HashPrefixMap hash_prefix_map; 114 HashPrefixMap hash_prefix_map;
111 ApplyUpdateResult apply_update_result; 115 ApplyUpdateResult apply_update_result;
112 if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) { 116 if (response->response_type() == ListUpdateResponse::PARTIAL_UPDATE) {
117 int removals_size = response->removals_size();
Nathan Parker 2016/07/15 16:49:38 no need for the temp removals_size if you're not r
vakh (use Gerrit instead) 2016/07/18 18:43:17 Done.
118 DCHECK(removals_size <= 1);
119 RepeatedField<int32> raw_removals;
113 for (const auto& removal : response->removals()) { 120 for (const auto& removal : response->removals()) {
Nathan Parker 2016/07/15 16:49:38 You could simplify this since you expect 0 or 1 re
vakh (use Gerrit instead) 2016/07/18 18:43:17 Done.
114 // TODO(vakh): Allow other compression types. 121 // TODO(vakh): Allow other compression types.
115 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567 122 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
116 DCHECK_EQ(RAW, removal.compression_type()); 123 DCHECK_EQ(RAW, removal.compression_type());
124 const RawIndices& raw_indices = removal.raw_indices();
125 raw_removals = raw_indices.indices();
Nathan Parker 2016/07/15 16:49:38 This does a copy
vakh (use Gerrit instead) 2016/07/18 18:43:17 Done.
117 } 126 }
118 127
119 apply_update_result = UpdateHashPrefixMapFromAdditions( 128 apply_update_result = UpdateHashPrefixMapFromAdditions(
120 response->additions(), &hash_prefix_map); 129 response->additions(), &hash_prefix_map);
121 if (apply_update_result == APPLY_UPDATE_SUCCESS) { 130 if (apply_update_result == APPLY_UPDATE_SUCCESS) {
122 apply_update_result = 131 apply_update_result = new_store->MergeUpdate(
123 new_store->MergeUpdate(hash_prefix_map_, hash_prefix_map); 132 hash_prefix_map_, hash_prefix_map, raw_removals);
124 } 133 }
125 // TODO(vakh): Generate the updated ListUpdateResponse to write to disk. 134 // TODO(vakh): Generate the updated ListUpdateResponse to write to disk.
126 } else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) { 135 } else if (response->response_type() == ListUpdateResponse::FULL_UPDATE) {
127 apply_update_result = UpdateHashPrefixMapFromAdditions( 136 apply_update_result = UpdateHashPrefixMapFromAdditions(
128 response->additions(), &hash_prefix_map); 137 response->additions(), &hash_prefix_map);
129 if (apply_update_result == APPLY_UPDATE_SUCCESS) { 138 if (apply_update_result == APPLY_UPDATE_SUCCESS) {
130 new_store->hash_prefix_map_ = hash_prefix_map; 139 new_store->hash_prefix_map_ = hash_prefix_map;
131 RecordStoreWriteResult(new_store->WriteToDisk(std::move(response))); 140 RecordStoreWriteResult(new_store->WriteToDisk(std::move(response)));
132 } 141 }
133 } else { 142 } else {
134 apply_update_result = UNEXPECTED_RESPONSE_TYPE_FAILURE; 143 apply_update_result = UNEXPECTED_RESPONSE_TYPE_FAILURE;
135 NOTREACHED() << "Unexpected response type: " << response->response_type(); 144 NOTREACHED() << "Unexpected response type: " << response->response_type();
136 } 145 }
137 146
138 if (apply_update_result == APPLY_UPDATE_SUCCESS) { 147 if (apply_update_result == APPLY_UPDATE_SUCCESS) {
139 // new_store is done updating, pass it to the callback. 148 // new_store is done updating, pass it to the callback.
140 callback_task_runner->PostTask( 149 callback_task_runner->PostTask(
141 FROM_HERE, base::Bind(callback, base::Passed(&new_store))); 150 FROM_HERE, base::Bind(callback, base::Passed(&new_store)));
142 } else { 151 } else {
143 // new_store failed updating. Pass a nullptr to the callback. 152 // new_store failed updating. Pass a nullptr to the callback.
144 callback_task_runner->PostTask(FROM_HERE, base::Bind(callback, nullptr)); 153 callback_task_runner->PostTask(FROM_HERE, base::Bind(callback, nullptr));
145 } 154 }
146 155
147 RecordApplyUpdateResult(apply_update_result); 156 RecordApplyUpdateResult(apply_update_result);
148 } 157 }
149 158
150 // static 159 // static
151 ApplyUpdateResult V4Store::UpdateHashPrefixMapFromAdditions( 160 ApplyUpdateResult V4Store::UpdateHashPrefixMapFromAdditions(
152 const ::google::protobuf::RepeatedPtrField<ThreatEntrySet>& additions, 161 const RepeatedPtrField<ThreatEntrySet>& additions,
153 HashPrefixMap* additions_map) { 162 HashPrefixMap* additions_map) {
154 for (const auto& addition : additions) { 163 for (const auto& addition : additions) {
155 // TODO(vakh): Allow other compression types. 164 // TODO(vakh): Allow other compression types.
156 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567 165 // See: https://bugs.chromium.org/p/chromium/issues/detail?id=624567
157 DCHECK_EQ(RAW, addition.compression_type()); 166 DCHECK_EQ(RAW, addition.compression_type());
158 167
159 DCHECK(addition.has_raw_hashes()); 168 DCHECK(addition.has_raw_hashes());
160 DCHECK(addition.raw_hashes().has_raw_hashes()); 169 DCHECK(addition.raw_hashes().has_raw_hashes());
161 170
162 PrefixSize prefix_size = addition.raw_hashes().prefix_size(); 171 PrefixSize prefix_size = addition.raw_hashes().prefix_size();
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after
232 241
233 const HashPrefixes& existing_prefixes = 242 const HashPrefixes& existing_prefixes =
234 (*prefix_map_to_update)[prefix_size]; 243 (*prefix_map_to_update)[prefix_size];
235 size_t existing_capacity = existing_prefixes.capacity(); 244 size_t existing_capacity = existing_prefixes.capacity();
236 245
237 (*prefix_map_to_update)[prefix_size].reserve(existing_capacity + 246 (*prefix_map_to_update)[prefix_size].reserve(existing_capacity +
238 prefix_length_to_add); 247 prefix_length_to_add);
239 } 248 }
240 } 249 }
241 250
242 ApplyUpdateResult V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map, 251 ApplyUpdateResult V4Store::MergeUpdate(
243 const HashPrefixMap& additions_map) { 252 const HashPrefixMap& old_prefixes_map,
253 const HashPrefixMap& additions_map,
254 const RepeatedField<int32>& raw_removals) {
255 const int* removals_iter = raw_removals.begin();
Nathan Parker 2016/07/15 16:49:38 Put this down by total_picked_from_old, since you
vakh (use Gerrit instead) 2016/07/18 18:43:17 Done.
256
244 DCHECK(hash_prefix_map_.empty()); 257 DCHECK(hash_prefix_map_.empty());
245 hash_prefix_map_.clear(); 258 hash_prefix_map_.clear();
246 ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_); 259 ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_);
247 ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_); 260 ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_);
248 261
249 IteratorMap old_iterator_map; 262 IteratorMap old_iterator_map;
250 HashPrefix next_smallest_prefix_old; 263 HashPrefix next_smallest_prefix_old;
251 InitializeIteratorMap(old_prefixes_map, &old_iterator_map); 264 InitializeIteratorMap(old_prefixes_map, &old_iterator_map);
252 bool old_has_unmerged = GetNextSmallestUnmergedPrefix( 265 bool old_has_unmerged = GetNextSmallestUnmergedPrefix(
253 old_prefixes_map, old_iterator_map, &next_smallest_prefix_old); 266 old_prefixes_map, old_iterator_map, &next_smallest_prefix_old);
254 267
255 IteratorMap additions_iterator_map; 268 IteratorMap additions_iterator_map;
256 HashPrefix next_smallest_prefix_additions; 269 HashPrefix next_smallest_prefix_additions;
257 InitializeIteratorMap(additions_map, &additions_iterator_map); 270 InitializeIteratorMap(additions_map, &additions_iterator_map);
258 bool additions_has_unmerged = GetNextSmallestUnmergedPrefix( 271 bool additions_has_unmerged = GetNextSmallestUnmergedPrefix(
259 additions_map, additions_iterator_map, &next_smallest_prefix_additions); 272 additions_map, additions_iterator_map, &next_smallest_prefix_additions);
260 273
261 // Classical merge sort. 274 // Classical merge sort.
262 // The two constructs to merge are maps: old_prefixes_map, additions_map. 275 // The two constructs to merge are maps: old_prefixes_map, additions_map.
263 // At least one of the maps still has elements that need to be merged into the 276 // At least one of the maps still has elements that need to be merged into the
264 // new store. 277 // new store.
278
279 // Keep track of the number of elements picked from the old map. This is used
280 // to determine which elements to drop based on the raw_removals. Note that
281 // picked is not the same as merged. A picked element isn't merged if its
282 // index is on the raw_removals list.
283 int total_picked_from_old = 0;
265 while (old_has_unmerged || additions_has_unmerged) { 284 while (old_has_unmerged || additions_has_unmerged) {
266 // If the same hash prefix appears in the existing store and the additions 285 // If the same hash prefix appears in the existing store and the additions
267 // list, something is clearly wrong. Discard the update. 286 // list, something is clearly wrong. Discard the update.
268 if (old_has_unmerged && additions_has_unmerged && 287 if (old_has_unmerged && additions_has_unmerged &&
269 next_smallest_prefix_old == next_smallest_prefix_additions) { 288 next_smallest_prefix_old == next_smallest_prefix_additions) {
270 return ADDITIONS_HAS_EXISTING_PREFIX_FAILURE; 289 return ADDITIONS_HAS_EXISTING_PREFIX_FAILURE;
271 } 290 }
272 291
273 // Select which map to pick the next hash prefix from to keep the result in 292 // Select which map to pick the next hash prefix from to keep the result in
274 // lexicographically sorted order. 293
275 bool pick_from_old = 294 bool pick_from_old =
276 old_has_unmerged && 295 old_has_unmerged &&
277 (!additions_has_unmerged || 296 (!additions_has_unmerged ||
278 (next_smallest_prefix_old < next_smallest_prefix_additions)); 297 (next_smallest_prefix_old < next_smallest_prefix_additions));
279 298
280 PrefixSize next_smallest_prefix_size; 299 PrefixSize next_smallest_prefix_size;
281 if (pick_from_old) { 300 if (pick_from_old) {
282 next_smallest_prefix_size = next_smallest_prefix_old.size(); 301 next_smallest_prefix_size = next_smallest_prefix_old.size();
283 302
284 // Append the smallest hash to the appropriate list.
285 hash_prefix_map_[next_smallest_prefix_size] += next_smallest_prefix_old;
286
287 // Update the iterator map, which means that we have merged one hash 303 // Update the iterator map, which means that we have merged one hash
288 // prefix of size |next_size_for_old| from the old store. 304 // prefix of size |next_size_for_old| from the old store.
289 old_iterator_map[next_smallest_prefix_size] += next_smallest_prefix_size; 305 old_iterator_map[next_smallest_prefix_size] += next_smallest_prefix_size;
290 306
307 if (removals_iter == raw_removals.end() ||
308 *removals_iter != total_picked_from_old) {
309 // Append the smallest hash to the appropriate list.
310 hash_prefix_map_[next_smallest_prefix_size] += next_smallest_prefix_old;
311 } else {
312 // Element not added to new map. Move the removals iterator forward.
313 removals_iter++;
314 }
315
316 total_picked_from_old++;
317
291 // Find the next smallest unmerged element in the old store's map. 318 // Find the next smallest unmerged element in the old store's map.
292 old_has_unmerged = GetNextSmallestUnmergedPrefix( 319 old_has_unmerged = GetNextSmallestUnmergedPrefix(
293 old_prefixes_map, old_iterator_map, &next_smallest_prefix_old); 320 old_prefixes_map, old_iterator_map, &next_smallest_prefix_old);
294 } else { 321 } else {
295 next_smallest_prefix_size = next_smallest_prefix_additions.size(); 322 next_smallest_prefix_size = next_smallest_prefix_additions.size();
296 323
297 // Append the smallest hash to the appropriate list. 324 // Append the smallest hash to the appropriate list.
298 hash_prefix_map_[next_smallest_prefix_size] += 325 hash_prefix_map_[next_smallest_prefix_size] +=
299 next_smallest_prefix_additions; 326 next_smallest_prefix_additions;
300 327
301 // Update the iterator map, which means that we have merged one hash 328 // Update the iterator map, which means that we have merged one hash
302 // prefix of size |next_smallest_prefix_size| from the update. 329 // prefix of size |next_smallest_prefix_size| from the update.
303 additions_iterator_map[next_smallest_prefix_size] += 330 additions_iterator_map[next_smallest_prefix_size] +=
304 next_smallest_prefix_size; 331 next_smallest_prefix_size;
305 332
306 // Find the next smallest unmerged element in the additions map. 333 // Find the next smallest unmerged element in the additions map.
307 additions_has_unmerged = 334 additions_has_unmerged =
308 GetNextSmallestUnmergedPrefix(additions_map, additions_iterator_map, 335 GetNextSmallestUnmergedPrefix(additions_map, additions_iterator_map,
309 &next_smallest_prefix_additions); 336 &next_smallest_prefix_additions);
310 } 337 }
311 } 338 }
312 339
313 return APPLY_UPDATE_SUCCESS; 340 return (removals_iter == raw_removals.end()) ? APPLY_UPDATE_SUCCESS
341 : REMOVALS_INDEX_TOO_LARGE;
314 } 342 }
315 343
316 StoreReadResult V4Store::ReadFromDisk() { 344 StoreReadResult V4Store::ReadFromDisk() {
317 DCHECK(task_runner_->RunsTasksOnCurrentThread()); 345 DCHECK(task_runner_->RunsTasksOnCurrentThread());
318 346
319 std::string contents; 347 std::string contents;
320 bool read_success = base::ReadFileToString(store_path_, &contents); 348 bool read_success = base::ReadFileToString(store_path_, &contents);
321 if (!read_success) { 349 if (!read_success) {
322 return FILE_UNREADABLE_FAILURE; 350 return FILE_UNREADABLE_FAILURE;
323 } 351 }
(...skipping 68 matching lines...) Expand 10 before | Expand all | Expand 10 after
392 420
393 if (!base::Move(new_filename, store_path_)) { 421 if (!base::Move(new_filename, store_path_)) {
394 DVLOG(1) << "store_path_: " << store_path_.value(); 422 DVLOG(1) << "store_path_: " << store_path_.value();
395 return UNABLE_TO_RENAME_FAILURE; 423 return UNABLE_TO_RENAME_FAILURE;
396 } 424 }
397 425
398 return WRITE_SUCCESS; 426 return WRITE_SUCCESS;
399 } 427 }
400 428
401 } // namespace safe_browsing 429 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « components/safe_browsing_db/v4_store.h ('k') | components/safe_browsing_db/v4_store_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698