components/safe_browsing_db/v4_store.cc - Issue 2154973002: Reland: PVer4: Keep track of the smallest hashes not their sizes.

Side by Side Diff: components/safe_browsing_db/v4_store.cc

Issue 2154973002: Reland: PVer4: Keep track of the smallest hashes not their sizes. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Don't increment the iterator past the end. Check first. Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "base/base64.h"	5 #include "base/base64.h"

6 #include "base/bind.h"	6 #include "base/bind.h"

7 #include "base/files/file_util.h"	7 #include "base/files/file_util.h"

8 #include "base/metrics/histogram_macros.h"	8 #include "base/metrics/histogram_macros.h"

9 #include "base/metrics/sparse_histogram.h"	9 #include "base/metrics/sparse_histogram.h"

10 #include "base/strings/stringprintf.h"	10 #include "base/strings/stringprintf.h"

(...skipping 173 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
184 }	184 }

185 if (lumped_hashes.size() % prefix_size != 0) {	185 if (lumped_hashes.size() % prefix_size != 0) {

186 return ADDITIONS_SIZE_UNEXPECTED_FAILURE;	186 return ADDITIONS_SIZE_UNEXPECTED_FAILURE;

187 }	187 }

188 // TODO(vakh): Figure out a way to avoid the following copy operation.	188 // TODO(vakh): Figure out a way to avoid the following copy operation.

189 (*additions_map)[prefix_size] = lumped_hashes;	189 (*additions_map)[prefix_size] = lumped_hashes;

190 return APPLY_UPDATE_SUCCESS;	190 return APPLY_UPDATE_SUCCESS;

191 }	191 }

192	192

193 // static	193 // static

194 bool V4Store::GetNextSmallestPrefixSize(const HashPrefixMap& hash_prefix_map,	194 bool V4Store::GetNextSmallestUnmergedPrefix(

195 const CounterMap& counter_map,	195 const HashPrefixMap& hash_prefix_map,

196 PrefixSize* smallest_prefix_size) {	196 const IteratorMap& iterator_map,

197 HashPrefix smallest_prefix, current_prefix;	197 HashPrefix* smallest_hash_prefix) {

198 for (const auto& counter_pair : counter_map) {	198 HashPrefix current_hash_prefix;

199 PrefixSize prefix_size = counter_pair.first;	199 bool has_unmerged = false;

200 size_t index = counter_pair.second;

201 size_t sized_index = prefix_size * index;

202	200

	201 for (const auto& iterator_pair : iterator_map) {

	202 PrefixSize prefix_size = iterator_pair.first;

	203 HashPrefixes::const_iterator start = iterator_pair.second;

203 const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);	204 const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);

204 if (sized_index < hash_prefixes.size()) {	205 if (prefix_size <=

205 current_prefix = hash_prefixes.substr(sized_index, prefix_size);	206 static_cast<PrefixSize>(std::distance(start, hash_prefixes.end()))) {
	Nathan Parker 2016/07/18 16:14:21 What was wrong with the code before? What was wrong with the code before?
206 if (smallest_prefix.empty() \|\| smallest_prefix > current_prefix) {	207 current_hash_prefix = std::string(start, start + prefix_size);

207 smallest_prefix = current_prefix;	208 if (!has_unmerged \|\| *smallest_hash_prefix > current_hash_prefix) {

208 *smallest_prefix_size = prefix_size;	209 has_unmerged = true;

	210 smallest_hash_prefix->swap(current_hash_prefix);

209 }	211 }

210 }	212 }

211 }	213 }

212 return !smallest_prefix.empty();	214 return has_unmerged;

213 }	215 }

214	216

215 // static	217 // static

216 void V4Store::InitializeCounterMap(const HashPrefixMap& hash_prefix_map,	218 void V4Store::InitializeIteratorMap(const HashPrefixMap& hash_prefix_map,

217 CounterMap* counter_map) {	219 IteratorMap* iterator_map) {

218 for (const auto& map_pair : hash_prefix_map) {	220 for (const auto& map_pair : hash_prefix_map) {

219 (*counter_map)[map_pair.first] = 0;	221 (*iterator_map)[map_pair.first] = map_pair.second.begin();

220 }	222 }

221 }	223 }

222	224

223 // static	225 // static

224 HashPrefix V4Store::GetNextUnmergedPrefixForSize(

225 PrefixSize prefix_size,

226 const HashPrefixMap& hash_prefix_map,

227 const CounterMap& counter_map) {

228 const HashPrefixes& hash_prefixes = hash_prefix_map.at(prefix_size);

229 size_t index_within_list = counter_map.at(prefix_size);

230 size_t sized_index = prefix_size * index_within_list;

231 return hash_prefixes.substr(sized_index, prefix_size);

232 }

233

234 // static

235 void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,	226 void V4Store::ReserveSpaceInPrefixMap(const HashPrefixMap& other_prefixes_map,

236 HashPrefixMap* prefix_map_to_update) {	227 HashPrefixMap* prefix_map_to_update) {

237 for (const auto& pair : other_prefixes_map) {	228 for (const auto& pair : other_prefixes_map) {

238 PrefixSize prefix_size = pair.first;	229 PrefixSize prefix_size = pair.first;

239 size_t prefix_length_to_add = pair.second.length();	230 size_t prefix_length_to_add = pair.second.length();

240	231

241 const HashPrefixes& existing_prefixes =	232 const HashPrefixes& existing_prefixes =

242 (*prefix_map_to_update)[prefix_size];	233 (*prefix_map_to_update)[prefix_size];

243 size_t existing_capacity = existing_prefixes.capacity();	234 size_t existing_capacity = existing_prefixes.capacity();

244	235

245 (*prefix_map_to_update)[prefix_size].reserve(existing_capacity +	236 (*prefix_map_to_update)[prefix_size].reserve(existing_capacity +

246 prefix_length_to_add);	237 prefix_length_to_add);

247 }	238 }

248 }	239 }

249	240

250 ApplyUpdateResult V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map,	241 ApplyUpdateResult V4Store::MergeUpdate(const HashPrefixMap& old_prefixes_map,

251 const HashPrefixMap& additions_map) {	242 const HashPrefixMap& additions_map) {

252 DCHECK(hash_prefix_map_.empty());	243 DCHECK(hash_prefix_map_.empty());

253 hash_prefix_map_.clear();	244 hash_prefix_map_.clear();

254 ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_);	245 ReserveSpaceInPrefixMap(old_prefixes_map, &hash_prefix_map_);

255 ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_);	246 ReserveSpaceInPrefixMap(additions_map, &hash_prefix_map_);

256	247

257 PrefixSize next_size_for_old;	248 IteratorMap old_iterator_map;

258 CounterMap old_counter_map;	249 HashPrefix next_smallest_prefix_old;

259 InitializeCounterMap(old_prefixes_map, &old_counter_map);	250 InitializeIteratorMap(old_prefixes_map, &old_iterator_map);

260 bool old_has_unmerged = GetNextSmallestPrefixSize(	251 bool old_has_unmerged = GetNextSmallestUnmergedPrefix(

261 old_prefixes_map, old_counter_map, &next_size_for_old);	252 old_prefixes_map, old_iterator_map, &next_smallest_prefix_old);

262	253

263 PrefixSize next_size_for_additions;	254 IteratorMap additions_iterator_map;

264 CounterMap additions_counter_map;	255 HashPrefix next_smallest_prefix_additions;

265 InitializeCounterMap(additions_map, &additions_counter_map);	256 InitializeIteratorMap(additions_map, &additions_iterator_map);

266 bool additions_has_unmerged = GetNextSmallestPrefixSize(	257 bool additions_has_unmerged = GetNextSmallestUnmergedPrefix(

267 additions_map, additions_counter_map, &next_size_for_additions);	258 additions_map, additions_iterator_map, &next_smallest_prefix_additions);

268	259

269 // Classical merge sort.	260 // Classical merge sort.

270 // The two constructs to merge are maps: old_prefixes_map, additions_map.	261 // The two constructs to merge are maps: old_prefixes_map, additions_map.

271

272 HashPrefix next_smallest_prefix_old, next_smallest_prefix_additions;

273 // At least one of the maps still has elements that need to be merged into the	262 // At least one of the maps still has elements that need to be merged into the

274 // new store.	263 // new store.

275 while (old_has_unmerged \|\| additions_has_unmerged) {	264 while (old_has_unmerged \|\| additions_has_unmerged) {

276 // Old map still has elements that need to be merged.

277 if (old_has_unmerged) {

278 // Get the lexographically smallest hash prefix from the old store.

279 next_smallest_prefix_old = GetNextUnmergedPrefixForSize(

280 next_size_for_old, old_prefixes_map, old_counter_map);

281 }

282

283 // Additions map still has elements that need to be merged.

284 if (additions_has_unmerged) {

285 // Get the lexographically smallest hash prefix from the additions in the

286 // latest update from the server.

287 next_smallest_prefix_additions = GetNextUnmergedPrefixForSize(

288 next_size_for_additions, additions_map, additions_counter_map);

289 }

290

291 // If the same hash prefix appears in the existing store and the additions	265 // If the same hash prefix appears in the existing store and the additions

292 // list, something is clearly wrong. Discard the update.	266 // list, something is clearly wrong. Discard the update.

293 if (old_has_unmerged && additions_has_unmerged &&	267 if (old_has_unmerged && additions_has_unmerged &&

294 next_smallest_prefix_old == next_smallest_prefix_additions) {	268 next_smallest_prefix_old == next_smallest_prefix_additions) {

295 return ADDITIONS_HAS_EXISTING_PREFIX_FAILURE;	269 return ADDITIONS_HAS_EXISTING_PREFIX_FAILURE;

296 }	270 }

297	271

298 // Select which map to pick the next hash prefix from to keep the result in	272 // Select which map to pick the next hash prefix from to keep the result in

299 // lexographically sorted order.	273 // lexographically sorted order.

300 bool pick_from_old =	274 bool pick_from_old =

301 old_has_unmerged &&	275 old_has_unmerged &&

302 (!additions_has_unmerged \|\|	276 (!additions_has_unmerged \|\|

303 (next_smallest_prefix_old < next_smallest_prefix_additions));	277 (next_smallest_prefix_old < next_smallest_prefix_additions));

304	278

	279 PrefixSize next_smallest_prefix_size;

305 if (pick_from_old) {	280 if (pick_from_old) {

306 hash_prefix_map_[next_size_for_old] += next_smallest_prefix_old;	281 next_smallest_prefix_size = next_smallest_prefix_old.size();

307	282

308 // Update the counter map, which means that we have merged one hash	283 // Append the smallest hash to the appropriate list.

	284 hash_prefix_map_[next_smallest_prefix_size] += next_smallest_prefix_old;

	285

	286 // Update the iterator map, which means that we have merged one hash

309 // prefix of size \|next_size_for_old\| from the old store.	287 // prefix of size \|next_size_for_old\| from the old store.

310 old_counter_map[next_size_for_old]++;	288 old_iterator_map[next_smallest_prefix_size] += next_smallest_prefix_size;

311	289

312 // Find the next smallest unmerged element in the old store's map.	290 // Find the next smallest unmerged element in the old store's map.

313 old_has_unmerged = GetNextSmallestPrefixSize(	291 old_has_unmerged = GetNextSmallestUnmergedPrefix(

314 old_prefixes_map, old_counter_map, &next_size_for_old);	292 old_prefixes_map, old_iterator_map, &next_smallest_prefix_old);

315 } else {	293 } else {

316 hash_prefix_map_[next_size_for_additions] +=	294 next_smallest_prefix_size = next_smallest_prefix_additions.size();

	295

	296 // Append the smallest hash to the appropriate list.

	297 hash_prefix_map_[next_smallest_prefix_size] +=

317 next_smallest_prefix_additions;	298 next_smallest_prefix_additions;

318	299

319 // Update the counter map, which means that we have merged one hash	300 // Update the iterator map, which means that we have merged one hash

320 // prefix of size \|next_size_for_additions\| from the update.	301 // prefix of size \|next_smallest_prefix_size\| from the update.

321 additions_counter_map[next_size_for_additions]++;	302 additions_iterator_map[next_smallest_prefix_size] +=

	303 next_smallest_prefix_size;

322	304

323 // Find the next smallest unmerged element in the additions map.	305 // Find the next smallest unmerged element in the additions map.

324 additions_has_unmerged = GetNextSmallestPrefixSize(	306 additions_has_unmerged =

325 additions_map, additions_counter_map, &next_size_for_additions);	307 GetNextSmallestUnmergedPrefix(additions_map, additions_iterator_map,

	308 &next_smallest_prefix_additions);

326 }	309 }

327 }	310 }

328	311

329 return APPLY_UPDATE_SUCCESS;	312 return APPLY_UPDATE_SUCCESS;

330 }	313 }

331	314

332 StoreReadResult V4Store::ReadFromDisk() {	315 StoreReadResult V4Store::ReadFromDisk() {

333 DCHECK(task_runner_->RunsTasksOnCurrentThread());	316 DCHECK(task_runner_->RunsTasksOnCurrentThread());

334	317

335 std::string contents;	318 std::string contents;

(...skipping 72 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
408	391

409 if (!base::Move(new_filename, store_path_)) {	392 if (!base::Move(new_filename, store_path_)) {

410 DVLOG(1) << "store_path_: " << store_path_.value();	393 DVLOG(1) << "store_path_: " << store_path_.value();

411 return UNABLE_TO_RENAME_FAILURE;	394 return UNABLE_TO_RENAME_FAILURE;

412 }	395 }

413	396

414 return WRITE_SUCCESS;	397 return WRITE_SUCCESS;

415 }	398 }

416	399

417 } // namespace safe_browsing	400 } // namespace safe_browsing

OLD	NEW

« no previous file with comments | « components/safe_browsing_db/v4_store.h ('k') | components/safe_browsing_db/v4_store_unittest.cc » ('j') | no next file with comments »