OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/safe_browsing/safe_browsing_store.h" | 5 #include "chrome/browser/safe_browsing/safe_browsing_store.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/metrics/histogram.h" |
10 | 11 |
11 namespace { | 12 namespace { |
12 | 13 |
13 // Return |true| if the range is sorted by the given comparator. | 14 // Return |true| if the range is sorted by the given comparator. |
14 template <typename CTI, typename LESS> | 15 template <typename CTI, typename LESS> |
15 bool sorted(CTI beg, CTI end, LESS less) { | 16 bool sorted(CTI beg, CTI end, LESS less) { |
16 while ((end - beg) > 2) { | 17 while ((end - beg) > 2) { |
17 CTI n = beg++; | 18 CTI n = beg++; |
18 if (less(*beg, *n)) | 19 if (less(*beg, *n)) |
19 return false; | 20 return false; |
(...skipping 61 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
81 for (typename ItemsT::iterator iter = end_iter; | 82 for (typename ItemsT::iterator iter = end_iter; |
82 iter != items->end(); ++iter) { | 83 iter != items->end(); ++iter) { |
83 if (del_set.count(iter->chunk_id) == 0) { | 84 if (del_set.count(iter->chunk_id) == 0) { |
84 *end_iter = *iter; | 85 *end_iter = *iter; |
85 ++end_iter; | 86 ++end_iter; |
86 } | 87 } |
87 } | 88 } |
88 items->erase(end_iter, items->end()); | 89 items->erase(end_iter, items->end()); |
89 } | 90 } |
90 | 91 |
| 92 // Remove prefixes which are in the same chunk as their fullhash. This was a |
| 93 // mistake in earlier implementations. |
| 94 template <typename HashesT, typename PrefixesT> |
| 95 size_t KnockoutPrefixVolunteers(const HashesT& full_hashes, |
| 96 PrefixesT* prefixes) { |
| 97 typename PrefixesT::iterator prefixes_process = prefixes->begin(); |
| 98 typename PrefixesT::iterator prefixes_out = prefixes->begin(); |
| 99 typename HashesT::const_iterator hashes_process = full_hashes.begin(); |
| 100 |
| 101 size_t skipped_count = 0; |
| 102 |
| 103 while (hashes_process != full_hashes.end()) { |
| 104 // Scan prefixes forward until an item is not less than the current hash. |
| 105 while (prefixes_process != prefixes->end() && |
| 106 SBAddPrefixLess(*prefixes_process, *hashes_process)) { |
| 107 if (prefixes_process != prefixes_out) { |
| 108 *prefixes_out = *prefixes_process; |
| 109 } |
| 110 prefixes_out++; |
| 111 prefixes_process++; |
| 112 } |
| 113 |
| 114 // If the current hash is also not less than the prefix, that implies they |
| 115 // are equal. Skip the prefix. |
| 116 if (prefixes_process != prefixes->end() && |
| 117 !SBAddPrefixLess(*hashes_process, *prefixes_process)) { |
| 118 skipped_count++; |
| 119 prefixes_process++; |
| 120 } |
| 121 |
| 122 hashes_process++; |
| 123 } |
| 124 |
| 125 // If any prefixes were skipped, copy over the tail and erase the excess. |
| 126 if (prefixes_process != prefixes_out) { |
| 127 prefixes_out = std::copy(prefixes_process, prefixes->end(), prefixes_out); |
| 128 prefixes->erase(prefixes_out, prefixes->end()); |
| 129 } |
| 130 |
| 131 return skipped_count; |
| 132 } |
| 133 |
91 } // namespace | 134 } // namespace |
92 | 135 |
93 void SBProcessSubs(SBAddPrefixes* add_prefixes, | 136 void SBProcessSubs(SBAddPrefixes* add_prefixes, |
94 SBSubPrefixes* sub_prefixes, | 137 SBSubPrefixes* sub_prefixes, |
95 std::vector<SBAddFullHash>* add_full_hashes, | 138 std::vector<SBAddFullHash>* add_full_hashes, |
96 std::vector<SBSubFullHash>* sub_full_hashes, | 139 std::vector<SBSubFullHash>* sub_full_hashes, |
97 const base::hash_set<int32>& add_chunks_deleted, | 140 const base::hash_set<int32>& add_chunks_deleted, |
98 const base::hash_set<int32>& sub_chunks_deleted) { | 141 const base::hash_set<int32>& sub_chunks_deleted) { |
99 // It is possible to structure templates and template | 142 // It is possible to structure templates and template |
100 // specializations such that the following calls work without having | 143 // specializations such that the following calls work without having |
101 // to qualify things. It becomes very arbitrary, though, and less | 144 // to qualify things. It becomes very arbitrary, though, and less |
102 // clear how things are working. | 145 // clear how things are working. |
103 | 146 |
104 // Make sure things are sorted appropriately. | 147 // Make sure things are sorted appropriately. |
105 DCHECK(sorted(add_prefixes->begin(), add_prefixes->end(), | 148 DCHECK(sorted(add_prefixes->begin(), add_prefixes->end(), |
106 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>)); | 149 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>)); |
107 DCHECK(sorted(sub_prefixes->begin(), sub_prefixes->end(), | 150 DCHECK(sorted(sub_prefixes->begin(), sub_prefixes->end(), |
108 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>)); | 151 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>)); |
109 DCHECK(sorted(add_full_hashes->begin(), add_full_hashes->end(), | 152 DCHECK(sorted(add_full_hashes->begin(), add_full_hashes->end(), |
110 SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>)); | 153 SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>)); |
111 DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(), | 154 DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(), |
112 SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>)); | 155 SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>)); |
113 | 156 |
| 157 // Earlier database code added prefixes when it saw fullhashes. The protocol |
| 158 // should never send a chunk of mixed prefixes and fullhashes, the following |
| 159 // removes any such cases which are seen. |
| 160 // TODO(shess): Remove this code once most databases have been processed. |
| 161 // Chunk churn should clean up anyone left over. This only takes a few ms to |
| 162 // run through my current database, so it doesn't seem worthwhile to do much |
| 163 // more than that. |
| 164 size_t skipped = KnockoutPrefixVolunteers(*add_full_hashes, add_prefixes); |
| 165 skipped += KnockoutPrefixVolunteers(*sub_full_hashes, sub_prefixes); |
| 166 UMA_HISTOGRAM_COUNTS("SB2.VolunteerPrefixesRemoved", skipped); |
| 167 |
114 // Factor out the prefix subs. | 168 // Factor out the prefix subs. |
115 KnockoutSubs(sub_prefixes, add_prefixes, | 169 KnockoutSubs(sub_prefixes, add_prefixes, |
116 SBAddPrefixLess<SBAddPrefix,SBSubPrefix>, | 170 SBAddPrefixLess<SBAddPrefix,SBSubPrefix>, |
117 SBAddPrefixLess<SBSubPrefix,SBAddPrefix>); | 171 SBAddPrefixLess<SBSubPrefix,SBAddPrefix>); |
118 | 172 |
119 // Factor out the full-hash subs. | 173 // Factor out the full-hash subs. |
120 KnockoutSubs(sub_full_hashes, add_full_hashes, | 174 KnockoutSubs(sub_full_hashes, add_full_hashes, |
121 SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>, | 175 SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>, |
122 SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>); | 176 SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>); |
123 | 177 |
124 // Remove items from the deleted chunks. This is done after other | 178 // Remove items from the deleted chunks. This is done after other |
125 // processing to allow subs to knock out adds (and be removed) even | 179 // processing to allow subs to knock out adds (and be removed) even |
126 // if the add's chunk is deleted. | 180 // if the add's chunk is deleted. |
127 RemoveDeleted(add_prefixes, add_chunks_deleted); | 181 RemoveDeleted(add_prefixes, add_chunks_deleted); |
128 RemoveDeleted(sub_prefixes, sub_chunks_deleted); | 182 RemoveDeleted(sub_prefixes, sub_chunks_deleted); |
129 RemoveDeleted(add_full_hashes, add_chunks_deleted); | 183 RemoveDeleted(add_full_hashes, add_chunks_deleted); |
130 RemoveDeleted(sub_full_hashes, sub_chunks_deleted); | 184 RemoveDeleted(sub_full_hashes, sub_chunks_deleted); |
131 } | 185 } |
OLD | NEW |