OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/safe_browsing/safe_browsing_store.h" | 5 #include "chrome/browser/safe_browsing/safe_browsing_store.h" |
6 | 6 |
7 #include <algorithm> | 7 #include <algorithm> |
8 | 8 |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/metrics/histogram.h" |
10 | 11 |
11 namespace { | 12 namespace { |
12 | 13 |
13 // Return |true| if the range is sorted by the given comparator. | 14 // Return |true| if the range is sorted by the given comparator. |
14 template <typename CTI, typename LESS> | 15 template <typename CTI, typename LESS> |
15 bool sorted(CTI beg, CTI end, LESS less) { | 16 bool sorted(CTI beg, CTI end, LESS less) { |
16 while ((end - beg) > 2) { | 17 while ((end - beg) > 2) { |
17 CTI n = beg++; | 18 CTI n = beg++; |
18 if (less(*beg, *n)) | 19 if (less(*beg, *n)) |
19 return false; | 20 return false; |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
124 for (typename ItemsT::iterator iter = end_iter; | 125 for (typename ItemsT::iterator iter = end_iter; |
125 iter != items->end(); ++iter) { | 126 iter != items->end(); ++iter) { |
126 if (del_set.count(iter->chunk_id) == 0) { | 127 if (del_set.count(iter->chunk_id) == 0) { |
127 *end_iter = *iter; | 128 *end_iter = *iter; |
128 ++end_iter; | 129 ++end_iter; |
129 } | 130 } |
130 } | 131 } |
131 items->erase(end_iter, items->end()); | 132 items->erase(end_iter, items->end()); |
132 } | 133 } |
133 | 134 |
| 135 // Remove prefixes which are in the same chunk as their fullhash. This was a |
| 136 // mistake in earlier implementations. |
| 137 template <typename HashesT, typename PrefixesT> |
| 138 size_t KnockoutPrefixVolunteers(const HashesT& full_hashes, |
| 139 PrefixesT* prefixes) { |
| 140 typename PrefixesT::iterator prefixes_process = prefixes->begin(); |
| 141 typename PrefixesT::iterator prefixes_out = prefixes->begin(); |
| 142 typename HashesT::const_iterator hashes_process = full_hashes.begin(); |
| 143 |
| 144 size_t skipped_count = 0; |
| 145 |
| 146 while (hashes_process != full_hashes.end()) { |
| 147 // Scan prefixes forward until an item is not less than the current hash. |
| 148 while (prefixes_process != prefixes->end() && |
| 149 SBAddPrefixLess(*prefixes_process, *hashes_process)) { |
| 150 if (prefixes_process != prefixes_out) { |
| 151 *prefixes_out = *prefixes_process; |
| 152 } |
| 153 prefixes_out++; |
| 154 prefixes_process++; |
| 155 } |
| 156 |
| 157 // If the current hash is also not less than the prefix, that implies they |
| 158 // are equal. Skip the prefix. |
| 159 if (prefixes_process != prefixes->end() && |
| 160 !SBAddPrefixLess(*hashes_process, *prefixes_process)) { |
| 161 skipped_count++; |
| 162 prefixes_process++; |
| 163 } |
| 164 |
| 165 hashes_process++; |
| 166 } |
| 167 |
| 168 // If any prefixes have been dropped, shift the last batch over and remove the |
| 169 // trailing elements. |
| 170 if (prefixes_process != prefixes_out) { |
| 171 prefixes_out = std::copy(prefixes_process, prefixes->end(), prefixes_out); |
| 172 prefixes->erase(prefixes_out, prefixes_process); |
| 173 } |
| 174 |
| 175 return skipped_count; |
| 176 } |
| 177 |
134 } // namespace | 178 } // namespace |
135 | 179 |
136 void SBProcessSubs(SBAddPrefixes* add_prefixes, | 180 void SBProcessSubs(SBAddPrefixes* add_prefixes, |
137 SBSubPrefixes* sub_prefixes, | 181 SBSubPrefixes* sub_prefixes, |
138 std::vector<SBAddFullHash>* add_full_hashes, | 182 std::vector<SBAddFullHash>* add_full_hashes, |
139 std::vector<SBSubFullHash>* sub_full_hashes, | 183 std::vector<SBSubFullHash>* sub_full_hashes, |
140 const base::hash_set<int32>& add_chunks_deleted, | 184 const base::hash_set<int32>& add_chunks_deleted, |
141 const base::hash_set<int32>& sub_chunks_deleted) { | 185 const base::hash_set<int32>& sub_chunks_deleted) { |
142 // It is possible to structure templates and template | 186 // It is possible to structure templates and template |
143 // specializations such that the following calls work without having | 187 // specializations such that the following calls work without having |
144 // to qualify things. It becomes very arbitrary, though, and less | 188 // to qualify things. It becomes very arbitrary, though, and less |
145 // clear how things are working. | 189 // clear how things are working. |
146 | 190 |
147 // Make sure things are sorted appropriately. | 191 // Make sure things are sorted appropriately. |
148 DCHECK(sorted(add_prefixes->begin(), add_prefixes->end(), | 192 DCHECK(sorted(add_prefixes->begin(), add_prefixes->end(), |
149 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>)); | 193 SBAddPrefixLess<SBAddPrefix,SBAddPrefix>)); |
150 DCHECK(sorted(sub_prefixes->begin(), sub_prefixes->end(), | 194 DCHECK(sorted(sub_prefixes->begin(), sub_prefixes->end(), |
151 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>)); | 195 SBAddPrefixLess<SBSubPrefix,SBSubPrefix>)); |
152 DCHECK(sorted(add_full_hashes->begin(), add_full_hashes->end(), | 196 DCHECK(sorted(add_full_hashes->begin(), add_full_hashes->end(), |
153 SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>)); | 197 SBAddPrefixHashLess<SBAddFullHash,SBAddFullHash>)); |
154 DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(), | 198 DCHECK(sorted(sub_full_hashes->begin(), sub_full_hashes->end(), |
155 SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>)); | 199 SBAddPrefixHashLess<SBSubFullHash,SBSubFullHash>)); |
156 | 200 |
| 201 // Earlier database code added prefixes when it saw fullhashes. The protocol |
| 202 // should never send a chunk of mixed prefixes and fullhashes, the following |
| 203 // removes any such cases which are seen. |
| 204 // TODO(shess): Remove this code once most databases have been processed. |
| 205 // Chunk churn should clean up anyone left over. This only takes a few ms to |
| 206 // run through my current database, so it doesn't seem worthwhile to do much |
| 207 // more than that. |
| 208 size_t skipped = KnockoutPrefixVolunteers(*add_full_hashes, add_prefixes); |
| 209 skipped += KnockoutPrefixVolunteers(*sub_full_hashes, sub_prefixes); |
| 210 UMA_HISTOGRAM_COUNTS("SB2.VolunteerPrefixesRemoved", skipped); |
| 211 |
157 // Factor out the prefix subs. | 212 // Factor out the prefix subs. |
158 SBAddPrefixes removed_adds; | 213 SBAddPrefixes removed_adds; |
159 KnockoutSubs(sub_prefixes, add_prefixes, | 214 KnockoutSubs(sub_prefixes, add_prefixes, |
160 SBAddPrefixLess<SBAddPrefix,SBSubPrefix>, | 215 SBAddPrefixLess<SBAddPrefix,SBSubPrefix>, |
161 SBAddPrefixLess<SBSubPrefix,SBAddPrefix>, | 216 SBAddPrefixLess<SBSubPrefix,SBAddPrefix>, |
162 &removed_adds); | 217 &removed_adds); |
163 | 218 |
164 // Remove the full-hashes corrosponding to the adds which | 219 // Remove the full-hashes corrosponding to the adds which |
165 // KnockoutSubs() removed. Processing these w/in KnockoutSubs() | 220 // KnockoutSubs() removed. Processing these w/in KnockoutSubs() |
166 // would make the code more complicated, and they are very small | 221 // would make the code more complicated, and they are very small |
167 // relative to the prefix lists so the gain would be modest. | 222 // relative to the prefix lists so the gain would be modest. |
168 RemoveMatchingPrefixes(removed_adds, add_full_hashes); | 223 RemoveMatchingPrefixes(removed_adds, add_full_hashes); |
169 RemoveMatchingPrefixes(removed_adds, sub_full_hashes); | 224 RemoveMatchingPrefixes(removed_adds, sub_full_hashes); |
170 | 225 |
171 // Factor out the full-hash subs. | 226 // Factor out the full-hash subs. |
172 std::vector<SBAddFullHash> removed_full_adds; | 227 std::vector<SBAddFullHash> removed_full_adds; |
173 KnockoutSubs(sub_full_hashes, add_full_hashes, | 228 KnockoutSubs(sub_full_hashes, add_full_hashes, |
174 SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>, | 229 SBAddPrefixHashLess<SBAddFullHash,SBSubFullHash>, |
175 SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>, | 230 SBAddPrefixHashLess<SBSubFullHash,SBAddFullHash>, |
176 &removed_full_adds); | 231 &removed_full_adds); |
177 | 232 |
178 // Remove items from the deleted chunks. This is done after other | 233 // Remove items from the deleted chunks. This is done after other |
179 // processing to allow subs to knock out adds (and be removed) even | 234 // processing to allow subs to knock out adds (and be removed) even |
180 // if the add's chunk is deleted. | 235 // if the add's chunk is deleted. |
181 RemoveDeleted(add_prefixes, add_chunks_deleted); | 236 RemoveDeleted(add_prefixes, add_chunks_deleted); |
182 RemoveDeleted(sub_prefixes, sub_chunks_deleted); | 237 RemoveDeleted(sub_prefixes, sub_chunks_deleted); |
183 RemoveDeleted(add_full_hashes, add_chunks_deleted); | 238 RemoveDeleted(add_full_hashes, add_chunks_deleted); |
184 RemoveDeleted(sub_full_hashes, sub_chunks_deleted); | 239 RemoveDeleted(sub_full_hashes, sub_chunks_deleted); |
185 } | 240 } |
OLD | NEW |