| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // A read-only set implementation for |SBPrefix| items. Prefixes are | 5 // A read-only set implementation for |SBPrefix| items. Prefixes are |
| 6 // sorted and stored as 16-bit deltas from the previous prefix. An | 6 // sorted and stored as 16-bit deltas from the previous prefix. An |
| 7 // index structure provides quick random access, and also handles | 7 // index structure provides quick random access, and also handles |
| 8 // cases where 16 bits cannot encode a delta. | 8 // cases where 16 bits cannot encode a delta. |
| 9 // | 9 // |
| 10 // For example, the sequence {20, 25, 41, 65432, 150000, 160000} would | 10 // For example, the sequence {20, 25, 41, 65432, 150000, 160000} would |
| (...skipping 19 matching lines...) Expand all Loading... |
| 30 // Experimenting with random selections of the above data, storage | 30 // Experimenting with random selections of the above data, storage |
| 31 // size drops almost linearly as prefix count drops, until the index | 31 // size drops almost linearly as prefix count drops, until the index |
| 32 // overhead starts to become a problem a bit under 200k prefixes. The | 32 // overhead starts to become a problem a bit under 200k prefixes. The |
| 33 // memory footprint gets worse than storing the raw prefix data around | 33 // memory footprint gets worse than storing the raw prefix data around |
| 34 // 75k prefixes. Fortunately, the actual memory footprint also falls. | 34 // 75k prefixes. Fortunately, the actual memory footprint also falls. |
| 35 // If the prefix count increases the memory footprint should increase | 35 // If the prefix count increases the memory footprint should increase |
| 36 // approximately linearly. The worst-case would be 2^16 items all | 36 // approximately linearly. The worst-case would be 2^16 items all |
| 37 // 2^16 apart, which would need 512k (versus 256k to store the raw | 37 // 2^16 apart, which would need 512k (versus 256k to store the raw |
| 38 // data). | 38 // data). |
| 39 // | 39 // |
| 40 // TODO(shess): Write serialization code. Something like this should | 40 // The on-disk format looks like: |
| 41 // work: | |
| 42 // 4 byte magic number | 41 // 4 byte magic number |
| 43 // 4 byte version number | 42 // 4 byte version number |
| 44 // 4 byte |index_.size()| | 43 // 4 byte |index_.size()| |
| 45 // 4 byte |deltas_.size()| | 44 // 4 byte |deltas_.size()| |
| 46 // n * 8 byte |&index_[0]..&index_[n]| | 45 // n * 8 byte |&index_[0]..&index_[n]| |
| 47 // m * 2 byte |&deltas_[0]..&deltas_[m]| | 46 // m * 2 byte |&deltas_[0]..&deltas_[m]| |
| 48 // 16 byte digest | 47 // 16 byte digest |
| 49 | 48 |
| 50 #ifndef CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ | 49 #ifndef CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ |
| 51 #define CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ | 50 #define CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ |
| 52 #pragma once | 51 #pragma once |
| 53 | 52 |
| 54 #include <vector> | 53 #include <vector> |
| 55 | 54 |
| 56 #include "chrome/browser/safe_browsing/safe_browsing_util.h" | 55 #include "chrome/browser/safe_browsing/safe_browsing_util.h" |
| 57 | 56 |
| 57 class FilePath; |
| 58 |
| 58 namespace safe_browsing { | 59 namespace safe_browsing { |
| 59 | 60 |
| 60 class PrefixSet { | 61 class PrefixSet { |
| 61 public: | 62 public: |
| 62 explicit PrefixSet(const std::vector<SBPrefix>& sorted_prefixes); | 63 explicit PrefixSet(const std::vector<SBPrefix>& sorted_prefixes); |
| 63 ~PrefixSet(); | 64 ~PrefixSet(); |
| 64 | 65 |
| 65 // |true| if |prefix| was in |prefixes| passed to the constructor. | 66 // |true| if |prefix| was in the |sorted_prefixes| passed to the constructor. |
| 66 bool Exists(SBPrefix prefix) const; | 67 bool Exists(SBPrefix prefix) const; |
| 67 | 68 |
| 69 // Persist the set on disk. |LoadFile()| is static and returns a |PrefixSet*|; |WriteFile()| returns |bool|. NOTE(review): ownership of the returned pointer and the failure values (presumably NULL / |false|) are not stated here - confirm in the .cc. |
| 70 static PrefixSet* LoadFile(const FilePath& filter_name); |
| 71 bool WriteFile(const FilePath& filter_name) const; |
| 72 |
| 68 // Regenerate the vector of prefixes passed to the constructor into | 73 // Regenerate the vector of prefixes passed to the constructor into |
| 69 // |prefixes|. Prefixes will be added in sorted order. | 74 // |prefixes|. Prefixes will be added in sorted order. NOTE(review): not declared const, unlike |Exists()| and |WriteFile()| - confirm whether that is intentional. |
| 70 void GetPrefixes(std::vector<SBPrefix>* prefixes); | 75 void GetPrefixes(std::vector<SBPrefix>* prefixes); |
| 71 | 76 |
| 72 private: | 77 private: |
| 73 // Maximum delta that can be encoded in a 16-bit unsigned. | 78 // Exclusive upper bound on a 16-bit delta: 256 * 256 = 65536 is one past the largest 16-bit unsigned value (65535). NOTE(review): presumably deltas >= |kMaxDelta| force a new |index_| entry - confirm in the .cc. |
| 74 static const unsigned kMaxDelta = 256 * 256; | 79 static const unsigned kMaxDelta = 256 * 256; |
| 75 | 80 |
| 76 // Maximum number of consecutive deltas to encode before generating | 81 // Maximum number of consecutive deltas to encode before generating |
| 77 // a new index entry. This helps keep the worst-case performance | 82 // a new index entry. This helps keep the worst-case performance |
| 78 // for |Exists()| under control. | 83 // for |Exists()| under control. |
| 79 static const size_t kMaxRun = 100; | 84 static const size_t kMaxRun = 100; |
| 80 | 85 |
| 86 // Helper for |LoadFile()|. Steals the contents of |index| and |
| 87 // |deltas| using |swap()|. |
| 88 PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index, |
| 89 std::vector<uint16> *deltas); |
| 90 |
| 81 // Top-level index of prefix to offset in |deltas_|. Each pair | 91 // Top-level index of prefix to offset in |deltas_|. Each pair |
| 82 // indicates a base prefix and where the deltas from that prefix | 92 // indicates a base prefix and where the deltas from that prefix |
| 83 // begin in |deltas_|. The deltas for a pair end at the next pair's | 93 // begin in |deltas_|. The deltas for a pair end at the next pair's |
| 84 // index into |deltas_|. | 94 // index into |deltas_|. |
| 85 std::vector<std::pair<SBPrefix,size_t> > index_; | 95 std::vector<std::pair<SBPrefix,size_t> > index_; |
| 86 | 96 |
| 87 // Deltas which are added to the prefix in |index_| to generate | 97 // Deltas which are added to the prefix in |index_| to generate |
| 88 // prefixes. Deltas are only valid between consecutive items from | 98 // prefixes. Deltas are only valid between consecutive items from |
| 89 // |index_|, or the end of |deltas_| for the last |index_| pair. | 99 // |index_|, or the end of |deltas_| for the last |index_| pair. |
| 90 std::vector<uint16> deltas_; | 100 std::vector<uint16> deltas_; |
| 91 | 101 |
| 92 DISALLOW_COPY_AND_ASSIGN(PrefixSet); | 102 DISALLOW_COPY_AND_ASSIGN(PrefixSet); |
| 93 }; | 103 }; |
| 94 | 104 |
| 95 } // namespace safe_browsing | 105 } // namespace safe_browsing |
| 96 | 106 |
| 97 #endif // CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ | 107 #endif // CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ |
| OLD | NEW |