OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // A read-only set implementation for |SBPrefix| items. Prefixes are | 5 // A read-only set implementation for |SBPrefix| items. Prefixes are |
6 // sorted and stored as 16-bit deltas from the previous prefix. An | 6 // sorted and stored as 16-bit deltas from the previous prefix. An |
7 // index structure provides quick random access, and also handles | 7 // index structure provides quick random access, and also handles |
8 // cases where 16 bits cannot encode a delta. | 8 // cases where 16 bits cannot encode a delta. |
9 // | 9 // |
10 // For example, the sequence {20, 25, 41, 65432, 150000, 160000} would | 10 // For example, the sequence {20, 25, 41, 65432, 150000, 160000} would |
(...skipping 19 matching lines...) Expand all Loading... |
30 // Experimenting with random selections of the above data, storage | 30 // Experimenting with random selections of the above data, storage |
31 // size drops almost linearly as prefix count drops, until the index | 31 // size drops almost linearly as prefix count drops, until the index |
32 // overhead starts to become a problem a bit under 200k prefixes. The | 32 // overhead starts to become a problem a bit under 200k prefixes. The |
33 // memory footprint gets worse than storing the raw prefix data around | 33 // memory footprint gets worse than storing the raw prefix data around |
34 // 75k prefixes. Fortunately, the actual memory footprint also falls. | 34 // 75k prefixes. Fortunately, the actual memory footprint also falls. |
35 // If the prefix count increases the memory footprint should increase | 35 // If the prefix count increases the memory footprint should increase |
36 // approximately linearly. The worst-case would be 2^16 items all | 36 // approximately linearly. The worst-case would be 2^16 items all |
37 // 2^16 apart, which would need 512k (versus 256k to store the raw | 37 // 2^16 apart, which would need 512k (versus 256k to store the raw |
38 // data). | 38 // data). |
39 // | 39 // |
40 // TODO(shess): Write serialization code. Something like this should | 40 // The on-disk format looks like: |
41 // work: | |
42 // 4 byte magic number | 41 // 4 byte magic number |
43 // 4 byte version number | 42 // 4 byte version number |
44 // 4 byte |index_.size()| | 43 // 4 byte |index_.size()| |
45 // 4 byte |deltas_.size()| | 44 // 4 byte |deltas_.size()| |
46 // n * 8 byte |&index_[0]..&index_[n]| | 45 // n * 8 byte |&index_[0]..&index_[n]| |
47 // m * 2 byte |&deltas_[0]..&deltas_[m]| | 46 // m * 2 byte |&deltas_[0]..&deltas_[m]| |
48 // 16 byte digest | 47 // 16 byte digest |
49 | 48 |
50 #ifndef CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ | 49 #ifndef CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ |
51 #define CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ | 50 #define CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ |
52 #pragma once | 51 #pragma once |
53 | 52 |
54 #include <vector> | 53 #include <vector> |
55 | 54 |
56 #include "chrome/browser/safe_browsing/safe_browsing_util.h" | 55 #include "chrome/browser/safe_browsing/safe_browsing_util.h" |
57 | 56 |
| 57 class FilePath; |
| 58 |
58 namespace safe_browsing { | 59 namespace safe_browsing { |
59 | 60 |
60 class PrefixSet { | 61 class PrefixSet { |
61 public: | 62 public: |
62 explicit PrefixSet(const std::vector<SBPrefix>& sorted_prefixes); | 63 explicit PrefixSet(const std::vector<SBPrefix>& sorted_prefixes); |
63 ~PrefixSet(); | 64 ~PrefixSet(); |
64 | 65 |
65 // Returns |true| if |prefix| was in the |sorted_prefixes| passed to the constructor. | 66 // Returns |true| if |prefix| was in the |sorted_prefixes| passed to the constructor. |
66 bool Exists(SBPrefix prefix) const; | 67 bool Exists(SBPrefix prefix) const; |
67 | 68 |
| 69 // Persist the set on disk. |LoadFile()| reads a set from |filter_name| and |WriteFile()| writes this set to it. NOTE(review): presumably |LoadFile()| returns a caller-owned set, or NULL on failure -- confirm. |
| 70 static PrefixSet* LoadFile(const FilePath& filter_name); |
| 71 bool WriteFile(const FilePath& filter_name) const; |
| 72 |
68 // Regenerate the vector of prefixes passed to the constructor into | 73 // Regenerate the vector of prefixes passed to the constructor into |
69 // |prefixes|. Prefixes will be added in sorted order. | 74 // |prefixes|. Prefixes will be added in sorted order. |
70 void GetPrefixes(std::vector<SBPrefix>* prefixes); | 75 void GetPrefixes(std::vector<SBPrefix>* prefixes); |
71 | 76 |
72 private: | 77 private: |
73 // One past the largest delta a 16-bit unsigned can encode (256 * 256 == 2^16, while |deltas_| holds uint16). | 78 // One past the largest delta a 16-bit unsigned can encode (256 * 256 == 2^16, while |deltas_| holds uint16). |
74 static const unsigned kMaxDelta = 256 * 256; | 79 static const unsigned kMaxDelta = 256 * 256; |
75 | 80 |
76 // Maximum number of consecutive deltas to encode before generating | 81 // Maximum number of consecutive deltas to encode before generating |
77 // a new index entry. This helps keep the worst-case performance | 82 // a new index entry. This helps keep the worst-case performance |
78 // for |Exists()| under control. | 83 // for |Exists()| under control. |
79 static const size_t kMaxRun = 100; | 84 static const size_t kMaxRun = 100; |
80 | 85 |
| 86 // Helper for |LoadFile()|. Steals the contents of |index| and |
| 87 // |deltas| using |swap()|. |
| 88 PrefixSet(std::vector<std::pair<SBPrefix,size_t> > *index, |
| 89 std::vector<uint16> *deltas); |
| 90 |
81 // Top-level index of prefix to offset in |deltas_|. Each pair | 91 // Top-level index of prefix to offset in |deltas_|. Each pair |
82 // indicates a base prefix and where the deltas from that prefix | 92 // indicates a base prefix and where the deltas from that prefix |
83 // begin in |deltas_|. The deltas for a pair end at the next pair's | 93 // begin in |deltas_|. The deltas for a pair end at the next pair's |
84 // index into |deltas_|. | 94 // index into |deltas_|. |
85 std::vector<std::pair<SBPrefix,size_t> > index_; | 95 std::vector<std::pair<SBPrefix,size_t> > index_; |
86 | 96 |
87 // Deltas which are added to the prefix in |index_| to generate | 97 // Deltas which are added to the prefix in |index_| to generate |
88 // prefixes. Deltas are only valid between consecutive items from | 98 // prefixes. Deltas are only valid between consecutive items from |
89 // |index_|, or the end of |deltas_| for the last |index_| pair. | 99 // |index_|, or the end of |deltas_| for the last |index_| pair. |
90 std::vector<uint16> deltas_; | 100 std::vector<uint16> deltas_; |
91 | 101 |
92 DISALLOW_COPY_AND_ASSIGN(PrefixSet); | 102 DISALLOW_COPY_AND_ASSIGN(PrefixSet); |
93 }; | 103 }; |
94 | 104 |
95 } // namespace safe_browsing | 105 } // namespace safe_browsing |
96 | 106 |
97 #endif // CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ | 107 #endif // CHROME_BROWSER_SAFE_BROWSING_PREFIX_SET_H_ |
OLD | NEW |