| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 // Rice-Golomb decoder for blacklist updates. | 5 // Rice-Golomb decoder for blacklist updates. |
| 6 // Details at: https://en.wikipedia.org/wiki/Golomb_coding | 6 // Details at: https://en.wikipedia.org/wiki/Golomb_coding |
| 7 | 7 |
| 8 #ifndef COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_ | 8 #ifndef COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_ |
| 9 #define COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_ | 9 #define COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_ |
| 10 | 10 |
| 11 #include <ostream> | 11 #include <ostream> |
| 12 #include <string> | 12 #include <string> |
| 13 #include <vector> |
| 13 #include "base/gtest_prod_util.h" | 14 #include "base/gtest_prod_util.h" |
| 14 | 15 |
| 15 #if defined(USE_SYSTEM_PROTOBUF) | 16 #if defined(USE_SYSTEM_PROTOBUF) |
| 16 #include <google/protobuf/repeated_field.h> | 17 #include <google/protobuf/repeated_field.h> |
| 17 #else | 18 #else |
| 18 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" | 19 #include "third_party/protobuf/src/google/protobuf/repeated_field.h" |
| 19 #endif | 20 #endif |
| 20 | 21 |
| 21 namespace safe_browsing { | 22 namespace safe_browsing { |
| 22 | 23 |
| 23 // Enumerate different failure events while decoding the Rice-encoded string | 24 // Enumerate different failure events while decoding the Rice-encoded string |
| 24 // sent by the server for histogramming purposes. DO NOT CHANGE THE ORDERING OF | 25 // sent by the server for histogramming purposes. DO NOT CHANGE THE ORDERING OF |
| 25 // THESE VALUES. | 26 // THESE VALUES. |
| 26 enum V4DecodeResult { | 27 enum V4DecodeResult { |
| 27 // No error. | 28 // No error. |
| 28 DECODE_SUCCESS = 0, | 29 DECODE_SUCCESS = 0, |
| 29 | 30 |
| 30 // Exceeded the number of entries to expect. | 31 // Exceeded the number of entries to expect. |
| 31 DECODE_NO_MORE_ENTRIES_FAILURE = 1, | 32 DECODE_NO_MORE_ENTRIES_FAILURE = 1, |
| 32 | 33 |
| 33 // Requested to decode >32 bits. | 34 // Requested to decode >32 bits. |
| 34 DECODE_REQUESTED_TOO_MANY_BITS_FAILURE = 2, | 35 DECODE_REQUESTED_TOO_MANY_BITS_FAILURE = 2, |
| 35 | 36 |
| 36 // All bits had already been read and interpreted in the encoded string. | 37 // All bits had already been read and interpreted in the encoded string. |
| 37 DECODE_RAN_OUT_OF_BITS_FAILURE = 3, | 38 DECODE_RAN_OUT_OF_BITS_FAILURE = 3, |
| 38 | 39 |
| 39 // The num_entries argument to DecodeBytes or DecodeIntegers was negative. | 40 // The num_entries argument to DecodePrefixes or DecodeIntegers was negative. |
| 40 NUM_ENTRIES_NEGATIVE_FAILURE = 4, | 41 NUM_ENTRIES_NEGATIVE_FAILURE = 4, |
| 41 | 42 |
| 42 // Rice-encoding parameter was non-positive when the number of encoded entries | 43 // Rice-encoding parameter was non-positive when the number of encoded entries |
| 43 // was > 0. | 44 // was > 0. |
| 44 RICE_PARAMETER_NON_POSITIVE_FAILURE = 5, | 45 RICE_PARAMETER_NON_POSITIVE_FAILURE = 5, |
| 45 | 46 |
| 46 // |encoded_data| was empty when the number of encoded entries was > 0. | 47 // |encoded_data| was empty when the number of encoded entries was > 0. |
| 47 ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE = 6, | 48 ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE = 6, |
| 48 | 49 |
| 49 // decoded value had an integer overflow, which is unexpected. | 50 // decoded value had an integer overflow, which is unexpected. |
| 50 DECODED_INTEGER_OVERFLOW_FAILURE = 7, | 51 DECODED_INTEGER_OVERFLOW_FAILURE = 7, |
| 51 | 52 |
| 52 // Memory space for histograms is determined by the max. ALWAYS | 53 // Memory space for histograms is determined by the max. ALWAYS |
| 53 // ADD NEW VALUES BEFORE THIS ONE. | 54 // ADD NEW VALUES BEFORE THIS ONE. |
| 54 DECODE_RESULT_MAX | 55 DECODE_RESULT_MAX |
| 55 }; | 56 }; |
| 56 | 57 |
| 57 class V4RiceDecoder { | 58 class V4RiceDecoder { |
| 58 public: | 59 public: |
| 59 // Decodes the Rice-encoded string in |encoded_data| as a list of integers | 60 // Decodes the Rice-encoded string in |encoded_data| as a list of integers |
| 60 // and stores them in |out|. |rice_parameter| is the exponent of 2 for | 61 // and stores them in |out|. |rice_parameter| is the exponent of 2 for |
| 61 // calculating 'M', |first_value| is the first value in the output sequence, | 62 // calculating 'M', |first_value| is the first value in the output sequence, |
| 62 // |num_entries| is the number of subsequent encoded entries. Each decoded | 63 // |num_entries| is the number of subsequent encoded entries. Each decoded |
| 63 // value is a positive offset from the previous value. | 64 // value is a positive offset from the previous value. |
| 64 // So, for instance, if the unencoded sequence is: [3, 7, 25], then | 65 // So, for instance, if the unencoded sequence is: [3, 7, 25], then |
| 65 // |first_value| = 3, |num_entries| = 2 and decoding the |encoded_data| will | 66 // |first_value| = 3, |num_entries| = 2 and decoding the |encoded_data| will |
| 66 // produce the offsets: [4, 18]. | 67 // produce the offsets: [4, 18]. |
| 67 static V4DecodeResult DecodeIntegers( | 68 static V4DecodeResult DecodeIntegers( |
| 68 const ::google::protobuf::int32 first_value, | 69 const ::google::protobuf::int64 first_value, |
| 69 const ::google::protobuf::int32 rice_parameter, | 70 const ::google::protobuf::int32 rice_parameter, |
| 70 const ::google::protobuf::int32 num_entries, | 71 const ::google::protobuf::int32 num_entries, |
| 71 const std::string& encoded_data, | 72 const std::string& encoded_data, |
| 72 ::google::protobuf::RepeatedField<::google::protobuf::int32>* out); | 73 ::google::protobuf::RepeatedField<::google::protobuf::int32>* out); |
| 73 | 74 |
| 74 // Decodes the Rice-encoded string in |encoded_data| as a string of 4-byte | 75 // Decodes the Rice-encoded string in |encoded_data| as a string of 4-byte |
| 75 // hash prefixes and stores them in |out|. The rest of the arguments are the | 76 // hash prefixes and stores them in |out|. The rest of the arguments are the |
| 76 // same as for |DecodeIntegers|. | 77 // same as for |DecodeIntegers|. |
| 77 static V4DecodeResult DecodeBytes( | 78 // Important: |out| is only meant to be used as a concatenated list of sorted |
| 78 const ::google::protobuf::int32 first_value, | 79 // 4-byte hash prefixes, not as a vector of uint32_t values. |
| 80 // This method does the following: |
| 81 // 1. Rice-decode the |encoded_data| as a list of uint32_t values. |
| 82 // 2. Flip the endianness (on little-endian machines) of each of these |
| 83 // values. This is done because when a hash prefix is represented as a |
| 84 // uint32_t, the bytes get reordered. This generates the hash prefix that |
| 85 // the server would have sent in the absence of Rice-encoding. |
| 86 // 3. Sort the resulting list of uint32_t values. |
| 87 // 4. Flip the endianness once again since the uint32_t are expected to be |
| 88 // consumed as a concatenated list of 4-byte hash prefixes, when merging |
| 89 // the |
| 90 // update with the existing state. |
| 91 static V4DecodeResult DecodePrefixes( |
| 92 const ::google::protobuf::int64 first_value, |
| 79 const ::google::protobuf::int32 rice_parameter, | 93 const ::google::protobuf::int32 rice_parameter, |
| 80 const ::google::protobuf::int32 num_entries, | 94 const ::google::protobuf::int32 num_entries, |
| 81 const std::string& encoded_data, | 95 const std::string& encoded_data, |
| 82 std::string* out); | 96 std::vector<uint32_t>* out); |
| 83 | 97 |
| 84 virtual ~V4RiceDecoder(); | 98 virtual ~V4RiceDecoder(); |
| 85 | 99 |
| 86 std::string DebugString() const; | 100 std::string DebugString() const; |
| 87 | 101 |
| 88 private: | 102 private: |
| 89 FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextWordWithNoData); | 103 FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextWordWithNoData); |
| 90 FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextBitsWithNoData); | 104 FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextBitsWithNoData); |
| 91 FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoData); | 105 FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoData); |
| 92 FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoEntries); | 106 FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoEntries); |
| 93 FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithSmallValues); | 107 friend class V4RiceTest; |
| 94 FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithLargeValues); | |
| 95 | 108 |
| 96 // Validate some of the parameters passed to the decode methods. | 109 // Validate some of the parameters passed to the decode methods. |
| 97 static V4DecodeResult ValidateInput( | 110 static V4DecodeResult ValidateInput( |
| 98 const ::google::protobuf::int32 rice_parameter, | 111 const ::google::protobuf::int32 rice_parameter, |
| 99 const ::google::protobuf::int32 num_entries, | 112 const ::google::protobuf::int32 num_entries, |
| 100 const std::string& encoded_data); | 113 const std::string& encoded_data); |
| 101 | 114 |
| 102 // The |rice_parameter| is the exponent of 2 for calculating 'M', | 115 // The |rice_parameter| is the exponent of 2 for calculating 'M', |
| 103 // |num_entries| is the number of encoded entries in the |encoded_data| and | 116 // |num_entries| is the number of encoded entries in the |encoded_data| and |
| 104 // |encoded_data| is the Rice-encoded string to decode. | 117 // |encoded_data| is the Rice-encoded string to decode. |
| (...skipping 10 matching lines...) Expand all Loading... |
| 115 | 128 |
| 116 // Reads in up to 32 bits from |encoded_data| into |word|, from which | 129 // Reads in up to 32 bits from |encoded_data| into |word|, from which |
| 117 // subsequent GetNextBits() calls read bits. | 130 // subsequent GetNextBits() calls read bits. |
| 118 V4DecodeResult GetNextWord(uint32_t* word); | 131 V4DecodeResult GetNextWord(uint32_t* word); |
| 119 | 132 |
| 120 // Reads |num_requested_bits| into |x| from |current_word_| and advances it | 133 // Reads |num_requested_bits| into |x| from |current_word_| and advances it |
| 121 // if needed by calling GetNextWord(). | 134 // if needed by calling GetNextWord(). |
| 122 V4DecodeResult GetNextBits(unsigned int num_requested_bits, uint32_t* x); | 135 V4DecodeResult GetNextBits(unsigned int num_requested_bits, uint32_t* x); |
| 123 | 136 |
| 124 // Reads |num_requested_bits| from |current_word_|. | 137 // Reads |num_requested_bits| from |current_word_|. |
| 125 void GetBitsFromCurrentWord(unsigned int num_requested_bits, uint32_t* x); | 138 uint32_t GetBitsFromCurrentWord(unsigned int num_requested_bits); |
| 126 | 139 |
| 127 // The Rice parameter, which is the exponent of two for calculating 'M'. 'M' | 140 // The Rice parameter, which is the exponent of two for calculating 'M'. 'M' |
| 128 // is used as the base to calculate the quotient and remainder in the | 141 // is used as the base to calculate the quotient and remainder in the |
| 129 // algorithm. | 142 // algorithm. |
| 130 const unsigned int rice_parameter_; | 143 const unsigned int rice_parameter_; |
| 131 | 144 |
| 132 // The number of entries encoded in the data stream. | 145 // The number of entries encoded in the data stream. |
| 133 ::google::protobuf::int32 num_entries_; | 146 ::google::protobuf::int32 num_entries_; |
| 134 | 147 |
| 135 // The Rice-encoded string. | 148 // The Rice-encoded string. |
| (...skipping 11 matching lines...) Expand all Loading... |
| 147 // The 32-bit value read from |data_|. All bit reading operations operate on | 160 // The 32-bit value read from |data_|. All bit reading operations operate on |
| 148 // |current_word_|. | 161 // |current_word_|. |
| 149 uint32_t current_word_; | 162 uint32_t current_word_; |
| 150 }; | 163 }; |
| 151 | 164 |
| 152 std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder); | 165 std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder); |
| 153 | 166 |
| 154 } // namespace safe_browsing | 167 } // namespace safe_browsing |
| 155 | 168 |
| 156 #endif // COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_ | 169 #endif // COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_ |
| OLD | NEW |