Chromium Code Reviews| Index: components/safe_browsing_db/v4_rice.h |
| diff --git a/components/safe_browsing_db/v4_rice.h b/components/safe_browsing_db/v4_rice.h |
| new file mode 100644 |
| index 0000000000000000000000000000000000000000..650ec8a41f34ae27937ac61f899daee4967c6a14 |
| --- /dev/null |
| +++ b/components/safe_browsing_db/v4_rice.h |
| @@ -0,0 +1,131 @@ |
| +// Copyright 2016 The Chromium Authors. All rights reserved. |
| +// Use of this source code is governed by a BSD-style license that can be |
| +// found in the LICENSE file. |
| + |
| +// Rice-Golomb decoder for blacklist updates. |
| +// Details at: https://en.wikipedia.org/wiki/Golomb_coding |
| +// This implementation has "M" as a power of 2, where this power is specified as |
|
palmer
2016/07/26 23:59:49
Perhaps you can leave this sentence out, since you
vakh (use Gerrit instead)
2016/07/27 00:54:34
Done.
|
| +// the Rice parameter. |
| + |
| +#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_ |
| +#define COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_ |
| + |
| +#include <ostream> |
| +#include <string> |
| +#include <vector> |
| +#include "base/gtest_prod_util.h" |
| + |
| +namespace safe_browsing { |
| + |
| +// Enumerate different failure events while decoding the Rice-encoded string |
|
palmer
2016/07/26 23:59:49
Style nit: "Rice-encoded".
vakh (use Gerrit instead)
2016/07/27 00:54:33
Done.
|
| +// sent by the server for histogramming purposes. DO NOT CHANGE THE ORDERING OF |
| +// THESE VALUES. |
| +enum V4DecodeResult { |
| + // No error. |
| + DECODE_SUCCESS = 0, |
| + |
| + // Exceeded the number of entries to expect. |
| + DECODE_NO_MORE_ENTRIES_FAILURE = 1, |
| + |
| + // Requested to decode >32 bits. |
| + DECODE_REQUESTED_TOO_MANY_BITS_FAILURE = 2, |
| + |
| + // All bits had already been read and interpreted in the encoded string. |
| + DECODE_RAN_OUT_OF_BITS_FAILURE = 3, |
| + |
| + // The output parameter isn't a writable memory location. |
| + DECODE_OUTPUT_IS_NULL_FAILURE = 4, |
| + |
| + // Memory space for histograms is determined by the max. ALWAYS |
| + // ADD NEW VALUES BEFORE THIS ONE. |
| + DECODE_RESULT_MAX |
| +}; |
| + |
| +class V4RiceDecoder { |
| + public: |
| + // Decodes the Rice-encoded string in |encoded_data| as a list of integers |
|
palmer
2016/07/26 23:59:49
Style nit: Rice-encoded
vakh (use Gerrit instead)
2016/07/27 00:54:33
Done.
|
| + // and stores them in |out|. |rice_parameter| is the exponent of 2 for |
| + // calculating 'M', |num_entries| is the number of encoded entries, and |
|
palmer
2016/07/26 23:59:49
Explaining that it's for calculating M doesn't exp
vakh (use Gerrit instead)
2016/07/27 00:54:33
I see what you mean but calling it |M| would be in
|
| + // |first_value| is the first value in the sequence (even when |num_entries| |
|
palmer
2016/07/26 23:59:49
What should |first_value| be in the case that |num
vakh (use Gerrit instead)
2016/07/27 00:54:33
Done.
|
| + // is zero). Each decoded value is a positive offset from the previous value. |
| + static V4DecodeResult DecodeIntegers(const uint32_t first_value, |
| + const int rice_parameter, |
| + const int num_entries, |
| + const std::string& encoded_data, |
| + std::vector<uint32_t>* out); |
| + |
| + // Decodes the Rice-encoded string in |encoded_data| as a string of 4-byte |
|
palmer
2016/07/26 23:59:48
Style nit: Rice-encoded
vakh (use Gerrit instead)
2016/07/27 00:54:33
Done.
|
| + // hash prefixes and stores them in |out|. The rest of the arguments are the |
| + // same as for |DecodeIntegers|. |
| + static V4DecodeResult DecodeBytes(const uint32_t first_value, |
| + const int32_t rice_parameter, |
| + const int32_t num_entries, |
| + const std::string& encoded_data, |
| + std::string* out); |
| + |
| + virtual ~V4RiceDecoder(); |
| + |
| + std::string DebugString() const; |
| + |
| + private: |
| + FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextWordWithNoData); |
| + FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextBitsWithNoData); |
| + FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoData); |
| + FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoEntries); |
| + FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithSmallValues); |
| + FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithLargeValues); |
| + |
| + // The |rice_parameter| is the exponent of 2 for calculating 'M', |
| + // |num_entries| is the number of encoded entries in the |encoded_data| and |
| + // |encoded_data| is the Rice-encoded string to decode. |
| + V4RiceDecoder(const int rice_parameter, |
| + const int num_entries, |
| + const std::string& encoded_data); |
| + |
| + // Returns true until |num_entries_| entries have been decoded. |
|
palmer
2016/07/26 23:59:49
Style nit: "Returns true until |num_entries_| entr
vakh (use Gerrit instead)
2016/07/27 00:54:34
Done.
|
| + bool HasAnotherValue() const; |
| + |
| + // Populates |value| with the next 32-bit unsigned integer decoded from |
| + // |encoded_data|. |
| + V4DecodeResult GetNextValue(uint32_t* value); |
| + |
| + // Reads in up to 32 bits from |encoded_data| into |word|, from which |
|
palmer
2016/07/26 23:59:49
Style nit: "up to"
vakh (use Gerrit instead)
2016/07/27 00:54:33
Done.
|
| + // subsequent GetNextBits() calls read bits. |
| + V4DecodeResult GetNextWord(uint32_t* word); |
| + |
| + // Reads |num_requested_bits| into |x| from |current_word_| and advances it |
| + // if needed by calling GetNextWord(). |
| + V4DecodeResult GetNextBits(size_t num_requested_bits, uint32_t* x); |
| + |
| + // Reads |num_requested_bits| from |current_word_|. |
| + void GetBitsFromCurrentWord(size_t num_requested_bits, uint32_t* x); |
| + |
| + // The Rice parameter, which is the exponent of two for calculating 'M'. 'M' |
|
palmer
2016/07/26 23:59:49
Style nit: Rice parameter (but, again, I'd call it
vakh (use Gerrit instead)
2016/07/27 00:54:33
See above.
|
| + // is used as the base to calculate the quotient and remainder in the |
| + // algorithm. |
| + const int rice_parameter_; |
| + |
| + // The number of entries encoded in the data stream. |
| + int num_entries_; |
| + |
| + // The Rice-encoded string. |
|
palmer
2016/07/26 23:59:49
Style nit: Rice-encoded
vakh (use Gerrit instead)
2016/07/27 00:54:33
Done.
|
| + const std::string data_; |
| + |
| + // The total number of bytes read so far from |data_| into |current_word_|. |
|
palmer
2016/07/26 23:59:49
Nit: Mark identifiers with |...|.
vakh (use Gerrit instead)
2016/07/27 00:54:33
Done.
|
| + unsigned int data_byte_index_; |
| + |
| + // Represents the number of bits that we have read from |current_word_|. When |
|
palmer
2016/07/26 23:59:49
Same
vakh (use Gerrit instead)
2016/07/27 00:54:33
Done.
|
| + // this becomes 32, which is the size of |current_word_|, a new |current_word_| |
| + // needs to be read from |data_|. |
| + unsigned int current_word_bit_index_; |
| + |
| + // The 32-bit value read from |data_|. All bit reading operations operate on |
|
palmer
2016/07/26 23:59:48
Same
vakh (use Gerrit instead)
2016/07/27 00:54:33
Done.
|
| + // |current_word_|. |
| + uint32_t current_word_; |
| +}; |
| + |
| +std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder); |
| + |
| +} // namespace safe_browsing |
| + |
| +#endif // COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_ |