components/safe_browsing_db/v4_rice.h - Issue 2183433002: PVer4: RICE decode bytes to list of uint32 or 4-byte hash prefixes

Unified Diff: components/safe_browsing_db/v4_rice.h

Issue 2183433002: PVer4: RICE decode bytes to list of uint32 or 4-byte hash prefixes (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: s/RICE/Rice and use safe-math to check integer overflow Created 4 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: components/safe_browsing_db/v4_rice.h

diff --git a/components/safe_browsing_db/v4_rice.h b/components/safe_browsing_db/v4_rice.h

new file mode 100644

index 0000000000000000000000000000000000000000..b9ab9f265f334e25fedc5bbd53e95508dc934cd7

--- /dev/null

+++ b/components/safe_browsing_db/v4_rice.h

@@ -0,0 +1,156 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+// Rice-Golomb decoder for blacklist updates.

+// Details at: https://en.wikipedia.org/wiki/Golomb_coding

+#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_

+#define COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_

+#include <ostream>

+#include <string>

+#include "base/gtest_prod_util.h"

+#if defined(USE_SYSTEM_PROTOBUF)

+#include <google/protobuf/repeated_field.h>

+#else

+#include "third_party/protobuf/src/google/protobuf/repeated_field.h"

+#endif

+namespace safe_browsing {

+// Enumerates the possible outcomes of decoding the Rice-encoded string sent by

+// the server, for histogramming purposes. DO NOT CHANGE THE ORDERING OF THESE

+// VALUES.

+enum V4DecodeResult {

+ // No error.

+ DECODE_SUCCESS = 0,

+ // Attempted to decode more entries than the number of entries to expect.

+ DECODE_NO_MORE_ENTRIES_FAILURE = 1,

+ // A request was made to decode more than 32 bits.

+ DECODE_REQUESTED_TOO_MANY_BITS_FAILURE = 2,

+ // All bits in the encoded string had already been read and interpreted.

+ DECODE_RAN_OUT_OF_BITS_FAILURE = 3,

+ // The |num_entries| argument to DecodeBytes or DecodeIntegers was negative.

+ NUM_ENTRIES_NEGATIVE_FAILURE = 4,

+ // Rice-encoding parameter was non-positive when the number of encoded entries

+ // was > 0.

+ RICE_PARAMETER_NON_POSITIVE_FAILURE = 5,

+ // |encoded_data| was empty when the number of encoded entries was > 0.

+ ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE = 6,

+ // The decoded value had an integer overflow, which is unexpected.

+ DECODED_INTEGER_OVERFLOW_FAILURE = 7,

+ // Memory space for histograms is determined by the max. ALWAYS

+ // ADD NEW VALUES BEFORE THIS ONE.

+ DECODE_RESULT_MAX

+};

+class V4RiceDecoder {

+ public:

+ // Decodes the Rice-encoded string in |encoded_data| as a list of integers

+ // and stores them in |out|. |rice_parameter| is the exponent of 2 for

+ // calculating 'M', |first_value| is the first value in the output sequence,

+ // |num_entries| is the number of subsequent encoded entries. Each decoded

+ // value is a positive offset from the previous value.

+ // For instance, for the unencoded sequence [3, 7, 25], |first_value| is 3,

+ // |num_entries| is 2 and the encoded offsets are [4, 18].

+ static V4DecodeResult DecodeIntegers(

+ const ::google::protobuf::int32 first_value,

+ const ::google::protobuf::int32 rice_parameter,

+ const ::google::protobuf::int32 num_entries,

+ const std::string& encoded_data,

+ ::google::protobuf::RepeatedField<::google::protobuf::int32>* out);

+ // Decodes the Rice-encoded string in |encoded_data| as a string of 4-byte

+ // hash prefixes and stores them in |out|. The rest of the arguments are the

+ // same as for |DecodeIntegers|.

+ static V4DecodeResult DecodeBytes(

+ const ::google::protobuf::int32 first_value,

+ const ::google::protobuf::int32 rice_parameter,

+ const ::google::protobuf::int32 num_entries,

+ const std::string& encoded_data,

+ std::string* out);

+ virtual ~V4RiceDecoder();

+ std::string DebugString() const;

+ private:

+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextWordWithNoData);

+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextBitsWithNoData);

+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoData);

+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoEntries);

+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithSmallValues);

+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithLargeValues);

+ // Validates some of the parameters passed to the decode methods.

+ static V4DecodeResult ValidateInput(

+ const ::google::protobuf::int32 rice_parameter,

+ const ::google::protobuf::int32 num_entries,

+ const std::string& encoded_data);

+ // The |rice_parameter| is the exponent of 2 for calculating 'M',

+ // |num_entries| is the number of encoded entries in the |encoded_data| and

+ // |encoded_data| is the Rice-encoded string to decode.

+ V4RiceDecoder(const ::google::protobuf::int32 rice_parameter,

+ const ::google::protobuf::int32 num_entries,

+ const std::string& encoded_data);

+ // Returns true until |num_entries| entries have been decoded.

+ bool HasAnotherValue() const;

+ // Populates |value| with the next 32-bit unsigned integer decoded from

+ // |encoded_data|.

+ V4DecodeResult GetNextValue(uint32_t* value);

+ // Reads in up to 32 bits from |encoded_data| into |word|, from which

+ // subsequent GetNextBits() calls read bits.

+ V4DecodeResult GetNextWord(uint32_t* word);

+ // Reads |num_requested_bits| bits into |x|, calling GetNextWord() if needed.

+ V4DecodeResult GetNextBits(unsigned int num_requested_bits, uint32_t* x);

+ // Reads |num_requested_bits| bits from |current_word_| into |x|.

+ void GetBitsFromCurrentWord(unsigned int num_requested_bits, uint32_t* x);

+ // The Rice parameter, which is the exponent of two for calculating 'M'. 'M'

+ // is used as the base to calculate the quotient and remainder in the

+ // algorithm.

+ const unsigned int rice_parameter_;

+ // The number of entries encoded in the data stream.

+ ::google::protobuf::int32 num_entries_;

+ // The Rice-encoded string.

+ const std::string data_;

+ // The total number of bytes read so far from |data_| into

+ // |current_word_|.

+ unsigned int data_byte_index_;

+ // Represents the number of bits that we have read from |current_word_|. When

+ // this becomes 32, which is the size of |current_word_|, a new

+ // |current_word_| needs to be read from |data_|.

+ unsigned int current_word_bit_index_;

+ // The 32-bit value read from |data_|. All bit reading operations operate on

+ // |current_word_|.

+ uint32_t current_word_;

+};

+std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder);

+} // namespace safe_browsing

+#endif // COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_

« no previous file with comments | « components/safe_browsing_db/DEPS ('k') | components/safe_browsing_db/v4_rice.cc » ('j') | no next file with comments »