Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(612)

Unified Diff: components/safe_browsing_db/v4_rice.h

Issue 2183433002: PVer4: RICE decode bytes to list of uint32 or 4-byte hash prefixes (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: s/RICE/Rice and use safe-math to check integer overflow Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « components/safe_browsing_db/DEPS ('k') | components/safe_browsing_db/v4_rice.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: components/safe_browsing_db/v4_rice.h
diff --git a/components/safe_browsing_db/v4_rice.h b/components/safe_browsing_db/v4_rice.h
new file mode 100644
index 0000000000000000000000000000000000000000..b9ab9f265f334e25fedc5bbd53e95508dc934cd7
--- /dev/null
+++ b/components/safe_browsing_db/v4_rice.h
@@ -0,0 +1,156 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Rice-Golomb decoder for blacklist updates.
+// Details at: https://en.wikipedia.org/wiki/Golomb_coding
+
+#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_
+#define COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_
+
+#include <ostream>
+#include <string>
+#include "base/gtest_prod_util.h"
+
+#if defined(USE_SYSTEM_PROTOBUF)
+#include <google/protobuf/repeated_field.h>
+#else
+#include "third_party/protobuf/src/google/protobuf/repeated_field.h"
+#endif
+
+namespace safe_browsing {
+
+// Enumerate different failure events while decoding the Rice-encoded string
+// sent by the server for histogramming purposes. DO NOT CHANGE THE ORDERING OF
+// THESE VALUES.
+enum V4DecodeResult {
+ // No error.
+ DECODE_SUCCESS = 0,
+
+ // Exceeded the number of entries to expect.
+ DECODE_NO_MORE_ENTRIES_FAILURE = 1,
+
+ // Requested to decode >32 bits.
+ DECODE_REQUESTED_TOO_MANY_BITS_FAILURE = 2,
+
+ // All bits had already been read and interpreted in the encoded string.
+ DECODE_RAN_OUT_OF_BITS_FAILURE = 3,
+
+ // The num_entries argument to DecodeBytes or DecodeIntegers was negative.
+ NUM_ENTRIES_NEGATIVE_FAILURE = 4,
+
+ // Rice-encoding parameter was non-positive when the number of encoded entries
+ // was > 0.
+ RICE_PARAMETER_NON_POSITIVE_FAILURE = 5,
+
+ // |encoded_data| was empty when the number of encoded entries was > 0.
+ ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE = 6,
+
+ // decoded value had an integer overflow, which is unexpected.
+ DECODED_INTEGER_OVERFLOW_FAILURE = 7,
+
+ // Memory space for histograms is determined by the max. ALWAYS
+ // ADD NEW VALUES BEFORE THIS ONE.
+ DECODE_RESULT_MAX
+};
+
+class V4RiceDecoder {
+ public:
+ // Decodes the Rice-encoded string in |encoded_data| as a list of integers
+ // and stores them in |out|. |rice_parameter| is the exponent of 2 for
+ // calculating 'M', |first_value| is the first value in the output sequence,
+ // |num_entries| is the number of subsequent encoded entries. Each decoded
+ // value is a positive offset from the previous value.
+ // So, for instance, if the unencoded sequence is: [3, 7, 25], then
+ // |first_value| = 3, |num_entries| = 2 and decoding the |encoded_data| will
+ // produce the offsets: [4, 18].
+ static V4DecodeResult DecodeIntegers(
+ const ::google::protobuf::int32 first_value,
+ const ::google::protobuf::int32 rice_parameter,
+ const ::google::protobuf::int32 num_entries,
+ const std::string& encoded_data,
+ ::google::protobuf::RepeatedField<::google::protobuf::int32>* out);
+
+ // Decodes the Rice-encoded string in |encoded_data| as a string of 4-byte
+ // hash prefixes and stores them in |out|. The rest of the arguments are the
+ // same as for |DecodeIntegers|.
+ static V4DecodeResult DecodeBytes(
+ const ::google::protobuf::int32 first_value,
+ const ::google::protobuf::int32 rice_parameter,
+ const ::google::protobuf::int32 num_entries,
+ const std::string& encoded_data,
+ std::string* out);
+
+ virtual ~V4RiceDecoder();
+
+ std::string DebugString() const;
+
+ private:
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextWordWithNoData);
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextBitsWithNoData);
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoData);
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoEntries);
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithSmallValues);
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithLargeValues);
+
+ // Validate some of the parameters passed to the decode methods.
+ static V4DecodeResult ValidateInput(
+ const ::google::protobuf::int32 rice_parameter,
+ const ::google::protobuf::int32 num_entries,
+ const std::string& encoded_data);
+
+ // The |rice_parameter| is the exponent of 2 for calculating 'M',
+ // |num_entries| is the number of encoded entries in the |encoded_data| and
+ // |encoded_data| is the Rice-encoded string to decode.
+ V4RiceDecoder(const ::google::protobuf::int32 rice_parameter,
+ const ::google::protobuf::int32 num_entries,
+ const std::string& encoded_data);
+
+ // Returns true until |num_entries| entries have been decoded.
+ bool HasAnotherValue() const;
+
+ // Populates |value| with the next 32-bit unsigned integer decoded from
+ // |encoded_data|.
+ V4DecodeResult GetNextValue(uint32_t* value);
+
+ // Reads in up to 32 bits from |encoded_data| into |word|, from which
+ // subsequent GetNextBits() calls read bits.
+ V4DecodeResult GetNextWord(uint32_t* word);
+
+ // Reads |num_requested_bits| into |x| from |current_word_| and advances it
+ // if needed by calling GetNextWord().
+ V4DecodeResult GetNextBits(unsigned int num_requested_bits, uint32_t* x);
+
+ // Reads |num_requested_bits| from |current_word_|.
+ void GetBitsFromCurrentWord(unsigned int num_requested_bits, uint32_t* x);
+
+ // The Rice parameter, which is the exponent of two for calculating 'M'. 'M'
+ // is used as the base to calculate the quotient and remainder in the
+ // algorithm.
+ const unsigned int rice_parameter_;
+
+ // The number of entries encoded in the data stream.
+ ::google::protobuf::int32 num_entries_;
+
+ // The Rice-encoded string.
+ const std::string data_;
+
+ // Represents how many total bytes have we read from |data_| into
+ // |current_word_|.
+ unsigned int data_byte_index_;
+
+ // Represents the number of bits that we have read from |current_word_|. When
+ // this becomes 32, which is the size of |current_word_|, a new
+ // |current_word_| needs to be read from |data_|.
+ unsigned int current_word_bit_index_;
+
+ // The 32-bit value read from |data_|. All bit reading operations operate on
+ // |current_word_|.
+ uint32_t current_word_;
+};
+
+std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder);
+
+} // namespace safe_browsing
+
+#endif // COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_
« no previous file with comments | « components/safe_browsing_db/DEPS ('k') | components/safe_browsing_db/v4_rice.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698