Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(414)

Unified Diff: components/safe_browsing_db/v4_rice.h

Issue 2183433002: PVer4: RICE decode bytes to list of uint32 or 4-byte hash prefixes (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Tiny: Change the type of data_byte_index_ and current_word_bit_index_ to unsigned int Created 4 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/safe_browsing_db/v4_rice.h
diff --git a/components/safe_browsing_db/v4_rice.h b/components/safe_browsing_db/v4_rice.h
new file mode 100644
index 0000000000000000000000000000000000000000..650ec8a41f34ae27937ac61f899daee4967c6a14
--- /dev/null
+++ b/components/safe_browsing_db/v4_rice.h
@@ -0,0 +1,131 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Rice-Golomb decoder for blacklist updates.
+// Details at: https://en.wikipedia.org/wiki/Golomb_coding
+// This implementation has "M" as a power of 2, where this power is specified as
palmer 2016/07/26 23:59:49 Perhaps you can leave this sentence out, since you
vakh (use Gerrit instead) 2016/07/27 00:54:34 Done.
+// rice parameter.
+
+#ifndef COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_
+#define COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_
+
+#include <ostream>
+#include <string>
+#include <vector>
+#include "base/gtest_prod_util.h"
+
+namespace safe_browsing {
+
+// Enumerate different failure events while decoding the Rice-encoded string
palmer 2016/07/26 23:59:49 Style nit: "Rice-encoded".
vakh (use Gerrit instead) 2016/07/27 00:54:33 Done.
+// sent by the server for histogramming purposes. DO NOT CHANGE THE ORDERING OF
+// THESE VALUES.
+enum V4DecodeResult {
+ // Decoding succeeded; no error.
+ DECODE_SUCCESS = 0,
+
+ // A value was requested beyond the number of entries to expect.
+ DECODE_NO_MORE_ENTRIES_FAILURE = 1,
+
+ // Requested to decode more than 32 bits in a single read.
+ DECODE_REQUESTED_TOO_MANY_BITS_FAILURE = 2,
+
+ // All bits in the encoded string had already been read and interpreted.
+ DECODE_RAN_OUT_OF_BITS_FAILURE = 3,
+
+ // The output parameter is null, so it isn't a writable memory location.
+ DECODE_OUTPUT_IS_NULL_FAILURE = 4,
+
+ // Not a real result: memory space for histograms is determined by this max.
+ // ALWAYS ADD NEW VALUES BEFORE THIS ONE.
+ DECODE_RESULT_MAX
+};
+
+class V4RiceDecoder {
+ public:
+ // Decodes the Rice-encoded string in |encoded_data| as a list of integers
palmer 2016/07/26 23:59:49 Style nit: Rice-encoded
vakh (use Gerrit instead) 2016/07/27 00:54:33 Done.
+ // and stores them in |out|. |rice_parameter| is the exponent of 2 for
+ // calculating 'M', |num_entries| is the number of encoded entries, and
palmer 2016/07/26 23:59:49 Explaining that it's for calculating M doesn't exp
vakh (use Gerrit instead) 2016/07/27 00:54:33 I see what you mean but calling it |M| would be in
+ // |first_value| is the first value in the sequence (even when |num_entries|
palmer 2016/07/26 23:59:49 What should |first_value| be in the case that |num
vakh (use Gerrit instead) 2016/07/27 00:54:33 Done.
+ // is zero). Each decoded value is a positive offset from the previous value.
+ static V4DecodeResult DecodeIntegers(const uint32_t first_value,
+ const int rice_parameter,
+ const int num_entries,
+ const std::string& encoded_data,
+ std::vector<uint32_t>* out);
+
+ // Decodes the Rice-encoded string in |encoded_data| as a string of 4-byte
palmer 2016/07/26 23:59:48 Style nit: Rice-encoded
vakh (use Gerrit instead) 2016/07/27 00:54:33 Done.
+ // hash prefixes and stores them in |out|. The rest of the arguments are the
+ // same as for |DecodeIntegers|.
+ static V4DecodeResult DecodeBytes(const uint32_t first_value,
+ const int32_t rice_parameter,
+ const int32_t num_entries,
+ const std::string& encoded_data,
+ std::string* out);
+
+ virtual ~V4RiceDecoder();
+
+ std::string DebugString() const;
+
+ private:
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextWordWithNoData);
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextBitsWithNoData);
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoData);
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithNoEntries);
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithSmallValues);
+ FRIEND_TEST_ALL_PREFIXES(V4RiceTest, TestDecoderGetNextValueWithLargeValues);
+
+ // The |rice_parameter| is the exponent of 2 for calculating 'M',
+ // |num_entries| is the number of encoded entries in |encoded_data|, and
+ // |encoded_data| is the Rice-encoded string to decode.
+ V4RiceDecoder(const int rice_parameter,
+ const int num_entries,
+ const std::string& encoded_data);
+
+ // Returns true until |num_entries_| values have been decoded.
palmer 2016/07/26 23:59:49 Style nit: "Returns true until |num_entries_| entr
vakh (use Gerrit instead) 2016/07/27 00:54:34 Done.
+ bool HasAnotherValue() const;
+
+ // Populates |value| with the next 32-bit unsigned integer decoded from
+ // |encoded_data|.
+ V4DecodeResult GetNextValue(uint32_t* value);
+
+ // Reads in up to 32 bits from |encoded_data| into |word|, from which
palmer 2016/07/26 23:59:49 Style nit: "up to"
vakh (use Gerrit instead) 2016/07/27 00:54:33 Done.
+ // subsequent GetNextBits() calls read bits.
+ V4DecodeResult GetNextWord(uint32_t* word);
+
+ // Reads |num_requested_bits| into |x| from |current_word_| and advances it
+ // if needed by calling GetNextWord().
+ V4DecodeResult GetNextBits(size_t num_requested_bits, uint32_t* x);
+
+ // Reads |num_requested_bits| from |current_word_|.
+ void GetBitsFromCurrentWord(size_t num_requested_bits, uint32_t* x);
+
+ // The Rice parameter, which is the exponent of two for calculating 'M'. 'M'
palmer 2016/07/26 23:59:49 Style nit: Rice parameter (but, again, I'd call it
vakh (use Gerrit instead) 2016/07/27 00:54:33 See above.
+ // is used as the base to calculate the quotient and remainder in the
+ // algorithm.
+ const int rice_parameter_;
+
+ // The number of entries encoded in the data stream.
+ int num_entries_;
+
+ // The Rice-encoded string.
palmer 2016/07/26 23:59:49 Style nit: Rice-encoded
vakh (use Gerrit instead) 2016/07/27 00:54:33 Done.
+ const std::string data_;
+
+ // The total number of bytes read so far from |data_| into |current_word_|.
palmer 2016/07/26 23:59:49 Nit: Mark identifiers with |...|.
vakh (use Gerrit instead) 2016/07/27 00:54:33 Done.
+ unsigned int data_byte_index_;
+
+ // Represents the number of bits that we have read from |current_word_|. When
palmer 2016/07/26 23:59:49 Same
vakh (use Gerrit instead) 2016/07/27 00:54:33 Done.
+ // this becomes 32, which is the size of |current_word_|, a new |current_word_|
+ // needs to be read from |data_|.
+ unsigned int current_word_bit_index_;
+
+ // The 32-bit value read from |data_|. All bit reading operations operate on
palmer 2016/07/26 23:59:48 Same
vakh (use Gerrit instead) 2016/07/27 00:54:33 Done.
+ // |current_word_|.
+ uint32_t current_word_;
+};
+
+std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder);
+
+} // namespace safe_browsing
+
+#endif // COMPONENTS_SAFE_BROWSING_DB_V4_RICE_H_

Powered by Google App Engine
This is Rietveld 408576698