Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(163)

Unified Diff: components/safe_browsing_db/v4_rice.cc

Issue 2228393003: PVer4: DecodeHashes needs to sort the output of the Rice decoder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@01_checksum
Patch Set: Fix the corresponding test in v4_store_unittest.cc also Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: components/safe_browsing_db/v4_rice.cc
diff --git a/components/safe_browsing_db/v4_rice.cc b/components/safe_browsing_db/v4_rice.cc
index 0e7148048fc379e1a3645906d818845e9932ab16..2486962fb12a5bed3cfb2c29c8496e1c8ca03150 100644
--- a/components/safe_browsing_db/v4_rice.cc
+++ b/components/safe_browsing_db/v4_rice.cc
@@ -2,13 +2,15 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
-#include "base/logging.h"
Scott Hess - ex-Googler 2016/08/12 23:25:46 I think you still need logging.h for things like N
vakh (use Gerrit instead) 2016/08/13 00:15:14 I agree. Didn't realize it was needed for that. Ad
+#include <set>
+
#include "base/numerics/safe_math.h"
#include "base/strings/stringprintf.h"
#include "components/safe_browsing_db/v4_rice.h"
using ::google::protobuf::RepeatedField;
using ::google::protobuf::int32;
+using ::google::protobuf::int64;
namespace safe_browsing {
@@ -17,12 +19,17 @@ namespace {
const int kBitsPerByte = 8;
const unsigned int kMaxBitIndex = kBitsPerByte * sizeof(uint32_t);
+uint32_t FlipEndianness(uint32_t word) {
Scott Hess - ex-Googler 2016/08/12 23:25:47 Is this right? I don't _think_ we have a mix of l
vakh (use Gerrit instead) 2016/08/13 00:15:14 Is this flip always needed: Waiting to get a confi
Scott Hess - ex-Googler 2016/08/15 21:18:15 To be clear - I suspect they are just serving netw
vakh (use Gerrit instead) 2016/08/16 00:23:03 Noe -- can you please review this part of the code
+ return (word >> 24) | ((word << 8) & 0x00FF0000) |
+ ((word >> 8) & 0x0000FF00) | (word << 24);
+}
+
void GetBytesFromUInt32(uint32_t word, char* bytes) {
const size_t mask = 0xFF;
- bytes[0] = (char)(word & mask);
- bytes[1] = (char)((word >> 8) & mask);
- bytes[2] = (char)((word >> 16) & mask);
- bytes[3] = (char)((word >> 24) & mask);
+ bytes[3] = (char)(word & mask);
+ bytes[2] = (char)((word >> 8) & mask);
+ bytes[1] = (char)((word >> 16) & mask);
+ bytes[0] = (char)((word >> 24) & mask);
}
} // namespace
@@ -54,7 +61,7 @@ V4DecodeResult V4RiceDecoder::ValidateInput(const int32 rice_parameter,
}
// static
-V4DecodeResult V4RiceDecoder::DecodeIntegers(const int32 first_value,
+V4DecodeResult V4RiceDecoder::DecodeIntegers(const int64 first_value,
const int32 rice_parameter,
const int32 num_entries,
const std::string& encoded_data,
@@ -95,7 +102,7 @@ V4DecodeResult V4RiceDecoder::DecodeIntegers(const int32 first_value,
}
// static
-V4DecodeResult V4RiceDecoder::DecodeBytes(const int32 first_value,
+V4DecodeResult V4RiceDecoder::DecodeBytes(const int64 first_value,
const int32 rice_parameter,
const int32 num_entries,
const std::string& encoded_data,
@@ -109,34 +116,32 @@ V4DecodeResult V4RiceDecoder::DecodeBytes(const int32 first_value,
}
out->reserve((num_entries + 1) * 4);
- // Cast to unsigned since we don't look at the sign bit as a sign bit.
- // first_value should have been an unsigned to begin with but proto don't
- // allow that.
- uint32_t first_value_unsigned = static_cast<uint32_t>(first_value);
- base::CheckedNumeric<uint32_t> last_value(first_value_unsigned);
- char bytes[4];
- GetBytesFromUInt32(last_value.ValueOrDie(), bytes);
- out->append(bytes, 4);
-
- if (num_entries == 0) {
- return DECODE_SUCCESS;
- }
+ base::CheckedNumeric<uint32_t> last_value(first_value);
+ std::set<uint32_t> hash_prefixes;
+ hash_prefixes.insert(FlipEndianness(last_value.ValueOrDie()));
+
+ if (num_entries > 0) {
+ V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);
+ while (decoder.HasAnotherValue()) {
+ uint32_t offset;
+ result = decoder.GetNextValue(&offset);
+ if (result != DECODE_SUCCESS) {
+ return result;
+ }
- V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);
- while (decoder.HasAnotherValue()) {
- uint32_t offset;
- result = decoder.GetNextValue(&offset);
- if (result != DECODE_SUCCESS) {
- return result;
- }
+ last_value += offset;
+ if (!last_value.IsValid()) {
+ NOTREACHED();
+ return DECODED_INTEGER_OVERFLOW_FAILURE;
+ }
- last_value += offset;
- if (!last_value.IsValid()) {
- NOTREACHED();
- return DECODED_INTEGER_OVERFLOW_FAILURE;
+ hash_prefixes.insert(FlipEndianness(last_value.ValueOrDie()));
}
+ }
- GetBytesFromUInt32(last_value.ValueOrDie(), bytes);
+ char bytes[4];
+ for (const uint32_t& hash_prefix : hash_prefixes) {
+ GetBytesFromUInt32(hash_prefix, bytes);
out->append(bytes, 4);
}
@@ -238,20 +243,24 @@ V4DecodeResult V4RiceDecoder::GetNextBits(unsigned int num_requested_bits,
// All the bits that we need are in |current_word_|.
GetBitsFromCurrentWord(num_requested_bits, x);
} else {
- // |current_word_| contains fewer bits than we need so we store the current
- // value of |current_word_| in |lower|, then read in a new |current_word_|,
- // and then pick the remaining bits from the new value of |current_word_|.
- uint32_t lower = current_word_;
- // Bits we need after we read all of |current_word_|
+ // |current_word_| contains fewer bits than we need so read the remaining
+ // bits from |current_word_| into |lower|, and then call GetNextBits on the
+ // remaining number of bits, which will read in a new word into
+ // |current_word_|.
Scott Hess - ex-Googler 2016/08/12 23:25:46 I'm finding this confusing. I _think_ what it's s
vakh (use Gerrit instead) 2016/08/13 00:15:14 Let me illustrate with an example: Let's say we wa
Scott Hess - ex-Googler 2016/08/15 21:18:15 Yes, but the comment is written in terms of curren
vakh (use Gerrit instead) 2016/08/16 00:23:03 Done.
+ uint32_t lower;
+ V4DecodeResult result = GetNextBits(num_bits_left_in_current_word, &lower);
+ if (result != DECODE_SUCCESS) {
+ return result;
+ }
+
unsigned int num_bits_from_next_word =
num_requested_bits - num_bits_left_in_current_word;
uint32_t upper;
- V4DecodeResult result = GetNextWord(&current_word_);
+ result = GetNextBits(num_bits_from_next_word, &upper);
if (result != DECODE_SUCCESS) {
return result;
}
- GetBitsFromCurrentWord(num_bits_from_next_word, &upper);
- *x = (upper << (num_requested_bits - num_bits_from_next_word)) | lower;
+ *x = (upper << num_bits_left_in_current_word) | lower;
}
return DECODE_SUCCESS;
}

Powered by Google App Engine
This is Rietveld 408576698