components/safe_browsing_db/v4_rice.cc - Issue 2228393003: PVer4: DecodeHashes needs to sort the output of the Rice decoder

Unified Diff: components/safe_browsing_db/v4_rice.cc

Issue 2228393003: PVer4: DecodeHashes needs to sort the output of the Rice decoder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@01_checksum

Patch Set: Fix the corresponding test in v4_store_unittest.cc also Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « components/safe_browsing_db/v4_rice.h ('k') | components/safe_browsing_db/v4_rice_unittest.cc » ('j') | components/safe_browsing_db/v4_rice_unittest.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: components/safe_browsing_db/v4_rice.cc

diff --git a/components/safe_browsing_db/v4_rice.cc b/components/safe_browsing_db/v4_rice.cc

index 0e7148048fc379e1a3645906d818845e9932ab16..2486962fb12a5bed3cfb2c29c8496e1c8ca03150 100644

--- a/components/safe_browsing_db/v4_rice.cc

+++ b/components/safe_browsing_db/v4_rice.cc

@@ -2,13 +2,15 @@

// Use of this source code is governed by a BSD-style license that can be

// found in the LICENSE file.

-#include "base/logging.h"

Scott Hess - ex-Googler 2016/08/12 23:25:46 I think you still need logging.h for things like N

vakh (use Gerrit instead) 2016/08/13 00:15:14 I agree. Didn't realize it was needed for that. Ad

+#include <set>

#include "base/numerics/safe_math.h"

#include "base/strings/stringprintf.h"

#include "components/safe_browsing_db/v4_rice.h"

using ::google::protobuf::RepeatedField;

using ::google::protobuf::int32;

+using ::google::protobuf::int64;

namespace safe_browsing {

@@ -17,12 +19,17 @@ namespace {

const int kBitsPerByte = 8;

const unsigned int kMaxBitIndex = kBitsPerByte * sizeof(uint32_t);

+uint32_t FlipEndianness(uint32_t word) {

Scott Hess - ex-Googler 2016/08/12 23:25:47 Is this right? I don't _think_ we have a mix of l

vakh (use Gerrit instead) 2016/08/13 00:15:14 Is this flip always needed: Waiting to get a confi

Scott Hess - ex-Googler 2016/08/15 21:18:15 To be clear - I suspect they are just serving netw

vakh (use Gerrit instead) 2016/08/16 00:23:03 Noe -- can you please review this part of the code

Noe -- can you please review this part of the code since you are most aware of the server side implementation? This is what my implementation does for now: 1. decode encoded_data field using Rice decoder. 2. Flip the bits of each output uint32_t value 3. Sort these uint32_t (since flipping the bits destroys sort order). 4. Flip the bits back and reinterpret them as bytes in a string. Can you specifically comment on what parts of this are avoidable and what parts are platform specific?

+ return (word >> 24) | ((word << 8) & 0x00FF0000) |

+ ((word >> 8) & 0x0000FF00) | (word << 24);

void GetBytesFromUInt32(uint32_t word, char* bytes) {

const size_t mask = 0xFF;

- bytes[0] = (char)(word & mask);

- bytes[1] = (char)((word >> 8) & mask);

- bytes[2] = (char)((word >> 16) & mask);

- bytes[3] = (char)((word >> 24) & mask);

+ bytes[3] = (char)(word & mask);

+ bytes[2] = (char)((word >> 8) & mask);

+ bytes[1] = (char)((word >> 16) & mask);

+ bytes[0] = (char)((word >> 24) & mask);

}

} // namespace

@@ -54,7 +61,7 @@ V4DecodeResult V4RiceDecoder::ValidateInput(const int32 rice_parameter,

}

// static

-V4DecodeResult V4RiceDecoder::DecodeIntegers(const int32 first_value,

+V4DecodeResult V4RiceDecoder::DecodeIntegers(const int64 first_value,

const int32 rice_parameter,

const int32 num_entries,

const std::string& encoded_data,

@@ -95,7 +102,7 @@ V4DecodeResult V4RiceDecoder::DecodeIntegers(const int32 first_value,

}

// static

-V4DecodeResult V4RiceDecoder::DecodeBytes(const int32 first_value,

+V4DecodeResult V4RiceDecoder::DecodeBytes(const int64 first_value,

const int32 rice_parameter,

const int32 num_entries,

const std::string& encoded_data,

@@ -109,34 +116,32 @@ V4DecodeResult V4RiceDecoder::DecodeBytes(const int32 first_value,

}

out->reserve((num_entries + 1) * 4);

- // Cast to unsigned since we don't look at the sign bit as a sign bit.

- // first_value should have been an unsigned to begin with but proto don't

- // allow that.

- uint32_t first_value_unsigned = static_cast<uint32_t>(first_value);

- base::CheckedNumeric<uint32_t> last_value(first_value_unsigned);

- char bytes[4];

- GetBytesFromUInt32(last_value.ValueOrDie(), bytes);

- out->append(bytes, 4);

- if (num_entries == 0) {

- return DECODE_SUCCESS;

- }

+ base::CheckedNumeric<uint32_t> last_value(first_value);

+ std::set<uint32_t> hash_prefixes;

+ hash_prefixes.insert(FlipEndianness(last_value.ValueOrDie()));

+ if (num_entries > 0) {

+ V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);

+ while (decoder.HasAnotherValue()) {

+ uint32_t offset;

+ result = decoder.GetNextValue(&offset);

+ if (result != DECODE_SUCCESS) {

+ return result;

+ }

- V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);

- while (decoder.HasAnotherValue()) {

- uint32_t offset;

- result = decoder.GetNextValue(&offset);

- if (result != DECODE_SUCCESS) {

- return result;

- }

+ last_value += offset;

+ if (!last_value.IsValid()) {

+ NOTREACHED();

+ return DECODED_INTEGER_OVERFLOW_FAILURE;

+ }

- last_value += offset;

- if (!last_value.IsValid()) {

- NOTREACHED();

- return DECODED_INTEGER_OVERFLOW_FAILURE;

+ hash_prefixes.insert(FlipEndianness(last_value.ValueOrDie()));

}

+ }

- GetBytesFromUInt32(last_value.ValueOrDie(), bytes);

+ char bytes[4];

+ for (const uint32_t& hash_prefix : hash_prefixes) {

+ GetBytesFromUInt32(hash_prefix, bytes);

out->append(bytes, 4);

}

@@ -238,20 +243,24 @@ V4DecodeResult V4RiceDecoder::GetNextBits(unsigned int num_requested_bits,

// All the bits that we need are in |current_word_|.

GetBitsFromCurrentWord(num_requested_bits, x);

} else {

- // |current_word_| contains fewer bits than we need so we store the current

- // and then pick the remaining bits from the new value of |current_word_|.

- uint32_t lower = current_word_;

- // Bits we need after we read all of |current_word_|

+ // |current_word_| contains fewer bits than we need so read the remaining

+ // bits from |current_word_| into |lower|, and then call GetNextBits on the

+ // remaining number of bits, which will read in a new word into

+ // |current_word_|.

Scott Hess - ex-Googler 2016/08/12 23:25:46 I'm finding this confusing. I _think_ what it's s

vakh (use Gerrit instead) 2016/08/13 00:15:14 Let me illustrate with an example: Let's say we wa

Scott Hess - ex-Googler 2016/08/15 21:18:15 Yes, but the comment is written in terms of curren

vakh (use Gerrit instead) 2016/08/16 00:23:03 Done.

+ uint32_t lower;

+ V4DecodeResult result = GetNextBits(num_bits_left_in_current_word, &lower);

+ if (result != DECODE_SUCCESS) {

+ return result;

+ }

unsigned int num_bits_from_next_word =

num_requested_bits - num_bits_left_in_current_word;

uint32_t upper;

- V4DecodeResult result = GetNextWord(&current_word_);

+ result = GetNextBits(num_bits_from_next_word, &upper);

if (result != DECODE_SUCCESS) {

return result;

}

- GetBitsFromCurrentWord(num_bits_from_next_word, &upper);

- *x = (upper << (num_requested_bits - num_bits_from_next_word)) | lower;

+ *x = (upper << num_bits_left_in_current_word) | lower;

}

return DECODE_SUCCESS;

}