components/safe_browsing_db/v4_rice.cc - Issue 2228393003: PVer4: DecodeHashes needs to sort the output of the Rice decoder

Unified Diff: components/safe_browsing_db/v4_rice.cc

Issue 2228393003: PVer4: DecodeHashes needs to sort the output of the Rice decoder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@01_checksum

Patch Set: rebase Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: components/safe_browsing_db/v4_rice.cc

diff --git a/components/safe_browsing_db/v4_rice.cc b/components/safe_browsing_db/v4_rice.cc

index 0e7148048fc379e1a3645906d818845e9932ab16..b130526ee91c268dc3edaba01516433cfda106bd 100644

--- a/components/safe_browsing_db/v4_rice.cc

+++ b/components/safe_browsing_db/v4_rice.cc

@@ -2,6 +2,9 @@

// Use of this source code is governed by a BSD-style license that can be

// found in the LICENSE file.

+#include <algorithm>

+#include <vector>

#include "base/logging.h"

#include "base/numerics/safe_math.h"

#include "base/strings/stringprintf.h"

@@ -9,6 +12,11 @@

using ::google::protobuf::RepeatedField;

using ::google::protobuf::int32;

+using ::google::protobuf::int64;

+#if !defined(ARCH_CPU_LITTLE_ENDIAN) || (ARCH_CPU_LITTLE_ENDIAN != 1)

+#error The code below assumes little-endianness.

+#endif

namespace safe_browsing {

@@ -17,12 +25,9 @@ namespace {

const int kBitsPerByte = 8;

const unsigned int kMaxBitIndex = kBitsPerByte * sizeof(uint32_t);

-void GetBytesFromUInt32(uint32_t word, char* bytes) {

- const size_t mask = 0xFF;

- bytes[0] = (char)(word & mask);

- bytes[1] = (char)((word >> 8) & mask);

- bytes[2] = (char)((word >> 16) & mask);

- bytes[3] = (char)((word >> 24) & mask);

+uint32_t FlipEndianness(uint32_t word) {

Nathan Parker 2016/08/17 20:57:27 I feel like there's no point in building in an end

vakh (use Gerrit instead) 2016/08/17 22:10:41 [I don't feel strongly about it bit have a slight

Scott Hess - ex-Googler 2016/08/17 22:42:26 I agree with this, if it's reasonable to just do i

vakh (use Gerrit instead) 2016/08/17 23:40:07 Done.

+ return (word >> 24) | ((word << 8) & 0x00FF0000) |

+ ((word >> 8) & 0x0000FF00) | (word << 24);

}

} // namespace

@@ -54,7 +59,7 @@ V4DecodeResult V4RiceDecoder::ValidateInput(const int32 rice_parameter,

}

// static

-V4DecodeResult V4RiceDecoder::DecodeIntegers(const int32 first_value,

+V4DecodeResult V4RiceDecoder::DecodeIntegers(const int64 first_value,

const int32 rice_parameter,

const int32 num_entries,

const std::string& encoded_data,

@@ -95,11 +100,11 @@ V4DecodeResult V4RiceDecoder::DecodeIntegers(const int32 first_value,

}

// static

-V4DecodeResult V4RiceDecoder::DecodeBytes(const int32 first_value,

+V4DecodeResult V4RiceDecoder::DecodeBytes(const int64 first_value,

const int32 rice_parameter,

const int32 num_entries,

const std::string& encoded_data,

- std::string* out) {

+ std::vector<uint32_t>* out) {

Scott Hess - ex-Googler 2016/08/17 22:42:26 "DecodeBytes" decodes to a vector of uint32_t. Ma

vakh (use Gerrit instead) 2016/08/17 23:40:07 Done.

DCHECK(out);

V4DecodeResult result =

@@ -107,37 +112,41 @@ V4DecodeResult V4RiceDecoder::DecodeBytes(const int32 first_value,

if (result != DECODE_SUCCESS) {

return result;

}

- out->reserve((num_entries + 1) * 4);

- // Cast to unsigned since we don't look at the sign bit as a sign bit.

- // first_value should have been an unsigned to begin with but proto don't

- // allow that.

- uint32_t first_value_unsigned = static_cast<uint32_t>(first_value);

- base::CheckedNumeric<uint32_t> last_value(first_value_unsigned);

- char bytes[4];

- GetBytesFromUInt32(last_value.ValueOrDie(), bytes);

- out->append(bytes, 4);

+ out->reserve((num_entries + 1));

+ base::CheckedNumeric<uint32_t> last_value(first_value);

+ out->push_back(FlipEndianness(last_value.ValueOrDie()));

+ if (num_entries > 0) {

+ V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);

+ while (decoder.HasAnotherValue()) {

+ uint32_t offset;

+ result = decoder.GetNextValue(&offset);

+ if (result != DECODE_SUCCESS) {

+ return result;

+ }

- if (num_entries == 0) {

- return DECODE_SUCCESS;

- }

+ last_value += offset;

+ if (!last_value.IsValid()) {

+ NOTREACHED();

+ return DECODED_INTEGER_OVERFLOW_FAILURE;

+ }

- V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);

- while (decoder.HasAnotherValue()) {

- uint32_t offset;

- result = decoder.GetNextValue(&offset);

- if (result != DECODE_SUCCESS) {

- return result;

+ // This flipping is done so that the decoded uint32 is interpreted

Nathan Parker 2016/08/17 20:57:27 re-flow comment. Make a comment that the ints need

vakh (use Gerrit instead) 2016/08/17 22:10:41 Please see my other comment about this below.

+ // correcly

+ // as a string of 4 bytes.

+ out->push_back(FlipEndianness(last_value.ValueOrDie()));

}

+ }

- last_value += offset;

- if (!last_value.IsValid()) {

- NOTREACHED();

- return DECODED_INTEGER_OVERFLOW_FAILURE;

- }

+ // Flipping the bytes, as done above, destroys the sort order. Sort the

+ // values back.

+ std::sort(out->begin(), out->end());

- GetBytesFromUInt32(last_value.ValueOrDie(), bytes);

- out->append(bytes, 4);

+ // This flipping is done so that when the vector is interpreted as a string,

+ // the bytes are in the correct order.

+ for (unsigned i = 0; i < out->size(); i++) {

Scott Hess - ex-Googler 2016/08/17 22:42:26 size_t rather than unsigned, here.

vakh (use Gerrit instead) 2016/08/17 23:40:07 Done.

+ (*out)[i] = FlipEndianness((*out)[i]);

Nathan Parker 2016/08/17 20:57:27 Wait, now I'm confused again. You're flipping, so

vakh (use Gerrit instead) 2016/08/17 22:10:41 I think the explanation goes like this. The follow

}

return DECODE_SUCCESS;

@@ -238,20 +247,21 @@ V4DecodeResult V4RiceDecoder::GetNextBits(unsigned int num_requested_bits,

// All the bits that we need are in |current_word_|.

GetBitsFromCurrentWord(num_requested_bits, x);

} else {

- // |current_word_| contains fewer bits than we need so we store the current

- // and then pick the remaining bits from the new value of |current_word_|.

- uint32_t lower = current_word_;

- // Bits we need after we read all of |current_word_|

+ // |current_word_| contains fewer bits than we need so read the remaining

+ // bits from |current_word_| into |lower|, and then call GetNextBits on the

+ // remaining number of bits, which will read in a new word into

+ // |current_word_|.

+ uint32_t lower;

+ GetBitsFromCurrentWord(num_bits_left_in_current_word, &lower);

unsigned int num_bits_from_next_word =

num_requested_bits - num_bits_left_in_current_word;

uint32_t upper;

- V4DecodeResult result = GetNextWord(&current_word_);

+ V4DecodeResult result = GetNextBits(num_bits_from_next_word, &upper);

if (result != DECODE_SUCCESS) {

return result;

}

- GetBitsFromCurrentWord(num_bits_from_next_word, &upper);

- *x = (upper << (num_requested_bits - num_bits_from_next_word)) | lower;

+ *x = (upper << num_bits_left_in_current_word) | lower;

}

return DECODE_SUCCESS;

}

« no previous file with comments | « components/safe_browsing_db/v4_rice.h ('k') | components/safe_browsing_db/v4_rice_unittest.cc » ('j') | no next file with comments »