Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(945)

Side by Side Diff: components/safe_browsing_db/v4_rice.cc

Issue 2228393003: PVer4: DecodeHashes needs to sort the output of the Rice decoder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@01_checksum
Patch Set: rebase Created 4 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « components/safe_browsing_db/v4_rice.h ('k') | components/safe_browsing_db/v4_rice_unittest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include <algorithm>
6 #include <vector>
7
5 #include "base/logging.h" 8 #include "base/logging.h"
6 #include "base/numerics/safe_math.h" 9 #include "base/numerics/safe_math.h"
7 #include "base/strings/stringprintf.h" 10 #include "base/strings/stringprintf.h"
8 #include "components/safe_browsing_db/v4_rice.h" 11 #include "components/safe_browsing_db/v4_rice.h"
9 12
10 using ::google::protobuf::RepeatedField; 13 using ::google::protobuf::RepeatedField;
11 using ::google::protobuf::int32; 14 using ::google::protobuf::int32;
15 using ::google::protobuf::int64;
16
17 #if !defined(ARCH_CPU_LITTLE_ENDIAN) || (ARCH_CPU_LITTLE_ENDIAN != 1)
18 #error The code below assumes little-endianness.
19 #endif
12 20
13 namespace safe_browsing { 21 namespace safe_browsing {
14 22
15 namespace { 23 namespace {
16 24
17 const int kBitsPerByte = 8; 25 const int kBitsPerByte = 8;
18 const unsigned int kMaxBitIndex = kBitsPerByte * sizeof(uint32_t); 26 const unsigned int kMaxBitIndex = kBitsPerByte * sizeof(uint32_t);
19 27
20 void GetBytesFromUInt32(uint32_t word, char* bytes) { 28 uint32_t FlipEndianness(uint32_t word) {
Nathan Parker 2016/08/17 20:57:27 I feel like there's no point in building in an end
vakh (use Gerrit instead) 2016/08/17 22:10:41 [I don't feel strongly about it bit have a slight
Scott Hess - ex-Googler 2016/08/17 22:42:26 I agree with this, if it's reasonable to just do i
vakh (use Gerrit instead) 2016/08/17 23:40:07 Done.
21 const size_t mask = 0xFF; 29 return (word >> 24) | ((word << 8) & 0x00FF0000) |
22 bytes[0] = (char)(word & mask); 30 ((word >> 8) & 0x0000FF00) | (word << 24);
23 bytes[1] = (char)((word >> 8) & mask);
24 bytes[2] = (char)((word >> 16) & mask);
25 bytes[3] = (char)((word >> 24) & mask);
26 } 31 }
27 32
28 } // namespace 33 } // namespace
29 34
30 // static 35 // static
31 V4DecodeResult V4RiceDecoder::ValidateInput(const int32 rice_parameter, 36 V4DecodeResult V4RiceDecoder::ValidateInput(const int32 rice_parameter,
32 const int32 num_entries, 37 const int32 num_entries,
33 const std::string& encoded_data) { 38 const std::string& encoded_data) {
34 if (num_entries < 0) { 39 if (num_entries < 0) {
35 NOTREACHED(); 40 NOTREACHED();
(...skipping 11 matching lines...) Expand all
47 52
48 if (encoded_data.empty()) { 53 if (encoded_data.empty()) {
49 NOTREACHED(); 54 NOTREACHED();
50 return ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE; 55 return ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE;
51 } 56 }
52 57
53 return DECODE_SUCCESS; 58 return DECODE_SUCCESS;
54 } 59 }
55 60
56 // static 61 // static
57 V4DecodeResult V4RiceDecoder::DecodeIntegers(const int32 first_value, 62 V4DecodeResult V4RiceDecoder::DecodeIntegers(const int64 first_value,
58 const int32 rice_parameter, 63 const int32 rice_parameter,
59 const int32 num_entries, 64 const int32 num_entries,
60 const std::string& encoded_data, 65 const std::string& encoded_data,
61 RepeatedField<int32>* out) { 66 RepeatedField<int32>* out) {
62 DCHECK(out); 67 DCHECK(out);
63 68
64 V4DecodeResult result = 69 V4DecodeResult result =
65 ValidateInput(rice_parameter, num_entries, encoded_data); 70 ValidateInput(rice_parameter, num_entries, encoded_data);
66 if (result != DECODE_SUCCESS) { 71 if (result != DECODE_SUCCESS) {
67 return result; 72 return result;
(...skipping 20 matching lines...) Expand all
88 return DECODED_INTEGER_OVERFLOW_FAILURE; 93 return DECODED_INTEGER_OVERFLOW_FAILURE;
89 } 94 }
90 95
91 out->Add(last_value.ValueOrDie()); 96 out->Add(last_value.ValueOrDie());
92 } 97 }
93 98
94 return DECODE_SUCCESS; 99 return DECODE_SUCCESS;
95 } 100 }
96 101
97 // static 102 // static
98 V4DecodeResult V4RiceDecoder::DecodeBytes(const int32 first_value, 103 V4DecodeResult V4RiceDecoder::DecodeBytes(const int64 first_value,
99 const int32 rice_parameter, 104 const int32 rice_parameter,
100 const int32 num_entries, 105 const int32 num_entries,
101 const std::string& encoded_data, 106 const std::string& encoded_data,
102 std::string* out) { 107 std::vector<uint32_t>* out) {
Scott Hess - ex-Googler 2016/08/17 22:42:26 "DecodeBytes" decodes to a vector of uint32_t. Ma
vakh (use Gerrit instead) 2016/08/17 23:40:07 Done.
103 DCHECK(out); 108 DCHECK(out);
104 109
105 V4DecodeResult result = 110 V4DecodeResult result =
106 ValidateInput(rice_parameter, num_entries, encoded_data); 111 ValidateInput(rice_parameter, num_entries, encoded_data);
107 if (result != DECODE_SUCCESS) { 112 if (result != DECODE_SUCCESS) {
108 return result; 113 return result;
109 } 114 }
110 out->reserve((num_entries + 1) * 4); 115 out->reserve((num_entries + 1));
111 116
112 // Cast to unsigned since we don't look at the sign bit as a sign bit. 117 base::CheckedNumeric<uint32_t> last_value(first_value);
113 // first_value should have been an unsigned to begin with but proto don't 118 out->push_back(FlipEndianness(last_value.ValueOrDie()));
114 // allow that.
115 uint32_t first_value_unsigned = static_cast<uint32_t>(first_value);
116 base::CheckedNumeric<uint32_t> last_value(first_value_unsigned);
117 char bytes[4];
118 GetBytesFromUInt32(last_value.ValueOrDie(), bytes);
119 out->append(bytes, 4);
120 119
121 if (num_entries == 0) { 120 if (num_entries > 0) {
122 return DECODE_SUCCESS; 121 V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);
122 while (decoder.HasAnotherValue()) {
123 uint32_t offset;
124 result = decoder.GetNextValue(&offset);
125 if (result != DECODE_SUCCESS) {
126 return result;
127 }
128
129 last_value += offset;
130 if (!last_value.IsValid()) {
131 NOTREACHED();
132 return DECODED_INTEGER_OVERFLOW_FAILURE;
133 }
134
135 // This flipping is done so that the decoded uint32 is interpreted
Nathan Parker 2016/08/17 20:57:27 re-flow comment. Make a comment that the ints need
vakh (use Gerrit instead) 2016/08/17 22:10:41 Please see my other comment about this below.
136 // correcly
137 // as a string of 4 bytes.
138 out->push_back(FlipEndianness(last_value.ValueOrDie()));
139 }
123 } 140 }
124 141
125 V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data); 142 // Flipping the bytes, as done above, destroys the sort order. Sort the
126 while (decoder.HasAnotherValue()) { 143 // values back.
127 uint32_t offset; 144 std::sort(out->begin(), out->end());
128 result = decoder.GetNextValue(&offset);
129 if (result != DECODE_SUCCESS) {
130 return result;
131 }
132 145
133 last_value += offset; 146 // This flipping is done so that when the vector is interpreted as a string,
134 if (!last_value.IsValid()) { 147 // the bytes are in the correct order.
135 NOTREACHED(); 148 for (unsigned i = 0; i < out->size(); i++) {
Scott Hess - ex-Googler 2016/08/17 22:42:26 size_t rather than unsigned, here.
vakh (use Gerrit instead) 2016/08/17 23:40:07 Done.
136 return DECODED_INTEGER_OVERFLOW_FAILURE; 149 (*out)[i] = FlipEndianness((*out)[i]);
Nathan Parker 2016/08/17 20:57:27 Wait, now I'm confused again. You're flipping, so
vakh (use Gerrit instead) 2016/08/17 22:10:41 I think the explanation goes like this. The follow
137 }
138
139 GetBytesFromUInt32(last_value.ValueOrDie(), bytes);
140 out->append(bytes, 4);
141 } 150 }
142 151
143 return DECODE_SUCCESS; 152 return DECODE_SUCCESS;
144 } 153 }
145 154
146 V4RiceDecoder::V4RiceDecoder(const int rice_parameter, 155 V4RiceDecoder::V4RiceDecoder(const int rice_parameter,
147 const int num_entries, 156 const int num_entries,
148 const std::string& encoded_data) 157 const std::string& encoded_data)
149 : rice_parameter_(rice_parameter), 158 : rice_parameter_(rice_parameter),
150 num_entries_(num_entries), 159 num_entries_(num_entries),
(...skipping 80 matching lines...) Expand 10 before | Expand all | Expand 10 after
231 return result; 240 return result;
232 } 241 }
233 } 242 }
234 243
235 unsigned int num_bits_left_in_current_word = 244 unsigned int num_bits_left_in_current_word =
236 kMaxBitIndex - current_word_bit_index_; 245 kMaxBitIndex - current_word_bit_index_;
237 if (num_bits_left_in_current_word >= num_requested_bits) { 246 if (num_bits_left_in_current_word >= num_requested_bits) {
238 // All the bits that we need are in |current_word_|. 247 // All the bits that we need are in |current_word_|.
239 GetBitsFromCurrentWord(num_requested_bits, x); 248 GetBitsFromCurrentWord(num_requested_bits, x);
240 } else { 249 } else {
241 // |current_word_| contains fewer bits than we need so we store the current 250 // |current_word_| contains fewer bits than we need so read the remaining
242 // value of |current_word_| in |lower|, then read in a new |current_word_|, 251 // bits from |current_word_| into |lower|, and then call GetNextBits on the
243 // and then pick the remaining bits from the new value of |current_word_|. 252 // remaining number of bits, which will read in a new word into
244 uint32_t lower = current_word_; 253 // |current_word_|.
245 // Bits we need after we read all of |current_word_| 254 uint32_t lower;
255 GetBitsFromCurrentWord(num_bits_left_in_current_word, &lower);
256
246 unsigned int num_bits_from_next_word = 257 unsigned int num_bits_from_next_word =
247 num_requested_bits - num_bits_left_in_current_word; 258 num_requested_bits - num_bits_left_in_current_word;
248 uint32_t upper; 259 uint32_t upper;
249 V4DecodeResult result = GetNextWord(&current_word_); 260 V4DecodeResult result = GetNextBits(num_bits_from_next_word, &upper);
250 if (result != DECODE_SUCCESS) { 261 if (result != DECODE_SUCCESS) {
251 return result; 262 return result;
252 } 263 }
253 GetBitsFromCurrentWord(num_bits_from_next_word, &upper); 264 *x = (upper << num_bits_left_in_current_word) | lower;
254 *x = (upper << (num_requested_bits - num_bits_from_next_word)) | lower;
255 } 265 }
256 return DECODE_SUCCESS; 266 return DECODE_SUCCESS;
257 } 267 }
258 268
259 void V4RiceDecoder::GetBitsFromCurrentWord(unsigned int num_requested_bits, 269 void V4RiceDecoder::GetBitsFromCurrentWord(unsigned int num_requested_bits,
Nathan Parker 2016/08/17 20:57:27 Could this just return x?
260 uint32_t* x) { 270 uint32_t* x) {
261 uint32_t mask = 0xFFFFFFFF >> (kMaxBitIndex - num_requested_bits); 271 uint32_t mask = 0xFFFFFFFF >> (kMaxBitIndex - num_requested_bits);
262 *x = current_word_ & mask; 272 *x = current_word_ & mask;
263 current_word_ = current_word_ >> num_requested_bits; 273 current_word_ = current_word_ >> num_requested_bits;
264 current_word_bit_index_ += num_requested_bits; 274 current_word_bit_index_ += num_requested_bits;
265 }; 275 };
266 276
267 std::string V4RiceDecoder::DebugString() const { 277 std::string V4RiceDecoder::DebugString() const {
268 // Calculates the total number of bits that we have read from the buffer, 278 // Calculates the total number of bits that we have read from the buffer,
269 // excluding those that have been read into current_word_ but not yet 279 // excluding those that have been read into current_word_ but not yet
270 // consumed byt GetNextBits(). 280 // consumed byt GetNextBits().
271 unsigned bits_read = (data_byte_index_ - sizeof(uint32_t)) * kBitsPerByte + 281 unsigned bits_read = (data_byte_index_ - sizeof(uint32_t)) * kBitsPerByte +
272 current_word_bit_index_; 282 current_word_bit_index_;
273 return base::StringPrintf( 283 return base::StringPrintf(
274 "bits_read: %x; current_word_: %x; data_byte_index_; %x, " 284 "bits_read: %x; current_word_: %x; data_byte_index_; %x, "
275 "current_word_bit_index_: %x; rice_parameter_: %x", 285 "current_word_bit_index_: %x; rice_parameter_: %x",
276 bits_read, current_word_, data_byte_index_, current_word_bit_index_, 286 bits_read, current_word_, data_byte_index_, current_word_bit_index_,
277 rice_parameter_); 287 rice_parameter_);
278 } 288 }
279 289
280 std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder) { 290 std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder) {
281 os << rice_decoder.DebugString(); 291 os << rice_decoder.DebugString();
282 return os; 292 return os;
283 } 293 }
284 294
285 } // namespace safe_browsing 295 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « components/safe_browsing_db/v4_rice.h ('k') | components/safe_browsing_db/v4_rice_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698