components/safe_browsing_db/v4_rice.cc - Issue 2228393003: PVer4: DecodeHashes needs to sort the output of the Rice decoder

Side by Side Diff: components/safe_browsing_db/v4_rice.cc

Issue 2228393003: PVer4: DecodeHashes needs to sort the output of the Rice decoder (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@01_checksum

Patch Set: Use a vector to decode and sort uint32_t values. re-interpret the memory of that vector as raw hash… Created 4 years, 4 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved.	1 // Copyright 2016 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

	5 #include <algorithm>

	6 #include <vector>

	7

5 #include "base/logging.h"	8 #include "base/logging.h"

6 #include "base/numerics/safe_math.h"	9 #include "base/numerics/safe_math.h"

7 #include "base/strings/stringprintf.h"	10 #include "base/strings/stringprintf.h"

8 #include "components/safe_browsing_db/v4_rice.h"	11 #include "components/safe_browsing_db/v4_rice.h"

9	12

10 using ::google::protobuf::RepeatedField;	13 using ::google::protobuf::RepeatedField;

11 using ::google::protobuf::int32;	14 using ::google::protobuf::int32;

	15 using ::google::protobuf::int64;

12	16

13 namespace safe_browsing {	17 namespace safe_browsing {

14	18

15 namespace {	19 namespace {

16	20

17 const int kBitsPerByte = 8;	21 const int kBitsPerByte = 8;

18 const unsigned int kMaxBitIndex = kBitsPerByte * sizeof(uint32_t);	22 const unsigned int kMaxBitIndex = kBitsPerByte * sizeof(uint32_t);

19	23

20 void GetBytesFromUInt32(uint32_t word, char* bytes) {	24 uint32_t FlipEndianness(uint32_t word) {

21 const size_t mask = 0xFF;	25 return (word >> 24) \| ((word << 8) & 0x00FF0000) \|

22 bytes[0] = (char)(word & mask);	26 ((word >> 8) & 0x0000FF00) \| (word << 24);

23 bytes[1] = (char)((word >> 8) & mask);

24 bytes[2] = (char)((word >> 16) & mask);

25 bytes[3] = (char)((word >> 24) & mask);

26 }	27 }

27	28

28 } // namespace	29 } // namespace

29	30

30 // static	31 // static

31 V4DecodeResult V4RiceDecoder::ValidateInput(const int32 rice_parameter,	32 V4DecodeResult V4RiceDecoder::ValidateInput(const int32 rice_parameter,

32 const int32 num_entries,	33 const int32 num_entries,

33 const std::string& encoded_data) {	34 const std::string& encoded_data) {

34 if (num_entries < 0) {	35 if (num_entries < 0) {

35 NOTREACHED();	36 NOTREACHED();

(...skipping 11 matching lines...) Expand all Loading...
47	48

48 if (encoded_data.empty()) {	49 if (encoded_data.empty()) {

49 NOTREACHED();	50 NOTREACHED();

50 return ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE;	51 return ENCODED_DATA_UNEXPECTED_EMPTY_FAILURE;

51 }	52 }

52	53

53 return DECODE_SUCCESS;	54 return DECODE_SUCCESS;

54 }	55 }

55	56

56 // static	57 // static

57 V4DecodeResult V4RiceDecoder::DecodeIntegers(const int32 first_value,	58 V4DecodeResult V4RiceDecoder::DecodeIntegers(const int64 first_value,

58 const int32 rice_parameter,	59 const int32 rice_parameter,

59 const int32 num_entries,	60 const int32 num_entries,

60 const std::string& encoded_data,	61 const std::string& encoded_data,

61 RepeatedField<int32>* out) {	62 RepeatedField<int32>* out) {

62 DCHECK(out);	63 DCHECK(out);

63	64

64 V4DecodeResult result =	65 V4DecodeResult result =

65 ValidateInput(rice_parameter, num_entries, encoded_data);	66 ValidateInput(rice_parameter, num_entries, encoded_data);

66 if (result != DECODE_SUCCESS) {	67 if (result != DECODE_SUCCESS) {

67 return result;	68 return result;

(...skipping 20 matching lines...) Expand all Loading...
88 return DECODED_INTEGER_OVERFLOW_FAILURE;	89 return DECODED_INTEGER_OVERFLOW_FAILURE;

89 }	90 }

90	91

91 out->Add(last_value.ValueOrDie());	92 out->Add(last_value.ValueOrDie());

92 }	93 }

93	94

94 return DECODE_SUCCESS;	95 return DECODE_SUCCESS;

95 }	96 }

96	97

97 // static	98 // static

98 V4DecodeResult V4RiceDecoder::DecodeBytes(const int32 first_value,	99 V4DecodeResult V4RiceDecoder::DecodeBytes(const int64 first_value,

99 const int32 rice_parameter,	100 const int32 rice_parameter,

100 const int32 num_entries,	101 const int32 num_entries,

101 const std::string& encoded_data,	102 const std::string& encoded_data,

102 std::string* out) {	103 std::vector<uint32_t>* out) {

103 DCHECK(out);	104 DCHECK(out);

104	105

105 V4DecodeResult result =	106 V4DecodeResult result =

106 ValidateInput(rice_parameter, num_entries, encoded_data);	107 ValidateInput(rice_parameter, num_entries, encoded_data);

107 if (result != DECODE_SUCCESS) {	108 if (result != DECODE_SUCCESS) {

108 return result;	109 return result;

109 }	110 }

110 out->reserve((num_entries + 1) * 4);	111 out->reserve((num_entries + 1));

111	112

112 // Cast to unsigned since we don't look at the sign bit as a sign bit.	113 base::CheckedNumeric<uint32_t> last_value(first_value);

113 // first_value should have been an unsigned to begin with but proto don't	114 out->push_back(FlipEndianness(last_value.ValueOrDie()));

114 // allow that.

115 uint32_t first_value_unsigned = static_cast<uint32_t>(first_value);

116 base::CheckedNumeric<uint32_t> last_value(first_value_unsigned);

117 char bytes[4];

118 GetBytesFromUInt32(last_value.ValueOrDie(), bytes);

119 out->append(bytes, 4);

120	115

121 if (num_entries == 0) {	116 if (num_entries > 0) {

122 return DECODE_SUCCESS;	117 V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);

	118 while (decoder.HasAnotherValue()) {

	119 uint32_t offset;

	120 result = decoder.GetNextValue(&offset);

	121 if (result != DECODE_SUCCESS) {

	122 return result;

	123 }

	124

	125 last_value += offset;

	126 if (!last_value.IsValid()) {

	127 NOTREACHED();

	128 return DECODED_INTEGER_OVERFLOW_FAILURE;

	129 }

	130

	131 // This flipping is mandated by the protocol. For details, see:

	132 // https://developers.google.com/safe-browsing/v4/compression
	Scott Hess - ex-Googler 2016/08/15 21:18:15 The protocol says the data is in network byte orde The protocol says the data is in network byte order, it doesn't say that it needs to be flipped. If the platform is little-endian, flipping would be incorrect. vakh (use Gerrit instead) 2016/08/16 00:23:03 My machine is little-endian and it does require by Show quoted text On 2016/08/15 21:18:15, Scott Hess wrote: > The protocol says the data is in network byte order, it doesn't say that it > needs to be flipped. If the platform is little-endian, flipping would be > incorrect. My machine is little-endian and it does require byte flipping. The encoded data (in the form of a string) passed by the server is read bit-by-bit to produce the Rice-decoded values. I believe, that part of the process is independent of the machine's endianness. Once that decoder has produced a uint32_t value, the only way to match the checksum is to flip those bits (at least on a little-endian machine). I am not sure about the expected behavior on big-endian machines so I'd let Noe comment on that. FWIW, I know that the SafeBrowsing code in Android does this bit flipping unconditionally.
	133 out->push_back(FlipEndianness(last_value.ValueOrDie()));

	134 }

123 }	135 }

	136 std::sort(out->begin(), out->end());
	Scott Hess - ex-Googler 2016/08/15 21:18:15 Whoa! I could see ASSERT_TRUE(std::is_sorted(...) Whoa! I could see ASSERT_TRUE(std::is_sorted(...)) here, as if these aren't already sorted, I think something is fubar. vakh (use Gerrit instead) 2016/08/16 00:23:03 These aren't already sorted. That's because of the Show quoted text On 2016/08/15 21:18:15, Scott Hess wrote: > Whoa! I could see ASSERT_TRUE(std::is_sorted(...)) here, as if these aren't > already sorted, I think something is fubar. These aren't already sorted. That's because of the bit flipping I do just before push_back.
124	137

125 V4RiceDecoder decoder(rice_parameter, num_entries, encoded_data);	138 // This flipping is done so that when the vector is interpreted as a string,

126 while (decoder.HasAnotherValue()) {	139 // the bytes are in the correct order.

127 uint32_t offset;	140 for (unsigned i = 0; i < out->size(); i++) {

128 result = decoder.GetNextValue(&offset);	141 (out)[i] = FlipEndianness((out)[i]);

129 if (result != DECODE_SUCCESS) {

130 return result;

131 }

132

133 last_value += offset;

134 if (!last_value.IsValid()) {

135 NOTREACHED();

136 return DECODED_INTEGER_OVERFLOW_FAILURE;

137 }

138

139 GetBytesFromUInt32(last_value.ValueOrDie(), bytes);

140 out->append(bytes, 4);

141 }	142 }

142	143

143 return DECODE_SUCCESS;	144 return DECODE_SUCCESS;

144 }	145 }

145	146

146 V4RiceDecoder::V4RiceDecoder(const int rice_parameter,	147 V4RiceDecoder::V4RiceDecoder(const int rice_parameter,

147 const int num_entries,	148 const int num_entries,

148 const std::string& encoded_data)	149 const std::string& encoded_data)

149 : rice_parameter_(rice_parameter),	150 : rice_parameter_(rice_parameter),

150 num_entries_(num_entries),	151 num_entries_(num_entries),

(...skipping 80 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
231 return result;	232 return result;

232 }	233 }

233 }	234 }

234	235

235 unsigned int num_bits_left_in_current_word =	236 unsigned int num_bits_left_in_current_word =

236 kMaxBitIndex - current_word_bit_index_;	237 kMaxBitIndex - current_word_bit_index_;

237 if (num_bits_left_in_current_word >= num_requested_bits) {	238 if (num_bits_left_in_current_word >= num_requested_bits) {

238 // All the bits that we need are in \|current_word_\|.	239 // All the bits that we need are in \|current_word_\|.

239 GetBitsFromCurrentWord(num_requested_bits, x);	240 GetBitsFromCurrentWord(num_requested_bits, x);

240 } else {	241 } else {

241 // \|current_word_\| contains fewer bits than we need so we store the current	242 // \|current_word_\| contains fewer bits than we need so read the remaining

242 // value of \|current_word_\| in \|lower\|, then read in a new \|current_word_\|,	243 // bits from \|current_word_\| into \|lower\|, and then call GetNextBits on the

243 // and then pick the remaining bits from the new value of \|current_word_\|.	244 // remaining number of bits, which will read in a new word into

244 uint32_t lower = current_word_;	245 // \|current_word_\|.

245 // Bits we need after we read all of \|current_word_\|	246 uint32_t lower;

	247 V4DecodeResult result = GetNextBits(num_bits_left_in_current_word, &lower);

	248 if (result != DECODE_SUCCESS) {

	249 return result;

	250 }

	251

246 unsigned int num_bits_from_next_word =	252 unsigned int num_bits_from_next_word =

247 num_requested_bits - num_bits_left_in_current_word;	253 num_requested_bits - num_bits_left_in_current_word;

248 uint32_t upper;	254 uint32_t upper;

249 V4DecodeResult result = GetNextWord(&current_word_);	255 result = GetNextBits(num_bits_from_next_word, &upper);

250 if (result != DECODE_SUCCESS) {	256 if (result != DECODE_SUCCESS) {

251 return result;	257 return result;

252 }	258 }

253 GetBitsFromCurrentWord(num_bits_from_next_word, &upper);	259 *x = (upper << num_bits_left_in_current_word) \| lower;

254 *x = (upper << (num_requested_bits - num_bits_from_next_word)) \| lower;

255 }	260 }

256 return DECODE_SUCCESS;	261 return DECODE_SUCCESS;

257 }	262 }

258	263

259 void V4RiceDecoder::GetBitsFromCurrentWord(unsigned int num_requested_bits,	264 void V4RiceDecoder::GetBitsFromCurrentWord(unsigned int num_requested_bits,

260 uint32_t* x) {	265 uint32_t* x) {

261 uint32_t mask = 0xFFFFFFFF >> (kMaxBitIndex - num_requested_bits);	266 uint32_t mask = 0xFFFFFFFF >> (kMaxBitIndex - num_requested_bits);

262 *x = current_word_ & mask;	267 *x = current_word_ & mask;

263 current_word_ = current_word_ >> num_requested_bits;	268 current_word_ = current_word_ >> num_requested_bits;

264 current_word_bit_index_ += num_requested_bits;	269 current_word_bit_index_ += num_requested_bits;

(...skipping 11 matching lines...) Expand all Loading...
276 bits_read, current_word_, data_byte_index_, current_word_bit_index_,	281 bits_read, current_word_, data_byte_index_, current_word_bit_index_,

277 rice_parameter_);	282 rice_parameter_);

278 }	283 }

279	284

280 std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder) {	285 std::ostream& operator<<(std::ostream& os, const V4RiceDecoder& rice_decoder) {

281 os << rice_decoder.DebugString();	286 os << rice_decoder.DebugString();

282 return os;	287 return os;

283 }	288 }

284	289

285 } // namespace safe_browsing	290 } // namespace safe_browsing

OLD	NEW

« no previous file with comments | « components/safe_browsing_db/v4_rice.h ('k') | components/safe_browsing_db/v4_rice_unittest.cc » ('j') | no next file with comments »