OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "sync/internal_api/public/base/unique_position.h" | 5 #include "sync/internal_api/public/base/unique_position.h" |
6 | 6 |
7 #include "base/basictypes.h" | 7 #include <limits> |
| 8 |
8 #include "base/logging.h" | 9 #include "base/logging.h" |
9 #include "base/rand_util.h" | 10 #include "base/rand_util.h" |
10 #include "base/stl_util.h" | 11 #include "base/stl_util.h" |
11 #include "base/strings/string_number_conversions.h" | 12 #include "base/strings/string_number_conversions.h" |
12 #include "sync/protocol/unique_position.pb.h" | 13 #include "sync/protocol/unique_position.pb.h" |
13 #include "third_party/zlib/zlib.h" | 14 #include "third_party/zlib/zlib.h" |
14 | 15 |
15 namespace syncer { | 16 namespace syncer { |
16 | 17 |
17 const size_t UniquePosition::kSuffixLength = 28; | 18 const size_t UniquePosition::kSuffixLength = 28; |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
77 << " did not match specified length " << proto.uncompressed_length(); | 78 << " did not match specified length " << proto.uncompressed_length(); |
78 return UniquePosition::CreateInvalid(); | 79 return UniquePosition::CreateInvalid(); |
79 } | 80 } |
80 return UniquePosition(Compress(un_gzipped)); | 81 return UniquePosition(Compress(un_gzipped)); |
81 } else { | 82 } else { |
82 return UniquePosition::CreateInvalid(); | 83 return UniquePosition::CreateInvalid(); |
83 } | 84 } |
84 } | 85 } |
85 | 86 |
86 // static. | 87 // static. |
87 UniquePosition UniquePosition::FromInt64( | 88 UniquePosition UniquePosition::FromInt64(int64_t x, const std::string& suffix) { |
88 int64 x, const std::string& suffix) { | 89 uint64_t y = static_cast<uint64_t>(x); |
89 uint64 y = static_cast<uint64>(x); | |
90 y ^= 0x8000000000000000ULL; // Make it non-negative. | 90 y ^= 0x8000000000000000ULL; // Make it non-negative. |
91 std::string bytes(8, 0); | 91 std::string bytes(8, 0); |
92 for (int i = 7; i >= 0; --i) { | 92 for (int i = 7; i >= 0; --i) { |
93 bytes[i] = static_cast<uint8>(y); | 93 bytes[i] = static_cast<uint8_t>(y); |
94 y >>= 8; | 94 y >>= 8; |
95 } | 95 } |
96 return UniquePosition(bytes + suffix, suffix); | 96 return UniquePosition(bytes + suffix, suffix); |
97 } | 97 } |
98 | 98 |
99 // static. | 99 // static. |
100 UniquePosition UniquePosition::InitialPosition( | 100 UniquePosition UniquePosition::InitialPosition( |
101 const std::string& suffix) { | 101 const std::string& suffix) { |
102 DCHECK(IsValidSuffix(suffix)); | 102 DCHECK(IsValidSuffix(suffix)); |
103 return UniquePosition(suffix, suffix); | 103 return UniquePosition(suffix, suffix); |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
169 // obsolete for a long time. See the proto definition for details. | 169 // obsolete for a long time. See the proto definition for details. |
170 } | 170 } |
171 | 171 |
172 void UniquePosition::SerializeToString(std::string* blob) const { | 172 void UniquePosition::SerializeToString(std::string* blob) const { |
173 DCHECK(blob); | 173 DCHECK(blob); |
174 sync_pb::UniquePosition proto; | 174 sync_pb::UniquePosition proto; |
175 ToProto(&proto); | 175 ToProto(&proto); |
176 proto.SerializeToString(blob); | 176 proto.SerializeToString(blob); |
177 } | 177 } |
178 | 178 |
179 int64 UniquePosition::ToInt64() const { | 179 int64_t UniquePosition::ToInt64() const { |
180 uint64 y = 0; | 180 uint64_t y = 0; |
181 const std::string& s = Uncompress(compressed_); | 181 const std::string& s = Uncompress(compressed_); |
182 size_t l = sizeof(int64); | 182 size_t l = sizeof(int64_t); |
183 if (s.length() < l) { | 183 if (s.length() < l) { |
184 NOTREACHED(); | 184 NOTREACHED(); |
185 l = s.length(); | 185 l = s.length(); |
186 } | 186 } |
187 for (size_t i = 0; i < l; ++i) { | 187 for (size_t i = 0; i < l; ++i) { |
188 const uint8 byte = s[l - i - 1]; | 188 const uint8_t byte = s[l - i - 1]; |
189 y |= static_cast<uint64>(byte) << (i * 8); | 189 y |= static_cast<uint64_t>(byte) << (i * 8); |
190 } | 190 } |
191 y ^= 0x8000000000000000ULL; | 191 y ^= 0x8000000000000000ULL; |
192 // This is technically implementation-defined if y > INT64_MAX, so | 192 // This is technically implementation-defined if y > INT64_MAX, so |
193 // we're assuming that we're on a twos-complement machine. | 193 // we're assuming that we're on a twos-complement machine. |
194 return static_cast<int64>(y); | 194 return static_cast<int64_t>(y); |
195 } | 195 } |
196 | 196 |
197 bool UniquePosition::IsValid() const { | 197 bool UniquePosition::IsValid() const { |
198 return is_valid_; | 198 return is_valid_; |
199 } | 199 } |
200 | 200 |
201 std::string UniquePosition::ToDebugString() const { | 201 std::string UniquePosition::ToDebugString() const { |
202 const std::string bytes = Uncompress(compressed_); | 202 const std::string bytes = Uncompress(compressed_); |
203 if (bytes.empty()) | 203 if (bytes.empty()) |
204 return std::string("INVALID[]"); | 204 return std::string("INVALID[]"); |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
240 // Prepend zeroes so the result has as many zero digits as |reference|. | 240 // Prepend zeroes so the result has as many zero digits as |reference|. |
241 return std::string(ref_zeroes - suffix_zeroes, '\0'); | 241 return std::string(ref_zeroes - suffix_zeroes, '\0'); |
242 } else if (suffix_zeroes > 1) { | 242 } else if (suffix_zeroes > 1) { |
243 // Prepend zeroes so the result has one more zero digit than |reference|. | 243 // Prepend zeroes so the result has one more zero digit than |reference|. |
244 // We could also take the "else" branch below, but taking this branch will | 244 // We could also take the "else" branch below, but taking this branch will |
245 // give us a smaller result. | 245 // give us a smaller result. |
246 return std::string(ref_zeroes - suffix_zeroes + 1, '\0'); | 246 return std::string(ref_zeroes - suffix_zeroes + 1, '\0'); |
247 } else { | 247 } else { |
248 // Prepend zeroes to match those in the |reference|, then something smaller | 248 // Prepend zeroes to match those in the |reference|, then something smaller |
249 // than the first non-zero digit in |reference|. | 249 // than the first non-zero digit in |reference|. |
250 char lt_digit = static_cast<uint8>(reference[ref_zeroes])/2; | 250 char lt_digit = static_cast<uint8_t>(reference[ref_zeroes]) / 2; |
251 return std::string(ref_zeroes, '\0') + lt_digit; | 251 return std::string(ref_zeroes, '\0') + lt_digit; |
252 } | 252 } |
253 } | 253 } |
254 | 254 |
255 // static | 255 // static |
256 std::string UniquePosition::FindGreaterWithSuffix( | 256 std::string UniquePosition::FindGreaterWithSuffix( |
257 const std::string& reference, | 257 const std::string& reference, |
258 const std::string& suffix) { | 258 const std::string& suffix) { |
259 size_t ref_FFs = reference.find_first_not_of(kuint8max); | 259 size_t ref_FFs = |
260 size_t suffix_FFs = suffix.find_first_not_of(kuint8max); | 260 reference.find_first_not_of(std::numeric_limits<uint8_t>::max()); |
| 261 size_t suffix_FFs = |
| 262 suffix.find_first_not_of(std::numeric_limits<uint8_t>::max()); |
261 | 263 |
262 if (ref_FFs == std::string::npos) { | 264 if (ref_FFs == std::string::npos) { |
263 ref_FFs = reference.length(); | 265 ref_FFs = reference.length(); |
264 } | 266 } |
265 if (suffix_FFs == std::string::npos) { | 267 if (suffix_FFs == std::string::npos) { |
266 suffix_FFs = suffix.length(); | 268 suffix_FFs = suffix.length(); |
267 } | 269 } |
268 | 270 |
269 if (suffix_FFs > ref_FFs) { | 271 if (suffix_FFs > ref_FFs) { |
270 // Implies suffix > reference. | 272 // Implies suffix > reference. |
271 return std::string(); | 273 return std::string(); |
272 } | 274 } |
273 | 275 |
274 if (suffix.substr(suffix_FFs) > reference.substr(ref_FFs)) { | 276 if (suffix.substr(suffix_FFs) > reference.substr(ref_FFs)) { |
275 // Prepend FF digits to match those in |reference|. | 277 // Prepend FF digits to match those in |reference|. |
276 return std::string(ref_FFs - suffix_FFs, kuint8max); | 278 return std::string(ref_FFs - suffix_FFs, |
| 279 std::numeric_limits<uint8_t>::max()); |
277 } else if (suffix_FFs > 1) { | 280 } else if (suffix_FFs > 1) { |
278 // Prepend enough leading FF digits so result has one more of them than | 281 // Prepend enough leading FF digits so result has one more of them than |
279 // |reference| does. We could also take the "else" branch below, but this | 282 // |reference| does. We could also take the "else" branch below, but this |
280 // gives us a smaller result. | 283 // gives us a smaller result. |
281 return std::string(ref_FFs - suffix_FFs + 1, kuint8max); | 284 return std::string(ref_FFs - suffix_FFs + 1, |
| 285 std::numeric_limits<uint8_t>::max()); |
282 } else { | 286 } else { |
283 // Prepend FF digits to match those in |reference|, then something larger | 287 // Prepend FF digits to match those in |reference|, then something larger |
284 // than the first non-FF digit in |reference|. | 288 // than the first non-FF digit in |reference|. |
285 char gt_digit = static_cast<uint8>(reference[ref_FFs]) + | 289 char gt_digit = static_cast<uint8_t>(reference[ref_FFs]) + |
286 (kuint8max - static_cast<uint8>(reference[ref_FFs]) + 1) / 2; | 290 (std::numeric_limits<uint8_t>::max() - |
287 return std::string(ref_FFs, kuint8max) + gt_digit; | 291 static_cast<uint8_t>(reference[ref_FFs]) + 1) / |
| 292 2; |
| 293 return std::string(ref_FFs, std::numeric_limits<uint8_t>::max()) + gt_digit; |
288 } | 294 } |
289 } | 295 } |
290 | 296 |
291 // static | 297 // static |
292 std::string UniquePosition::FindBetweenWithSuffix( | 298 std::string UniquePosition::FindBetweenWithSuffix( |
293 const std::string& before, | 299 const std::string& before, |
294 const std::string& after, | 300 const std::string& after, |
295 const std::string& suffix) { | 301 const std::string& suffix) { |
296 DCHECK(IsValidSuffix(suffix)); | 302 DCHECK(IsValidSuffix(suffix)); |
297 DCHECK_NE(before, after); | 303 DCHECK_NE(before, after); |
298 DCHECK_LT(before, after); | 304 DCHECK_LT(before, after); |
299 | 305 |
300 std::string mid; | 306 std::string mid; |
301 | 307 |
302 // Sometimes our suffix puts us where we want to be. | 308 // Sometimes our suffix puts us where we want to be. |
303 if (before < suffix && suffix < after) { | 309 if (before < suffix && suffix < after) { |
304 return std::string(); | 310 return std::string(); |
305 } | 311 } |
306 | 312 |
307 size_t i = 0; | 313 size_t i = 0; |
308 for ( ; i < std::min(before.length(), after.length()); ++i) { | 314 for ( ; i < std::min(before.length(), after.length()); ++i) { |
309 uint8 a_digit = before[i]; | 315 uint8_t a_digit = before[i]; |
310 uint8 b_digit = after[i]; | 316 uint8_t b_digit = after[i]; |
311 | 317 |
312 if (b_digit - a_digit >= 2) { | 318 if (b_digit - a_digit >= 2) { |
313 mid.push_back(a_digit + (b_digit - a_digit)/2); | 319 mid.push_back(a_digit + (b_digit - a_digit)/2); |
314 return mid; | 320 return mid; |
315 } else if (a_digit == b_digit) { | 321 } else if (a_digit == b_digit) { |
316 mid.push_back(a_digit); | 322 mid.push_back(a_digit); |
317 | 323 |
318 // Both strings are equal so far. Will appending the suffix at this point | 324 // Both strings are equal so far. Will appending the suffix at this point |
319 // give us the comparison we're looking for? | 325 // give us the comparison we're looking for? |
320 if (before.substr(i+1) < suffix && suffix < after.substr(i+1)) { | 326 if (before.substr(i+1) < suffix && suffix < after.substr(i+1)) { |
(...skipping 136 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
457 // The key insight is that the comparison value of a repeated character block | 463 // The key insight is that the comparison value of a repeated character block |
458 // depends on the value of the character that follows it. If the character | 464 // depends on the value of the character that follows it. If the character |
459 // follows the repeated character has a value greater than the repeated | 465 // follows the repeated character has a value greater than the repeated |
460 // character itself, then a shorter run length should translate to a higher | 466 // character itself, then a shorter run length should translate to a higher |
461 // comparison value. Therefore, we encode its count using the low encoding. | 467 // comparison value. Therefore, we encode its count using the low encoding. |
462 // Similarly, if the following character is lower, we use the high encoding. | 468 // Similarly, if the following character is lower, we use the high encoding. |
463 | 469 |
464 namespace { | 470 namespace { |
465 | 471 |
466 // Appends an encoded run length to |output_str|. | 472 // Appends an encoded run length to |output_str|. |
467 static void WriteEncodedRunLength(uint32 length, | 473 static void WriteEncodedRunLength(uint32_t length, |
468 bool high_encoding, | 474 bool high_encoding, |
469 std::string* output_str) { | 475 std::string* output_str) { |
470 CHECK_GE(length, 4U); | 476 CHECK_GE(length, 4U); |
471 CHECK_LT(length, 0x80000000); | 477 CHECK_LT(length, 0x80000000); |
472 | 478 |
473 // Step 1: Invert the count, if necessary, to account for the following digit. | 479 // Step 1: Invert the count, if necessary, to account for the following digit. |
474 uint32 encoded_length; | 480 uint32_t encoded_length; |
475 if (high_encoding) { | 481 if (high_encoding) { |
476 encoded_length = 0xffffffff - length; | 482 encoded_length = 0xffffffff - length; |
477 } else { | 483 } else { |
478 encoded_length = length; | 484 encoded_length = length; |
479 } | 485 } |
480 | 486 |
481 // Step 2: Write it as big-endian so it compares correctly with memcmp(3). | 487 // Step 2: Write it as big-endian so it compares correctly with memcmp(3). |
482 output_str->append(1, 0xff & (encoded_length >> 24U)); | 488 output_str->append(1, 0xff & (encoded_length >> 24U)); |
483 output_str->append(1, 0xff & (encoded_length >> 16U)); | 489 output_str->append(1, 0xff & (encoded_length >> 16U)); |
484 output_str->append(1, 0xff & (encoded_length >> 8U)); | 490 output_str->append(1, 0xff & (encoded_length >> 8U)); |
485 output_str->append(1, 0xff & (encoded_length >> 0U)); | 491 output_str->append(1, 0xff & (encoded_length >> 0U)); |
486 } | 492 } |
487 | 493 |
488 // Reads an encoded run length for |str| at position |i|. | 494 // Reads an encoded run length for |str| at position |i|. |
489 static uint32 ReadEncodedRunLength(const std::string& str, size_t i) { | 495 static uint32_t ReadEncodedRunLength(const std::string& str, size_t i) { |
490 DCHECK_LE(i + 4, str.length()); | 496 DCHECK_LE(i + 4, str.length()); |
491 | 497 |
492 // Step 1: Extract the big-endian count. | 498 // Step 1: Extract the big-endian count. |
493 uint32 encoded_length = | 499 uint32_t encoded_length = |
494 ((uint8)(str[i+3]) << 0) | | 500 ((uint8_t)(str[i + 3]) << 0) | ((uint8_t)(str[i + 2]) << 8) | |
495 ((uint8)(str[i+2]) << 8) | | 501 ((uint8_t)(str[i + 1]) << 16) | ((uint8_t)(str[i + 0]) << 24); |
496 ((uint8)(str[i+1]) << 16) | | |
497 ((uint8)(str[i+0]) << 24); | |
498 | 502 |
499 // Step 2: If this was an inverted count, un-invert it. | 503 // Step 2: If this was an inverted count, un-invert it. |
500 uint32 length; | 504 uint32_t length; |
501 if (encoded_length & 0x80000000) { | 505 if (encoded_length & 0x80000000) { |
502 length = 0xffffffff - encoded_length; | 506 length = 0xffffffff - encoded_length; |
503 } else { | 507 } else { |
504 length = encoded_length; | 508 length = encoded_length; |
505 } | 509 } |
506 | 510 |
507 return length; | 511 return length; |
508 } | 512 } |
509 | 513 |
510 // A series of four identical chars at the beginning of a block indicates | 514 // A series of four identical chars at the beginning of a block indicates |
(...skipping 41 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
552 const size_t runs_until = str.find_first_not_of(rep_digit, i+4); | 556 const size_t runs_until = str.find_first_not_of(rep_digit, i+4); |
553 | 557 |
554 // Handle the 'runs until end' special case specially. | 558 // Handle the 'runs until end' special case specially. |
555 size_t run_length; | 559 size_t run_length; |
556 bool encode_high; // True if the next byte is greater than |rep_digit|. | 560 bool encode_high; // True if the next byte is greater than |rep_digit|. |
557 if (runs_until == std::string::npos) { | 561 if (runs_until == std::string::npos) { |
558 run_length = str.length() - i; | 562 run_length = str.length() - i; |
559 encode_high = false; | 563 encode_high = false; |
560 } else { | 564 } else { |
561 run_length = runs_until - i; | 565 run_length = runs_until - i; |
562 encode_high = static_cast<uint8>(str[runs_until]) > | 566 encode_high = static_cast<uint8_t>(str[runs_until]) > |
563 static_cast<uint8>(rep_digit); | 567 static_cast<uint8_t>(rep_digit); |
564 } | 568 } |
565 DCHECK_LT(run_length, static_cast<size_t>(kint32max)) | 569 DCHECK_LT(run_length, |
| 570 static_cast<size_t>(std::numeric_limits<int32_t>::max())) |
566 << "This implementation can't encode run-lengths greater than 2^31."; | 571 << "This implementation can't encode run-lengths greater than 2^31."; |
567 | 572 |
568 WriteEncodedRunLength(run_length, encode_high, &output); | 573 WriteEncodedRunLength(run_length, encode_high, &output); |
569 i += run_length; // Jump forward by the size of the run length. | 574 i += run_length; // Jump forward by the size of the run length. |
570 } else { | 575 } else { |
571 // Output up to eight bytes without any encoding. | 576 // Output up to eight bytes without any encoding. |
572 const size_t len = std::min(static_cast<size_t>(8), str.length() - i); | 577 const size_t len = std::min(static_cast<size_t>(8), str.length() - i); |
573 output.append(str, i, len); | 578 output.append(str, i, len); |
574 i += len; // Jump forward by the amount of input consumed (usually 8). | 579 i += len; // Jump forward by the amount of input consumed (usually 8). |
575 } | 580 } |
576 } | 581 } |
577 | 582 |
578 return output; | 583 return output; |
579 } | 584 } |
580 | 585 |
581 // static | 586 // static |
582 // Uncompresses strings that were compresed with UniquePosition::Compress. | 587 // Uncompresses strings that were compresed with UniquePosition::Compress. |
583 std::string UniquePosition::Uncompress(const std::string& str) { | 588 std::string UniquePosition::Uncompress(const std::string& str) { |
584 std::string output; | 589 std::string output; |
585 size_t i = 0; | 590 size_t i = 0; |
586 // Iterate through the compressed string one block at a time. | 591 // Iterate through the compressed string one block at a time. |
587 for (i = 0; i + 8 <= str.length(); i += 8) { | 592 for (i = 0; i + 8 <= str.length(); i += 8) { |
588 if (IsRepeatedCharPrefix(str, i)) { | 593 if (IsRepeatedCharPrefix(str, i)) { |
589 // Found a repeated character block. Expand it. | 594 // Found a repeated character block. Expand it. |
590 const char rep_digit = str[i]; | 595 const char rep_digit = str[i]; |
591 uint32 length = ReadEncodedRunLength(str, i+4); | 596 uint32_t length = ReadEncodedRunLength(str, i + 4); |
592 output.append(length, rep_digit); | 597 output.append(length, rep_digit); |
593 } else { | 598 } else { |
594 // Found a regular block. Copy it. | 599 // Found a regular block. Copy it. |
595 output.append(str, i, 8); | 600 output.append(str, i, 8); |
596 } | 601 } |
597 } | 602 } |
598 // Copy the remaining bytes that were too small to form a block. | 603 // Copy the remaining bytes that were too small to form a block. |
599 output.append(str, i, std::string::npos); | 604 output.append(str, i, std::string::npos); |
600 return output; | 605 return output; |
601 } | 606 } |
602 | 607 |
603 bool UniquePosition::IsValidCompressed(const std::string& str) { | 608 bool UniquePosition::IsValidCompressed(const std::string& str) { |
604 for (size_t i = 0; i + 8 <= str.length(); i += 8) { | 609 for (size_t i = 0; i + 8 <= str.length(); i += 8) { |
605 if (IsRepeatedCharPrefix(str, i)) { | 610 if (IsRepeatedCharPrefix(str, i)) { |
606 uint32 count = ReadEncodedRunLength(str, i+4); | 611 uint32_t count = ReadEncodedRunLength(str, i + 4); |
607 if (count < 4) { | 612 if (count < 4) { |
608 // A repeated character block should at least represent the four | 613 // A repeated character block should at least represent the four |
609 // characters that started it. | 614 // characters that started it. |
610 return false; | 615 return false; |
611 } | 616 } |
612 if (str[i] == str[i+4]) { | 617 if (str[i] == str[i+4]) { |
613 // Does the next digit after a count match the repeated character? Then | 618 // Does the next digit after a count match the repeated character? Then |
614 // this is not the highest possible count. | 619 // this is not the highest possible count. |
615 return false; | 620 return false; |
616 } | 621 } |
617 } | 622 } |
618 } | 623 } |
619 // We don't bother looking for the existence or checking the validity of | 624 // We don't bother looking for the existence or checking the validity of |
620 // any partial blocks. There's no way they could be invalid anyway. | 625 // any partial blocks. There's no way they could be invalid anyway. |
621 return true; | 626 return true; |
622 } | 627 } |
623 | 628 |
624 } // namespace syncer | 629 } // namespace syncer |
OLD | NEW |