| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "sync/internal_api/public/base/unique_position.h" | 5 #include "components/sync/base/unique_position.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 #include <stdint.h> | 8 #include <stdint.h> |
| 9 | 9 |
| 10 #include <algorithm> | 10 #include <algorithm> |
| 11 #include <limits> | 11 #include <limits> |
| 12 | 12 |
| 13 #include "base/logging.h" | 13 #include "base/logging.h" |
| 14 #include "base/rand_util.h" | 14 #include "base/rand_util.h" |
| 15 #include "base/stl_util.h" | 15 #include "base/stl_util.h" |
| 16 #include "base/strings/string_number_conversions.h" | 16 #include "base/strings/string_number_conversions.h" |
| 17 #include "sync/protocol/unique_position.pb.h" | 17 #include "components/sync/protocol/unique_position.pb.h" |
| 18 #include "third_party/zlib/zlib.h" | 18 #include "third_party/zlib/zlib.h" |
| 19 | 19 |
| 20 namespace syncer { | 20 namespace syncer { |
| 21 | 21 |
| 22 const size_t UniquePosition::kSuffixLength = 28; | 22 const size_t UniquePosition::kSuffixLength = 28; |
| 23 const size_t UniquePosition::kCompressBytesThreshold = 128; | 23 const size_t UniquePosition::kCompressBytesThreshold = 128; |
| 24 | 24 |
| 25 // static. | 25 // static. |
| 26 bool UniquePosition::IsValidSuffix(const std::string& suffix) { | 26 bool UniquePosition::IsValidSuffix(const std::string& suffix) { |
| 27 // The suffix must be exactly the specified length, otherwise unique suffixes | 27 // The suffix must be exactly the specified length, otherwise unique suffixes |
| 28 // are not sufficient to guarantee unique positions (because prefix + suffix | 28 // are not sufficient to guarantee unique positions (because prefix + suffix |
| 29 // == p + refixsuffix). | 29 // == p + refixsuffix). |
| 30 return suffix.length() == kSuffixLength | 30 return suffix.length() == kSuffixLength && suffix[kSuffixLength - 1] != 0; |
| 31 && suffix[kSuffixLength-1] != 0; | |
| 32 } | 31 } |
| 33 | 32 |
| 34 // static. | 33 // static. |
| 35 bool UniquePosition::IsValidBytes(const std::string& bytes) { | 34 bool UniquePosition::IsValidBytes(const std::string& bytes) { |
| 36 // The first condition ensures that our suffix uniqueness is sufficient to | 35 // The first condition ensures that our suffix uniqueness is sufficient to |
| 37 // guarantee position uniqueness. Otherwise, it's possible the end of some | 36 // guarantee position uniqueness. Otherwise, it's possible the end of some |
| 38 // prefix + some short suffix == some long suffix. | 37 // prefix + some short suffix == some long suffix. |
| 39 // The second condition ensures that FindSmallerWithSuffix can always return a | 38 // The second condition ensures that FindSmallerWithSuffix can always return a |
| 40 // result. | 39 // result. |
| 41 return bytes.length() >= kSuffixLength | 40 return bytes.length() >= kSuffixLength && bytes[bytes.length() - 1] != 0; |
| 42 && bytes[bytes.length()-1] != 0; | |
| 43 } | 41 } |
| 44 | 42 |
| 45 // static. | 43 // static. |
| 46 std::string UniquePosition::RandomSuffix() { | 44 std::string UniquePosition::RandomSuffix() { |
| 47 // Users random data for all but the last byte. The last byte must not be | 45 // Users random data for all but the last byte. The last byte must not be |
| 48 // zero. We arbitrarily set it to 0x7f. | 46 // zero. We arbitrarily set it to 0x7f. |
| 49 return base::RandBytesAsString(kSuffixLength - 1) + "\x7f"; | 47 return base::RandBytesAsString(kSuffixLength - 1) + "\x7f"; |
| 50 } | 48 } |
| 51 | 49 |
| 52 // static. | 50 // static. |
| (...skipping 17 matching lines...) Expand all Loading... |
| 70 int result = uncompress( | 68 int result = uncompress( |
| 71 reinterpret_cast<Bytef*>(string_as_array(&un_gzipped)), | 69 reinterpret_cast<Bytef*>(string_as_array(&un_gzipped)), |
| 72 &uncompressed_len, | 70 &uncompressed_len, |
| 73 reinterpret_cast<const Bytef*>(proto.compressed_value().data()), | 71 reinterpret_cast<const Bytef*>(proto.compressed_value().data()), |
| 74 proto.compressed_value().size()); | 72 proto.compressed_value().size()); |
| 75 if (result != Z_OK) { | 73 if (result != Z_OK) { |
| 76 DLOG(ERROR) << "Unzip failed " << result; | 74 DLOG(ERROR) << "Unzip failed " << result; |
| 77 return UniquePosition::CreateInvalid(); | 75 return UniquePosition::CreateInvalid(); |
| 78 } | 76 } |
| 79 if (uncompressed_len != proto.uncompressed_length()) { | 77 if (uncompressed_len != proto.uncompressed_length()) { |
| 80 DLOG(ERROR) | 78 DLOG(ERROR) << "Uncompressed length " << uncompressed_len |
| 81 << "Uncompressed length " << uncompressed_len | 79 << " did not match specified length " |
| 82 << " did not match specified length " << proto.uncompressed_length(); | 80 << proto.uncompressed_length(); |
| 83 return UniquePosition::CreateInvalid(); | 81 return UniquePosition::CreateInvalid(); |
| 84 } | 82 } |
| 85 return UniquePosition(Compress(un_gzipped)); | 83 return UniquePosition(Compress(un_gzipped)); |
| 86 } else { | 84 } else { |
| 87 return UniquePosition::CreateInvalid(); | 85 return UniquePosition::CreateInvalid(); |
| 88 } | 86 } |
| 89 } | 87 } |
| 90 | 88 |
| 91 // static. | 89 // static. |
| 92 UniquePosition UniquePosition::FromInt64(int64_t x, const std::string& suffix) { | 90 UniquePosition UniquePosition::FromInt64(int64_t x, const std::string& suffix) { |
| 93 uint64_t y = static_cast<uint64_t>(x); | 91 uint64_t y = static_cast<uint64_t>(x); |
| 94 y ^= 0x8000000000000000ULL; // Make it non-negative. | 92 y ^= 0x8000000000000000ULL; // Make it non-negative. |
| 95 std::string bytes(8, 0); | 93 std::string bytes(8, 0); |
| 96 for (int i = 7; i >= 0; --i) { | 94 for (int i = 7; i >= 0; --i) { |
| 97 bytes[i] = static_cast<uint8_t>(y); | 95 bytes[i] = static_cast<uint8_t>(y); |
| 98 y >>= 8; | 96 y >>= 8; |
| 99 } | 97 } |
| 100 return UniquePosition(bytes + suffix, suffix); | 98 return UniquePosition(bytes + suffix, suffix); |
| 101 } | 99 } |
| 102 | 100 |
| 103 // static. | 101 // static. |
| 104 UniquePosition UniquePosition::InitialPosition( | 102 UniquePosition UniquePosition::InitialPosition(const std::string& suffix) { |
| 105 const std::string& suffix) { | |
| 106 DCHECK(IsValidSuffix(suffix)); | 103 DCHECK(IsValidSuffix(suffix)); |
| 107 return UniquePosition(suffix, suffix); | 104 return UniquePosition(suffix, suffix); |
| 108 } | 105 } |
| 109 | 106 |
| 110 // static. | 107 // static. |
| 111 UniquePosition UniquePosition::Before( | 108 UniquePosition UniquePosition::Before(const UniquePosition& x, |
| 112 const UniquePosition& x, | 109 const std::string& suffix) { |
| 113 const std::string& suffix) { | |
| 114 DCHECK(IsValidSuffix(suffix)); | 110 DCHECK(IsValidSuffix(suffix)); |
| 115 DCHECK(x.IsValid()); | 111 DCHECK(x.IsValid()); |
| 116 const std::string& before = FindSmallerWithSuffix( | 112 const std::string& before = |
| 117 Uncompress(x.compressed_), suffix); | 113 FindSmallerWithSuffix(Uncompress(x.compressed_), suffix); |
| 118 return UniquePosition(before + suffix, suffix); | 114 return UniquePosition(before + suffix, suffix); |
| 119 } | 115 } |
| 120 | 116 |
| 121 // static. | 117 // static. |
| 122 UniquePosition UniquePosition::After( | 118 UniquePosition UniquePosition::After(const UniquePosition& x, |
| 123 const UniquePosition& x, | 119 const std::string& suffix) { |
| 124 const std::string& suffix) { | |
| 125 DCHECK(IsValidSuffix(suffix)); | 120 DCHECK(IsValidSuffix(suffix)); |
| 126 DCHECK(x.IsValid()); | 121 DCHECK(x.IsValid()); |
| 127 const std::string& after = FindGreaterWithSuffix( | 122 const std::string& after = |
| 128 Uncompress(x.compressed_), suffix); | 123 FindGreaterWithSuffix(Uncompress(x.compressed_), suffix); |
| 129 return UniquePosition(after + suffix, suffix); | 124 return UniquePosition(after + suffix, suffix); |
| 130 } | 125 } |
| 131 | 126 |
| 132 // static. | 127 // static. |
| 133 UniquePosition UniquePosition::Between( | 128 UniquePosition UniquePosition::Between(const UniquePosition& before, |
| 134 const UniquePosition& before, | 129 const UniquePosition& after, |
| 135 const UniquePosition& after, | 130 const std::string& suffix) { |
| 136 const std::string& suffix) { | |
| 137 DCHECK(before.IsValid()); | 131 DCHECK(before.IsValid()); |
| 138 DCHECK(after.IsValid()); | 132 DCHECK(after.IsValid()); |
| 139 DCHECK(before.LessThan(after)); | 133 DCHECK(before.LessThan(after)); |
| 140 DCHECK(IsValidSuffix(suffix)); | 134 DCHECK(IsValidSuffix(suffix)); |
| 141 const std::string& mid = FindBetweenWithSuffix( | 135 const std::string& mid = FindBetweenWithSuffix( |
| 142 Uncompress(before.compressed_), | 136 Uncompress(before.compressed_), Uncompress(after.compressed_), suffix); |
| 143 Uncompress(after.compressed_), | |
| 144 suffix); | |
| 145 return UniquePosition(mid + suffix, suffix); | 137 return UniquePosition(mid + suffix, suffix); |
| 146 } | 138 } |
| 147 | 139 |
| 148 UniquePosition::UniquePosition() : is_valid_(false) {} | 140 UniquePosition::UniquePosition() : is_valid_(false) {} |
| 149 | 141 |
| 150 bool UniquePosition::LessThan(const UniquePosition& other) const { | 142 bool UniquePosition::LessThan(const UniquePosition& other) const { |
| 151 DCHECK(this->IsValid()); | 143 DCHECK(this->IsValid()); |
| 152 DCHECK(other.IsValid()); | 144 DCHECK(other.IsValid()); |
| 153 | 145 |
| 154 return compressed_ < other.compressed_; | 146 return compressed_ < other.compressed_; |
| (...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 217 debug_string.append(", compressed: " + compressed_string); | 209 debug_string.append(", compressed: " + compressed_string); |
| 218 return debug_string; | 210 return debug_string; |
| 219 } | 211 } |
| 220 | 212 |
| 221 std::string UniquePosition::GetSuffixForTest() const { | 213 std::string UniquePosition::GetSuffixForTest() const { |
| 222 const std::string bytes = Uncompress(compressed_); | 214 const std::string bytes = Uncompress(compressed_); |
| 223 const size_t prefix_len = bytes.length() - kSuffixLength; | 215 const size_t prefix_len = bytes.length() - kSuffixLength; |
| 224 return bytes.substr(prefix_len, std::string::npos); | 216 return bytes.substr(prefix_len, std::string::npos); |
| 225 } | 217 } |
| 226 | 218 |
| 227 std::string UniquePosition::FindSmallerWithSuffix( | 219 std::string UniquePosition::FindSmallerWithSuffix(const std::string& reference, |
| 228 const std::string& reference, | 220 const std::string& suffix) { |
| 229 const std::string& suffix) { | |
| 230 size_t ref_zeroes = reference.find_first_not_of('\0'); | 221 size_t ref_zeroes = reference.find_first_not_of('\0'); |
| 231 size_t suffix_zeroes = suffix.find_first_not_of('\0'); | 222 size_t suffix_zeroes = suffix.find_first_not_of('\0'); |
| 232 | 223 |
| 233 // Neither of our inputs are allowed to have trailing zeroes, so the following | 224 // Neither of our inputs are allowed to have trailing zeroes, so the following |
| 234 // must be true. | 225 // must be true. |
| 235 DCHECK_NE(ref_zeroes, std::string::npos); | 226 DCHECK_NE(ref_zeroes, std::string::npos); |
| 236 DCHECK_NE(suffix_zeroes, std::string::npos); | 227 DCHECK_NE(suffix_zeroes, std::string::npos); |
| 237 | 228 |
| 238 if (suffix_zeroes > ref_zeroes) { | 229 if (suffix_zeroes > ref_zeroes) { |
| 239 // Implies suffix < ref. | 230 // Implies suffix < ref. |
| (...skipping 10 matching lines...) Expand all Loading... |
| 250 return std::string(ref_zeroes - suffix_zeroes + 1, '\0'); | 241 return std::string(ref_zeroes - suffix_zeroes + 1, '\0'); |
| 251 } else { | 242 } else { |
| 252 // Prepend zeroes to match those in the |reference|, then something smaller | 243 // Prepend zeroes to match those in the |reference|, then something smaller |
| 253 // than the first non-zero digit in |reference|. | 244 // than the first non-zero digit in |reference|. |
| 254 char lt_digit = static_cast<uint8_t>(reference[ref_zeroes]) / 2; | 245 char lt_digit = static_cast<uint8_t>(reference[ref_zeroes]) / 2; |
| 255 return std::string(ref_zeroes, '\0') + lt_digit; | 246 return std::string(ref_zeroes, '\0') + lt_digit; |
| 256 } | 247 } |
| 257 } | 248 } |
| 258 | 249 |
| 259 // static | 250 // static |
| 260 std::string UniquePosition::FindGreaterWithSuffix( | 251 std::string UniquePosition::FindGreaterWithSuffix(const std::string& reference, |
| 261 const std::string& reference, | 252 const std::string& suffix) { |
| 262 const std::string& suffix) { | |
| 263 size_t ref_FFs = | 253 size_t ref_FFs = |
| 264 reference.find_first_not_of(std::numeric_limits<uint8_t>::max()); | 254 reference.find_first_not_of(std::numeric_limits<uint8_t>::max()); |
| 265 size_t suffix_FFs = | 255 size_t suffix_FFs = |
| 266 suffix.find_first_not_of(std::numeric_limits<uint8_t>::max()); | 256 suffix.find_first_not_of(std::numeric_limits<uint8_t>::max()); |
| 267 | 257 |
| 268 if (ref_FFs == std::string::npos) { | 258 if (ref_FFs == std::string::npos) { |
| 269 ref_FFs = reference.length(); | 259 ref_FFs = reference.length(); |
| 270 } | 260 } |
| 271 if (suffix_FFs == std::string::npos) { | 261 if (suffix_FFs == std::string::npos) { |
| 272 suffix_FFs = suffix.length(); | 262 suffix_FFs = suffix.length(); |
| (...skipping 19 matching lines...) Expand all Loading... |
| 292 // than the first non-FF digit in |reference|. | 282 // than the first non-FF digit in |reference|. |
| 293 char gt_digit = static_cast<uint8_t>(reference[ref_FFs]) + | 283 char gt_digit = static_cast<uint8_t>(reference[ref_FFs]) + |
| 294 (std::numeric_limits<uint8_t>::max() - | 284 (std::numeric_limits<uint8_t>::max() - |
| 295 static_cast<uint8_t>(reference[ref_FFs]) + 1) / | 285 static_cast<uint8_t>(reference[ref_FFs]) + 1) / |
| 296 2; | 286 2; |
| 297 return std::string(ref_FFs, std::numeric_limits<uint8_t>::max()) + gt_digit; | 287 return std::string(ref_FFs, std::numeric_limits<uint8_t>::max()) + gt_digit; |
| 298 } | 288 } |
| 299 } | 289 } |
| 300 | 290 |
| 301 // static | 291 // static |
| 302 std::string UniquePosition::FindBetweenWithSuffix( | 292 std::string UniquePosition::FindBetweenWithSuffix(const std::string& before, |
| 303 const std::string& before, | 293 const std::string& after, |
| 304 const std::string& after, | 294 const std::string& suffix) { |
| 305 const std::string& suffix) { | |
| 306 DCHECK(IsValidSuffix(suffix)); | 295 DCHECK(IsValidSuffix(suffix)); |
| 307 DCHECK_NE(before, after); | 296 DCHECK_NE(before, after); |
| 308 DCHECK_LT(before, after); | 297 DCHECK_LT(before, after); |
| 309 | 298 |
| 310 std::string mid; | 299 std::string mid; |
| 311 | 300 |
| 312 // Sometimes our suffix puts us where we want to be. | 301 // Sometimes our suffix puts us where we want to be. |
| 313 if (before < suffix && suffix < after) { | 302 if (before < suffix && suffix < after) { |
| 314 return std::string(); | 303 return std::string(); |
| 315 } | 304 } |
| 316 | 305 |
| 317 size_t i = 0; | 306 size_t i = 0; |
| 318 for ( ; i < std::min(before.length(), after.length()); ++i) { | 307 for (; i < std::min(before.length(), after.length()); ++i) { |
| 319 uint8_t a_digit = before[i]; | 308 uint8_t a_digit = before[i]; |
| 320 uint8_t b_digit = after[i]; | 309 uint8_t b_digit = after[i]; |
| 321 | 310 |
| 322 if (b_digit - a_digit >= 2) { | 311 if (b_digit - a_digit >= 2) { |
| 323 mid.push_back(a_digit + (b_digit - a_digit)/2); | 312 mid.push_back(a_digit + (b_digit - a_digit) / 2); |
| 324 return mid; | 313 return mid; |
| 325 } else if (a_digit == b_digit) { | 314 } else if (a_digit == b_digit) { |
| 326 mid.push_back(a_digit); | 315 mid.push_back(a_digit); |
| 327 | 316 |
| 328 // Both strings are equal so far. Will appending the suffix at this point | 317 // Both strings are equal so far. Will appending the suffix at this point |
| 329 // give us the comparison we're looking for? | 318 // give us the comparison we're looking for? |
| 330 if (before.substr(i+1) < suffix && suffix < after.substr(i+1)) { | 319 if (before.substr(i + 1) < suffix && suffix < after.substr(i + 1)) { |
| 331 return mid; | 320 return mid; |
| 332 } | 321 } |
| 333 } else { | 322 } else { |
| 334 DCHECK_EQ(b_digit - a_digit, 1); // Implied by above if branches. | 323 DCHECK_EQ(b_digit - a_digit, 1); // Implied by above if branches. |
| 335 | 324 |
| 336 // The two options are off by one digit. The choice of whether to round | 325 // The two options are off by one digit. The choice of whether to round |
| 337 // up or down here will have consequences on what we do with the remaining | 326 // up or down here will have consequences on what we do with the remaining |
| 338 // digits. Exploring both options is an optimization and is not required | 327 // digits. Exploring both options is an optimization and is not required |
| 339 // for the correctness of this algorithm. | 328 // for the correctness of this algorithm. |
| 340 | 329 |
| 341 // Option A: Round down the current digit. This makes our |mid| < | 330 // Option A: Round down the current digit. This makes our |mid| < |
| 342 // |after|, no matter what we append afterwards. We then focus on | 331 // |after|, no matter what we append afterwards. We then focus on |
| 343 // appending digits until |mid| > |before|. | 332 // appending digits until |mid| > |before|. |
| 344 std::string mid_a = mid; | 333 std::string mid_a = mid; |
| 345 mid_a.push_back(a_digit); | 334 mid_a.push_back(a_digit); |
| 346 mid_a.append(FindGreaterWithSuffix(before.substr(i+1), suffix)); | 335 mid_a.append(FindGreaterWithSuffix(before.substr(i + 1), suffix)); |
| 347 | 336 |
| 348 // Option B: Round up the current digit. This makes our |mid| > |before|, | 337 // Option B: Round up the current digit. This makes our |mid| > |before|, |
| 349 // no matter what we append afterwards. We then focus on appending digits | 338 // no matter what we append afterwards. We then focus on appending digits |
| 350 // until |mid| < |after|. Note that this option may not be viable if the | 339 // until |mid| < |after|. Note that this option may not be viable if the |
| 351 // current digit is the last one in |after|, so we skip the option in that | 340 // current digit is the last one in |after|, so we skip the option in that |
| 352 // case. | 341 // case. |
| 353 if (after.length() > i+1) { | 342 if (after.length() > i + 1) { |
| 354 std::string mid_b = mid; | 343 std::string mid_b = mid; |
| 355 mid_b.push_back(b_digit); | 344 mid_b.push_back(b_digit); |
| 356 mid_b.append(FindSmallerWithSuffix(after.substr(i+1), suffix)); | 345 mid_b.append(FindSmallerWithSuffix(after.substr(i + 1), suffix)); |
| 357 | 346 |
| 358 // Does this give us a shorter position value? If so, use it. | 347 // Does this give us a shorter position value? If so, use it. |
| 359 if (mid_b.length() < mid_a.length()) { | 348 if (mid_b.length() < mid_a.length()) { |
| 360 return mid_b; | 349 return mid_b; |
| 361 } | 350 } |
| 362 } | 351 } |
| 363 return mid_a; | 352 return mid_a; |
| 364 } | 353 } |
| 365 } | 354 } |
| 366 | 355 |
| 367 // If we haven't found a midpoint yet, the following must be true: | 356 // If we haven't found a midpoint yet, the following must be true: |
| 368 DCHECK_EQ(before.substr(0, i), after.substr(0, i)); | 357 DCHECK_EQ(before.substr(0, i), after.substr(0, i)); |
| 369 DCHECK_EQ(before, mid); | 358 DCHECK_EQ(before, mid); |
| 370 DCHECK_LT(before.length(), after.length()); | 359 DCHECK_LT(before.length(), after.length()); |
| 371 | 360 |
| 372 // We know that we'll need to append at least one more byte to |mid| in the | 361 // We know that we'll need to append at least one more byte to |mid| in the |
| 373 // process of making it < |after|. Appending any digit, regardless of the | 362 // process of making it < |after|. Appending any digit, regardless of the |
| 374 // value, will make |before| < |mid|. Therefore, the following will get us a | 363 // value, will make |before| < |mid|. Therefore, the following will get us a |
| 375 // valid position. | 364 // valid position. |
| 376 | 365 |
| 377 mid.append(FindSmallerWithSuffix(after.substr(i), suffix)); | 366 mid.append(FindSmallerWithSuffix(after.substr(i), suffix)); |
| 378 return mid; | 367 return mid; |
| 379 } | 368 } |
| 380 | 369 |
| 381 UniquePosition::UniquePosition(const std::string& internal_rep) | 370 UniquePosition::UniquePosition(const std::string& internal_rep) |
| 382 : compressed_(internal_rep), | 371 : compressed_(internal_rep), |
| 383 is_valid_(IsValidBytes(Uncompress(internal_rep))) { | 372 is_valid_(IsValidBytes(Uncompress(internal_rep))) {} |
| 384 } | |
| 385 | 373 |
| 386 UniquePosition::UniquePosition( | 374 UniquePosition::UniquePosition(const std::string& uncompressed, |
| 387 const std::string& uncompressed, | 375 const std::string& suffix) |
| 388 const std::string& suffix) | 376 : compressed_(Compress(uncompressed)), |
| 389 : compressed_(Compress(uncompressed)), | 377 is_valid_(IsValidBytes(uncompressed)) { |
| 390 is_valid_(IsValidBytes(uncompressed)) { | |
| 391 DCHECK(uncompressed.rfind(suffix) + kSuffixLength == uncompressed.length()); | 378 DCHECK(uncompressed.rfind(suffix) + kSuffixLength == uncompressed.length()); |
| 392 DCHECK(IsValidSuffix(suffix)); | 379 DCHECK(IsValidSuffix(suffix)); |
| 393 DCHECK(IsValid()); | 380 DCHECK(IsValid()); |
| 394 } | 381 } |
| 395 | 382 |
| 396 // On custom compression: | 383 // On custom compression: |
| 397 // | 384 // |
| 398 // Let C(x) be the compression function and U(x) be the uncompression function. | 385 // Let C(x) be the compression function and U(x) be the uncompression function. |
| 399 // | 386 // |
| 400 // This compression scheme has a few special properties. For one, it is | 387 // This compression scheme has a few special properties. For one, it is |
| (...skipping 110 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 511 } else { | 498 } else { |
| 512 length = encoded_length; | 499 length = encoded_length; |
| 513 } | 500 } |
| 514 | 501 |
| 515 return length; | 502 return length; |
| 516 } | 503 } |
| 517 | 504 |
| 518 // A series of four identical chars at the beginning of a block indicates | 505 // A series of four identical chars at the beginning of a block indicates |
| 519 // the beginning of a repeated character block. | 506 // the beginning of a repeated character block. |
| 520 static bool IsRepeatedCharPrefix(const std::string& chars, size_t start_index) { | 507 static bool IsRepeatedCharPrefix(const std::string& chars, size_t start_index) { |
| 521 return chars[start_index] == chars[start_index+1] | 508 return chars[start_index] == chars[start_index + 1] && |
| 522 && chars[start_index] == chars[start_index+2] | 509 chars[start_index] == chars[start_index + 2] && |
| 523 && chars[start_index] == chars[start_index+3]; | 510 chars[start_index] == chars[start_index + 3]; |
| 524 } | 511 } |
| 525 | 512 |
| 526 } // namespace | 513 } // namespace |
| 527 | 514 |
| 528 // static | 515 // static |
| 529 // Wraps the CompressImpl function with a bunch of DCHECKs. | 516 // Wraps the CompressImpl function with a bunch of DCHECKs. |
| 530 std::string UniquePosition::Compress(const std::string& str) { | 517 std::string UniquePosition::Compress(const std::string& str) { |
| 531 DCHECK(IsValidBytes(str)); | 518 DCHECK(IsValidBytes(str)); |
| 532 std::string compressed = CompressImpl(str); | 519 std::string compressed = CompressImpl(str); |
| 533 DCHECK(IsValidCompressed(compressed)); | 520 DCHECK(IsValidCompressed(compressed)); |
| 534 DCHECK_EQ(str, Uncompress(compressed)); | 521 DCHECK_EQ(str, Uncompress(compressed)); |
| 535 return compressed; | 522 return compressed; |
| 536 } | 523 } |
| 537 | 524 |
| 538 // static | 525 // static |
| 539 // Performs the order preserving run length compression of a given input string. | 526 // Performs the order preserving run length compression of a given input string. |
| 540 std::string UniquePosition::CompressImpl(const std::string& str) { | 527 std::string UniquePosition::CompressImpl(const std::string& str) { |
| 541 std::string output; | 528 std::string output; |
| 542 | 529 |
| 543 // The compressed length will usually be at least as long as the suffix (28), | 530 // The compressed length will usually be at least as long as the suffix (28), |
| 544 // since the suffix bytes are mostly random. Most are a few bytes longer; a | 531 // since the suffix bytes are mostly random. Most are a few bytes longer; a |
| 545 // small few are tens of bytes longer. Some early tests indicated that | 532 // small few are tens of bytes longer. Some early tests indicated that |
| 546 // roughly 99% had length 40 or smaller. We guess that pre-sizing for 48 is a | 533 // roughly 99% had length 40 or smaller. We guess that pre-sizing for 48 is a |
| 547 // good trade-off, but that has not been confirmed through profiling. | 534 // good trade-off, but that has not been confirmed through profiling. |
| 548 output.reserve(48); | 535 output.reserve(48); |
| 549 | 536 |
| 550 // Each loop iteration will consume 8, or N bytes, where N >= 4 and is the | 537 // Each loop iteration will consume 8, or N bytes, where N >= 4 and is the |
| 551 // length of a string of identical digits starting at i. | 538 // length of a string of identical digits starting at i. |
| 552 for (size_t i = 0; i < str.length(); ) { | 539 for (size_t i = 0; i < str.length();) { |
| 553 if (i + 4 <= str.length() && IsRepeatedCharPrefix(str, i)) { | 540 if (i + 4 <= str.length() && IsRepeatedCharPrefix(str, i)) { |
| 554 // Four identical bytes in a row at this position means that we must start | 541 // Four identical bytes in a row at this position means that we must start |
| 555 // a repeated character block. Begin by outputting those four bytes. | 542 // a repeated character block. Begin by outputting those four bytes. |
| 556 output.append(str, i, 4); | 543 output.append(str, i, 4); |
| 557 | 544 |
| 558 // Determine the size of the run. | 545 // Determine the size of the run. |
| 559 const char rep_digit = str[i]; | 546 const char rep_digit = str[i]; |
| 560 const size_t runs_until = str.find_first_not_of(rep_digit, i+4); | 547 const size_t runs_until = str.find_first_not_of(rep_digit, i + 4); |
| 561 | 548 |
| 562 // Handle the 'runs until end' special case specially. | 549 // Handle the 'runs until end' special case specially. |
| 563 size_t run_length; | 550 size_t run_length; |
| 564 bool encode_high; // True if the next byte is greater than |rep_digit|. | 551 bool encode_high; // True if the next byte is greater than |rep_digit|. |
| 565 if (runs_until == std::string::npos) { | 552 if (runs_until == std::string::npos) { |
| 566 run_length = str.length() - i; | 553 run_length = str.length() - i; |
| 567 encode_high = false; | 554 encode_high = false; |
| 568 } else { | 555 } else { |
| 569 run_length = runs_until - i; | 556 run_length = runs_until - i; |
| 570 encode_high = static_cast<uint8_t>(str[runs_until]) > | 557 encode_high = static_cast<uint8_t>(str[runs_until]) > |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 611 | 598 |
| 612 bool UniquePosition::IsValidCompressed(const std::string& str) { | 599 bool UniquePosition::IsValidCompressed(const std::string& str) { |
| 613 for (size_t i = 0; i + 8 <= str.length(); i += 8) { | 600 for (size_t i = 0; i + 8 <= str.length(); i += 8) { |
| 614 if (IsRepeatedCharPrefix(str, i)) { | 601 if (IsRepeatedCharPrefix(str, i)) { |
| 615 uint32_t count = ReadEncodedRunLength(str, i + 4); | 602 uint32_t count = ReadEncodedRunLength(str, i + 4); |
| 616 if (count < 4) { | 603 if (count < 4) { |
| 617 // A repeated character block should at least represent the four | 604 // A repeated character block should at least represent the four |
| 618 // characters that started it. | 605 // characters that started it. |
| 619 return false; | 606 return false; |
| 620 } | 607 } |
| 621 if (str[i] == str[i+4]) { | 608 if (str[i] == str[i + 4]) { |
| 622 // Does the next digit after a count match the repeated character? Then | 609 // Does the next digit after a count match the repeated character? Then |
| 623 // this is not the highest possible count. | 610 // this is not the highest possible count. |
| 624 return false; | 611 return false; |
| 625 } | 612 } |
| 626 } | 613 } |
| 627 } | 614 } |
| 628 // We don't bother looking for the existence or checking the validity of | 615 // We don't bother looking for the existence or checking the validity of |
| 629 // any partial blocks. There's no way they could be invalid anyway. | 616 // any partial blocks. There's no way they could be invalid anyway. |
| 630 return true; | 617 return true; |
| 631 } | 618 } |
| 632 | 619 |
| 633 } // namespace syncer | 620 } // namespace syncer |
| OLD | NEW |