| OLD | NEW |
| (Empty) |
| 1 // Copyright 2006 Google Inc. | |
| 2 // Authors: Sanjay Ghemawat, Jeff Dean, Chandra Chereddi, Lincoln Smith | |
| 3 // | |
| 4 // Licensed under the Apache License, Version 2.0 (the "License"); | |
| 5 // you may not use this file except in compliance with the License. | |
| 6 // You may obtain a copy of the License at | |
| 7 // | |
| 8 // http://www.apache.org/licenses/LICENSE-2.0 | |
| 9 // | |
| 10 // Unless required by applicable law or agreed to in writing, software | |
| 11 // distributed under the License is distributed on an "AS IS" BASIS, | |
| 12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 13 // See the License for the specific language governing permissions and | |
| 14 // limitations under the License. | |
| 15 // | |
| 16 // Implementation of the Bentley/McIlroy algorithm for finding differences. | |
| 17 // Bentley, McIlroy. DCC 1999. Data Compression Using Long Common Strings. | |
| 18 // http://citeseer.ist.psu.edu/555557.html | |
| 19 | |
| 20 #ifndef OPEN_VCDIFF_BLOCKHASH_H_ | |
| 21 #define OPEN_VCDIFF_BLOCKHASH_H_ | |
| 22 | |
| 23 #include <config.h> | |
| 24 #include <stdint.h> // uint32_t | |
| 25 #include <cstddef> // size_t | |
| 26 #include <vector> | |
| 27 | |
| 28 namespace open_vcdiff { | |
| 29 | |
| 30 // A generic hash table which will be used to keep track of byte runs | |
| 31 // of size kBlockSize in both the incrementally processed target data | |
| 32 // and the preprocessed source dictionary. | |
| 33 // | |
| 34 // A custom hash table implementation is used instead of the standard | |
| 35 // hash_map template because we know that there will be exactly one | |
| 36 // entry in the BlockHash corresponding to each kBlockSize bytes | |
| 37 // in the source data, which makes certain optimizations possible: | |
| 38 // * The memory for the hash table and for all hash entries can be allocated | |
| 39 // in one step rather than incrementally for each insert operation. | |
| 40 // * A single integer can be used to represent both | |
| 41 // the index of the next hash entry in the chain | |
| 42 // and the position of the entry within the source data | |
| 43 // (== kBlockSize * block_number). This greatly reduces the size | |
| 44 // of a hash entry. | |
| 45 // | |
| 46 class BlockHash { | |
| 47 public: | |
| 48 // Block size as per Bentley/McIlroy; must be a power of two. | |
| 49 // | |
| 50 // Using (for example) kBlockSize = 4 guarantees that no match smaller | |
| 51 // than size 4 will be identified, that some matches having sizes | |
| 52 // 4, 5, or 6 may be identified, and that all matches | |
| 53 // having size 7 or greater will be identified (because any string of | |
| 54 // 7 bytes must contain a complete aligned block of 4 bytes.) | |
| 55 // | |
| 56 // Increasing kBlockSize by a factor of two will halve the amount of | |
| 57 // memory needed for the next block table, and will halve the setup time | |
| 58 // for a new BlockHash. However, it also doubles the minimum | |
| 59 // match length that is guaranteed to be found in FindBestMatch(), | |
| 60 // so that function will be less effective in finding matches. | |
| 61 // | |
| 62 // Computational effort in FindBestMatch (which is the inner loop of | |
| 63 // the encoding algorithm) will be proportional to the number of | |
| 64 // matches found, and a low value of kBlockSize will waste time | |
| 65 // tracking down small matches. On the other hand, if this value | |
| 66 // is set too high, no matches will be found at all. | |
| 67 // | |
| 68 // It is suggested that different values of kBlockSize be tried against | |
| 69 // a representative data set to find the best tradeoff between | |
| 70 // memory/CPU and the effectiveness of FindBestMatch(). | |
| 71 // | |
| 72 // If you change kBlockSize to a smaller value, please increase | |
| 73 // kMaxMatchesToCheck accordingly. | |
| 74 static const int kBlockSize = 32; | |
| 75 | |
| 76 // This class is used to store the best match found by FindBestMatch() | |
| 77 // and return it to the caller. | |
| 78 class Match { | |
| 79 public: | |
| 80 Match() : size_(0), source_offset_(-1), target_offset_(-1) { } | |
| 81 | |
| 82 void ReplaceIfBetterMatch(size_t candidate_size, | |
| 83 int candidate_source_offset, | |
| 84 int candidate_target_offset) { | |
| 85 if (candidate_size > size_) { | |
| 86 size_ = candidate_size; | |
| 87 source_offset_ = candidate_source_offset; | |
| 88 target_offset_ = candidate_target_offset; | |
| 89 } | |
| 90 } | |
| 91 | |
| 92 size_t size() const { return size_; } | |
| 93 int source_offset() const { return source_offset_; } | |
| 94 int target_offset() const { return target_offset_; } | |
| 95 | |
| 96 private: | |
| 97 // The size of the best (longest) match passed to ReplaceIfBetterMatch(). | |
| 98 size_t size_; | |
| 99 | |
| 100 // The source offset of the match, including the starting_offset_ | |
| 101 // of the BlockHash for which the match was found. | |
| 102 int source_offset_; | |
| 103 | |
| 104 // The target offset of the match. An offset of 0 corresponds to the | |
| 105 // data at target_start, which is an argument of FindBestMatch(). | |
| 106 int target_offset_; | |
| 107 | |
| 108 // Making these private avoids implicit copy constructor | |
| 109 // & assignment operator | |
| 110 Match(const Match&); // NOLINT | |
| 111 void operator=(const Match&); | |
| 112 }; | |
| 113 | |
| 114 // A BlockHash is created using a buffer of source data. The hash table | |
| 115 // will contain one entry for each kBlockSize-byte block in the | |
| 116 // source data. | |
| 117 // | |
| 118 // See the comments for starting_offset_, below, for a description of | |
| 119 // the starting_offset argument. For a hash of source (dictionary) data, | |
| 120 // starting_offset_ will be zero; for a hash of previously encoded | |
| 121 // target data, starting_offset_ will be equal to the dictionary size. | |
| 122 // | |
| 123 BlockHash(const char* source_data, size_t source_size, int starting_offset); | |
| 124 | |
| 125 ~BlockHash(); | |
| 126 | |
| 127 // Initializes the object before use. | |
| 128 // This method must be called after constructing a BlockHash object, | |
| 129 // and before any other method may be called. This is because | |
| 130 // Init() dynamically allocates hash_table_ and next_block_table_. | |
| 131 // Returns true if initialization succeeded, or false if an error occurred, | |
| 132 // in which case no other method except the destructor may then be used | |
| 133 // on the object. | |
| 134 // | |
| 135 // If populate_hash_table is true, then AddAllBlocks() will be called | |
| 136 // to populate the hash table. If populate_hash_table is false, then | |
| 137 // classes that inherit from BlockHash are expected to call AddBlock() | |
| 138 // to incrementally populate individual blocks of data. | |
| 139 // | |
| 140 bool Init(bool populate_hash_table); | |
| 141 | |
| 142 // In the context of the open-vcdiff encoder, BlockHash is used for two | |
| 143 // purposes: to hash the source (dictionary) data, and to hash | |
| 144 // the previously encoded target data. The main differences between | |
| 145 // a dictionary BlockHash and a target BlockHash are as follows: | |
| 146 // | |
| 147 // 1. The best_match->source_offset() returned from FindBestMatch() | |
| 148 // for a target BlockHash is computed in the following manner: | |
| 149 // the starting offset of the first byte in the target data | |
| 150 // is equal to the dictionary size. FindBestMatch() will add | |
| 151 // starting_offset_ to any best_match->source_offset() value it returns, | |
| 152 // in order to produce the correct offset value for a target BlockHash. | |
| 153 // 2. For a dictionary BlockHash, the entire data set is hashed at once | |
| 154 // when Init() is called with the parameter populate_hash_table = true. | |
| 155 // For a target BlockHash, because the previously encoded target data | |
| 156 // includes only the data seen up to the current encoding position, | |
| 157 // the data blocks are hashed incrementally as the encoding position | |
| 158 // advances, using AddOneIndexHash() and AddAllBlocksThroughIndex(). | |
| 159 // | |
| 160 // The following two factory functions can be used to create BlockHash | |
| 161 // objects for each of these two purposes. Each factory function calls | |
| 162 // the object constructor and also calls Init(). If an error occurs, | |
| 163 // NULL is returned; otherwise a valid BlockHash object is returned. | |
| 164 // Since a dictionary BlockHash is not expected to be modified after | |
| 165 // initialization, a const object is returned. | |
| 166 // The caller is responsible for deleting the returned object | |
| 167 // (using the C++ delete operator) once it is no longer needed. | |
| 168 static const BlockHash* CreateDictionaryHash(const char* dictionary_data, | |
| 169 size_t dictionary_size); | |
| 170 static BlockHash* CreateTargetHash(const char* target_data, | |
| 171 size_t target_size, | |
| 172 size_t dictionary_size); | |
| 173 | |
| 174 const size_t table_size() const { return hash_table_.size(); } | |
| 175 const int starting_offset() const { return starting_offset_; } | |
| 176 | |
| 177 // This function will be called to add blocks incrementally to the target hash | |
| 178 // as the encoding position advances through the target data. It will be | |
| 179 // called for every kBlockSize-byte block in the target data, regardless | |
| 180 // of whether the block is aligned evenly on a block boundary. The | |
| 181 // BlockHash will only store hash entries for the evenly-aligned blocks. | |
| 182 // | |
| 183 void AddOneIndexHash(int index, uint32_t hash_value) { | |
| 184 if (index == NextIndexToAdd()) { | |
| 185 AddBlock(hash_value); | |
| 186 } | |
| 187 } | |
| 188 | |
| 189 // Calls AddBlock() for each kBlockSize-byte block in the range | |
| 190 // (last_block_added_ * kBlockSize, end_index), exclusive of the endpoints. | |
| 191 // If end_index <= the last index added (last_block_added_ * kBlockSize), | |
| 192 // this function does nothing. | |
| 193 // | |
| 194 // A partial block beginning anywhere up to (end_index - 1) is also added, | |
| 195 // unless it extends outside the end of the source data. Like AddAllBlocks(), | |
| 196 // this function computes the hash value for each of the blocks in question | |
| 197 // from scratch, so it is not a good option if the hash values have already | |
| 198 // been computed for some other purpose. | |
| 199 // | |
| 200 // Example: assume kBlockSize = 4, last_block_added_ = 1, and there are | |
| 201 // 14 bytes of source data. | |
| 202 // If AddAllBlocksThroughIndex(9) is invoked, then it will call AddBlock() | |
| 203 // only for block number 2 (at index 8). | |
| 204 // If, after that, AddAllBlocksThroughIndex(14) is invoked, it will not call | |
| 205 // AddBlock() at all, because block 3 (beginning at index 12) would | |
| 206 // fall outside the range of source data. | |
| 207 // | |
| 208 // VCDiffEngine::Encode (in vcdiffengine.cc) uses this function to | |
| 209 // add a whole range of data to a target hash when a COPY instruction | |
| 210 // is generated. | |
| 211 void AddAllBlocksThroughIndex(int end_index); | |
| 212 | |
| 213 // FindBestMatch takes a position within the unencoded target data | |
| 214 // (target_candidate_start) and the hash value of the kBlockSize bytes | |
| 215 // beginning at that position (hash_value). It attempts to find a matching | |
| 216 // set of bytes within the source (== dictionary) data, expanding | |
| 217 // the match both below and above the target block. It cannot expand | |
| 218 // the match outside the bounds of the source data, or below | |
| 219 // target_start within the target data, or past | |
| 220 // the end limit of (target_start + target_length). | |
| 221 // | |
| 222 // target_candidate_start is the start of the candidate block within the | |
| 223 // target data for which a match will be sought, while | |
| 224 // target_start (which is <= target_candidate_start) | |
| 225 // is the start of the target data that has yet to be encoded. | |
| 226 // | |
| 227 // If a match is found whose size is greater than the size | |
| 228 // of best_match, this function populates *best_match with the | |
| 229 // size, source_offset, and target_offset of the match found. | |
| 230 // best_match->source_offset() will contain the index of the start of the | |
| 231 // matching source data, plus starting_offset_ | |
| 232 // (see description of starting_offset_ for details); | |
| 233 // best_match->target_offset() will contain the offset of the match | |
| 234 // beginning with target_start = offset 0, such that | |
| 235 // 0 <= best_match->target_offset() | |
| 236 // <= (target_candidate_start - target_start); | |
| 237 // and best_match->size() will contain the size of the match. | |
| 238 // If no such match is found, this function leaves *best_match unmodified. | |
| 239 // | |
| 240 // On calling FindBestMatch(), best_match must | |
| 241 // point to a valid Match object, and cannot be NULL. | |
| 242 // The same Match object can be passed | |
| 243 // when calling FindBestMatch() on a different BlockHash object | |
| 244 // for the same candidate data block, in order to find | |
| 245 // the best match possible across both objects. For example: | |
| 246 // | |
| 247 // open_vcdiff::BlockHash::Match best_match; | |
| 248 // uint32_t hash_value = | |
| 249 // RollingHash<BlockHash::kBlockSize>::Hash(target_candidate_start); | |
| 250 // bh1.FindBestMatch(hash_value, | |
| 251 // target_candidate_start, | |
| 252 // target_start, | |
| 253 // target_length, | |
| 254 // &best_match); | |
| 255 // bh2.FindBestMatch(hash_value, | |
| 256 // target_candidate_start, | |
| 257 // target_start, | |
| 258 // target_length, | |
| 259 // &best_match); | |
| 260 // if (best_size >= 0) { | |
| 261 // // a match was found; its size, source offset, and target offset | |
| 262 // // can be found in best_match | |
| 263 // } | |
| 264 // | |
| 265 // hash_value is passed as a separate parameter from target_candidate_start, | |
| 266 // (rather than calculated within FindBestMatch) in order to take | |
| 267 // advantage of the rolling hash, which quickly calculates the hash value | |
| 268 // of the block starting at target_candidate_start based on | |
| 269 // the known hash value of the block starting at (target_candidate_start - 1). | |
| 270 // See vcdiffengine.cc for more details. | |
| 271 // | |
| 272 // Example: | |
| 273 // kBlockSize: 4 | |
| 274 // target text: "ANDREW LLOYD WEBBER" | |
| 275 // 1^ 5^2^ 3^ | |
| 276 // dictionary: "INSURANCE : LLOYDS OF LONDON" | |
| 277 // 4^ | |
| 278 // hashed dictionary blocks: | |
| 279 // "INSU", "RANC", "E : ", "LLOY", "DS O", "F LON" | |
| 280 // | |
| 281 // 1: target_start (beginning of unencoded data) | |
| 282 // 2: target_candidate_start (for the block "LLOY") | |
| 283 // 3: target_length (points one byte beyond the last byte of data.) | |
| 284 // 4: best_match->source_offset() (after calling FindBestMatch) | |
| 285 // 5: best_match->target_offset() (after calling FindBestMatch) | |
| 286 // | |
| 287 // Under these conditions, FindBestMatch will find a matching | |
| 288 // hashed dictionary block for "LLOY", and will extend the beginning of | |
| 289 // this match backwards by one byte, and the end of the match forwards | |
| 290 // by one byte, finding that the best match is " LLOYD" | |
| 291 // with best_match->source_offset() = 10 | |
| 292 // (offset of " LLOYD" in the source string), | |
| 293 // best_match->target_offset() = 6 | |
| 294 // (offset of " LLOYD" in the target string), | |
| 295 // and best_match->size() = 6. | |
| 296 // | |
| 297 void FindBestMatch(uint32_t hash_value, | |
| 298 const char* target_candidate_start, | |
| 299 const char* target_start, | |
| 300 size_t target_size, | |
| 301 Match* best_match) const; | |
| 302 | |
| 303 protected: | |
| 304 // FindBestMatch() will not process more than this number | |
| 305 // of matching hash entries. | |
| 306 // | |
| 307 // It is necessary to have a limit on the maximum number of matches | |
| 308 // that will be checked in order to avoid the worst-case performance | |
| 309 // possible if, for example, all the blocks in the dictionary have | |
| 310 // the same hash value. See the unit test SearchStringFindsTooManyMatches | |
| 311 // for an example of such a case. The encoder uses a loop in | |
| 312 // VCDiffEngine::Encode over each target byte, containing a loop in | |
| 313 // BlockHash::FindBestMatch over the number of matches (up to a maximum | |
| 314 // of the number of source blocks), containing two loops that extend | |
| 315 // the match forwards and backwards up to the number of source bytes. | |
| 316 // Total complexity in the worst case is | |
| 317 // O([target size] * source_size_ * source_size_ | |
| 318 // Placing a limit on the possible number of matches checked changes this to | |
| 319 // O([target size] * source_size_ * kMaxMatchesToCheck) | |
| 320 // | |
| 321 // In empirical testing on real HTML text, using a block size of 4, | |
| 322 // the number of true matches per call to FindBestMatch() did not exceed 78; | |
| 323 // with a block size of 32, the number of matches did not exceed 3. | |
| 324 // | |
| 325 // The expected number of true matches scales super-linearly | |
| 326 // with the inverse of kBlockSize, but here a linear scale is used | |
| 327 // for block sizes smaller than 32. | |
| 328 static const int kMaxMatchesToCheck = (kBlockSize >= 32) ? 8 : | |
| 329 (8 * (32 / kBlockSize)); | |
| 330 | |
| 331 // Do not skip more than this number of non-matching hash collisions | |
| 332 // to find the next matching entry in the hash chain. | |
| 333 static const int kMaxProbes = 16; | |
| 334 | |
| 335 // Internal routine which calculates a hash table size based on kBlockSize and | |
| 336 // the dictionary_size. Will return a power of two if successful, or 0 if an | |
| 337 // internal error occurs. Some calculations (such as GetHashTableIndex()) | |
| 338 // depend on the table size being a power of two. | |
| 339 static const size_t CalcTableSize(const size_t dictionary_size); | |
| 340 | |
| 341 const size_t GetNumberOfBlocks() const { | |
| 342 return source_size_ / kBlockSize; | |
| 343 } | |
| 344 | |
| 345 // Use the lowest-order bits of the hash value | |
| 346 // as the index into the hash table. | |
| 347 int GetHashTableIndex(uint32_t hash_value) const { | |
| 348 return hash_value & (table_size() - 1); | |
| 349 } | |
| 350 | |
| 351 // The index within source_data_ of the next block | |
| 352 // for which AddBlock() should be called. | |
| 353 int NextIndexToAdd() const { | |
| 354 return (last_block_added_ + 1) * kBlockSize; | |
| 355 } | |
| 356 | |
| 357 static inline bool TooManyMatches(int* match_counter); | |
| 358 | |
| 359 const char* const source_data() { return source_data_; } | |
| 360 const size_t source_size() { return source_size_; } | |
| 361 | |
| 362 // Adds an entry to the hash table for one block of source data of length | |
| 363 // kBlockSize, starting at source_data_[block_number * kBlockSize], | |
| 364 // where block_number is always (last_block_added_ + 1). That is, | |
| 365 // AddBlock() must be called once for each block in source_data_ | |
| 366 // in increasing order. | |
| 367 void AddBlock(uint32_t hash_value); | |
| 368 | |
| 369 // Calls AddBlock() for each complete kBlockSize-byte block between | |
| 370 // source_data_ and (source_data_ + source_size_). It is equivalent | |
| 371 // to calling AddAllBlocksThroughIndex(source_data + source_size). | |
| 372 // This function is called when Init(true) is invoked. | |
| 373 void AddAllBlocks(); | |
| 374 | |
| 375 // Returns true if the contents of the kBlockSize-byte block | |
| 376 // beginning at block1 are identical to the contents of | |
| 377 // the block beginning at block2; false otherwise. | |
| 378 static bool BlockContentsMatch(const char* block1, const char* block2); | |
| 379 | |
| 380 // Finds the first block number within the hashed data | |
| 381 // that represents a match for the given hash value. | |
| 382 // Returns -1 if no match was found. | |
| 383 // | |
| 384 // Init() must have been called and returned true before using | |
| 385 // FirstMatchingBlock or NextMatchingBlock. No check is performed | |
| 386 // for this condition; the code will crash if this condition is violated. | |
| 387 // | |
| 388 // The hash table is initially populated with -1 (not found) values, | |
| 389 // so if this function is called before the hash table has been populated | |
| 390 // using AddAllBlocks() or AddBlock(), it will simply return -1 | |
| 391 // for any value of hash_value. | |
| 392 int FirstMatchingBlock(uint32_t hash_value, const char* block_ptr) const; | |
| 393 | |
| 394 // Given a block number returned by FirstMatchingBlock() | |
| 395 // or by a previous call to NextMatchingBlock(), returns | |
| 396 // the next block number that matches the same hash value. | |
| 397 // Returns -1 if no match was found. | |
| 398 int NextMatchingBlock(int block_number, const char* block_ptr) const; | |
| 399 | |
| 400 // Inline versions of BlockContentsMatch and FirstMatchingBlock. | |
| 401 // These save the cost of a function call | |
| 402 // when these routines are called from within the module. | |
| 403 // The external (non-inlined) versions are called only by unit tests. | |
| 404 static inline bool BlockContentsMatchInline(const char* block1, | |
| 405 const char* block2); | |
| 406 inline int FirstMatchingBlockInline(uint32_t hash_value, | |
| 407 const char* block_ptr) const; | |
| 408 | |
| 409 // Walk through the hash entry chain, skipping over any false matches | |
| 410 // (for which the lowest bits of the fingerprints match, | |
| 411 // but the actual block data does not.) Returns the block number of | |
| 412 // the first true match found, or -1 if no true match was found. | |
| 413 // If block_number is a matching block, the function will return block_number | |
| 414 // without skipping to the next block. | |
| 415 int SkipNonMatchingBlocks(int block_number, const char* block_ptr) const; | |
| 416 | |
| 417 // Returns the number of bytes to the left of source_match_start | |
| 418 // that match the corresponding bytes to the left of target_match_start. | |
| 419 // Will not examine more than max_bytes bytes, which is to say that | |
| 420 // the return value will be in the range [0, max_bytes] inclusive. | |
| 421 static int MatchingBytesToLeft(const char* source_match_start, | |
| 422 const char* target_match_start, | |
| 423 int max_bytes); | |
| 424 | |
| 425 // Returns the number of bytes starting at source_match_end | |
| 426 // that match the corresponding bytes starting at target_match_end. | |
| 427 // Will not examine more than max_bytes bytes, which is to say that | |
| 428 // the return value will be in the range [0, max_bytes] inclusive. | |
| 429 static int MatchingBytesToRight(const char* source_match_end, | |
| 430 const char* target_match_end, | |
| 431 int max_bytes); | |
| 432 | |
| 433 // The protected functions BlockContentsMatch, FirstMatchingBlock, | |
| 434 // NextMatchingBlock, MatchingBytesToLeft, and MatchingBytesToRight | |
| 435 // should be made accessible to unit tests. | |
| 436 friend class BlockHashTest; | |
| 437 | |
| 438 private: | |
| 439 const char* const source_data_; | |
| 440 const size_t source_size_; | |
| 441 | |
| 442 // The size of this array is determined using CalcTableSize(). It has at | |
| 443 // least one element for each kBlockSize-byte block in the source data. | |
| 444 // GetHashTableIndex() returns an index into this table for a given hash | |
| 445 // value. The value of each element of hash_table_ is the lowest block | |
| 446 // number in the source data whose hash value would return the same value from | |
| 447 // GetHashTableIndex(), or -1 if there is no matching block. This value can | |
| 448 // then be used as an index into next_block_table_ to retrieve the entire set | |
| 449 // of matching block numbers. | |
| 450 std::vector<int> hash_table_; | |
| 451 | |
| 452 // An array containing one element for each source block. Each element is | |
| 453 // either -1 (== not found) or the index of the next block whose hash value | |
| 454 // would produce a matching result from GetHashTableIndex(). | |
| 455 std::vector<int> next_block_table_; | |
| 456 | |
| 457 // This array has the same size as hash_table_. For every block number B that | |
| 458 // is referenced in hash_table_, last_block_table_[B] will contain the maximum | |
| 459 // block number among all blocks that have the same GetHashTableIndex() value | |
| 460 // as the block B. This number may be B itself. For a block number B' that | |
| 461 // is not referenced in hash_table_, the value of last_block_table_[B'] is -1. | |
| 462 // This table is used only while populating the hash table, not while looking | |
| 463 // up hash values in the table. Keeping track of the last block number in the | |
| 464 // chain allows us to construct the block chains as FIFO rather than LIFO | |
| 465 // lists, so that the match with the lowest index is returned first. This | |
| 466 // should result in a more compact encoding because the VCDIFF format favors | |
| 467 // smaller index values and repeated index values. | |
| 468 std::vector<int> last_block_table_; | |
| 469 | |
| 470 // The offset of the first byte of source data (the data at source_data_[0]). | |
| 471 // For the purpose of computing offsets, the source data and target data | |
| 472 // are considered to be concatenated -- not literally in a single memory | |
| 473 // buffer, but conceptually as described in the RFC. | |
| 474 // The first byte of the previously encoded target data | |
| 475 // has an offset that is equal to dictionary_size, i.e., just after | |
| 476 // the last byte of source data. | |
| 477 // For a hash of source (dictionary) data, starting_offset_ will be zero; | |
| 478 // for a hash of previously encoded target data, starting_offset_ will be | |
| 479 // equal to the dictionary size. | |
| 480 const int starting_offset_; | |
| 481 | |
| 482 // The last index added by AddBlock(). This determines the block number | |
| 483 // for successive calls to AddBlock(), and is also | |
| 484 // used to determine the starting block for AddAllBlocksThroughIndex(). | |
| 485 int last_block_added_; | |
| 486 | |
| 487 // Making these private avoids implicit copy constructor & assignment operator | |
| 488 BlockHash(const BlockHash&); // NOLINT | |
| 489 void operator=(const BlockHash&); | |
| 490 }; | |
| 491 | |
| 492 } // namespace open_vcdiff | |
| 493 | |
| 494 #endif // OPEN_VCDIFF_BLOCKHASH_H_ | |
| OLD | NEW |