| Index: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdiffengine.cc
|
| ===================================================================
|
| --- sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdiffengine.cc (revision 2678)
|
| +++ sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdiffengine.cc (working copy)
|
| @@ -1,228 +0,0 @@
|
| -// Copyright 2006, 2008 Google Inc.
|
| -// Authors: Chandra Chereddi, Lincoln Smith
|
| -//
|
| -// Licensed under the Apache License, Version 2.0 (the "License");
|
| -// you may not use this file except in compliance with the License.
|
| -// You may obtain a copy of the License at
|
| -//
|
| -// http://www.apache.org/licenses/LICENSE-2.0
|
| -//
|
| -// Unless required by applicable law or agreed to in writing, software
|
| -// distributed under the License is distributed on an "AS IS" BASIS,
|
| -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -// See the License for the specific language governing permissions and
|
| -// limitations under the License.
|
| -
|
| -#include <config.h>
|
| -#include "vcdiffengine.h"
|
| -#include <stdint.h> // uint32_t
|
| -#include <string>
|
| -#include "blockhash.h"
|
| -#include "encodetable.h"
|
| -#include "logging.h"
|
| -#include "rolling_hash.h"
|
| -
|
| -namespace open_vcdiff {
|
| -
|
| -using std::string;
|
| -
|
| -VCDiffEngine::~VCDiffEngine() {
|
| - delete hashed_dictionary_;
|
| -}
|
| -
|
| -bool VCDiffEngine::Init() {
|
| - if (hashed_dictionary_) {
|
| - LOG(DFATAL) << "Init() called twice for same VCDiffEngine object"
|
| - << LOG_ENDL;
|
| - return false;
|
| - }
|
| - hashed_dictionary_ = BlockHash::CreateDictionaryHash(dictionary_.data(),
|
| - dictionary_size());
|
| - if (!hashed_dictionary_) {
|
| - LOG(DFATAL) << "Creation of dictionary hash failed" << LOG_ENDL;
|
| - return false;
|
| - }
|
| - if (!RollingHash<BlockHash::kBlockSize>::Init()) {
|
| - LOG(DFATAL) << "RollingHash initialization failed" << LOG_ENDL;
|
| - return false;
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -// This helper function tries to find an appropriate match within
|
| -// hashed_dictionary_ for the block starting at the current target position.
|
| -// If target_hash is not NULL, this function will also look for a match
|
| -// within the previously encoded target data.
|
| -//
|
| -// If a match is found, this function will generate an ADD instruction
|
| -// for all unencoded data that precedes the match,
|
| -// and a COPY instruction for the match itself; then it returns
|
| -// the number of bytes processed by both instructions,
|
| -// which is guaranteed to be > 0.
|
| -// If no appropriate match is found, the function returns 0.
|
| -//
|
| -// The first four parameters are input parameters which are passed
|
| -// directly to BlockHash::FindBestMatch; please see that function
|
| -// for a description of their allowable values.
|
| -size_t VCDiffEngine::EncodeCopyForBestMatch(
|
| - uint32_t hash_value,
|
| - const char* target_candidate_start,
|
| - const char* unencoded_target_start,
|
| - size_t unencoded_target_size,
|
| - const BlockHash* target_hash,
|
| - VCDiffCodeTableWriter* coder) const {
|
| - // When FindBestMatch() comes up with a match for a candidate block,
|
| - // it will populate best_match with the size, source offset,
|
| - // and target offset of the match.
|
| - BlockHash::Match best_match;
|
| -
|
| - // First look for a match in the dictionary.
|
| - hashed_dictionary_->FindBestMatch(hash_value,
|
| - target_candidate_start,
|
| - unencoded_target_start,
|
| - unencoded_target_size,
|
| - &best_match);
|
| - // If target matching is enabled, then see if there is a better match
|
| - // within the target data that has been encoded so far.
|
| - if (target_hash) {
|
| - target_hash->FindBestMatch(hash_value,
|
| - target_candidate_start,
|
| - unencoded_target_start,
|
| - unencoded_target_size,
|
| - &best_match);
|
| - }
|
| - if (!ShouldGenerateCopyInstructionForMatchOfSize(best_match.size())) {
|
| - return 0;
|
| - }
|
| - if (best_match.target_offset() > 0) {
|
| - // Create an ADD instruction to encode all target bytes
|
| - // from the end of the last COPY match, if any, up to
|
| - // the beginning of this COPY match.
|
| - coder->Add(unencoded_target_start, best_match.target_offset());
|
| - }
|
| - coder->Copy(best_match.source_offset(), best_match.size());
|
| - return best_match.target_offset() // ADD size
|
| - + best_match.size(); // + COPY size
|
| -}
|
| -
|
| -// Once the encoder loop has finished checking for matches in the target data,
|
| -// this function creates an ADD instruction to encode all target bytes
|
| -// from the end of the last COPY match, if any, through the end of
|
| -// the target data. In the worst case, if no matches were found at all,
|
| -// this function will create one big ADD instruction
|
| -// for the entire buffer of target data.
|
| -inline void VCDiffEngine::AddUnmatchedRemainder(
|
| - const char* unencoded_target_start,
|
| - size_t unencoded_target_size,
|
| - VCDiffCodeTableWriter* coder) const {
|
| - if (unencoded_target_size > 0) {
|
| - coder->Add(unencoded_target_start, unencoded_target_size);
|
| - }
|
| -}
|
| -
|
| -// This helper function tells the coder to finish the encoding and write
|
| -// the results into the output string "diff".
|
| -inline void VCDiffEngine::FinishEncoding(size_t target_size,
|
| - OutputStringInterface* diff,
|
| - VCDiffCodeTableWriter* coder) const {
|
| - if (target_size != static_cast<size_t>(coder->target_length())) {
|
| - LOG(DFATAL) << "Internal error in VCDiffEngine::Encode: "
|
| - "original target size (" << target_size
|
| - << ") does not match number of bytes processed ("
|
| - << coder->target_length() << ")" << LOG_ENDL;
|
| - }
|
| - coder->Output(diff);
|
| -}
|
| -
|
| -void VCDiffEngine::Encode(const char* target_data,
|
| - size_t target_size,
|
| - bool look_for_target_matches,
|
| - OutputStringInterface* diff,
|
| - VCDiffCodeTableWriter* coder) const {
|
| - if (!hashed_dictionary_) {
|
| - LOG(DFATAL) << "Internal error: VCDiffEngine::Encode() "
|
| - "called before VCDiffEngine::Init()" << LOG_ENDL;
|
| - return;
|
| - }
|
| - if (target_size == 0) {
|
| - return; // Do nothing for empty target
|
| - }
|
| - if (!coder->Init(dictionary_size())) {
|
| - LOG(DFATAL) << "Internal error: "
|
| - "Initialization of VCDiffCodeTableWriter failed" << LOG_ENDL;
|
| - return;
|
| - }
|
| - // Special case for really small input
|
| - if (target_size < static_cast<size_t>(BlockHash::kBlockSize)) {
|
| - AddUnmatchedRemainder(target_data, target_size, coder);
|
| - FinishEncoding(target_size, diff, coder);
|
| - return;
|
| - }
|
| - RollingHash<BlockHash::kBlockSize> hasher;
|
| - BlockHash* target_hash = NULL;
|
| - if (look_for_target_matches) {
|
| - // Check matches against previously encoded target data
|
| - // in this same target window, as well as against the dictionary
|
| - target_hash = BlockHash::CreateTargetHash(target_data,
|
| - target_size,
|
| - dictionary_size());
|
| - if (!target_hash) {
|
| - LOG(ERROR) << "Instantiation of target hash failed" << LOG_ENDL;
|
| - // Keep going despite the error. Since target_hash is NULL,
|
| - // only the source hash will be used to find matches.
|
| - }
|
| - }
|
| - const char* const target_end = target_data + target_size;
|
| - const char* const start_of_last_block = target_end - BlockHash::kBlockSize;
|
| - // Offset of next bytes in string to ADD if NOT copied (i.e., not found in
|
| - // dictionary)
|
| - const char* next_encode = target_data;
|
| - // candidate_pos points to the start of the kBlockSize-byte block that may
|
| - // begin a match with the dictionary or previously encoded target data.
|
| - const char* candidate_pos = target_data;
|
| - uint32_t hash_value = hasher.Hash(candidate_pos);
|
| - while (1) {
|
| - const size_t bytes_encoded =
|
| - EncodeCopyForBestMatch(hash_value,
|
| - candidate_pos,
|
| - next_encode,
|
| - (target_end - next_encode),
|
| - target_hash,
|
| - coder);
|
| - if (bytes_encoded > 0) {
|
| - next_encode += bytes_encoded; // Advance past COPYed data
|
| - candidate_pos = next_encode;
|
| - if (candidate_pos > start_of_last_block) {
|
| - break; // Reached end of target data
|
| - }
|
| - // candidate_pos has jumped ahead by bytes_encoded bytes, so UpdateHash
|
| - // can't be used to calculate the hash value at its new position.
|
| - hash_value = hasher.Hash(candidate_pos);
|
| - if (target_hash) {
|
| - // Update the target hash for the ADDed and COPYed data
|
| - target_hash->AddAllBlocksThroughIndex(
|
| - static_cast<int>(next_encode - target_data));
|
| - }
|
| - } else {
|
| - // No match, or match is too small to be worth a COPY instruction.
|
| - // Move to the next position in the target data.
|
| - if ((candidate_pos + 1) > start_of_last_block) {
|
| - break; // Reached end of target data
|
| - }
|
| - if (target_hash) {
|
| - target_hash->AddOneIndexHash(
|
| - static_cast<int>(candidate_pos - target_data),
|
| - hash_value);
|
| - }
|
| - hash_value = hasher.UpdateHash(hash_value,
|
| - candidate_pos[0],
|
| - candidate_pos[BlockHash::kBlockSize]);
|
| - ++candidate_pos;
|
| - }
|
| - }
|
| - AddUnmatchedRemainder(next_encode, target_end - next_encode, coder);
|
| - FinishEncoding(target_size, diff, coder);
|
| - delete target_hash;
|
| -}
|
| -
|
| -} // namespace open_vcdiff
|
|
|