| Index: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/addrcache.cc
|
| ===================================================================
|
| --- sdch/open_vcdiff/depot/opensource/open-vcdiff/src/addrcache.cc (revision 2678)
|
| +++ sdch/open_vcdiff/depot/opensource/open-vcdiff/src/addrcache.cc (working copy)
|
| @@ -1,331 +0,0 @@
|
| -// Copyright 2007 Google Inc.
|
| -// Author: Lincoln Smith
|
| -//
|
| -// Licensed under the Apache License, Version 2.0 (the "License");
|
| -// you may not use this file except in compliance with the License.
|
| -// You may obtain a copy of the License at
|
| -//
|
| -// http://www.apache.org/licenses/LICENSE-2.0
|
| -//
|
| -// Unless required by applicable law or agreed to in writing, software
|
| -// distributed under the License is distributed on an "AS IS" BASIS,
|
| -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -// See the License for the specific language governing permissions and
|
| -// limitations under the License.
|
| -//
|
| -// Implementation of the Address Cache and Address Encoding
|
| -// algorithms described in sections 5.1 - 5.4 of RFC 3284 -
|
| -// The VCDIFF Generic Differencing and Compression Data Format.
|
| -// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
|
| -//
|
| -// Assumptions:
|
| -// * The VCDAddress type is large enough to hold any offset within
|
| -// the source and target windows. The limit (for int32_t) is 2^31-1 bytes.
|
| -// The source (dictionary) should not approach this size limit;
|
| -// to compress a target file that is larger than
|
| -// INT_MAX - (dictionary size) bytes, the encoder must
|
| -// break it up into multiple target windows.
|
| -
|
| -#include <config.h>
|
| -#include "addrcache.h"
|
| -#include "logging.h"
|
| -#include "varint_bigendian.h"
|
| -#include "vcdiff_defs.h" // RESULT_ERROR
|
| -
|
| -namespace open_vcdiff {
|
| -
|
| -// The constructor does not initialize near_addresses_ and same_addresses_.
|
| -// Therefore, Init() must be called before any other method can be used.
|
| -//
|
| -// Arguments:
|
| -// near_cache_size: Size of the NEAR cache (number of 4-byte integers)
|
| -// same_cache_size: Size of the SAME cache (number of blocks of
|
| -// 256 4-byte integers per block)
|
| -// Because the mode is expressed as a byte value,
|
| -// near_cache_size + same_cache_size should not exceed 254.
|
| -//
|
| -VCDiffAddressCache::VCDiffAddressCache(int near_cache_size,
|
| - int same_cache_size)
|
| - : near_cache_size_(near_cache_size),
|
| - same_cache_size_(same_cache_size),
|
| - next_slot_(0) { }
|
| -
|
| -VCDiffAddressCache::VCDiffAddressCache()
|
| - : near_cache_size_(kDefaultNearCacheSize),
|
| - same_cache_size_(kDefaultSameCacheSize),
|
| - next_slot_(0) { }
|
| -
|
| -// Sets up data structures needed to call other methods. Operations that may
|
| -// fail at runtime (for example, validating the provided near_cache_size_ and
|
| -// same_cache_size_ parameters against their maximum allowed values) are
|
| -// confined to this routine in order to guarantee that the class constructor
|
| -// will never fail. Other methods (except the destructor) cannot be invoked
|
| -// until this method has been called successfully. After the object has been
|
| -// initialized and used, Init() can be called again to reset it to its initial
|
| -// state.
|
| -//
|
| -// Return value: "true" if initialization succeeded, "false" if it failed.
|
| -// No other method except the destructor may be invoked if this function
|
| -// returns false. The caller is responsible for checking the return value
|
| -// and providing an exit path in case of error.
|
| -//
|
| -bool VCDiffAddressCache::Init() {
|
| - // The mode is expressed as a byte value, so there is only room for 256 modes,
|
| - // including the two non-cached modes (SELF and HERE). Do not allow a larger
|
| - // number of modes to be defined. We do a separate sanity check for
|
| - // near_cache_size_ and same_cache_size_ because adding them together can
|
| - // cause an integer overflow if each is set to, say, INT_MAX.
|
| - if ((near_cache_size_ > (VCD_MAX_MODES - 2)) || (near_cache_size_ < 0)) {
|
| - LOG(ERROR) << "Near cache size " << near_cache_size_ << " is invalid"
|
| - << LOG_ENDL;
|
| - return false;
|
| - }
|
| - if ((same_cache_size_ > (VCD_MAX_MODES - 2)) || (same_cache_size_ < 0)) {
|
| - LOG(ERROR) << "Same cache size " << same_cache_size_ << " is invalid"
|
| - << LOG_ENDL;
|
| - return false;
|
| - }
|
| - if ((near_cache_size_ + same_cache_size_) > VCD_MAX_MODES - 2) {
|
| - LOG(ERROR) << "Using near cache size " << near_cache_size_
|
| - << " and same cache size " << same_cache_size_
|
| - << " would exceed maximum number of COPY modes ("
|
| - << VCD_MAX_MODES << ")" << LOG_ENDL;
|
| - return false;
|
| - }
|
| - if (near_cache_size_ > 0) {
|
| - near_addresses_.assign(near_cache_size_, 0);
|
| - }
|
| - if (same_cache_size_ > 0) {
|
| - same_addresses_.assign(same_cache_size_ * 256, 0);
|
| - }
|
| - next_slot_ = 0; // in case Init() is called a second time to reinit
|
| - return true;
|
| -}
|
| -
|
| -// This method will be called whenever an address is calculated for an
|
| -// encoded or decoded COPY instruction, and will update the contents
|
| -// of the SAME and NEAR caches. It is vital that the use of
|
| -// UpdateCache (called cache_update in the RFC examples) exactly match
|
| -// the RFC standard, and that the same caching logic be used in the
|
| -// decoder as in the encoder, in order for the decoded addresses to
|
| -// match.
|
| -//
|
| -// Argument:
|
| -// address: This must be a valid address between 0 and
|
| -// (source window size + target window size). It is assumed that
|
| -// these bounds have been checked before calling UpdateCache.
|
| -//
|
| -void VCDiffAddressCache::UpdateCache(VCDAddress address) {
|
| - if (near_cache_size_ > 0) {
|
| - near_addresses_[next_slot_] = address;
|
| - next_slot_ = (next_slot_ + 1) % near_cache_size_;
|
| - }
|
| - if (same_cache_size_ > 0) {
|
| - same_addresses_[address % (same_cache_size_ * 256)] = address;
|
| - }
|
| -}
|
| -
|
| -// Determines the address mode that yields the most compact encoding
|
| -// of the given address value, writes the encoded address into the
|
| -// address stream, and returns the mode used. The most compact encoding
|
| -// is found by looking for the numerically lowest encoded address.
|
| -// The Init() function must already have been called.
|
| -//
|
| -// Arguments:
|
| -// address: The address to be encoded. Must be a non-negative integer
|
| -// between 0 and (here_address - 1).
|
| -// here_address: The current location in the target data (i.e., the
|
| -// position just after the last encoded value.) Must be non-negative.
|
| -// encoded_addr: Points to an VCDAddress that will be replaced
|
| -// with the encoded representation of address.
|
| -// If WriteAddressAsVarintForMode returns true when passed
|
| -// the return value, then encoded_addr should be written
|
| -// into the delta file as a variable-length integer (Varint);
|
| -// otherwise, it should be written as a byte (unsigned char).
|
| -//
|
| -// Return value: A mode value between 0 and 255. The mode will tell
|
| -// how to interpret the next value in the address stream.
|
| -// The values 0 and 1 correspond to SELF and HERE addressing.
|
| -//
|
| -// The function is guaranteed to succeed unless the conditions on the arguments
|
| -// have not been met, in which case a LOG(DFATAL) message will be produced,
|
| -// 0 will be returned, and *encoded_addr will be replaced with 0.
|
| -//
|
| -unsigned char VCDiffAddressCache::EncodeAddress(VCDAddress address,
|
| - VCDAddress here_address,
|
| - VCDAddress* encoded_addr) {
|
| - if (address < 0) {
|
| - LOG(DFATAL) << "EncodeAddress was passed a negative address: "
|
| - << address << LOG_ENDL;
|
| - *encoded_addr = 0;
|
| - return 0;
|
| - }
|
| - if (address >= here_address) {
|
| - LOG(DFATAL) << "EncodeAddress was called with address (" << address
|
| - << ") < here_address (" << here_address << ")" << LOG_ENDL;
|
| - *encoded_addr = 0;
|
| - return 0;
|
| - }
|
| - // Try using the SAME cache. This method, if available, always
|
| - // results in the smallest encoding and takes priority over other modes.
|
| - if (same_cache_size() > 0) {
|
| - const VCDAddress same_cache_pos =
|
| - address % (same_cache_size() * 256);
|
| - if (SameAddress(same_cache_pos) == address) {
|
| - // This is the only mode for which an single byte will be written
|
| - // to the address stream instead of a variable-length integer.
|
| - UpdateCache(address);
|
| - *encoded_addr = same_cache_pos % 256;
|
| - return FirstSameMode() + (same_cache_pos / 256); // SAME mode
|
| - }
|
| - }
|
| -
|
| - // Try SELF mode
|
| - unsigned char best_mode = VCD_SELF_MODE;
|
| - VCDAddress best_encoded_address = address;
|
| -
|
| - // Try HERE mode
|
| - {
|
| - const VCDAddress here_encoded_address = here_address - address;
|
| - if (here_encoded_address < best_encoded_address) {
|
| - best_mode = VCD_HERE_MODE;
|
| - best_encoded_address = here_encoded_address;
|
| - }
|
| - }
|
| -
|
| - // Try using the NEAR cache
|
| - for (int i = 0; i < near_cache_size(); ++i) {
|
| - const VCDAddress near_encoded_address = address - NearAddress(i);
|
| - if ((near_encoded_address >= 0) &&
|
| - (near_encoded_address < best_encoded_address)) {
|
| - best_mode = FirstNearMode() + i;
|
| - best_encoded_address = near_encoded_address;
|
| - }
|
| - }
|
| -
|
| - UpdateCache(address);
|
| - *encoded_addr = best_encoded_address;
|
| - return best_mode;
|
| -}
|
| -
|
| -// Increments *byte_pointer and returns the byte it pointed to before the
|
| -// increment. The caller must check bounds to ensure that *byte_pointer
|
| -// points to a valid address in memory.
|
| -static unsigned char ParseByte(const char** byte_pointer) {
|
| - unsigned char byte_value = static_cast<unsigned char>(**byte_pointer);
|
| - ++(*byte_pointer);
|
| - return byte_value;
|
| -}
|
| -
|
| -// Checks the given decoded address for validity. Returns true if the
|
| -// address is valid; otherwise, prints an error message to the log and
|
| -// returns false.
|
| -static bool IsDecodedAddressValid(VCDAddress decoded_address,
|
| - VCDAddress here_address) {
|
| - if (decoded_address < 0) {
|
| - LOG(ERROR) << "Decoded address " << decoded_address << " is invalid"
|
| - << LOG_ENDL;
|
| - return false;
|
| - } else if (decoded_address >= here_address) {
|
| - LOG(ERROR) << "Decoded address (" << decoded_address
|
| - << ") is beyond location in target file (" << here_address
|
| - << ")" << LOG_ENDL;
|
| - return false;
|
| - }
|
| - return true;
|
| -}
|
| -
|
| -// Interprets the next value in the address_stream using the provided mode,
|
| -// which may need to access the SAME or NEAR address cache. Returns the
|
| -// decoded address.
|
| -// The Init() function must already have been called.
|
| -//
|
| -// Arguments:
|
| -// here_address: The current location in the source + target data (i.e., the
|
| -// location into which the COPY instruction will copy.) By definition,
|
| -// all addresses between 0 and (here_address - 1) are valid, and
|
| -// any other address is invalid.
|
| -// mode: A byte value between 0 and (near_cache_size_ + same_cache_size_ + 1)
|
| -// which tells how to interpret the next value in the address stream.
|
| -// The values 0 and 1 correspond to SELF and HERE addressing.
|
| -// The validity of "mode" should already have been checked before
|
| -// calling this function.
|
| -// address_stream: Points to a pointer holding the position
|
| -// in the "Addresses section for COPYs" part of the input data.
|
| -// That section must already have been uncompressed
|
| -// using a secondary decompressor (if necessary.)
|
| -// This is an IN/OUT argument; the value of *address_stream will be
|
| -// incremented by the size of an integer, or (if the SAME cache
|
| -// was used) by the size of a byte (1).
|
| -// address_stream_end: Points to the position just after the end of
|
| -// the address stream buffer. All addresses between *address_stream
|
| -// and address_stream_end should contain valid address data.
|
| -//
|
| -// Return value: If the input conditions were met, and the address section
|
| -// of the input data contains properly encoded addresses that match
|
| -// the instructions section, then an integer between 0 and here_address - 1
|
| -// will be returned, representing the address from which data should
|
| -// be copied from the source or target window into the output stream.
|
| -// If an invalid address value is found in address_stream, then
|
| -// RESULT_ERROR will be returned. If the limit address_stream_end
|
| -// is reached before the address can be decoded, then
|
| -// RESULT_END_OF_DATA will be returned. If more streamed data
|
| -// is expected, this means that the consumer should block and wait
|
| -// for more data before continuing to decode. If no more data is expected,
|
| -// this return value signals an error condition.
|
| -//
|
| -VCDAddress VCDiffAddressCache::DecodeAddress(VCDAddress here_address,
|
| - unsigned char mode,
|
| - const char** address_stream,
|
| - const char* address_stream_end) {
|
| - if (here_address < 0) {
|
| - LOG(DFATAL) << "DecodeAddress was passed a negative value"
|
| - " for here_address: " << here_address << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - const char* new_address_pos = *address_stream;
|
| - if (new_address_pos >= address_stream_end) {
|
| - return RESULT_END_OF_DATA;
|
| - }
|
| - VCDAddress decoded_address;
|
| - if (IsSameMode(mode)) {
|
| - // SAME mode expects a byte value as the encoded address
|
| - unsigned char encoded_address = ParseByte(&new_address_pos);
|
| - decoded_address = DecodeSameAddress(mode, encoded_address);
|
| - } else {
|
| - // All modes except SAME mode expect a VarintBE as the encoded address
|
| - int32_t encoded_address = VarintBE<int32_t>::Parse(address_stream_end,
|
| - &new_address_pos);
|
| - switch (encoded_address) {
|
| - case RESULT_ERROR:
|
| - LOG(ERROR) << "Found invalid variable-length integer "
|
| - "as encoded address value" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - case RESULT_END_OF_DATA:
|
| - return RESULT_END_OF_DATA;
|
| - default:
|
| - break;
|
| - }
|
| - if (IsSelfMode(mode)) {
|
| - decoded_address = DecodeSelfAddress(encoded_address);
|
| - } else if (IsHereMode(mode)) {
|
| - decoded_address = DecodeHereAddress(encoded_address, here_address);
|
| - } else if (IsNearMode(mode)) {
|
| - decoded_address = DecodeNearAddress(mode, encoded_address);
|
| - } else {
|
| - LOG(DFATAL) << "Invalid mode value (" << static_cast<int>(mode)
|
| - << ") passed to DecodeAddress; maximum mode value = "
|
| - << static_cast<int>(LastMode()) << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - }
|
| - // Check for an out-of-bounds address (corrupt/malicious data)
|
| - if (!IsDecodedAddressValid(decoded_address, here_address)) {
|
| - return RESULT_ERROR;
|
| - }
|
| - *address_stream = new_address_pos;
|
| - UpdateCache(decoded_address);
|
| - return decoded_address;
|
| -}
|
| -
|
| -} // namespace open_vcdiff
|
|
|