| Index: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdecoder.cc
|
| ===================================================================
|
| --- sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdecoder.cc (revision 2678)
|
| +++ sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdecoder.cc (working copy)
|
| @@ -1,1401 +0,0 @@
|
| -// Copyright 2008 Google Inc.
|
| -// Author: Lincoln Smith
|
| -//
|
| -// Licensed under the Apache License, Version 2.0 (the "License");
|
| -// you may not use this file except in compliance with the License.
|
| -// You may obtain a copy of the License at
|
| -//
|
| -// http://www.apache.org/licenses/LICENSE-2.0
|
| -//
|
| -// Unless required by applicable law or agreed to in writing, software
|
| -// distributed under the License is distributed on an "AS IS" BASIS,
|
| -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -// See the License for the specific language governing permissions and
|
| -// limitations under the License.
|
| -//
|
| -// Implements a Decoder for the format described in
|
| -// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format.
|
| -// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html
|
| -//
|
| -// The RFC describes the possibility of using a secondary compressor
|
| -// to further reduce the size of each section of the VCDIFF output.
|
| -// That feature is not supported in this implementation of the encoder
|
| -// and decoder.
|
| -// No secondary compressor types have been publicly registered with
|
| -// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids
|
| -// in the more than five years since the registry was created, so there
|
| -// is no standard set of compressor IDs which would be generated by other
|
| -// encoders or accepted by other decoders.
|
| -
|
| -#include <config.h>
|
| -#include "google/vcdecoder.h"
|
| -#include <stdint.h> // int32_t
|
| -#include <cstddef> // size_t, ptrdiff_t
|
| -#include <memory> // auto_ptr
|
| -#include <string>
|
| -#include "addrcache.h"
|
| -#include "checksum.h"
|
| -#include "codetable.h"
|
| -#include "decodetable.h"
|
| -#include "headerparser.h"
|
| -#include "logging.h"
|
| -#include "google/output_string.h"
|
| -#include "varint_bigendian.h"
|
| -#include "vcdiff_defs.h"
|
| -
|
| -namespace open_vcdiff {
|
| -
|
| -namespace {
|
| -
|
| -using std::string;
|
| -
|
| -enum VCDiffAnnotationType {
|
| - VCD_ANNOTATION_LITERAL,
|
| - VCD_ANNOTATION_DMATCH,
|
| - VCD_ANNOTATION_BMATCH
|
| -};
|
| -
|
| -static const char* kAnnotationStartTags[] = {
|
| - "<literal>",
|
| - "<dmatch>",
|
| - "<bmatch>"
|
| -};
|
| -
|
| -static const char* kAnnotationEndTags[] = {
|
| - "</literal>",
|
| - "</dmatch>",
|
| - "</bmatch>"
|
| -};
|
| -
|
| -} // anonymous namespace
|
| -
|
| -// This class is used to parse delta file windows as described
|
| -// in RFC sections 4.2 and 4.3. Its methods are not thread-safe.
|
| -//
|
| -// Here is the window format copied from the RFC:
|
| -//
|
| -// Window1
|
| -// Win_Indicator - byte
|
| -// [Source segment size] - integer
|
| -// [Source segment position] - integer
|
| -// The delta encoding of the target window
|
| -// Length of the delta encoding - integer
|
| -// The delta encoding
|
| -// Size of the target window - integer
|
| -// Delta_Indicator - byte
|
| -// Length of data for ADDs and RUNs - integer
|
| -// Length of instructions and sizes - integer
|
| -// Length of addresses for COPYs - integer
|
| -// Data section for ADDs and RUNs - array of bytes
|
| -// Instructions and sizes section - array of bytes
|
| -// Addresses section for COPYs - array of bytes
|
| -// Window2
|
| -// ...
|
| -//
|
| -// Sample usage:
|
| -//
|
| -// VCDiffDeltaFileWindow delta_window_;
|
| -// delta_window_.Init(parent);
|
| -// ParseableChunk parseable_chunk(input_buffer,
|
| -// input_size,
|
| -// leftover_unencoded_bytes);
|
| -// switch (delta_window_.DecodeWindows(&parseable_chunk)) {
|
| -// case RESULT_END_OF_DATA:
|
| -// <Read more input and retry DecodeWindows later.>
|
| -// case RESULT_ERROR:
|
| -// <Handle error case. An error log message has already been generated.>
|
| -// }
|
| -//
|
| -// DecodeWindows consumes as many windows from the input as it can. It only
|
| -// needs to be placed within a loop if the loop is used to obtain more input
|
| -// (delta file) data.
|
| -//
|
| -class VCDiffDeltaFileWindow {
|
| - public:
|
| - VCDiffDeltaFileWindow();
|
| - ~VCDiffDeltaFileWindow();
|
| -
|
| - // Init() should be called immediately after constructing the
|
| - // VCDiffDeltaFileWindow(). It must be called before DecodeWindows() can be
|
| - // invoked, or an error will occur.
|
| - void Init(VCDiffStreamingDecoderImpl* parent);
|
| -
|
| - // Resets the pointers to the data sections in the current window.
|
| - void Reset();
|
| -
|
| - bool UseCodeTable(const VCDiffCodeTableData& code_table_data,
|
| - unsigned char max_mode) {
|
| - return reader_.UseCodeTable(code_table_data, max_mode);
|
| - }
|
| -
|
| - // Decodes as many delta windows as possible using the input data from
|
| - // *parseable_chunk. Appends the decoded target windows to
|
| - // parent_->decoded_target(). If annotated output is enabled, appends
|
| - // annotated output to parent_->annotated_output(). Returns RESULT_SUCCESS on
|
| - // success, or RESULT_END_OF_DATA if the end of input was reached before the
|
| - // entire window could be decoded and more input is expected (only possible if
|
| - // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
|
| - // decoding. In the RESULT_ERROR case, the value of parseable_chunk->pointer_
|
| - // is undefined; otherwise, parseable_chunk->Advance() is called to point to
|
| - // the input data position just after the data that has been decoded.
|
| - //
|
| - // If expected_target_bytes is not set to kUnlimitedBytes, then the decoder
|
| - // expects *exactly* this number of target bytes to be decoded from one or
|
| - // more delta file windows. If this number is met exactly after finishing a
|
| - // delta window, this function will return RESULT_SUCCESS without processing
|
| - // any more bytes from data_pointer. If this number is exceeded while
|
| - // decoding a window, but was not met before starting that window,
|
| - // then RESULT_ERROR will be returned.
|
| - //
|
| - VCDiffResult DecodeWindows(ParseableChunk* parseable_chunk);
|
| -
|
| - bool FoundWindowHeader() const {
|
| - return found_header_;
|
| - }
|
| -
|
| - bool MoreDataExpected() const {
|
| - // When parsing an interleaved-format delta file,
|
| - // every time DecodeBody() exits, interleaved_bytes_expected_
|
| - // will be decremented by the number of bytes parsed. If it
|
| - // reaches zero, then there is no more data expected because
|
| - // the size of the interleaved section (given in the window
|
| - // header) has been reached.
|
| - return IsInterleaved() && (interleaved_bytes_expected_ > 0);
|
| - }
|
| -
|
| - // Returns the number of bytes remaining to be decoded in the target window.
|
| - // If not in the process of decoding a window, returns 0.
|
| - size_t TargetBytesRemaining();
|
| -
|
| - void EnableAnnotatedOutput() {
|
| - if (!annotated_output_.get()) {
|
| - annotated_output_.reset(new string);
|
| - }
|
| - }
|
| -
|
| - void DisableAnnotatedOutput() {
|
| - annotated_output_.reset(NULL);
|
| - }
|
| -
|
| - private:
|
| - // Reads the header of the window section as described in RFC sections 4.2 and
|
| - // 4.3, up to and including the value "Length of addresses for COPYs". If the
|
| - // entire header is found, this function sets up the DeltaWindowSections
|
| - // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so
|
| - // that the decoder can begin decoding the opcodes in these sections. Returns
|
| - // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of
|
| - // available data was reached before the entire header could be read. (The
|
| - // latter may be an error condition if there is no more data available.)
|
| - // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the
|
| - // parsed header.
|
| - //
|
| - VCDiffResult ReadHeader(ParseableChunk* parseable_chunk);
|
| -
|
| - // After the window header has been parsed as far as the Delta_Indicator,
|
| - // this function is called to parse the following delta window header fields:
|
| - //
|
| - // Length of data for ADDs and RUNs - integer (VarintBE format)
|
| - // Length of instructions and sizes - integer (VarintBE format)
|
| - // Length of addresses for COPYs - integer (VarintBE format)
|
| - //
|
| - // If has_checksum_ is true, it also looks for the following element:
|
| - //
|
| - // Adler32 checksum - unsigned 32-bit integer (VarintBE format)
|
| - //
|
| - // It sets up the DeltaWindowSections instructions_and_sizes_,
|
| - // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format
|
| - // is being used, all three sections will include the entire window body; if
|
| - // the standard format is used, three non-overlapping window sections will be
|
| - // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA
|
| - // if standard format is being used and there is not enough input data to read
|
| - // the entire window body. Otherwise, returns RESULT_SUCCESS.
|
| - VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser);
|
| -
|
| - // Decodes the body of the window section as described in RFC sections 4.3,
|
| - // including the sections "Data section for ADDs and RUNs", "Instructions
|
| - // and sizes section", and "Addresses section for COPYs". These sections
|
| - // must already have been set up by ReadWindowHeader(). Returns a
|
| - // non-negative value on success, or RESULT_END_OF_DATA if the end of input
|
| - // was reached before the entire window could be decoded (only possible if
|
| - // IsInterleaved() is true), or RESULT_ERROR if an error occurred during
|
| - // decoding. Appends as much of the decoded target window as possible to
|
| - // parent->decoded_target().
|
| - //
|
| - int DecodeBody(ParseableChunk* parseable_chunk);
|
| -
|
| - // Returns the number of bytes already decoded into the target window.
|
| - size_t TargetBytesDecoded();
|
| -
|
| - // Decodes a single ADD instruction, updating parent_->decoded_target_.
|
| - VCDiffResult DecodeAdd(size_t size);
|
| -
|
| - // Decodes a single RUN instruction, updating parent_->decoded_target_.
|
| - VCDiffResult DecodeRun(size_t size);
|
| -
|
| - // Decodes a single COPY instruction, updating parent_->decoded_target_.
|
| - VCDiffResult DecodeCopy(size_t size, unsigned char mode);
|
| -
|
| - // When using the interleaved format, this function is called both on parsing
|
| - // the header and on resuming after a RESULT_END_OF_DATA was returned from a
|
| - // previous call to DecodeBody(). It sets up all three section pointers to
|
| - // reference the same interleaved stream of instructions, sizes, addresses,
|
| - // and data. These pointers must be reset every time that work resumes on a
|
| - // delta window, because the input data string may have been changed or
|
| - // resized since DecodeBody() last returned.
|
| - void UpdateInterleavedSectionPointers(const char* data_pos,
|
| - const char* data_end) {
|
| - const ptrdiff_t available_data = data_end - data_pos;
|
| - // Don't read past the end of currently-available data
|
| - if (available_data > interleaved_bytes_expected_) {
|
| - instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_);
|
| - } else {
|
| - instructions_and_sizes_.Init(data_pos, available_data);
|
| - }
|
| - data_for_add_and_run_.Init(&instructions_and_sizes_);
|
| - addresses_for_copy_.Init(&instructions_and_sizes_);
|
| - }
|
| -
|
| - // If true, the interleaved format described in AllowInterleaved() is used
|
| - // for the current delta file. Only valid after ReadWindowHeader() has been
|
| - // called and returned a positive number (i.e., the whole header was parsed),
|
| - // but before the window has finished decoding.
|
| - //
|
| - bool IsInterleaved() const {
|
| - // If the sections are interleaved, both addresses_for_copy_ and
|
| - // data_for_add_and_run_ should point at instructions_and_sizes_.
|
| - return !addresses_for_copy_.IsOwned();
|
| - }
|
| -
|
| - // Executes a single COPY or ADD instruction, appending data to
|
| - // parent_->decoded_target().
|
| - void CopyBytes(const char* data,
|
| - size_t size,
|
| - VCDiffAnnotationType annotation_type);
|
| -
|
| - // Executes a single RUN instruction, appending data to
|
| - // parent_->decoded_target().
|
| - void RunByte(unsigned char byte, size_t size);
|
| -
|
| - void AppendAnnotatedOutput(string* annotated_output) {
|
| - if (annotated_output_.get()) {
|
| - annotated_output->append(*annotated_output_.get());
|
| - }
|
| - }
|
| -
|
| - // Advance *parseable_chunk to point to the current position in the
|
| - // instructions/sizes section. If interleaved format is used, then
|
| - // decrement the number of expected bytes in the instructions/sizes section
|
| - // by the number of instruction/size bytes parsed.
|
| - void UpdateInstructionPointer(ParseableChunk* parseable_chunk);
|
| -
|
| - // The parent object which was passed to Init().
|
| - VCDiffStreamingDecoderImpl* parent_;
|
| -
|
| - // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader()
|
| - // has been called and succeeded in parsing the delta window header, but the
|
| - // entire window has not yet been decoded.
|
| - bool found_header_;
|
| -
|
| - // Contents and length of the current source window. source_segment_ptr_
|
| - // will be non-NULL if (a) the window section header for the current window
|
| - // has been read, but the window has not yet finished decoding; or
|
| - // (b) the window did not specify a source segment.
|
| - const char* source_segment_ptr_;
|
| - size_t source_segment_length_;
|
| -
|
| - // The delta encoding window sections as defined in RFC section 4.3.
|
| - // The pointer for each section will be incremented as data is consumed and
|
| - // decoded from that section. If the interleaved format is used,
|
| - // data_for_add_and_run_ and addresses_for_copy_ will both point to
|
| - // instructions_and_sizes_; otherwise, they will be separate data sections.
|
| - //
|
| - DeltaWindowSection instructions_and_sizes_;
|
| - DeltaWindowSection data_for_add_and_run_;
|
| - DeltaWindowSection addresses_for_copy_;
|
| -
|
| - // The expected bytes left to decode in instructions_and_sizes_. Only used
|
| - // for the interleaved format.
|
| - int interleaved_bytes_expected_;
|
| -
|
| - // The expected length of the target window once it has been decoded.
|
| - size_t target_window_length_;
|
| -
|
| - // The index in decoded_target at which the first byte of the current
|
| - // target window was/will be written.
|
| - size_t target_window_start_pos_;
|
| -
|
| - // If has_checksum_ is true, then expected_checksum_ contains an Adler32
|
| - // checksum of the target window data. This is an extension included in the
|
| - // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard.
|
| - bool has_checksum_;
|
| - VCDChecksum expected_checksum_;
|
| -
|
| - VCDiffCodeTableReader reader_;
|
| -
|
| - // This value is initialized to NULL, which means that annotated output is
|
| - // disabled. If EnableAnnotatedOutput() is called, it will be set to point
|
| - // to a new string object, and annotated output will be gathered into that
|
| - // string.
|
| - std::auto_ptr<string> annotated_output_;
|
| -
|
| - // Making these private avoids implicit copy constructor & assignment operator
|
| - VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&); // NOLINT
|
| - void operator=(const VCDiffDeltaFileWindow&);
|
| -};
|
| -
|
| -class VCDiffStreamingDecoderImpl {
|
| - public:
|
| - // A constant that is the default value for expected_target_bytes_,
|
| - // indicating that the decoder does not have an expected length
|
| - // for the target data.
|
| - static const size_t kUnlimitedBytes = static_cast<size_t>(-3);
|
| -
|
| - VCDiffStreamingDecoderImpl();
|
| - ~VCDiffStreamingDecoderImpl();
|
| -
|
| - // Resets all member variables to their initial states.
|
| - void Reset();
|
| -
|
| - // These functions are identical to their counterparts
|
| - // in VCDiffStreamingDecoder.
|
| - //
|
| - void StartDecoding(const char* dictionary_ptr, size_t dictionary_size);
|
| -
|
| - bool DecodeChunk(const char* data,
|
| - size_t len,
|
| - OutputStringInterface* output_string);
|
| -
|
| - bool FinishDecoding();
|
| -
|
| - // If true, the version of VCDIFF used in the current delta file allows
|
| - // for the interleaved format, in which instructions, addresses and data
|
| - // are all sent interleaved in the instructions section of each window
|
| - // rather than being sent in separate sections. This is not part of
|
| - // the VCDIFF draft standard, so we've defined a special version code
|
| - // 'S' which implies that this feature is available. Even if interleaving
|
| - // is supported, it is not mandatory; interleaved format will be implied
|
| - // if the address and data sections are both zero-length.
|
| - //
|
| - bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; }
|
| -
|
| - // If true, the version of VCDIFF used in the current delta file allows
|
| - // each delta window to contain an Adler32 checksum of the target window data.
|
| - // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then
|
| - // this checksum will appear as a variable-length integer, just after the
|
| - // "length of addresses for COPYs" value and before the window data sections.
|
| - // It is possible for some windows in a delta file to use the checksum feature
|
| - // and for others not to use it (and leave the flag bit set to 0.)
|
| - // Just as with AllowInterleaved(), this extension is not part of the draft
|
| - // standard and is only available when the version code 'S' is specified.
|
| - //
|
| - bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; }
|
| -
|
| - // See description of expected_target_bytes_, below.
|
| - bool HasTargetByteLimit() const {
|
| - return expected_target_bytes_ != kUnlimitedBytes;
|
| - }
|
| -
|
| - void SetTargetByteLimit(size_t expected_target_bytes) {
|
| - expected_target_bytes_ = expected_target_bytes;
|
| - }
|
| -
|
| - // Checks to see whether the decoded target data has reached the expected
|
| - // size.
|
| - bool MetTargetByteLimit() const {
|
| - if (!HasTargetByteLimit()) {
|
| - return false;
|
| - }
|
| - // The target byte limit should not have been exceeded, because each target
|
| - // window size is checked against that limit in ReadHeader(), and
|
| - // DecodeBody() will return RESULT_ERROR if the actual decoded output ever
|
| - // exceeds the advertised target window size.
|
| - if (decoded_target_.size() > expected_target_bytes_) {
|
| - LOG(DFATAL) << "Internal error: Decoded data size "
|
| - << decoded_target_.size()
|
| - << " exceeds target byte limit "
|
| - << expected_target_bytes_ << LOG_ENDL;
|
| - return true;
|
| - }
|
| - return decoded_target_.size() == expected_target_bytes_;
|
| - }
|
| -
|
| - // Checks to see whether adding a new target window of the specified size
|
| - // would exceed the expected target size. If so, logs an error and returns
|
| - // true; otherwise, returns false.
|
| - bool TargetWindowWouldExceedTargetByteLimit(size_t window_size) const {
|
| - if (!HasTargetByteLimit()) {
|
| - return false;
|
| - }
|
| - // The logical expression to check would be:
|
| - //
|
| - // decoded_target_.size() + target_bytes_to_add > expected_target_bytes_
|
| - //
|
| - // but the addition might cause an integer overflow if target_bytes_to_add
|
| - // is very large. So it is better to check target_bytes_to_add against
|
| - // the remaining expected target bytes.
|
| - size_t remaining_expected_target_bytes =
|
| - expected_target_bytes_ - decoded_target_.size();
|
| - if (window_size > remaining_expected_target_bytes) {
|
| - LOG(ERROR) << "Length of target window (" << window_size
|
| - << " bytes) plus previous windows (" << decoded_target_.size()
|
| - << " bytes) would exceed expected size of "
|
| - << expected_target_bytes_ << " bytes" << LOG_ENDL;
|
| - return true;
|
| - } else {
|
| - return false;
|
| - }
|
| - }
|
| -
|
| - // Returns the amount of input data passed to the last DecodeChunk()
|
| - // that was not consumed by the decoder. This is essential if
|
| - // SetExpectedTargetBytes() is being used, in order to preserve
|
| - // the input data stream beyond the expected encoding.
|
| - size_t GetUnconsumedDataSize() const {
|
| - return unparsed_bytes_.size();
|
| - }
|
| -
|
| - // This function will return true if the decoder has parsed a complete delta
|
| - // file header plus zero or more delta file windows, with no data left over.
|
| - // It will also return true if no delta data at all was decoded. If these
|
| - // conditions are not met, then FinishDecoding() should not be called.
|
| - bool IsDecodingComplete() const {
|
| - if (!FoundFileHeader()) {
|
| - // No complete delta file header has been parsed yet. DecodeChunk()
|
| - // may have received some data that it hasn't yet parsed, in which case
|
| - // decoding is incomplete.
|
| - return unparsed_bytes_.empty();
|
| - } else if (custom_code_table_decoder_.get()) {
|
| - // The decoder is in the middle of parsing a custom code table.
|
| - return false;
|
| - } else if (delta_window_.FoundWindowHeader()) {
|
| - // The decoder is in the middle of parsing an interleaved format delta
|
| - // window.
|
| - return false;
|
| - } else if (MetTargetByteLimit()) {
|
| - // The decoder found exactly the expected number of bytes. In this case
|
| - // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover
|
| - // data after the end of the delta file.
|
| - return true;
|
| - } else {
|
| - // No complete delta file window has been parsed yet. DecodeChunk()
|
| - // may have received some data that it hasn't yet parsed, in which case
|
| - // decoding is incomplete.
|
| - return unparsed_bytes_.empty();
|
| - }
|
| - }
|
| -
|
| - const char* dictionary_ptr() const { return dictionary_ptr_; }
|
| -
|
| - size_t dictionary_size() const { return dictionary_size_; }
|
| -
|
| - VCDiffAddressCache* addr_cache() { return addr_cache_.get(); }
|
| -
|
| - string* decoded_target() { return &decoded_target_; }
|
| -
|
| - string* annotated_output() { return &annotated_output_; }
|
| -
|
| - // The variable that determines whether annotated output is enabled is
|
| - // delta_window_.annotated_output_. If that member is NULL, then the feature
|
| - // is disabled.
|
| - void EnableAnnotatedOutput() {
|
| - delta_window_.EnableAnnotatedOutput();
|
| - }
|
| -
|
| - void DisableAnnotatedOutput() {
|
| - delta_window_.DisableAnnotatedOutput();
|
| - }
|
| -
|
| - void GetAnnotatedOutput(OutputStringInterface* annotated_output) {
|
| - // We could use annotated_output->assign(), but that method is not defined
|
| - // for some output string types, so use clear() + append() to accomplish the
|
| - // same thing.
|
| - annotated_output->clear();
|
| - annotated_output->append(annotated_output_.data(),
|
| - annotated_output_.size());
|
| - }
|
| -
|
| - private:
|
| - // Reads the VCDiff delta file header section as described in RFC section 4.1,
|
| - // except the custom code table data. Returns RESULT_ERROR if an error
|
| - // occurred, or RESULT_END_OF_DATA if the end of available data was reached
|
| - // before the entire header could be read. (The latter may be an error
|
| - // condition if there is no more data available.) Otherwise, advances
|
| - // data->position_ past the header and returns RESULT_SUCCESS.
|
| - //
|
| - VCDiffResult ReadDeltaFileHeader(ParseableChunk* data);
|
| -
|
| - // Indicates whether or not the header has already been read.
|
| - bool FoundFileHeader() const { return addr_cache_.get() != NULL; }
|
| -
|
| - // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta
|
| - // file header, this function parses the custom cache sizes and initializes
|
| - // a nested VCDiffStreamingDecoderImpl object that will be used to parse the
|
| - // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an
|
| - // error occurred, or RESULT_END_OF_DATA if the end of available data was
|
| - // reached before the custom cache sizes could be read. Otherwise, returns
|
| - // the number of bytes read.
|
| - //
|
| - int InitCustomCodeTable(const char* data_start, const char* data_end);
|
| -
|
| - // If a custom code table was specified in the header section that was parsed
|
| - // by ReadDeltaFileHeader(), this function makes a recursive call to another
|
| - // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the
|
| - // custom code table is expected to be supplied as an embedded VCDIFF
|
| - // encoding that uses the standard code table. Returns RESULT_ERROR if an
|
| - // error occurs, or RESULT_END_OF_DATA if the end of available data was
|
| - // reached before the entire custom code table could be read. Otherwise,
|
| - // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded
|
| - // custom code table. If the function returns RESULT_SUCCESS or
|
| - // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes.
|
| - //
|
| - VCDiffResult ReadCustomCodeTable(ParseableChunk* data);
|
| -
|
| - // Contents and length of the source (dictionary) data.
|
| - const char* dictionary_ptr_;
|
| - size_t dictionary_size_;
|
| -
|
| - // This string will be used to store any unparsed bytes left over when
|
| - // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA.
|
| - // It will also be used to concatenate those unparsed bytes with the data
|
| - // supplied to the next call to DecodeChunk(), so that they appear in
|
| - // contiguous memory.
|
| - string unparsed_bytes_;
|
| -
|
| - // The portion of the target file that has been decoded so far. This will be
|
| - // used to fill the output string for DecodeChunk(), and will also be used to
|
| - // execute COPY instructions that reference target data. Since the source
|
| - // window can come from a range of addresses in the previously decoded target
|
| - // data, the entire target file needs to be available to the decoder, not just
|
| - // the current target window.
|
| - string decoded_target_;
|
| -
|
| - // The VCDIFF version byte (also known as "header4") from the
|
| - // delta file header.
|
| - unsigned char vcdiff_version_code_;
|
| -
|
| - VCDiffDeltaFileWindow delta_window_;
|
| -
|
| - std::auto_ptr<VCDiffAddressCache> addr_cache_;
|
| -
|
| - // Will be NULL unless a custom code table has been defined.
|
| - std::auto_ptr<VCDiffCodeTableData> custom_code_table_;
|
| -
|
| - // Used to receive the decoded custom code table.
|
| - string custom_code_table_string_;
|
| -
|
| - // If a custom code table is specified, it will be expressed
|
| - // as an embedded VCDIFF delta file which uses the default code table
|
| - // as the source file (dictionary). Use a child decoder object
|
| - // to decode that delta file.
|
| - std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_;
|
| -
|
| - // If set, then the decoder is expecting *exactly* this number of
|
| - // target bytes to be decoded from one or more delta file windows.
|
| - // If this number is exceeded while decoding a window, but was not met
|
| - // before starting on that window, an error will be reported.
|
| - // If FinishDecoding() is called before this number is met, an error
|
| - // will also be reported. This feature is used for decoding the
|
| - // embedded code table data within a VCDIFF delta file; we want to
|
| - // stop processing the embedded data once the entire code table has
|
| - // been decoded, and treat the rest of the available data as part
|
| - // of the enclosing delta file.
|
| - size_t expected_target_bytes_;
|
| -
|
| - // This string will always be empty until EnableAnnotatedOutput() is called,
|
| - // at which point it will start to accumulate annotated delta windows each
|
| - // time DecodeChunk() finishes a window. It will be cleared each time that
|
| - // StartDecoding() is called.
|
| - string annotated_output_;
|
| -
|
| - // This value is used to ensure the correct order of calls to the interface
|
| - // functions, i.e., a single call to StartDecoding(), followed by zero or
|
| - // more calls to DecodeChunk(), followed by a single call to
|
| - // FinishDecoding().
|
| - bool start_decoding_was_called_;
|
| -
|
| - // Making these private avoids implicit copy constructor & assignment operator
|
| - VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&); // NOLINT
|
| - void operator=(const VCDiffStreamingDecoderImpl&);
|
| -};
|
| -
|
| -// *** Methods for VCDiffStreamingDecoderImpl
|
| -
|
| -VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl() {
|
| - delta_window_.Init(this);
|
| - Reset();
|
| -}
|
| -
|
| -// Reset() will delete the component objects without reallocating them.
|
| -VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); }
|
| -
|
| -void VCDiffStreamingDecoderImpl::Reset() {
|
| - start_decoding_was_called_ = false;
|
| - dictionary_ptr_ = NULL;
|
| - dictionary_size_ = 0;
|
| - vcdiff_version_code_ = '\0';
|
| - expected_target_bytes_ = kUnlimitedBytes;
|
| - addr_cache_.reset();
|
| - custom_code_table_.reset();
|
| - custom_code_table_decoder_.reset();
|
| - delta_window_.Reset();
|
| -}
|
| -
|
| -void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr,
|
| - size_t dictionary_size) {
|
| - if (start_decoding_was_called_) {
|
| - LOG(DFATAL) << "StartDecoding() called twice without FinishDecoding()"
|
| - << LOG_ENDL;
|
| - return;
|
| - }
|
| - unparsed_bytes_.clear();
|
| - decoded_target_.clear(); // delta_window_.Reset() depends on this
|
| - annotated_output_.clear();
|
| - Reset();
|
| - dictionary_ptr_ = dictionary_ptr;
|
| - dictionary_size_ = dictionary_size;
|
| - start_decoding_was_called_ = true;
|
| -}
|
| -
|
| -// Reads the VCDiff delta file header section as described in RFC section 4.1:
|
| -//
|
| -// Header1 - byte = 0xD6 (ASCII 'V' | 0x80)
|
| -// Header2 - byte = 0xC3 (ASCII 'C' | 0x80)
|
| -// Header3 - byte = 0xC4 (ASCII 'D' | 0x80)
|
| -// Header4 - byte
|
| -// Hdr_Indicator - byte
|
| -// [Secondary compressor ID] - byte
|
| -// [Length of code table data] - integer
|
| -// [Code table data]
|
| -//
|
| -// Initializes the code table and address cache objects. Returns RESULT_ERROR
|
| -// if an error occurred, and RESULT_END_OF_DATA if the end of available data was
|
| -// reached before the entire header could be read. (The latter may be an error
|
| -// condition if there is no more data available.) Otherwise, returns
|
| -// RESULT_SUCCESS, and removes the header bytes from the data string.
|
| -//
|
| -// It's relatively inefficient to expect this function to parse any number of
|
| -// input bytes available, down to 1 byte, but it is necessary in case the input
|
| -// is not a properly formatted VCDIFF delta file. If the entire input consists
|
| -// of two bytes "12", then we should recognize that it does not match the
|
| -// initial VCDIFF magic number "VCD" and report an error, rather than waiting
|
| -// indefinitely for more input that will never arrive.
|
| -//
|
| -VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader(
|
| - ParseableChunk* data) {
|
| - if (FoundFileHeader()) {
|
| - return RESULT_SUCCESS;
|
| - }
|
| - size_t data_size = data->UnparsedSize();
|
| - const DeltaFileHeader* header =
|
| - reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData());
|
| - bool wrong_magic_number = false;
|
| - switch (data_size) {
|
| - // Verify only the bytes that are available.
|
| - default:
|
| - // Found header contents up to and including VCDIFF version
|
| - vcdiff_version_code_ = header->header4;
|
| - if ((vcdiff_version_code_ != 0x00) && // Draft standard VCDIFF (RFC 3284)
|
| - (vcdiff_version_code_ != 'S')) { // Enhancements for SDCH protocol
|
| - LOG(ERROR) << "Unrecognized VCDIFF format version" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - // fall through
|
| - case 3:
|
| - if (header->header3 != 0xC4) { // magic value 'D' | 0x80
|
| - wrong_magic_number = true;
|
| - }
|
| - // fall through
|
| - case 2:
|
| - if (header->header2 != 0xC3) { // magic value 'C' | 0x80
|
| - wrong_magic_number = true;
|
| - }
|
| - // fall through
|
| - case 1:
|
| - if (header->header1 != 0xD6) { // magic value 'V' | 0x80
|
| - wrong_magic_number = true;
|
| - }
|
| - // fall through
|
| - case 0:
|
| - if (wrong_magic_number) {
|
| - LOG(ERROR) << "Did not find VCDIFF header bytes; "
|
| - "input is not a VCDIFF delta file" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA;
|
| - }
|
| - // Secondary compressor not supported.
|
| - if (header->hdr_indicator & VCD_DECOMPRESS) {
|
| - LOG(ERROR) << "Secondary compression is not supported" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - if (header->hdr_indicator & VCD_CODETABLE) {
|
| - int bytes_parsed = InitCustomCodeTable(
|
| - data->UnparsedData() + sizeof(DeltaFileHeader),
|
| - data->End());
|
| - switch (bytes_parsed) {
|
| - case RESULT_ERROR:
|
| - return RESULT_ERROR;
|
| - case RESULT_END_OF_DATA:
|
| - return RESULT_END_OF_DATA;
|
| - default:
|
| - data->Advance(sizeof(DeltaFileHeader) + bytes_parsed);
|
| - }
|
| - } else {
|
| - addr_cache_.reset(new VCDiffAddressCache);
|
| - // addr_cache_->Init() will be called
|
| - // from VCDiffStreamingDecoderImpl::DecodeChunk()
|
| - data->Advance(sizeof(DeltaFileHeader));
|
| - }
|
| - return RESULT_SUCCESS;
|
| -}
|
| -
|
| -int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start,
|
| - const char* data_end) {
|
| - // A custom code table is being specified. Parse the variable-length
|
| - // cache sizes and begin parsing the encoded custom code table.
|
| - int32_t near_cache_size = 0, same_cache_size = 0;
|
| - VCDiffHeaderParser header_parser(data_start, data_end);
|
| - if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) {
|
| - return header_parser.GetResult();
|
| - }
|
| - if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) {
|
| - return header_parser.GetResult();
|
| - }
|
| - custom_code_table_.reset(new struct VCDiffCodeTableData);
|
| - memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData));
|
| - custom_code_table_string_.clear();
|
| - addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size));
|
| - // addr_cache_->Init() will be called
|
| - // from VCDiffStreamingDecoderImpl::DecodeChunk()
|
| -
|
| - // If we reach this point (the start of the custom code table)
|
| - // without encountering a RESULT_END_OF_DATA condition, then we won't call
|
| - // ReadDeltaFileHeader() again for this delta file.
|
| - //
|
| - // Instantiate a recursive decoder to interpret the custom code table
|
| - // as a VCDIFF encoding of the default code table.
|
| - custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl);
|
| - custom_code_table_decoder_->StartDecoding(
|
| - reinterpret_cast<const char*>(
|
| - &VCDiffCodeTableData::kDefaultCodeTableData),
|
| - sizeof(VCDiffCodeTableData::kDefaultCodeTableData));
|
| - custom_code_table_decoder_->SetTargetByteLimit(sizeof(*custom_code_table_));
|
| - return static_cast<int>(header_parser.ParsedSize());
|
| -}
|
| -
|
| -VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable(
|
| - ParseableChunk* data) {
|
| - if (!custom_code_table_decoder_.get()) {
|
| - return RESULT_SUCCESS;
|
| - }
|
| - if (!custom_code_table_.get()) {
|
| - LOG(DFATAL) << "Internal error: custom_code_table_decoder_ is set,"
|
| - " but custom_code_table_ is NULL" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - OutputString<string> output_string(&custom_code_table_string_);
|
| - if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(),
|
| - data->UnparsedSize(),
|
| - &output_string)) {
|
| - return RESULT_ERROR;
|
| - }
|
| - if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) {
|
| - // Skip over the consumed data.
|
| - data->Finish();
|
| - return RESULT_END_OF_DATA;
|
| - }
|
| - if (!custom_code_table_decoder_->FinishDecoding()) {
|
| - return RESULT_ERROR;
|
| - }
|
| - if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) {
|
| - LOG(DFATAL) << "Decoded custom code table size "
|
| - << custom_code_table_string_.length()
|
| - << " does not match expected size "
|
| - << sizeof(*custom_code_table_) << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - memcpy(custom_code_table_.get(),
|
| - custom_code_table_string_.data(),
|
| - sizeof(*custom_code_table_));
|
| - custom_code_table_string_.clear();
|
| - // Skip over the consumed data.
|
| - data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize());
|
| - custom_code_table_decoder_.reset();
|
| - delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode());
|
| - return RESULT_SUCCESS;
|
| -}
|
| -
|
| -namespace {
|
| -
|
| -class TrackNewOutputText {
|
| - public:
|
| - explicit TrackNewOutputText(const string& decoded_target)
|
| - : decoded_target_(decoded_target),
|
| - initial_decoded_target_size_(decoded_target.size()) { }
|
| -
|
| - void AppendNewOutputText(size_t target_bytes_remaining,
|
| - OutputStringInterface* output_string) {
|
| - const size_t bytes_decoded_this_chunk =
|
| - decoded_target_.size() - initial_decoded_target_size_;
|
| - if (bytes_decoded_this_chunk > 0) {
|
| - if (target_bytes_remaining > 0) {
|
| - // The decoder is midway through decoding a target window. Resize
|
| - // output_string to match the expected length. The interface guarantees
|
| - // not to resize the output_string more than once per target window
|
| - // decoded.
|
| - output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk
|
| - + target_bytes_remaining);
|
| - }
|
| - output_string->append(
|
| - decoded_target_.data() + initial_decoded_target_size_,
|
| - bytes_decoded_this_chunk);
|
| - }
|
| - }
|
| -
|
| - private:
|
| - const string& decoded_target_;
|
| - size_t initial_decoded_target_size_;
|
| -};
|
| -
|
| -} // anonymous namespace
|
| -
|
| -bool VCDiffStreamingDecoderImpl::DecodeChunk(
|
| - const char* data,
|
| - size_t len,
|
| - OutputStringInterface* output_string) {
|
| - if (!start_decoding_was_called_) {
|
| - LOG(DFATAL) << "DecodeChunk() called without StartDecoding()" << LOG_ENDL;
|
| - Reset();
|
| - return false;
|
| - }
|
| - ParseableChunk parseable_chunk(data, len);
|
| - if (!unparsed_bytes_.empty()) {
|
| - unparsed_bytes_.append(data, len);
|
| - parseable_chunk.SetDataBuffer(unparsed_bytes_.data(),
|
| - unparsed_bytes_.size());
|
| - }
|
| - TrackNewOutputText output_tracker(decoded_target_);
|
| - VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk);
|
| - if (RESULT_SUCCESS == result) {
|
| - result = ReadCustomCodeTable(&parseable_chunk);
|
| - }
|
| - if (RESULT_SUCCESS == result) {
|
| - result = delta_window_.DecodeWindows(&parseable_chunk);
|
| - }
|
| - if (RESULT_ERROR == result) {
|
| - Reset(); // Don't allow further DecodeChunk calls
|
| - return false;
|
| - }
|
| - unparsed_bytes_.assign(parseable_chunk.UnparsedData(),
|
| - parseable_chunk.UnparsedSize());
|
| - output_tracker.AppendNewOutputText(delta_window_.TargetBytesRemaining(),
|
| - output_string);
|
| - return true;
|
| -}
|
| -
|
| -// Finishes decoding after all data has been received. Returns true
|
| -// if decoding of the entire stream was successful.
|
| -bool VCDiffStreamingDecoderImpl::FinishDecoding() {
|
| - bool success = true;
|
| - if (!start_decoding_was_called_) {
|
| - LOG(WARNING) << "FinishDecoding() called before StartDecoding(),"
|
| - " or called after DecodeChunk() returned false"
|
| - << LOG_ENDL;
|
| - success = false;
|
| - } else if (!IsDecodingComplete()) {
|
| - LOG(ERROR) << "FinishDecoding() called before parsing entire"
|
| - " delta file window" << LOG_ENDL;
|
| - success = false;
|
| - }
|
| - // Reset the object state for the next decode operation
|
| - Reset();
|
| - return success;
|
| -}
|
| -
|
| -// *** Methods for VCDiffDeltaFileWindow
|
| -
|
| -inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) {
|
| - Reset();
|
| -}
|
| -
|
| -inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { }
|
| -
|
| -inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) {
|
| - parent_ = parent;
|
| -}
|
| -
|
| -void VCDiffDeltaFileWindow::Reset() {
|
| - found_header_ = false;
|
| -
|
| - // Mark the start of the current target window.
|
| - target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U;
|
| - target_window_length_ = 0;
|
| -
|
| - source_segment_ptr_ = NULL;
|
| - source_segment_length_ = 0;
|
| -
|
| - instructions_and_sizes_.Invalidate();
|
| - data_for_add_and_run_.Invalidate();
|
| - addresses_for_copy_.Invalidate();
|
| -
|
| - interleaved_bytes_expected_ = 0;
|
| -
|
| - has_checksum_ = false;
|
| - expected_checksum_ = 0;
|
| - if (annotated_output_.get()) {
|
| - annotated_output_->clear();
|
| - }
|
| -}
|
| -
|
| -VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections(
|
| - VCDiffHeaderParser* header_parser) {
|
| - size_t add_and_run_data_length = 0;
|
| - size_t instructions_and_sizes_length = 0;
|
| - size_t addresses_length = 0;
|
| - if (!header_parser->ParseSectionLengths(has_checksum_,
|
| - &add_and_run_data_length,
|
| - &instructions_and_sizes_length,
|
| - &addresses_length,
|
| - &expected_checksum_)) {
|
| - return header_parser->GetResult();
|
| - }
|
| - if (parent_->AllowInterleaved() &&
|
| - (add_and_run_data_length == 0) &&
|
| - (addresses_length == 0)) {
|
| - // The interleaved format is being used.
|
| - interleaved_bytes_expected_ =
|
| - static_cast<int>(instructions_and_sizes_length);
|
| - UpdateInterleavedSectionPointers(header_parser->UnparsedData(),
|
| - header_parser->End());
|
| - } else {
|
| - // If interleaved format is not used, then the whole window contents
|
| - // must be available before decoding can begin. If only part of
|
| - // the current window is available, then report end of data
|
| - // and re-parse the whole header when DecodeChunk() is called again.
|
| - if (header_parser->UnparsedSize() < (add_and_run_data_length +
|
| - instructions_and_sizes_length +
|
| - addresses_length)) {
|
| - return RESULT_END_OF_DATA;
|
| - }
|
| - data_for_add_and_run_.Init(header_parser->UnparsedData(),
|
| - add_and_run_data_length);
|
| - instructions_and_sizes_.Init(data_for_add_and_run_.End(),
|
| - instructions_and_sizes_length);
|
| - addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length);
|
| - if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) {
|
| - LOG(ERROR) << "The end of the instructions section "
|
| - "does not match the end of the delta window" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - }
|
| - reader_.Init(instructions_and_sizes_.UnparsedDataAddr(),
|
| - instructions_and_sizes_.End());
|
| - return RESULT_SUCCESS;
|
| -}
|
| -
|
| -// Here are the elements of the delta window header to be parsed,
|
| -// from section 4 of the RFC:
|
| -//
|
| -// Window1
|
| -// Win_Indicator - byte
|
| -// [Source segment size] - integer
|
| -// [Source segment position] - integer
|
| -// The delta encoding of the target window
|
| -// Length of the delta encoding - integer
|
| -// The delta encoding
|
| -// Size of the target window - integer
|
| -// Delta_Indicator - byte
|
| -// Length of data for ADDs and RUNs - integer
|
| -// Length of instructions and sizes - integer
|
| -// Length of addresses for COPYs - integer
|
| -// Data section for ADDs and RUNs - array of bytes
|
| -// Instructions and sizes section - array of bytes
|
| -// Addresses section for COPYs - array of bytes
|
| -//
|
| -VCDiffResult VCDiffDeltaFileWindow::ReadHeader(
|
| - ParseableChunk* parseable_chunk) {
|
| - string* decoded_target = parent_->decoded_target();
|
| - VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(),
|
| - parseable_chunk->End());
|
| - size_t source_segment_position = 0;
|
| - unsigned char win_indicator = 0;
|
| - if (!header_parser.ParseWinIndicatorAndSourceSegment(
|
| - parent_->dictionary_size(),
|
| - decoded_target->size(),
|
| - &win_indicator,
|
| - &source_segment_length_,
|
| - &source_segment_position)) {
|
| - return header_parser.GetResult();
|
| - }
|
| - has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM);
|
| - if (!header_parser.ParseWindowLengths(&target_window_length_)) {
|
| - return header_parser.GetResult();
|
| - }
|
| - if (parent_->TargetWindowWouldExceedTargetByteLimit(target_window_length_)) {
|
| - // An error has been logged by TargetWindowWouldExceedTargetByteLimit().
|
| - return RESULT_ERROR;
|
| - }
|
| - header_parser.ParseDeltaIndicator();
|
| - VCDiffResult setup_return_code = SetUpWindowSections(&header_parser);
|
| - if (RESULT_SUCCESS != setup_return_code) {
|
| - return setup_return_code;
|
| - }
|
| - // Reserve enough space in the output string for the current target window.
|
| - decoded_target->reserve(target_window_start_pos_ + target_window_length_);
|
| - // Get a pointer to the start of the source segment.
|
| - if (win_indicator & VCD_SOURCE) {
|
| - source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position;
|
| - } else if (win_indicator & VCD_TARGET) {
|
| - // This assignment must happen after the reserve().
|
| - // decoded_target should not be resized again while processing this window,
|
| - // so source_segment_ptr_ should remain valid.
|
| - source_segment_ptr_ = decoded_target->data() + source_segment_position;
|
| - }
|
| - // The whole window header was found and parsed successfully.
|
| - found_header_ = true;
|
| - parseable_chunk->Advance(header_parser.ParsedSize());
|
| - return RESULT_SUCCESS;
|
| -}
|
| -
|
| -void VCDiffDeltaFileWindow::UpdateInstructionPointer(
|
| - ParseableChunk* parseable_chunk) {
|
| - if (IsInterleaved()) {
|
| - size_t bytes_parsed = instructions_and_sizes_.ParsedSize();
|
| - // Reduce expected instruction segment length by bytes parsed
|
| - interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed);
|
| - parseable_chunk->Advance(bytes_parsed);
|
| - }
|
| -}
|
| -
|
| -size_t VCDiffDeltaFileWindow::TargetBytesDecoded() {
|
| - return parent_->decoded_target()->size() - target_window_start_pos_;
|
| -}
|
| -
|
| -size_t VCDiffDeltaFileWindow::TargetBytesRemaining() {
|
| - if (target_window_length_ == 0) {
|
| - // There is no window being decoded at present
|
| - return 0;
|
| - } else {
|
| - return target_window_length_ - TargetBytesDecoded();
|
| - }
|
| -}
|
| -
|
| -void VCDiffDeltaFileWindow::CopyBytes(const char* data,
|
| - size_t size,
|
| - VCDiffAnnotationType annotation_type) {
|
| - parent_->decoded_target()->append(data, size);
|
| - if (annotated_output_.get()) {
|
| - annotated_output_->append(kAnnotationStartTags[annotation_type]);
|
| - annotated_output_->append(data, size);
|
| - annotated_output_->append(kAnnotationEndTags[annotation_type]);
|
| - }
|
| -}
|
| -
|
| -void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) {
|
| - parent_->decoded_target()->append(size, byte);
|
| - if (annotated_output_.get()) {
|
| - annotated_output_->append(kAnnotationStartTags[VCD_ANNOTATION_LITERAL]);
|
| - annotated_output_->append(size, byte);
|
| - annotated_output_->append(kAnnotationEndTags[VCD_ANNOTATION_LITERAL]);
|
| - }
|
| -}
|
| -
|
| -VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) {
|
| - if (size > data_for_add_and_run_.UnparsedSize()) {
|
| - return RESULT_END_OF_DATA;
|
| - }
|
| - // Write the next "size" data bytes
|
| - CopyBytes(data_for_add_and_run_.UnparsedData(), size, VCD_ANNOTATION_LITERAL);
|
| - data_for_add_and_run_.Advance(size);
|
| - return RESULT_SUCCESS;
|
| -}
|
| -
|
| -VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) {
|
| - if (data_for_add_and_run_.Empty()) {
|
| - return RESULT_END_OF_DATA;
|
| - }
|
| - // Write "size" copies of the next data byte
|
| - RunByte(*data_for_add_and_run_.UnparsedData(), size);
|
| - data_for_add_and_run_.Advance(1);
|
| - return RESULT_SUCCESS;
|
| -}
|
| -
|
| -VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size,
|
| - unsigned char mode) {
|
| - // Keep track of the number of target bytes decoded as a local variable
|
| - // to avoid recalculating it each time it is needed.
|
| - size_t target_bytes_decoded = TargetBytesDecoded();
|
| - const VCDAddress here_address =
|
| - static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded);
|
| - const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress(
|
| - here_address,
|
| - mode,
|
| - addresses_for_copy_.UnparsedDataAddr(),
|
| - addresses_for_copy_.End());
|
| - switch (decoded_address) {
|
| - case RESULT_ERROR:
|
| - LOG(ERROR) << "Unable to decode address for COPY" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - case RESULT_END_OF_DATA:
|
| - return RESULT_END_OF_DATA;
|
| - default:
|
| - if ((decoded_address < 0) || (decoded_address > here_address)) {
|
| - LOG(DFATAL) << "Internal error: unexpected address " << decoded_address
|
| - << " returned from DecodeAddress, with here_address = "
|
| - << here_address << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - break;
|
| - }
|
| - size_t address = static_cast<size_t>(decoded_address);
|
| - if ((address + size) <= source_segment_length_) {
|
| - // Copy all data from source segment
|
| - CopyBytes(&source_segment_ptr_[address], size, VCD_ANNOTATION_DMATCH);
|
| - return RESULT_SUCCESS;
|
| - }
|
| - // Copy some data from target window...
|
| - if (address < source_segment_length_) {
|
| - // ... plus some data from source segment
|
| - const size_t partial_copy_size = source_segment_length_ - address;
|
| - CopyBytes(&source_segment_ptr_[address],
|
| - partial_copy_size,
|
| - VCD_ANNOTATION_DMATCH);
|
| - target_bytes_decoded += partial_copy_size;
|
| - address += partial_copy_size;
|
| - size -= partial_copy_size;
|
| - }
|
| - address -= source_segment_length_;
|
| - // address is now based at start of target window
|
| - const char* const target_segment_ptr = parent_->decoded_target()->data() +
|
| - target_window_start_pos_;
|
| - while (size > (target_bytes_decoded - address)) {
|
| - // Recursive copy that extends into the yet-to-be-copied target data
|
| - const size_t partial_copy_size = target_bytes_decoded - address;
|
| - CopyBytes(&target_segment_ptr[address],
|
| - partial_copy_size,
|
| - VCD_ANNOTATION_BMATCH);
|
| - target_bytes_decoded += partial_copy_size;
|
| - address += partial_copy_size;
|
| - size -= partial_copy_size;
|
| - }
|
| - CopyBytes(&target_segment_ptr[address], size, VCD_ANNOTATION_BMATCH);
|
| - return RESULT_SUCCESS;
|
| -}
|
| -
|
| -int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) {
|
| - if (IsInterleaved() && (instructions_and_sizes_.UnparsedData()
|
| - != parseable_chunk->UnparsedData())) {
|
| - LOG(DFATAL) << "Internal error: interleaved format is used, but the"
|
| - " input pointer does not point to the instructions section"
|
| - << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - while (TargetBytesDecoded() < target_window_length_) {
|
| - int32_t decoded_size = VCD_INSTRUCTION_ERROR;
|
| - unsigned char mode = 0;
|
| - VCDiffInstructionType instruction =
|
| - reader_.GetNextInstruction(&decoded_size, &mode);
|
| - switch (instruction) {
|
| - case VCD_INSTRUCTION_END_OF_DATA:
|
| - UpdateInstructionPointer(parseable_chunk);
|
| - return RESULT_END_OF_DATA;
|
| - case VCD_INSTRUCTION_ERROR:
|
| - return RESULT_ERROR;
|
| - default:
|
| - break;
|
| - }
|
| - const size_t size = static_cast<size_t>(decoded_size);
|
| - // The value of "size" itself could be enormous (say, INT32_MAX)
|
| - // so check it individually against the limit to protect against
|
| - // overflow when adding it to something else.
|
| - if ((size > target_window_length_) ||
|
| - ((size + TargetBytesDecoded()) > target_window_length_)) {
|
| - LOG(ERROR) << VCDiffInstructionName(instruction)
|
| - << " with size " << size
|
| - << " plus existing " << TargetBytesDecoded()
|
| - << " bytes of target data exceeds length of target"
|
| - " window (" << target_window_length_ << " bytes)"
|
| - << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - VCDiffResult result = RESULT_SUCCESS;
|
| - switch (instruction) {
|
| - case VCD_ADD:
|
| - result = DecodeAdd(size);
|
| - break;
|
| - case VCD_RUN:
|
| - result = DecodeRun(size);
|
| - break;
|
| - case VCD_COPY:
|
| - result = DecodeCopy(size, mode);
|
| - break;
|
| - default:
|
| - LOG(DFATAL) << "Unexpected instruction type " << instruction
|
| - << "in opcode stream" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - switch (result) {
|
| - case RESULT_END_OF_DATA:
|
| - reader_.UnGetInstruction();
|
| - UpdateInstructionPointer(parseable_chunk);
|
| - return RESULT_END_OF_DATA;
|
| - case RESULT_ERROR:
|
| - return RESULT_ERROR;
|
| - case RESULT_SUCCESS:
|
| - break;
|
| - }
|
| - }
|
| - if (TargetBytesDecoded() != target_window_length_) {
|
| - LOG(ERROR) << "Decoded target window size (" << TargetBytesDecoded()
|
| - << " bytes) does not match expected size ("
|
| - << target_window_length_ << " bytes)" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - const char* const target_window_start =
|
| - parent_->decoded_target()->data() + target_window_start_pos_;
|
| - if (has_checksum_ &&
|
| - (ComputeAdler32(target_window_start, target_window_length_)
|
| - != expected_checksum_)) {
|
| - LOG(ERROR) << "Target data does not match checksum; this could mean "
|
| - "that the wrong dictionary was used" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - if (!instructions_and_sizes_.Empty()) {
|
| - LOG(ERROR) << "Excess instructions and sizes left over "
|
| - "after decoding target window" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - if (!IsInterleaved()) {
|
| - // Standard format is being used, with three separate sections for the
|
| - // instructions, data, and addresses.
|
| - if (!data_for_add_and_run_.Empty()) {
|
| - LOG(ERROR) << "Excess ADD/RUN data left over "
|
| - "after decoding target window" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - if (!addresses_for_copy_.Empty()) {
|
| - LOG(ERROR) << "Excess COPY addresses left over "
|
| - "after decoding target window" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - // Reached the end of the window. Update the ParseableChunk to point to the
|
| - // end of the addresses section, which is the last section in the window.
|
| - parseable_chunk->SetPosition(addresses_for_copy_.End());
|
| - } else {
|
| - // Interleaved format is being used. The window may have been only
|
| - // partially decoded.
|
| - UpdateInstructionPointer(parseable_chunk);
|
| - }
|
| - return RESULT_SUCCESS;
|
| -}
|
| -
|
| -VCDiffResult VCDiffDeltaFileWindow::DecodeWindows(
|
| - ParseableChunk* parseable_chunk) {
|
| - if (!parent_) {
|
| - LOG(DFATAL) << "Internal error: VCDiffDeltaFileWindow::DecodeWindows() "
|
| - "called before VCDiffDeltaFileWindow::Init()" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - while (!parseable_chunk->Empty()) {
|
| - if (!found_header_) {
|
| - switch (ReadHeader(parseable_chunk)) {
|
| - case RESULT_END_OF_DATA:
|
| - return RESULT_END_OF_DATA;
|
| - case RESULT_ERROR:
|
| - return RESULT_ERROR;
|
| - default:
|
| - // Reset address cache between windows (RFC section 5.1)
|
| - if (!parent_->addr_cache()->Init()) {
|
| - LOG(DFATAL) << "Error initializing address cache" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - }
|
| - } else {
|
| - // We are resuming a window that was partially decoded before a
|
| - // RESULT_END_OF_DATA was returned. This can only happen on the first
|
| - // loop iteration, and only if the interleaved format is enabled and used.
|
| - if (!IsInterleaved()) {
|
| - LOG(DFATAL) << "Internal error: Resumed decoding of a delta file window"
|
| - " when interleaved format is not being used" << LOG_ENDL;
|
| - return RESULT_ERROR;
|
| - }
|
| - UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(),
|
| - parseable_chunk->End());
|
| - reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(),
|
| - instructions_and_sizes_.End());
|
| - }
|
| - switch (DecodeBody(parseable_chunk)) {
|
| - case RESULT_END_OF_DATA:
|
| - if (MoreDataExpected()) {
|
| - return RESULT_END_OF_DATA;
|
| - } else {
|
| - LOG(ERROR) << "End of data reached while decoding VCDIFF delta file"
|
| - << LOG_ENDL;
|
| - // fall through to RESULT_ERROR case
|
| - }
|
| - case RESULT_ERROR:
|
| - return RESULT_ERROR;
|
| - default:
|
| - break; // DecodeBody succeeded
|
| - }
|
| - AppendAnnotatedOutput(parent_->annotated_output());
|
| - // Get ready to read a new delta window
|
| - Reset();
|
| - if (parent_->MetTargetByteLimit()) {
|
| - // Found exactly the length expected. Stop decoding.
|
| - return RESULT_SUCCESS;
|
| - }
|
| - }
|
| - return RESULT_SUCCESS;
|
| -}
|
| -
|
| -// *** Methods for VCDiffStreamingDecoder
|
| -
|
| -VCDiffStreamingDecoder::VCDiffStreamingDecoder()
|
| -: impl_(new VCDiffStreamingDecoderImpl) { }
|
| -
|
| -VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; }
|
| -
|
| -void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) {
|
| - impl_->StartDecoding(source, len);
|
| -}
|
| -
|
| -bool VCDiffStreamingDecoder::DecodeChunkToInterface(
|
| - const char* data,
|
| - size_t len,
|
| - OutputStringInterface* output_string) {
|
| - return impl_->DecodeChunk(data, len, output_string);
|
| -}
|
| -
|
| -bool VCDiffStreamingDecoder::FinishDecoding() {
|
| - return impl_->FinishDecoding();
|
| -}
|
| -
|
| -void VCDiffStreamingDecoder::EnableAnnotatedOutput() {
|
| - impl_->EnableAnnotatedOutput();
|
| -}
|
| -
|
| -void VCDiffStreamingDecoder::DisableAnnotatedOutput() {
|
| - impl_->DisableAnnotatedOutput();
|
| -}
|
| -
|
| -void VCDiffStreamingDecoder::GetAnnotatedOutputToInterface(
|
| - OutputStringInterface* annotated_output) {
|
| - impl_->GetAnnotatedOutput(annotated_output);
|
| -}
|
| -
|
| -bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr,
|
| - size_t dictionary_size,
|
| - const string& encoding,
|
| - OutputStringInterface* target) {
|
| - target->clear();
|
| - decoder_.StartDecoding(dictionary_ptr, dictionary_size);
|
| - if (!decoder_.DecodeChunkToInterface(encoding.data(),
|
| - encoding.size(),
|
| - target)) {
|
| - return false;
|
| - }
|
| - return decoder_.FinishDecoding();
|
| -}
|
| -
|
| -} // namespace open_vcdiff
|
|
|