Index: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdecoder.cc |
=================================================================== |
--- sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdecoder.cc (revision 2678) |
+++ sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdecoder.cc (working copy) |
@@ -1,1401 +0,0 @@ |
-// Copyright 2008 Google Inc. |
-// Author: Lincoln Smith |
-// |
-// Licensed under the Apache License, Version 2.0 (the "License"); |
-// you may not use this file except in compliance with the License. |
-// You may obtain a copy of the License at |
-// |
-// http://www.apache.org/licenses/LICENSE-2.0 |
-// |
-// Unless required by applicable law or agreed to in writing, software |
-// distributed under the License is distributed on an "AS IS" BASIS, |
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
-// See the License for the specific language governing permissions and |
-// limitations under the License. |
-// |
-// Implements a Decoder for the format described in |
-// RFC 3284 - The VCDIFF Generic Differencing and Compression Data Format. |
-// The RFC text can be found at http://www.faqs.org/rfcs/rfc3284.html |
-// |
-// The RFC describes the possibility of using a secondary compressor |
-// to further reduce the size of each section of the VCDIFF output. |
-// That feature is not supported in this implementation of the encoder |
-// and decoder. |
-// No secondary compressor types have been publicly registered with |
-// the IANA at http://www.iana.org/assignments/vcdiff-comp-ids |
-// in the more than five years since the registry was created, so there |
-// is no standard set of compressor IDs which would be generated by other |
-// encoders or accepted by other decoders. |
- |
-#include <config.h> |
-#include "google/vcdecoder.h" |
-#include <stdint.h> // int32_t |
-#include <cstddef> // size_t, ptrdiff_t |
-#include <memory> // auto_ptr |
-#include <string> |
-#include "addrcache.h" |
-#include "checksum.h" |
-#include "codetable.h" |
-#include "decodetable.h" |
-#include "headerparser.h" |
-#include "logging.h" |
-#include "google/output_string.h" |
-#include "varint_bigendian.h" |
-#include "vcdiff_defs.h" |
- |
-namespace open_vcdiff { |
- |
-namespace { |
- |
-using std::string; |
- |
-enum VCDiffAnnotationType { |
- VCD_ANNOTATION_LITERAL, |
- VCD_ANNOTATION_DMATCH, |
- VCD_ANNOTATION_BMATCH |
-}; |
- |
-static const char* kAnnotationStartTags[] = { |
- "<literal>", |
- "<dmatch>", |
- "<bmatch>" |
-}; |
- |
-static const char* kAnnotationEndTags[] = { |
- "</literal>", |
- "</dmatch>", |
- "</bmatch>" |
-}; |
- |
-} // anonymous namespace |
- |
-// This class is used to parse delta file windows as described |
-// in RFC sections 4.2 and 4.3. Its methods are not thread-safe. |
-// |
-// Here is the window format copied from the RFC: |
-// |
-// Window1 |
-// Win_Indicator - byte |
-// [Source segment size] - integer |
-// [Source segment position] - integer |
-// The delta encoding of the target window |
-// Length of the delta encoding - integer |
-// The delta encoding |
-// Size of the target window - integer |
-// Delta_Indicator - byte |
-// Length of data for ADDs and RUNs - integer |
-// Length of instructions and sizes - integer |
-// Length of addresses for COPYs - integer |
-// Data section for ADDs and RUNs - array of bytes |
-// Instructions and sizes section - array of bytes |
-// Addresses section for COPYs - array of bytes |
-// Window2 |
-// ... |
-// |
-// Sample usage: |
-// |
-// VCDiffDeltaFileWindow delta_window_; |
-// delta_window_.Init(parent); |
-// ParseableChunk parseable_chunk(input_buffer, |
-// input_size, |
-// leftover_unencoded_bytes); |
-// switch (delta_window_.DecodeWindows(&parseable_chunk)) { |
-// case RESULT_END_OF_DATA: |
-// <Read more input and retry DecodeWindows later.> |
-// case RESULT_ERROR: |
-// <Handle error case. An error log message has already been generated.> |
-// } |
-// |
-// DecodeWindows consumes as many windows from the input as it can. It only |
-// needs to be placed within a loop if the loop is used to obtain more input |
-// (delta file) data. |
-// |
-class VCDiffDeltaFileWindow { |
- public: |
- VCDiffDeltaFileWindow(); |
- ~VCDiffDeltaFileWindow(); |
- |
- // Init() should be called immediately after constructing the |
- // VCDiffDeltaFileWindow(). It must be called before DecodeWindows() can be |
- // invoked, or an error will occur. |
- void Init(VCDiffStreamingDecoderImpl* parent); |
- |
- // Resets the pointers to the data sections in the current window. |
- void Reset(); |
- |
- bool UseCodeTable(const VCDiffCodeTableData& code_table_data, |
- unsigned char max_mode) { |
- return reader_.UseCodeTable(code_table_data, max_mode); |
- } |
- |
- // Decodes as many delta windows as possible using the input data from |
- // *parseable_chunk. Appends the decoded target windows to |
- // parent_->decoded_target(). If annotated output is enabled, appends |
- // annotated output to parent_->annotated_output(). Returns RESULT_SUCCESS on |
- // success, or RESULT_END_OF_DATA if the end of input was reached before the |
- // entire window could be decoded and more input is expected (only possible if |
- // IsInterleaved() is true), or RESULT_ERROR if an error occurred during |
- // decoding. In the RESULT_ERROR case, the value of parseable_chunk->pointer_ |
- // is undefined; otherwise, parseable_chunk->Advance() is called to point to |
- // the input data position just after the data that has been decoded. |
- // |
- // If expected_target_bytes is not set to kUnlimitedBytes, then the decoder |
- // expects *exactly* this number of target bytes to be decoded from one or |
- // more delta file windows. If this number is met exactly after finishing a |
- // delta window, this function will return RESULT_SUCCESS without processing |
- // any more bytes from data_pointer. If this number is exceeded while |
- // decoding a window, but was not met before starting that window, |
- // then RESULT_ERROR will be returned. |
- // |
- VCDiffResult DecodeWindows(ParseableChunk* parseable_chunk); |
- |
- bool FoundWindowHeader() const { |
- return found_header_; |
- } |
- |
- bool MoreDataExpected() const { |
- // When parsing an interleaved-format delta file, |
- // every time DecodeBody() exits, interleaved_bytes_expected_ |
- // will be decremented by the number of bytes parsed. If it |
- // reaches zero, then there is no more data expected because |
- // the size of the interleaved section (given in the window |
- // header) has been reached. |
- return IsInterleaved() && (interleaved_bytes_expected_ > 0); |
- } |
- |
- // Returns the number of bytes remaining to be decoded in the target window. |
- // If not in the process of decoding a window, returns 0. |
- size_t TargetBytesRemaining(); |
- |
- void EnableAnnotatedOutput() { |
- if (!annotated_output_.get()) { |
- annotated_output_.reset(new string); |
- } |
- } |
- |
- void DisableAnnotatedOutput() { |
- annotated_output_.reset(NULL); |
- } |
- |
- private: |
- // Reads the header of the window section as described in RFC sections 4.2 and |
- // 4.3, up to and including the value "Length of addresses for COPYs". If the |
- // entire header is found, this function sets up the DeltaWindowSections |
- // instructions_and_sizes_, data_for_add_and_run_, and addresses_for_copy_ so |
- // that the decoder can begin decoding the opcodes in these sections. Returns |
- // RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA if the end of |
- // available data was reached before the entire header could be read. (The |
- // latter may be an error condition if there is no more data available.) |
- // Otherwise, returns RESULT_SUCCESS and advances parseable_chunk past the |
- // parsed header. |
- // |
- VCDiffResult ReadHeader(ParseableChunk* parseable_chunk); |
- |
- // After the window header has been parsed as far as the Delta_Indicator, |
- // this function is called to parse the following delta window header fields: |
- // |
- // Length of data for ADDs and RUNs - integer (VarintBE format) |
- // Length of instructions and sizes - integer (VarintBE format) |
- // Length of addresses for COPYs - integer (VarintBE format) |
- // |
- // If has_checksum_ is true, it also looks for the following element: |
- // |
- // Adler32 checksum - unsigned 32-bit integer (VarintBE format) |
- // |
- // It sets up the DeltaWindowSections instructions_and_sizes_, |
- // data_for_add_and_run_, and addresses_for_copy_. If the interleaved format |
- // is being used, all three sections will include the entire window body; if |
- // the standard format is used, three non-overlapping window sections will be |
- // defined. Returns RESULT_ERROR if an error occurred, or RESULT_END_OF_DATA |
- // if standard format is being used and there is not enough input data to read |
- // the entire window body. Otherwise, returns RESULT_SUCCESS. |
- VCDiffResult SetUpWindowSections(VCDiffHeaderParser* header_parser); |
- |
- // Decodes the body of the window section as described in RFC sections 4.3, |
- // including the sections "Data section for ADDs and RUNs", "Instructions |
- // and sizes section", and "Addresses section for COPYs". These sections |
- // must already have been set up by ReadWindowHeader(). Returns a |
- // non-negative value on success, or RESULT_END_OF_DATA if the end of input |
- // was reached before the entire window could be decoded (only possible if |
- // IsInterleaved() is true), or RESULT_ERROR if an error occurred during |
- // decoding. Appends as much of the decoded target window as possible to |
- // parent->decoded_target(). |
- // |
- int DecodeBody(ParseableChunk* parseable_chunk); |
- |
- // Returns the number of bytes already decoded into the target window. |
- size_t TargetBytesDecoded(); |
- |
- // Decodes a single ADD instruction, updating parent_->decoded_target_. |
- VCDiffResult DecodeAdd(size_t size); |
- |
- // Decodes a single RUN instruction, updating parent_->decoded_target_. |
- VCDiffResult DecodeRun(size_t size); |
- |
- // Decodes a single COPY instruction, updating parent_->decoded_target_. |
- VCDiffResult DecodeCopy(size_t size, unsigned char mode); |
- |
- // When using the interleaved format, this function is called both on parsing |
- // the header and on resuming after a RESULT_END_OF_DATA was returned from a |
- // previous call to DecodeBody(). It sets up all three section pointers to |
- // reference the same interleaved stream of instructions, sizes, addresses, |
- // and data. These pointers must be reset every time that work resumes on a |
- // delta window, because the input data string may have been changed or |
- // resized since DecodeBody() last returned. |
- void UpdateInterleavedSectionPointers(const char* data_pos, |
- const char* data_end) { |
- const ptrdiff_t available_data = data_end - data_pos; |
- // Don't read past the end of currently-available data |
- if (available_data > interleaved_bytes_expected_) { |
- instructions_and_sizes_.Init(data_pos, interleaved_bytes_expected_); |
- } else { |
- instructions_and_sizes_.Init(data_pos, available_data); |
- } |
- data_for_add_and_run_.Init(&instructions_and_sizes_); |
- addresses_for_copy_.Init(&instructions_and_sizes_); |
- } |
- |
- // If true, the interleaved format described in AllowInterleaved() is used |
- // for the current delta file. Only valid after ReadWindowHeader() has been |
- // called and returned a positive number (i.e., the whole header was parsed), |
- // but before the window has finished decoding. |
- // |
- bool IsInterleaved() const { |
- // If the sections are interleaved, both addresses_for_copy_ and |
- // data_for_add_and_run_ should point at instructions_and_sizes_. |
- return !addresses_for_copy_.IsOwned(); |
- } |
- |
- // Executes a single COPY or ADD instruction, appending data to |
- // parent_->decoded_target(). |
- void CopyBytes(const char* data, |
- size_t size, |
- VCDiffAnnotationType annotation_type); |
- |
- // Executes a single RUN instruction, appending data to |
- // parent_->decoded_target(). |
- void RunByte(unsigned char byte, size_t size); |
- |
- void AppendAnnotatedOutput(string* annotated_output) { |
- if (annotated_output_.get()) { |
- annotated_output->append(*annotated_output_.get()); |
- } |
- } |
- |
- // Advance *parseable_chunk to point to the current position in the |
- // instructions/sizes section. If interleaved format is used, then |
- // decrement the number of expected bytes in the instructions/sizes section |
- // by the number of instruction/size bytes parsed. |
- void UpdateInstructionPointer(ParseableChunk* parseable_chunk); |
- |
- // The parent object which was passed to Init(). |
- VCDiffStreamingDecoderImpl* parent_; |
- |
- // This value will be true if VCDiffDeltaFileWindow::ReadDeltaWindowHeader() |
- // has been called and succeeded in parsing the delta window header, but the |
- // entire window has not yet been decoded. |
- bool found_header_; |
- |
- // Contents and length of the current source window. source_segment_ptr_ |
- // will be non-NULL if (a) the window section header for the current window |
- // has been read, but the window has not yet finished decoding; or |
- // (b) the window did not specify a source segment. |
- const char* source_segment_ptr_; |
- size_t source_segment_length_; |
- |
- // The delta encoding window sections as defined in RFC section 4.3. |
- // The pointer for each section will be incremented as data is consumed and |
- // decoded from that section. If the interleaved format is used, |
- // data_for_add_and_run_ and addresses_for_copy_ will both point to |
- // instructions_and_sizes_; otherwise, they will be separate data sections. |
- // |
- DeltaWindowSection instructions_and_sizes_; |
- DeltaWindowSection data_for_add_and_run_; |
- DeltaWindowSection addresses_for_copy_; |
- |
- // The expected bytes left to decode in instructions_and_sizes_. Only used |
- // for the interleaved format. |
- int interleaved_bytes_expected_; |
- |
- // The expected length of the target window once it has been decoded. |
- size_t target_window_length_; |
- |
- // The index in decoded_target at which the first byte of the current |
- // target window was/will be written. |
- size_t target_window_start_pos_; |
- |
- // If has_checksum_ is true, then expected_checksum_ contains an Adler32 |
- // checksum of the target window data. This is an extension included in the |
- // VCDIFF 'S' (SDCH) format, but is not part of the RFC 3284 draft standard. |
- bool has_checksum_; |
- VCDChecksum expected_checksum_; |
- |
- VCDiffCodeTableReader reader_; |
- |
- // This value is initialized to NULL, which means that annotated output is |
- // disabled. If EnableAnnotatedOutput() is called, it will be set to point |
- // to a new string object, and annotated output will be gathered into that |
- // string. |
- std::auto_ptr<string> annotated_output_; |
- |
- // Making these private avoids implicit copy constructor & assignment operator |
- VCDiffDeltaFileWindow(const VCDiffDeltaFileWindow&); // NOLINT |
- void operator=(const VCDiffDeltaFileWindow&); |
-}; |
- |
-class VCDiffStreamingDecoderImpl { |
- public: |
- // A constant that is the default value for expected_target_bytes_, |
- // indicating that the decoder does not have an expected length |
- // for the target data. |
- static const size_t kUnlimitedBytes = static_cast<size_t>(-3); |
- |
- VCDiffStreamingDecoderImpl(); |
- ~VCDiffStreamingDecoderImpl(); |
- |
- // Resets all member variables to their initial states. |
- void Reset(); |
- |
- // These functions are identical to their counterparts |
- // in VCDiffStreamingDecoder. |
- // |
- void StartDecoding(const char* dictionary_ptr, size_t dictionary_size); |
- |
- bool DecodeChunk(const char* data, |
- size_t len, |
- OutputStringInterface* output_string); |
- |
- bool FinishDecoding(); |
- |
- // If true, the version of VCDIFF used in the current delta file allows |
- // for the interleaved format, in which instructions, addresses and data |
- // are all sent interleaved in the instructions section of each window |
- // rather than being sent in separate sections. This is not part of |
- // the VCDIFF draft standard, so we've defined a special version code |
- // 'S' which implies that this feature is available. Even if interleaving |
- // is supported, it is not mandatory; interleaved format will be implied |
- // if the address and data sections are both zero-length. |
- // |
- bool AllowInterleaved() const { return vcdiff_version_code_ == 'S'; } |
- |
- // If true, the version of VCDIFF used in the current delta file allows |
- // each delta window to contain an Adler32 checksum of the target window data. |
- // If the bit 0x08 (VCD_CHECKSUM) is set in the Win_Indicator flags, then |
- // this checksum will appear as a variable-length integer, just after the |
- // "length of addresses for COPYs" value and before the window data sections. |
- // It is possible for some windows in a delta file to use the checksum feature |
- // and for others not to use it (and leave the flag bit set to 0.) |
- // Just as with AllowInterleaved(), this extension is not part of the draft |
- // standard and is only available when the version code 'S' is specified. |
- // |
- bool AllowChecksum() const { return vcdiff_version_code_ == 'S'; } |
- |
- // See description of expected_target_bytes_, below. |
- bool HasTargetByteLimit() const { |
- return expected_target_bytes_ != kUnlimitedBytes; |
- } |
- |
- void SetTargetByteLimit(size_t expected_target_bytes) { |
- expected_target_bytes_ = expected_target_bytes; |
- } |
- |
- // Checks to see whether the decoded target data has reached the expected |
- // size. |
- bool MetTargetByteLimit() const { |
- if (!HasTargetByteLimit()) { |
- return false; |
- } |
- // The target byte limit should not have been exceeded, because each target |
- // window size is checked against that limit in ReadHeader(), and |
- // DecodeBody() will return RESULT_ERROR if the actual decoded output ever |
- // exceeds the advertised target window size. |
- if (decoded_target_.size() > expected_target_bytes_) { |
- LOG(DFATAL) << "Internal error: Decoded data size " |
- << decoded_target_.size() |
- << " exceeds target byte limit " |
- << expected_target_bytes_ << LOG_ENDL; |
- return true; |
- } |
- return decoded_target_.size() == expected_target_bytes_; |
- } |
- |
- // Checks to see whether adding a new target window of the specified size |
- // would exceed the expected target size. If so, logs an error and returns |
- // true; otherwise, returns false. |
- bool TargetWindowWouldExceedTargetByteLimit(size_t window_size) const { |
- if (!HasTargetByteLimit()) { |
- return false; |
- } |
- // The logical expression to check would be: |
- // |
- // decoded_target_.size() + target_bytes_to_add > expected_target_bytes_ |
- // |
- // but the addition might cause an integer overflow if target_bytes_to_add |
- // is very large. So it is better to check target_bytes_to_add against |
- // the remaining expected target bytes. |
- size_t remaining_expected_target_bytes = |
- expected_target_bytes_ - decoded_target_.size(); |
- if (window_size > remaining_expected_target_bytes) { |
- LOG(ERROR) << "Length of target window (" << window_size |
- << " bytes) plus previous windows (" << decoded_target_.size() |
- << " bytes) would exceed expected size of " |
- << expected_target_bytes_ << " bytes" << LOG_ENDL; |
- return true; |
- } else { |
- return false; |
- } |
- } |
- |
- // Returns the amount of input data passed to the last DecodeChunk() |
- // that was not consumed by the decoder. This is essential if |
- // SetExpectedTargetBytes() is being used, in order to preserve |
- // the input data stream beyond the expected encoding. |
- size_t GetUnconsumedDataSize() const { |
- return unparsed_bytes_.size(); |
- } |
- |
- // This function will return true if the decoder has parsed a complete delta |
- // file header plus zero or more delta file windows, with no data left over. |
- // It will also return true if no delta data at all was decoded. If these |
- // conditions are not met, then FinishDecoding() should not be called. |
- bool IsDecodingComplete() const { |
- if (!FoundFileHeader()) { |
- // No complete delta file header has been parsed yet. DecodeChunk() |
- // may have received some data that it hasn't yet parsed, in which case |
- // decoding is incomplete. |
- return unparsed_bytes_.empty(); |
- } else if (custom_code_table_decoder_.get()) { |
- // The decoder is in the middle of parsing a custom code table. |
- return false; |
- } else if (delta_window_.FoundWindowHeader()) { |
- // The decoder is in the middle of parsing an interleaved format delta |
- // window. |
- return false; |
- } else if (MetTargetByteLimit()) { |
- // The decoder found exactly the expected number of bytes. In this case |
- // it is OK for unparsed_bytes_ to be non-empty; it contains the leftover |
- // data after the end of the delta file. |
- return true; |
- } else { |
- // No complete delta file window has been parsed yet. DecodeChunk() |
- // may have received some data that it hasn't yet parsed, in which case |
- // decoding is incomplete. |
- return unparsed_bytes_.empty(); |
- } |
- } |
- |
- const char* dictionary_ptr() const { return dictionary_ptr_; } |
- |
- size_t dictionary_size() const { return dictionary_size_; } |
- |
- VCDiffAddressCache* addr_cache() { return addr_cache_.get(); } |
- |
- string* decoded_target() { return &decoded_target_; } |
- |
- string* annotated_output() { return &annotated_output_; } |
- |
- // The variable that determines whether annotated output is enabled is |
- // delta_window_.annotated_output_. If that member is NULL, then the feature |
- // is disabled. |
- void EnableAnnotatedOutput() { |
- delta_window_.EnableAnnotatedOutput(); |
- } |
- |
- void DisableAnnotatedOutput() { |
- delta_window_.DisableAnnotatedOutput(); |
- } |
- |
- void GetAnnotatedOutput(OutputStringInterface* annotated_output) { |
- // We could use annotated_output->assign(), but that method is not defined |
- // for some output string types, so use clear() + append() to accomplish the |
- // same thing. |
- annotated_output->clear(); |
- annotated_output->append(annotated_output_.data(), |
- annotated_output_.size()); |
- } |
- |
- private: |
- // Reads the VCDiff delta file header section as described in RFC section 4.1, |
- // except the custom code table data. Returns RESULT_ERROR if an error |
- // occurred, or RESULT_END_OF_DATA if the end of available data was reached |
- // before the entire header could be read. (The latter may be an error |
- // condition if there is no more data available.) Otherwise, advances |
- // data->position_ past the header and returns RESULT_SUCCESS. |
- // |
- VCDiffResult ReadDeltaFileHeader(ParseableChunk* data); |
- |
- // Indicates whether or not the header has already been read. |
- bool FoundFileHeader() const { return addr_cache_.get() != NULL; } |
- |
- // If ReadDeltaFileHeader() finds the VCD_CODETABLE flag set within the delta |
- // file header, this function parses the custom cache sizes and initializes |
- // a nested VCDiffStreamingDecoderImpl object that will be used to parse the |
- // custom code table in ReadCustomCodeTable(). Returns RESULT_ERROR if an |
- // error occurred, or RESULT_END_OF_DATA if the end of available data was |
- // reached before the custom cache sizes could be read. Otherwise, returns |
- // the number of bytes read. |
- // |
- int InitCustomCodeTable(const char* data_start, const char* data_end); |
- |
- // If a custom code table was specified in the header section that was parsed |
- // by ReadDeltaFileHeader(), this function makes a recursive call to another |
- // VCDiffStreamingDecoderImpl object (custom_code_table_decoder_), since the |
- // custom code table is expected to be supplied as an embedded VCDIFF |
- // encoding that uses the standard code table. Returns RESULT_ERROR if an |
- // error occurs, or RESULT_END_OF_DATA if the end of available data was |
- // reached before the entire custom code table could be read. Otherwise, |
- // returns RESULT_SUCCESS and sets *data_ptr to the position after the encoded |
- // custom code table. If the function returns RESULT_SUCCESS or |
- // RESULT_END_OF_DATA, it advances data->position_ past the parsed bytes. |
- // |
- VCDiffResult ReadCustomCodeTable(ParseableChunk* data); |
- |
- // Contents and length of the source (dictionary) data. |
- const char* dictionary_ptr_; |
- size_t dictionary_size_; |
- |
- // This string will be used to store any unparsed bytes left over when |
- // DecodeChunk() reaches the end of its input and returns RESULT_END_OF_DATA. |
- // It will also be used to concatenate those unparsed bytes with the data |
- // supplied to the next call to DecodeChunk(), so that they appear in |
- // contiguous memory. |
- string unparsed_bytes_; |
- |
- // The portion of the target file that has been decoded so far. This will be |
- // used to fill the output string for DecodeChunk(), and will also be used to |
- // execute COPY instructions that reference target data. Since the source |
- // window can come from a range of addresses in the previously decoded target |
- // data, the entire target file needs to be available to the decoder, not just |
- // the current target window. |
- string decoded_target_; |
- |
- // The VCDIFF version byte (also known as "header4") from the |
- // delta file header. |
- unsigned char vcdiff_version_code_; |
- |
- VCDiffDeltaFileWindow delta_window_; |
- |
- std::auto_ptr<VCDiffAddressCache> addr_cache_; |
- |
- // Will be NULL unless a custom code table has been defined. |
- std::auto_ptr<VCDiffCodeTableData> custom_code_table_; |
- |
- // Used to receive the decoded custom code table. |
- string custom_code_table_string_; |
- |
- // If a custom code table is specified, it will be expressed |
- // as an embedded VCDIFF delta file which uses the default code table |
- // as the source file (dictionary). Use a child decoder object |
- // to decode that delta file. |
- std::auto_ptr<VCDiffStreamingDecoderImpl> custom_code_table_decoder_; |
- |
- // If set, then the decoder is expecting *exactly* this number of |
- // target bytes to be decoded from one or more delta file windows. |
- // If this number is exceeded while decoding a window, but was not met |
- // before starting on that window, an error will be reported. |
- // If FinishDecoding() is called before this number is met, an error |
- // will also be reported. This feature is used for decoding the |
- // embedded code table data within a VCDIFF delta file; we want to |
- // stop processing the embedded data once the entire code table has |
- // been decoded, and treat the rest of the available data as part |
- // of the enclosing delta file. |
- size_t expected_target_bytes_; |
- |
- // This string will always be empty until EnableAnnotatedOutput() is called, |
- // at which point it will start to accumulate annotated delta windows each |
- // time DecodeChunk() finishes a window. It will be cleared each time that |
- // StartDecoding() is called. |
- string annotated_output_; |
- |
- // This value is used to ensure the correct order of calls to the interface |
- // functions, i.e., a single call to StartDecoding(), followed by zero or |
- // more calls to DecodeChunk(), followed by a single call to |
- // FinishDecoding(). |
- bool start_decoding_was_called_; |
- |
- // Making these private avoids implicit copy constructor & assignment operator |
- VCDiffStreamingDecoderImpl(const VCDiffStreamingDecoderImpl&); // NOLINT |
- void operator=(const VCDiffStreamingDecoderImpl&); |
-}; |
- |
-// *** Methods for VCDiffStreamingDecoderImpl |
- |
-VCDiffStreamingDecoderImpl::VCDiffStreamingDecoderImpl() { |
- delta_window_.Init(this); |
- Reset(); |
-} |
- |
-// Reset() will delete the component objects without reallocating them. |
-VCDiffStreamingDecoderImpl::~VCDiffStreamingDecoderImpl() { Reset(); } |
- |
-void VCDiffStreamingDecoderImpl::Reset() { |
- start_decoding_was_called_ = false; |
- dictionary_ptr_ = NULL; |
- dictionary_size_ = 0; |
- vcdiff_version_code_ = '\0'; |
- expected_target_bytes_ = kUnlimitedBytes; |
- addr_cache_.reset(); |
- custom_code_table_.reset(); |
- custom_code_table_decoder_.reset(); |
- delta_window_.Reset(); |
-} |
- |
-void VCDiffStreamingDecoderImpl::StartDecoding(const char* dictionary_ptr, |
- size_t dictionary_size) { |
- if (start_decoding_was_called_) { |
- LOG(DFATAL) << "StartDecoding() called twice without FinishDecoding()" |
- << LOG_ENDL; |
- return; |
- } |
- unparsed_bytes_.clear(); |
- decoded_target_.clear(); // delta_window_.Reset() depends on this |
- annotated_output_.clear(); |
- Reset(); |
- dictionary_ptr_ = dictionary_ptr; |
- dictionary_size_ = dictionary_size; |
- start_decoding_was_called_ = true; |
-} |
- |
-// Reads the VCDiff delta file header section as described in RFC section 4.1: |
-// |
-// Header1 - byte = 0xD6 (ASCII 'V' | 0x80) |
-// Header2 - byte = 0xC3 (ASCII 'C' | 0x80) |
-// Header3 - byte = 0xC4 (ASCII 'D' | 0x80) |
-// Header4 - byte |
-// Hdr_Indicator - byte |
-// [Secondary compressor ID] - byte |
-// [Length of code table data] - integer |
-// [Code table data] |
-// |
-// Initializes the code table and address cache objects. Returns RESULT_ERROR |
-// if an error occurred, and RESULT_END_OF_DATA if the end of available data was |
-// reached before the entire header could be read. (The latter may be an error |
-// condition if there is no more data available.) Otherwise, returns |
-// RESULT_SUCCESS, and removes the header bytes from the data string. |
-// |
-// It's relatively inefficient to expect this function to parse any number of |
-// input bytes available, down to 1 byte, but it is necessary in case the input |
-// is not a properly formatted VCDIFF delta file. If the entire input consists |
-// of two bytes "12", then we should recognize that it does not match the |
-// initial VCDIFF magic number "VCD" and report an error, rather than waiting |
-// indefinitely for more input that will never arrive. |
-// |
-VCDiffResult VCDiffStreamingDecoderImpl::ReadDeltaFileHeader( |
- ParseableChunk* data) { |
- if (FoundFileHeader()) { |
- return RESULT_SUCCESS; |
- } |
- size_t data_size = data->UnparsedSize(); |
- const DeltaFileHeader* header = |
- reinterpret_cast<const DeltaFileHeader*>(data->UnparsedData()); |
- bool wrong_magic_number = false; |
- switch (data_size) { |
- // Verify only the bytes that are available. |
- default: |
- // Found header contents up to and including VCDIFF version |
- vcdiff_version_code_ = header->header4; |
- if ((vcdiff_version_code_ != 0x00) && // Draft standard VCDIFF (RFC 3284) |
- (vcdiff_version_code_ != 'S')) { // Enhancements for SDCH protocol |
- LOG(ERROR) << "Unrecognized VCDIFF format version" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- // fall through |
- case 3: |
- if (header->header3 != 0xC4) { // magic value 'D' | 0x80 |
- wrong_magic_number = true; |
- } |
- // fall through |
- case 2: |
- if (header->header2 != 0xC3) { // magic value 'C' | 0x80 |
- wrong_magic_number = true; |
- } |
- // fall through |
- case 1: |
- if (header->header1 != 0xD6) { // magic value 'V' | 0x80 |
- wrong_magic_number = true; |
- } |
- // fall through |
- case 0: |
- if (wrong_magic_number) { |
- LOG(ERROR) << "Did not find VCDIFF header bytes; " |
- "input is not a VCDIFF delta file" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- if (data_size < sizeof(DeltaFileHeader)) return RESULT_END_OF_DATA; |
- } |
- // Secondary compressor not supported. |
- if (header->hdr_indicator & VCD_DECOMPRESS) { |
- LOG(ERROR) << "Secondary compression is not supported" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- if (header->hdr_indicator & VCD_CODETABLE) { |
- int bytes_parsed = InitCustomCodeTable( |
- data->UnparsedData() + sizeof(DeltaFileHeader), |
- data->End()); |
- switch (bytes_parsed) { |
- case RESULT_ERROR: |
- return RESULT_ERROR; |
- case RESULT_END_OF_DATA: |
- return RESULT_END_OF_DATA; |
- default: |
- data->Advance(sizeof(DeltaFileHeader) + bytes_parsed); |
- } |
- } else { |
- addr_cache_.reset(new VCDiffAddressCache); |
- // addr_cache_->Init() will be called |
- // from VCDiffStreamingDecoderImpl::DecodeChunk() |
- data->Advance(sizeof(DeltaFileHeader)); |
- } |
- return RESULT_SUCCESS; |
-} |
- |
-int VCDiffStreamingDecoderImpl::InitCustomCodeTable(const char* data_start, |
- const char* data_end) { |
- // A custom code table is being specified. Parse the variable-length |
- // cache sizes and begin parsing the encoded custom code table. |
- int32_t near_cache_size = 0, same_cache_size = 0; |
- VCDiffHeaderParser header_parser(data_start, data_end); |
- if (!header_parser.ParseInt32("size of near cache", &near_cache_size)) { |
- return header_parser.GetResult(); |
- } |
- if (!header_parser.ParseInt32("size of same cache", &same_cache_size)) { |
- return header_parser.GetResult(); |
- } |
- custom_code_table_.reset(new struct VCDiffCodeTableData); |
- memset(custom_code_table_.get(), 0, sizeof(struct VCDiffCodeTableData)); |
- custom_code_table_string_.clear(); |
- addr_cache_.reset(new VCDiffAddressCache(near_cache_size, same_cache_size)); |
- // addr_cache_->Init() will be called |
- // from VCDiffStreamingDecoderImpl::DecodeChunk() |
- |
- // If we reach this point (the start of the custom code table) |
- // without encountering a RESULT_END_OF_DATA condition, then we won't call |
- // ReadDeltaFileHeader() again for this delta file. |
- // |
- // Instantiate a recursive decoder to interpret the custom code table |
- // as a VCDIFF encoding of the default code table. |
- custom_code_table_decoder_.reset(new VCDiffStreamingDecoderImpl); |
- custom_code_table_decoder_->StartDecoding( |
- reinterpret_cast<const char*>( |
- &VCDiffCodeTableData::kDefaultCodeTableData), |
- sizeof(VCDiffCodeTableData::kDefaultCodeTableData)); |
- custom_code_table_decoder_->SetTargetByteLimit(sizeof(*custom_code_table_)); |
- return static_cast<int>(header_parser.ParsedSize()); |
-} |
- |
-VCDiffResult VCDiffStreamingDecoderImpl::ReadCustomCodeTable( |
- ParseableChunk* data) { |
- if (!custom_code_table_decoder_.get()) { |
- return RESULT_SUCCESS; |
- } |
- if (!custom_code_table_.get()) { |
- LOG(DFATAL) << "Internal error: custom_code_table_decoder_ is set," |
- " but custom_code_table_ is NULL" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- OutputString<string> output_string(&custom_code_table_string_); |
- if (!custom_code_table_decoder_->DecodeChunk(data->UnparsedData(), |
- data->UnparsedSize(), |
- &output_string)) { |
- return RESULT_ERROR; |
- } |
- if (custom_code_table_string_.length() < sizeof(*custom_code_table_)) { |
- // Skip over the consumed data. |
- data->Finish(); |
- return RESULT_END_OF_DATA; |
- } |
- if (!custom_code_table_decoder_->FinishDecoding()) { |
- return RESULT_ERROR; |
- } |
- if (custom_code_table_string_.length() != sizeof(*custom_code_table_)) { |
- LOG(DFATAL) << "Decoded custom code table size " |
- << custom_code_table_string_.length() |
- << " does not match expected size " |
- << sizeof(*custom_code_table_) << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- memcpy(custom_code_table_.get(), |
- custom_code_table_string_.data(), |
- sizeof(*custom_code_table_)); |
- custom_code_table_string_.clear(); |
- // Skip over the consumed data. |
- data->FinishExcept(custom_code_table_decoder_->GetUnconsumedDataSize()); |
- custom_code_table_decoder_.reset(); |
- delta_window_.UseCodeTable(*custom_code_table_, addr_cache_->LastMode()); |
- return RESULT_SUCCESS; |
-} |
- |
-namespace { |
- |
-class TrackNewOutputText { |
- public: |
- explicit TrackNewOutputText(const string& decoded_target) |
- : decoded_target_(decoded_target), |
- initial_decoded_target_size_(decoded_target.size()) { } |
- |
- void AppendNewOutputText(size_t target_bytes_remaining, |
- OutputStringInterface* output_string) { |
- const size_t bytes_decoded_this_chunk = |
- decoded_target_.size() - initial_decoded_target_size_; |
- if (bytes_decoded_this_chunk > 0) { |
- if (target_bytes_remaining > 0) { |
- // The decoder is midway through decoding a target window. Resize |
- // output_string to match the expected length. The interface guarantees |
- // not to resize the output_string more than once per target window |
- // decoded. |
- output_string->ReserveAdditionalBytes(bytes_decoded_this_chunk |
- + target_bytes_remaining); |
- } |
- output_string->append( |
- decoded_target_.data() + initial_decoded_target_size_, |
- bytes_decoded_this_chunk); |
- } |
- } |
- |
- private: |
- const string& decoded_target_; |
- size_t initial_decoded_target_size_; |
-}; |
- |
-} // anonymous namespace |
- |
-bool VCDiffStreamingDecoderImpl::DecodeChunk( |
- const char* data, |
- size_t len, |
- OutputStringInterface* output_string) { |
- if (!start_decoding_was_called_) { |
- LOG(DFATAL) << "DecodeChunk() called without StartDecoding()" << LOG_ENDL; |
- Reset(); |
- return false; |
- } |
- ParseableChunk parseable_chunk(data, len); |
- if (!unparsed_bytes_.empty()) { |
- unparsed_bytes_.append(data, len); |
- parseable_chunk.SetDataBuffer(unparsed_bytes_.data(), |
- unparsed_bytes_.size()); |
- } |
- TrackNewOutputText output_tracker(decoded_target_); |
- VCDiffResult result = ReadDeltaFileHeader(&parseable_chunk); |
- if (RESULT_SUCCESS == result) { |
- result = ReadCustomCodeTable(&parseable_chunk); |
- } |
- if (RESULT_SUCCESS == result) { |
- result = delta_window_.DecodeWindows(&parseable_chunk); |
- } |
- if (RESULT_ERROR == result) { |
- Reset(); // Don't allow further DecodeChunk calls |
- return false; |
- } |
- unparsed_bytes_.assign(parseable_chunk.UnparsedData(), |
- parseable_chunk.UnparsedSize()); |
- output_tracker.AppendNewOutputText(delta_window_.TargetBytesRemaining(), |
- output_string); |
- return true; |
-} |
- |
-// Finishes decoding after all data has been received. Returns true |
-// if decoding of the entire stream was successful. |
-bool VCDiffStreamingDecoderImpl::FinishDecoding() { |
- bool success = true; |
- if (!start_decoding_was_called_) { |
- LOG(WARNING) << "FinishDecoding() called before StartDecoding()," |
- " or called after DecodeChunk() returned false" |
- << LOG_ENDL; |
- success = false; |
- } else if (!IsDecodingComplete()) { |
- LOG(ERROR) << "FinishDecoding() called before parsing entire" |
- " delta file window" << LOG_ENDL; |
- success = false; |
- } |
- // Reset the object state for the next decode operation |
- Reset(); |
- return success; |
-} |
- |
-// *** Methods for VCDiffDeltaFileWindow |
- |
-inline VCDiffDeltaFileWindow::VCDiffDeltaFileWindow() : parent_(NULL) { |
- Reset(); |
-} |
- |
-inline VCDiffDeltaFileWindow::~VCDiffDeltaFileWindow() { } |
- |
-inline void VCDiffDeltaFileWindow::Init(VCDiffStreamingDecoderImpl* parent) { |
- parent_ = parent; |
-} |
- |
-void VCDiffDeltaFileWindow::Reset() { |
- found_header_ = false; |
- |
- // Mark the start of the current target window. |
- target_window_start_pos_ = parent_ ? parent_->decoded_target()->size() : 0U; |
- target_window_length_ = 0; |
- |
- source_segment_ptr_ = NULL; |
- source_segment_length_ = 0; |
- |
- instructions_and_sizes_.Invalidate(); |
- data_for_add_and_run_.Invalidate(); |
- addresses_for_copy_.Invalidate(); |
- |
- interleaved_bytes_expected_ = 0; |
- |
- has_checksum_ = false; |
- expected_checksum_ = 0; |
- if (annotated_output_.get()) { |
- annotated_output_->clear(); |
- } |
-} |
- |
-VCDiffResult VCDiffDeltaFileWindow::SetUpWindowSections( |
- VCDiffHeaderParser* header_parser) { |
- size_t add_and_run_data_length = 0; |
- size_t instructions_and_sizes_length = 0; |
- size_t addresses_length = 0; |
- if (!header_parser->ParseSectionLengths(has_checksum_, |
- &add_and_run_data_length, |
- &instructions_and_sizes_length, |
- &addresses_length, |
- &expected_checksum_)) { |
- return header_parser->GetResult(); |
- } |
- if (parent_->AllowInterleaved() && |
- (add_and_run_data_length == 0) && |
- (addresses_length == 0)) { |
- // The interleaved format is being used. |
- interleaved_bytes_expected_ = |
- static_cast<int>(instructions_and_sizes_length); |
- UpdateInterleavedSectionPointers(header_parser->UnparsedData(), |
- header_parser->End()); |
- } else { |
- // If interleaved format is not used, then the whole window contents |
- // must be available before decoding can begin. If only part of |
- // the current window is available, then report end of data |
- // and re-parse the whole header when DecodeChunk() is called again. |
- if (header_parser->UnparsedSize() < (add_and_run_data_length + |
- instructions_and_sizes_length + |
- addresses_length)) { |
- return RESULT_END_OF_DATA; |
- } |
- data_for_add_and_run_.Init(header_parser->UnparsedData(), |
- add_and_run_data_length); |
- instructions_and_sizes_.Init(data_for_add_and_run_.End(), |
- instructions_and_sizes_length); |
- addresses_for_copy_.Init(instructions_and_sizes_.End(), addresses_length); |
- if (addresses_for_copy_.End() != header_parser->EndOfDeltaWindow()) { |
- LOG(ERROR) << "The end of the instructions section " |
- "does not match the end of the delta window" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- } |
- reader_.Init(instructions_and_sizes_.UnparsedDataAddr(), |
- instructions_and_sizes_.End()); |
- return RESULT_SUCCESS; |
-} |
- |
-// Here are the elements of the delta window header to be parsed, |
-// from section 4 of the RFC: |
-// |
-// Window1 |
-// Win_Indicator - byte |
-// [Source segment size] - integer |
-// [Source segment position] - integer |
-// The delta encoding of the target window |
-// Length of the delta encoding - integer |
-// The delta encoding |
-// Size of the target window - integer |
-// Delta_Indicator - byte |
-// Length of data for ADDs and RUNs - integer |
-// Length of instructions and sizes - integer |
-// Length of addresses for COPYs - integer |
-// Data section for ADDs and RUNs - array of bytes |
-// Instructions and sizes section - array of bytes |
-// Addresses section for COPYs - array of bytes |
-// |
-VCDiffResult VCDiffDeltaFileWindow::ReadHeader( |
- ParseableChunk* parseable_chunk) { |
- string* decoded_target = parent_->decoded_target(); |
- VCDiffHeaderParser header_parser(parseable_chunk->UnparsedData(), |
- parseable_chunk->End()); |
- size_t source_segment_position = 0; |
- unsigned char win_indicator = 0; |
- if (!header_parser.ParseWinIndicatorAndSourceSegment( |
- parent_->dictionary_size(), |
- decoded_target->size(), |
- &win_indicator, |
- &source_segment_length_, |
- &source_segment_position)) { |
- return header_parser.GetResult(); |
- } |
- has_checksum_ = parent_->AllowChecksum() && (win_indicator & VCD_CHECKSUM); |
- if (!header_parser.ParseWindowLengths(&target_window_length_)) { |
- return header_parser.GetResult(); |
- } |
- if (parent_->TargetWindowWouldExceedTargetByteLimit(target_window_length_)) { |
- // An error has been logged by TargetWindowWouldExceedTargetByteLimit(). |
- return RESULT_ERROR; |
- } |
- header_parser.ParseDeltaIndicator(); |
- VCDiffResult setup_return_code = SetUpWindowSections(&header_parser); |
- if (RESULT_SUCCESS != setup_return_code) { |
- return setup_return_code; |
- } |
- // Reserve enough space in the output string for the current target window. |
- decoded_target->reserve(target_window_start_pos_ + target_window_length_); |
- // Get a pointer to the start of the source segment. |
- if (win_indicator & VCD_SOURCE) { |
- source_segment_ptr_ = parent_->dictionary_ptr() + source_segment_position; |
- } else if (win_indicator & VCD_TARGET) { |
- // This assignment must happen after the reserve(). |
- // decoded_target should not be resized again while processing this window, |
- // so source_segment_ptr_ should remain valid. |
- source_segment_ptr_ = decoded_target->data() + source_segment_position; |
- } |
- // The whole window header was found and parsed successfully. |
- found_header_ = true; |
- parseable_chunk->Advance(header_parser.ParsedSize()); |
- return RESULT_SUCCESS; |
-} |
- |
-void VCDiffDeltaFileWindow::UpdateInstructionPointer( |
- ParseableChunk* parseable_chunk) { |
- if (IsInterleaved()) { |
- size_t bytes_parsed = instructions_and_sizes_.ParsedSize(); |
- // Reduce expected instruction segment length by bytes parsed |
- interleaved_bytes_expected_ -= static_cast<int>(bytes_parsed); |
- parseable_chunk->Advance(bytes_parsed); |
- } |
-} |
- |
-size_t VCDiffDeltaFileWindow::TargetBytesDecoded() { |
- return parent_->decoded_target()->size() - target_window_start_pos_; |
-} |
- |
-size_t VCDiffDeltaFileWindow::TargetBytesRemaining() { |
- if (target_window_length_ == 0) { |
- // There is no window being decoded at present |
- return 0; |
- } else { |
- return target_window_length_ - TargetBytesDecoded(); |
- } |
-} |
- |
-void VCDiffDeltaFileWindow::CopyBytes(const char* data, |
- size_t size, |
- VCDiffAnnotationType annotation_type) { |
- parent_->decoded_target()->append(data, size); |
- if (annotated_output_.get()) { |
- annotated_output_->append(kAnnotationStartTags[annotation_type]); |
- annotated_output_->append(data, size); |
- annotated_output_->append(kAnnotationEndTags[annotation_type]); |
- } |
-} |
- |
-void VCDiffDeltaFileWindow::RunByte(unsigned char byte, size_t size) { |
- parent_->decoded_target()->append(size, byte); |
- if (annotated_output_.get()) { |
- annotated_output_->append(kAnnotationStartTags[VCD_ANNOTATION_LITERAL]); |
- annotated_output_->append(size, byte); |
- annotated_output_->append(kAnnotationEndTags[VCD_ANNOTATION_LITERAL]); |
- } |
-} |
- |
-VCDiffResult VCDiffDeltaFileWindow::DecodeAdd(size_t size) { |
- if (size > data_for_add_and_run_.UnparsedSize()) { |
- return RESULT_END_OF_DATA; |
- } |
- // Write the next "size" data bytes |
- CopyBytes(data_for_add_and_run_.UnparsedData(), size, VCD_ANNOTATION_LITERAL); |
- data_for_add_and_run_.Advance(size); |
- return RESULT_SUCCESS; |
-} |
- |
-VCDiffResult VCDiffDeltaFileWindow::DecodeRun(size_t size) { |
- if (data_for_add_and_run_.Empty()) { |
- return RESULT_END_OF_DATA; |
- } |
- // Write "size" copies of the next data byte |
- RunByte(*data_for_add_and_run_.UnparsedData(), size); |
- data_for_add_and_run_.Advance(1); |
- return RESULT_SUCCESS; |
-} |
- |
-VCDiffResult VCDiffDeltaFileWindow::DecodeCopy(size_t size, |
- unsigned char mode) { |
- // Keep track of the number of target bytes decoded as a local variable |
- // to avoid recalculating it each time it is needed. |
- size_t target_bytes_decoded = TargetBytesDecoded(); |
- const VCDAddress here_address = |
- static_cast<VCDAddress>(source_segment_length_ + target_bytes_decoded); |
- const VCDAddress decoded_address = parent_->addr_cache()->DecodeAddress( |
- here_address, |
- mode, |
- addresses_for_copy_.UnparsedDataAddr(), |
- addresses_for_copy_.End()); |
- switch (decoded_address) { |
- case RESULT_ERROR: |
- LOG(ERROR) << "Unable to decode address for COPY" << LOG_ENDL; |
- return RESULT_ERROR; |
- case RESULT_END_OF_DATA: |
- return RESULT_END_OF_DATA; |
- default: |
- if ((decoded_address < 0) || (decoded_address > here_address)) { |
- LOG(DFATAL) << "Internal error: unexpected address " << decoded_address |
- << " returned from DecodeAddress, with here_address = " |
- << here_address << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- break; |
- } |
- size_t address = static_cast<size_t>(decoded_address); |
- if ((address + size) <= source_segment_length_) { |
- // Copy all data from source segment |
- CopyBytes(&source_segment_ptr_[address], size, VCD_ANNOTATION_DMATCH); |
- return RESULT_SUCCESS; |
- } |
- // Copy some data from target window... |
- if (address < source_segment_length_) { |
- // ... plus some data from source segment |
- const size_t partial_copy_size = source_segment_length_ - address; |
- CopyBytes(&source_segment_ptr_[address], |
- partial_copy_size, |
- VCD_ANNOTATION_DMATCH); |
- target_bytes_decoded += partial_copy_size; |
- address += partial_copy_size; |
- size -= partial_copy_size; |
- } |
- address -= source_segment_length_; |
- // address is now based at start of target window |
- const char* const target_segment_ptr = parent_->decoded_target()->data() + |
- target_window_start_pos_; |
- while (size > (target_bytes_decoded - address)) { |
- // Recursive copy that extends into the yet-to-be-copied target data |
- const size_t partial_copy_size = target_bytes_decoded - address; |
- CopyBytes(&target_segment_ptr[address], |
- partial_copy_size, |
- VCD_ANNOTATION_BMATCH); |
- target_bytes_decoded += partial_copy_size; |
- address += partial_copy_size; |
- size -= partial_copy_size; |
- } |
- CopyBytes(&target_segment_ptr[address], size, VCD_ANNOTATION_BMATCH); |
- return RESULT_SUCCESS; |
-} |
- |
-int VCDiffDeltaFileWindow::DecodeBody(ParseableChunk* parseable_chunk) { |
- if (IsInterleaved() && (instructions_and_sizes_.UnparsedData() |
- != parseable_chunk->UnparsedData())) { |
- LOG(DFATAL) << "Internal error: interleaved format is used, but the" |
- " input pointer does not point to the instructions section" |
- << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- while (TargetBytesDecoded() < target_window_length_) { |
- int32_t decoded_size = VCD_INSTRUCTION_ERROR; |
- unsigned char mode = 0; |
- VCDiffInstructionType instruction = |
- reader_.GetNextInstruction(&decoded_size, &mode); |
- switch (instruction) { |
- case VCD_INSTRUCTION_END_OF_DATA: |
- UpdateInstructionPointer(parseable_chunk); |
- return RESULT_END_OF_DATA; |
- case VCD_INSTRUCTION_ERROR: |
- return RESULT_ERROR; |
- default: |
- break; |
- } |
- const size_t size = static_cast<size_t>(decoded_size); |
- // The value of "size" itself could be enormous (say, INT32_MAX) |
- // so check it individually against the limit to protect against |
- // overflow when adding it to something else. |
- if ((size > target_window_length_) || |
- ((size + TargetBytesDecoded()) > target_window_length_)) { |
- LOG(ERROR) << VCDiffInstructionName(instruction) |
- << " with size " << size |
- << " plus existing " << TargetBytesDecoded() |
- << " bytes of target data exceeds length of target" |
- " window (" << target_window_length_ << " bytes)" |
- << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- VCDiffResult result = RESULT_SUCCESS; |
- switch (instruction) { |
- case VCD_ADD: |
- result = DecodeAdd(size); |
- break; |
- case VCD_RUN: |
- result = DecodeRun(size); |
- break; |
- case VCD_COPY: |
- result = DecodeCopy(size, mode); |
- break; |
- default: |
- LOG(DFATAL) << "Unexpected instruction type " << instruction |
- << "in opcode stream" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- switch (result) { |
- case RESULT_END_OF_DATA: |
- reader_.UnGetInstruction(); |
- UpdateInstructionPointer(parseable_chunk); |
- return RESULT_END_OF_DATA; |
- case RESULT_ERROR: |
- return RESULT_ERROR; |
- case RESULT_SUCCESS: |
- break; |
- } |
- } |
- if (TargetBytesDecoded() != target_window_length_) { |
- LOG(ERROR) << "Decoded target window size (" << TargetBytesDecoded() |
- << " bytes) does not match expected size (" |
- << target_window_length_ << " bytes)" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- const char* const target_window_start = |
- parent_->decoded_target()->data() + target_window_start_pos_; |
- if (has_checksum_ && |
- (ComputeAdler32(target_window_start, target_window_length_) |
- != expected_checksum_)) { |
- LOG(ERROR) << "Target data does not match checksum; this could mean " |
- "that the wrong dictionary was used" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- if (!instructions_and_sizes_.Empty()) { |
- LOG(ERROR) << "Excess instructions and sizes left over " |
- "after decoding target window" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- if (!IsInterleaved()) { |
- // Standard format is being used, with three separate sections for the |
- // instructions, data, and addresses. |
- if (!data_for_add_and_run_.Empty()) { |
- LOG(ERROR) << "Excess ADD/RUN data left over " |
- "after decoding target window" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- if (!addresses_for_copy_.Empty()) { |
- LOG(ERROR) << "Excess COPY addresses left over " |
- "after decoding target window" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- // Reached the end of the window. Update the ParseableChunk to point to the |
- // end of the addresses section, which is the last section in the window. |
- parseable_chunk->SetPosition(addresses_for_copy_.End()); |
- } else { |
- // Interleaved format is being used. The window may have been only |
- // partially decoded. |
- UpdateInstructionPointer(parseable_chunk); |
- } |
- return RESULT_SUCCESS; |
-} |
- |
-VCDiffResult VCDiffDeltaFileWindow::DecodeWindows( |
- ParseableChunk* parseable_chunk) { |
- if (!parent_) { |
- LOG(DFATAL) << "Internal error: VCDiffDeltaFileWindow::DecodeWindows() " |
- "called before VCDiffDeltaFileWindow::Init()" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- while (!parseable_chunk->Empty()) { |
- if (!found_header_) { |
- switch (ReadHeader(parseable_chunk)) { |
- case RESULT_END_OF_DATA: |
- return RESULT_END_OF_DATA; |
- case RESULT_ERROR: |
- return RESULT_ERROR; |
- default: |
- // Reset address cache between windows (RFC section 5.1) |
- if (!parent_->addr_cache()->Init()) { |
- LOG(DFATAL) << "Error initializing address cache" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- } |
- } else { |
- // We are resuming a window that was partially decoded before a |
- // RESULT_END_OF_DATA was returned. This can only happen on the first |
- // loop iteration, and only if the interleaved format is enabled and used. |
- if (!IsInterleaved()) { |
- LOG(DFATAL) << "Internal error: Resumed decoding of a delta file window" |
- " when interleaved format is not being used" << LOG_ENDL; |
- return RESULT_ERROR; |
- } |
- UpdateInterleavedSectionPointers(parseable_chunk->UnparsedData(), |
- parseable_chunk->End()); |
- reader_.UpdatePointers(instructions_and_sizes_.UnparsedDataAddr(), |
- instructions_and_sizes_.End()); |
- } |
- switch (DecodeBody(parseable_chunk)) { |
- case RESULT_END_OF_DATA: |
- if (MoreDataExpected()) { |
- return RESULT_END_OF_DATA; |
- } else { |
- LOG(ERROR) << "End of data reached while decoding VCDIFF delta file" |
- << LOG_ENDL; |
- // fall through to RESULT_ERROR case |
- } |
- case RESULT_ERROR: |
- return RESULT_ERROR; |
- default: |
- break; // DecodeBody succeeded |
- } |
- AppendAnnotatedOutput(parent_->annotated_output()); |
- // Get ready to read a new delta window |
- Reset(); |
- if (parent_->MetTargetByteLimit()) { |
- // Found exactly the length expected. Stop decoding. |
- return RESULT_SUCCESS; |
- } |
- } |
- return RESULT_SUCCESS; |
-} |
- |
-// *** Methods for VCDiffStreamingDecoder |
- |
-VCDiffStreamingDecoder::VCDiffStreamingDecoder() |
-: impl_(new VCDiffStreamingDecoderImpl) { } |
- |
-VCDiffStreamingDecoder::~VCDiffStreamingDecoder() { delete impl_; } |
- |
-void VCDiffStreamingDecoder::StartDecoding(const char* source, size_t len) { |
- impl_->StartDecoding(source, len); |
-} |
- |
-bool VCDiffStreamingDecoder::DecodeChunkToInterface( |
- const char* data, |
- size_t len, |
- OutputStringInterface* output_string) { |
- return impl_->DecodeChunk(data, len, output_string); |
-} |
- |
-bool VCDiffStreamingDecoder::FinishDecoding() { |
- return impl_->FinishDecoding(); |
-} |
- |
-void VCDiffStreamingDecoder::EnableAnnotatedOutput() { |
- impl_->EnableAnnotatedOutput(); |
-} |
- |
-void VCDiffStreamingDecoder::DisableAnnotatedOutput() { |
- impl_->DisableAnnotatedOutput(); |
-} |
- |
-void VCDiffStreamingDecoder::GetAnnotatedOutputToInterface( |
- OutputStringInterface* annotated_output) { |
- impl_->GetAnnotatedOutput(annotated_output); |
-} |
- |
-bool VCDiffDecoder::DecodeToInterface(const char* dictionary_ptr, |
- size_t dictionary_size, |
- const string& encoding, |
- OutputStringInterface* target) { |
- target->clear(); |
- decoder_.StartDecoding(dictionary_ptr, dictionary_size); |
- if (!decoder_.DecodeChunkToInterface(encoding.data(), |
- encoding.size(), |
- target)) { |
- return false; |
- } |
- return decoder_.FinishDecoding(); |
-} |
- |
-} // namespace open_vcdiff |