| Index: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/encodetable.h
|
| ===================================================================
|
| --- sdch/open_vcdiff/depot/opensource/open-vcdiff/src/encodetable.h (revision 2678)
|
| +++ sdch/open_vcdiff/depot/opensource/open-vcdiff/src/encodetable.h (working copy)
|
| @@ -1,428 +0,0 @@
|
| -// Copyright 2008 Google Inc.
|
| -// Author: Lincoln Smith
|
| -//
|
| -// Licensed under the Apache License, Version 2.0 (the "License");
|
| -// you may not use this file except in compliance with the License.
|
| -// You may obtain a copy of the License at
|
| -//
|
| -// http://www.apache.org/licenses/LICENSE-2.0
|
| -//
|
| -// Unless required by applicable law or agreed to in writing, software
|
| -// distributed under the License is distributed on an "AS IS" BASIS,
|
| -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -// See the License for the specific language governing permissions and
|
| -// limitations under the License.
|
| -//
|
| -// There are two different representations of a Code Table's contents:
|
| -// VCDiffCodeTableData is the same as the format given in section 7
|
| -// of the RFC, and is used for transmission and decoding. However,
|
| -// on the encoding side, it is useful to have a representation that
|
| -// can map efficiently from delta instructions to opcodes:
|
| -// VCDiffInstructionMap. A VCDiffInstructionMap is constructed
|
| -// using a VCDiffCodeTableData. For a custom code table, it is recommended
|
| -// that the VCDiffCodeTableData be defined as a static struct and that the
|
| -// VCDiffInstructionMap be a static pointer that gets initialized only once.
|
| -
|
| -#ifndef OPEN_VCDIFF_ENCODETABLE_H_
|
| -#define OPEN_VCDIFF_ENCODETABLE_H_
|
| -
|
| -#include <config.h>
|
| -#include <stdint.h> // int32_t
|
| -#include <cstddef> // size_t
|
| -#include <string>
|
| -#include <vector>
|
| -#include "addrcache.h"
|
| -#include "codetable.h"
|
| -#include "checksum.h"
|
| -#include "vcdiff_defs.h"
|
| -
|
| -namespace open_vcdiff {
|
| -
|
| -using std::string;
|
| -
|
| -class OutputStringInterface;
|
| -struct VCDiffCodeTableData;
|
| -
|
| -// An alternate representation of the data in a VCDiffCodeTableData that
|
| -// optimizes for fast encoding, that is, for taking a delta instruction
|
| -// inst (also known as instruction type), size, and mode and arriving at
|
| -// the corresponding opcode.
|
| -//
|
| -class VCDiffInstructionMap {
|
| - public:
|
| - // Create a VCDiffInstructionMap from the information in code_table_data.
|
| - // Does not save a pointer to code_table_data after using its contents
|
| - // to create the instruction->opcode mappings. The caller *must* have
|
| - // verified that code_table_data->Validate() returned true before
|
| - // attempting to use this constructor.
|
| - // max_mode is the maximum value for the mode of a COPY instruction.
|
| - //
|
| - VCDiffInstructionMap(const VCDiffCodeTableData& code_table_data,
|
| - unsigned char max_mode);
|
| -
|
| - static VCDiffInstructionMap* GetDefaultInstructionMap();
|
| -
|
| - // Finds an opcode that has the given inst, size, and mode for its first
|
| - // instruction and NOOP for its second instruction (or vice versa.)
|
| - // Returns kNoOpcode if the code table does not have any matching
|
| - // opcode. Otherwise, returns an opcode value between 0 and 255.
|
| - //
|
| - // If this function returns kNoOpcode for size > 0, the caller will
|
| - // usually want to try again with size == 0 to find an opcode that
|
| - // doesn't have a fixed size value.
|
| - //
|
| - // If this function returns kNoOpcode for size == 0, it is an error condition,
|
| - // because any code table that passed the Validate() check should have a way
|
| - // of expressing all combinations of inst and mode with size=0.
|
| - //
|
| - OpcodeOrNone LookupFirstOpcode(unsigned char inst,
|
| - unsigned char size,
|
| - unsigned char mode) const {
|
| - return first_instruction_map_.Lookup(inst, size, mode);
|
| - }
|
| -
|
| - // Given a first opcode (presumed to have been returned by a previous call to
|
| - // lookupFirstOpcode), finds an opcode that has the same first instruction as
|
| - // the first opcode, and has the given inst, size, and mode for its second
|
| - // instruction.
|
| - //
|
| - // If this function returns kNoOpcode for size > 0, the caller will
|
| - // usually want to try again with size == 0 to find an opcode that
|
| - // doesn't have a fixed size value.
|
| - //
|
| - OpcodeOrNone LookupSecondOpcode(unsigned char first_opcode,
|
| - unsigned char inst,
|
| - unsigned char size,
|
| - unsigned char mode) const {
|
| - return second_instruction_map_.Lookup(first_opcode, inst, size, mode);
|
| - }
|
| -
|
| - private:
|
| - // Data structure used to implement LookupFirstOpcode efficiently.
|
| - //
|
| - class FirstInstructionMap {
|
| - public:
|
| - FirstInstructionMap(int num_insts_and_modes, int max_size_1);
|
| - ~FirstInstructionMap();
|
| -
|
| - void Add(unsigned char inst,
|
| - unsigned char size,
|
| - unsigned char mode,
|
| - unsigned char opcode) {
|
| - OpcodeOrNone* opcode_slot = &first_opcodes_[inst + mode][size];
|
| - if (*opcode_slot == kNoOpcode) {
|
| - *opcode_slot = opcode;
|
| - }
|
| - }
|
| -
|
| - // See comments for LookupFirstOpcode, above.
|
| - //
|
| - OpcodeOrNone Lookup(unsigned char inst,
|
| - unsigned char size,
|
| - unsigned char mode) const {
|
| - int inst_mode = (inst == VCD_COPY) ? (inst + mode) : inst;
|
| - if (size > max_size_1_) {
|
| - return kNoOpcode;
|
| - }
|
| - // Lookup specific-sized opcode
|
| - return first_opcodes_[inst_mode][size];
|
| - }
|
| -
|
| - private:
|
| - // The number of possible combinations of inst (a VCDiffInstructionType) and
|
| - // mode. Since the mode is only used for COPY instructions, this number
|
| - // is not (number of VCDiffInstructionType values) * (number of modes), but
|
| - // rather (number of VCDiffInstructionType values other than VCD_COPY)
|
| - // + (number of COPY modes).
|
| - //
|
| - // Compressing inst and mode into a single integer relies on
|
| - // VCD_COPY being the last instruction type. The inst+mode values are:
|
| - // 0 (NOOP), 1 (ADD), 2 (RUN), 3 (COPY mode 0), 4 (COPY mode 1), ...
|
| - //
|
| - const int num_instruction_type_modes_;
|
| -
|
| - // The maximum value of a size1 element in code_table_data
|
| - //
|
| - const int max_size_1_;
|
| -
|
| - // There are two levels to first_opcodes_:
|
| - // 1) A dynamically-allocated pointer array of size
|
| - // num_instruction_type_modes_ (one element for each combination of inst
|
| - // and mode.) Every element of this array is non-NULL and contains
|
| - // a pointer to:
|
| - // 2) A dynamically-allocated array of OpcodeOrNone values, with one element
|
| - // for each possible first instruction size (size1) in the code table.
|
| - // (In the default code table, for example, the maximum size used is 18,
|
| - // so these arrays would have 19 elements representing values 0
|
| - // through 18.)
|
| - //
|
| - OpcodeOrNone** first_opcodes_;
|
| -
|
| - // Making these private avoids implicit copy constructor
|
| - // and assignment operator
|
| - FirstInstructionMap(const FirstInstructionMap&); // NOLINT
|
| - void operator=(const FirstInstructionMap&);
|
| - } first_instruction_map_;
|
| -
|
| - // Data structure used to implement LookupSecondOpcode efficiently.
|
| - //
|
| - class SecondInstructionMap {
|
| - public:
|
| - SecondInstructionMap(int num_insts_and_modes, int max_size_2);
|
| - ~SecondInstructionMap();
|
| - void Add(unsigned char first_opcode,
|
| - unsigned char inst,
|
| - unsigned char size,
|
| - unsigned char mode,
|
| - unsigned char second_opcode);
|
| -
|
| - // See comments for LookupSecondOpcode, above.
|
| - OpcodeOrNone Lookup(unsigned char first_opcode,
|
| - unsigned char inst,
|
| - unsigned char size,
|
| - unsigned char mode) const;
|
| - private:
|
| - // See the member of the same name in FirstInstructionMap.
|
| - const int num_instruction_type_modes_;
|
| -
|
| - // The maximum value of a size2 element in code_table_data
|
| - const int max_size_2_;
|
| -
|
| - // There are three levels to second_opcodes_:
|
| - // 1) A statically-allocated pointer array with one element
|
| - // for each possible opcode. Each element can be NULL, or can point to:
|
| - // 2) A dynamically-allocated pointer array of size
|
| - // num_instruction_type_modes_ (one element for each combination of inst
|
| - // and mode.) Each element can be NULL, or can point to:
|
| - // 3) A dynamically-allocated array with one element for each possible
|
| - // second instruction size in the code table. (In the default code
|
| - // table, for example, the maximum size used is 6, so these arrays would
|
| - // have 7 elements representing values 0 through 6.)
|
| - //
|
| - OpcodeOrNone** second_opcodes_[VCDiffCodeTableData::kCodeTableSize];
|
| -
|
| - // Making these private avoids implicit copy constructor
|
| - // and assignment operator
|
| - SecondInstructionMap(const SecondInstructionMap&); // NOLINT
|
| - void operator=(const SecondInstructionMap&);
|
| - } second_instruction_map_;
|
| -
|
| - static VCDiffInstructionMap* default_instruction_map;
|
| -
|
| - // Making these private avoids implicit copy constructor & assignment operator
|
| - VCDiffInstructionMap(const VCDiffInstructionMap&); // NOLINT
|
| - void operator=(const VCDiffInstructionMap&);
|
| -};
|
| -
|
| -// The method calls after construction *must* conform
|
| -// to the following pattern:
|
| -// {{Add|Copy|Run}* [AddChecksum] Output}*
|
| -//
|
| -// When Output has been called in this sequence, a complete target window
|
| -// (as defined in RFC 3284 section 4.3) will have been appended to
|
| -// out (unless no calls to Add, Run, or Copy were made, in which
|
| -// case Output will do nothing.) The output will not be available for use
|
| -// until after each call to Output().
|
| -//
|
| -// This class can also write a compressed
|
| -// representation of a non-standard code table to an string so that it
|
| -// can be sent with the data that was encoded using that code table.
|
| -//
|
| -// NOT threadsafe.
|
| -//
|
| -class VCDiffCodeTableWriter {
|
| - public:
|
| - // This constructor uses the default code table.
|
| - // If interleaved is true, the encoder writes each delta file window
|
| - // by interleaving instructions and sizes with their corresponding
|
| - // addresses and data, rather than placing these elements into three
|
| - // separate sections. This facilitates providing partially
|
| - // decoded results when only a portion of a delta file window
|
| - // is received (e.g. when HTTP over TCP is used as the
|
| - // transmission protocol.) The interleaved format is
|
| - // not consistent with the VCDIFF draft standard.
|
| - //
|
| - explicit VCDiffCodeTableWriter(bool interleaved);
|
| -
|
| - // Uses a non-standard code table and non-standard cache sizes. The caller
|
| - // must guarantee that code_table_data remains allocated for the lifetime of
|
| - // the VCDiffCodeTableWriter object. Note that this is different from how
|
| - // VCDiffCodeTableReader::UseCodeTable works. It is assumed that a given
|
| - // encoder will use either the default code table or a statically-defined
|
| - // non-standard code table, whereas the decoder must have the ability to read
|
| - // an arbitrary non-standard code table from a delta file and discard it once
|
| - // the file has been decoded.
|
| - //
|
| - VCDiffCodeTableWriter(bool interleaved,
|
| - int near_cache_size,
|
| - int same_cache_size,
|
| - const VCDiffCodeTableData& code_table_data,
|
| - unsigned char max_mode);
|
| -
|
| - ~VCDiffCodeTableWriter();
|
| -
|
| - // Initializes the constructed object for use.
|
| - // This method must be called after a VCDiffCodeTableWriter is constructed
|
| - // and before any of its other methods can be called. It will return
|
| - // false if there was an error initializing the object, or true if it
|
| - // was successful. After the object has been initialized and used,
|
| - // Init() can be called again to restore the initial state of the object.
|
| - //
|
| - bool Init(size_t dictionary_size);
|
| -
|
| - size_t target_length() const { return target_length_; }
|
| -
|
| - // Encode an ADD opcode with the "size" bytes starting at data
|
| - //
|
| - void Add(const char* data, size_t size);
|
| -
|
| - // Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes.
|
| - //
|
| - void Copy(int32_t offset, size_t size);
|
| -
|
| - // Encode a RUN opcode for "size" copies of the value "byte".
|
| - //
|
| - void Run(size_t size, unsigned char byte);
|
| -
|
| - void AddChecksum(VCDChecksum checksum) {
|
| - add_checksum_ = true;
|
| - checksum_ = checksum;
|
| - }
|
| -
|
| - // Finishes encoding and appends the encoded delta window to the output
|
| - // string. The output string is not null-terminated and may contain embedded
|
| - // '\0' characters.
|
| - void Output(OutputStringInterface* out);
|
| -
|
| - const std::vector<int>& match_counts() const { return match_counts_; }
|
| -
|
| - private:
|
| - // This is an estimate of the longest match size the encoder expects to find.
|
| - // It is used to determine the initial size of the vector match_counts_.
|
| - // If it is too large, then some space will be wasted on vector elements
|
| - // that are not used. If it is too small, then some time will be wasted
|
| - // expanding match_counts_ to accommodate larger match sizes.
|
| - static const size_t kMaxMatchSize = 2000;
|
| -
|
| - // The maximum value for the mode of a COPY instruction.
|
| - const unsigned char max_mode_;
|
| -
|
| - // If interleaved is true, sets data_for_add_and_run_ and
|
| - // addresses_for_copy_ to point at instructions_and_sizes_,
|
| - // so that instructions, sizes, addresses and data will be
|
| - // combined into a single interleaved stream.
|
| - // If interleaved is false, sets data_for_add_and_run_ and
|
| - // addresses_for_copy_ to point at their corresponding
|
| - // separate_... strings, so that the three sections will
|
| - // be generated separately from one another.
|
| - //
|
| - void InitSectionPointers(bool interleaved);
|
| -
|
| - // Determines the best opcode to encode an instruction, and appends
|
| - // or substitutes that opcode and its size into the
|
| - // instructions_and_sizes_ string.
|
| - //
|
| - void EncodeInstruction(VCDiffInstructionType inst,
|
| - size_t size,
|
| - unsigned char mode);
|
| -
|
| - void EncodeInstruction(VCDiffInstructionType inst, size_t size) {
|
| - return EncodeInstruction(inst, size, 0);
|
| - }
|
| -
|
| - // Calculates the number of bytes needed to store the given size value as a
|
| - // variable-length integer (VarintBE).
|
| - static size_t CalculateLengthOfSizeAsVarint(size_t size);
|
| -
|
| - // Appends the size value to the string as a variable-length integer.
|
| - static void AppendSizeToString(size_t size, string* out);
|
| -
|
| - // Appends the size value to the output string as a variable-length integer.
|
| - static void AppendSizeToOutputString(size_t size, OutputStringInterface* out);
|
| -
|
| - // Calculates the "Length of the delta encoding" field for the delta window
|
| - // header, based on the sizes of the sections and of the other header
|
| - // elements.
|
| - size_t CalculateLengthOfTheDeltaEncoding() const;
|
| -
|
| - // None of the following 'string' objects are null-terminated.
|
| -
|
| - // A series of instruction opcodes, each of which may be followed
|
| - // by one or two Varint values representing the size parameters
|
| - // of the first and second instruction in the opcode.
|
| - string instructions_and_sizes_;
|
| -
|
| - // A series of data arguments (byte values) used for ADD and RUN
|
| - // instructions. Depending on whether interleaved output is used
|
| - // for streaming or not, the pointer may point to
|
| - // separate_data_for_add_and_run_ or to instructions_and_sizes_.
|
| - string *data_for_add_and_run_;
|
| - string separate_data_for_add_and_run_;
|
| -
|
| - // A series of Varint addresses used for COPY instructions.
|
| - // For the SAME mode, a byte value is stored instead of a Varint.
|
| - // Depending on whether interleaved output is used
|
| - // for streaming or not, the pointer may point to
|
| - // separate_addresses_for_copy_ or to instructions_and_sizes_.
|
| - string *addresses_for_copy_;
|
| - string separate_addresses_for_copy_;
|
| -
|
| - VCDiffAddressCache address_cache_;
|
| -
|
| - size_t dictionary_size_;
|
| -
|
| - // The number of bytes of target data that has been encoded so far.
|
| - // Each time Add(), Copy(), or Run() is called, this will be incremented.
|
| - // The target length is used to compute HERE mode addresses
|
| - // for COPY instructions, and is also written into the header
|
| - // of the delta window when Output() is called.
|
| - //
|
| - size_t target_length_;
|
| -
|
| - const VCDiffCodeTableData* code_table_data_;
|
| -
|
| - // The instruction map facilitates finding an opcode quickly given an
|
| - // instruction inst, size, and mode. This is an alternate representation
|
| - // of the same information that is found in code_table_data_.
|
| - //
|
| - const VCDiffInstructionMap* instruction_map_;
|
| -
|
| - // The zero-based index within instructions_and_sizes_ of the byte
|
| - // that contains the last single-instruction opcode generated by
|
| - // EncodeInstruction(). (See that function for exhaustive details.)
|
| - // It is necessary to use an index rather than a pointer for this value
|
| - // because instructions_and_sizes_ may be resized, which would invalidate
|
| - // any pointers into its data buffer. The value -1 is reserved to mean that
|
| - // either no opcodes have been generated yet, or else the last opcode
|
| - // generated was a double-instruction opcode.
|
| - //
|
| - int last_opcode_index_;
|
| -
|
| - // If true, an Adler32 checksum of the target window data will be written as
|
| - // a variable-length integer, just after the size of the addresses section.
|
| - //
|
| - bool add_checksum_;
|
| -
|
| - // The checksum to be written to the current target window,
|
| - // if add_checksum_ is true.
|
| - // This will not be calculated based on the individual calls to Add(), Run(),
|
| - // and Copy(), which would be unnecessarily expensive. Instead, the code
|
| - // that uses the VCDiffCodeTableWriter object is expected to calculate
|
| - // the checksum all at once and to call AddChecksum() with that value.
|
| - // Must be called sometime before calling Output(), though it can be called
|
| - // either before or after the calls to Add(), Run(), and Copy().
|
| - //
|
| - VCDChecksum checksum_;
|
| -
|
| - // The value of match_counts_[n] is equal to the number of matches
|
| - // of length n (that is, COPY instructions of size n) found so far.
|
| - std::vector<int> match_counts_;
|
| -
|
| - // Making these private avoids implicit copy constructor & assignment operator
|
| - VCDiffCodeTableWriter(const VCDiffCodeTableWriter&); // NOLINT
|
| - void operator=(const VCDiffCodeTableWriter&);
|
| -};
|
| -
|
| -}; // namespace open_vcdiff
|
| -
|
| -#endif // OPEN_VCDIFF_ENCODETABLE_H_
|
|
|