sdch/open_vcdiff/depot/opensource/open-vcdiff/src/encodetable.h - Issue 5203: Transition to pulling open-vcdiff from repository, instead of using snapshot...

Unified Diff: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/encodetable.h

Issue 5203: Transition to pulling open-vcdiff from repository, instead of using snapshot... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 12 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « sdch/open_vcdiff/depot/opensource/open-vcdiff/src/decodetable_test.cc ('k') | sdch/open_vcdiff/depot/opensource/open-vcdiff/src/encodetable.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/encodetable.h

===================================================================

--- sdch/open_vcdiff/depot/opensource/open-vcdiff/src/encodetable.h (revision 2678)

+++ sdch/open_vcdiff/depot/opensource/open-vcdiff/src/encodetable.h (working copy)

@@ -1,428 +0,0 @@

-// Author: Lincoln Smith

-//

-// Licensed under the Apache License, Version 2.0 (the "License");

-// you may not use this file except in compliance with the License.

-// You may obtain a copy of the License at

-//

-// http://www.apache.org/licenses/LICENSE-2.0

-//

-// Unless required by applicable law or agreed to in writing, software

-// distributed under the License is distributed on an "AS IS" BASIS,

-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

-// See the License for the specific language governing permissions and

-// limitations under the License.

-//

-// There are two different representations of a Code Table's contents:

-// VCDiffCodeTableData is the same as the format given in section 7

-// of the RFC, and is used for transmission and decoding. However,

-// on the encoding side, it is useful to have a representation that

-// can map efficiently from delta instructions to opcodes:

-// VCDiffInstructionMap. A VCDiffInstructionMap is constructed

-// using a VCDiffCodeTableData. For a custom code table, it is recommended

-// that the VCDiffCodeTableData be defined as a static struct and that the

-// VCDiffInstructionMap be a static pointer that gets initialized only once.

-#ifndef OPEN_VCDIFF_ENCODETABLE_H_

-#define OPEN_VCDIFF_ENCODETABLE_H_

-#include <config.h>

-#include <stdint.h> // int32_t

-#include <cstddef> // size_t

-#include <string>

-#include <vector>

-#include "addrcache.h"

-#include "codetable.h"

-#include "checksum.h"

-#include "vcdiff_defs.h"

-namespace open_vcdiff {

-using std::string;

-class OutputStringInterface;

-struct VCDiffCodeTableData;

-// An alternate representation of the data in a VCDiffCodeTableData that

-// optimizes for fast encoding, that is, for taking a delta instruction

-// inst (also known as instruction type), size, and mode and arriving at

-// the corresponding opcode.

-//

-class VCDiffInstructionMap {

- public:

- // Create a VCDiffInstructionMap from the information in code_table_data.

- // Does not save a pointer to code_table_data after using its contents

- // to create the instruction->opcode mappings. The caller *must* have

- // verified that code_table_data->Validate() returned true before

- // attempting to use this constructor.

- // max_mode is the maximum value for the mode of a COPY instruction.

- //

- VCDiffInstructionMap(const VCDiffCodeTableData& code_table_data,

- unsigned char max_mode);

- static VCDiffInstructionMap* GetDefaultInstructionMap();

- // Finds an opcode that has the given inst, size, and mode for its first

- // instruction and NOOP for its second instruction (or vice versa.)

- // Returns kNoOpcode if the code table does not have any matching

- // opcode. Otherwise, returns an opcode value between 0 and 255.

- //

- // If this function returns kNoOpcode for size > 0, the caller will

- // usually want to try again with size == 0 to find an opcode that

- // doesn't have a fixed size value.

- //

- // If this function returns kNoOpcode for size == 0, it is an error condition,

- // because any code table that passed the Validate() check should have a way

- // of expressing all combinations of inst and mode with size=0.

- //

- OpcodeOrNone LookupFirstOpcode(unsigned char inst,

- unsigned char size,

- unsigned char mode) const {

- return first_instruction_map_.Lookup(inst, size, mode);

- }

- // Given a first opcode (presumed to have been returned by a previous call to

- // LookupFirstOpcode), finds an opcode that has the same first instruction as

- // the first opcode, and has the given inst, size, and mode for its second

- // instruction.

- //

- // If this function returns kNoOpcode for size > 0, the caller will

- // usually want to try again with size == 0 to find an opcode that

- // doesn't have a fixed size value.

- //

- OpcodeOrNone LookupSecondOpcode(unsigned char first_opcode,

- unsigned char inst,

- unsigned char size,

- unsigned char mode) const {

- return second_instruction_map_.Lookup(first_opcode, inst, size, mode);

- }

- private:

- // Data structure used to implement LookupFirstOpcode efficiently.

- //

- class FirstInstructionMap {

- public:

- FirstInstructionMap(int num_insts_and_modes, int max_size_1);

- ~FirstInstructionMap();

- void Add(unsigned char inst,

- unsigned char size,

- unsigned char mode,

- unsigned char opcode) {

- OpcodeOrNone* opcode_slot = &first_opcodes_[inst + mode][size];

- if (*opcode_slot == kNoOpcode) {

- *opcode_slot = opcode;

- }

- // See comments for LookupFirstOpcode, above.

- //

- OpcodeOrNone Lookup(unsigned char inst,

- unsigned char size,

- unsigned char mode) const {

- int inst_mode = (inst == VCD_COPY) ? (inst + mode) : inst;

- if (size > max_size_1_) {

- return kNoOpcode;

- }

- // Lookup specific-sized opcode

- return first_opcodes_[inst_mode][size];

- }

- private:

- // The number of possible combinations of inst (a VCDiffInstructionType) and

- // mode. Since the mode is only used for COPY instructions, this number

- // is not (number of VCDiffInstructionType values) * (number of modes), but

- // rather (number of VCDiffInstructionType values other than VCD_COPY)

- // + (number of COPY modes).

- //

- // Compressing inst and mode into a single integer relies on

- // VCD_COPY being the last instruction type. The inst+mode values are:

- // 0 (NOOP), 1 (ADD), 2 (RUN), 3 (COPY mode 0), 4 (COPY mode 1), ...

- //

- const int num_instruction_type_modes_;

- // The maximum value of a size1 element in code_table_data

- //

- const int max_size_1_;

- // There are two levels to first_opcodes_:

- // 1) A dynamically-allocated pointer array of size

- // num_instruction_type_modes_ (one element for each combination of inst

- // and mode.) Every element of this array is non-NULL and contains

- // a pointer to:

- // 2) A dynamically-allocated array of OpcodeOrNone values, with one element

- // for each possible first instruction size (size1) in the code table.

- // (In the default code table, for example, the maximum size used is 18,

- // so these arrays would have 19 elements representing values 0

- // through 18.)

- //

- OpcodeOrNone** first_opcodes_;

- // Making these private avoids implicit copy constructor

- // and assignment operator

- FirstInstructionMap(const FirstInstructionMap&); // NOLINT

- void operator=(const FirstInstructionMap&);

- } first_instruction_map_;

- // Data structure used to implement LookupSecondOpcode efficiently.

- //

- class SecondInstructionMap {

- public:

- SecondInstructionMap(int num_insts_and_modes, int max_size_2);

- ~SecondInstructionMap();

- void Add(unsigned char first_opcode,

- unsigned char inst,

- unsigned char size,

- unsigned char mode,

- unsigned char second_opcode);

- // See comments for LookupSecondOpcode, above.

- OpcodeOrNone Lookup(unsigned char first_opcode,

- unsigned char inst,

- unsigned char size,

- unsigned char mode) const;

- private:

- // See the member of the same name in FirstInstructionMap.

- const int num_instruction_type_modes_;

- // The maximum value of a size2 element in code_table_data

- const int max_size_2_;

- // There are three levels to second_opcodes_:

- // 1) A statically-allocated pointer array with one element

- // for each possible opcode. Each element can be NULL, or can point to:

- // 2) A dynamically-allocated pointer array of size

- // num_instruction_type_modes_ (one element for each combination of inst

- // and mode.) Each element can be NULL, or can point to:

- // 3) A dynamically-allocated array with one element for each possible

- // second instruction size in the code table. (In the default code

- // table, for example, the maximum size used is 6, so these arrays would

- // have 7 elements representing values 0 through 6.)

- //

- OpcodeOrNone** second_opcodes_[VCDiffCodeTableData::kCodeTableSize];

- // Making these private avoids implicit copy constructor

- // and assignment operator

- SecondInstructionMap(const SecondInstructionMap&); // NOLINT

- void operator=(const SecondInstructionMap&);

- } second_instruction_map_;

- static VCDiffInstructionMap* default_instruction_map;

- // Making these private avoids implicit copy constructor & assignment operator

- VCDiffInstructionMap(const VCDiffInstructionMap&); // NOLINT

- void operator=(const VCDiffInstructionMap&);

-};

-// The method calls after construction *must* conform

-// to the following pattern:

-// {{Add|Copy|Run}* [AddChecksum] Output}*

-//

-// When Output has been called in this sequence, a complete target window

-// (as defined in RFC 3284 section 4.3) will have been appended to

-// out (unless no calls to Add, Run, or Copy were made, in which

-// case Output will do nothing.) The output will not be available for use

-// until after each call to Output().

-//

-// This class can also write a compressed

- // representation of a non-standard code table to a string so that it

-// can be sent with the data that was encoded using that code table.

-//

-// NOT threadsafe.

-//

-class VCDiffCodeTableWriter {

- public:

- // This constructor uses the default code table.

- // If interleaved is true, the encoder writes each delta file window

- // by interleaving instructions and sizes with their corresponding

- // addresses and data, rather than placing these elements into three

- // separate sections. This facilitates providing partially

- // decoded results when only a portion of a delta file window

- // is received (e.g. when HTTP over TCP is used as the

- // transmission protocol.) The interleaved format is

- // not consistent with the VCDIFF draft standard.

- //

- explicit VCDiffCodeTableWriter(bool interleaved);

- // Uses a non-standard code table and non-standard cache sizes. The caller

- // must guarantee that code_table_data remains allocated for the lifetime of

- // the VCDiffCodeTableWriter object. Note that this is different from how

- // VCDiffCodeTableReader::UseCodeTable works. It is assumed that a given

- // encoder will use either the default code table or a statically-defined

- // non-standard code table, whereas the decoder must have the ability to read

- // an arbitrary non-standard code table from a delta file and discard it once

- // the file has been decoded.

- //

- VCDiffCodeTableWriter(bool interleaved,

- int near_cache_size,

- int same_cache_size,

- const VCDiffCodeTableData& code_table_data,

- unsigned char max_mode);

- ~VCDiffCodeTableWriter();

- // Initializes the constructed object for use.

- // This method must be called after a VCDiffCodeTableWriter is constructed

- // and before any of its other methods can be called. It will return

- // false if there was an error initializing the object, or true if it

- // was successful. After the object has been initialized and used,

- // Init() can be called again to restore the initial state of the object.

- //

- bool Init(size_t dictionary_size);

- size_t target_length() const { return target_length_; }

- // Encode an ADD opcode with the "size" bytes starting at data

- //

- void Add(const char* data, size_t size);

- // Encode a COPY opcode with args "offset" (into dictionary) and "size" bytes.

- //

- void Copy(int32_t offset, size_t size);

- // Encode a RUN opcode for "size" copies of the value "byte".

- //

- void Run(size_t size, unsigned char byte);

- void AddChecksum(VCDChecksum checksum) {

- add_checksum_ = true;

- checksum_ = checksum;

- }

- // Finishes encoding and appends the encoded delta window to the output

- // string. The output string is not null-terminated and may contain embedded

- // '\0' characters.

- void Output(OutputStringInterface* out);

- const std::vector<int>& match_counts() const { return match_counts_; }

- private:

- // This is an estimate of the longest match size the encoder expects to find.

- // It is used to determine the initial size of the vector match_counts_.

- // If it is too large, then some space will be wasted on vector elements

- // that are not used. If it is too small, then some time will be wasted

- // expanding match_counts_ to accommodate larger match sizes.

- static const size_t kMaxMatchSize = 2000;

- // The maximum value for the mode of a COPY instruction.

- const unsigned char max_mode_;

- // If interleaved is true, sets data_for_add_and_run_ and

- // addresses_for_copy_ to point at instructions_and_sizes_,

- // so that instructions, sizes, addresses and data will be

- // combined into a single interleaved stream.

- // If interleaved is false, sets data_for_add_and_run_ and

- // addresses_for_copy_ to point at their corresponding

- // separate_... strings, so that the three sections will

- // be generated separately from one another.

- //

- void InitSectionPointers(bool interleaved);

- // Determines the best opcode to encode an instruction, and appends

- // or substitutes that opcode and its size into the

- // instructions_and_sizes_ string.

- //

- void EncodeInstruction(VCDiffInstructionType inst,

- size_t size,

- unsigned char mode);

- void EncodeInstruction(VCDiffInstructionType inst, size_t size) {

- return EncodeInstruction(inst, size, 0);

- }

- // Calculates the number of bytes needed to store the given size value as a

- // variable-length integer (VarintBE).

- static size_t CalculateLengthOfSizeAsVarint(size_t size);

- // Appends the size value to the string as a variable-length integer.

- static void AppendSizeToString(size_t size, string* out);

- // Appends the size value to the output string as a variable-length integer.

- static void AppendSizeToOutputString(size_t size, OutputStringInterface* out);

- // Calculates the "Length of the delta encoding" field for the delta window

- // header, based on the sizes of the sections and of the other header

- // elements.

- size_t CalculateLengthOfTheDeltaEncoding() const;

- // None of the following 'string' objects are null-terminated.

- // A series of instruction opcodes, each of which may be followed

- // by one or two Varint values representing the size parameters

- // of the first and second instruction in the opcode.

- string instructions_and_sizes_;

- // A series of data arguments (byte values) used for ADD and RUN

- // instructions. Depending on whether interleaved output is used

- // for streaming or not, the pointer may point to

- // separate_data_for_add_and_run_ or to instructions_and_sizes_.

- string *data_for_add_and_run_;

- string separate_data_for_add_and_run_;

- // A series of Varint addresses used for COPY instructions.

- // For the SAME mode, a byte value is stored instead of a Varint.

- // Depending on whether interleaved output is used

- // for streaming or not, the pointer may point to

- // separate_addresses_for_copy_ or to instructions_and_sizes_.

- string *addresses_for_copy_;

- string separate_addresses_for_copy_;

- VCDiffAddressCache address_cache_;

- size_t dictionary_size_;

- // The number of bytes of target data that has been encoded so far.

- // Each time Add(), Copy(), or Run() is called, this will be incremented.

- // The target length is used to compute HERE mode addresses

- // for COPY instructions, and is also written into the header

- // of the delta window when Output() is called.

- //

- size_t target_length_;

- const VCDiffCodeTableData* code_table_data_;

- // The instruction map facilitates finding an opcode quickly given an

- // instruction inst, size, and mode. This is an alternate representation

- // of the same information that is found in code_table_data_.

- //

- const VCDiffInstructionMap* instruction_map_;

- // The zero-based index within instructions_and_sizes_ of the byte

- // that contains the last single-instruction opcode generated by

- // EncodeInstruction(). (See that function for exhaustive details.)

- // It is necessary to use an index rather than a pointer for this value

- // because instructions_and_sizes_ may be resized, which would invalidate

- // any pointers into its data buffer. The value -1 is reserved to mean that

- // either no opcodes have been generated yet, or else the last opcode

- // generated was a double-instruction opcode.

- //

- int last_opcode_index_;

- // If true, an Adler32 checksum of the target window data will be written as

- // a variable-length integer, just after the size of the addresses section.

- //

- bool add_checksum_;

- // The checksum to be written to the current target window,

- // if add_checksum_ is true.

- // This will not be calculated based on the individual calls to Add(), Run(),

- // and Copy(), which would be unnecessarily expensive. Instead, the code

- // that uses the VCDiffCodeTableWriter object is expected to calculate

- // the checksum all at once and to call AddChecksum() with that value.

- // AddChecksum() must be called sometime before calling Output(), though it can be called

- // either before or after the calls to Add(), Run(), and Copy().

- //

- VCDChecksum checksum_;

- // The value of match_counts_[n] is equal to the number of matches

- // of length n (that is, COPY instructions of size n) found so far.

- std::vector<int> match_counts_;

- // Making these private avoids implicit copy constructor & assignment operator

- VCDiffCodeTableWriter(const VCDiffCodeTableWriter&); // NOLINT

- void operator=(const VCDiffCodeTableWriter&);

-};

-}; // namespace open_vcdiff

-#endif // OPEN_VCDIFF_ENCODETABLE_H_