| Index: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdiffengine_test.cc
|
| ===================================================================
|
| --- sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdiffengine_test.cc (revision 2678)
|
| +++ sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdiffengine_test.cc (working copy)
|
| @@ -1,1002 +0,0 @@
|
| -// Copyright 2008 Google Inc.
|
| -// Author: Lincoln Smith
|
| -//
|
| -// Licensed under the Apache License, Version 2.0 (the "License");
|
| -// you may not use this file except in compliance with the License.
|
| -// You may obtain a copy of the License at
|
| -//
|
| -// http://www.apache.org/licenses/LICENSE-2.0
|
| -//
|
| -// Unless required by applicable law or agreed to in writing, software
|
| -// distributed under the License is distributed on an "AS IS" BASIS,
|
| -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -// See the License for the specific language governing permissions and
|
| -// limitations under the License.
|
| -
|
| -#include <config.h>
|
| -#include "vcdiffengine.h"
|
| -#include <algorithm>
|
| -#include <string>
|
| -#include <vector>
|
| -#include "addrcache.h"
|
| -#include "blockhash.h"
|
| -#include "encodetable.h"
|
| -#include "google/output_string.h"
|
| -#include "rolling_hash.h"
|
| -#include "testing.h"
|
| -#include "varint_bigendian.h"
|
| -#include "vcdiff_defs.h"
|
| -
|
| -namespace open_vcdiff {
|
| -
|
| -namespace {
|
| -
|
| -using std::string;
|
| -
|
| -class VCDiffEngineTestBase : public testing::Test {
|
| - protected:
|
| - // Some common definitions and helper functions used in the various tests
|
| - // for VCDiffEngine.
|
| - static const int kBlockSize = BlockHash::kBlockSize;
|
| -
|
| - VCDiffEngineTestBase() : interleaved_(false),
|
| - diff_output_string_(&diff_),
|
| - verify_position_(0),
|
| - saved_total_size_position_(0),
|
| - saved_delta_encoding_position_(0),
|
| - saved_section_sizes_position_(0),
|
| - data_bytes_(0),
|
| - instruction_bytes_(0),
|
| - address_bytes_(0) { }
|
| -
|
| - virtual ~VCDiffEngineTestBase() { }
|
| -
|
| - virtual void TearDown() {
|
| - VerifyMatchCounts();
|
| - }
|
| -
|
| - // Copy string_without_spaces into newly allocated result buffer,
|
| - // but pad its contents with space characters so that every character
|
| - // in string_without_spaces corresponds to (block_size - 1)
|
| - // spaces in the result, followed by that character.
|
| - // For example:
|
| - // If string_without_spaces begins "The only thing"... and block_size is 4,
|
| - // then 3 space characters will be inserted
|
| - // between each letter in the result, as follows:
|
| - // " T h e o n l y t h i n g"...
|
| - // This makes testing simpler, because finding a block_size-byte match
|
| - // between the dictionary and target only depends on the
|
| - // trailing letter in each block.
|
| - // If no_initial_padding is true, then the first letter will not have
|
| - // spaces added in front of it.
|
| - static void MakeEachLetterABlock(const char* string_without_spaces,
|
| - const char** result,
|
| - int block_size,
|
| - bool no_initial_padding) {
|
| - const size_t length_without_spaces = strlen(string_without_spaces);
|
| - char* padded_text = new char[(block_size * length_without_spaces) + 1];
|
| - memset(padded_text, ' ', block_size * length_without_spaces);
|
| - char* padded_text_ptr = padded_text;
|
| - if (!no_initial_padding) {
|
| - padded_text_ptr += block_size - 1;
|
| - }
|
| - for (size_t i = 0; i < length_without_spaces; ++i) {
|
| - *padded_text_ptr = string_without_spaces[i];
|
| - padded_text_ptr += block_size;
|
| - }
|
| - *(padded_text_ptr - block_size + 1) = '\0';
|
| - *result = padded_text;
|
| - }
|
| -
|
| - // These functions iterate through the decoded output and expect
|
| - // simple elements: bytes or variable-length integers.
|
| - void ExpectByte(char byte) {
|
| - EXPECT_GT(diff_.size(), verify_position_);
|
| - EXPECT_EQ(byte, diff_[verify_position_]);
|
| - ++verify_position_;
|
| - }
|
| -
|
| - size_t ExpectVarint(int32_t expected_value) {
|
| - EXPECT_GT(diff_.size(), verify_position_);
|
| - const char* const original_position = &diff_[verify_position_];
|
| - const char* new_position = original_position;
|
| - const size_t expected_length = VarintBE<int32_t>::Length(expected_value);
|
| - int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(),
|
| - &new_position);
|
| - EXPECT_LE(0, parsed_value);
|
| - size_t parsed_length = new_position - original_position;
|
| - EXPECT_EQ(expected_value, parsed_value);
|
| - EXPECT_EQ(expected_length, parsed_length);
|
| - verify_position_ += parsed_length;
|
| - return parsed_length;
|
| - }
|
| -
|
| - size_t ExpectSize(size_t size) {
|
| - return ExpectVarint(static_cast<int32_t>(size));
|
| - }
|
| -
|
| - size_t ExpectStringLength(const char* s) {
|
| - return ExpectSize(strlen(s));
|
| - }
|
| -
|
| - void SkipVarint() {
|
| - EXPECT_GT(diff_.size(), verify_position_);
|
| - const char* const original_position = &diff_[verify_position_];
|
| - const char* new_position = original_position;
|
| - VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position);
|
| - size_t parsed_length = new_position - original_position;
|
| - verify_position_ += parsed_length;
|
| - }
|
| -
|
| - void ExpectDataByte(char byte) {
|
| - ExpectByte(byte);
|
| - if (interleaved_) {
|
| - ++instruction_bytes_;
|
| - } else {
|
| - ++data_bytes_;
|
| - }
|
| - }
|
| -
|
| - void ExpectDataString(const char *expected_string) {
|
| - const char* ptr = expected_string;
|
| - while (*ptr) {
|
| - ExpectDataByte(*ptr);
|
| - ++ptr;
|
| - }
|
| - }
|
| -
|
| - void ExpectDataStringWithBlockSpacing(const char *expected_string,
|
| - bool trailing_spaces) {
|
| - const char* ptr = expected_string;
|
| - while (*ptr) {
|
| - for (int i = 0; i < (kBlockSize - 1); ++i) {
|
| - ExpectDataByte(' ');
|
| - }
|
| - ExpectDataByte(*ptr);
|
| - ++ptr;
|
| - }
|
| - if (trailing_spaces) {
|
| - for (int i = 0; i < (kBlockSize - 1); ++i) {
|
| - ExpectDataByte(' ');
|
| - }
|
| - }
|
| - }
|
| -
|
| - void ExpectInstructionByte(char byte) {
|
| - ExpectByte(byte);
|
| - ++instruction_bytes_;
|
| - }
|
| -
|
| - void ExpectInstructionVarint(int32_t value) {
|
| - instruction_bytes_ += ExpectVarint(value);
|
| - }
|
| -
|
| - void ExpectAddressByte(char byte) {
|
| - ExpectByte(byte);
|
| - if (interleaved_) {
|
| - ++instruction_bytes_;
|
| - } else {
|
| - ++address_bytes_;
|
| - }
|
| - }
|
| -
|
| - void ExpectAddressVarint(int32_t value) {
|
| - if (interleaved_) {
|
| - instruction_bytes_ += ExpectVarint(value);
|
| - } else {
|
| - address_bytes_ += ExpectVarint(value);
|
| - }
|
| - }
|
| -
|
| - // The following functions leverage the fact that the encoder uses
|
| - // the default code table and cache sizes. They are able to search for
|
| - // instructions of a particular size. The logic for mapping from
|
| - // instruction type, mode, and size to opcode value is very different here
|
| - // from the logic used in encodetable.{h,cc}, so hopefully
|
| - // this version will help validate that the other is correct.
|
| - // This version uses conditional statements, while encodetable.h
|
| - // looks up values in a mapping table.
|
| - void ExpectAddress(int32_t address, int copy_mode) {
|
| - if ((copy_mode >= default_cache_.FirstSameMode()) &&
|
| - (copy_mode <= default_cache_.LastMode())) {
|
| - ExpectAddressByte(address);
|
| - } else {
|
| - ExpectAddressVarint(address);
|
| - }
|
| - }
|
| -
|
| - void ExpectAddInstruction(int size) {
|
| - if (size <= 18) {
|
| - ExpectInstructionByte(0x01 + size);
|
| - } else {
|
| - ExpectInstructionByte(0x01);
|
| - ExpectInstructionVarint(size);
|
| - }
|
| - }
|
| -
|
| - void ExpectCopyInstruction(int size, int mode) {
|
| - if ((size >= 4) && (size <= 16)) {
|
| - ExpectInstructionByte(0x10 + (0x10 * mode) + size);
|
| - } else {
|
| - ExpectInstructionByte(0x13 + (0x10 * mode));
|
| - ExpectInstructionVarint(size);
|
| - }
|
| - ExpectMatch(size);
|
| - }
|
| -
|
| - bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) {
|
| - if ((copy_mode < default_cache_.FirstSameMode()) &&
|
| - (add_size <= 4) &&
|
| - (copy_size >= 4) &&
|
| - (copy_size <= 6)) {
|
| - ExpectInstructionByte(0x9C +
|
| - (0x0C * copy_mode) +
|
| - (0x03 * add_size) +
|
| - copy_size);
|
| - ExpectMatch(copy_size);
|
| - return true;
|
| - } else if ((copy_mode >= default_cache_.FirstSameMode()) &&
|
| - (add_size <= 4) &&
|
| - (copy_size == 4)) {
|
| - ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size);
|
| - ExpectMatch(copy_size);
|
| - return true;
|
| - } else {
|
| - ExpectAddInstruction(add_size);
|
| - return false;
|
| - }
|
| - }
|
| -
|
| - void ExpectAddInstructionForStringLength(const char* s) {
|
| - ExpectAddInstruction(static_cast<int>(strlen(s)));
|
| - }
|
| -
|
| - // Call this function before beginning to iterate through the diff string
|
| - // using the Expect... functions.
|
| - // text must be NULL-terminated.
|
| - void VerifyHeaderForDictionaryAndTargetText(const char* dictionary,
|
| - const char* target_text) {
|
| - ExpectByte(0x01); // Win_Indicator: VCD_SOURCE (dictionary)
|
| - ExpectStringLength(dictionary);
|
| - ExpectByte(0x00); // Source segment position: start of dictionary
|
| - saved_total_size_position_ = verify_position_;
|
| - SkipVarint(); // Length of the delta encoding
|
| - saved_delta_encoding_position_ = verify_position_;
|
| - ExpectStringLength(target_text);
|
| - ExpectByte(0x00); // Delta_indicator (no compression)
|
| - saved_section_sizes_position_ = verify_position_;
|
| - SkipVarint(); // length of data for ADDs and RUNs
|
| - SkipVarint(); // length of instructions section
|
| - SkipVarint(); // length of addresses for COPYs
|
| - }
|
| -
|
| - // Call this function before beginning to iterating through the entire
|
| - // diff string using the Expect... functions. It makes sure that the
|
| - // size totals in the window header match the number of bytes that
|
| - // were parsed.
|
| - void VerifySizes() {
|
| - EXPECT_EQ(verify_position_, diff_.size());
|
| - const size_t delta_encoding_size = verify_position_ -
|
| - saved_delta_encoding_position_;
|
| - verify_position_ = saved_total_size_position_;
|
| - ExpectSize(delta_encoding_size);
|
| - verify_position_ = saved_section_sizes_position_;
|
| - ExpectSize(data_bytes_);
|
| - ExpectSize(instruction_bytes_);
|
| - ExpectSize(address_bytes_);
|
| - }
|
| -
|
| - void ExpectMatch(size_t match_size) {
|
| - if (match_size >= expected_match_counts_.size()) {
|
| - // Be generous to avoid resizing again
|
| - expected_match_counts_.resize(match_size * 2, 0);
|
| - }
|
| - ++expected_match_counts_[match_size];
|
| - }
|
| -
|
| - void VerifyMatchCounts() {
|
| - EXPECT_TRUE(std::equal(expected_match_counts_.begin(),
|
| - expected_match_counts_.end(),
|
| - actual_match_counts_.begin()));
|
| - }
|
| -
|
| - bool interleaved_;
|
| - string diff_;
|
| - OutputString<string> diff_output_string_;
|
| - size_t verify_position_;
|
| - size_t saved_total_size_position_;
|
| - size_t saved_delta_encoding_position_;
|
| - size_t saved_section_sizes_position_;
|
| - size_t data_bytes_;
|
| - size_t instruction_bytes_;
|
| - size_t address_bytes_;
|
| - VCDiffAddressCache default_cache_; // Used for finding mode values
|
| - std::vector<int> expected_match_counts_;
|
| - std::vector<int> actual_match_counts_;
|
| -};
|
| -
|
| -class VCDiffEngineTest : public VCDiffEngineTestBase {
|
| - protected:
|
| - VCDiffEngineTest() :
|
| - engine_(dictionary_, strlen(dictionary_)) {
|
| - EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
|
| - }
|
| -
|
| - virtual ~VCDiffEngineTest() { }
|
| -
|
| -
|
| - static void SetUpTestCase() {
|
| - MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_,
|
| - kBlockSize, false);
|
| - MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false);
|
| - }
|
| -
|
| - static void TearDownTestCase() {
|
| - delete[] dictionary_;
|
| - delete[] target_;
|
| - }
|
| -
|
| - void EncodeNothing(bool interleaved, bool target_matching) {
|
| - interleaved_ = interleaved;
|
| - VCDiffCodeTableWriter coder(interleaved);
|
| - engine_.Encode("", 0, target_matching, &diff_output_string_, &coder);
|
| - EXPECT_TRUE(diff_.empty());
|
| - actual_match_counts_ = coder.match_counts();
|
| - }
|
| -
|
| - // text must be NULL-terminated
|
| - void EncodeText(const char* text, bool interleaved, bool target_matching) {
|
| - interleaved_ = interleaved;
|
| - VCDiffCodeTableWriter coder(interleaved);
|
| - engine_.Encode(text,
|
| - strlen(text),
|
| - target_matching,
|
| - &diff_output_string_,
|
| - &coder);
|
| - actual_match_counts_ = coder.match_counts();
|
| - }
|
| -
|
| - void Encode(bool interleaved, bool target_matching) {
|
| - EncodeText(target_, interleaved, target_matching);
|
| - VerifyHeader();
|
| - }
|
| -
|
| - void VerifyHeader() {
|
| - VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
|
| - }
|
| -
|
| - static const char dictionary_without_spaces_[];
|
| - static const char target_without_spaces_[];
|
| -
|
| - static const char* dictionary_;
|
| - static const char* target_;
|
| -
|
| - const VCDiffEngine engine_;
|
| -};
|
| -
|
| -#ifdef GTEST_HAS_DEATH_TEST
|
| -typedef VCDiffEngineTest VCDiffEngineDeathTest;
|
| -#endif // GTEST_HAS_DEATH_TEST
|
| -
|
| -const char VCDiffEngineTest::dictionary_without_spaces_[] =
|
| - "The only thing we have to fear is fear itself";
|
| -
|
| -const char VCDiffEngineTest::target_without_spaces_[] =
|
| - "What we hear is fearsome";
|
| -
|
| -const char* VCDiffEngineTest::dictionary_ = NULL;
|
| -const char* VCDiffEngineTest::target_ = NULL;
|
| -
|
| -#ifdef GTEST_HAS_DEATH_TEST
|
| -TEST_F(VCDiffEngineDeathTest, InitCalledTwice) {
|
| - EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()),
|
| - "twice");
|
| -}
|
| -#endif // GTEST_HAS_DEATH_TEST
|
| -
|
| -TEST_F(VCDiffEngineTest, EngineEncodeNothing) {
|
| - EncodeNothing(/* interleaved = */ false, /* target matching = */ false);
|
| -}
|
| -
|
| -TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) {
|
| - EncodeNothing(/* interleaved = */ true, /* target matching = */ false);
|
| -}
|
| -
|
| -TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) {
|
| - EncodeNothing(/* interleaved = */ false, /* target matching = */ true);
|
| -}
|
| -
|
| -TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) {
|
| - EncodeNothing(/* interleaved = */ true, /* target matching = */ true);
|
| -}
|
| -
|
| -TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) {
|
| - const char* small_text = " ";
|
| - EncodeText(small_text,
|
| - /* interleaved = */ false,
|
| - /* target matching = */ false);
|
| - VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
|
| - // Data for ADDs
|
| - ExpectDataString(small_text);
|
| - // Instructions and sizes
|
| - ExpectAddInstructionForStringLength(small_text);
|
| -}
|
| -
|
| -TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) {
|
| - const char* small_text = " ";
|
| - EncodeText(small_text,
|
| - /* interleaved = */ true,
|
| - /* target matching = */ false);
|
| - VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text);
|
| - // Interleaved section
|
| - ExpectAddInstructionForStringLength(small_text);
|
| - ExpectDataString(small_text);
|
| -}
|
| -
|
| -TEST_F(VCDiffEngineTest, EngineEncodeSampleText) {
|
| - Encode(/* interleaved = */ false, /* target matching = */ false);
|
| - // Data for ADDs
|
| - ExpectDataStringWithBlockSpacing("W", false);
|
| - ExpectDataByte('t');
|
| - ExpectDataByte('s');
|
| - if (kBlockSize < 4) {
|
| - ExpectDataStringWithBlockSpacing("ome", false);
|
| - } else {
|
| - ExpectDataByte('m');
|
| - }
|
| - // Instructions and sizes
|
| - if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
|
| - VCD_SELF_MODE)) {
|
| - ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - }
|
| - ExpectAddInstruction(1);
|
| - ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - ExpectCopyInstruction(11 * kBlockSize,
|
| - default_cache_.FirstNearMode());
|
| - if (kBlockSize < 4) {
|
| - // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1
|
| - // are too small to be selected
|
| - ExpectAddInstruction((3 * kBlockSize) + 1);
|
| - } else {
|
| - if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
|
| - ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - }
|
| - if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
|
| - ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
|
| - }
|
| - }
|
| - // Addresses for COPY
|
| - ExpectAddressVarint(18 * kBlockSize); // "ha"
|
| - ExpectAddressVarint(14 * kBlockSize); // " we h"
|
| - ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
|
| - if (kBlockSize >= 4) {
|
| - ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
|
| - ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
|
| - }
|
| - VerifySizes();
|
| -}
|
| -
|
| -TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) {
|
| - Encode(/* interleaved = */ true, /* target matching = */ false);
|
| - // Interleaved section
|
| - if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
|
| - VCD_SELF_MODE)) {
|
| - ExpectDataStringWithBlockSpacing("W", false);
|
| - ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - } else {
|
| - ExpectDataStringWithBlockSpacing("W", false);
|
| - }
|
| - ExpectAddressVarint(18 * kBlockSize); // "ha"
|
| - ExpectAddInstruction(1);
|
| - ExpectDataByte('t');
|
| - ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - ExpectAddressVarint(14 * kBlockSize); // " we h"
|
| - ExpectCopyInstruction(11 * kBlockSize,
|
| - default_cache_.FirstNearMode());
|
| - ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
|
| - if (kBlockSize < 4) {
|
| - // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1
|
| - // are too small to be selected
|
| - ExpectAddInstruction((3 * kBlockSize) + 1);
|
| - ExpectDataByte('s');
|
| - ExpectDataStringWithBlockSpacing("ome", false);
|
| - } else {
|
| - if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
|
| - ExpectDataByte('s');
|
| - ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - } else {
|
| - ExpectDataByte('s');
|
| - }
|
| - ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
|
| - if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
|
| - ExpectDataByte('m');
|
| - ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
|
| - } else {
|
| - ExpectDataByte('m');
|
| - }
|
| - ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
|
| - }
|
| - VerifySizes();
|
| -}
|
| -
|
| -TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) {
|
| - Encode(/* interleaved = */ false, /* target matching = */ true);
|
| - // Data for ADDs
|
| - ExpectDataStringWithBlockSpacing("W", false);
|
| - ExpectDataByte('t');
|
| - ExpectDataByte('s');
|
| - if (kBlockSize < 4) {
|
| - ExpectDataStringWithBlockSpacing("ome", false);
|
| - } else {
|
| - ExpectDataByte('m');
|
| - }
|
| - // Instructions and sizes
|
| - if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
|
| - VCD_SELF_MODE)) {
|
| - ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - }
|
| - ExpectAddInstruction(1);
|
| - ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - ExpectCopyInstruction(11 * kBlockSize,
|
| - default_cache_.FirstNearMode());
|
| - if (kBlockSize < 4) {
|
| - // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1
|
| - // are too small to be selected
|
| - ExpectAddInstruction((3 * kBlockSize) + 1);
|
| - } else {
|
| - if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
|
| - ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - }
|
| - if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
|
| - ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
|
| - }
|
| - }
|
| - // Addresses for COPY
|
| - ExpectAddressVarint(18 * kBlockSize); // "ha"
|
| - ExpectAddressVarint(14 * kBlockSize); // " we h"
|
| - ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
|
| - if (kBlockSize >= 4) {
|
| - ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
|
| - ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
|
| - }
|
| - VerifySizes();
|
| -}
|
| -
|
| -TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) {
|
| - Encode(/* interleaved = */ true, /* target matching = */ false);
|
| - // Interleaved section
|
| - if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1,
|
| - VCD_SELF_MODE)) {
|
| - ExpectDataStringWithBlockSpacing("W", false);
|
| - ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - } else {
|
| - ExpectDataStringWithBlockSpacing("W", false);
|
| - }
|
| - ExpectAddressVarint(18 * kBlockSize); // "ha"
|
| - ExpectAddInstruction(1);
|
| - ExpectDataByte('t');
|
| - ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - ExpectAddressVarint(14 * kBlockSize); // " we h"
|
| - ExpectCopyInstruction(11 * kBlockSize,
|
| - default_cache_.FirstNearMode());
|
| - ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear"
|
| - if (kBlockSize < 4) {
|
| - // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1
|
| - // are too small to be selected
|
| - ExpectAddInstruction((3 * kBlockSize) + 1);
|
| - ExpectDataByte('s');
|
| - ExpectDataStringWithBlockSpacing("ome", false);
|
| - } else {
|
| - if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) {
|
| - ExpectDataByte('s');
|
| - ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE);
|
| - } else {
|
| - ExpectDataByte('s');
|
| - }
|
| - ExpectAddressVarint(4 * kBlockSize); // "o" from "The only"
|
| - if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) {
|
| - ExpectDataByte('m');
|
| - ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE);
|
| - } else {
|
| - ExpectDataByte('m');
|
| - }
|
| - ExpectAddressVarint(2 * kBlockSize); // "e" from "The only"
|
| - }
|
| - VerifySizes();
|
| -}
|
| -
|
| -// This test case takes a dictionary containing several instances of the string
|
| -// "weasel", and a target string which is identical to the dictionary
|
| -// except that all instances of "weasel" have been replaced with the string
|
| -// "moon-pie". It tests that COPY instructions are generated for all
|
| -// boilerplate text (that is, the text between the "moon-pie" instances in
|
| -// the target) and, if target matching is enabled, that each instance of
|
| -// "moon-pie" (except the first one) is encoded using a COPY instruction
|
| -// rather than an ADD.
|
| -class WeaselsToMoonpiesTest : public VCDiffEngineTestBase {
|
| - protected:
|
| - // kCompressibleTestBlockSize:
|
| - // The size of the block to create for each letter in the
|
| - // dictionary and search string for the "compressible text" test.
|
| - // See MakeEachLetterABlock, below.
|
| - // If we use kCompressibleTestBlockSize = kBlockSize, then the
|
| - // encoder will find one match per unique letter in the HTML text.
|
| - // There are too many examples of "<" in the text for the encoder
|
| - // to iterate through them all, and some matches are not found.
|
| - // If we use kCompressibleTextBlockSize = 1, then the boilerplate
|
| - // text between "weasel" strings in the dictionary and "moon-pie"
|
| - // strings in the target may not be long enough to be found by
|
| - // the encoder's block-hash algorithm. A good value, that will give
|
| - // reproducible results across all block sizes, will be somewhere
|
| - // in between these extremes.
|
| - static const int kCompressibleTestBlockSize =
|
| - (kBlockSize < 4) ? 1 : kBlockSize / 4;
|
| - static const int kTrailingSpaces = kCompressibleTestBlockSize - 1;
|
| -
|
| - WeaselsToMoonpiesTest() :
|
| - engine_(dictionary_, strlen(dictionary_)),
|
| - match_index_(0),
|
| - search_dictionary_(dictionary_, strlen(dictionary_)),
|
| - copied_moonpie_address_(0) {
|
| - EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init());
|
| - weasel_positions_[0] = 0;
|
| - after_weasel_[0] = 0;
|
| - moonpie_positions_[0] = 0;
|
| - after_moonpie_[0] = 0;
|
| - }
|
| -
|
| - virtual ~WeaselsToMoonpiesTest() { }
|
| -
|
| - static void SetUpTestCase() {
|
| - MakeEachLetterABlock(dictionary_without_spaces_,
|
| - &dictionary_,
|
| - kCompressibleTestBlockSize,
|
| - false);
|
| - MakeEachLetterABlock(target_without_spaces_,
|
| - &target_,
|
| - kCompressibleTestBlockSize,
|
| - false);
|
| - MakeEachLetterABlock(weasel_text_without_spaces_,
|
| - &weasel_text_,
|
| - kCompressibleTestBlockSize,
|
| - true);
|
| - MakeEachLetterABlock(moonpie_text_without_spaces_,
|
| - &moonpie_text_,
|
| - kCompressibleTestBlockSize,
|
| - true);
|
| - }
|
| -
|
| - static void TearDownTestCase() {
|
| - delete[] dictionary_;
|
| - delete[] target_;
|
| - delete[] weasel_text_;
|
| - delete[] moonpie_text_;
|
| - }
|
| -
|
| - // text must be NULL-terminated
|
| - void EncodeText(const char* text, bool interleaved, bool target_matching) {
|
| - interleaved_ = interleaved;
|
| - VCDiffCodeTableWriter coder(interleaved);
|
| - engine_.Encode(text,
|
| - strlen(text),
|
| - target_matching,
|
| - &diff_output_string_,
|
| - &coder);
|
| - actual_match_counts_ = coder.match_counts();
|
| - }
|
| -
|
| - void Encode(bool interleaved, bool target_matching) {
|
| - EncodeText(target_, interleaved, target_matching);
|
| - VerifyHeader();
|
| - }
|
| -
|
| - void VerifyHeader() {
|
| - VerifyHeaderForDictionaryAndTargetText(dictionary_, target_);
|
| - }
|
| -
|
| - void ExpectCopyForSize(size_t size, int mode) {
|
| - ExpectCopyInstruction(static_cast<int>(size), mode);
|
| - }
|
| -
|
| - void ExpectAddForSize(size_t size) {
|
| - ExpectAddInstruction(static_cast<int>(size));
|
| - }
|
| -
|
| - void ExpectAddressVarintForSize(size_t value) {
|
| - ExpectAddressVarint(static_cast<int32_t>(value));
|
| - }
|
| -
|
| - void FindNextMoonpie(bool include_trailing_spaces) {
|
| - ++match_index_;
|
| - SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_,
|
| - AfterLastWeasel()));
|
| - if (CurrentWeaselPosition() == string::npos) {
|
| - SetCurrentMoonpiePosition(string::npos);
|
| - } else {
|
| - SetCurrentAfterWeaselPosition(CurrentWeaselPosition()
|
| - + strlen(weasel_text_)
|
| - + (include_trailing_spaces ?
|
| - kTrailingSpaces : 0));
|
| - SetCurrentMoonpiePosition(AfterLastMoonpie()
|
| - + CurrentBoilerplateLength());
|
| - SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition()
|
| - + strlen(moonpie_text_)
|
| - + (include_trailing_spaces ?
|
| - kTrailingSpaces : 0));
|
| - }
|
| - }
|
| - bool NoMoreMoonpies() const {
|
| - return CurrentMoonpiePosition() == string::npos;
|
| - }
|
| - size_t CurrentWeaselPosition() const {
|
| - return weasel_positions_[match_index_];
|
| - }
|
| - size_t LastWeaselPosition() const {
|
| - return weasel_positions_[match_index_ - 1];
|
| - }
|
| - size_t CurrentMoonpiePosition() const {
|
| - return moonpie_positions_[match_index_];
|
| - }
|
| - size_t LastMoonpiePosition() const {
|
| - return moonpie_positions_[match_index_ - 1];
|
| - }
|
| - size_t AfterLastWeasel() const {
|
| - CHECK_GE(match_index_, 1);
|
| - return after_weasel_[match_index_ - 1];
|
| - }
|
| - size_t AfterPreviousWeasel() const {
|
| - CHECK_GE(match_index_, 2);
|
| - return after_weasel_[match_index_ - 2];
|
| - }
|
| - size_t AfterLastMoonpie() const {
|
| - CHECK_GE(match_index_, 1);
|
| - return after_moonpie_[match_index_ - 1];
|
| - }
|
| - size_t AfterPreviousMoonpie() const {
|
| - CHECK_GE(match_index_, 2);
|
| - return after_moonpie_[match_index_ - 2];
|
| - }
|
| -
|
| - void SetCurrentWeaselPosition(size_t value) {
|
| - weasel_positions_[match_index_] = value;
|
| - }
|
| - void SetCurrentAfterWeaselPosition(size_t value) {
|
| - after_weasel_[match_index_] = value;
|
| - }
|
| - void SetCurrentMoonpiePosition(size_t value) {
|
| - moonpie_positions_[match_index_] = value;
|
| - }
|
| - void SetCurrentAfterMoonpiePosition(size_t value) {
|
| - after_moonpie_[match_index_] = value;
|
| - }
|
| -
|
| - // Find the length of the text in between the "weasel" strings in the
|
| - // compressible dictionary, which is the same as the text between
|
| - // the "moon-pie" strings in the compressible target.
|
| - size_t CurrentBoilerplateLength() const {
|
| - CHECK_GE(match_index_, 1);
|
| - return CurrentWeaselPosition() - AfterLastWeasel();
|
| - }
|
| - size_t DistanceFromLastWeasel() const {
|
| - CHECK_GE(match_index_, 1);
|
| - return CurrentWeaselPosition() - LastWeaselPosition();
|
| - }
|
| - size_t DistanceFromLastMoonpie() const {
|
| - CHECK_GE(match_index_, 1);
|
| - return CurrentMoonpiePosition() - LastMoonpiePosition();
|
| - }
|
| - size_t DistanceBetweenLastTwoWeasels() const {
|
| - CHECK_GE(match_index_, 2);
|
| - return AfterLastWeasel() - AfterPreviousWeasel();
|
| - }
|
| - size_t DistanceBetweenLastTwoMoonpies() const {
|
| - CHECK_GE(match_index_, 2);
|
| - return AfterLastMoonpie() - AfterPreviousMoonpie();
|
| - }
|
| -
|
| - int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const;
|
| - int UpdateCopyModeForMoonpie(int copy_mode) const;
|
| - int32_t FindMoonpieAddressForCopyMode(int copy_mode) const;
|
| -
|
| - void CopyBoilerplateAndAddMoonpie(int copy_mode);
|
| - void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode);
|
| -
|
| - static const char dictionary_without_spaces_[];
|
| - static const char target_without_spaces_[];
|
| - static const char weasel_text_without_spaces_[];
|
| - static const char moonpie_text_without_spaces_[];
|
| -
|
| - static const char* dictionary_;
|
| - static const char* target_;
|
| - static const char* weasel_text_;
|
| - static const char* moonpie_text_;
|
| -
|
| - const VCDiffEngine engine_;
|
| - size_t weasel_positions_[128];
|
| - size_t after_weasel_[128];
|
| - size_t moonpie_positions_[128];
|
| - size_t after_moonpie_[128];
|
| - int match_index_;
|
| - string search_dictionary_;
|
| - size_t copied_moonpie_address_;
|
| -};
|
| -
|
| -// Care is taken in the formulation of the dictionary
|
| -// to ensure that the surrounding letters do not match; for example,
|
| -// there are not two instances of the string "weasels". Otherwise,
|
| -// the matching behavior would not be as predictable.
|
| -const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] =
|
| - "<html>\n"
|
| - "<head>\n"
|
| - "<meta content=\"text/html; charset=ISO-8859-1\"\n"
|
| - "http-equiv=\"content-type\">\n"
|
| - "<title>All about weasels</title>\n"
|
| - "</head>\n"
|
| - "<!-- You will notice that the word \"weasel\" may be replaced"
|
| - " with something else -->\n"
|
| - "<body>\n"
|
| - "<h1>All about the weasel: highly compressible HTML text</h1>"
|
| - "<ul>\n"
|
| - "<li>Don\'t look a gift weasel in its mouth.</li>\n"
|
| - "<li>This item makes sure the next occurrence is found.</li>\n"
|
| - "<li>Don\'t count your weasel, before it\'s hatched.</li>\n"
|
| - "</ul>\n"
|
| - "<br>\n"
|
| - "</body>\n"
|
| - "</html>\n";
|
| -
|
| -const char WeaselsToMoonpiesTest::target_without_spaces_[] =
|
| - "<html>\n"
|
| - "<head>\n"
|
| - "<meta content=\"text/html; charset=ISO-8859-1\"\n"
|
| - "http-equiv=\"content-type\">\n"
|
| - "<title>All about moon-pies</title>\n"
|
| - "</head>\n"
|
| - "<!-- You will notice that the word \"moon-pie\" may be replaced"
|
| - " with something else -->\n"
|
| - "<body>\n"
|
| - "<h1>All about the moon-pie: highly compressible HTML text</h1>"
|
| - "<ul>\n"
|
| - "<li>Don\'t look a gift moon-pie in its mouth.</li>\n"
|
| - "<li>This item makes sure the next occurrence is found.</li>\n"
|
| - "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n"
|
| - "</ul>\n"
|
| - "<br>\n"
|
| - "</body>\n"
|
| - "</html>\n";
|
| -
|
| -const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel";
|
| -const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie";
|
| -
|
| -const char* WeaselsToMoonpiesTest::dictionary_ = NULL;
|
| -const char* WeaselsToMoonpiesTest::target_ = NULL;
|
| -const char* WeaselsToMoonpiesTest::weasel_text_ = NULL;
|
| -const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL;
|
| -
|
| -int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode(
|
| - int copy_mode) const {
|
| - size_t copy_address = 0;
|
| - if (copy_mode == VCD_SELF_MODE) {
|
| - copy_address = AfterLastWeasel();
|
| - } else if ((copy_mode >= default_cache_.FirstNearMode()) &&
|
| - (copy_mode < default_cache_.FirstSameMode())) {
|
| - copy_address = DistanceBetweenLastTwoWeasels();
|
| - } else if ((copy_mode >= default_cache_.FirstSameMode()) &&
|
| - (copy_mode <= default_cache_.LastMode())) {
|
| - copy_address = AfterLastWeasel() % 256;
|
| - } else {
|
| - LOG(FATAL) << "Unexpected copy mode " << copy_mode;
|
| - }
|
| - return static_cast<int32_t>(copy_address);
|
| -}
|
| -
|
| -int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const {
|
| - if (copy_mode == default_cache_.FirstSameMode()) {
|
| - return default_cache_.FirstSameMode()
|
| - + static_cast<int>((copied_moonpie_address_ / 256) % 3);
|
| - } else {
|
| - return copy_mode;
|
| - }
|
| -}
|
| -
|
| -int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode(
|
| - int copy_mode) const {
|
| - size_t copy_address = 0;
|
| - if (copy_mode == VCD_HERE_MODE) {
|
| - copy_address = DistanceFromLastMoonpie();
|
| - } else if ((copy_mode >= default_cache_.FirstNearMode()) &&
|
| - (copy_mode < default_cache_.FirstSameMode())) {
|
| - copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces;
|
| - } else if ((copy_mode >= default_cache_.FirstSameMode()) &&
|
| - (copy_mode <= default_cache_.LastMode())) {
|
| - copy_address = copied_moonpie_address_ % 256;
|
| - } else {
|
| - LOG(FATAL) << "Unexpected copy mode " << copy_mode;
|
| - }
|
| - return static_cast<int32_t>(copy_address);
|
| -}
|
| -
|
| -// Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
|
| -// in the encoding.
|
| -void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) {
|
| - EXPECT_FALSE(NoMoreMoonpies());
|
| - ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
|
| - ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
|
| - ExpectAddInstructionForStringLength(moonpie_text_);
|
| - ExpectDataString(moonpie_text_);
|
| -}
|
| -
|
| -// Expect one dictionary instance of "weasel" to be replaced with "moon-pie"
|
| -// in the encoding. The "moon-pie" text will be copied from the previously
|
| -// encoded target.
|
| -void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie(
|
| - int copy_mode,
|
| - int moonpie_copy_mode) {
|
| - EXPECT_FALSE(NoMoreMoonpies());
|
| - ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode);
|
| - ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode);
|
| - moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode);
|
| - ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode);
|
| - ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode),
|
| - moonpie_copy_mode);
|
| -}
|
| -
|
| -TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) {
|
| - Encode(/* interleaved = */ true, /* target matching = */ false);
|
| - FindNextMoonpie(false);
|
| - // Expect all five "weasel"s to be replaced with "moon-pie"s
|
| - CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
|
| - FindNextMoonpie(false);
|
| - CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE);
|
| - FindNextMoonpie(false);
|
| - CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1);
|
| - FindNextMoonpie(false);
|
| - CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2);
|
| - FindNextMoonpie(false);
|
| - CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3);
|
| - FindNextMoonpie(false);
|
| - EXPECT_TRUE(NoMoreMoonpies());
|
| - ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
|
| - default_cache_.FirstNearMode());
|
| - ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
|
| - VerifySizes();
|
| -}
|
| -
|
| -TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) {
|
| - Encode(/* interleaved = */ true, /* target matching = */ true);
|
| - // Expect all five "weasel"s to be replaced with "moon-pie"s.
|
| - // Every "moon-pie" after the first one should be copied from the
|
| - // previously encoded target text.
|
| - FindNextMoonpie(false);
|
| - CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode());
|
| - FindNextMoonpie(true);
|
| - CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE);
|
| - if (kBlockSize <= 4) {
|
| - copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition();
|
| - FindNextMoonpie(true);
|
| - CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
|
| - default_cache_.FirstSameMode());
|
| - } else { // kBlockSize > 4
|
| - copied_moonpie_address_ = strlen(dictionary_) + CurrentMoonpiePosition();
|
| - FindNextMoonpie(true);
|
| - CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
|
| - default_cache_.FirstNearMode() + 2);
|
| - }
|
| - LOG(INFO) << "copied_moonpie_address_ : "
|
| - << copied_moonpie_address_ << LOG_ENDL;
|
| - FindNextMoonpie(true);
|
| - CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3,
|
| - default_cache_.FirstSameMode());
|
| - FindNextMoonpie(true);
|
| - CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1,
|
| - default_cache_.FirstSameMode());
|
| - FindNextMoonpie(true);
|
| - EXPECT_TRUE(NoMoreMoonpies());
|
| - ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(),
|
| - default_cache_.FirstNearMode() + 3);
|
| - ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels());
|
| - VerifySizes();
|
| -}
|
| -
|
| -} // anonymous namespace
|
| -} // namespace open-vcdiff
|
|
|