Index: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdiffengine_test.cc |
=================================================================== |
--- sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdiffengine_test.cc (revision 2678) |
+++ sdch/open_vcdiff/depot/opensource/open-vcdiff/src/vcdiffengine_test.cc (working copy) |
@@ -1,1002 +0,0 @@ |
-// Copyright 2008 Google Inc. |
-// Author: Lincoln Smith |
-// |
-// Licensed under the Apache License, Version 2.0 (the "License"); |
-// you may not use this file except in compliance with the License. |
-// You may obtain a copy of the License at |
-// |
-// http://www.apache.org/licenses/LICENSE-2.0 |
-// |
-// Unless required by applicable law or agreed to in writing, software |
-// distributed under the License is distributed on an "AS IS" BASIS, |
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
-// See the License for the specific language governing permissions and |
-// limitations under the License. |
- |
-#include <config.h> |
-#include "vcdiffengine.h" |
-#include <algorithm> |
-#include <string> |
-#include <vector> |
-#include "addrcache.h" |
-#include "blockhash.h" |
-#include "encodetable.h" |
-#include "google/output_string.h" |
-#include "rolling_hash.h" |
-#include "testing.h" |
-#include "varint_bigendian.h" |
-#include "vcdiff_defs.h" |
- |
-namespace open_vcdiff { |
- |
-namespace { |
- |
-using std::string; |
- |
-class VCDiffEngineTestBase : public testing::Test { |
- protected: |
- // Some common definitions and helper functions used in the various tests |
- // for VCDiffEngine. |
- static const int kBlockSize = BlockHash::kBlockSize; |
- |
- VCDiffEngineTestBase() : interleaved_(false), |
- diff_output_string_(&diff_), |
- verify_position_(0), |
- saved_total_size_position_(0), |
- saved_delta_encoding_position_(0), |
- saved_section_sizes_position_(0), |
- data_bytes_(0), |
- instruction_bytes_(0), |
- address_bytes_(0) { } |
- |
- virtual ~VCDiffEngineTestBase() { } |
- |
- virtual void TearDown() { |
- VerifyMatchCounts(); |
- } |
- |
- // Copy string_without_spaces into newly allocated result buffer, |
- // but pad its contents with space characters so that every character |
- // in string_without_spaces corresponds to (block_size - 1) |
- // spaces in the result, followed by that character. |
- // For example: |
- // If string_without_spaces begins "The only thing"... and block_size is 4, |
- // then 3 space characters will be inserted |
- // between each letter in the result, as follows: |
- // " T h e o n l y t h i n g"... |
- // This makes testing simpler, because finding a block_size-byte match |
- // between the dictionary and target only depends on the |
- // trailing letter in each block. |
- // If no_initial_padding is true, then the first letter will not have |
- // spaces added in front of it. |
- static void MakeEachLetterABlock(const char* string_without_spaces, |
- const char** result, |
- int block_size, |
- bool no_initial_padding) { |
- const size_t length_without_spaces = strlen(string_without_spaces); |
- char* padded_text = new char[(block_size * length_without_spaces) + 1]; |
- memset(padded_text, ' ', block_size * length_without_spaces); |
- char* padded_text_ptr = padded_text; |
- if (!no_initial_padding) { |
- padded_text_ptr += block_size - 1; |
- } |
- for (size_t i = 0; i < length_without_spaces; ++i) { |
- *padded_text_ptr = string_without_spaces[i]; |
- padded_text_ptr += block_size; |
- } |
- *(padded_text_ptr - block_size + 1) = '\0'; |
- *result = padded_text; |
- } |
- |
- // These functions iterate through the decoded output and expect |
- // simple elements: bytes or variable-length integers. |
- void ExpectByte(char byte) { |
- EXPECT_GT(diff_.size(), verify_position_); |
- EXPECT_EQ(byte, diff_[verify_position_]); |
- ++verify_position_; |
- } |
- |
- size_t ExpectVarint(int32_t expected_value) { |
- EXPECT_GT(diff_.size(), verify_position_); |
- const char* const original_position = &diff_[verify_position_]; |
- const char* new_position = original_position; |
- const size_t expected_length = VarintBE<int32_t>::Length(expected_value); |
- int32_t parsed_value = VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), |
- &new_position); |
- EXPECT_LE(0, parsed_value); |
- size_t parsed_length = new_position - original_position; |
- EXPECT_EQ(expected_value, parsed_value); |
- EXPECT_EQ(expected_length, parsed_length); |
- verify_position_ += parsed_length; |
- return parsed_length; |
- } |
- |
- size_t ExpectSize(size_t size) { |
- return ExpectVarint(static_cast<int32_t>(size)); |
- } |
- |
- size_t ExpectStringLength(const char* s) { |
- return ExpectSize(strlen(s)); |
- } |
- |
- void SkipVarint() { |
- EXPECT_GT(diff_.size(), verify_position_); |
- const char* const original_position = &diff_[verify_position_]; |
- const char* new_position = original_position; |
- VarintBE<int32_t>::Parse(diff_.data() + diff_.size(), &new_position); |
- size_t parsed_length = new_position - original_position; |
- verify_position_ += parsed_length; |
- } |
- |
- void ExpectDataByte(char byte) { |
- ExpectByte(byte); |
- if (interleaved_) { |
- ++instruction_bytes_; |
- } else { |
- ++data_bytes_; |
- } |
- } |
- |
- void ExpectDataString(const char *expected_string) { |
- const char* ptr = expected_string; |
- while (*ptr) { |
- ExpectDataByte(*ptr); |
- ++ptr; |
- } |
- } |
- |
- void ExpectDataStringWithBlockSpacing(const char *expected_string, |
- bool trailing_spaces) { |
- const char* ptr = expected_string; |
- while (*ptr) { |
- for (int i = 0; i < (kBlockSize - 1); ++i) { |
- ExpectDataByte(' '); |
- } |
- ExpectDataByte(*ptr); |
- ++ptr; |
- } |
- if (trailing_spaces) { |
- for (int i = 0; i < (kBlockSize - 1); ++i) { |
- ExpectDataByte(' '); |
- } |
- } |
- } |
- |
- void ExpectInstructionByte(char byte) { |
- ExpectByte(byte); |
- ++instruction_bytes_; |
- } |
- |
- void ExpectInstructionVarint(int32_t value) { |
- instruction_bytes_ += ExpectVarint(value); |
- } |
- |
- void ExpectAddressByte(char byte) { |
- ExpectByte(byte); |
- if (interleaved_) { |
- ++instruction_bytes_; |
- } else { |
- ++address_bytes_; |
- } |
- } |
- |
- void ExpectAddressVarint(int32_t value) { |
- if (interleaved_) { |
- instruction_bytes_ += ExpectVarint(value); |
- } else { |
- address_bytes_ += ExpectVarint(value); |
- } |
- } |
- |
- // The following functions leverage the fact that the encoder uses |
- // the default code table and cache sizes. They are able to search for |
- // instructions of a particular size. The logic for mapping from |
- // instruction type, mode, and size to opcode value is very different here |
- // from the logic used in encodetable.{h,cc}, so hopefully |
- // this version will help validate that the other is correct. |
- // This version uses conditional statements, while encodetable.h |
- // looks up values in a mapping table. |
- void ExpectAddress(int32_t address, int copy_mode) { |
- if ((copy_mode >= default_cache_.FirstSameMode()) && |
- (copy_mode <= default_cache_.LastMode())) { |
- ExpectAddressByte(address); |
- } else { |
- ExpectAddressVarint(address); |
- } |
- } |
- |
- void ExpectAddInstruction(int size) { |
- if (size <= 18) { |
- ExpectInstructionByte(0x01 + size); |
- } else { |
- ExpectInstructionByte(0x01); |
- ExpectInstructionVarint(size); |
- } |
- } |
- |
- void ExpectCopyInstruction(int size, int mode) { |
- if ((size >= 4) && (size <= 16)) { |
- ExpectInstructionByte(0x10 + (0x10 * mode) + size); |
- } else { |
- ExpectInstructionByte(0x13 + (0x10 * mode)); |
- ExpectInstructionVarint(size); |
- } |
- ExpectMatch(size); |
- } |
- |
- bool ExpectAddCopyInstruction(int add_size, int copy_size, int copy_mode) { |
- if ((copy_mode < default_cache_.FirstSameMode()) && |
- (add_size <= 4) && |
- (copy_size >= 4) && |
- (copy_size <= 6)) { |
- ExpectInstructionByte(0x9C + |
- (0x0C * copy_mode) + |
- (0x03 * add_size) + |
- copy_size); |
- ExpectMatch(copy_size); |
- return true; |
- } else if ((copy_mode >= default_cache_.FirstSameMode()) && |
- (add_size <= 4) && |
- (copy_size == 4)) { |
- ExpectInstructionByte(0xD2 + (0x04 * copy_mode) + add_size); |
- ExpectMatch(copy_size); |
- return true; |
- } else { |
- ExpectAddInstruction(add_size); |
- return false; |
- } |
- } |
- |
- void ExpectAddInstructionForStringLength(const char* s) { |
- ExpectAddInstruction(static_cast<int>(strlen(s))); |
- } |
- |
- // Call this function before beginning to iterate through the diff string |
- // using the Expect... functions. |
- // text must be NULL-terminated. |
- void VerifyHeaderForDictionaryAndTargetText(const char* dictionary, |
- const char* target_text) { |
- ExpectByte(0x01); // Win_Indicator: VCD_SOURCE (dictionary) |
- ExpectStringLength(dictionary); |
- ExpectByte(0x00); // Source segment position: start of dictionary |
- saved_total_size_position_ = verify_position_; |
- SkipVarint(); // Length of the delta encoding |
- saved_delta_encoding_position_ = verify_position_; |
- ExpectStringLength(target_text); |
- ExpectByte(0x00); // Delta_indicator (no compression) |
- saved_section_sizes_position_ = verify_position_; |
- SkipVarint(); // length of data for ADDs and RUNs |
- SkipVarint(); // length of instructions section |
- SkipVarint(); // length of addresses for COPYs |
- } |
- |
- // Call this function before beginning to iterating through the entire |
- // diff string using the Expect... functions. It makes sure that the |
- // size totals in the window header match the number of bytes that |
- // were parsed. |
- void VerifySizes() { |
- EXPECT_EQ(verify_position_, diff_.size()); |
- const size_t delta_encoding_size = verify_position_ - |
- saved_delta_encoding_position_; |
- verify_position_ = saved_total_size_position_; |
- ExpectSize(delta_encoding_size); |
- verify_position_ = saved_section_sizes_position_; |
- ExpectSize(data_bytes_); |
- ExpectSize(instruction_bytes_); |
- ExpectSize(address_bytes_); |
- } |
- |
- void ExpectMatch(size_t match_size) { |
- if (match_size >= expected_match_counts_.size()) { |
- // Be generous to avoid resizing again |
- expected_match_counts_.resize(match_size * 2, 0); |
- } |
- ++expected_match_counts_[match_size]; |
- } |
- |
- void VerifyMatchCounts() { |
- EXPECT_TRUE(std::equal(expected_match_counts_.begin(), |
- expected_match_counts_.end(), |
- actual_match_counts_.begin())); |
- } |
- |
- bool interleaved_; |
- string diff_; |
- OutputString<string> diff_output_string_; |
- size_t verify_position_; |
- size_t saved_total_size_position_; |
- size_t saved_delta_encoding_position_; |
- size_t saved_section_sizes_position_; |
- size_t data_bytes_; |
- size_t instruction_bytes_; |
- size_t address_bytes_; |
- VCDiffAddressCache default_cache_; // Used for finding mode values |
- std::vector<int> expected_match_counts_; |
- std::vector<int> actual_match_counts_; |
-}; |
- |
-class VCDiffEngineTest : public VCDiffEngineTestBase { |
- protected: |
- VCDiffEngineTest() : |
- engine_(dictionary_, strlen(dictionary_)) { |
- EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); |
- } |
- |
- virtual ~VCDiffEngineTest() { } |
- |
- |
- static void SetUpTestCase() { |
- MakeEachLetterABlock(dictionary_without_spaces_, &dictionary_, |
- kBlockSize, false); |
- MakeEachLetterABlock(target_without_spaces_, &target_, kBlockSize, false); |
- } |
- |
- static void TearDownTestCase() { |
- delete[] dictionary_; |
- delete[] target_; |
- } |
- |
- void EncodeNothing(bool interleaved, bool target_matching) { |
- interleaved_ = interleaved; |
- VCDiffCodeTableWriter coder(interleaved); |
- engine_.Encode("", 0, target_matching, &diff_output_string_, &coder); |
- EXPECT_TRUE(diff_.empty()); |
- actual_match_counts_ = coder.match_counts(); |
- } |
- |
- // text must be NULL-terminated |
- void EncodeText(const char* text, bool interleaved, bool target_matching) { |
- interleaved_ = interleaved; |
- VCDiffCodeTableWriter coder(interleaved); |
- engine_.Encode(text, |
- strlen(text), |
- target_matching, |
- &diff_output_string_, |
- &coder); |
- actual_match_counts_ = coder.match_counts(); |
- } |
- |
- void Encode(bool interleaved, bool target_matching) { |
- EncodeText(target_, interleaved, target_matching); |
- VerifyHeader(); |
- } |
- |
- void VerifyHeader() { |
- VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); |
- } |
- |
- static const char dictionary_without_spaces_[]; |
- static const char target_without_spaces_[]; |
- |
- static const char* dictionary_; |
- static const char* target_; |
- |
- const VCDiffEngine engine_; |
-}; |
- |
-#ifdef GTEST_HAS_DEATH_TEST |
-typedef VCDiffEngineTest VCDiffEngineDeathTest; |
-#endif // GTEST_HAS_DEATH_TEST |
- |
-const char VCDiffEngineTest::dictionary_without_spaces_[] = |
- "The only thing we have to fear is fear itself"; |
- |
-const char VCDiffEngineTest::target_without_spaces_[] = |
- "What we hear is fearsome"; |
- |
-const char* VCDiffEngineTest::dictionary_ = NULL; |
-const char* VCDiffEngineTest::target_ = NULL; |
- |
-#ifdef GTEST_HAS_DEATH_TEST |
-TEST_F(VCDiffEngineDeathTest, InitCalledTwice) { |
- EXPECT_DEBUG_DEATH(EXPECT_FALSE(const_cast<VCDiffEngine*>(&engine_)->Init()), |
- "twice"); |
-} |
-#endif // GTEST_HAS_DEATH_TEST |
- |
-TEST_F(VCDiffEngineTest, EngineEncodeNothing) { |
- EncodeNothing(/* interleaved = */ false, /* target matching = */ false); |
-} |
- |
-TEST_F(VCDiffEngineTest, EngineEncodeNothingInterleaved) { |
- EncodeNothing(/* interleaved = */ true, /* target matching = */ false); |
-} |
- |
-TEST_F(VCDiffEngineTest, EngineEncodeNothingTarget) { |
- EncodeNothing(/* interleaved = */ false, /* target matching = */ true); |
-} |
- |
-TEST_F(VCDiffEngineTest, EngineEncodeNothingTargetInterleaved) { |
- EncodeNothing(/* interleaved = */ true, /* target matching = */ true); |
-} |
- |
-TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlock) { |
- const char* small_text = " "; |
- EncodeText(small_text, |
- /* interleaved = */ false, |
- /* target matching = */ false); |
- VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); |
- // Data for ADDs |
- ExpectDataString(small_text); |
- // Instructions and sizes |
- ExpectAddInstructionForStringLength(small_text); |
-} |
- |
-TEST_F(VCDiffEngineTest, EngineEncodeSmallerThanOneBlockInterleaved) { |
- const char* small_text = " "; |
- EncodeText(small_text, |
- /* interleaved = */ true, |
- /* target matching = */ false); |
- VerifyHeaderForDictionaryAndTargetText(dictionary_, small_text); |
- // Interleaved section |
- ExpectAddInstructionForStringLength(small_text); |
- ExpectDataString(small_text); |
-} |
- |
-TEST_F(VCDiffEngineTest, EngineEncodeSampleText) { |
- Encode(/* interleaved = */ false, /* target matching = */ false); |
- // Data for ADDs |
- ExpectDataStringWithBlockSpacing("W", false); |
- ExpectDataByte('t'); |
- ExpectDataByte('s'); |
- if (kBlockSize < 4) { |
- ExpectDataStringWithBlockSpacing("ome", false); |
- } else { |
- ExpectDataByte('m'); |
- } |
- // Instructions and sizes |
- if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, |
- VCD_SELF_MODE)) { |
- ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); |
- } |
- ExpectAddInstruction(1); |
- ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); |
- ExpectCopyInstruction(11 * kBlockSize, |
- default_cache_.FirstNearMode()); |
- if (kBlockSize < 4) { |
- // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1 |
- // are too small to be selected |
- ExpectAddInstruction((3 * kBlockSize) + 1); |
- } else { |
- if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { |
- ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); |
- } |
- if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { |
- ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); |
- } |
- } |
- // Addresses for COPY |
- ExpectAddressVarint(18 * kBlockSize); // "ha" |
- ExpectAddressVarint(14 * kBlockSize); // " we h" |
- ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" |
- if (kBlockSize >= 4) { |
- ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" |
- ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" |
- } |
- VerifySizes(); |
-} |
- |
-TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleaved) { |
- Encode(/* interleaved = */ true, /* target matching = */ false); |
- // Interleaved section |
- if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, |
- VCD_SELF_MODE)) { |
- ExpectDataStringWithBlockSpacing("W", false); |
- ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); |
- } else { |
- ExpectDataStringWithBlockSpacing("W", false); |
- } |
- ExpectAddressVarint(18 * kBlockSize); // "ha" |
- ExpectAddInstruction(1); |
- ExpectDataByte('t'); |
- ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); |
- ExpectAddressVarint(14 * kBlockSize); // " we h" |
- ExpectCopyInstruction(11 * kBlockSize, |
- default_cache_.FirstNearMode()); |
- ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" |
- if (kBlockSize < 4) { |
- // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1 |
- // are too small to be selected |
- ExpectAddInstruction((3 * kBlockSize) + 1); |
- ExpectDataByte('s'); |
- ExpectDataStringWithBlockSpacing("ome", false); |
- } else { |
- if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { |
- ExpectDataByte('s'); |
- ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); |
- } else { |
- ExpectDataByte('s'); |
- } |
- ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" |
- if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { |
- ExpectDataByte('m'); |
- ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); |
- } else { |
- ExpectDataByte('m'); |
- } |
- ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" |
- } |
- VerifySizes(); |
-} |
- |
-TEST_F(VCDiffEngineTest, EngineEncodeSampleTextWithTargetMatching) { |
- Encode(/* interleaved = */ false, /* target matching = */ true); |
- // Data for ADDs |
- ExpectDataStringWithBlockSpacing("W", false); |
- ExpectDataByte('t'); |
- ExpectDataByte('s'); |
- if (kBlockSize < 4) { |
- ExpectDataStringWithBlockSpacing("ome", false); |
- } else { |
- ExpectDataByte('m'); |
- } |
- // Instructions and sizes |
- if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, |
- VCD_SELF_MODE)) { |
- ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); |
- } |
- ExpectAddInstruction(1); |
- ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); |
- ExpectCopyInstruction(11 * kBlockSize, |
- default_cache_.FirstNearMode()); |
- if (kBlockSize < 4) { |
- // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1 |
- // are too small to be selected |
- ExpectAddInstruction((3 * kBlockSize) + 1); |
- } else { |
- if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { |
- ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); |
- } |
- if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { |
- ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); |
- } |
- } |
- // Addresses for COPY |
- ExpectAddressVarint(18 * kBlockSize); // "ha" |
- ExpectAddressVarint(14 * kBlockSize); // " we h" |
- ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" |
- if (kBlockSize >= 4) { |
- ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" |
- ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" |
- } |
- VerifySizes(); |
-} |
- |
-TEST_F(VCDiffEngineTest, EngineEncodeSampleTextInterleavedWithTargetMatching) { |
- Encode(/* interleaved = */ true, /* target matching = */ false); |
- // Interleaved section |
- if (!ExpectAddCopyInstruction(kBlockSize, (3 * kBlockSize) - 1, |
- VCD_SELF_MODE)) { |
- ExpectDataStringWithBlockSpacing("W", false); |
- ExpectCopyInstruction((3 * kBlockSize) - 1, VCD_SELF_MODE); |
- } else { |
- ExpectDataStringWithBlockSpacing("W", false); |
- } |
- ExpectAddressVarint(18 * kBlockSize); // "ha" |
- ExpectAddInstruction(1); |
- ExpectDataByte('t'); |
- ExpectCopyInstruction((6 * kBlockSize) - 1, VCD_SELF_MODE); |
- ExpectAddressVarint(14 * kBlockSize); // " we h" |
- ExpectCopyInstruction(11 * kBlockSize, |
- default_cache_.FirstNearMode()); |
- ExpectAddressVarint((9 * kBlockSize) + (kBlockSize - 1)); // "ear is fear" |
- if (kBlockSize < 4) { |
- // Copy instructions of size kBlockSize and (2 * kBlockSize) - 1 |
- // are too small to be selected |
- ExpectAddInstruction((3 * kBlockSize) + 1); |
- ExpectDataByte('s'); |
- ExpectDataStringWithBlockSpacing("ome", false); |
- } else { |
- if (!ExpectAddCopyInstruction(1, (2 * kBlockSize) - 1, VCD_SELF_MODE)) { |
- ExpectDataByte('s'); |
- ExpectCopyInstruction((2 * kBlockSize) - 1, VCD_SELF_MODE); |
- } else { |
- ExpectDataByte('s'); |
- } |
- ExpectAddressVarint(4 * kBlockSize); // "o" from "The only" |
- if (!ExpectAddCopyInstruction(1, kBlockSize, VCD_SELF_MODE)) { |
- ExpectDataByte('m'); |
- ExpectCopyInstruction(kBlockSize, VCD_SELF_MODE); |
- } else { |
- ExpectDataByte('m'); |
- } |
- ExpectAddressVarint(2 * kBlockSize); // "e" from "The only" |
- } |
- VerifySizes(); |
-} |
- |
-// This test case takes a dictionary containing several instances of the string |
-// "weasel", and a target string which is identical to the dictionary |
-// except that all instances of "weasel" have been replaced with the string |
-// "moon-pie". It tests that COPY instructions are generated for all |
-// boilerplate text (that is, the text between the "moon-pie" instances in |
-// the target) and, if target matching is enabled, that each instance of |
-// "moon-pie" (except the first one) is encoded using a COPY instruction |
-// rather than an ADD. |
-class WeaselsToMoonpiesTest : public VCDiffEngineTestBase { |
- protected: |
- // kCompressibleTestBlockSize: |
- // The size of the block to create for each letter in the |
- // dictionary and search string for the "compressible text" test. |
- // See MakeEachLetterABlock, below. |
- // If we use kCompressibleTestBlockSize = kBlockSize, then the |
- // encoder will find one match per unique letter in the HTML text. |
- // There are too many examples of "<" in the text for the encoder |
- // to iterate through them all, and some matches are not found. |
- // If we use kCompressibleTextBlockSize = 1, then the boilerplate |
- // text between "weasel" strings in the dictionary and "moon-pie" |
- // strings in the target may not be long enough to be found by |
- // the encoder's block-hash algorithm. A good value, that will give |
- // reproducible results across all block sizes, will be somewhere |
- // in between these extremes. |
- static const int kCompressibleTestBlockSize = |
- (kBlockSize < 4) ? 1 : kBlockSize / 4; |
- static const int kTrailingSpaces = kCompressibleTestBlockSize - 1; |
- |
- WeaselsToMoonpiesTest() : |
- engine_(dictionary_, strlen(dictionary_)), |
- match_index_(0), |
- search_dictionary_(dictionary_, strlen(dictionary_)), |
- copied_moonpie_address_(0) { |
- EXPECT_TRUE(const_cast<VCDiffEngine*>(&engine_)->Init()); |
- weasel_positions_[0] = 0; |
- after_weasel_[0] = 0; |
- moonpie_positions_[0] = 0; |
- after_moonpie_[0] = 0; |
- } |
- |
- virtual ~WeaselsToMoonpiesTest() { } |
- |
- static void SetUpTestCase() { |
- MakeEachLetterABlock(dictionary_without_spaces_, |
- &dictionary_, |
- kCompressibleTestBlockSize, |
- false); |
- MakeEachLetterABlock(target_without_spaces_, |
- &target_, |
- kCompressibleTestBlockSize, |
- false); |
- MakeEachLetterABlock(weasel_text_without_spaces_, |
- &weasel_text_, |
- kCompressibleTestBlockSize, |
- true); |
- MakeEachLetterABlock(moonpie_text_without_spaces_, |
- &moonpie_text_, |
- kCompressibleTestBlockSize, |
- true); |
- } |
- |
- static void TearDownTestCase() { |
- delete[] dictionary_; |
- delete[] target_; |
- delete[] weasel_text_; |
- delete[] moonpie_text_; |
- } |
- |
- // text must be NULL-terminated |
- void EncodeText(const char* text, bool interleaved, bool target_matching) { |
- interleaved_ = interleaved; |
- VCDiffCodeTableWriter coder(interleaved); |
- engine_.Encode(text, |
- strlen(text), |
- target_matching, |
- &diff_output_string_, |
- &coder); |
- actual_match_counts_ = coder.match_counts(); |
- } |
- |
- void Encode(bool interleaved, bool target_matching) { |
- EncodeText(target_, interleaved, target_matching); |
- VerifyHeader(); |
- } |
- |
- void VerifyHeader() { |
- VerifyHeaderForDictionaryAndTargetText(dictionary_, target_); |
- } |
- |
- void ExpectCopyForSize(size_t size, int mode) { |
- ExpectCopyInstruction(static_cast<int>(size), mode); |
- } |
- |
- void ExpectAddForSize(size_t size) { |
- ExpectAddInstruction(static_cast<int>(size)); |
- } |
- |
- void ExpectAddressVarintForSize(size_t value) { |
- ExpectAddressVarint(static_cast<int32_t>(value)); |
- } |
- |
- void FindNextMoonpie(bool include_trailing_spaces) { |
- ++match_index_; |
- SetCurrentWeaselPosition(search_dictionary_.find(weasel_text_, |
- AfterLastWeasel())); |
- if (CurrentWeaselPosition() == string::npos) { |
- SetCurrentMoonpiePosition(string::npos); |
- } else { |
- SetCurrentAfterWeaselPosition(CurrentWeaselPosition() |
- + strlen(weasel_text_) |
- + (include_trailing_spaces ? |
- kTrailingSpaces : 0)); |
- SetCurrentMoonpiePosition(AfterLastMoonpie() |
- + CurrentBoilerplateLength()); |
- SetCurrentAfterMoonpiePosition(CurrentMoonpiePosition() |
- + strlen(moonpie_text_) |
- + (include_trailing_spaces ? |
- kTrailingSpaces : 0)); |
- } |
- } |
- bool NoMoreMoonpies() const { |
- return CurrentMoonpiePosition() == string::npos; |
- } |
- size_t CurrentWeaselPosition() const { |
- return weasel_positions_[match_index_]; |
- } |
- size_t LastWeaselPosition() const { |
- return weasel_positions_[match_index_ - 1]; |
- } |
- size_t CurrentMoonpiePosition() const { |
- return moonpie_positions_[match_index_]; |
- } |
- size_t LastMoonpiePosition() const { |
- return moonpie_positions_[match_index_ - 1]; |
- } |
- size_t AfterLastWeasel() const { |
- CHECK_GE(match_index_, 1); |
- return after_weasel_[match_index_ - 1]; |
- } |
- size_t AfterPreviousWeasel() const { |
- CHECK_GE(match_index_, 2); |
- return after_weasel_[match_index_ - 2]; |
- } |
- size_t AfterLastMoonpie() const { |
- CHECK_GE(match_index_, 1); |
- return after_moonpie_[match_index_ - 1]; |
- } |
- size_t AfterPreviousMoonpie() const { |
- CHECK_GE(match_index_, 2); |
- return after_moonpie_[match_index_ - 2]; |
- } |
- |
- void SetCurrentWeaselPosition(size_t value) { |
- weasel_positions_[match_index_] = value; |
- } |
- void SetCurrentAfterWeaselPosition(size_t value) { |
- after_weasel_[match_index_] = value; |
- } |
- void SetCurrentMoonpiePosition(size_t value) { |
- moonpie_positions_[match_index_] = value; |
- } |
- void SetCurrentAfterMoonpiePosition(size_t value) { |
- after_moonpie_[match_index_] = value; |
- } |
- |
- // Find the length of the text in between the "weasel" strings in the |
- // compressible dictionary, which is the same as the text between |
- // the "moon-pie" strings in the compressible target. |
- size_t CurrentBoilerplateLength() const { |
- CHECK_GE(match_index_, 1); |
- return CurrentWeaselPosition() - AfterLastWeasel(); |
- } |
- size_t DistanceFromLastWeasel() const { |
- CHECK_GE(match_index_, 1); |
- return CurrentWeaselPosition() - LastWeaselPosition(); |
- } |
- size_t DistanceFromLastMoonpie() const { |
- CHECK_GE(match_index_, 1); |
- return CurrentMoonpiePosition() - LastMoonpiePosition(); |
- } |
- size_t DistanceBetweenLastTwoWeasels() const { |
- CHECK_GE(match_index_, 2); |
- return AfterLastWeasel() - AfterPreviousWeasel(); |
- } |
- size_t DistanceBetweenLastTwoMoonpies() const { |
- CHECK_GE(match_index_, 2); |
- return AfterLastMoonpie() - AfterPreviousMoonpie(); |
- } |
- |
- int32_t FindBoilerplateAddressForCopyMode(int copy_mode) const; |
- int UpdateCopyModeForMoonpie(int copy_mode) const; |
- int32_t FindMoonpieAddressForCopyMode(int copy_mode) const; |
- |
- void CopyBoilerplateAndAddMoonpie(int copy_mode); |
- void CopyBoilerplateAndCopyMoonpie(int copy_mode, int moonpie_copy_mode); |
- |
- static const char dictionary_without_spaces_[]; |
- static const char target_without_spaces_[]; |
- static const char weasel_text_without_spaces_[]; |
- static const char moonpie_text_without_spaces_[]; |
- |
- static const char* dictionary_; |
- static const char* target_; |
- static const char* weasel_text_; |
- static const char* moonpie_text_; |
- |
- const VCDiffEngine engine_; |
- size_t weasel_positions_[128]; |
- size_t after_weasel_[128]; |
- size_t moonpie_positions_[128]; |
- size_t after_moonpie_[128]; |
- int match_index_; |
- string search_dictionary_; |
- size_t copied_moonpie_address_; |
-}; |
- |
-// Care is taken in the formulation of the dictionary |
-// to ensure that the surrounding letters do not match; for example, |
-// there are not two instances of the string "weasels". Otherwise, |
-// the matching behavior would not be as predictable. |
-const char WeaselsToMoonpiesTest::dictionary_without_spaces_[] = |
- "<html>\n" |
- "<head>\n" |
- "<meta content=\"text/html; charset=ISO-8859-1\"\n" |
- "http-equiv=\"content-type\">\n" |
- "<title>All about weasels</title>\n" |
- "</head>\n" |
- "<!-- You will notice that the word \"weasel\" may be replaced" |
- " with something else -->\n" |
- "<body>\n" |
- "<h1>All about the weasel: highly compressible HTML text</h1>" |
- "<ul>\n" |
- "<li>Don\'t look a gift weasel in its mouth.</li>\n" |
- "<li>This item makes sure the next occurrence is found.</li>\n" |
- "<li>Don\'t count your weasel, before it\'s hatched.</li>\n" |
- "</ul>\n" |
- "<br>\n" |
- "</body>\n" |
- "</html>\n"; |
- |
-const char WeaselsToMoonpiesTest::target_without_spaces_[] = |
- "<html>\n" |
- "<head>\n" |
- "<meta content=\"text/html; charset=ISO-8859-1\"\n" |
- "http-equiv=\"content-type\">\n" |
- "<title>All about moon-pies</title>\n" |
- "</head>\n" |
- "<!-- You will notice that the word \"moon-pie\" may be replaced" |
- " with something else -->\n" |
- "<body>\n" |
- "<h1>All about the moon-pie: highly compressible HTML text</h1>" |
- "<ul>\n" |
- "<li>Don\'t look a gift moon-pie in its mouth.</li>\n" |
- "<li>This item makes sure the next occurrence is found.</li>\n" |
- "<li>Don\'t count your moon-pie, before it\'s hatched.</li>\n" |
- "</ul>\n" |
- "<br>\n" |
- "</body>\n" |
- "</html>\n"; |
- |
-const char WeaselsToMoonpiesTest::weasel_text_without_spaces_[] = "weasel"; |
-const char WeaselsToMoonpiesTest::moonpie_text_without_spaces_[] = "moon-pie"; |
- |
-const char* WeaselsToMoonpiesTest::dictionary_ = NULL; |
-const char* WeaselsToMoonpiesTest::target_ = NULL; |
-const char* WeaselsToMoonpiesTest::weasel_text_ = NULL; |
-const char* WeaselsToMoonpiesTest::moonpie_text_ = NULL; |
- |
-int32_t WeaselsToMoonpiesTest::FindBoilerplateAddressForCopyMode( |
- int copy_mode) const { |
- size_t copy_address = 0; |
- if (copy_mode == VCD_SELF_MODE) { |
- copy_address = AfterLastWeasel(); |
- } else if ((copy_mode >= default_cache_.FirstNearMode()) && |
- (copy_mode < default_cache_.FirstSameMode())) { |
- copy_address = DistanceBetweenLastTwoWeasels(); |
- } else if ((copy_mode >= default_cache_.FirstSameMode()) && |
- (copy_mode <= default_cache_.LastMode())) { |
- copy_address = AfterLastWeasel() % 256; |
- } else { |
- LOG(FATAL) << "Unexpected copy mode " << copy_mode; |
- } |
- return static_cast<int32_t>(copy_address); |
-} |
- |
-int WeaselsToMoonpiesTest::UpdateCopyModeForMoonpie(int copy_mode) const { |
- if (copy_mode == default_cache_.FirstSameMode()) { |
- return default_cache_.FirstSameMode() |
- + static_cast<int>((copied_moonpie_address_ / 256) % 3); |
- } else { |
- return copy_mode; |
- } |
-} |
- |
-int32_t WeaselsToMoonpiesTest::FindMoonpieAddressForCopyMode( |
- int copy_mode) const { |
- size_t copy_address = 0; |
- if (copy_mode == VCD_HERE_MODE) { |
- copy_address = DistanceFromLastMoonpie(); |
- } else if ((copy_mode >= default_cache_.FirstNearMode()) && |
- (copy_mode < default_cache_.FirstSameMode())) { |
- copy_address = DistanceBetweenLastTwoMoonpies() - kTrailingSpaces; |
- } else if ((copy_mode >= default_cache_.FirstSameMode()) && |
- (copy_mode <= default_cache_.LastMode())) { |
- copy_address = copied_moonpie_address_ % 256; |
- } else { |
- LOG(FATAL) << "Unexpected copy mode " << copy_mode; |
- } |
- return static_cast<int32_t>(copy_address); |
-} |
- |
-// Expect one dictionary instance of "weasel" to be replaced with "moon-pie" |
-// in the encoding. |
-void WeaselsToMoonpiesTest::CopyBoilerplateAndAddMoonpie(int copy_mode) { |
- EXPECT_FALSE(NoMoreMoonpies()); |
- ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); |
- ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); |
- ExpectAddInstructionForStringLength(moonpie_text_); |
- ExpectDataString(moonpie_text_); |
-} |
- |
-// Expect one dictionary instance of "weasel" to be replaced with "moon-pie" |
-// in the encoding. The "moon-pie" text will be copied from the previously |
-// encoded target. |
-void WeaselsToMoonpiesTest::CopyBoilerplateAndCopyMoonpie( |
- int copy_mode, |
- int moonpie_copy_mode) { |
- EXPECT_FALSE(NoMoreMoonpies()); |
- ExpectCopyForSize(CurrentBoilerplateLength(), copy_mode); |
- ExpectAddress(FindBoilerplateAddressForCopyMode(copy_mode), copy_mode); |
- moonpie_copy_mode = UpdateCopyModeForMoonpie(moonpie_copy_mode); |
- ExpectCopyForSize(strlen(moonpie_text_) + kTrailingSpaces, moonpie_copy_mode); |
- ExpectAddress(FindMoonpieAddressForCopyMode(moonpie_copy_mode), |
- moonpie_copy_mode); |
-} |
- |
-TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleNoTargetMatching) { |
- Encode(/* interleaved = */ true, /* target matching = */ false); |
- FindNextMoonpie(false); |
- // Expect all five "weasel"s to be replaced with "moon-pie"s |
- CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode()); |
- FindNextMoonpie(false); |
- CopyBoilerplateAndAddMoonpie(VCD_SELF_MODE); |
- FindNextMoonpie(false); |
- CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 1); |
- FindNextMoonpie(false); |
- CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 2); |
- FindNextMoonpie(false); |
- CopyBoilerplateAndAddMoonpie(default_cache_.FirstNearMode() + 3); |
- FindNextMoonpie(false); |
- EXPECT_TRUE(NoMoreMoonpies()); |
- ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(), |
- default_cache_.FirstNearMode()); |
- ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels()); |
- VerifySizes(); |
-} |
- |
-TEST_F(WeaselsToMoonpiesTest, EngineEncodeCompressibleWithTargetMatching) { |
- Encode(/* interleaved = */ true, /* target matching = */ true); |
- // Expect all five "weasel"s to be replaced with "moon-pie"s. |
- // Every "moon-pie" after the first one should be copied from the |
- // previously encoded target text. |
- FindNextMoonpie(false); |
- CopyBoilerplateAndAddMoonpie(default_cache_.FirstSameMode()); |
- FindNextMoonpie(true); |
- CopyBoilerplateAndCopyMoonpie(VCD_SELF_MODE, VCD_HERE_MODE); |
- if (kBlockSize <= 4) { |
- copied_moonpie_address_ = strlen(dictionary_) + LastMoonpiePosition(); |
- FindNextMoonpie(true); |
- CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, |
- default_cache_.FirstSameMode()); |
- } else { // kBlockSize > 4 |
- copied_moonpie_address_ = strlen(dictionary_) + CurrentMoonpiePosition(); |
- FindNextMoonpie(true); |
- CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, |
- default_cache_.FirstNearMode() + 2); |
- } |
- LOG(INFO) << "copied_moonpie_address_ : " |
- << copied_moonpie_address_ << LOG_ENDL; |
- FindNextMoonpie(true); |
- CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 3, |
- default_cache_.FirstSameMode()); |
- FindNextMoonpie(true); |
- CopyBoilerplateAndCopyMoonpie(default_cache_.FirstNearMode() + 1, |
- default_cache_.FirstSameMode()); |
- FindNextMoonpie(true); |
- EXPECT_TRUE(NoMoreMoonpies()); |
- ExpectCopyForSize(strlen(dictionary_) - AfterLastWeasel(), |
- default_cache_.FirstNearMode() + 3); |
- ExpectAddressVarintForSize(DistanceBetweenLastTwoWeasels()); |
- VerifySizes(); |
-} |
- |
-} // anonymous namespace |
-} // namespace open-vcdiff |