| Index: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/blockhash_test.cc
|
| ===================================================================
|
| --- sdch/open_vcdiff/depot/opensource/open-vcdiff/src/blockhash_test.cc (revision 2678)
|
| +++ sdch/open_vcdiff/depot/opensource/open-vcdiff/src/blockhash_test.cc (working copy)
|
| @@ -1,896 +0,0 @@
|
| -// Copyright 2008 Google Inc.
|
| -// Author: Lincoln Smith
|
| -//
|
| -// Licensed under the Apache License, Version 2.0 (the "License");
|
| -// you may not use this file except in compliance with the License.
|
| -// You may obtain a copy of the License at
|
| -//
|
| -// http://www.apache.org/licenses/LICENSE-2.0
|
| -//
|
| -// Unless required by applicable law or agreed to in writing, software
|
| -// distributed under the License is distributed on an "AS IS" BASIS,
|
| -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| -// See the License for the specific language governing permissions and
|
| -// limitations under the License.
|
| -
|
| -#include <config.h>
|
| -#include "blockhash.h"
|
| -#include <climits> // INT_MIN
|
| -#include <memory> // auto_ptr
|
| -#include "encodetable.h"
|
| -#include "logging.h"
|
| -#include "rolling_hash.h"
|
| -#include "testing.h"
|
| -
|
| -namespace open_vcdiff {
|
| -
|
| -const int kBlockSize = BlockHash::kBlockSize;
|
| -
|
| -class BlockHashTest : public testing::Test {
|
| - protected:
|
| - static const int kTimingTestSize = 1 << 21; // 2M
|
| - static const int kTimingTestIterations = 32;
|
| -
|
| - BlockHashTest() {
|
| - dh_.reset(BlockHash::CreateDictionaryHash(sample_text,
|
| - strlen(sample_text)));
|
| - th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0));
|
| - EXPECT_TRUE(dh_.get() != NULL);
|
| - EXPECT_TRUE(th_.get() != NULL);
|
| - }
|
| -
|
| - // BlockHashTest is a friend to BlockHash. Expose the protected functions
|
| - // that will be tested by the children of BlockHashTest.
|
| - static bool BlockContentsMatch(const char* block1, const char* block2) {
|
| - return BlockHash::BlockContentsMatch(block1, block2);
|
| - }
|
| -
|
| - int FirstMatchingBlock(const BlockHash& block_hash,
|
| - uint32_t hash_value,
|
| - const char* block_ptr) const {
|
| - return block_hash.FirstMatchingBlock(hash_value, block_ptr);
|
| - }
|
| -
|
| - int NextMatchingBlock(const BlockHash& block_hash,
|
| - int block_number,
|
| - const char* block_ptr) const {
|
| - return block_hash.NextMatchingBlock(block_number, block_ptr);
|
| - }
|
| -
|
| - static int MatchingBytesToLeft(const char* source_match_start,
|
| - const char* target_match_start,
|
| - int max_bytes) {
|
| - return BlockHash::MatchingBytesToLeft(source_match_start,
|
| - target_match_start,
|
| - max_bytes);
|
| - }
|
| -
|
| - static int MatchingBytesToRight(const char* source_match_end,
|
| - const char* target_match_end,
|
| - int max_bytes) {
|
| - return BlockHash::MatchingBytesToRight(source_match_end,
|
| - target_match_end,
|
| - max_bytes);
|
| - }
|
| -
|
| - static int StringLengthAsInt(const char* s) {
|
| - return static_cast<int>(strlen(s));
|
| - }
|
| -
|
| - void InitBlocksToDifferAtNthByte(int n) {
|
| - CHECK(n < kBlockSize);
|
| - memset(compare_buffer_1_, 0xBE, kTimingTestSize);
|
| - memset(compare_buffer_2_, 0xBE, kTimingTestSize);
|
| - for (int index = n; index < kTimingTestSize; index += kBlockSize) {
|
| - compare_buffer_1_[index] = 0x00;
|
| - compare_buffer_2_[index] = 0x01;
|
| - }
|
| - }
|
| -
|
| - void TestAndPrintTimesForCompareFunctions(bool should_be_identical) {
|
| - CHECK(compare_buffer_1_ != NULL);
|
| - CHECK(compare_buffer_2_ != NULL);
|
| - // Prime the memory cache.
|
| - prime_result_ =
|
| - memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize);
|
| - const char* const block1_limit =
|
| - &compare_buffer_1_[kTimingTestSize - kBlockSize];
|
| - int memcmp_result = 0;
|
| - CycleTimer memcmp_timer;
|
| - memcmp_timer.Start();
|
| - for (int i = 0; i < kTimingTestIterations; ++i) {
|
| - const char* block1 = compare_buffer_1_;
|
| - const char* block2 = compare_buffer_2_;
|
| - while (block1 < block1_limit) {
|
| - if (memcmp(block1, block2, kBlockSize) != 0) {
|
| - ++memcmp_result;
|
| - }
|
| - block1 += kBlockSize;
|
| - block2 += kBlockSize;
|
| - }
|
| - }
|
| - memcmp_timer.Stop();
|
| - double time_for_memcmp = static_cast<double>(memcmp_timer.GetInUsec())
|
| - / (kTimingTestSize * kTimingTestIterations);
|
| - int block_contents_match_result = 0;
|
| - CycleTimer block_contents_match_timer;
|
| - block_contents_match_timer.Start();
|
| - for (int i = 0; i < kTimingTestIterations; ++i) {
|
| - const char* block1 = compare_buffer_1_;
|
| - const char* block2 = compare_buffer_2_;
|
| - while (block1 < block1_limit) {
|
| - if (!BlockHash::BlockContentsMatch(block1, block2)) {
|
| - ++block_contents_match_result;
|
| - }
|
| - block1 += kBlockSize;
|
| - block2 += kBlockSize;
|
| - }
|
| - }
|
| - block_contents_match_timer.Stop();
|
| - double time_for_block_contents_match =
|
| - static_cast<double>(block_contents_match_timer.GetInUsec())
|
| - / (kTimingTestSize * kTimingTestIterations);
|
| - EXPECT_EQ(block_contents_match_result, memcmp_result);
|
| - if (should_be_identical) {
|
| - CHECK_EQ(0, memcmp_result);
|
| - } else {
|
| - CHECK_GT(memcmp_result, 0);
|
| - }
|
| - LOG(INFO) << "memcmp: " << time_for_memcmp << " us per operation"
|
| - << LOG_ENDL;
|
| - LOG(INFO) << "BlockHash::BlockContentsMatch: "
|
| - << time_for_block_contents_match << " us per operation"
|
| - << LOG_ENDL;
|
| - if (time_for_memcmp > 0) {
|
| - LOG(INFO) << "% change: "
|
| - << (((time_for_block_contents_match - time_for_memcmp)
|
| - / time_for_memcmp) * 100.0) << "%" << LOG_ENDL;
|
| - }
|
| -#ifdef NDEBUG
|
| - // Only check timings for optimized build
|
| - const double error_margin = 0.05;
|
| - EXPECT_GT(time_for_memcmp * (1.0 + error_margin),
|
| - time_for_block_contents_match);
|
| -#endif // NDEBUG
|
| - }
|
| -
|
| - void TimingTestForBlocksThatDifferAtByte(int n) {
|
| - InitBlocksToDifferAtNthByte(n);
|
| - LOG(INFO) << "Comparing blocks that differ at byte " << n << LOG_ENDL;
|
| - TestAndPrintTimesForCompareFunctions(false);
|
| - }
|
| -
|
| - // Copy sample_text_without_spaces and search_string_without_spaces
|
| - // into newly allocated sample_text and search_string buffers,
|
| - // but pad them with space characters so that every character
|
| - // in sample_text_without_spaces matches (kBlockSize - 1)
|
| - // space characters in sample_text, followed by that character.
|
| - // For example:
|
| - // Since sample_text_without_spaces begins "The only thing"...,
|
| - // if kBlockSize is 4, then 3 space characters will be inserted
|
| - // between each letter of sample_text, as follows:
|
| - // " T h e o n l y t h i n g"...
|
| - // This makes testing simpler, because finding a kBlockSize-byte match
|
| - // between the sample text and search string only depends on the
|
| - // trailing letter in each block.
|
| - static void MakeEachLetterABlock(const char* string_without_spaces,
|
| - const char** result) {
|
| - const size_t length_without_spaces = strlen(string_without_spaces);
|
| - char* padded_text = new char[(kBlockSize * length_without_spaces) + 1];
|
| - memset(padded_text, ' ', kBlockSize * length_without_spaces);
|
| - char* padded_text_ptr = padded_text + (kBlockSize - 1);
|
| - for (size_t i = 0; i < length_without_spaces; ++i) {
|
| - *padded_text_ptr = string_without_spaces[i];
|
| - padded_text_ptr += kBlockSize;
|
| - }
|
| - padded_text[kBlockSize * length_without_spaces] = '\0';
|
| - *result = padded_text;
|
| - }
|
| -
|
| - static void SetUpTestCase() {
|
| - MakeEachLetterABlock(sample_text_without_spaces, &sample_text);
|
| - MakeEachLetterABlock(search_string_without_spaces, &search_string);
|
| - MakeEachLetterABlock(search_string_altered_without_spaces,
|
| - &search_string_altered);
|
| - MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string);
|
| - MakeEachLetterABlock(search_to_beginning_without_spaces,
|
| - &search_to_beginning_string);
|
| - MakeEachLetterABlock(sample_text_many_matches_without_spaces,
|
| - &sample_text_many_matches);
|
| - MakeEachLetterABlock(search_string_many_matches_without_spaces,
|
| - &search_string_many_matches);
|
| - MakeEachLetterABlock("y", &test_string_y);
|
| - MakeEachLetterABlock("e", &test_string_e);
|
| - char* new_test_string_unaligned_e = new char[kBlockSize];
|
| - memset(new_test_string_unaligned_e, ' ', kBlockSize);
|
| - new_test_string_unaligned_e[kBlockSize - 2] = 'e';
|
| - test_string_unaligned_e = new_test_string_unaligned_e;
|
| - char* new_test_string_all_Qs = new char[kBlockSize];
|
| - memset(new_test_string_all_Qs, 'Q', kBlockSize);
|
| - test_string_all_Qs = new_test_string_all_Qs;
|
| - hashed_y = RollingHash<kBlockSize>::Hash(test_string_y);
|
| - hashed_e = RollingHash<kBlockSize>::Hash(test_string_e);
|
| - hashed_f =
|
| - RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]);
|
| - hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e);
|
| - hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs);
|
| - }
|
| -
|
| - static void TearDownTestCase() {
|
| - delete[] sample_text;
|
| - delete[] search_string;
|
| - delete[] search_string_altered;
|
| - delete[] search_to_end_string;
|
| - delete[] search_to_beginning_string;
|
| - delete[] sample_text_many_matches;
|
| - delete[] search_string_many_matches;
|
| - delete[] test_string_y;
|
| - delete[] test_string_e;
|
| - delete[] test_string_unaligned_e;
|
| - delete[] test_string_all_Qs;
|
| - }
|
| -
|
| - // Each block in the sample text and search string is kBlockSize bytes long,
|
| - // and consists of (kBlockSize - 1) space characters
|
| - // followed by a single letter of text.
|
| -
|
| - // Block numbers of certain characters within the sample text:
|
| - // All six occurrences of "e", in order.
|
| - static const int block_of_first_e = 2;
|
| - static const int block_of_second_e = 16;
|
| - static const int block_of_third_e = 21;
|
| - static const int block_of_fourth_e = 27;
|
| - static const int block_of_fifth_e = 35;
|
| - static const int block_of_sixth_e = 42;
|
| -
|
| - static const int block_of_y_in_only = 7;
|
| - // The block number is multiplied by kBlockSize to arrive at the
|
| - // index, which points to the (kBlockSize - 1) space characters before
|
| - // the letter specified.
|
| - // Indices of certain characters within the sample text.
|
| - static const int index_of_first_e = block_of_first_e * kBlockSize;
|
| - static const int index_of_fourth_e = block_of_fourth_e * kBlockSize;
|
| - static const int index_of_sixth_e = block_of_sixth_e * kBlockSize;
|
| - static const int index_of_y_in_only = block_of_y_in_only * kBlockSize;
|
| - static const int index_of_space_before_fear_is_fear = 25 * kBlockSize;
|
| - static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize;
|
| - static const int index_of_i_in_fear_is_fear = 31 * kBlockSize;
|
| - static const int index_of_space_before_fear_itself = 33 * kBlockSize;
|
| - static const int index_of_space_before_itself = 38 * kBlockSize;
|
| - static const int index_of_ababc = 4 * kBlockSize;
|
| -
|
| - // Indices of certain characters within the search strings.
|
| - static const int index_of_second_w_in_what_we = 5 * kBlockSize;
|
| - static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize;
|
| - static const int index_of_f_in_fearsome = 16 * kBlockSize;
|
| - static const int index_of_space_in_eat_itself = 12 * kBlockSize;
|
| - static const int index_of_i_in_itself = 13 * kBlockSize;
|
| - static const int index_of_t_in_use_the = 4 * kBlockSize;
|
| - static const int index_of_o_in_online = 8 * kBlockSize;
|
| -
|
| - static const char sample_text_without_spaces[];
|
| - static const char search_string_without_spaces[];
|
| - static const char search_string_altered_without_spaces[];
|
| - static const char search_to_end_without_spaces[];
|
| - static const char search_to_beginning_without_spaces[];
|
| - static const char sample_text_many_matches_without_spaces[];
|
| - static const char search_string_many_matches_without_spaces[];
|
| -
|
| - static const char* sample_text;
|
| - static const char* search_string;
|
| - static const char* search_string_altered;
|
| - static const char* search_to_end_string;
|
| - static const char* search_to_beginning_string;
|
| - static const char* sample_text_many_matches;
|
| - static const char* search_string_many_matches;
|
| -
|
| - static const char* test_string_y;
|
| - static const char* test_string_e;
|
| - static const char* test_string_all_Qs;
|
| - static const char* test_string_unaligned_e;
|
| -
|
| - static uint32_t hashed_y;
|
| - static uint32_t hashed_e;
|
| - static uint32_t hashed_f;
|
| - static uint32_t hashed_unaligned_e;
|
| - static uint32_t hashed_all_Qs;
|
| -
|
| - // Boost scoped_ptr, if available, could be used instead of std::auto_ptr.
|
| - std::auto_ptr<const BlockHash> dh_; // hash table is populated at startup
|
| - std::auto_ptr<BlockHash> th_; // hash table not populated;
|
| - // used to test incremental adds
|
| -
|
| - BlockHash::Match best_match_;
|
| - char* compare_buffer_1_;
|
| - char* compare_buffer_2_;
|
| - int prime_result_;
|
| -};
|
| -
|
| -#ifdef GTEST_HAS_DEATH_TEST
|
| -typedef BlockHashTest BlockHashDeathTest;
|
| -#endif // GTEST_HAS_DEATH_TEST
|
| -
|
| -// The C++ standard requires a separate definition of these static const values,
|
| -// even though their initializers are given within the class definition.
|
| -const int BlockHashTest::block_of_first_e;
|
| -const int BlockHashTest::block_of_second_e;
|
| -const int BlockHashTest::block_of_third_e;
|
| -const int BlockHashTest::block_of_fourth_e;
|
| -const int BlockHashTest::block_of_fifth_e;
|
| -const int BlockHashTest::block_of_sixth_e;
|
| -const int BlockHashTest::block_of_y_in_only;
|
| -const int BlockHashTest::index_of_first_e;
|
| -const int BlockHashTest::index_of_fourth_e;
|
| -const int BlockHashTest::index_of_sixth_e;
|
| -const int BlockHashTest::index_of_y_in_only;
|
| -const int BlockHashTest::index_of_space_before_fear_is_fear;
|
| -const int BlockHashTest::index_of_longest_match_ear_is_fear;
|
| -const int BlockHashTest::index_of_i_in_fear_is_fear;
|
| -const int BlockHashTest::index_of_space_before_fear_itself;
|
| -const int BlockHashTest::index_of_space_before_itself;
|
| -const int BlockHashTest::index_of_ababc;
|
| -const int BlockHashTest::index_of_second_w_in_what_we;
|
| -const int BlockHashTest::index_of_second_e_in_what_we_hear;
|
| -const int BlockHashTest::index_of_f_in_fearsome;
|
| -const int BlockHashTest::index_of_space_in_eat_itself;
|
| -const int BlockHashTest::index_of_i_in_itself;
|
| -const int BlockHashTest::index_of_t_in_use_the;
|
| -const int BlockHashTest::index_of_o_in_online;
|
| -
|
| -const char BlockHashTest::sample_text_without_spaces[] =
|
| - "The only thing we have to fear is fear itself";
|
| -
|
| -const char BlockHashTest::search_string_without_spaces[] =
|
| - "What we hear is fearsome";
|
| -
|
| -const char BlockHashTest::search_string_altered_without_spaces[] =
|
| - "Vhat ve hear is fearsomm";
|
| -
|
| -const char BlockHashTest::search_to_end_without_spaces[] =
|
| - "Pop will eat itself, eventually";
|
| -
|
| -const char BlockHashTest::search_to_beginning_without_spaces[] =
|
| - "Use The online dictionary";
|
| -
|
| -const char BlockHashTest::sample_text_many_matches_without_spaces[] =
|
| - "ababababcab";
|
| -
|
| -const char BlockHashTest::search_string_many_matches_without_spaces[] =
|
| - "ababc";
|
| -
|
| -const char* BlockHashTest::sample_text = NULL;
|
| -const char* BlockHashTest::search_string = NULL;
|
| -const char* BlockHashTest::search_string_altered = NULL;
|
| -const char* BlockHashTest::search_to_end_string = NULL;
|
| -const char* BlockHashTest::search_to_beginning_string = NULL;
|
| -const char* BlockHashTest::sample_text_many_matches = NULL;
|
| -const char* BlockHashTest::search_string_many_matches = NULL;
|
| -
|
| -const char* BlockHashTest::test_string_y = NULL;
|
| -const char* BlockHashTest::test_string_e = NULL;
|
| -const char* BlockHashTest::test_string_unaligned_e = NULL;
|
| -const char* BlockHashTest::test_string_all_Qs = NULL;
|
| -
|
| -uint32_t BlockHashTest::hashed_y = 0;
|
| -uint32_t BlockHashTest::hashed_e = 0;
|
| -uint32_t BlockHashTest::hashed_f = 0;
|
| -uint32_t BlockHashTest::hashed_unaligned_e = 0;
|
| -uint32_t BlockHashTest::hashed_all_Qs = 0;
|
| -
|
| -// The two strings passed to BlockHash::MatchingBytesToLeft do have matching
|
| -// characters -- in fact, they're the same string -- but since max_bytes is zero
|
| -// or negative, BlockHash::MatchingBytesToLeft should not read from the strings
|
| -// and should return 0.
|
| -TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) {
|
| - EXPECT_EQ(0, MatchingBytesToLeft(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string[index_of_f_in_fearsome],
|
| - 0));
|
| - EXPECT_EQ(0, MatchingBytesToRight(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string[index_of_f_in_fearsome],
|
| - 0));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) {
|
| - EXPECT_EQ(0, MatchingBytesToLeft(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string[index_of_f_in_fearsome],
|
| - -1));
|
| - EXPECT_EQ(0, MatchingBytesToLeft(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string[index_of_f_in_fearsome],
|
| - INT_MIN));
|
| - EXPECT_EQ(0, MatchingBytesToRight(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string[index_of_f_in_fearsome],
|
| - -1));
|
| - EXPECT_EQ(0, MatchingBytesToRight(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string[index_of_f_in_fearsome],
|
| - INT_MIN));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, MaxBytesOneMatch) {
|
| - EXPECT_EQ(1, MatchingBytesToLeft(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string[index_of_f_in_fearsome],
|
| - 1));
|
| - EXPECT_EQ(1, MatchingBytesToRight(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string[index_of_f_in_fearsome],
|
| - 1));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, MaxBytesOneNoMatch) {
|
| - EXPECT_EQ(0, MatchingBytesToLeft(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string[index_of_second_e_in_what_we_hear],
|
| - 1));
|
| - EXPECT_EQ(0, MatchingBytesToRight(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string[index_of_second_e_in_what_we_hear - 1],
|
| - 1));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, LeftLimitedByMaxBytes) {
|
| - // The number of bytes that match between the original "we hear is fearsome"
|
| - // and the altered "ve hear is fearsome".
|
| - const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
|
| - const int max_bytes = expected_length - 1;
|
| - EXPECT_EQ(max_bytes, MatchingBytesToLeft(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string_altered[index_of_f_in_fearsome],
|
| - max_bytes));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, LeftNotLimited) {
|
| - // The number of bytes that match between the original "we hear is fearsome"
|
| - // and the altered "ve hear is fearsome".
|
| - const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");
|
| - const int max_bytes = expected_length + 1;
|
| - EXPECT_EQ(expected_length, MatchingBytesToLeft(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string_altered[index_of_f_in_fearsome],
|
| - max_bytes));
|
| - EXPECT_EQ(expected_length, MatchingBytesToLeft(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string_altered[index_of_f_in_fearsome],
|
| - INT_MAX));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, RightLimitedByMaxBytes) {
|
| - // The number of bytes that match between the original "fearsome"
|
| - // and the altered "fearsomm".
|
| - const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
|
| - + (kBlockSize - 1); // spacing between letters
|
| - const int max_bytes = expected_length - 1;
|
| - EXPECT_EQ(max_bytes, MatchingBytesToRight(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string_altered[index_of_f_in_fearsome],
|
| - max_bytes));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, RightNotLimited) {
|
| - // The number of bytes that match between the original "we hear is fearsome"
|
| - // and the altered "ve hear is fearsome".
|
| - const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))
|
| - + (kBlockSize - 1); // spacing between letters
|
| - const int max_bytes = expected_length + 1;
|
| - EXPECT_EQ(expected_length, MatchingBytesToRight(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string_altered[index_of_f_in_fearsome],
|
| - max_bytes));
|
| - EXPECT_EQ(expected_length, MatchingBytesToRight(
|
| - &search_string[index_of_f_in_fearsome],
|
| - &search_string_altered[index_of_f_in_fearsome],
|
| - INT_MAX));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, BlockContentsMatchIsFasterThanMemcmp) {
|
| - compare_buffer_1_ = new char[kTimingTestSize];
|
| - compare_buffer_2_ = new char[kTimingTestSize];
|
| -
|
| - // The value 0xBE is arbitrarily chosen. First test with identical contents
|
| - // in the buffers, so that the comparison functions cannot short-circuit
|
| - // and will return true.
|
| - memset(compare_buffer_1_, 0xBE, kTimingTestSize);
|
| - memset(compare_buffer_2_, 0xBE, kTimingTestSize);
|
| - LOG(INFO) << "Comparing " << kTimingTestSize << " identical values:"
|
| - << LOG_ENDL;
|
| - TestAndPrintTimesForCompareFunctions(true);
|
| -
|
| - // Now change one value in the middle of one buffer, so that the contents
|
| - // are no longer the same.
|
| - compare_buffer_1_[kTimingTestSize / 2] = 0x00;
|
| - LOG(INFO) << "Comparing " << (kTimingTestSize - 1) << " identical values"
|
| - << " and one mismatch:" << LOG_ENDL;
|
| - TestAndPrintTimesForCompareFunctions(false);
|
| -
|
| - // Set one of the bytes of each block to differ so that
|
| - // none of the compare operations will return true, and run timing tests.
|
| - // In practice, BlockHash::BlockContentsMatch will only be called
|
| - // for two blocks whose hash values match, and the two most important
|
| - // cases are: (1) the blocks are identical, or (2) none of their bytes match.
|
| - TimingTestForBlocksThatDifferAtByte(0);
|
| - TimingTestForBlocksThatDifferAtByte(1);
|
| - TimingTestForBlocksThatDifferAtByte(kBlockSize / 2);
|
| - TimingTestForBlocksThatDifferAtByte(kBlockSize - 1);
|
| -
|
| - delete[] compare_buffer_1_;
|
| - delete[] compare_buffer_2_;
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, FindFailsBeforeHashing) {
|
| - EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, HashOneFindOne) {
|
| - for (int i = 0; i <= index_of_y_in_only; ++i) {
|
| - th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i]));
|
| - }
|
| - EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y,
|
| - test_string_y));
|
| - EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, HashAllFindOne) {
|
| - EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y,
|
| - test_string_y));
|
| - EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, NonMatchingTextNotFound) {
|
| - EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs));
|
| -}
|
| -
|
| -// Search for unaligned text. The test string is contained in the
|
| -// sample text (unlike the non-matching string in NonMatchingTextNotFound,
|
| -// above), but it is not aligned on a block boundary. FindMatchingBlock
|
| -// will only work if the test string is aligned on a block boundary.
|
| -//
|
| -// " T h e o n l y"
|
| -// ^^^^ Here is the test string
|
| -//
|
| -TEST_F(BlockHashTest, UnalignedTextNotFound) {
|
| - EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e,
|
| - test_string_unaligned_e));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, FindSixMatches) {
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e,
|
| - test_string_e));
|
| - EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e));
|
| -
|
| - // Starting over gives same result
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,
|
| - test_string_e));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, AddRangeFindThreeMatches) {
|
| - // Add hash values only for those characters before the fourth instance
|
| - // of "e" in the sample text. Tests that the ending index
|
| - // of AddAllBlocksThroughIndex() is not inclusive: only three matches
|
| - // for "e" should be found.
|
| - th_->AddAllBlocksThroughIndex(index_of_fourth_e);
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
|
| - test_string_e));
|
| - EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
|
| -
|
| - // Starting over gives same result
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| -}
|
| -
|
| -// Try indices that are not even multiples of the block size.
|
| -// Add three ranges and verify the results after each
|
| -// call to AddAllBlocksThroughIndex().
|
| -TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) {
|
| - th_->AddAllBlocksThroughIndex(index_of_first_e + 1);
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| - EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e));
|
| -
|
| - // Starting over gives same result
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| -
|
| - // Add the second range to expand the result set
|
| - th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3);
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
|
| - test_string_e));
|
| - EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));
|
| -
|
| - // Starting over gives same result
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| -
|
| - // Add the third range to expand the result set
|
| - th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
|
| -
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
|
| - test_string_e));
|
| - EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
|
| -
|
| - // Starting over gives same result
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| -}
|
| -
|
| -#ifdef GTEST_HAS_DEATH_TEST
|
| -TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) {
|
| - th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);
|
| -
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
|
| - test_string_e));
|
| - EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
|
| -
|
| - // Starting over gives same result
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| -
|
| - // These calls will produce DFATAL error messages, and will do nothing,
|
| - // since the ranges have already been added.
|
| - EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3),
|
| - "<");
|
| - EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1),
|
| - "<");
|
| -
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
|
| - test_string_e));
|
| - EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));
|
| -
|
| - // Starting over gives same result
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| -}
|
| -#endif // GTEST_HAS_DEATH_TEST
|
| -
|
| -TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) {
|
| - th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text));
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e,
|
| - test_string_e));
|
| - EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e,
|
| - test_string_e));
|
| - EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e));
|
| -
|
| - // Starting over gives same result
|
| - EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,
|
| - test_string_e));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, ZeroSizeSourceAccepted) {
|
| - BlockHash zero_sized_hash(sample_text, 0, 0);
|
| - EXPECT_EQ(true, zero_sized_hash.Init(true));
|
| - EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));
|
| -}
|
| -
|
| -#ifdef GTEST_HAS_DEATH_TEST
|
| -TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) {
|
| - EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, " ")),
|
| - "invalid");
|
| -}
|
| -#endif // GTEST_HAS_DEATH_TEST
|
| -
|
| -TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) {
|
| - EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA"));
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, FindBestMatch) {
|
| - dh_->FindBestMatch(hashed_f,
|
| - &search_string[index_of_f_in_fearsome],
|
| - search_string,
|
| - strlen(search_string),
|
| - &best_match_);
|
| - EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset());
|
| - EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
|
| - // The match includes the spaces after the final character,
|
| - // which is why (kBlockSize - 1) is added to the expected best size.
|
| - EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
|
| - best_match_.size());
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) {
|
| - BlockHash th2(sample_text, strlen(sample_text), 0x10000);
|
| - th2.Init(true); // hash all blocks
|
| - th2.FindBestMatch(hashed_f,
|
| - &search_string[index_of_f_in_fearsome],
|
| - search_string,
|
| - strlen(search_string),
|
| - &best_match_);
|
| - // Offset should begin with dictionary_size
|
| - EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear),
|
| - best_match_.source_offset());
|
| - EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());
|
| - // The match includes the spaces after the final character,
|
| - // which is why (kBlockSize - 1) is added to the expected best size.
|
| - EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),
|
| - best_match_.size());
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) {
|
| - // Hash the "i" in "fear itself"
|
| - uint32_t hash_value = RollingHash<kBlockSize>::Hash(
|
| - &search_to_end_string[index_of_i_in_itself]);
|
| - dh_->FindBestMatch(hash_value,
|
| - &search_to_end_string[index_of_i_in_itself],
|
| - search_to_end_string,
|
| - strlen(search_to_end_string),
|
| - &best_match_);
|
| - EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset());
|
| - EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset());
|
| - EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size());
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) {
|
| - // Hash the "i" in "fear itself"
|
| - uint32_t hash_value = RollingHash<kBlockSize>::Hash(
|
| - &search_to_beginning_string[index_of_o_in_online]);
|
| - dh_->FindBestMatch(hash_value,
|
| - &search_to_beginning_string[index_of_o_in_online],
|
| - search_to_beginning_string,
|
| - strlen(search_to_beginning_string),
|
| - &best_match_);
|
| - EXPECT_EQ(0, best_match_.source_offset()); // beginning of dictionary
|
| - EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset());
|
| - // The match includes the spaces after the final character,
|
| - // which is why (kBlockSize - 1) is added to the expected best size.
|
| - EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1),
|
| - best_match_.size());
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, BestMatchWithManyMatches) {
|
| - BlockHash many_matches_hash(sample_text_many_matches,
|
| - strlen(sample_text_many_matches),
|
| - 0);
|
| - EXPECT_TRUE(many_matches_hash.Init(true));
|
| - // Hash the " a" at the beginning of the search string "ababc"
|
| - uint32_t hash_value =
|
| - RollingHash<kBlockSize>::Hash(search_string_many_matches);
|
| - many_matches_hash.FindBestMatch(hash_value,
|
| - search_string_many_matches,
|
| - search_string_many_matches,
|
| - strlen(search_string_many_matches),
|
| - &best_match_);
|
| - EXPECT_EQ(index_of_ababc, best_match_.source_offset());
|
| - EXPECT_EQ(0, best_match_.target_offset());
|
| - EXPECT_EQ(strlen(search_string_many_matches), best_match_.size());
|
| -}
|
| -
|
| -TEST_F(BlockHashTest, HashCollisionFindsNoMatch) {
|
| - char* collision_search_string = new char[strlen(search_string) + 1];
|
| - memcpy(collision_search_string, search_string, strlen(search_string) + 1);
|
| - char* fearsome_location = &collision_search_string[index_of_f_in_fearsome];
|
| -
|
| - // Tweak the collision string so that it has the same hash value
|
| - // but different text. The last four characters of the search string
|
| - // should be " f", and the bytes given below have the same hash value
|
| - // as those characters.
|
| - CHECK_GE(kBlockSize, 4);
|
| - fearsome_location[kBlockSize - 4] = 0x84;
|
| - fearsome_location[kBlockSize - 3] = 0xF1;
|
| - fearsome_location[kBlockSize - 2] = 0x51;
|
| - fearsome_location[kBlockSize - 1] = 0x00;
|
| - EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location));
|
| - EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome],
|
| - fearsome_location,
|
| - kBlockSize));
|
| - // No match should be found this time.
|
| - dh_->FindBestMatch(hashed_f,
|
| - fearsome_location,
|
| - collision_search_string,
|
| - strlen(search_string), // since collision_search_string has embedded \0
|
| - &best_match_);
|
| - EXPECT_EQ(-1, best_match_.source_offset());
|
| - EXPECT_EQ(-1, best_match_.target_offset());
|
| - EXPECT_EQ(0U, best_match_.size());
|
| - delete[] collision_search_string;
|
| -}
|
| -
|
| -// If the footprint passed to FindBestMatch does not actually match
|
| -// the search string, it should not find any matches.
|
| -TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) {
|
| - dh_->FindBestMatch(hashed_e, // Using hashed value of "e" instead of "f"!
|
| - &search_string[index_of_f_in_fearsome],
|
| - search_string,
|
| - strlen(search_string),
|
| - &best_match_);
|
| - EXPECT_EQ(-1, best_match_.source_offset());
|
| - EXPECT_EQ(-1, best_match_.target_offset());
|
| - EXPECT_EQ(0U, best_match_.size());
|
| -}
|
| -
|
| -// Use a dictionary containing 1M copies of the letter 'Q',
|
| -// and target data that also contains 1M Qs. If FindBestMatch
|
| -// is not throttled to find a maximum number of matches, this
|
| -// will take a very long time -- several seconds at least.
|
| -// If this test appears to hang, it is because the throttling code
|
| -// (see BlockHash::kMaxMatchesToCheck for details) is not working.
|
| -TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) {
|
| - const int kTestSize = 1 << 20; // 1M
|
| - char* huge_dictionary = new char[kTestSize];
|
| - memset(huge_dictionary, 'Q', kTestSize);
|
| - BlockHash huge_bh(huge_dictionary, kTestSize, 0);
|
| - EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true));
|
| - char* huge_target = new char[kTestSize];
|
| - memset(huge_target, 'Q', kTestSize);
|
| - CycleTimer timer;
|
| - timer.Start();
|
| - huge_bh.FindBestMatch(hashed_all_Qs,
|
| - huge_target + (kTestSize / 2), // middle of target
|
| - huge_target,
|
| - kTestSize,
|
| - &best_match_);
|
| - timer.Stop();
|
| - double elapsed_time_in_us = static_cast<double>(timer.GetInUsec());
|
| - LOG(INFO) << "Time to search for best match with 1M matches: "
|
| - << elapsed_time_in_us << " us" << LOG_ENDL;
|
| - // All blocks match the candidate block. FindBestMatch should have checked
|
| - // a certain number of matches before giving up. The best match
|
| - // should include at least half the source and target, since the candidate
|
| - // block was in the middle of the target data.
|
| - EXPECT_GT((kTestSize / 2), best_match_.source_offset());
|
| - EXPECT_GT((kTestSize / 2), best_match_.target_offset());
|
| - EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size());
|
| - EXPECT_GT(1000000, elapsed_time_in_us); // < 1 second
|
| - delete[] huge_target;
|
| - delete[] huge_dictionary;
|
| -}
|
| -
|
| -#ifdef GTEST_HAS_DEATH_TEST
|
| -TEST_F(BlockHashDeathTest, AddTooManyBlocks) {
|
| - for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) {
|
| - th_->AddOneIndexHash(i * kBlockSize, hashed_e);
|
| - }
|
| - // Didn't expect another block to be added
|
| - EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text),
|
| - hashed_e),
|
| - "AddBlock");
|
| -}
|
| -#endif // GTEST_HAS_DEATH_TEST
|
| -
|
| -} // namespace open_vcdiff
|
|
|