sdch/open_vcdiff/depot/opensource/open-vcdiff/src/blockhash_test.cc - Issue 5203: Transition to pulling open-vcdiff from repository, instead of using snapshot...

Unified Diff: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/blockhash_test.cc

Issue 5203: Transition to pulling open-vcdiff from repository, instead of using snapshot... (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: Created 12 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « sdch/open_vcdiff/depot/opensource/open-vcdiff/src/blockhash.cc ('k') | sdch/open_vcdiff/depot/opensource/open-vcdiff/src/checksum.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: sdch/open_vcdiff/depot/opensource/open-vcdiff/src/blockhash_test.cc

===================================================================

--- sdch/open_vcdiff/depot/opensource/open-vcdiff/src/blockhash_test.cc (revision 2678)

+++ sdch/open_vcdiff/depot/opensource/open-vcdiff/src/blockhash_test.cc (working copy)

@@ -1,896 +0,0 @@

-// Author: Lincoln Smith

-//

-// Licensed under the Apache License, Version 2.0 (the "License");

-// you may not use this file except in compliance with the License.

-// You may obtain a copy of the License at

-//

-// http://www.apache.org/licenses/LICENSE-2.0

-//

-// Unless required by applicable law or agreed to in writing, software

-// distributed under the License is distributed on an "AS IS" BASIS,

-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

-// See the License for the specific language governing permissions and

-// limitations under the License.

-#include <config.h>

-#include "blockhash.h"

-#include <climits> // INT_MIN

-#include <memory> // auto_ptr

-#include "encodetable.h"

-#include "logging.h"

-#include "rolling_hash.h"

-#include "testing.h"

-namespace open_vcdiff {

-const int kBlockSize = BlockHash::kBlockSize;

-class BlockHashTest : public testing::Test {

- protected:

- static const int kTimingTestSize = 1 << 21; // 2M

- static const int kTimingTestIterations = 32;

- BlockHashTest() {

- dh_.reset(BlockHash::CreateDictionaryHash(sample_text,

- strlen(sample_text)));

- th_.reset(BlockHash::CreateTargetHash(sample_text, strlen(sample_text), 0));

- EXPECT_TRUE(dh_.get() != NULL);

- EXPECT_TRUE(th_.get() != NULL);

- }

- // BlockHashTest is a friend to BlockHash. Expose the protected functions

- // that will be tested by the children of BlockHashTest.

- static bool BlockContentsMatch(const char* block1, const char* block2) {

- return BlockHash::BlockContentsMatch(block1, block2);

- }

- int FirstMatchingBlock(const BlockHash& block_hash,

- uint32_t hash_value,

- const char* block_ptr) const {

- return block_hash.FirstMatchingBlock(hash_value, block_ptr);

- }

- int NextMatchingBlock(const BlockHash& block_hash,

- int block_number,

- const char* block_ptr) const {

- return block_hash.NextMatchingBlock(block_number, block_ptr);

- }

- static int MatchingBytesToLeft(const char* source_match_start,

- const char* target_match_start,

- int max_bytes) {

- return BlockHash::MatchingBytesToLeft(source_match_start,

- target_match_start,

- max_bytes);

- }

- static int MatchingBytesToRight(const char* source_match_end,

- const char* target_match_end,

- int max_bytes) {

- return BlockHash::MatchingBytesToRight(source_match_end,

- target_match_end,

- max_bytes);

- }

- static int StringLengthAsInt(const char* s) {

- return static_cast<int>(strlen(s));

- }

- void InitBlocksToDifferAtNthByte(int n) {

- CHECK(n < kBlockSize);

- memset(compare_buffer_1_, 0xBE, kTimingTestSize);

- memset(compare_buffer_2_, 0xBE, kTimingTestSize);

- for (int index = n; index < kTimingTestSize; index += kBlockSize) {

- compare_buffer_1_[index] = 0x00;

- compare_buffer_2_[index] = 0x01;

- }

- void TestAndPrintTimesForCompareFunctions(bool should_be_identical) {

- CHECK(compare_buffer_1_ != NULL);

- CHECK(compare_buffer_2_ != NULL);

- // Prime the memory cache.

- prime_result_ =

- memcmp(compare_buffer_1_, compare_buffer_2_, kTimingTestSize);

- const char* const block1_limit =

- &compare_buffer_1_[kTimingTestSize - kBlockSize];

- int memcmp_result = 0;

- CycleTimer memcmp_timer;

- memcmp_timer.Start();

- for (int i = 0; i < kTimingTestIterations; ++i) {

- const char* block1 = compare_buffer_1_;

- const char* block2 = compare_buffer_2_;

- while (block1 < block1_limit) {

- if (memcmp(block1, block2, kBlockSize) != 0) {

- ++memcmp_result;

- }

- block1 += kBlockSize;

- block2 += kBlockSize;

- }

- memcmp_timer.Stop();

- double time_for_memcmp = static_cast<double>(memcmp_timer.GetInUsec())

- / (kTimingTestSize * kTimingTestIterations);

- int block_contents_match_result = 0;

- CycleTimer block_contents_match_timer;

- block_contents_match_timer.Start();

- for (int i = 0; i < kTimingTestIterations; ++i) {

- const char* block1 = compare_buffer_1_;

- const char* block2 = compare_buffer_2_;

- while (block1 < block1_limit) {

- if (!BlockHash::BlockContentsMatch(block1, block2)) {

- ++block_contents_match_result;

- }

- block1 += kBlockSize;

- block2 += kBlockSize;

- }

- block_contents_match_timer.Stop();

- double time_for_block_contents_match =

- static_cast<double>(block_contents_match_timer.GetInUsec())

- / (kTimingTestSize * kTimingTestIterations);

- EXPECT_EQ(block_contents_match_result, memcmp_result);

- if (should_be_identical) {

- CHECK_EQ(0, memcmp_result);

- } else {

- CHECK_GT(memcmp_result, 0);

- }

- LOG(INFO) << "memcmp: " << time_for_memcmp << " us per operation"

- << LOG_ENDL;

- LOG(INFO) << "BlockHash::BlockContentsMatch: "

- << time_for_block_contents_match << " us per operation"

- << LOG_ENDL;

- if (time_for_memcmp > 0) {

- LOG(INFO) << "% change: "

- << (((time_for_block_contents_match - time_for_memcmp)

- / time_for_memcmp) * 100.0) << "%" << LOG_ENDL;

- }

-#ifdef NDEBUG

- // Only check timings for optimized build

- const double error_margin = 0.05;

- EXPECT_GT(time_for_memcmp * (1.0 + error_margin),

- time_for_block_contents_match);

-#endif // NDEBUG

- }

- void TimingTestForBlocksThatDifferAtByte(int n) {

- InitBlocksToDifferAtNthByte(n);

- LOG(INFO) << "Comparing blocks that differ at byte " << n << LOG_ENDL;

- TestAndPrintTimesForCompareFunctions(false);

- }

- // Copy sample_text_without_spaces and search_string_without_spaces

- // into newly allocated sample_text and search_string buffers,

- // but pad them with space characters so that every character

- // in sample_text_without_spaces matches (kBlockSize - 1)

- // space characters in sample_text, followed by that character.

- // For example:

- // Since sample_text_without_spaces begins "The only thing"...,

- // if kBlockSize is 4, then 3 space characters will be inserted

- // between each letter of sample_text, as follows:

- // " T h e o n l y t h i n g"...

- // This makes testing simpler, because finding a kBlockSize-byte match

- // between the sample text and search string only depends on the

- // trailing letter in each block.

- static void MakeEachLetterABlock(const char* string_without_spaces,

- const char** result) {

- const size_t length_without_spaces = strlen(string_without_spaces);

- char* padded_text = new char[(kBlockSize * length_without_spaces) + 1];

- memset(padded_text, ' ', kBlockSize * length_without_spaces);

- char* padded_text_ptr = padded_text + (kBlockSize - 1);

- for (size_t i = 0; i < length_without_spaces; ++i) {

- *padded_text_ptr = string_without_spaces[i];

- padded_text_ptr += kBlockSize;

- }

- padded_text[kBlockSize * length_without_spaces] = '\0';

- *result = padded_text;

- }

- static void SetUpTestCase() {

- MakeEachLetterABlock(sample_text_without_spaces, &sample_text);

- MakeEachLetterABlock(search_string_without_spaces, &search_string);

- MakeEachLetterABlock(search_string_altered_without_spaces,

- &search_string_altered);

- MakeEachLetterABlock(search_to_end_without_spaces, &search_to_end_string);

- MakeEachLetterABlock(search_to_beginning_without_spaces,

- &search_to_beginning_string);

- MakeEachLetterABlock(sample_text_many_matches_without_spaces,

- &sample_text_many_matches);

- MakeEachLetterABlock(search_string_many_matches_without_spaces,

- &search_string_many_matches);

- MakeEachLetterABlock("y", &test_string_y);

- MakeEachLetterABlock("e", &test_string_e);

- char* new_test_string_unaligned_e = new char[kBlockSize];

- memset(new_test_string_unaligned_e, ' ', kBlockSize);

- new_test_string_unaligned_e[kBlockSize - 2] = 'e';

- test_string_unaligned_e = new_test_string_unaligned_e;

- char* new_test_string_all_Qs = new char[kBlockSize];

- memset(new_test_string_all_Qs, 'Q', kBlockSize);

- test_string_all_Qs = new_test_string_all_Qs;

- hashed_y = RollingHash<kBlockSize>::Hash(test_string_y);

- hashed_e = RollingHash<kBlockSize>::Hash(test_string_e);

- hashed_f =

- RollingHash<kBlockSize>::Hash(&search_string[index_of_f_in_fearsome]);

- hashed_unaligned_e = RollingHash<kBlockSize>::Hash(test_string_unaligned_e);

- hashed_all_Qs = RollingHash<kBlockSize>::Hash(test_string_all_Qs);

- }

- static void TearDownTestCase() {

- delete[] sample_text;

- delete[] search_string;

- delete[] search_string_altered;

- delete[] search_to_end_string;

- delete[] search_to_beginning_string;

- delete[] sample_text_many_matches;

- delete[] search_string_many_matches;

- delete[] test_string_y;

- delete[] test_string_e;

- delete[] test_string_unaligned_e;

- delete[] test_string_all_Qs;

- }

- // Each block in the sample text and search string is kBlockSize bytes long,

- // and consists of (kBlockSize - 1) space characters

- // followed by a single letter of text.

- // Block numbers of certain characters within the sample text:

- // All six occurrences of "e", in order.

- static const int block_of_first_e = 2;

- static const int block_of_second_e = 16;

- static const int block_of_third_e = 21;

- static const int block_of_fourth_e = 27;

- static const int block_of_fifth_e = 35;

- static const int block_of_sixth_e = 42;

- static const int block_of_y_in_only = 7;

- // The block number is multiplied by kBlockSize to arrive at the

- // index, which points to the (kBlockSize - 1) space characters before

- // the letter specified.

- // Indices of certain characters within the sample text.

- static const int index_of_first_e = block_of_first_e * kBlockSize;

- static const int index_of_fourth_e = block_of_fourth_e * kBlockSize;

- static const int index_of_sixth_e = block_of_sixth_e * kBlockSize;

- static const int index_of_y_in_only = block_of_y_in_only * kBlockSize;

- static const int index_of_space_before_fear_is_fear = 25 * kBlockSize;

- static const int index_of_longest_match_ear_is_fear = 27 * kBlockSize;

- static const int index_of_i_in_fear_is_fear = 31 * kBlockSize;

- static const int index_of_space_before_fear_itself = 33 * kBlockSize;

- static const int index_of_space_before_itself = 38 * kBlockSize;

- static const int index_of_ababc = 4 * kBlockSize;

- // Indices of certain characters within the search strings.

- static const int index_of_second_w_in_what_we = 5 * kBlockSize;

- static const int index_of_second_e_in_what_we_hear = 9 * kBlockSize;

- static const int index_of_f_in_fearsome = 16 * kBlockSize;

- static const int index_of_space_in_eat_itself = 12 * kBlockSize;

- static const int index_of_i_in_itself = 13 * kBlockSize;

- static const int index_of_t_in_use_the = 4 * kBlockSize;

- static const int index_of_o_in_online = 8 * kBlockSize;

- static const char sample_text_without_spaces[];

- static const char search_string_without_spaces[];

- static const char search_string_altered_without_spaces[];

- static const char search_to_end_without_spaces[];

- static const char search_to_beginning_without_spaces[];

- static const char sample_text_many_matches_without_spaces[];

- static const char search_string_many_matches_without_spaces[];

- static const char* sample_text;

- static const char* search_string;

- static const char* search_string_altered;

- static const char* search_to_end_string;

- static const char* search_to_beginning_string;

- static const char* sample_text_many_matches;

- static const char* search_string_many_matches;

- static const char* test_string_y;

- static const char* test_string_e;

- static const char* test_string_all_Qs;

- static const char* test_string_unaligned_e;

- static uint32_t hashed_y;

- static uint32_t hashed_e;

- static uint32_t hashed_f;

- static uint32_t hashed_unaligned_e;

- static uint32_t hashed_all_Qs;

- // Boost scoped_ptr, if available, could be used instead of std::auto_ptr.

- std::auto_ptr<const BlockHash> dh_; // hash table is populated at startup

- std::auto_ptr<BlockHash> th_; // hash table not populated;

- // used to test incremental adds

- BlockHash::Match best_match_;

- char* compare_buffer_1_;

- char* compare_buffer_2_;

- int prime_result_;

-};

-#ifdef GTEST_HAS_DEATH_TEST

-typedef BlockHashTest BlockHashDeathTest;

-#endif // GTEST_HAS_DEATH_TEST

-// The C++ standard requires a separate definition of these static const values,

-// even though their initializers are given within the class definition.

-const int BlockHashTest::block_of_first_e;

-const int BlockHashTest::block_of_second_e;

-const int BlockHashTest::block_of_third_e;

-const int BlockHashTest::block_of_fourth_e;

-const int BlockHashTest::block_of_fifth_e;

-const int BlockHashTest::block_of_sixth_e;

-const int BlockHashTest::block_of_y_in_only;

-const int BlockHashTest::index_of_first_e;

-const int BlockHashTest::index_of_fourth_e;

-const int BlockHashTest::index_of_sixth_e;

-const int BlockHashTest::index_of_y_in_only;

-const int BlockHashTest::index_of_space_before_fear_is_fear;

-const int BlockHashTest::index_of_longest_match_ear_is_fear;

-const int BlockHashTest::index_of_i_in_fear_is_fear;

-const int BlockHashTest::index_of_space_before_fear_itself;

-const int BlockHashTest::index_of_space_before_itself;

-const int BlockHashTest::index_of_ababc;

-const int BlockHashTest::index_of_second_w_in_what_we;

-const int BlockHashTest::index_of_second_e_in_what_we_hear;

-const int BlockHashTest::index_of_f_in_fearsome;

-const int BlockHashTest::index_of_space_in_eat_itself;

-const int BlockHashTest::index_of_i_in_itself;

-const int BlockHashTest::index_of_t_in_use_the;

-const int BlockHashTest::index_of_o_in_online;

-const char BlockHashTest::sample_text_without_spaces[] =

- "The only thing we have to fear is fear itself";

-const char BlockHashTest::search_string_without_spaces[] =

- "What we hear is fearsome";

-const char BlockHashTest::search_string_altered_without_spaces[] =

- "Vhat ve hear is fearsomm";

-const char BlockHashTest::search_to_end_without_spaces[] =

- "Pop will eat itself, eventually";

-const char BlockHashTest::search_to_beginning_without_spaces[] =

- "Use The online dictionary";

-const char BlockHashTest::sample_text_many_matches_without_spaces[] =

- "ababababcab";

-const char BlockHashTest::search_string_many_matches_without_spaces[] =

- "ababc";

-const char* BlockHashTest::sample_text = NULL;

-const char* BlockHashTest::search_string = NULL;

-const char* BlockHashTest::search_string_altered = NULL;

-const char* BlockHashTest::search_to_end_string = NULL;

-const char* BlockHashTest::search_to_beginning_string = NULL;

-const char* BlockHashTest::sample_text_many_matches = NULL;

-const char* BlockHashTest::search_string_many_matches = NULL;

-const char* BlockHashTest::test_string_y = NULL;

-const char* BlockHashTest::test_string_e = NULL;

-const char* BlockHashTest::test_string_unaligned_e = NULL;

-const char* BlockHashTest::test_string_all_Qs = NULL;

-uint32_t BlockHashTest::hashed_y = 0;

-uint32_t BlockHashTest::hashed_e = 0;

-uint32_t BlockHashTest::hashed_f = 0;

-uint32_t BlockHashTest::hashed_unaligned_e = 0;

-uint32_t BlockHashTest::hashed_all_Qs = 0;

-// The two strings passed to BlockHash::MatchingBytesToLeft do have matching

-// characters -- in fact, they're the same string -- but since max_bytes is zero

-// or negative, BlockHash::MatchingBytesToLeft should not read from the strings

-// and should return 0.

-TEST_F(BlockHashTest, MaxBytesZeroDoesNothing) {

- EXPECT_EQ(0, MatchingBytesToLeft(

- &search_string[index_of_f_in_fearsome],

- 0));

- EXPECT_EQ(0, MatchingBytesToRight(

- &search_string[index_of_f_in_fearsome],

- 0));

-TEST_F(BlockHashTest, MaxBytesNegativeDoesNothing) {

- EXPECT_EQ(0, MatchingBytesToLeft(

- &search_string[index_of_f_in_fearsome],

- -1));

- EXPECT_EQ(0, MatchingBytesToLeft(

- &search_string[index_of_f_in_fearsome],

- INT_MIN));

- EXPECT_EQ(0, MatchingBytesToRight(

- &search_string[index_of_f_in_fearsome],

- -1));

- EXPECT_EQ(0, MatchingBytesToRight(

- &search_string[index_of_f_in_fearsome],

- INT_MIN));

-TEST_F(BlockHashTest, MaxBytesOneMatch) {

- EXPECT_EQ(1, MatchingBytesToLeft(

- &search_string[index_of_f_in_fearsome],

- 1));

- EXPECT_EQ(1, MatchingBytesToRight(

- &search_string[index_of_f_in_fearsome],

- 1));

-TEST_F(BlockHashTest, MaxBytesOneNoMatch) {

- EXPECT_EQ(0, MatchingBytesToLeft(

- &search_string[index_of_f_in_fearsome],

- &search_string[index_of_second_e_in_what_we_hear],

- 1));

- EXPECT_EQ(0, MatchingBytesToRight(

- &search_string[index_of_f_in_fearsome],

- &search_string[index_of_second_e_in_what_we_hear - 1],

- 1));

-TEST_F(BlockHashTest, LeftLimitedByMaxBytes) {

- // The number of bytes that match between the original "we hear is fearsome"

- // and the altered "ve hear is fearsome".

- const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");

- const int max_bytes = expected_length - 1;

- EXPECT_EQ(max_bytes, MatchingBytesToLeft(

- &search_string[index_of_f_in_fearsome],

- &search_string_altered[index_of_f_in_fearsome],

- max_bytes));

-TEST_F(BlockHashTest, LeftNotLimited) {

- // The number of bytes that match between the original "we hear is fearsome"

- // and the altered "ve hear is fearsome".

- const int expected_length = kBlockSize * StringLengthAsInt("e hear is ");

- const int max_bytes = expected_length + 1;

- EXPECT_EQ(expected_length, MatchingBytesToLeft(

- &search_string[index_of_f_in_fearsome],

- &search_string_altered[index_of_f_in_fearsome],

- max_bytes));

- EXPECT_EQ(expected_length, MatchingBytesToLeft(

- &search_string[index_of_f_in_fearsome],

- &search_string_altered[index_of_f_in_fearsome],

- INT_MAX));

-TEST_F(BlockHashTest, RightLimitedByMaxBytes) {

- // The number of bytes that match between the original "fearsome"

- // and the altered "fearsomm".

- const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))

- + (kBlockSize - 1); // spacing between letters

- const int max_bytes = expected_length - 1;

- EXPECT_EQ(max_bytes, MatchingBytesToRight(

- &search_string[index_of_f_in_fearsome],

- &search_string_altered[index_of_f_in_fearsome],

- max_bytes));

-TEST_F(BlockHashTest, RightNotLimited) {

- // The number of bytes that match between the original "we hear is fearsome"

- // and the altered "ve hear is fearsome".

- const int expected_length = (kBlockSize * StringLengthAsInt("fearsom"))

- + (kBlockSize - 1); // spacing between letters

- const int max_bytes = expected_length + 1;

- EXPECT_EQ(expected_length, MatchingBytesToRight(

- &search_string[index_of_f_in_fearsome],

- &search_string_altered[index_of_f_in_fearsome],

- max_bytes));

- EXPECT_EQ(expected_length, MatchingBytesToRight(

- &search_string[index_of_f_in_fearsome],

- &search_string_altered[index_of_f_in_fearsome],

- INT_MAX));

-TEST_F(BlockHashTest, BlockContentsMatchIsFasterThanMemcmp) {

- compare_buffer_1_ = new char[kTimingTestSize];

- compare_buffer_2_ = new char[kTimingTestSize];

- // The value 0xBE is arbitrarily chosen. First test with identical contents

- // in the buffers, so that the comparison functions cannot short-circuit

- // and will return true.

- memset(compare_buffer_1_, 0xBE, kTimingTestSize);

- memset(compare_buffer_2_, 0xBE, kTimingTestSize);

- LOG(INFO) << "Comparing " << kTimingTestSize << " identical values:"

- << LOG_ENDL;

- TestAndPrintTimesForCompareFunctions(true);

- // Now change one value in the middle of one buffer, so that the contents

- // are no longer the same.

- compare_buffer_1_[kTimingTestSize / 2] = 0x00;

- LOG(INFO) << "Comparing " << (kTimingTestSize - 1) << " identical values"

- << " and one mismatch:" << LOG_ENDL;

- TestAndPrintTimesForCompareFunctions(false);

- // Set one of the bytes of each block to differ so that

- // none of the compare operations will return true, and run timing tests.

- // In practice, BlockHash::BlockContentsMatch will only be called

- // for two blocks whose hash values match, and the two most important

- // cases are: (1) the blocks are identical, or (2) none of their bytes match.

- TimingTestForBlocksThatDifferAtByte(0);

- TimingTestForBlocksThatDifferAtByte(1);

- TimingTestForBlocksThatDifferAtByte(kBlockSize / 2);

- TimingTestForBlocksThatDifferAtByte(kBlockSize - 1);

- delete[] compare_buffer_1_;

- delete[] compare_buffer_2_;

-TEST_F(BlockHashTest, FindFailsBeforeHashing) {

- EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));

-TEST_F(BlockHashTest, HashOneFindOne) {

- for (int i = 0; i <= index_of_y_in_only; ++i) {

- th_->AddOneIndexHash(i, RollingHash<kBlockSize>::Hash(&sample_text[i]));

- }

- EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*th_, hashed_y,

- test_string_y));

- EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_y_in_only, test_string_y));

-TEST_F(BlockHashTest, HashAllFindOne) {

- EXPECT_EQ(block_of_y_in_only, FirstMatchingBlock(*dh_, hashed_y,

- test_string_y));

- EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_y_in_only, test_string_y));

-TEST_F(BlockHashTest, NonMatchingTextNotFound) {

- EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_all_Qs, test_string_all_Qs));

-// Search for unaligned text. The test string is contained in the

-// sample text (unlike the non-matching string in NonMatchingTextNotFound,

-// above), but it is not aligned on a block boundary. FindMatchingBlock

-// will only work if the test string is aligned on a block boundary.

-//

-// " T h e o n l y"

-// ^^^^ Here is the test string

-//

-TEST_F(BlockHashTest, UnalignedTextNotFound) {

- EXPECT_EQ(-1, FirstMatchingBlock(*dh_, hashed_unaligned_e,

- test_string_unaligned_e));

-TEST_F(BlockHashTest, FindSixMatches) {

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,

- test_string_e));

- EXPECT_EQ(block_of_second_e, NextMatchingBlock(*dh_, block_of_first_e,

- test_string_e));

- EXPECT_EQ(block_of_third_e, NextMatchingBlock(*dh_, block_of_second_e,

- test_string_e));

- EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*dh_, block_of_third_e,

- test_string_e));

- EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*dh_, block_of_fourth_e,

- test_string_e));

- EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*dh_, block_of_fifth_e,

- test_string_e));

- EXPECT_EQ(-1, NextMatchingBlock(*dh_, block_of_sixth_e, test_string_e));

- // Starting over gives same result

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*dh_, hashed_e,

- test_string_e));

-TEST_F(BlockHashTest, AddRangeFindThreeMatches) {

- // Add hash values only for those characters before the fourth instance

- // of "e" in the sample text. Tests that the ending index

- // of AddAllBlocksThroughIndex() is not inclusive: only three matches

- // for "e" should be found.

- th_->AddAllBlocksThroughIndex(index_of_fourth_e);

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

- EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,

- test_string_e));

- EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,

- test_string_e));

- EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));

- // Starting over gives same result

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

-// Try indices that are not even multiples of the block size.

-// Add three ranges and verify the results after each

-// call to AddAllBlocksThroughIndex().

-TEST_F(BlockHashTest, AddRangeWithUnalignedIndices) {

- th_->AddAllBlocksThroughIndex(index_of_first_e + 1);

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

- EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_first_e, test_string_e));

- // Starting over gives same result

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

- // Add the second range to expand the result set

- th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3);

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

- EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,

- test_string_e));

- EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,

- test_string_e));

- EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_third_e, test_string_e));

- // Starting over gives same result

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

- // Add the third range to expand the result set

- th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

- EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,

- test_string_e));

- EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,

- test_string_e));

- EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,

- test_string_e));

- EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));

- // Starting over gives same result

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

-#ifdef GTEST_HAS_DEATH_TEST

-TEST_F(BlockHashDeathTest, AddingRangesInDescendingOrderNoEffect) {

- th_->AddAllBlocksThroughIndex(index_of_fourth_e + 1);

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

- EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,

- test_string_e));

- EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,

- test_string_e));

- EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,

- test_string_e));

- EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));

- // Starting over gives same result

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

- // These calls will produce DFATAL error messages, and will do nothing,

- // since the ranges have already been added.

- EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_fourth_e - 3),

- "<");

- EXPECT_DEBUG_DEATH(th_->AddAllBlocksThroughIndex(index_of_first_e + 1),

- "<");

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

- EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,

- test_string_e));

- EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,

- test_string_e));

- EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,

- test_string_e));

- EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_fourth_e, test_string_e));

- // Starting over gives same result

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

-#endif // GTEST_HAS_DEATH_TEST

-TEST_F(BlockHashTest, AddEntireRangeFindSixMatches) {

- th_->AddAllBlocksThroughIndex(StringLengthAsInt(sample_text));

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

- EXPECT_EQ(block_of_second_e, NextMatchingBlock(*th_, block_of_first_e,

- test_string_e));

- EXPECT_EQ(block_of_third_e, NextMatchingBlock(*th_, block_of_second_e,

- test_string_e));

- EXPECT_EQ(block_of_fourth_e, NextMatchingBlock(*th_, block_of_third_e,

- test_string_e));

- EXPECT_EQ(block_of_fifth_e, NextMatchingBlock(*th_, block_of_fourth_e,

- test_string_e));

- EXPECT_EQ(block_of_sixth_e, NextMatchingBlock(*th_, block_of_fifth_e,

- test_string_e));

- EXPECT_EQ(-1, NextMatchingBlock(*th_, block_of_sixth_e, test_string_e));

- // Starting over gives same result

- EXPECT_EQ(block_of_first_e, FirstMatchingBlock(*th_, hashed_e,

- test_string_e));

-TEST_F(BlockHashTest, ZeroSizeSourceAccepted) {

- BlockHash zero_sized_hash(sample_text, 0, 0);

- EXPECT_EQ(true, zero_sized_hash.Init(true));

- EXPECT_EQ(-1, FirstMatchingBlock(*th_, hashed_y, test_string_y));

-#ifdef GTEST_HAS_DEATH_TEST

-TEST_F(BlockHashDeathTest, BadNextMatchingBlockReturnsNoMatch) {

- EXPECT_DEBUG_DEATH(EXPECT_EQ(-1, NextMatchingBlock(*dh_, 0xFFFFFFFE, " ")),

- "invalid");

-#endif // GTEST_HAS_DEATH_TEST

-TEST_F(BlockHashTest, UnknownFingerprintReturnsNoMatch) {

- EXPECT_EQ(-1, FirstMatchingBlock(*dh_, 0xFAFAFAFA, "FAFA"));

-TEST_F(BlockHashTest, FindBestMatch) {

- dh_->FindBestMatch(hashed_f,

- &search_string[index_of_f_in_fearsome],

- search_string,

- strlen(search_string),

- &best_match_);

- EXPECT_EQ(index_of_longest_match_ear_is_fear, best_match_.source_offset());

- EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());

- // The match includes the spaces after the final character,

- // which is why (kBlockSize - 1) is added to the expected best size.

- EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),

- best_match_.size());

-TEST_F(BlockHashTest, FindBestMatchWithStartingOffset) {

- BlockHash th2(sample_text, strlen(sample_text), 0x10000);

- th2.Init(true); // hash all blocks

- th2.FindBestMatch(hashed_f,

- &search_string[index_of_f_in_fearsome],

- search_string,

- strlen(search_string),

- &best_match_);

- // Offset should begin with dictionary_size

- EXPECT_EQ(0x10000 + (index_of_longest_match_ear_is_fear),

- best_match_.source_offset());

- EXPECT_EQ(index_of_second_e_in_what_we_hear, best_match_.target_offset());

- // The match includes the spaces after the final character,

- // which is why (kBlockSize - 1) is added to the expected best size.

- EXPECT_EQ((strlen("ear is fear") * kBlockSize) + (kBlockSize - 1),

- best_match_.size());

-TEST_F(BlockHashTest, BestMatchReachesEndOfDictionary) {

- // Hash the "i" in "fear itself"

- uint32_t hash_value = RollingHash<kBlockSize>::Hash(

- &search_to_end_string[index_of_i_in_itself]);

- dh_->FindBestMatch(hash_value,

- &search_to_end_string[index_of_i_in_itself],

- search_to_end_string,

- strlen(search_to_end_string),

- &best_match_);

- EXPECT_EQ(index_of_space_before_itself, best_match_.source_offset());

- EXPECT_EQ(index_of_space_in_eat_itself, best_match_.target_offset());

- EXPECT_EQ(strlen(" itself") * kBlockSize, best_match_.size());

-TEST_F(BlockHashTest, BestMatchReachesStartOfDictionary) {

- // Hash the "i" in "fear itself"

- uint32_t hash_value = RollingHash<kBlockSize>::Hash(

- &search_to_beginning_string[index_of_o_in_online]);

- dh_->FindBestMatch(hash_value,

- &search_to_beginning_string[index_of_o_in_online],

- search_to_beginning_string,

- strlen(search_to_beginning_string),

- &best_match_);

- EXPECT_EQ(0, best_match_.source_offset()); // beginning of dictionary

- EXPECT_EQ(index_of_t_in_use_the, best_match_.target_offset());

- // The match includes the spaces after the final character,

- // which is why (kBlockSize - 1) is added to the expected best size.

- EXPECT_EQ((strlen("The onl") * kBlockSize) + (kBlockSize - 1),

- best_match_.size());

-TEST_F(BlockHashTest, BestMatchWithManyMatches) {

- BlockHash many_matches_hash(sample_text_many_matches,

- strlen(sample_text_many_matches),

- 0);

- EXPECT_TRUE(many_matches_hash.Init(true));

- // Hash the " a" at the beginning of the search string "ababc"

- uint32_t hash_value =

- RollingHash<kBlockSize>::Hash(search_string_many_matches);

- many_matches_hash.FindBestMatch(hash_value,

- search_string_many_matches,

- strlen(search_string_many_matches),

- &best_match_);

- EXPECT_EQ(index_of_ababc, best_match_.source_offset());

- EXPECT_EQ(0, best_match_.target_offset());

- EXPECT_EQ(strlen(search_string_many_matches), best_match_.size());

-TEST_F(BlockHashTest, HashCollisionFindsNoMatch) {

- char* collision_search_string = new char[strlen(search_string) + 1];

- memcpy(collision_search_string, search_string, strlen(search_string) + 1);

- char* fearsome_location = &collision_search_string[index_of_f_in_fearsome];

- // Tweak the collision string so that it has the same hash value

- // but different text. The last four characters of the search string

- // should be " f", and the bytes given below have the same hash value

- // as those characters.

- CHECK_GE(kBlockSize, 4);

- fearsome_location[kBlockSize - 4] = 0x84;

- fearsome_location[kBlockSize - 3] = 0xF1;

- fearsome_location[kBlockSize - 2] = 0x51;

- fearsome_location[kBlockSize - 1] = 0x00;

- EXPECT_EQ(hashed_f, RollingHash<kBlockSize>::Hash(fearsome_location));

- EXPECT_NE(0, memcmp(&search_string[index_of_f_in_fearsome],

- fearsome_location,

- kBlockSize));

- // No match should be found this time.

- dh_->FindBestMatch(hashed_f,

- fearsome_location,

- collision_search_string,

- strlen(search_string), // since collision_search_string has embedded \0

- &best_match_);

- EXPECT_EQ(-1, best_match_.source_offset());

- EXPECT_EQ(-1, best_match_.target_offset());

- EXPECT_EQ(0U, best_match_.size());

- delete[] collision_search_string;

-// If the footprint passed to FindBestMatch does not actually match

-// the search string, it should not find any matches.

-TEST_F(BlockHashTest, WrongFootprintFindsNoMatch) {

- dh_->FindBestMatch(hashed_e, // Using hashed value of "e" instead of "f"!

- &search_string[index_of_f_in_fearsome],

- search_string,

- strlen(search_string),

- &best_match_);

- EXPECT_EQ(-1, best_match_.source_offset());

- EXPECT_EQ(-1, best_match_.target_offset());

- EXPECT_EQ(0U, best_match_.size());

-// Use a dictionary containing 1M copies of the letter 'Q',

-// and target data that also contains 1M Qs. If FindBestMatch

-// is not throttled to find a maximum number of matches, this

-// will take a very long time -- several seconds at least.

-// If this test appears to hang, it is because the throttling code

-// (see BlockHash::kMaxMatchesToCheck for details) is not working.

-TEST_F(BlockHashTest, SearchStringFindsTooManyMatches) {

- const int kTestSize = 1 << 20; // 1M

- char* huge_dictionary = new char[kTestSize];

- memset(huge_dictionary, 'Q', kTestSize);

- BlockHash huge_bh(huge_dictionary, kTestSize, 0);

- EXPECT_TRUE(huge_bh.Init(/* populate_hash_table = */ true));

- char* huge_target = new char[kTestSize];

- memset(huge_target, 'Q', kTestSize);

- CycleTimer timer;

- timer.Start();

- huge_bh.FindBestMatch(hashed_all_Qs,

- huge_target + (kTestSize / 2), // middle of target

- huge_target,

- kTestSize,

- &best_match_);

- timer.Stop();

- double elapsed_time_in_us = static_cast<double>(timer.GetInUsec());

- LOG(INFO) << "Time to search for best match with 1M matches: "

- << elapsed_time_in_us << " us" << LOG_ENDL;

- // All blocks match the candidate block. FindBestMatch should have checked

- // a certain number of matches before giving up. The best match

- // should include at least half the source and target, since the candidate

- // block was in the middle of the target data.

- EXPECT_GT((kTestSize / 2), best_match_.source_offset());

- EXPECT_GT((kTestSize / 2), best_match_.target_offset());

- EXPECT_LT(static_cast<size_t>(kTestSize / 2), best_match_.size());

- EXPECT_GT(1000000, elapsed_time_in_us); // < 1 second

- delete[] huge_target;

- delete[] huge_dictionary;

-#ifdef GTEST_HAS_DEATH_TEST

-TEST_F(BlockHashDeathTest, AddTooManyBlocks) {

- for (int i = 0; i < StringLengthAsInt(sample_text_without_spaces); ++i) {

- th_->AddOneIndexHash(i * kBlockSize, hashed_e);

- }

- // Didn't expect another block to be added

- EXPECT_DEBUG_DEATH(th_->AddOneIndexHash(StringLengthAsInt(sample_text),

- hashed_e),

- "AddBlock");

-#endif // GTEST_HAS_DEATH_TEST

-} // namespace open_vcdiff