OLD | NEW |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "courgette/third_party/bsdiff/bsdiff_search.h" | 5 #include "courgette/third_party/bsdiff/bsdiff_search.h" |
6 | 6 |
7 #include <cstring> | 7 #include <cstring> |
8 #include <vector> | 8 #include <vector> |
9 | 9 |
10 #include "base/macros.h" | 10 #include "base/macros.h" |
11 #include "courgette/third_party/bsdiff/qsufsort.h" | 11 #include "courgette/third_party/bsdiff/qsufsort.h" |
12 #include "testing/gtest/include/gtest/gtest.h" | 12 #include "testing/gtest/include/gtest/gtest.h" |
13 | 13 |
14 TEST(BSDiffSearchTest, Search) { | 14 TEST(BSDiffSearchTest, Search) { |
15 // Initialize main string and the suffix array. | 15 // Initialize main string and the suffix array. |
16 // Positions: 000000000011111111112222222222333333333344444 | 16 // Positions: 000000000011111111112222222222333333333344444 |
17 // 012345678901234567890123456789012345678901234 | 17 // 012345678901234567890123456789012345678901234 |
18 const char* str = "the quick brown fox jumps over the lazy dog."; | 18 const char* str = "the quick brown fox jumps over the lazy dog."; |
19 int size = static_cast<int>(::strlen(str)); | 19 int size = static_cast<int>(::strlen(str)); |
20 const unsigned char* buf = reinterpret_cast<const unsigned char*>(str); | 20 const unsigned char* buf = reinterpret_cast<const unsigned char*>(str); |
21 std::vector<int> I(size + 1); | 21 std::vector<int> I(size + 1); |
22 std::vector<int> V(size + 1); | 22 std::vector<int> V(size + 1); |
23 courgette::qsuf::qsufsort<int*>(&I[0], &V[0], buf, size); | 23 qsuf::qsufsort<int*>(&I[0], &V[0], buf, size); |
24 | 24 |
25 // Specific queries. | 25 // Specific queries. |
26 const struct { | 26 const struct { |
27 int exp_pos; // -1 means "don't care". | 27 int exp_match_pos; // -1 means "don't care". |
28 int exp_match_size; | 28 int exp_match_size; |
29 const char* query_str; | 29 const char* query_str; |
30 } test_cases[] = { | 30 } test_cases[] = { |
31 // Entire string: exact and unique. | 31 // Entire string: exact and unique. |
32 {0, 44, "the quick brown fox jumps over the lazy dog."}, | 32 {0, 44, "the quick brown fox jumps over the lazy dog."}, |
33 // Empty string: exact and non-unique. | 33 // Empty string: exact and non-unique. |
34 {-1, 0, ""}, | 34 {-1, 0, ""}, |
35 // Exact and unique suffix matches. | 35 // Exact and unique suffix matches. |
36 {43, 1, "."}, | 36 {43, 1, "."}, |
37 {31, 13, "the lazy dog."}, | 37 {31, 13, "the lazy dog."}, |
38 // Exact and unique non-suffix matches. | 38 // Exact and unique non-suffix matches. |
39 {4, 5, "quick"}, | 39 {4, 5, "quick"}, |
40 {0, 9, "the quick"}, // Unique prefix. | 40 {0, 9, "the quick"}, // Unique prefix. |
41 // Partial and unique matches. | 41 // Partial and unique matches. |
42 {16, 10, "fox jumps with the hosps"}, // Unique prefix. | 42 {16, 10, "fox jumps with the hosps"}, // Unique prefix. |
43 {18, 1, "xyz"}, | 43 {18, 1, "xyz"}, |
44 // Exact and non-unique match: take lexicographical first. | 44 // Exact and non-unique match: take lexicographical first. |
45 {-1, 3, "the"}, // Non-unique prefix. | 45 {-1, 3, "the"}, // Non-unique prefix. |
46 {-1, 1, " "}, | 46 {-1, 1, " "}, |
47 // Partial and non-unique match: no guarantees on |pos|! | 47 // Partial and non-unique match: no guarantees on |match.pos|! |
48 {-1, 4, "the apple"}, // query < "the l"... < "the q"... | 48 {-1, 4, "the apple"}, // query < "the l"... < "the q"... |
49 {-1, 4, "the opera"}, // "the l"... < query < "the q"... | 49 {-1, 4, "the opera"}, // "the l"... < query < "the q"... |
50 {-1, 4, "the zebra"}, // "the l"... < "the q"... < query | 50 {-1, 4, "the zebra"}, // "the l"... < "the q"... < query |
51 // Prefix match dominates suffix match (unique). | 51 // Prefix match dominates suffix match (unique). |
52 {26, 5, "over quick brown fox"}, | 52 {26, 5, "over quick brown fox"}, |
53 // Empty matches. | 53 // Empty matches. |
54 {-1, 0, ","}, | 54 {-1, 0, ","}, |
55 {-1, 0, "1234"}, | 55 {-1, 0, "1234"}, |
56 {-1, 0, "THE QUICK BROWN FOX"}, | 56 {-1, 0, "THE QUICK BROWN FOX"}, |
57 {-1, 0, "(the"}, | 57 {-1, 0, "(the"}, |
58 }; | 58 }; |
59 | 59 |
60 for (size_t idx = 0; idx < arraysize(test_cases); ++idx) { | 60 for (size_t idx = 0; idx < arraysize(test_cases); ++idx) { |
61 const auto& test_case = test_cases[idx]; | 61 const auto& test_case = test_cases[idx]; |
62 int query_size = static_cast<int>(::strlen(test_case.query_str)); | 62 int query_size = static_cast<int>(::strlen(test_case.query_str)); |
63 const unsigned char* query_buf = | 63 const unsigned char* query_buf = |
64 reinterpret_cast<const unsigned char*>(test_case.query_str); | 64 reinterpret_cast<const unsigned char*>(test_case.query_str); |
65 | 65 |
66 // Perform the search. | 66 // Perform the search. |
67 int pos = 0; | 67 bsdiff::SearchResult match = |
68 int match_size = | 68 bsdiff::search(&I[0], buf, size, query_buf, query_size); |
69 courgette::search(&I[0], buf, size, query_buf, query_size, &pos); | |
70 | 69 |
71 // Check basic properties and match with expected values. | 70 // Check basic properties and match with expected values. |
72 EXPECT_GE(match_size, 0); | 71 EXPECT_GE(match.size, 0); |
73 EXPECT_LE(match_size, query_size); | 72 EXPECT_LE(match.size, query_size); |
74 if (match_size > 0) { | 73 if (match.size > 0) { |
75 EXPECT_GE(pos, 0); | 74 EXPECT_GE(match.pos, 0); |
76 EXPECT_LE(pos, size - match_size); | 75 EXPECT_LE(match.pos, size - match.size); |
77 EXPECT_EQ(0, ::memcmp(buf + pos, query_buf, match_size)); | 76 EXPECT_EQ(0, ::memcmp(buf + match.pos, query_buf, match.size)); |
78 } | 77 } |
79 if (test_case.exp_pos >= 0) { | 78 if (test_case.exp_match_pos >= 0) { |
80 EXPECT_EQ(test_case.exp_pos, pos); | 79 EXPECT_EQ(test_case.exp_match_pos, match.pos); |
81 } | 80 } |
82 EXPECT_EQ(test_case.exp_match_size, match_size); | 81 EXPECT_EQ(test_case.exp_match_size, match.size); |
83 } | 82 } |
84 } | 83 } |
85 | 84 |
86 TEST(BSDiffSearchTest, SearchExact) { | 85 TEST(BSDiffSearchTest, SearchExact) { |
87 const char* test_cases[] = { | 86 const char* test_cases[] = { |
88 "a", | 87 "a", |
89 "aa", | 88 "aa", |
90 "az", | 89 "az", |
91 "za", | 90 "za", |
92 "aaaaa", | 91 "aaaaa", |
93 "CACAO", | 92 "CACAO", |
94 "banana", | 93 "banana", |
95 "tobeornottobe", | 94 "tobeornottobe", |
96 "the quick brown fox jumps over the lazy dog.", | 95 "the quick brown fox jumps over the lazy dog.", |
97 "elephantelephantelephantelephantelephant", | 96 "elephantelephantelephantelephantelephant", |
98 "011010011001011010010110011010010", | 97 "011010011001011010010110011010010", |
99 }; | 98 }; |
100 for (size_t idx = 0; idx < arraysize(test_cases); ++idx) { | 99 for (size_t idx = 0; idx < arraysize(test_cases); ++idx) { |
101 int size = static_cast<int>(::strlen(test_cases[idx])); | 100 int size = static_cast<int>(::strlen(test_cases[idx])); |
102 const unsigned char* buf = | 101 const unsigned char* buf = |
103 reinterpret_cast<const unsigned char*>(test_cases[idx]); | 102 reinterpret_cast<const unsigned char*>(test_cases[idx]); |
104 std::vector<int> I(size + 1); | 103 std::vector<int> I(size + 1); |
105 std::vector<int> V(size + 1); | 104 std::vector<int> V(size + 1); |
106 courgette::qsuf::qsufsort<int*>(&I[0], &V[0], buf, size); | 105 qsuf::qsufsort<int*>(&I[0], &V[0], buf, size); |
107 | 106 |
108 // Test exact matches for every non-empty substring. | 107 // Test exact matches for every non-empty substring. |
109 for (int lo = 0; lo < size; ++lo) { | 108 for (int lo = 0; lo < size; ++lo) { |
110 for (int hi = lo + 1; hi <= size; ++hi) { | 109 for (int hi = lo + 1; hi <= size; ++hi) { |
111 std::string query(buf + lo, buf + hi); | 110 std::string query(buf + lo, buf + hi); |
112 int query_size = static_cast<int>(query.length()); | 111 int query_size = static_cast<int>(query.length()); |
113 ASSERT_EQ(query_size, hi - lo); | 112 ASSERT_EQ(query_size, hi - lo); |
114 const unsigned char* query_buf = | 113 const unsigned char* query_buf = |
115 reinterpret_cast<const unsigned char*>(query.c_str()); | 114 reinterpret_cast<const unsigned char*>(query.c_str()); |
116 int pos = 0; | 115 bsdiff::SearchResult match = |
117 int match_size = | 116 bsdiff::search(&I[0], buf, size, query_buf, query_size); |
118 courgette::search(&I[0], buf, size, query_buf, query_size, &pos); | |
119 | 117 |
120 EXPECT_EQ(query_size, match_size); | 118 EXPECT_EQ(query_size, match.size); |
121 EXPECT_GE(pos, 0); | 119 EXPECT_GE(match.pos, 0); |
122 EXPECT_LE(pos, size - match_size); | 120 EXPECT_LE(match.pos, size - match.size); |
123 std::string suffix(buf + pos, buf + size); | 121 std::string suffix(buf + match.pos, buf + size); |
124 EXPECT_EQ(suffix.substr(0, query_size), query); | 122 EXPECT_EQ(suffix.substr(0, query_size), query); |
125 } | 123 } |
126 } | 124 } |
127 } | 125 } |
128 } | 126 } |
OLD | NEW |