Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(167)

Unified Diff: courgette/third_party/qsufsort_unittest.cc

Issue 1272453003: [Courgette] Extract qsufsort module and add tests. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Using int, but with explicit cast from ::strlen(). Created 5 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « courgette/third_party/qsufsort.h ('k') | tools/checklicenses/checklicenses.py » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: courgette/third_party/qsufsort_unittest.cc
diff --git a/courgette/third_party/qsufsort_unittest.cc b/courgette/third_party/qsufsort_unittest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9eb6720f7aaf17ba8accf5f86d5a169f83ea80a1
--- /dev/null
+++ b/courgette/third_party/qsufsort_unittest.cc
@@ -0,0 +1,135 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "courgette/third_party/qsufsort.h"
+
+#include <algorithm>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include "base/macros.h"
+#include "base/memory/scoped_ptr.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(QSufSortTest, Sort) {
+ const char* test_cases[] = {
+ "",
+ "a",
+ "za",
+ "CACAO",
+ "banana",
+ "tobeornottobe",
+ "The quick brown fox jumps over the lazy dog.",
+ "elephantelephantelephantelephantelephant",
+ "-------------------------",
+ "011010011001011010010110011010010",
+ "3141592653589793238462643383279502884197169399375105",
+ "\xFF\xFE\xFF\xFE\xFD\x80\x30\x31\x32\x80\x30\xFF\x01\xAB\xCD",
+ };
+
+ for (size_t idx = 0; idx < arraysize(test_cases); ++idx) {
+ int len = static_cast<int>(::strlen(test_cases[idx]));
+ const unsigned char* s =
+ reinterpret_cast<const unsigned char*>(test_cases[idx]);
+
+ // Generate the suffix array as I.
+ std::vector<int> I(len + 1);
+ std::vector<int> V(len + 1);
+ courgette::qsuf::qsufsort<int*>(&I[0], &V[0], s, len);
+
+ // Expect that I[] is a permutation of [0, len].
+ std::vector<int> I_sorted(I);
+ std::sort(I_sorted.begin(), I_sorted.end());
+ for (int i = 0; i < len + 1; ++i) {
+ EXPECT_EQ(i, I_sorted[i]) << "test_case[" << idx << "]";
+ }
+
+ // First string must be empty string.
+ EXPECT_EQ(len, I[0]) << "test_case[" << idx << "]";
+
+ // Expect that the |len + 1| suffixes are strictly ordered.
+ const unsigned char* end = s + len;
+ for (int i = 0; i < len; ++i) {
+ const unsigned char* suf1 = s + I[i];
+ const unsigned char* suf2 = s + I[i + 1];
+ bool is_less = std::lexicographical_compare(suf1, end, suf2, end);
+ EXPECT_TRUE(is_less) << "test_case[" << idx << "]";
+ }
+ }
+}
+
+TEST(QSufSortTest, Search) {
+ // Initialize main string and the suffix array.
+ // Positions: 00000000001111111111122222222233333333334444
+ // 01234567890123456789012345678901234567890123
+ const char* old_str = "the quick brown fox jumps over the lazy dog.";
+ int old_size = static_cast<int>(::strlen(old_str));
+ const unsigned char* old_buf =
+ reinterpret_cast<const unsigned char*>(old_str);
+ std::vector<int> I(old_size + 1);
+ std::vector<int> V(old_size + 1);
+ courgette::qsuf::qsufsort<int*>(&I[0], &V[0], old_buf, old_size);
+
+ // Test queries.
+ const struct {
+ int exp_pos; // -1 means "don't care".
+ int exp_match_len;
+ const char* query_str;
+ } test_cases[] = {
+ // Entire string.
+ {0, 44, "the quick brown fox jumps over the lazy dog."},
+ // Empty string.
+ {-1, 0, ""}, // Current algorithm does not enforce |pos| == 0.
+ // Exact and unique suffix match.
+ {43, 1, "."},
+ {31, 13, "the lazy dog."},
+ // Exact and unique non-suffix match.
+ {4, 5, "quick"},
+ {0, 9, "the quick"}, // Unique prefix.
+ // Entire word match with mutiple results: take lexicographical first.
+ {31, 3, "the"}, // Non-unique prefix: "the l"... < "the q"...
+ {9, 1, " "}, // " brown"... wins.
+ // Partial and unique match of query prefix.
+ {16, 10, "fox jumps with the hosps"},
+ // Partial and multiple match of query prefix: no guarantees on |pos|.
+ // Take lexicographical first for matching portion *only*, so same results:
+ {-1, 4, "the apple"}, // query < "the l"... < "the q"...
+ {-1, 4, "the opera"}, // "the l"... < query < "the q"...
+ {-1, 4, "the zebra"}, // "the l"... < "the q"... < query
+ // Prefix match dominates suffix match.
+ {26, 5, "over quick brown fox"},
+ // No match.
+ {-1, 0, ","},
+ {-1, 0, "1234"},
+ {-1, 0, "THE QUICK BROWN FOX"},
+ {-1, 0, "(the"},
+ };
+
+ for (size_t idx = 0; idx < arraysize(test_cases); ++idx) {
+ const auto& test_case = test_cases[idx];
+ int new_size = static_cast<int>(::strlen(test_case.query_str));
+ const unsigned char* new_buf =
+ reinterpret_cast<const unsigned char*>(test_case.query_str);
+
+ // Perform the search.
+ int pos = 0;
+ int match_len = courgette::qsuf::search(
+ &I[0], old_buf, old_size, new_buf, new_size, 0, old_size, &pos);
+
+ // Check basic properties and match with expected values.
+ EXPECT_GE(match_len, 0) << "test_case[" << idx << "]";
+ EXPECT_LE(match_len, new_size) << "test_case[" << idx << "]";
+ if (match_len > 0) {
+ EXPECT_GE(pos, 0) << "test_case[" << idx << "]";
+ EXPECT_LE(pos, old_size - match_len) << "test_case[" << idx << "]";
+ EXPECT_EQ(0, ::memcmp(old_buf + pos, new_buf, match_len))
+ << "test_case[" << idx << "]";
+ }
+ if (test_case.exp_pos >= 0) {
+ EXPECT_EQ(test_case.exp_pos, pos) << "test_case[" << idx << "]";
+ }
+ EXPECT_EQ(test_case.exp_match_len, match_len) << "test_case[" << idx << "]";
+ }
+}
« no previous file with comments | « courgette/third_party/qsufsort.h ('k') | tools/checklicenses/checklicenses.py » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698