Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(117)

Unified Diff: chrome/browser/history/top_sites_cache_unittest.cc

Issue 23477033: Implementing URL prefix match for history thumbnail cache. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/browser/history/top_sites_cache_unittest.cc
diff --git a/chrome/browser/history/top_sites_cache_unittest.cc b/chrome/browser/history/top_sites_cache_unittest.cc
new file mode 100755
index 0000000000000000000000000000000000000000..e83cfb0175c59e219cd04f41f3a1107623295dca
--- /dev/null
+++ b/chrome/browser/history/top_sites_cache_unittest.cc
@@ -0,0 +1,218 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#include "chrome/browser/history/top_sites_cache.h"
+
+#include <set>
+
+#include "base/basictypes.h"
+#include "base/logging.h"
+#include "base/scoped_ptr.h"
+#include "base/strings/string16.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+
+namespace history {
+
+namespace {
+
+class TopSitesCacheTest : public testing::Test {
+ protected:
+ // Fills |top_sites_| from |spec| (|size| entries) and loads it into |cache_|.
+ // Each entry is a URL string, optionally indented with spaces: an indented
+ // URL redirects to the last non-indented URL. Non-indented URLs are titled
+ // "Title 1", "Title 2", etc., in order. See |kTopSitesSpecBasic| for a sample.
+ void InitTopSiteCache(char** spec, int size);
+
+ MostVisitedURLList top_sites_;  // Entries built by InitTopSiteCache().
+ TopSitesCache cache_;  // Cache under test; receives |top_sites_|.
+};
+
+// Populates |top_sites_| from the |size| entries of |spec| and then hands the
+// list to |cache_|. An entry without leading spaces starts a new MostVisitedURL
+// titled "Title N" (N counts such entries from 1); every entry, indented or
+// not, is appended to the redirect list of the most recent MostVisitedURL.
+// Uses gtest ASSERT_* macros, so a duplicate URL or a leading indented entry
+// records a fatal failure and returns before |cache_| is updated.
+void TopSitesCacheTest::InitTopSiteCache(char** spec, int size) {
+  std::set<std::string> urls_seen;
+  for (int i = 0; i < size; ++i) {
+    const char* spec_item = spec[i];
+    while (*spec_item == ' ')  // Eat indent; stops at the terminating NUL.
+      ++spec_item;
+    // insert() reports through .second whether the URL was new, so a single
+    // lookup both records the URL and detects duplicates.
+    ASSERT_TRUE(urls_seen.insert(spec_item).second)
+        << "Duplicate URL found: " << spec_item;
+    if (spec_item == spec[i]) {  // No indent: add new MostVisitedURL.
+      // string16 is not wchar_t-based on every platform, so build the prefix
+      // with ASCIIToUTF16() rather than a wide (L"...") literal.
+      const string16 title(ASCIIToUTF16("Title ") +
+                           base::Uint64ToString16(top_sites_.size() + 1));
+      top_sites_.push_back(MostVisitedURL(GURL(spec_item), title));
+    }
+    // An indented entry must follow at least one non-indented entry.
+    ASSERT_TRUE(!top_sites_.empty());
+    // Set up redirect to canonical URL. Canonical URL redirects to itself, too.
+    top_sites_.back().redirects.push_back(GURL(spec_item));
+  }
+  cache_.SetTopSites(top_sites_);
+}
+
+// Checks CanonicalURLComparator::CompareString() against a hand-sorted list:
+// no element may compare less than itself, and for every pair the earlier
+// element must compare less than the later one, never the reverse.
+TEST_F(TopSitesCacheTest, CanonicalURLComparator) {
+  // Comprehensive test by comparing each pair in sorted list. O(n^2).
+  const char* sorted_list[] = {
+    "http://www.gogle.com/redirects_to_google",
+    "http://www.google.com",
+    "http://www.google.com/",
+    "http://www.google.com/?q",
+    "http://www.google.com/A",
+    "http://www.google.com/index.html",
+    "http://www.google.com/test",
+    "http://www.google.com/test?q=3",
+    "http://www.google.com/test?r=3",
+    "http://www.google.com/test/zzzzz",
+    "http://www.google.com/test-case",
+    "http://www.google.com:80/",
+    "https://www.google.com",
+  };
+  // Unsigned indices: arraysize() yields a size_t, and comparing it with an
+  // int triggers signed/unsigned comparison warnings.
+  for (size_t i = 0; i < arraysize(sorted_list); ++i) {
+    // Irreflexivity: an element is never "less than" itself.
+    EXPECT_FALSE(
+        CanonicalURLComparator::CompareString(sorted_list[i], sorted_list[i]))
+        << " for \"" << sorted_list[i] << "\" < \"" << sorted_list[i] << "\"";
+    // Every disjoint pair-wise comparison.
+    for (size_t j = i + 1; j < arraysize(sorted_list); ++j) {
+      EXPECT_TRUE(
+          CanonicalURLComparator::CompareString(sorted_list[i], sorted_list[j]))
+          << " for \"" << sorted_list[i] << "\" < \"" << sorted_list[j] << "\"";
+      EXPECT_FALSE(
+          CanonicalURLComparator::CompareString(sorted_list[j], sorted_list[i]))
+          << " for \"" << sorted_list[j] << "\" < \"" << sorted_list[i] << "\"";
+    }
+  }
+}
+
+char* kTopSitesSpecBasic[] = {  // Spec format: see InitTopSiteCache().
+ "http://www.google.com",  // Non-indented: canonical URL.
+ " http://www.gogle.com",  // Indented: redirects to the canonical URL above.
+ " http://www.gooogle.com",  // Indented: same canonical URL.
+ "http://www.youtube.com/a/b",  // Canonical URL with a path.
+ " http://www.youtube.com/a/b?test=1",  // Redirect carrying a query string.
+ "https://www.google.com",  // Canonical URL; differs from the first by scheme.
+ " https://www.gogle.com",  // Redirects to the https canonical URL.
+ "http://www.example.com:3141",  // Canonical URL with an explicit port.
+};
+
+// Table-driven check of TopSitesCache::GetCanonicalURL() against the cache
+// built from |kTopSitesSpecBasic|.
+TEST_F(TopSitesCacheTest, GetCanonicalURL) {
+  InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
+  // Each row is {expected canonical URL, URL passed to GetCanonicalURL()}.
+  const char* const kCases[][2] = {
+    // Already is canonical: redirects.
+    {"http://www.google.com", "http://www.google.com"},
+    // Exact match with stored URL: redirects.
+    {"http://www.google.com", "http://www.gooogle.com"},
+    // Recognizes despite trailing "/".
+    {"http://www.google.com", "http://www.gooogle.com/"},
+    // Exact match with URL with query: redirects.
+    {"http://www.youtube.com/a/b", "http://www.youtube.com/a/b?test=1"},
+    // No match with URL with query: as-is.
+    {"http://www.youtube.com/a/b?test", "http://www.youtube.com/a/b?test"},
+    // Never-seen-before URL: as-is.
+    {"http://maps.google.com/", "http://maps.google.com/"},
+    // Changing port number, does not match: as-is.
+    {"http://www.example.com:80", "http://www.example.com:80"},
+  };
+  for (size_t row = 0; row < arraysize(kCases); ++row) {
+    const GURL expected(kCases[row][0]);
+    const GURL queried(kCases[row][1]);
+    EXPECT_EQ(expected, cache_.GetCanonicalURL(queried));
+  }
+}
+
+// Checks TopSitesCache::IsKnownURL() for URLs that should and should not be
+// recognized by the cache built from |kTopSitesSpecBasic|.
+TEST_F(TopSitesCacheTest, IsKnownUrl) {
+  InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
+
+  // Matches.
+  const char* const kKnown[] = {
+    "http://www.google.com",
+    "http://www.gooogle.com",
+    "http://www.google.com/",
+  };
+  for (size_t k = 0; k < arraysize(kKnown); ++k) {
+    const GURL url(kKnown[k]);
+    EXPECT_TRUE(cache_.IsKnownURL(url));
+  }
+
+  // Non-matches.
+  const char* const kUnknown[] = {
+    "http://www.google.com?",
+    "http://www.google.net",
+    "http://www.google.com/stuff",
+    "https://www.gooogle.com",
+  };
+  for (size_t k = 0; k < arraysize(kUnknown); ++k) {
+    const GURL url(kUnknown[k]);
+    EXPECT_FALSE(cache_.IsKnownURL(url));
+  }
+}
+
+char* kTopSitesSpecPrefix[] = {  // Spec format: see InitTopSiteCache().
+ "http://www.g.com",  // Non-indented: canonical URL.
+ " http://www.g.com/test?q=3",  // Redirects to http://www.g.com.
+ " http://www.g.com/test/y?b",  // Redirects to http://www.g.com.
+ "http://www.g.com/2",  // Canonical URL.
+ " http://www.g.com/test/q",  // Redirects to http://www.g.com/2.
+ " http://www.g.com/test/y?a",  // Redirects to http://www.g.com/2.
+ "http://www.g.com/3",  // Canonical URL.
+ " http://www.g.com/testing",  // Redirects to http://www.g.com/3.
+ "http://www.g.com/test-hyphen",  // Canonical URL with no other redirects.
+};
+
+// Table-driven check of TopSitesCache::GetCanonicalURLForPrefix() against the
+// cache built from |kTopSitesSpecPrefix|.
+TEST_F(TopSitesCacheTest, GetCanonicalURLForPrefix) {
+  InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
+  // Each row is {expected result, URL passed to GetCanonicalURLForPrefix()}.
+  const char* const kCases[][2] = {
+    // Already is canonical: redirects.
+    {"http://www.g.com", "http://www.g.com"},
+    // Exact match with stored URL: redirects.
+    {"http://www.g.com", "http://www.g.com/test?q=3"},
+    // Prefix match: redirects.
+    {"http://www.g.com", "http://www.g.com/test"},
+    // Competing prefix match: redirects to closest.
+    {"http://www.g.com/2", "http://www.g.com/test/q"},
+    // Multiple prefix matches: redirects to first.
+    {"http://www.g.com/2", "http://www.g.com/test/y"},
+    // No prefix match: as-is
+    {"http://www.g.com/no-match", "http://www.g.com/no-match"},
+    // String prefix match but not URL-prefix match: as-is.
+    {"http://www.g.com/t", "http://www.g.com/t"},
+    // Different protocol: as-is.
+    {"https://www.g.com/test", "https://www.g.com/test"},
+  };
+  for (size_t row = 0; row < arraysize(kCases); ++row) {
+    const GURL expected(kCases[row][0]);
+    const GURL queried(kCases[row][1]);
+    EXPECT_EQ(expected, cache_.GetCanonicalURLForPrefix(queried));
+  }
+}
+
+// Checks TopSitesCache::UrlIsPrefix(s1, s2) on pairs where it is expected to
+// return true (|true_cases|) and pairs where it is expected to return false
+// (|false_cases|).
+TEST_F(TopSitesCacheTest, UrlStringIsPrefix) {
+  struct {
+    const char* s1;
+    const char* s2;
+  } true_cases[] = {
+    {"http://www.google.com", "http://www.google.com"},
+    {"http://www.google.com/a/b", "http://www.google.com/a/b"},
+    {"http://www.google.com?test=3", "http://www.google.com/"},
+    {"http://www.google.com/", "http://www.google.com/test/with/dir/"},
+    {"http://www.google.com:360", "http://www.google.com:360/?q=1234"},
+    {"http://www.google.com:80", "http://www.google.com/gurl/is/smart"},
+    {"http://www.google.com/test", "http://www.google.com/test/with/dir/"},
+    {"http://www.google.com/test/", "http://www.google.com/test/with/dir"},
+    {"http://www.google.com/test?", "http://www.google.com/test/with/dir/"},
+  };
+  // The element type is a function-local unnamed struct, which a
+  // template-based array-size helper cannot accept under C++03, so the
+  // element count is computed with a plain sizeof ratio instead.
+  const size_t num_true_cases = sizeof(true_cases) / sizeof(true_cases[0]);
+  for (size_t i = 0; i < num_true_cases; ++i) {
+    EXPECT_TRUE(TopSitesCache::UrlIsPrefix(GURL(true_cases[i].s1),
+                                           GURL(true_cases[i].s2)))
+        << " for true_cases[" << i << "]";
+  }
+  struct {
+    const char* s1;
+    const char* s2;
+  } false_cases[] = {
+    {"http://www.google.com/test", "http://www.google.com"},
+    {"http://www.google.com/a/b/", "http://www.google.com/a/b"},  // Arguable.
+    {"http://www.google.co", "http://www.google.com"},
+    {"http://google.com", "http://www.google.com"},
+    {"http://www.google.com", "https://www.google.com"},
+    {"http://www.google.com/path", "http://www.google.com:137/path"},
+    {"http://www.google.com/same/dir", "http://www.youtube.com/same/dir"},
+    {"http://www.google.com/te", "http://www.google.com/test"},
+    {"http://www.google.com/test", "http://www.google.com/test-bed"},
+    {"http://www.google.com/test-", "http://www.google.com/test?"},
+  };
+  const size_t num_false_cases = sizeof(false_cases) / sizeof(false_cases[0]);
+  for (size_t i = 0; i < num_false_cases; ++i) {
+    EXPECT_FALSE(TopSitesCache::UrlIsPrefix(GURL(false_cases[i].s1),
+                                            GURL(false_cases[i].s2)))
+        << " for false_cases[" << i << "]";
+  }
+}
+
+} // namespace
+
+} // namespace history

Powered by Google App Engine
This is Rietveld 408576698