Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(276)

Side by Side Diff: chrome/browser/history/top_sites_cache_unittest.cc

Issue 23477033: Implementing URL prefix match for history thumbnail cache. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Added high-level comments; fixed tests to compare strings; added more test cases. Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
#include "chrome/browser/history/top_sites_cache.h"

#include <set>
#include <string>

#include "base/basictypes.h"
#include "base/logging.h"
#include "base/scoped_ptr.h"
#include "base/strings/string16.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
14
15 namespace history {
16
17 namespace {
18
19 class TopSitesCacheTest : public testing::Test {
20 protected:
21 // Initializes |top_sites_| and |cache_| based on |spec|, which is a list of
22 // URL strings with optional indents: indentated URLs redirect to the last
23 // non-indented URL. Titles are assigned as "Title 1", "Title 2", etc., in the
24 // order of appearance. See |kTopSitesSpecBasic| for an example.
25 void InitTopSiteCache(char** spec, int size);
26
27 MostVisitedURLList top_sites_;
28 TopSitesCache cache_;
29 };
30
31 void TopSitesCacheTest::InitTopSiteCache(char** spec, int size) {
32 std::set<std::string> urls_seen;
33 for (int i = 0; i < size; ++i) {
34 char* spec_item = spec[i];
35 while (*spec_item && *spec_item == ' ') // Eat indent.
36 ++spec_item;
37 ASSERT_EQ(urls_seen.find(spec_item), urls_seen.end())
38 << "Duplicate URL found: " << spec_item;
39 urls_seen.insert(spec_item);
40 if (spec_item == spec[i]) { // No indent: add new MostVisitedURL.
41 string16 title(L"Title " + base::Uint64ToString16(top_sites_.size() + 1));
42 top_sites_.push_back(MostVisitedURL(GURL(spec_item), title));
43 }
44 ASSERT_TRUE(!top_sites_.empty());
45 // Set up redirect to canonical URL. Canonical URL redirects to itself, too.
46 top_sites_.back().redirects.push_back(GURL(spec_item));
47 }
48 cache_.SetTopSites(top_sites_);
49 }
50
51 TEST_F(TopSitesCacheTest, CanonicalURLComparator) {
52 // Comprehensive test by comparing each pair in sorted list. O(n^2).
53 const char* sorted_list[] = {
54 "http://www.gogle.com/redirects_to_google",
55 "http://www.google.com",
56 "http://www.google.com/",
57 "http://www.google.com/?q",
58 "http://www.google.com/A",
59 "http://www.google.com/index.html",
60 "http://www.google.com/test",
61 "http://www.google.com/test?q=3",
62 "http://www.google.com/test?r=3",
63 "http://www.google.com/test/zzzzz",
64 "http://www.google.com/test#hash",
65 "http://www.google.com/test$dollar",
66 "http://www.google.com/test%E9%9B%80",
67 "http://www.google.com/test-case",
68 "http://www.google.com:80/",
69 "https://www.google.com",
70 };
71 for (int i = 0; i < arraysize(sorted_list); ++i) {
72 EXPECT_FALSE(TopSitesCache::CanonicalURLComparator::CompareString(
73 sorted_list[i], sorted_list[i]))
74 << " for \"" << sorted_list[i] << "\" < \"" << sorted_list[i] << "\"";
75 // Every disjoint pair-wise comparison.
76 for (int j = i + 1; j < arraysize(sorted_list); ++j) {
77 EXPECT_TRUE(TopSitesCache::CanonicalURLComparator::CompareString(
78 sorted_list[i], sorted_list[j]))
79 << " for \"" << sorted_list[i] << "\" < \"" << sorted_list[j] << "\"";
80 EXPECT_FALSE(TopSitesCache::CanonicalURLComparator::CompareString(
81 sorted_list[j], sorted_list[i]))
82 << " for \"" << sorted_list[j] << "\" < \"" << sorted_list[i] << "\"";
83 }
84 }
85 }
86
// Spec consumed by TopSitesCacheTest::InitTopSiteCache(): an entry without a
// leading space starts a new most-visited URL; an entry with a leading space
// is recorded as a redirect of the closest preceding unindented entry.
// NOTE(review): the elements are string literals held through non-const
// char*; switching to const pointers requires InitTopSiteCache() to accept
// them.
char* kTopSitesSpecBasic[] = {
  // Plain HTTP site with two misspelled aliases.
  "http://www.google.com",
  " http://www.gogle.com",  // Redirects.
  " http://www.gooogle.com",  // Redirects.
  // URL with a path, plus a query variant.
  "http://www.youtube.com/a/b",
  " http://www.youtube.com/a/b?test=1",  // Redirects.
  // HTTPS site, distinct from the HTTP one above.
  "https://www.google.com/",
  " https://www.gogle.com",  // Redirects.
  // Explicit non-default port, no redirects.
  "http://www.example.com:3141/",
};
97
98 TEST_F(TopSitesCacheTest, GetCanonicalURL) {
99 InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
100 // Already is canonical: redirects.
101 EXPECT_EQ("http://www.google.com/",
102 cache_.GetCanonicalURL(GURL("http://www.google.com")).spec());
103 // Exact match with stored URL: redirects.
104 EXPECT_EQ("http://www.google.com/",
105 cache_.GetCanonicalURL(GURL("http://www.gooogle.com")).spec());
106 // Recognizes despite trailing "/": redirects
107 EXPECT_EQ("http://www.google.com/",
108 cache_.GetCanonicalURL(GURL("http://www.gooogle.com/")).spec());
109 // Exact match with URL with query: redirects.
110 EXPECT_EQ("http://www.youtube.com/a/b",
111 cache_.GetCanonicalURL(GURL("http://www.youtube.com/a/b?test=1")).spec());
112 // No match with URL with query: as-is.
113 EXPECT_EQ("http://www.youtube.com/a/b?test",
114 cache_.GetCanonicalURL(GURL("http://www.youtube.com/a/b?test")).spec());
115 // Never-seen-before URL: as-is.
116 EXPECT_EQ("http://maps.google.com/",
117 cache_.GetCanonicalURL(GURL("http://maps.google.com/")).spec());
118 // Changing port number, does not match: as-is.
119 EXPECT_EQ("http://www.example.com:1234/",
120 cache_.GetCanonicalURL(GURL("http://www.example.com:1234")).spec());
121 // Smart enough to know that port 80 is HTTP: redirects.
122 EXPECT_EQ("http://www.google.com/",
123 cache_.GetCanonicalURL(GURL("http://www.gooogle.com:80")).spec());
124 // Prefix should not work: as-is.
125 EXPECT_EQ("http://www.youtube.com/a",
126 cache_.GetCanonicalURL(GURL("http://www.youtube.com/a")).spec());
127 }
128
129 TEST_F(TopSitesCacheTest, IsKnownUrl) {
130 InitTopSiteCache(kTopSitesSpecBasic, arraysize(kTopSitesSpecBasic));
131 // Matches.
132 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com")));
133 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.gooogle.com")));
134 EXPECT_TRUE(cache_.IsKnownURL(GURL("http://www.google.com/")));
135
136 // Non-matches.
137 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com?")));
138 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.net")));
139 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.google.com/stuff")));
140 EXPECT_FALSE(cache_.IsKnownURL(GURL("https://www.gooogle.com")));
141 EXPECT_FALSE(cache_.IsKnownURL(GURL("http://www.youtube.com/a")));
142 }
143
// Spec used by the prefix-matching test, in the format consumed by
// TopSitesCacheTest::InitTopSiteCache(): entries with a leading space are
// redirects of the closest preceding entry without one.
// NOTE(review): the elements are string literals held through non-const
// char*; switching to const pointers requires InitTopSiteCache() to accept
// them.
char* kTopSitesSpecPrefix[] = {
  // First canonical URL and its redirects.
  "http://www.g.ca/",
  " http://www.g.ca/test?q=3",  // Redirects.
  " http://www.g.ca/test/y?b",  // Redirects.
  // Second canonical URL and its redirects.
  "http://www.g.ca/2",
  " http://www.g.ca/test/q",  // Redirects.
  " http://www.g.ca/test/y?a",  // Redirects.
  // Third canonical URL and its redirect.
  "http://www.g.ca/3",
  " http://www.g.ca/testing",  // Redirects.
  // Canonical URL without redirects.
  "http://www.g.ca/test-hyphen",
  // Canonical URL whose redirect lies below another canonical URL's path.
  "http://www.g.ca/sh",
  " http://www.g.ca/sh/1/2",  // Redirects.
  "http://www.g.ca/sh/1",
};
158
159 TEST_F(TopSitesCacheTest, GetCanonicalURLForPrefix) {
160 InitTopSiteCache(kTopSitesSpecPrefix, arraysize(kTopSitesSpecPrefix));
161 // Already is canonical: redirects.
162 EXPECT_EQ("http://www.g.ca/",
163 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca")).spec());
beaudoin 2013/09/05 01:41:19 Not sure this is legal indentation, I think you ha
huangs 2013/09/05 15:24:46 Made this (and the GetCanonicalURL test) data-driv
164 // Exact match with stored URL: redirects.
165 EXPECT_EQ("http://www.g.ca/",
166 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca/test?q=3")).spec());
167 // Prefix match: redirects.
168 EXPECT_EQ("http://www.g.ca/",
169 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca/test")).spec());
170 // Competing prefix match: redirects to closest.
171 EXPECT_EQ("http://www.g.ca/2",
172 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca/test/q")).spec());
173 // Multiple prefix matches: redirects to first.
174 EXPECT_EQ("http://www.g.ca/2",
175 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca/test/y")).spec());
176 // No prefix match: as-is.
177 EXPECT_EQ("http://www.g.ca/no-match",
178 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca/no-match")).spec());
179 // String prefix match but not URL-prefix match: as-is.
180 EXPECT_EQ("http://www.g.ca/t",
181 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca/t")).spec());
182 // Different protocol: as-is.
183 EXPECT_EQ("https://www.g.ca/test",
184 cache_.GetCanonicalURLForPrefix(GURL("https://www.g.ca/test")).spec());
185 // Smart enough to know that port 80 is HTTP: redirects.
186 EXPECT_EQ("http://www.g.ca/",
187 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca:80/test")).spec());
188 // Exact match, unaffected by "http://www.g.ca/sh/1": redirects.
189 EXPECT_EQ("http://www.g.ca/sh",
190 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca/sh/1/2")).spec());
191 // Suffix match only: as-is
192 EXPECT_EQ("http://www.g.ca/sh/1/2/3",
193 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca/sh/1/2/3")).spec());
194 // Exact match, unaffected by "http://www.g.ca/sh": redirects.
195 EXPECT_EQ("http://www.g.ca/sh/1",
196 cache_.GetCanonicalURLForPrefix(GURL("http://www.g.ca/sh/1")).spec());
197 }
198
199 TEST_F(TopSitesCacheTest, UrlStringIsPrefix) {
200 struct {
201 const char* s1;
202 const char* s2;
203 } true_cases[] = {
204 {"http://www.google.com", "http://www.google.com"},
205 {"http://www.google.com/a/b", "http://www.google.com/a/b"},
206 {"http://www.google.com?test=3", "http://www.google.com/"},
207 {"http://www.google.com/#hash", "http://www.google.com/?q"},
208 {"http://www.google.com/", "http://www.google.com/test/with/dir/"},
209 {"http://www.google.com:360", "http://www.google.com:360/?q=1234"},
210 {"http://www.google.com:80", "http://www.google.com/gurl/is/smart"},
211 {"http://www.google.com/test", "http://www.google.com/test/with/dir/"},
212 {"http://www.google.com/test/", "http://www.google.com/test/with/dir"},
213 {"http://www.google.com/test?", "http://www.google.com/test/with/dir/"},
214 };
215 for (int i = 0; i < arraysize(true_cases); ++i) {
216 EXPECT_TRUE(TopSitesCache::UrlIsPrefix(GURL(true_cases[i].s1),
217 GURL(true_cases[i].s2)))
218 << " for true_cases[" << i << "]";
219 }
220 struct {
221 const char* s1;
222 const char* s2;
223 } false_cases[] = {
224 {"http://www.google.com/test", "http://www.google.com"},
225 {"http://www.google.com/a/b/", "http://www.google.com/a/b"}, // Arguable.
226 {"http://www.google.co", "http://www.google.com"},
227 {"http://google.com", "http://www.google.com"},
228 {"http://www.google.com", "https://www.google.com"},
229 {"http://www.google.com/path", "http://www.google.com:137/path"},
230 {"http://www.google.com/same/dir", "http://www.youtube.com/same/dir"},
231 {"http://www.google.com/te", "http://www.google.com/test"},
232 {"http://www.google.com/test", "http://www.google.com/test-bed"},
233 {"http://www.google.com/test-", "http://www.google.com/test?"},
234 };
235 for (int i = 0; i < arraysize(false_cases); ++i) {
236 EXPECT_FALSE(TopSitesCache::UrlIsPrefix(GURL(false_cases[i].s1),
237 GURL(false_cases[i].s2)))
238 << " for false_cases[" << i << "]";
239 }
240 }
241
242 } // namespace
243
244 } // namespace history
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698