Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(140)

Side by Side Diff: chrome/browser/history/top_sites_cache.cc

Issue 23477033: Implementing URL prefix match for history thumbnail cache. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Adding url_utils.*; adding chrome://thumb2; refactoring. Created 7 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/browser/history/top_sites_cache.h" 5 #include "chrome/browser/history/top_sites_cache.h"
6 6
7 #include "base/logging.h" 7 #include "base/logging.h"
8 #include "base/memory/ref_counted_memory.h" 8 #include "base/memory/ref_counted_memory.h"
9 9
10 namespace history { 10 namespace history {
11 11
12 bool TopSitesCache::CanonicalURLComparator::operator()(
13 const CanonicalURLEntry& e1, const CanonicalURLEntry& e2) const {
14 return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(),
15 e2.first->redirects[e2.second].spec());
16 }
17
12 TopSitesCache::TopSitesCache() { 18 TopSitesCache::TopSitesCache() {
13 } 19 }
14 20
15 TopSitesCache::~TopSitesCache() { 21 TopSitesCache::~TopSitesCache() {
16 } 22 }
17 23
18 void TopSitesCache::SetTopSites(const MostVisitedURLList& top_sites) { 24 void TopSitesCache::SetTopSites(const MostVisitedURLList& top_sites) {
19 top_sites_ = top_sites; 25 top_sites_ = top_sites;
20 GenerateCanonicalURLs(); 26 GenerateCanonicalURLs();
21 } 27 }
(...skipping 26 matching lines...) Expand all
48 std::map<GURL, Images>::const_iterator found = 54 std::map<GURL, Images>::const_iterator found =
49 images_.find(GetCanonicalURL(url)); 55 images_.find(GetCanonicalURL(url));
50 if (found != images_.end()) { 56 if (found != images_.end()) {
51 *score = found->second.thumbnail_score; 57 *score = found->second.thumbnail_score;
52 return true; 58 return true;
53 } 59 }
54 return false; 60 return false;
55 } 61 }
56 62
57 const GURL& TopSitesCache::GetCanonicalURL(const GURL& url) { 63 const GURL& TopSitesCache::GetCanonicalURL(const GURL& url) {
58 CanonicalURLs::iterator i = TopSitesCache::GetCanonicalURLsIterator(url); 64 CanonicalURLs::iterator i = GetCanonicalURLsIterator(url);
59 return i == canonical_urls_.end() ? url : i->first.first->url; 65 return i == canonical_urls_.end() ? url : i->first.first->url;
60 } 66 }
61 67
68 const GURL& TopSitesCache::GetCanonicalURLForPrefix(const GURL& url) {
69 CanonicalURLs::iterator i = GetCanonicalURLsIteratorForPrefix(url);
70 return i == canonical_urls_.end() ? url : i->first.first->url;
71 }
72
62 bool TopSitesCache::IsKnownURL(const GURL& url) { 73 bool TopSitesCache::IsKnownURL(const GURL& url) {
63 return GetCanonicalURLsIterator(url) != canonical_urls_.end(); 74 return GetCanonicalURLsIterator(url) != canonical_urls_.end();
64 } 75 }
65 76
66 size_t TopSitesCache::GetURLIndex(const GURL& url) { 77 size_t TopSitesCache::GetURLIndex(const GURL& url) {
67 DCHECK(IsKnownURL(url)); 78 DCHECK(IsKnownURL(url));
68 return GetCanonicalURLsIterator(url)->second; 79 return GetCanonicalURLsIterator(url)->second;
69 } 80 }
70 81
71 void TopSitesCache::GenerateCanonicalURLs() { 82 void TopSitesCache::GenerateCanonicalURLs() {
72 canonical_urls_.clear(); 83 canonical_urls_.clear();
73 for (size_t i = 0; i < top_sites_.size(); i++) 84 for (size_t i = 0; i < top_sites_.size(); i++)
74 StoreRedirectChain(top_sites_[i].redirects, i); 85 StoreRedirectChain(top_sites_[i].redirects, i);
75 } 86 }
76 87
77 void TopSitesCache::StoreRedirectChain(const RedirectList& redirects, 88 void TopSitesCache::StoreRedirectChain(const RedirectList& redirects,
78 size_t destination) { 89 size_t destination) {
79 // redirects is empty if the user pinned a site and there are not enough top 90 // |redirects| is empty if the user pinned a site and there are not enough top
80 // sites before the pinned site. 91 // sites before the pinned site.
81 92
82 // Map all the redirected URLs to the destination. 93 // Map all the redirected URLs to the destination.
83 for (size_t i = 0; i < redirects.size(); i++) { 94 for (size_t i = 0; i < redirects.size(); i++) {
84 // If this redirect is already known, don't replace it with a new one. 95 // If this redirect is already known, don't replace it with a new one.
85 if (!IsKnownURL(redirects[i])) { 96 if (!IsKnownURL(redirects[i])) {
86 CanonicalURLEntry entry; 97 CanonicalURLEntry entry;
87 entry.first = &(top_sites_[destination]); 98 entry.first = &(top_sites_[destination]);
88 entry.second = i; 99 entry.second = i;
89 canonical_urls_[entry] = destination; 100 canonical_urls_[entry] = destination;
90 } 101 }
91 } 102 }
92 } 103 }
93 104
94 TopSitesCache::CanonicalURLs::iterator TopSitesCache::GetCanonicalURLsIterator( 105 TopSitesCache::CanonicalURLs::iterator TopSitesCache::GetCanonicalURLsIterator(
95 const GURL& url) { 106 const GURL& url) {
96 MostVisitedURL most_visited_url; 107 MostVisitedURL most_visited_url;
97 most_visited_url.redirects.push_back(url); 108 most_visited_url.redirects.push_back(url);
98 CanonicalURLEntry entry; 109 CanonicalURLEntry entry;
99 entry.first = &most_visited_url; 110 entry.first = &most_visited_url;
100 entry.second = 0u; 111 entry.second = 0u;
101 return canonical_urls_.find(entry); 112 return canonical_urls_.find(entry);
102 } 113 }
103 114
115 // Implements prefix search of |prefix_url| in |canonical_urls_|. This would be
sky 2013/09/12 21:22:18 This sure is a long way of saying prefix searching
huangs 2013/09/12 23:33:47 The comment explains "why" we're doing this, not j
116 // easy if |canonical_urls_| were implemented as a trie of tokens, but since
117 // isn't, we apply some string processing tricks instead. To see how this works,
118 // let us consider the simpler problem of string prefix search in a sorted list
119 // of strings, e.g.,
120 //
121 // |L| = {"aa", "h", "hh", "his", "ss", "su"}.
122 //
123 // To test whether a query string |s| is a prefix of some string in |L|, we only
124 // need to find the first element of |L| that's >= |s|. For example:
125 //
126 // "hi": by binary search we find "his", for which "hi" is a prefix.
127 // "zz": by binary search, "zz" > all strings in |L|, so it's never a prefix.
128 // "st": by binary search we find "su", for which "st" is not a prefix, so
129 // it's never a prefix of an element; since if such an element |e| exists,
130 // we would have found "st" < |e| < "su".
131 //
132 // We apply the same idea for URL prefix, but using CanonicalURLComparator
133 // instead of string compare.
134 //
135 // Caveats:
136 // - In the case of multiple matches, sorting would biase towards URLs that
137 // appear earlier under CanonicalURLComparator. So between
138 // "http://www.google.com/long/url/with/very/deep/directories"
139 // "http://www.google.com/short"
140 // the prefix "http://www.google.com" would match the first.
141 // - CanonicalURLComparator handles query strings, but UrlIsPrefix() is
142 // insensitive to them.
143 TopSitesCache::CanonicalURLs::iterator
144 TopSitesCache::GetCanonicalURLsIteratorForPrefix(const GURL& prefix_url) {
145 MostVisitedURL most_visited_url;
146 most_visited_url.redirects.push_back(prefix_url);
147 CanonicalURLEntry entry;
148 entry.first = &most_visited_url;
149 entry.second = 0u;
150
151 // Perform effective binary search to perform URL prefix search.
152 TopSitesCache::CanonicalURLs::iterator it =
153 canonical_urls_.lower_bound(entry);
154 // Perform prefix match.
155 if (it != canonical_urls_.end()) {
156 const GURL& comp_url = it->first.first->redirects[it->first.second];
157 if (!UrlIsPrefix(prefix_url, comp_url))
158 it = canonical_urls_.end();
159 }
160 return it;
161 }
162
104 } // namespace history 163 } // namespace history
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698