Index: chrome/browser/history/top_sites_cache.cc |
diff --git a/chrome/browser/history/top_sites_cache.cc b/chrome/browser/history/top_sites_cache.cc |
index f94013f0773be2ee3408d859a9fa827ff60cff89..ec93e7493f615839722cec1d754416e2bfc826de 100644 |
--- a/chrome/browser/history/top_sites_cache.cc |
+++ b/chrome/browser/history/top_sites_cache.cc |
@@ -9,6 +9,12 @@ |
namespace history { |
+bool TopSitesCache::CanonicalURLComparator::operator()( |
+ const CanonicalURLEntry& e1, const CanonicalURLEntry& e2) const { |
+ return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(), |
+ e2.first->redirects[e2.second].spec()); |
+} |
+ |
TopSitesCache::TopSitesCache() { |
} |
@@ -55,7 +61,12 @@ bool TopSitesCache::GetPageThumbnailScore(const GURL& url, |
} |
const GURL& TopSitesCache::GetCanonicalURL(const GURL& url) { |
- CanonicalURLs::iterator i = TopSitesCache::GetCanonicalURLsIterator(url); |
+ CanonicalURLs::iterator i = GetCanonicalURLsIterator(url); |
+ return i == canonical_urls_.end() ? url : i->first.first->url; |
+} |
+ |
+const GURL& TopSitesCache::GetCanonicalURLForPrefix(const GURL& url) { |
+ CanonicalURLs::iterator i = GetCanonicalURLsIteratorForPrefix(url); |
return i == canonical_urls_.end() ? url : i->first.first->url; |
} |
@@ -76,7 +87,7 @@ void TopSitesCache::GenerateCanonicalURLs() { |
void TopSitesCache::StoreRedirectChain(const RedirectList& redirects, |
size_t destination) { |
- // redirects is empty if the user pinned a site and there are not enough top |
+ // |redirects| is empty if the user pinned a site and there are not enough top |
// sites before the pinned site. |
// Map all the redirected URLs to the destination. |
@@ -101,4 +112,52 @@ TopSitesCache::CanonicalURLs::iterator TopSitesCache::GetCanonicalURLsIterator( |
return canonical_urls_.find(entry); |
} |
+// Implements prefix search of |prefix_url| in |canonical_urls_|. This would be |
sky
2013/09/12 21:22:18
This sure is a long way of saying prefix searching
huangs
2013/09/12 23:33:47
The comment explains "why" we're doing this, not j
|
+// easy if |canonical_urls_| were implemented as a trie of tokens, but since |
+// isn't, we apply some string processing tricks instead. To see how this works, |
+// let us consider the simpler problem of string prefix search in a sorted list |
+// of strings, e.g., |
+// |
+// |L| = {"aa", "h", "hh", "his", "ss", "su"}. |
+// |
+// To test whether a query string |s| is a prefix of some string in |L|, we only |
+// need to find the first element of |L| that's >= |s|. For example: |
+// |
+// "hi": by binary search we find "his", for which "hi" is a prefix. |
+// "zz": by binary search, "zz" > all strings in |L|, so it's never a prefix. |
+// "st": by binary search we find "su", for which "st" is not a prefix, so |
+// it's never a prefix of an element; since if such an element |e| exists, |
+// we would have found "st" < |e| < "su". |
+// |
+// We apply the same idea for URL prefix, but using CanonicalURLComparator |
+// instead of string compare. |
+// |
+// Caveats: |
+// - In the case of multiple matches, sorting would biase towards URLs that |
+// appear earlier under CanonicalURLComparator. So between |
+// "http://www.google.com/long/url/with/very/deep/directories" |
+// "http://www.google.com/short" |
+// the prefix "http://www.google.com" would match the first. |
+// - CanonicalURLComparator handles query strings, but UrlIsPrefix() is |
+// insensitive to them. |
+TopSitesCache::CanonicalURLs::iterator |
+ TopSitesCache::GetCanonicalURLsIteratorForPrefix(const GURL& prefix_url) { |
+ MostVisitedURL most_visited_url; |
+ most_visited_url.redirects.push_back(prefix_url); |
+ CanonicalURLEntry entry; |
+ entry.first = &most_visited_url; |
+ entry.second = 0u; |
+ |
+ // Perform effective binary search to perform URL prefix search. |
+ TopSitesCache::CanonicalURLs::iterator it = |
+ canonical_urls_.lower_bound(entry); |
+ // Perform prefix match. |
+ if (it != canonical_urls_.end()) { |
+ const GURL& comp_url = it->first.first->redirects[it->first.second]; |
+ if (!UrlIsPrefix(prefix_url, comp_url)) |
+ it = canonical_urls_.end(); |
+ } |
+ return it; |
+} |
+ |
} // namespace history |