Chromium Code Reviews| Index: chrome/browser/history/top_sites_cache.cc |
| diff --git a/chrome/browser/history/top_sites_cache.cc b/chrome/browser/history/top_sites_cache.cc |
| index f94013f0773be2ee3408d859a9fa827ff60cff89..ec93e7493f615839722cec1d754416e2bfc826de 100644 |
| --- a/chrome/browser/history/top_sites_cache.cc |
| +++ b/chrome/browser/history/top_sites_cache.cc |
| @@ -9,6 +9,12 @@ |
| namespace history { |
| +bool TopSitesCache::CanonicalURLComparator::operator()( |
| + const CanonicalURLEntry& e1, const CanonicalURLEntry& e2) const { |
| + return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(), |
| + e2.first->redirects[e2.second].spec()); |
| +} |
| + |
| TopSitesCache::TopSitesCache() { |
| } |
| @@ -55,7 +61,12 @@ bool TopSitesCache::GetPageThumbnailScore(const GURL& url, |
| } |
| const GURL& TopSitesCache::GetCanonicalURL(const GURL& url) { |
| - CanonicalURLs::iterator i = TopSitesCache::GetCanonicalURLsIterator(url); |
| + CanonicalURLs::iterator i = GetCanonicalURLsIterator(url); |
| + return i == canonical_urls_.end() ? url : i->first.first->url; |
| +} |
| + |
| +const GURL& TopSitesCache::GetCanonicalURLForPrefix(const GURL& url) { |
| + CanonicalURLs::iterator i = GetCanonicalURLsIteratorForPrefix(url); |
| return i == canonical_urls_.end() ? url : i->first.first->url; |
| } |
| @@ -76,7 +87,7 @@ void TopSitesCache::GenerateCanonicalURLs() { |
| void TopSitesCache::StoreRedirectChain(const RedirectList& redirects, |
| size_t destination) { |
| - // redirects is empty if the user pinned a site and there are not enough top |
| + // |redirects| is empty if the user pinned a site and there are not enough top |
| // sites before the pinned site. |
| // Map all the redirected URLs to the destination. |
| @@ -101,4 +112,52 @@ TopSitesCache::CanonicalURLs::iterator TopSitesCache::GetCanonicalURLsIterator( |
| return canonical_urls_.find(entry); |
| } |
| +// Implements prefix search of |prefix_url| in |canonical_urls_|. This would be |
|
sky
2013/09/12 21:22:18
This sure is a long way of saying prefix searching
huangs
2013/09/12 23:33:47
The comment explains "why" we're doing this, not j
|
| +// easy if |canonical_urls_| were implemented as a trie of tokens, but since |
| +// isn't, we apply some string processing tricks instead. To see how this works, |
| +// let us consider the simpler problem of string prefix search in a sorted list |
| +// of strings, e.g., |
| +// |
| +// |L| = {"aa", "h", "hh", "his", "ss", "su"}. |
| +// |
| +// To test whether a query string |s| is a prefix of some string in |L|, we only |
| +// need to find the first element of |L| that's >= |s|. For example: |
| +// |
| +// "hi": by binary search we find "his", for which "hi" is a prefix. |
| +// "zz": by binary search, "zz" > all strings in |L|, so it's never a prefix. |
| +// "st": by binary search we find "su", for which "st" is not a prefix, so |
| +// it's never a prefix of an element; since if such an element |e| exists, |
| +// we would have found "st" < |e| < "su". |
| +// |
| +// We apply the same idea for URL prefix, but using CanonicalURLComparator |
| +// instead of string compare. |
| +// |
| +// Caveats: |
| +// - In the case of multiple matches, sorting would biase towards URLs that |
| +// appear earlier under CanonicalURLComparator. So between |
| +// "http://www.google.com/long/url/with/very/deep/directories" |
| +// "http://www.google.com/short" |
| +// the prefix "http://www.google.com" would match the first. |
| +// - CanonicalURLComparator handles query strings, but UrlIsPrefix() is |
| +// insensitive to them. |
| +TopSitesCache::CanonicalURLs::iterator |
| + TopSitesCache::GetCanonicalURLsIteratorForPrefix(const GURL& prefix_url) { |
| + MostVisitedURL most_visited_url; |
| + most_visited_url.redirects.push_back(prefix_url); |
| + CanonicalURLEntry entry; |
| + entry.first = &most_visited_url; |
| + entry.second = 0u; |
| + |
| + // Perform effective binary search to perform URL prefix search. |
| + TopSitesCache::CanonicalURLs::iterator it = |
| + canonical_urls_.lower_bound(entry); |
| + // Perform prefix match. |
| + if (it != canonical_urls_.end()) { |
| + const GURL& comp_url = it->first.first->redirects[it->first.second]; |
| + if (!UrlIsPrefix(prefix_url, comp_url)) |
| + it = canonical_urls_.end(); |
| + } |
| + return it; |
| +} |
| + |
| } // namespace history |