OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/history/top_sites_cache.h" | 5 #include "chrome/browser/history/top_sites_cache.h" |
6 | 6 |
7 #include "base/logging.h" | 7 #include "base/logging.h" |
8 #include "base/memory/ref_counted_memory.h" | 8 #include "base/memory/ref_counted_memory.h" |
9 | 9 |
10 namespace history { | 10 namespace history { |
11 | 11 |
12 bool TopSitesCache::CanonicalURLComparator::operator()( | |
13 const CanonicalURLEntry& e1, const CanonicalURLEntry& e2) const { | |
14 return CanonicalURLStringCompare(e1.first->redirects[e1.second].spec(), | |
15 e2.first->redirects[e2.second].spec()); | |
16 } | |
17 | |
12 TopSitesCache::TopSitesCache() { | 18 TopSitesCache::TopSitesCache() { |
13 } | 19 } |
14 | 20 |
15 TopSitesCache::~TopSitesCache() { | 21 TopSitesCache::~TopSitesCache() { |
16 } | 22 } |
17 | 23 |
18 void TopSitesCache::SetTopSites(const MostVisitedURLList& top_sites) { | 24 void TopSitesCache::SetTopSites(const MostVisitedURLList& top_sites) { |
19 top_sites_ = top_sites; | 25 top_sites_ = top_sites; |
20 GenerateCanonicalURLs(); | 26 GenerateCanonicalURLs(); |
21 } | 27 } |
(...skipping 26 matching lines...) Expand all Loading... | |
48 std::map<GURL, Images>::const_iterator found = | 54 std::map<GURL, Images>::const_iterator found = |
49 images_.find(GetCanonicalURL(url)); | 55 images_.find(GetCanonicalURL(url)); |
50 if (found != images_.end()) { | 56 if (found != images_.end()) { |
51 *score = found->second.thumbnail_score; | 57 *score = found->second.thumbnail_score; |
52 return true; | 58 return true; |
53 } | 59 } |
54 return false; | 60 return false; |
55 } | 61 } |
56 | 62 |
57 const GURL& TopSitesCache::GetCanonicalURL(const GURL& url) { | 63 const GURL& TopSitesCache::GetCanonicalURL(const GURL& url) { |
58 CanonicalURLs::iterator i = TopSitesCache::GetCanonicalURLsIterator(url); | 64 CanonicalURLs::iterator i = GetCanonicalURLsIterator(url); |
59 return i == canonical_urls_.end() ? url : i->first.first->url; | 65 return i == canonical_urls_.end() ? url : i->first.first->url; |
60 } | 66 } |
61 | 67 |
68 const GURL& TopSitesCache::GetCanonicalURLForPrefix(const GURL& url) { | |
69 CanonicalURLs::iterator i = GetCanonicalURLsIteratorForPrefix(url); | |
70 return i == canonical_urls_.end() ? url : i->first.first->url; | |
71 } | |
72 | |
62 bool TopSitesCache::IsKnownURL(const GURL& url) { | 73 bool TopSitesCache::IsKnownURL(const GURL& url) { |
63 return GetCanonicalURLsIterator(url) != canonical_urls_.end(); | 74 return GetCanonicalURLsIterator(url) != canonical_urls_.end(); |
64 } | 75 } |
65 | 76 |
66 size_t TopSitesCache::GetURLIndex(const GURL& url) { | 77 size_t TopSitesCache::GetURLIndex(const GURL& url) { |
67 DCHECK(IsKnownURL(url)); | 78 DCHECK(IsKnownURL(url)); |
68 return GetCanonicalURLsIterator(url)->second; | 79 return GetCanonicalURLsIterator(url)->second; |
69 } | 80 } |
70 | 81 |
71 void TopSitesCache::GenerateCanonicalURLs() { | 82 void TopSitesCache::GenerateCanonicalURLs() { |
72 canonical_urls_.clear(); | 83 canonical_urls_.clear(); |
73 for (size_t i = 0; i < top_sites_.size(); i++) | 84 for (size_t i = 0; i < top_sites_.size(); i++) |
74 StoreRedirectChain(top_sites_[i].redirects, i); | 85 StoreRedirectChain(top_sites_[i].redirects, i); |
75 } | 86 } |
76 | 87 |
77 void TopSitesCache::StoreRedirectChain(const RedirectList& redirects, | 88 void TopSitesCache::StoreRedirectChain(const RedirectList& redirects, |
78 size_t destination) { | 89 size_t destination) { |
79 // redirects is empty if the user pinned a site and there are not enough top | 90 // |redirects| is empty if the user pinned a site and there are not enough top |
80 // sites before the pinned site. | 91 // sites before the pinned site. |
81 | 92 |
82 // Map all the redirected URLs to the destination. | 93 // Map all the redirected URLs to the destination. |
83 for (size_t i = 0; i < redirects.size(); i++) { | 94 for (size_t i = 0; i < redirects.size(); i++) { |
84 // If this redirect is already known, don't replace it with a new one. | 95 // If this redirect is already known, don't replace it with a new one. |
85 if (!IsKnownURL(redirects[i])) { | 96 if (!IsKnownURL(redirects[i])) { |
86 CanonicalURLEntry entry; | 97 CanonicalURLEntry entry; |
87 entry.first = &(top_sites_[destination]); | 98 entry.first = &(top_sites_[destination]); |
88 entry.second = i; | 99 entry.second = i; |
89 canonical_urls_[entry] = destination; | 100 canonical_urls_[entry] = destination; |
90 } | 101 } |
91 } | 102 } |
92 } | 103 } |
93 | 104 |
94 TopSitesCache::CanonicalURLs::iterator TopSitesCache::GetCanonicalURLsIterator( | 105 TopSitesCache::CanonicalURLs::iterator TopSitesCache::GetCanonicalURLsIterator( |
95 const GURL& url) { | 106 const GURL& url) { |
96 MostVisitedURL most_visited_url; | 107 MostVisitedURL most_visited_url; |
97 most_visited_url.redirects.push_back(url); | 108 most_visited_url.redirects.push_back(url); |
98 CanonicalURLEntry entry; | 109 CanonicalURLEntry entry; |
99 entry.first = &most_visited_url; | 110 entry.first = &most_visited_url; |
100 entry.second = 0u; | 111 entry.second = 0u; |
101 return canonical_urls_.find(entry); | 112 return canonical_urls_.find(entry); |
102 } | 113 } |
103 | 114 |
115 // Implements prefix search of |prefix_url| in |canonical_urls_|. This would be | |
sky
2013/09/12 21:22:18
This sure is a long way of saying prefix searching
huangs
2013/09/12 23:33:47
The comment explains "why" we're doing this, not j
| |
116 // easy if |canonical_urls_| were implemented as a trie of tokens, but since | |
117 // isn't, we apply some string processing tricks instead. To see how this works, | |
118 // let us consider the simpler problem of string prefix search in a sorted list | |
119 // of strings, e.g., | |
120 // | |
121 // |L| = {"aa", "h", "hh", "his", "ss", "su"}. | |
122 // | |
123 // To test whether a query string |s| is a prefix of some string in |L|, we only | |
124 // need to find the first element of |L| that's >= |s|. For example: | |
125 // | |
126 // "hi": by binary search we find "his", for which "hi" is a prefix. | |
127 // "zz": by binary search, "zz" > all strings in |L|, so it's never a prefix. | |
128 // "st": by binary search we find "su", for which "st" is not a prefix, so | |
129 // it's never a prefix of an element; since if such an element |e| exists, | |
130 // we would have found "st" < |e| < "su". | |
131 // | |
132 // We apply the same idea for URL prefix, but using CanonicalURLComparator | |
133 // instead of string compare. | |
134 // | |
135 // Caveats: | |
136 // - In the case of multiple matches, sorting would biase towards URLs that | |
137 // appear earlier under CanonicalURLComparator. So between | |
138 // "http://www.google.com/long/url/with/very/deep/directories" | |
139 // "http://www.google.com/short" | |
140 // the prefix "http://www.google.com" would match the first. | |
141 // - CanonicalURLComparator handles query strings, but UrlIsPrefix() is | |
142 // insensitive to them. | |
143 TopSitesCache::CanonicalURLs::iterator | |
144 TopSitesCache::GetCanonicalURLsIteratorForPrefix(const GURL& prefix_url) { | |
145 MostVisitedURL most_visited_url; | |
146 most_visited_url.redirects.push_back(prefix_url); | |
147 CanonicalURLEntry entry; | |
148 entry.first = &most_visited_url; | |
149 entry.second = 0u; | |
150 | |
151 // Perform effective binary search to perform URL prefix search. | |
152 TopSitesCache::CanonicalURLs::iterator it = | |
153 canonical_urls_.lower_bound(entry); | |
154 // Perform prefix match. | |
155 if (it != canonical_urls_.end()) { | |
156 const GURL& comp_url = it->first.first->redirects[it->first.second]; | |
157 if (!UrlIsPrefix(prefix_url, comp_url)) | |
158 it = canonical_urls_.end(); | |
159 } | |
160 return it; | |
161 } | |
162 | |
104 } // namespace history | 163 } // namespace history |
OLD | NEW |