chrome/browser/history/top_sites_cache.cc - Issue 23477033: Implementing URL prefix match for history thumbnail cache.

Side by Side Diff: chrome/browser/history/top_sites_cache.cc

Issue 23477033: Implementing URL prefix match for history thumbnail cache. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Comment fixes. Created 7 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/browser/history/top_sites_cache.h ('K') | « chrome/browser/history/top_sites_cache.h ('k') | chrome/browser/history/top_sites_cache_unittest.cc » ('j') | chrome/browser/history/top_sites_cache_unittest.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/browser/history/top_sites_cache.h"	5 #include "chrome/browser/history/top_sites_cache.h"

6	6

	7 #include <algorithm>

	8

7 #include "base/logging.h"	9 #include "base/logging.h"

8 #include "base/memory/ref_counted_memory.h"	10 #include "base/memory/ref_counted_memory.h"

9	11

10 namespace history {	12 namespace history {

11	13

	14 bool CanonicalURLComparator::operator()(const CanonicalURLEntry& e1,

	15 const CanonicalURLEntry& e2) const {

	16 return CanonicalURLComparator::CompareString(

	17 e1.first->redirects[e1.second].spec(),

	18 e2.first->redirects[e2.second].spec());

	19 }

	20

	21 // static

	22 bool CanonicalURLComparator::CompareString(const std::string& s1,

	23 const std::string& s2) {

	24 const std::string::value_type* ch1 = s1.c_str();

	25 const std::string::value_type* ch2 = s2.c_str();

	26 // Find first difference.

	27 while (ch1 && ch2 && ch1 == ch2) {

	28 ++ch1;

	29 ++ch2;

	30 }

	31 if (!ch1 \|\| !ch2) // Identical strings, or one is strict prefix of other.

	32 return *ch2 != '\0'; // true iff \|e1\| is strict prefix of \|e2\|.

	33

	34 // Now \|ch1\| != \|ch2\|. Compare using order '?' < '/' < other.

	35 // Table : \|ch2\| = '?' : \|ch2\| = "/" : \|*ch2\| = other

	36 // \|*ch1\| = '?' : (excluded) : true : true

	37 // \|*ch1\| = '/' : false : (excluded) : true

	38 // \|ch1\| = 'other' : false : false : \|ch1\| < \|*ch2\|

	39 return (ch1 == '?') \|\| (ch1 == '/' && *ch2 != '?') \|\|

	40 (ch2 != '?' && ch2 != '/' && ch1 < ch2);

	41 }

	42

	43

12 TopSitesCache::TopSitesCache() {	44 TopSitesCache::TopSitesCache() {

13 }	45 }

14	46

15 TopSitesCache::~TopSitesCache() {	47 TopSitesCache::~TopSitesCache() {

16 }	48 }

17	49

18 void TopSitesCache::SetTopSites(const MostVisitedURLList& top_sites) {	50 void TopSitesCache::SetTopSites(const MostVisitedURLList& top_sites) {

19 top_sites_ = top_sites;	51 top_sites_ = top_sites;

20 GenerateCanonicalURLs();	52 GenerateCanonicalURLs();

21 }	53 }

(...skipping 14 matching lines...) Expand all Loading...
36 if (found != images_.end()) {	68 if (found != images_.end()) {

37 base::RefCountedMemory* data = found->second.thumbnail.get();	69 base::RefCountedMemory* data = found->second.thumbnail.get();

38 if (data) {	70 if (data) {

39 *bytes = data;	71 *bytes = data;

40 return true;	72 return true;

41 }	73 }

42 }	74 }

43 return false;	75 return false;

44 }	76 }

45	77

	78 bool TopSitesCache::GetPageThumbnailForPrefix(

	79 const GURL& prefix_url,

	80 scoped_refptr<base::RefCountedMemory>* bytes) {

	81 std::map<GURL, Images>::const_iterator found =

	82 images_.find(GetCanonicalURLForPrefix(prefix_url));

	83 if (found != images_.end()) {

	84 base::RefCountedMemory* data = found->second.thumbnail.get();

	85 if (data) {

	86 *bytes = data;

	87 return true;

	88 }

	89 }

	90 return false;

	91 }

	92

46 bool TopSitesCache::GetPageThumbnailScore(const GURL& url,	93 bool TopSitesCache::GetPageThumbnailScore(const GURL& url,

47 ThumbnailScore* score) {	94 ThumbnailScore* score) {

48 std::map<GURL, Images>::const_iterator found =	95 std::map<GURL, Images>::const_iterator found =

49 images_.find(GetCanonicalURL(url));	96 images_.find(GetCanonicalURL(url));

50 if (found != images_.end()) {	97 if (found != images_.end()) {

51 *score = found->second.thumbnail_score;	98 *score = found->second.thumbnail_score;

52 return true;	99 return true;

53 }	100 }

54 return false;	101 return false;

55 }	102 }

56	103

57 const GURL& TopSitesCache::GetCanonicalURL(const GURL& url) {	104 const GURL& TopSitesCache::GetCanonicalURL(const GURL& url) {

58 CanonicalURLs::iterator i = TopSitesCache::GetCanonicalURLsIterator(url);	105 CanonicalURLs::iterator i = GetCanonicalURLsIterator(url, false);

	106 return i == canonical_urls_.end() ? url : i->first.first->url;

	107 }

	108

	109 const GURL& TopSitesCache::GetCanonicalURLForPrefix(const GURL& url) {

	110 CanonicalURLs::iterator i = GetCanonicalURLsIterator(url, true);

59 return i == canonical_urls_.end() ? url : i->first.first->url;	111 return i == canonical_urls_.end() ? url : i->first.first->url;

60 }	112 }

61	113

62 bool TopSitesCache::IsKnownURL(const GURL& url) {	114 bool TopSitesCache::IsKnownURL(const GURL& url) {

63 return GetCanonicalURLsIterator(url) != canonical_urls_.end();	115 return GetCanonicalURLsIterator(url, false) != canonical_urls_.end();

64 }	116 }

65	117

66 size_t TopSitesCache::GetURLIndex(const GURL& url) {	118 size_t TopSitesCache::GetURLIndex(const GURL& url) {

67 DCHECK(IsKnownURL(url));	119 DCHECK(IsKnownURL(url));

68 return GetCanonicalURLsIterator(url)->second;	120 return GetCanonicalURLsIterator(url, false)->second;

	121 }

	122

	123 // static

	124 bool TopSitesCache::UrlIsPrefix(const GURL& url1, const GURL& url2) {

	125 if (url1.scheme() != url2.scheme() \|\| url1.host() != url2.host() \|\|

	126 url1.port() != url2.port()) {

	127 return false;

	128 }

	129 // Only need to compare path now. Note that queries are ignored.

	130 std::string p1(url1.path());

	131 std::string p2(url2.path());

	132 if (p1.length() > p2.length())

	133 return false;

	134 std::pair<std::string::iterator, std::string::iterator> first_diff =

	135 std::mismatch(p1.begin(), p1.end(), p2.begin());

	136 // Necessary condition: \|p1\| is a string prefix of \|p2\|.

	137 if (first_diff.first != p1.end())

	138 return false; // E.g.: (\|p1\| = "/test", \|p2\| = "/exam") => false.

	139

	140 // \|p1\| is string prefix.

	141 if (first_diff.second == p2.end()) // Is exact match?

	142 return true; // E.g.: ("/test", "/test") => true.

	143 // \|p1\| is strict string prefix, check full match of last path component.

	144 if (!p1.empty() && *p1.rbegin() == '/') // Ends in '/'?

	145 return true; // E.g.: ("/test/", "/test/stuff") => true.

	146

	147 // Finally, \|p1\| does not end in "/": check first extra character in \|p2\|.

	148 // E.g.: ("/test", "/test/stuff") => true; ("/test", "/testing") => false.

	149 return *(first_diff.second) == '/';

69 }	150 }

70	151

71 void TopSitesCache::GenerateCanonicalURLs() {	152 void TopSitesCache::GenerateCanonicalURLs() {

72 canonical_urls_.clear();	153 canonical_urls_.clear();

73 for (size_t i = 0; i < top_sites_.size(); i++)	154 for (size_t i = 0; i < top_sites_.size(); i++)

74 StoreRedirectChain(top_sites_[i].redirects, i);	155 StoreRedirectChain(top_sites_[i].redirects, i);

75 }	156 }

76	157

77 void TopSitesCache::StoreRedirectChain(const RedirectList& redirects,	158 void TopSitesCache::StoreRedirectChain(const RedirectList& redirects,

78 size_t destination) {	159 size_t destination) {

79 // redirects is empty if the user pinned a site and there are not enough top	160 // \|redirects\| is empty if the user pinned a site and there are not enough top

80 // sites before the pinned site.	161 // sites before the pinned site.

81	162

82 // Map all the redirected URLs to the destination.	163 // Map all the redirected URLs to the destination.

83 for (size_t i = 0; i < redirects.size(); i++) {	164 for (size_t i = 0; i < redirects.size(); i++) {

84 // If this redirect is already known, don't replace it with a new one.	165 // If this redirect is already known, don't replace it with a new one.

85 if (!IsKnownURL(redirects[i])) {	166 if (!IsKnownURL(redirects[i])) {

86 CanonicalURLEntry entry;	167 CanonicalURLEntry entry;

87 entry.first = &(top_sites_[destination]);	168 entry.first = &(top_sites_[destination]);

88 entry.second = i;	169 entry.second = i;

89 canonical_urls_[entry] = destination;	170 canonical_urls_[entry] = destination;

90 }	171 }

91 }	172 }

92 }	173 }

93	174

94 TopSitesCache::CanonicalURLs::iterator TopSitesCache::GetCanonicalURLsIterator(	175 TopSitesCache::CanonicalURLs::iterator TopSitesCache::GetCanonicalURLsIterator(

95 const GURL& url) {	176 const GURL& url, bool match_prefix) {

96 MostVisitedURL most_visited_url;	177 MostVisitedURL most_visited_url;

97 most_visited_url.redirects.push_back(url);	178 most_visited_url.redirects.push_back(url);

98 CanonicalURLEntry entry;	179 CanonicalURLEntry entry;

99 entry.first = &most_visited_url;	180 entry.first = &most_visited_url;

100 entry.second = 0u;	181 entry.second = 0u;

101 return canonical_urls_.find(entry);	182 if (!match_prefix)

	183 return canonical_urls_.find(entry);

	184

	185 TopSitesCache::CanonicalURLs::iterator best_it =

	186 canonical_urls_.lower_bound(entry);
	beaudoin 2013/09/04 16:46:10 Using lower_bound here is really quite clever, but Using lower_bound here is really quite clever, but IIUC it relies of the fact that '/' is smaller than any other character you can find in a path segment. Your custom comparator ensures that, but you should probably spend some time detailing that algorithm here and maybe give an example of a sorted list or URLs and that the first URL in this list that is >= \|url\| is necessarily of prefix of \|url\| if such a prefix exists. huangs 2013/09/04 23:27:39 Wrote big tl;dr in top_sites_cache.h to explain th Wrote big tl;dr in top_sites_cache.h to explain the key ideas of the algorithms.
	187 if (best_it == canonical_urls_.end())

	188 return best_it;

	189

	190 const GURL& comp_url = best_it->first.first->redirects[best_it->first.second];

	191 return UrlIsPrefix(url, comp_url) ? best_it : canonical_urls_.end();

102 }	192 }

103	193

104 } // namespace history	194 } // namespace history

OLD	NEW