Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(267)

Side by Side Diff: content/browser/renderer_host/duplicate_resource_handler.cc

Issue 10701151: DuplicateContentResourceHandler to monitor resources and track how many times th… (Closed) Base URL: http://src.chromium.org/svn/trunk/src/
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/renderer_host/duplicate_resource_handler.h"
6
7 #include <set>
8
9 #include "base/logging.h"
10 #include "base/memory/singleton.h"
11 #include "base/metrics/histogram.h"
12 #include "base/time.h"
13 #include "content/browser/renderer_host/resource_request_info_impl.h"
14 #include "net/base/io_buffer.h"
15 #include "net/url_request/url_request.h"
16 #include "third_party/smhasher/src/PMurHash.h"
17
18 using base::TimeTicks;
19
20 namespace content {
21
22 namespace {
23
24 class GlobalDuplicateRecords {
25 public:
26 static GlobalDuplicateRecords* GetInstance() {
27 return Singleton<GlobalDuplicateRecords>::get();
28 }
29
30 std::set<uint32>* content_matches() {
31 return &content_matches_;
32 }
33
34 std::set<uint32>* content_and_url_matches() {
35 return &content_and_url_matches_;
36 }
37
38 int64 browser_start_time() {
39 return browser_start_time_;
40 }
41
42 private:
43 friend class Singleton<GlobalDuplicateRecords>;
44 friend struct DefaultSingletonTraits<GlobalDuplicateRecords>;
45
46 GlobalDuplicateRecords()
47 : browser_start_time_(TimeTicks::Now().ToInternalValue()) {
48 }
49 ~GlobalDuplicateRecords() {}
50
51 std::set<uint32> content_matches_;
52 std::set<uint32> content_and_url_matches_;
53 int64 browser_start_time_;
gavinp 2012/07/20 17:58:17 Why isn't this of type base::TimeTicks ?
54 };
55
56 } // namespace
57
58 DuplicateResourceHandler::DuplicateResourceHandler(
59 scoped_ptr<ResourceHandler> next_handler,
60 ResourceType::Type resource_type,
61 net::URLRequest* request)
62 : LayeredResourceHandler(next_handler.Pass()),
63 resource_type_(resource_type),
64 bytes_read_(0),
65 request_(request),
66 pmurhash_ph1_(0),
67 pmurhash_pcarry_(0) {
68 }
69
70 DuplicateResourceHandler::~DuplicateResourceHandler() {
71 }
72
73 bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf,
74 int* buf_size, int min_size) {
75 DCHECK_EQ(-1, min_size);
76
77 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size))
78 return false;
79 read_buffer_ = *buf;
80 return true;
81 }
82
83 bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read,
84 bool* defer) {
85 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
86 read_buffer_->data(), bytes_read);
87 bytes_read_ += bytes_read;
88 return next_handler_->OnReadCompleted(request_id, bytes_read, defer);
89 }
90
91 bool DuplicateResourceHandler::OnResponseCompleted(
92 int request_id,
93 const net::URLRequestStatus& status,
94 const std::string& security_info) {
95
96 if (status.status() != net::URLRequestStatus::SUCCESS)
97 return next_handler_->OnResponseCompleted(request_id,
98 status, security_info);
99
100 uint32 contents_hash = PMurHash32_Result(pmurhash_ph1_,
101 pmurhash_pcarry_, bytes_read_);
102
103 // Combine the contents_hash with the url, so we can test if future content
104 // identical resources have the same original url or not.
105 uint32 hashed_with_url;
106 const std::string url_spec = request_->url().spec();
107 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
108 url_spec.data(), url_spec.length());
109 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_,
110 url_spec.length() + bytes_read_);
111
112 DVLOG(4) << "url: " << url_spec;
113 DVLOG(4) << "contents hash: " << contents_hash;
114 DVLOG(4) << "hash with url: " << hashed_with_url;
115
116 std::set<uint32>* content_matches =
117 GlobalDuplicateRecords::GetInstance()->content_matches();
118 std::set<uint32>* content_and_url_matches =
119 GlobalDuplicateRecords::GetInstance()->content_and_url_matches();
120
121 const bool did_match_contents = content_matches->count(contents_hash);
122 const bool did_match_contents_and_url =
123 content_and_url_matches->count(hashed_with_url);
124
125 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents);
126 UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents &&
127 did_match_contents_and_url);
128 if (did_match_contents && !did_match_contents_and_url) {
129 int64 session_length = TimeTicks::Now().ToInternalValue() -
130 GlobalDuplicateRecords::GetInstance()->browser_start_time();
gavinp 2012/07/20 17:58:17 Why isn't this of type base::TimeDelta ?
131 content_and_url_matches->insert(hashed_with_url);
132 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_,
133 1, 0x7FFFFFFF, 50);
134 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss",
135 resource_type_, ResourceType::LAST_TYPE);
136 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.SessionLength.HashHitUrlMiss",
137 session_length/(1000*1000),
138 1, 0x7FFFFFFF, 50);
gavinp 2012/07/20 17:58:17 Isn't this only useful if you have Duplicate.BySes
gavinp 2012/07/20 23:30:51 I'm sad to see this idea gone. I think it's most i
139 }
140 content_matches->insert(contents_hash);
141 content_and_url_matches->insert(hashed_with_url);
142
143 bytes_read_ = 0;
144 read_buffer_ = NULL;
145 return next_handler_->OnResponseCompleted(request_id, status, security_info);
146 }
147
148 } // namespace content
149
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698