OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "content/browser/renderer_host/duplicate_resource_handler.h" | |
6 | |
7 #include <set> | |
8 | |
9 #include "base/logging.h" | |
10 #include "base/memory/singleton.h" | |
11 #include "base/metrics/histogram.h" | |
12 #include "base/time.h" | |
13 #include "content/browser/renderer_host/resource_request_info_impl.h" | |
14 #include "net/base/io_buffer.h" | |
15 #include "net/url_request/url_request.h" | |
16 #include "third_party/smhasher/src/PMurHash.h" | |
17 | |
18 using base::TimeTicks; | |
19 | |
20 namespace content { | |
21 | |
22 namespace { | |
23 | |
24 class GlobalDuplicateRecords { | |
25 public: | |
26 static GlobalDuplicateRecords* GetInstance() { | |
27 return Singleton<GlobalDuplicateRecords>::get(); | |
28 } | |
29 | |
30 std::set<uint32>* content_matches() { | |
31 return &content_matches_; | |
32 } | |
33 | |
34 std::set<uint32>* content_and_url_matches() { | |
35 return &content_and_url_matches_; | |
36 } | |
37 | |
38 int64 browser_start_time() { | |
39 return browser_start_time_; | |
40 } | |
41 | |
42 private: | |
43 friend class Singleton<GlobalDuplicateRecords>; | |
44 friend struct DefaultSingletonTraits<GlobalDuplicateRecords>; | |
45 | |
46 GlobalDuplicateRecords() | |
47 : browser_start_time_(TimeTicks::Now().ToInternalValue()) { | |
48 } | |
49 ~GlobalDuplicateRecords() {} | |
50 | |
51 std::set<uint32> content_matches_; | |
52 std::set<uint32> content_and_url_matches_; | |
53 int64 browser_start_time_; | |
gavinp
2012/07/20 17:58:17
Why isn't this of type base::TimeTicks ?
| |
54 }; | |
55 | |
56 } // namespace | |
57 | |
58 DuplicateResourceHandler::DuplicateResourceHandler( | |
59 scoped_ptr<ResourceHandler> next_handler, | |
60 ResourceType::Type resource_type, | |
61 net::URLRequest* request) | |
62 : LayeredResourceHandler(next_handler.Pass()), | |
63 resource_type_(resource_type), | |
64 bytes_read_(0), | |
65 request_(request), | |
66 pmurhash_ph1_(0), | |
67 pmurhash_pcarry_(0) { | |
68 } | |
69 | |
70 DuplicateResourceHandler::~DuplicateResourceHandler() { | |
71 } | |
72 | |
73 bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf, | |
74 int* buf_size, int min_size) { | |
75 DCHECK_EQ(-1, min_size); | |
76 | |
77 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) | |
78 return false; | |
79 read_buffer_ = *buf; | |
80 return true; | |
81 } | |
82 | |
83 bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read, | |
84 bool* defer) { | |
85 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | |
86 read_buffer_->data(), bytes_read); | |
87 bytes_read_ += bytes_read; | |
88 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); | |
89 } | |
90 | |
91 bool DuplicateResourceHandler::OnResponseCompleted( | |
92 int request_id, | |
93 const net::URLRequestStatus& status, | |
94 const std::string& security_info) { | |
95 | |
96 if (status.status() != net::URLRequestStatus::SUCCESS) | |
97 return next_handler_->OnResponseCompleted(request_id, | |
98 status, security_info); | |
99 | |
100 uint32 contents_hash = PMurHash32_Result(pmurhash_ph1_, | |
101 pmurhash_pcarry_, bytes_read_); | |
102 | |
103 // Combine the contents_hash with the url, so we can test if future content | |
104 // identical resources have the same original url or not. | |
105 uint32 hashed_with_url; | |
106 const std::string url_spec = request_->url().spec(); | |
107 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | |
108 url_spec.data(), url_spec.length()); | |
109 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, | |
110 url_spec.length() + bytes_read_); | |
111 | |
112 DVLOG(4) << "url: " << url_spec; | |
113 DVLOG(4) << "contents hash: " << contents_hash; | |
114 DVLOG(4) << "hash with url: " << hashed_with_url; | |
115 | |
116 std::set<uint32>* content_matches = | |
117 GlobalDuplicateRecords::GetInstance()->content_matches(); | |
118 std::set<uint32>* content_and_url_matches = | |
119 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); | |
120 | |
121 const bool did_match_contents = content_matches->count(contents_hash); | |
122 const bool did_match_contents_and_url = | |
123 content_and_url_matches->count(hashed_with_url); | |
124 | |
125 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents); | |
126 UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents && | |
127 did_match_contents_and_url); | |
128 if (did_match_contents && !did_match_contents_and_url) { | |
129 int64 session_length = TimeTicks::Now().ToInternalValue() - | |
130 GlobalDuplicateRecords::GetInstance()->browser_start_time(); | |
gavinp
2012/07/20 17:58:17
Why isn't this of type base::TimeDelta ?
| |
131 content_and_url_matches->insert(hashed_with_url); | |
132 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_, | |
133 1, 0x7FFFFFFF, 50); | |
134 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss", | |
135 resource_type_, ResourceType::LAST_TYPE); | |
136 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.SessionLength.HashHitUrlMiss", | |
137 session_length/(1000*1000), | |
138 1, 0x7FFFFFFF, 50); | |
gavinp
2012/07/20 17:58:17
Isn't this only useful if you have Duplicate.BySes
gavinp
2012/07/20 23:30:51
I'm sad to see this idea gone. I think it's most i
| |
139 } | |
140 content_matches->insert(contents_hash); | |
141 content_and_url_matches->insert(hashed_with_url); | |
142 | |
143 bytes_read_ = 0; | |
144 read_buffer_ = NULL; | |
145 return next_handler_->OnResponseCompleted(request_id, status, security_info); | |
146 } | |
147 | |
148 } // namespace content | |
149 | |
OLD | NEW |