| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h" | 5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h" |
| 6 | 6 |
| 7 #include <set> | 7 #include <set> |
| 8 | 8 |
| 9 #include "base/lazy_instance.h" | 9 #include "base/lazy_instance.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| (...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 44 DuplicateContentResourceHandler::DuplicateContentResourceHandler( | 44 DuplicateContentResourceHandler::DuplicateContentResourceHandler( |
| 45 scoped_ptr<ResourceHandler> next_handler, | 45 scoped_ptr<ResourceHandler> next_handler, |
| 46 ResourceType::Type resource_type, | 46 ResourceType::Type resource_type, |
| 47 net::URLRequest* request) | 47 net::URLRequest* request) |
| 48 : LayeredResourceHandler(next_handler.Pass()), | 48 : LayeredResourceHandler(next_handler.Pass()), |
| 49 resource_type_(resource_type), | 49 resource_type_(resource_type), |
| 50 bytes_read_(0), | 50 bytes_read_(0), |
| 51 request_(request), | 51 request_(request), |
| 52 pmurhash_ph1_(0), | 52 pmurhash_ph1_(0), |
| 53 pmurhash_pcarry_(0) { | 53 pmurhash_pcarry_(0) { |
| 54 // Ignore everything that's not http/https. Specifically, exclude data and |
| 55 // blob URLs which can be generated by content and cause the maintained sets |
| 56 // to grow without bounds. |
| 57 const GURL& url = request_->url(); |
| 58 track_request_ = url.SchemeIs("http") || url.SchemeIs("https"); |
| 54 } | 59 } |
| 55 | 60 |
| 56 DuplicateContentResourceHandler::~DuplicateContentResourceHandler() { | 61 DuplicateContentResourceHandler::~DuplicateContentResourceHandler() { |
| 57 } | 62 } |
| 58 | 63 |
| 59 bool DuplicateContentResourceHandler::OnWillRead(int request_id, | 64 bool DuplicateContentResourceHandler::OnWillRead(int request_id, |
| 60 net::IOBuffer** buf, | 65 net::IOBuffer** buf, |
| 61 int* buf_size, | 66 int* buf_size, |
| 62 int min_size) { | 67 int min_size) { |
| 63 DCHECK_EQ(-1, min_size); | 68 DCHECK_EQ(-1, min_size); |
| 64 | 69 |
| 65 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) | 70 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) |
| 66 return false; | 71 return false; |
| 67 read_buffer_ = *buf; | 72 read_buffer_ = *buf; |
| 68 return true; | 73 return true; |
| 69 } | 74 } |
| 70 | 75 |
| 71 bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, | 76 bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, |
| 72 int bytes_read, | 77 int bytes_read, |
| 73 bool* defer) { | 78 bool* defer) { |
| 74 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | 79 if (track_request_) { |
| 75 read_buffer_->data(), bytes_read); | 80 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
| 76 bytes_read_ += bytes_read; | 81 read_buffer_->data(), bytes_read); |
| 82 bytes_read_ += bytes_read; |
| 83 } |
| 77 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); | 84 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); |
| 78 } | 85 } |
| 79 | 86 |
| 80 bool DuplicateContentResourceHandler::OnResponseCompleted( | 87 bool DuplicateContentResourceHandler::OnResponseCompleted( |
| 81 int request_id, | 88 int request_id, |
| 82 const net::URLRequestStatus& status, | 89 const net::URLRequestStatus& status, |
| 83 const std::string& security_info) { | 90 const std::string& security_info) { |
| 84 | 91 if (track_request_) { |
| 85 if (status.is_success()) | 92 if (status.is_success()) |
| 86 RecordContentMetrics(); | 93 RecordContentMetrics(); |
| 94 } |
| 87 | 95 |
| 88 return next_handler_->OnResponseCompleted(request_id, status, security_info); | 96 return next_handler_->OnResponseCompleted(request_id, status, security_info); |
| 89 } | 97 } |
| 90 | 98 |
| 91 void DuplicateContentResourceHandler::RecordContentMetrics() { | 99 void DuplicateContentResourceHandler::RecordContentMetrics() { |
| 92 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, | 100 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, |
| 93 pmurhash_pcarry_, bytes_read_); | 101 pmurhash_pcarry_, bytes_read_); |
| 94 | 102 |
| 95 bool is_http_or_https = request_->url().SchemeIs("http") || | 103 bool is_http_or_https = request_->url().SchemeIs("http") || |
| 96 request_->url().SchemeIs("https"); | 104 request_->url().SchemeIs("https"); |
| 97 UMA_HISTOGRAM_BOOLEAN("Duplicate.IsHttpOrHttps", is_http_or_https); | 105 UMA_HISTOGRAM_BOOLEAN("Duplicate.IsHttpOrHttps", is_http_or_https); |
| 98 | 106 |
| 99 // Combine the contents_hash with the url, so we can test if future content | 107 // Combine the contents_hash with the url, so we can test if future content |
| 100 // identical resources have the same original url or not. | 108 // identical resources have the same original url or not. |
| 101 MH_UINT32 hashed_with_url; | |
| 102 const std::string& url_spec = request_->url().spec(); | 109 const std::string& url_spec = request_->url().spec(); |
| 103 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | 110 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
| 104 url_spec.data(), url_spec.length()); | 111 url_spec.data(), url_spec.length()); |
| 105 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, | 112 MH_UINT32 hashed_with_url = PMurHash32_Result( |
| 106 url_spec.length() + bytes_read_); | 113 pmurhash_ph1_, pmurhash_pcarry_, url_spec.length() + bytes_read_); |
| 107 | 114 |
| 108 DVLOG(4) << "url: " << url_spec; | 115 DVLOG(4) << "url: " << url_spec; |
| 109 DVLOG(4) << "contents hash: " << contents_hash; | 116 DVLOG(4) << "contents hash: " << contents_hash; |
| 110 DVLOG(4) << "hash with url: " << hashed_with_url; | 117 DVLOG(4) << "hash with url: " << hashed_with_url; |
| 111 | 118 |
| 112 std::set<MH_UINT32>* content_matches = | 119 std::set<MH_UINT32>* content_matches = |
| 113 GlobalDuplicateRecords::GetInstance()->content_matches(); | 120 GlobalDuplicateRecords::GetInstance()->content_matches(); |
| 114 std::set<MH_UINT32>* content_and_url_matches = | 121 std::set<MH_UINT32>* content_and_url_matches = |
| 115 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); | 122 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); |
| 116 | 123 |
| (...skipping 14 matching lines...) Expand all Loading... |
| 131 resource_type_, ResourceType::LAST_TYPE); | 138 resource_type_, ResourceType::LAST_TYPE); |
| 132 } | 139 } |
| 133 content_matches->insert(contents_hash); | 140 content_matches->insert(contents_hash); |
| 134 content_and_url_matches->insert(hashed_with_url); | 141 content_and_url_matches->insert(hashed_with_url); |
| 135 | 142 |
| 136 bytes_read_ = 0; | 143 bytes_read_ = 0; |
| 137 read_buffer_ = NULL; | 144 read_buffer_ = NULL; |
| 138 } | 145 } |
| 139 | 146 |
| 140 } // namespace content | 147 } // namespace content |
| OLD | NEW |