Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h" | 5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h" |
| 6 | 6 |
| 7 #include <set> | 7 #include <set> |
| 8 | 8 |
| 9 #include "base/lazy_instance.h" | 9 #include "base/lazy_instance.h" |
| 10 #include "base/logging.h" | 10 #include "base/logging.h" |
| (...skipping 54 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading... | |
| 65 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) | 65 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) |
| 66 return false; | 66 return false; |
| 67 read_buffer_ = *buf; | 67 read_buffer_ = *buf; |
| 68 return true; | 68 return true; |
| 69 } | 69 } |
| 70 | 70 |
| 71 bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, | 71 bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, |
| 72 int bytes_read, | 72 int bytes_read, |
| 73 bool* defer) { | 73 bool* defer) { |
| 74 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | 74 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
| 75 read_buffer_->data(), bytes_read); | 75 read_buffer_->data(), bytes_read); |
|
- **gavinp** (2012/08/12 16:17:14): The hash is calculated incrementally here. If the [… comment truncated in extraction]
- **frankwang** (2012/08/13 04:32:17): I missed this too and agree with Gavin. I also tak [… comment truncated in extraction]

| |
| 76 bytes_read_ += bytes_read; | 76 bytes_read_ += bytes_read; |
| 77 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); | 77 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); |
| 78 } | 78 } |
| 79 | 79 |
| 80 bool DuplicateContentResourceHandler::OnResponseCompleted( | 80 bool DuplicateContentResourceHandler::OnResponseCompleted( |
| 81 int request_id, | 81 int request_id, |
| 82 const net::URLRequestStatus& status, | 82 const net::URLRequestStatus& status, |
| 83 const std::string& security_info) { | 83 const std::string& security_info) { |
| 84 | 84 |
| 85 if (status.is_success()) | 85 if (status.is_success()) |
| 86 RecordContentMetrics(); | 86 RecordContentMetrics(); |
| 87 | 87 |
| 88 return next_handler_->OnResponseCompleted(request_id, status, security_info); | 88 return next_handler_->OnResponseCompleted(request_id, status, security_info); |
| 89 } | 89 } |
| 90 | 90 |
| 91 void DuplicateContentResourceHandler::RecordContentMetrics() { | 91 void DuplicateContentResourceHandler::RecordContentMetrics() { |
| 92 // Ignore everything that's not http/https. Specifically, exclude data and | |
| 93 // blob URLs which can be generated by content and cause the maintained sets | |
| 94 // to grow without bounds. | |
| 95 if (!request_->url().SchemeIs("http") && !request_->url().SchemeIs("https")) { | |
|
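- **gavinp** (2012/08/09 20:20:45): I'm taking my LGTM back. This code calculates the [… comment truncated in extraction]
- **scottmg** (2012/08/09 20:24:56): Sorry, you lost me? There's a return here, so if t [… comment truncated in extraction]
- **gavinp** (2012/08/12 16:17:14): See my comment on line 74, above. On 2012/08/09 2 [… comment truncated in extraction]
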
| |
| 96 bytes_read_ = 0; | |
| 97 read_buffer_ = NULL; | |
| 98 return; | |
| 99 } | |
| 92 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, | 100 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, |
| 93 pmurhash_pcarry_, bytes_read_); | 101 pmurhash_pcarry_, bytes_read_); |
| 94 | |
| 95 // Combine the contents_hash with the url, so we can test if future content | 102 // Combine the contents_hash with the url, so we can test if future content |
| 96 // identical resources have the same original url or not. | 103 // identical resources have the same original url or not. |
| 97 MH_UINT32 hashed_with_url; | |
| 98 const std::string& url_spec = request_->url().spec(); | 104 const std::string& url_spec = request_->url().spec(); |
| 99 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | 105 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
| 100 url_spec.data(), url_spec.length()); | 106 url_spec.data(), url_spec.length()); |
| 101 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, | 107 MH_UINT32 hashed_with_url = PMurHash32_Result( |
|
- **gavinp** (2012/08/09 18:32:00): nice cleanup.

| |
| 102 url_spec.length() + bytes_read_); | 108 pmurhash_ph1_, pmurhash_pcarry_, url_spec.length() + bytes_read_); |
| 103 | 109 |
| 104 DVLOG(4) << "url: " << url_spec; | 110 DVLOG(4) << "url: " << url_spec; |
| 105 DVLOG(4) << "contents hash: " << contents_hash; | 111 DVLOG(4) << "contents hash: " << contents_hash; |
| 106 DVLOG(4) << "hash with url: " << hashed_with_url; | 112 DVLOG(4) << "hash with url: " << hashed_with_url; |
| 107 | 113 |
| 108 std::set<MH_UINT32>* content_matches = | 114 std::set<MH_UINT32>* content_matches = |
| 109 GlobalDuplicateRecords::GetInstance()->content_matches(); | 115 GlobalDuplicateRecords::GetInstance()->content_matches(); |
| 110 std::set<MH_UINT32>* content_and_url_matches = | 116 std::set<MH_UINT32>* content_and_url_matches = |
| 111 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); | 117 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); |
| 112 | 118 |
| (...skipping 12 matching lines...) Expand all Loading... | |
| 125 resource_type_, ResourceType::LAST_TYPE); | 131 resource_type_, ResourceType::LAST_TYPE); |
| 126 } | 132 } |
| 127 content_matches->insert(contents_hash); | 133 content_matches->insert(contents_hash); |
| 128 content_and_url_matches->insert(hashed_with_url); | 134 content_and_url_matches->insert(hashed_with_url); |
| 129 | 135 |
| 130 bytes_read_ = 0; | 136 bytes_read_ = 0; |
| 131 read_buffer_ = NULL; | 137 read_buffer_ = NULL; |
| 132 } | 138 } |
| 133 | 139 |
| 134 } // namespace content | 140 } // namespace content |
| OLD | NEW |