OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h" | 5 #include "content/browser/renderer_host/duplicate_content_resource_handler.h" |
6 | 6 |
7 #include <set> | 7 #include <set> |
8 | 8 |
9 #include "base/lazy_instance.h" | 9 #include "base/lazy_instance.h" |
10 #include "base/logging.h" | 10 #include "base/logging.h" |
(...skipping 54 matching lines...) | |
65 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) | 65 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) |
66 return false; | 66 return false; |
67 read_buffer_ = *buf; | 67 read_buffer_ = *buf; |
68 return true; | 68 return true; |
69 } | 69 } |
70 | 70 |
71 bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, | 71 bool DuplicateContentResourceHandler::OnReadCompleted(int request_id, |
72 int bytes_read, | 72 int bytes_read, |
73 bool* defer) { | 73 bool* defer) { |
74 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | 74 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
75 read_buffer_->data(), bytes_read); | 75 read_buffer_->data(), bytes_read); |
gavinp
2012/08/12 16:17:14
The hash is calculated incrementally here. If the
frankwang
2012/08/13 04:32:17
I missed this too and agree with Gavin. I also tak
| |
76 bytes_read_ += bytes_read; | 76 bytes_read_ += bytes_read; |
77 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); | 77 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); |
78 } | 78 } |
79 | 79 |
80 bool DuplicateContentResourceHandler::OnResponseCompleted( | 80 bool DuplicateContentResourceHandler::OnResponseCompleted( |
81 int request_id, | 81 int request_id, |
82 const net::URLRequestStatus& status, | 82 const net::URLRequestStatus& status, |
83 const std::string& security_info) { | 83 const std::string& security_info) { |
84 | 84 |
85 if (status.is_success()) | 85 if (status.is_success()) |
86 RecordContentMetrics(); | 86 RecordContentMetrics(); |
87 | 87 |
88 return next_handler_->OnResponseCompleted(request_id, status, security_info); | 88 return next_handler_->OnResponseCompleted(request_id, status, security_info); |
89 } | 89 } |
90 | 90 |
91 void DuplicateContentResourceHandler::RecordContentMetrics() { | 91 void DuplicateContentResourceHandler::RecordContentMetrics() { |
92 // Ignore everything that's not http/https. Specifically, exclude data and | |
93 // blob URLs which can be generated by content and cause the maintained sets | |
94 // to grow without bounds. | |
95 if (!request_->url().SchemeIs("http") && !request_->url().SchemeIs("https")) { | |
gavinp
2012/08/09 20:20:45
I'm taking my LGTM back. This code calculates the
scottmg
2012/08/09 20:24:56
Sorry, you lost me? There's a return here, so if t
gavinp
2012/08/12 16:17:14
See my comment on line 74, above.
On 2012/08/09 2
| |
96 bytes_read_ = 0; | |
97 read_buffer_ = NULL; | |
98 return; | |
99 } | |
92 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, | 100 MH_UINT32 contents_hash = PMurHash32_Result(pmurhash_ph1_, |
93 pmurhash_pcarry_, bytes_read_); | 101 pmurhash_pcarry_, bytes_read_); |
94 | |
95 // Combine the contents_hash with the url, so we can test if future content | 102 // Combine the contents_hash with the url, so we can test if future content |
96 // identical resources have the same original url or not. | 103 // identical resources have the same original url or not. |
97 MH_UINT32 hashed_with_url; | |
98 const std::string& url_spec = request_->url().spec(); | 104 const std::string& url_spec = request_->url().spec(); |
99 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | 105 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, |
100 url_spec.data(), url_spec.length()); | 106 url_spec.data(), url_spec.length()); |
101 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, | 107 MH_UINT32 hashed_with_url = PMurHash32_Result( |
gavinp
2012/08/09 18:32:00
nice cleanup.
| |
102 url_spec.length() + bytes_read_); | 108 pmurhash_ph1_, pmurhash_pcarry_, url_spec.length() + bytes_read_); |
103 | 109 |
104 DVLOG(4) << "url: " << url_spec; | 110 DVLOG(4) << "url: " << url_spec; |
105 DVLOG(4) << "contents hash: " << contents_hash; | 111 DVLOG(4) << "contents hash: " << contents_hash; |
106 DVLOG(4) << "hash with url: " << hashed_with_url; | 112 DVLOG(4) << "hash with url: " << hashed_with_url; |
107 | 113 |
108 std::set<MH_UINT32>* content_matches = | 114 std::set<MH_UINT32>* content_matches = |
109 GlobalDuplicateRecords::GetInstance()->content_matches(); | 115 GlobalDuplicateRecords::GetInstance()->content_matches(); |
110 std::set<MH_UINT32>* content_and_url_matches = | 116 std::set<MH_UINT32>* content_and_url_matches = |
111 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); | 117 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); |
112 | 118 |
(...skipping 12 matching lines...) | |
125 resource_type_, ResourceType::LAST_TYPE); | 131 resource_type_, ResourceType::LAST_TYPE); |
126 } | 132 } |
127 content_matches->insert(contents_hash); | 133 content_matches->insert(contents_hash); |
128 content_and_url_matches->insert(hashed_with_url); | 134 content_and_url_matches->insert(hashed_with_url); |
129 | 135 |
130 bytes_read_ = 0; | 136 bytes_read_ = 0; |
131 read_buffer_ = NULL; | 137 read_buffer_ = NULL; |
132 } | 138 } |
133 | 139 |
134 } // namespace content | 140 } // namespace content |
OLD | NEW |