Chromium Code Reviews

| OLD | NEW |
|---|---|
| (Empty) | |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "content/browser/renderer_host/duplicate_resource_handler.h" | |
| 6 | |
| 7 #include <set> | |
| 8 | |
| 9 #include "base/logging.h" | |
| 10 #include "base/memory/singleton.h" | |
| 11 #include "base/metrics/histogram.h" | |
| 12 #include "content/browser/renderer_host/resource_request_info_impl.h" | |
| 13 #include "net/base/io_buffer.h" | |
| 14 #include "net/url_request/url_request.h" | |
| 15 #include "third_party/smhasher/src/PMurHash.h" | |
| 16 | |
| 17 | |
| 18 namespace content { | |
| 19 | |
| 20 namespace { | |
| 21 | |
| 22 class GlobalDuplicateRecords { | |
| 23 public: | |
| 24 static GlobalDuplicateRecords* GetInstance() { | |
| 25 return Singleton<GlobalDuplicateRecords>::get(); | |
| 26 } | |
| 27 | |
| 28 std::set<uint32>* content_matches() { | |
| 29 return &content_matches_; | |
| 30 } | |
| 31 | |
| 32 std::set<uint32>* content_and_url_matches() { | |
| 33 return &content_and_url_matches_; | |
| 34 } | |
| 35 | |
| 36 int* total_bytes_seen() { | |
| 37 return &total_bytes_seen_; | |
| 38 } | |
| 39 | |
| 40 private: | |
| 41 friend class Singleton<GlobalDuplicateRecords>; | |
| 42 friend struct DefaultSingletonTraits<GlobalDuplicateRecords>; | |
| 43 | |
| 44 GlobalDuplicateRecords() {} | |
| 45 ~GlobalDuplicateRecords() {} | |
| 46 | |
| 47 std::set<uint32> content_matches_; | |
| 48 std::set<uint32> content_and_url_matches_; | |
| 49 int total_bytes_seen_; | |
| **gavinp** (2012/07/20 11:38:47): I think you can't use an int here, it's very likel… <br> **frankwang** (2012/07/20 17:51:38): I took this out and went with session length. | |
| 50 }; | |
| 51 | |
| 52 } // namespace | |
| 53 | |
| 54 DuplicateResourceHandler::DuplicateResourceHandler( | |
| 55 scoped_ptr<ResourceHandler> next_handler, | |
| 56 ResourceType::Type resource_type, | |
| 57 net::URLRequest* request) | |
| 58 : LayeredResourceHandler(next_handler.Pass()), | |
| 59 resource_type_(resource_type), | |
| 60 bytes_read_(0), | |
| 61 request_(request), | |
| 62 pmurhash_ph1_(0), | |
| 63 pmurhash_pcarry_(0) { | |
| 64 } | |
| 65 | |
| 66 DuplicateResourceHandler::~DuplicateResourceHandler() { | |
| 67 } | |
| 68 | |
| 69 bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf, | |
| 70 int* buf_size, int min_size) { | |
| 71 DCHECK_EQ(-1, min_size); | |
| 72 | |
| 73 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) | |
| 74 return false; | |
| 75 read_buffer_ = *buf; | |
| 76 return true; | |
| 77 } | |
| 78 | |
| 79 bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read, | |
| 80 bool* defer) { | |
| 81 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | |
| 82 read_buffer_->data(), bytes_read); | |
| 83 bytes_read_ += bytes_read; | |
| 84 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); | |
| 85 } | |
| 86 | |
| 87 bool DuplicateResourceHandler::OnResponseCompleted( | |
| 88 int request_id, | |
| 89 const net::URLRequestStatus& status, | |
| 90 const std::string& security_info) { | |
| 91 | |
| 92 if (status.status() != net::URLRequestStatus::SUCCESS) | |
| 93 return next_handler_->OnResponseCompleted(request_id, | |
| 94 status, security_info); | |
| 95 | |
| 96 int* total_bytes_seen = | |
| 97 GlobalDuplicateRecords::GetInstance()->total_bytes_seen(); | |
| 98 *total_bytes_seen += bytes_read_; | |
| 99 uint32 contents_hash = PMurHash32_Result(pmurhash_ph1_, | |
| 100 pmurhash_pcarry_, bytes_read_); | |
| 101 | |
| 102 // Combine the contents_hash with the URL so we can test whether future | |
| 103 // content-identical resources share the same original URL. | |
| 104 uint32 hashed_with_url; | |
| 105 const std::string url_spec = request_->url().spec(); | |
| 106 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, | |
| 107 url_spec.data(), url_spec.length()); | |
| 108 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, | |
| 109 url_spec.length() + bytes_read_); | |
| 110 | |
| 111 DVLOG(4) << "url: " << url_spec; | |
| 112 DVLOG(4) << "contents hash: " << contents_hash; | |
| 113 DVLOG(4) << "hash with url: " << hashed_with_url; | |
| 114 | |
| 115 std::set<uint32>* content_matches = | |
| 116 GlobalDuplicateRecords::GetInstance()->content_matches(); | |
| 117 std::set<uint32>* content_and_url_matches = | |
| 118 GlobalDuplicateRecords::GetInstance()->content_and_url_matches(); | |
| 119 | |
| 120 const bool did_match_contents = content_matches->count(contents_hash); | |
| 121 const bool did_match_contents_and_url = | |
| 122 content_and_url_matches->count(hashed_with_url); | |
| 123 | |
| 124 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents); | |
| 125 UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents && | |
| 126 did_match_contents_and_url); | |
| 127 if (did_match_contents && !did_match_contents_and_url) { | |
| 128 content_and_url_matches->insert(hashed_with_url); | |
| 129 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_, | |
| 130 1, 0x7FFFFFFF, 50); | |
| 131 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss", | |
| 132 resource_type_, ResourceType::LAST_TYPE); | |
| 133 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.TotalBytesSeen", *total_bytes_seen, | |
| 134 1, 0x7FFFFFFF, 50); | |
| 135 } else { | |
| **gavinp** (2012/07/20 11:38:47): I think you don't need this else (insert does noth… <br> **frankwang** (2012/07/20 17:51:38): Done. | |
| 136 content_matches->insert(contents_hash); | |
| 137 content_and_url_matches->insert(hashed_with_url); | |
| 138 } | |
| 139 | |
| 140 bytes_read_ = 0; | |
| 141 read_buffer_ = NULL; | |
| 142 return next_handler_->OnResponseCompleted(request_id, status, security_info); | |
| 143 } | |
| 144 | |
| 145 } // namespace content | |
| 146 | |
| OLD | NEW |