 Chromium Code Reviews
 Chromium Code Reviews Issue 10701151:
   DuplicateContentResourceHandler to monitor resources and track how many times th…  (Closed) 
  Base URL: http://src.chromium.org/svn/trunk/src/
    
  
    Issue 10701151:
   DuplicateContentResourceHandler to monitor resources and track how many times th…  (Closed) 
  Base URL: http://src.chromium.org/svn/trunk/src/| Index: content/browser/renderer_host/duplicate_resource_handler.cc | 
| =================================================================== | 
| --- content/browser/renderer_host/duplicate_resource_handler.cc (revision 0) | 
| +++ content/browser/renderer_host/duplicate_resource_handler.cc (revision 0) | 
| @@ -0,0 +1,122 @@ | 
| +// Copyright (c) 2012 The Chromium Authors. All rights reserved. | 
| +// Use of this source code is governed by a BSD-style license that can be | 
| +// found in the LICENSE file. | 
| + | 
| +#include "content/browser/renderer_host/duplicate_resource_handler.h" | 
| + | 
| +#include <cmath> | 
| +#include <cstring> | 
| +#include <set> | 
| + | 
| +#include "base/logging.h" | 
| +#include "base/metrics/histogram.h" | 
| +#include "content/browser/renderer_host/resource_request_info_impl.h" | 
| +#include "net/base/io_buffer.h" | 
| +#include "net/url_request/url_request.h" | 
| +#include "third_party/smhasher/src/MurmurHash3.h" | 
| + | 
| + | 
| +namespace content { | 
| + | 
| +namespace{ | 
| + | 
| +// Returns the function-local static set that records the content hash of | 
| +// every resource seen so far; each call returns a pointer to the same set. | 
| +std::set<uint32>* GetSetOfHashes() { | 
| + static std::set<uint32> seen_resources; | 
| 
gavinp
2012/07/14 19:05:06
This won't build in clang, due to our static destr
 | 
| + return &seen_resources; | 
| +} | 
| + | 
| +// Returns the function-local static set of hashes that combine the content | 
| +// hash of a resource with its request URL, as inserted by OnResponseCompleted. | 
| +std::set<uint32>* GetSetOfHashesWithURL(){ | 
| + static std::set<uint32> seen_resources_with_url; | 
| + return &seen_resources_with_url; | 
| +} | 
| + | 
| +} // namespace | 
| + | 
| +DuplicateResourceHandler::DuplicateResourceHandler( /* sets up hashing state for one request */ | 
| + scoped_ptr<ResourceHandler> next_handler, | 
| + ResourceType::Type resource_type, | 
| + net::URLRequest* request) | 
| + : LayeredResourceHandler(next_handler.Pass()), /* next_handler is handed on to the base class */ | 
| + resource_type_(resource_type), /* reported in the resource-type histogram by OnResponseCompleted */ | 
| + start_seed_(0), /* running hash; OnReadCompleted uses it as the seed for each chunk */ | 
| + bytes_read_(0), /* total bytes accumulated across OnReadCompleted calls */ | 
| + read_buffer_(new net::IOBuffer(kReadBufSize)), /* buffer offered to the reader in OnWillRead */ | 
| + request_(request) { /* only its URL is read, in OnResponseCompleted */ | 
| +} | 
| + | 
| +DuplicateResourceHandler::~DuplicateResourceHandler() { /* empty body: no explicit cleanup is performed here */ | 
| +} | 
| + | 
| +bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf, | 
| + int* buf_size, int min_size) { /* offers read_buffer_ as the read target, then delegates to the next handler */ | 
| + DCHECK_EQ(-1, min_size); /* asserts that min_size is -1 */ | 
| + | 
| + *buf = read_buffer_.get(); /* publish this handler's own buffer ... */ | 
| + *buf_size = kReadBufSize; /* ... together with its fixed capacity */ | 
| + return next_handler_->OnWillRead(request_id, buf, buf_size, min_size); /* NOTE(review): the same out-params are passed downstream, so the next handler may overwrite the values set above; the reviewer comment below flags this */ | 
| 
gavinp
2012/07/14 19:05:06
This doesn't work; you're setting *buf and *buf_si
 
frankwang
2012/07/16 01:44:05
Done.
 | 
| + | 
| +} | 
| + | 
| +bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read, | 
| + bool* defer) { /* forwards the read, then folds the chunk into the running hash and byte count */ | 
| 
gavinp
2012/07/14 19:05:06
Once you've fixed the above, you'll want to read t
 
frankwang
2012/07/16 01:44:05
Done.
 | 
| + if (!next_handler_->OnReadCompleted(request_id, bytes_read, defer)) /* the next handler runs first */ | 
| + return false; /* its failure is propagated and this chunk is not hashed */ | 
| + | 
| + // Hash this chunk of read_buffer_ with the previous result as the seed (initial seed is 0); the final value may therefore depend on how the data was chunked | 
| 
gavinp
2012/07/14 19:05:06
This does not work. MurmurHash3 is not incremental
 | 
| + MurmurHash3_x86_32(read_buffer_->data(), bytes_read, start_seed_, &start_seed_); | 
| + | 
| + bytes_read_ += bytes_read; /* accumulate the total size across calls */ | 
| + return true; | 
| +} | 
| + | 
| +bool DuplicateResourceHandler::OnResponseCompleted( /* classifies the finished response as seen or unseen, records histograms, then delegates */ | 
| + int request_id, | 
| + const net::URLRequestStatus& status, | 
| + const std::string& security_info) { | 
| + | 
| + // Hash the request URL spec, seeded with the content hash, so the result identifies this content at this particular URL | 
| + uint32 hashed_with_url; | 
| + const char* url = request_->url().spec().c_str(); | 
| + MurmurHash3_x86_32(url, strlen(url), start_seed_, &hashed_with_url); | 
| + | 
| + // True when the content hash alone has been seen before, whatever URL it came from | 
| + const bool did_we_find_resource = GetSetOfHashes()->find(start_seed_) != GetSetOfHashes()->end(); | 
| + // True when this same content has been seen before at this same URL (full URL spec, not just the origin) | 
| + const bool did_we_find_resource_same_origin = | 
| 
gavinp
2012/07/14 19:05:06
Origin isn't the right term to use here, for http:
 
frankwang
2012/07/16 01:44:05
Done.
 | 
| + GetSetOfHashesWithURL()->find(hashed_with_url) != GetSetOfHashesWithURL()->end(); | 
| + | 
| + // If we found the resource, classify whether it is from the same origin or different | 
| + if (did_we_find_resource) { | 
| + // If it is the same origin, it is a hit on both caches | 
| + if (did_we_find_resource_same_origin) { | 
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.ProposedCache.HitRate", true); | 
| 
gavinp
2012/07/14 19:05:06
Each of these macro invocations has a global varia
 
frankwang
2012/07/16 01:44:05
Done.
 | 
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.CurrentCache.HitRate", true); | 
| 
gavinp
2012/07/14 19:05:06
You're not really measuring the current cache here
 
frankwang
2012/07/16 01:44:05
Done.
 | 
| + } else { | 
| + // Same content seen before but not at this URL (the interesting case): counted as | 
| + // a hit for the proposed cache and a miss for the current cache | 
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.ProposedCache.HitRate", true); | 
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.CurrentCache.HitRate", false); | 
| + // Record log10 of bytes_read_ (a byte count, although the histogram name says KB) for content that the URL-keyed lookup missed | 
| + UMA_HISTOGRAM_COUNTS("Duplicate.SizeKB.Miss.CurrentCache", log10(bytes_read_)); | 
| 
frankwang
2012/07/14 02:13:59
I changed this locally to "Duplicate.Size.Miss.Cur
 
gavinp
2012/07/14 19:05:06
UMA_HISTOGRAM_COUNTS is already exponentially binn
 
frankwang
2012/07/16 01:44:05
Done.
 | 
| + // Record resource type for missed resource | 
| + UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType", resource_type_, ResourceType::LAST_TYPE); | 
| + GetSetOfHashesWithURL()->insert(hashed_with_url); | 
| + } | 
| + } else { | 
| + // We did not see the resource so it is a miss on both caches | 
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.ProposedCache.HitRate", false); | 
| + UMA_HISTOGRAM_BOOLEAN("Duplicate.CurrentCache.HitRate", false); | 
| + GetSetOfHashes()->insert(start_seed_); | 
| + GetSetOfHashesWithURL()->insert(hashed_with_url); | 
| + } | 
| + | 
| + bytes_read_ = 0; | 
| + read_buffer_ = NULL; | 
| + return next_handler_->OnResponseCompleted(request_id, status, security_info); | 
| +} | 
| + | 
| +} //namespace content |