content/browser/renderer_host/duplicate_resource_handler.cc - Issue 10701151: DuplicateContentResourceHandler to monitor resources and track how many times th…

Unified Diff: content/browser/renderer_host/duplicate_resource_handler.cc

Issue 10701151: DuplicateContentResourceHandler to monitor resources and track how many times th… (Closed) Base URL: http://src.chromium.org/svn/trunk/src/

Patch Set: Created 8 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« content/browser/renderer_host/duplicate_resource_handler.h ('K') | « content/browser/renderer_host/duplicate_resource_handler.h ('k') | content/browser/renderer_host/resource_dispatcher_host_impl.cc » ('j') | content/browser/renderer_host/resource_dispatcher_host_impl.cc » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: content/browser/renderer_host/duplicate_resource_handler.cc

===================================================================

--- content/browser/renderer_host/duplicate_resource_handler.cc (revision 0)

+++ content/browser/renderer_host/duplicate_resource_handler.cc (revision 0)

@@ -0,0 +1,141 @@

+// Use of this source code is governed by a BSD-style license that can be

+// found in the LICENSE file.

+#include "content/browser/renderer_host/duplicate_resource_handler.h"

+#include <cmath>

+#include <cstring>

+#include <set>

+#include "base/logging.h"

+#include "base/metrics/histogram.h"

+#include "content/browser/renderer_host/resource_request_info_impl.h"

+#include "net/base/io_buffer.h"

+#include "net/url_request/url_request.h"

+#include "third_party/smhasher/src/PMurHash.h"

+namespace content {

+namespace{

+// This set keeps track of a hash of resources

+// that we have seen

+std::set<uint32>* GetSetOfHashes() {

gavinp 2012/07/18 12:13:32 Naming by data type isn't ideal. Better by use: Co

frankwang 2012/07/19 16:10:26 Done.

+ static std::set<uint32> seen_resources;

gavinp 2012/07/18 12:13:32 Probably we should bite the bullet, and use base/m

frankwang 2012/07/19 16:10:26 Done.

+ return &seen_resources;

+// This set keeps track of hash of resources based on origin

+// that we have seen previously

+std::set<uint32>* GetSetOfHashesWithURL(){

+ static std::set<uint32> seen_resources_with_url;

+ return &seen_resources_with_url;

+} // namespace

+DuplicateResourceHandler::DuplicateResourceHandler(

+ scoped_ptr<ResourceHandler> next_handler,

+ ResourceType::Type resource_type,

+ net::URLRequest* request)

+ : LayeredResourceHandler(next_handler.Pass()),

+ resource_type_(resource_type),

+ ph1_(0),

+ pcarry_(0),

+ buffer_size_(0),

+ bytes_read_(0),

+ request_(request) {

+DuplicateResourceHandler::~DuplicateResourceHandler() {

+bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf,

+ int* buf_size, int min_size) {

+ DCHECK_EQ(-1, min_size);

+ if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size))

+ return false;

gavinp 2012/07/18 12:13:32 Lose this blank line.

frankwang 2012/07/19 16:10:26 Done.

+ read_buffer_ = *buf;

+ buffer_size_ = *buf_size;

+ return true;

+bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read,

+ bool* defer) {

+ PMurHash32_Process(&ph1_,&pcarry_,read_buffer_->data(), bytes_read);

gavinp 2012/07/18 12:13:32 spaces after commas.

frankwang 2012/07/19 16:10:26 Done.

+ bytes_read_ += bytes_read;

+ return next_handler_->OnReadCompleted(request_id, bytes_read, defer);

+bool DuplicateResourceHandler::OnResponseCompleted(

+ int request_id,

+ const net::URLRequestStatus& status,

+ const std::string& security_info) {

gavinp 2012/07/18 12:13:32 What should you do for status != net::URLRequestSt

frankwang 2012/07/19 16:10:26 I put in a check so that I pass through when it is

+ uint32 resource_hash = PMurHash32_Result(ph1_, pcarry_, bytes_read_);

+ // Hash url into the resource to see whether it is

+ // from the same or different origin

gavinp 2012/07/18 12:13:32 Comments should be sentences (have a period) and w

frankwang 2012/07/19 16:10:26 Done.

+ uint32 hashed_with_url;

+ const char* url = request_->url().spec().c_str();

+ int url_length = strlen(url);

gavinp 2012/07/18 12:13:32 This scares me a bit. Is it safe to trust c_str()

frankwang 2012/07/19 16:10:26 Done.

+ PMurHash32_Process(&ph1_, &pcarry_, url, url_length);

+ hashed_with_url = PMurHash32_Result(ph1_, pcarry_, url_length + bytes_read_);

+ DVLOG(4) << "url: " << url;

+ DVLOG(4) << "resource hash: " << resource_hash;

+ DVLOG(4) << "hash with url: " << hashed_with_url;

+ // This boolean answers whether we found resource

+ // based just on hash

+ const bool did_we_find_resource =

gavinp 2012/07/18 12:13:32 did_match_contents_ maybe?

frankwang 2012/07/19 16:10:26 Done.

+ GetSetOfHashes()->find(resource_hash) !=

gavinp 2012/07/18 12:13:32 Use count(resource_hash) instead. You don't need t

+ GetSetOfHashes()->end();

+ // This boolean checks whether we found a resource from the original url

+ // as one previously seen

+ const bool did_we_find_resource_original_url =

gavinp 2012/07/18 12:13:32 I've thought about it more, and now I don't like t

frankwang 2012/07/19 16:10:26 Done.

+ GetSetOfHashesWithURL()->find(hashed_with_url) !=

+ GetSetOfHashesWithURL()->end();

+ // If we found the resource, classify whether it is

+ // from the same url or different

gavinp 2012/07/18 12:13:32 Best practice is to have a single instance of each

frankwang 2012/07/19 16:10:26 Done.

+ if (did_we_find_resource) {

+ // If it is from the original url, it will hit on both caches

+ if (did_we_find_resource_original_url) {

+ UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true);

gavinp 2012/07/18 12:13:32 I've thought about this a bit more: I think you ju

frankwang 2012/07/19 16:10:26 Done.

+ UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", true);

+ } else {

+ // If it is a different url (interesting case), it hits on the

+ // proposed cache not the current cache

+ UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true);

+ UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false);

+ // Record bytes missed because we are caching

+ // based on origin instead of resource

+ UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.HashMiss.Size", bytes_read_,

gavinp 2012/07/18 12:13:32 I think you want: "Duplicate.Size.HashHitUrlMiss"

frankwang 2012/07/19 16:10:26 Done.

+ 1, 0x7FFFFFFF, 50);

+ // Record resource type for missed resource

+ UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType", resource_type_,

gavinp 2012/07/18 12:13:32 The name should reflect that it's for missed resou

frankwang 2012/07/19 16:10:26 Done.

+ ResourceType::LAST_TYPE);

+ GetSetOfHashesWithURL()->insert(hashed_with_url);

+ }

+ } else {

+ // We did not see the resource so it is a miss on both caches

gavinp 2012/07/18 12:13:32 I don't like comments like this. The logic should

frankwang 2012/07/19 16:10:26 Done.

+ UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", false);

+ UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false);

+ GetSetOfHashes()->insert(resource_hash);

+ GetSetOfHashesWithURL()->insert(hashed_with_url);

+ }

+ bytes_read_ = 0;

+ read_buffer_ = NULL;

+ return next_handler_->OnResponseCompleted(request_id, status, security_info);

+} //namespace content