Index: content/browser/renderer_host/duplicate_resource_handler.cc |
=================================================================== |
--- content/browser/renderer_host/duplicate_resource_handler.cc (revision 0) |
+++ content/browser/renderer_host/duplicate_resource_handler.cc (revision 0) |
@@ -0,0 +1,141 @@ |
+// Copyright (c) 2012 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "content/browser/renderer_host/duplicate_resource_handler.h" |
+ |
+#include <cmath> |
+#include <cstring> |
+#include <set> |
+ |
+#include "base/logging.h" |
+#include "base/metrics/histogram.h" |
+#include "content/browser/renderer_host/resource_request_info_impl.h" |
+#include "net/base/io_buffer.h" |
+#include "net/url_request/url_request.h" |
+#include "third_party/smhasher/src/PMurHash.h" |
+ |
+ |
+namespace content { |
+ |
+namespace{ |
+ |
+// This set keeps track of a hash of resources |
+// that we have seen |
+std::set<uint32>* GetSetOfHashes() { |
gavinp
2012/07/18 12:13:32
Naming by data type isn't ideal. Better by use: Co
frankwang
2012/07/19 16:10:26
Done.
|
+ static std::set<uint32> seen_resources; |
gavinp
2012/07/18 12:13:32
Probably we should bite the bullet, and use base/m
frankwang
2012/07/19 16:10:26
Done.
|
+ return &seen_resources; |
+} |
+ |
+// This set keeps track of hash of resources based on origin |
+// that we have seen previously |
+std::set<uint32>* GetSetOfHashesWithURL(){ |
+ static std::set<uint32> seen_resources_with_url; |
+ return &seen_resources_with_url; |
+} |
+ |
+} // namespace |
+ |
+DuplicateResourceHandler::DuplicateResourceHandler( |
+ scoped_ptr<ResourceHandler> next_handler, |
+ ResourceType::Type resource_type, |
+ net::URLRequest* request) |
+ : LayeredResourceHandler(next_handler.Pass()), |
+ resource_type_(resource_type), |
+ ph1_(0), |
+ pcarry_(0), |
+ buffer_size_(0), |
+ bytes_read_(0), |
+ request_(request) { |
+} |
+ |
+DuplicateResourceHandler::~DuplicateResourceHandler() { |
+} |
+ |
+bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf, |
+ int* buf_size, int min_size) { |
+ DCHECK_EQ(-1, min_size); |
+ |
+ if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) |
+ return false; |
+ |
gavinp
2012/07/18 12:13:32
Lose this blank line.
frankwang
2012/07/19 16:10:26
Done.
|
+ read_buffer_ = *buf; |
+ buffer_size_ = *buf_size; |
+ return true; |
+} |
+ |
+bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read, |
+ bool* defer) { |
+ |
+ PMurHash32_Process(&ph1_,&pcarry_,read_buffer_->data(), bytes_read); |
gavinp
2012/07/18 12:13:32
spaces after commas.
frankwang
2012/07/19 16:10:26
Done.
|
+ bytes_read_ += bytes_read; |
+ |
+ return next_handler_->OnReadCompleted(request_id, bytes_read, defer); |
+} |
+ |
+bool DuplicateResourceHandler::OnResponseCompleted( |
+ int request_id, |
+ const net::URLRequestStatus& status, |
+ const std::string& security_info) { |
gavinp
2012/07/18 12:13:32
What should you do for status != net::URLRequestSt
frankwang
2012/07/19 16:10:26
I put in a check so that I pass through when it is
|
+ |
+ uint32 resource_hash = PMurHash32_Result(ph1_, pcarry_, bytes_read_); |
+ |
+ // Hash url into the resource to see whether it is |
+ // from the same or different origin |
gavinp
2012/07/18 12:13:32
Comments should be sentences (have a period) and w
frankwang
2012/07/19 16:10:26
Done.
|
+ uint32 hashed_with_url; |
+ const char* url = request_->url().spec().c_str(); |
+ int url_length = strlen(url); |
gavinp
2012/07/18 12:13:32
This scares me a bit. Is it safe to trust c_str()
frankwang
2012/07/19 16:10:26
Done.
|
+ PMurHash32_Process(&ph1_, &pcarry_, url, url_length); |
+ hashed_with_url = PMurHash32_Result(ph1_, pcarry_, url_length + bytes_read_); |
+ |
+ DVLOG(4) << "url: " << url; |
+ DVLOG(4) << "resource hash: " << resource_hash; |
+ DVLOG(4) << "hash with url: " << hashed_with_url; |
+ |
+ // This boolean answers whether we found resource |
+ // based just on hash |
+ const bool did_we_find_resource = |
gavinp
2012/07/18 12:13:32
did_match_contents_ maybe?
frankwang
2012/07/19 16:10:26
Done.
|
+ GetSetOfHashes()->find(resource_hash) != |
gavinp
2012/07/18 12:13:32
Use count(resource_hash) instead. You don't need t
|
+ GetSetOfHashes()->end(); |
+ |
+ // This boolean checks whether we found a resource from the original url |
+ // as one previously seen |
+ const bool did_we_find_resource_original_url = |
gavinp
2012/07/18 12:13:32
I've thought about it more, and now I don't like t
frankwang
2012/07/19 16:10:26
Done.
|
+ GetSetOfHashesWithURL()->find(hashed_with_url) != |
+ GetSetOfHashesWithURL()->end(); |
+ |
+ // If we found the resource, classify whether it is |
+ // from the same url or different |
gavinp
2012/07/18 12:13:32
Best practice is to have a single instance of each
frankwang
2012/07/19 16:10:26
Done.
|
+ if (did_we_find_resource) { |
+ // If it is from the original url, it will hit on both caches |
+ if (did_we_find_resource_original_url) { |
+ UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true); |
gavinp
2012/07/18 12:13:32
I've thought about this a bit more: I think you ju
frankwang
2012/07/19 16:10:26
Done.
|
+ UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", true); |
+ } else { |
+ // If it is a different url (interesting case), it hits on the |
+ // proposed cache not the current cache |
+ UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true); |
+ UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false); |
+ // Record bytes missed because we are caching |
+ // based on origin instead of resource |
+ UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.HashMiss.Size", bytes_read_, |
gavinp
2012/07/18 12:13:32
I think you want: "Duplicate.Size.HashHitUrlMiss"
frankwang
2012/07/19 16:10:26
Done.
|
+ 1, 0x7FFFFFFF, 50); |
+ // Record resource type for missed resource |
+ UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType", resource_type_, |
gavinp
2012/07/18 12:13:32
The name should reflect that it's for missed resou
frankwang
2012/07/19 16:10:26
Done.
|
+ ResourceType::LAST_TYPE); |
+ GetSetOfHashesWithURL()->insert(hashed_with_url); |
+ } |
+ } else { |
+ // We did not see the resource so it is a miss on both caches |
gavinp
2012/07/18 12:13:32
I don't like comments like this. The logic should
frankwang
2012/07/19 16:10:26
Done.
|
+ UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", false); |
+ UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false); |
+ GetSetOfHashes()->insert(resource_hash); |
+ GetSetOfHashesWithURL()->insert(hashed_with_url); |
+ } |
+ |
+ bytes_read_ = 0; |
+ read_buffer_ = NULL; |
+ return next_handler_->OnResponseCompleted(request_id, status, security_info); |
+} |
+ |
+} //namespace content |