Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(609)

Unified Diff: content/browser/renderer_host/duplicate_resource_handler.cc

Issue 10701151: DuplicateContentResourceHandler to monitor resources and track how many times th… (Closed) Base URL: http://src.chromium.org/svn/trunk/src/
Patch Set: Changes for second full pass-through Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: content/browser/renderer_host/duplicate_resource_handler.cc
===================================================================
--- content/browser/renderer_host/duplicate_resource_handler.cc (revision 0)
+++ content/browser/renderer_host/duplicate_resource_handler.cc (revision 0)
@@ -0,0 +1,146 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/browser/renderer_host/duplicate_resource_handler.h"
+
+#include <set>
+
+#include "base/logging.h"
+#include "base/memory/singleton.h"
+#include "base/metrics/histogram.h"
+#include "content/browser/renderer_host/resource_request_info_impl.h"
+#include "net/base/io_buffer.h"
+#include "net/url_request/url_request.h"
+#include "third_party/smhasher/src/PMurHash.h"
+
+
+namespace content {
+
+namespace {
+
+// Singleton that holds the bookkeeping shared by every
+// DuplicateResourceHandler: the set of content hashes recorded so far, the set
+// of content-plus-URL hashes recorded so far, and a running total of bytes.
+// The accessors return pointers to the underlying members so callers can both
+// query and update them in place.
+class GlobalDuplicateRecords {
+ public:
+  // Returns the single shared instance.
+  static GlobalDuplicateRecords* GetInstance() {
+    return Singleton<GlobalDuplicateRecords>::get();
+  }
+
+  // Hashes computed over response contents only.
+  std::set<uint32>* content_matches() {
+    return &content_matches_;
+  }
+
+  // Hashes computed over response contents followed by the request URL.
+  std::set<uint32>* content_and_url_matches() {
+    return &content_and_url_matches_;
+  }
+
+  // Running byte total, updated by handlers as responses complete.
+  int* total_bytes_seen() {
+    return &total_bytes_seen_;
+  }
+
+ private:
+  friend class Singleton<GlobalDuplicateRecords>;
+  friend struct DefaultSingletonTraits<GlobalDuplicateRecords>;
+
+  // The counter must be zeroed explicitly: a user-provided constructor that
+  // does not mention an int member leaves it with an indeterminate value, and
+  // the counter is later read through += and reported to a histogram.
+  GlobalDuplicateRecords() : total_bytes_seen_(0) {}
+  ~GlobalDuplicateRecords() {}
+
+  std::set<uint32> content_matches_;
+  std::set<uint32> content_and_url_matches_;
+  int total_bytes_seen_;
gavinp 2012/07/20 11:38:47 I think you can't use an int here, it's very likel
frankwang 2012/07/20 17:51:38 I took this out and went with session length.
+};
+
+} // namespace
+
+// Builds a handler layered on top of |next_handler|. The resource type and the
+// request pointer are stored for use when the response completes; the byte
+// counter and both pieces of incremental hash state start at zero.
+DuplicateResourceHandler::DuplicateResourceHandler(
+    scoped_ptr<ResourceHandler> next_handler,
+    ResourceType::Type resource_type,
+    net::URLRequest* request)
+    : LayeredResourceHandler(next_handler.Pass()),
+      resource_type_(resource_type),
+      bytes_read_(0),
+      request_(request),
+      pmurhash_ph1_(0),
+      pmurhash_pcarry_(0) {}
+
+// Nothing to release explicitly here.
+DuplicateResourceHandler::~DuplicateResourceHandler() {}
+
+// Forwards the read request to the next handler. When that handler accepts,
+// the buffer it supplied through |buf| is remembered so that OnReadCompleted()
+// can hash the bytes that arrive in it. Returns whatever the next handler
+// returned.
+bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf,
+                                          int* buf_size, int min_size) {
+  DCHECK_EQ(-1, min_size);
+
+  const bool will_read =
+      next_handler_->OnWillRead(request_id, buf, buf_size, min_size);
+  if (will_read)
+    read_buffer_ = *buf;
+  return will_read;
+}
+
+// Folds the |bytes_read| bytes that just arrived in the remembered read buffer
+// into the running content hash, adds them to the per-response byte count, and
+// then hands the notification on to the next handler.
+bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read,
+                                               bool* defer) {
+  const char* chunk = read_buffer_->data();
+  bytes_read_ += bytes_read;
+  PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, chunk, bytes_read);
+  return next_handler_->OnReadCompleted(request_id, bytes_read, defer);
+}
+
+// Runs when the response has finished. For a successful response this
+// finalizes the running content hash, derives a second hash that additionally
+// covers the request URL, looks both up in GlobalDuplicateRecords, reports the
+// outcome to UMA histograms, and records the hashes for future lookups. The
+// next handler is notified in every case and its result is returned.
+bool DuplicateResourceHandler::OnResponseCompleted(
+    int request_id,
+    const net::URLRequestStatus& status,
+    const std::string& security_info) {
+  // Responses that did not succeed are passed straight through, unrecorded.
+  if (status.status() != net::URLRequestStatus::SUCCESS) {
+    return next_handler_->OnResponseCompleted(request_id, status,
+                                              security_info);
+  }
+
+  // Upper bound used both for the histograms below and for clamping the
+  // running byte total.
+  const int kMaxCount = 0x7FFFFFFF;
+
+  // Add in a wider type and clamp: the total accumulates across responses, so
+  // a plain int += would eventually overflow, which is undefined behaviour for
+  // signed integers.
+  int* total_bytes_seen =
+      GlobalDuplicateRecords::GetInstance()->total_bytes_seen();
+  const int64 widened_total =
+      static_cast<int64>(*total_bytes_seen) + bytes_read_;
+  *total_bytes_seen =
+      widened_total > kMaxCount ? kMaxCount : static_cast<int>(widened_total);
+
+  const uint32 contents_hash = PMurHash32_Result(pmurhash_ph1_,
+                                                 pmurhash_pcarry_, bytes_read_);
+
+  // Combine the contents_hash with the url, so we can test if future content
+  // identical resources have the same original url or not.
+  const std::string& url_spec = request_->url().spec();
+  PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_,
+                     url_spec.data(), url_spec.length());
+  const uint32 hashed_with_url = PMurHash32_Result(
+      pmurhash_ph1_, pmurhash_pcarry_, url_spec.length() + bytes_read_);
+
+  DVLOG(4) << "url: " << url_spec;
+  DVLOG(4) << "contents hash: " << contents_hash;
+  DVLOG(4) << "hash with url: " << hashed_with_url;
+
+  std::set<uint32>* content_matches =
+      GlobalDuplicateRecords::GetInstance()->content_matches();
+  std::set<uint32>* content_and_url_matches =
+      GlobalDuplicateRecords::GetInstance()->content_and_url_matches();
+
+  // count() returns a size; compare explicitly rather than narrowing to bool.
+  const bool did_match_contents = content_matches->count(contents_hash) > 0;
+  const bool did_match_contents_and_url =
+      content_and_url_matches->count(hashed_with_url) > 0;
+
+  UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents);
+  UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents &&
+                        did_match_contents_and_url);
+  if (did_match_contents && !did_match_contents_and_url) {
+    // Same bytes as an earlier response, but this content/URL pair is new.
+    content_and_url_matches->insert(hashed_with_url);
+    UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_,
+                                1, kMaxCount, 50);
+    UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss",
+                              resource_type_, ResourceType::LAST_TYPE);
+    UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.TotalBytesSeen", *total_bytes_seen,
+                                1, kMaxCount, 50);
+  } else {
gavinp 2012/07/20 11:38:47 I think you don't need this else (insert does noth
frankwang 2012/07/20 17:51:38 Done.
+    content_matches->insert(contents_hash);
+    content_and_url_matches->insert(hashed_with_url);
+  }
+
+  // Clear all per-response state together, including the incremental hash
+  // state, so that nothing from this response lingers in the handler.
+  bytes_read_ = 0;
+  pmurhash_ph1_ = 0;
+  pmurhash_pcarry_ = 0;
+  read_buffer_ = NULL;
+  return next_handler_->OnResponseCompleted(request_id, status, security_info);
+}
+
+} // namespace content
+

Powered by Google App Engine
This is Rietveld 408576698