Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(7)

Side by Side Diff: content/browser/renderer_host/duplicate_resource_handler.cc

Issue 10701151: DuplicateContentResourceHandler to monitor resources and track how many times th… (Closed) Base URL: http://src.chromium.org/svn/trunk/src/
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/renderer_host/duplicate_resource_handler.h"
6
7 #include <set>
8
9 #include "base/logging.h"
10 #include "base/memory/singleton.h"
11 #include "base/metrics/histogram.h"
12 #include "content/browser/renderer_host/resource_request_info_impl.h"
13 #include "net/base/io_buffer.h"
14 #include "net/url_request/url_request.h"
15 #include "third_party/smhasher/src/PMurHash.h"
16
17
18 namespace content {
19
20 namespace{
21
22 class GlobalDuplicateRecords {
23
24 public:
25 static GlobalDuplicateRecords* GetInstance(){
26 return Singleton<GlobalDuplicateRecords>::get();
27 }
28
29 std::set<uint32>* content_matches() {
30 return &content_matches_;
31 }
32
33 std::set<uint32>* content_and_url_matches(){
34 return &content_and_url_matches_;
35 }
36
37 private:
38 friend class Singleton<GlobalDuplicateRecords>;
39 friend struct DefaultSingletonTraits<GlobalDuplicateRecords>;
40
41 GlobalDuplicateRecords() {}
42 ~GlobalDuplicateRecords() {}
43
44 std::set<uint32> content_matches_;
45 std::set<uint32> content_and_url_matches_;
46 };
47
48 } // namespace
49
50 DuplicateResourceHandler::DuplicateResourceHandler(
51 scoped_ptr<ResourceHandler> next_handler,
52 ResourceType::Type resource_type)
53 : LayeredResourceHandler(next_handler.Pass()),
54 resource_type_(resource_type),
55 pmurhash_ph1_(0),
56 pmurhash_pcarry_(0),
57 bytes_read_(0) {
58 }
59
60 DuplicateResourceHandler::~DuplicateResourceHandler() {
61 }
62
63 bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf,
64 int* buf_size, int min_size) {
65 DCHECK_EQ(-1, min_size);
66
67 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size))
68 return false;
69 read_buffer_ = *buf;
70 return true;
71 }
72
73 bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read,
74 bool* defer) {
75
76 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, read_buffer_->data(), by tes_read);
77 bytes_read_ += bytes_read;
78
79 return next_handler_->OnReadCompleted(request_id, bytes_read, defer);
80 }
81
82 bool DuplicateResourceHandler::OnResponseCompleted(
83 int request_id,
84 const net::URLRequestStatus& status,
85 const std::string& security_info) {
86
87 if (status.status() != net::URLRequestStatus::SUCCESS)
88 return next_handler_->OnResponseCompleted(request_id, status, security_info) ;
89
90 uint32 resource_hash = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, byte s_read_);
91
92 // Hash url into the resource to see whether it is from the same or
93 // different url.
94 uint32 hashed_with_url;
95 const std::string url_spec = request_->url().spec();
frankwang 2012/07/19 16:10:27 The segfault happens here.
gavinp 2012/07/19 16:14:13 It seems you never initialize request_.
96 int url_length = url_spec.size();
97 PMurHash32_Process(&pmurhash_ph1_, &pmurhash_pcarry_, url_spec.data(), url_len gth);
98 hashed_with_url = PMurHash32_Result(pmurhash_ph1_, pmurhash_pcarry_, url_lengt h + bytes_read_);
99
100 DVLOG(4) << "url: " << url_spec;
101 DVLOG(4) << "resource hash: " << resource_hash;
102 DVLOG(4) << "hash with url: " << hashed_with_url;
103
104 std::set<uint32>* content_hashes =
105 GlobalDuplicateRecords::GetInstance()->content_matches();
106 std::set<uint32>* content_and_url_hashes =
107 GlobalDuplicateRecords::GetInstance()->content_and_url_matches();
108
109 const bool did_match_contents = content_hashes->count(resource_hash);
110 const bool did_match_contents_and_url =
111 content_and_url_hashes->count(hashed_with_url);
112
113 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hits", did_match_contents);
114 UMA_HISTOGRAM_BOOLEAN("Duplicate.HitsSameUrl", did_match_contents &&
115 did_match_contents_and_url);
116 if (did_match_contents && !did_match_contents_and_url) {
117 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.Size.HashHitUrlMiss", bytes_read_,
118 1, 0x7FFFFFFF, 50);
119 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType.HashHitUrlMiss", resourc e_type_,
120 ResourceType::LAST_TYPE);
121 content_and_url_hashes->insert(hashed_with_url);
122 } else {
123 content_hashes->insert(resource_hash);
124 content_and_url_hashes->insert(hashed_with_url);
125 }
126
127 bytes_read_ = 0;
128 read_buffer_ = NULL;
129 return next_handler_->OnResponseCompleted(request_id, status, security_info);
130 }
131
132 } //namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698