Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(38)

Side by Side Diff: content/browser/renderer_host/duplicate_resource_handler.cc

Issue 10701151: DuplicateContentResourceHandler to monitor resources and track how many times th… (Closed) Base URL: http://src.chromium.org/svn/trunk/src/
Patch Set: Created 8 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "content/browser/renderer_host/duplicate_resource_handler.h"
6
7 #include <cmath>
8 #include <cstring>
9 #include <set>
10
11 #include "base/logging.h"
12 #include "base/metrics/histogram.h"
13 #include "content/browser/renderer_host/resource_request_info_impl.h"
14 #include "net/base/io_buffer.h"
15 #include "net/url_request/url_request.h"
16 #include "third_party/smhasher/src/PMurHash.h"
17
18
19 namespace content {
20
21 namespace{
22
23 // This set keeps track of a hash of resources
24 // that we have seen
25 std::set<uint32>* GetSetOfHashes() {
gavinp 2012/07/18 12:13:32 Naming by data type isn't ideal. Better by use: Co
frankwang 2012/07/19 16:10:26 Done.
26 static std::set<uint32> seen_resources;
gavinp 2012/07/18 12:13:32 Probably we should bite the bullet, and use base/m
frankwang 2012/07/19 16:10:26 Done.
27 return &seen_resources;
28 }
29
30 // This set keeps track of hash of resources based on origin
31 // that we have seen previously
32 std::set<uint32>* GetSetOfHashesWithURL(){
33 static std::set<uint32> seen_resources_with_url;
34 return &seen_resources_with_url;
35 }
36
37 } // namespace
38
39 DuplicateResourceHandler::DuplicateResourceHandler(
40 scoped_ptr<ResourceHandler> next_handler,
41 ResourceType::Type resource_type,
42 net::URLRequest* request)
43 : LayeredResourceHandler(next_handler.Pass()),
44 resource_type_(resource_type),
45 ph1_(0),
46 pcarry_(0),
47 buffer_size_(0),
48 bytes_read_(0),
49 request_(request) {
50 }
51
52 DuplicateResourceHandler::~DuplicateResourceHandler() {
53 }
54
55 bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf,
56 int* buf_size, int min_size) {
57 DCHECK_EQ(-1, min_size);
58
59 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size))
60 return false;
61
gavinp 2012/07/18 12:13:32 Lose this blank line.
frankwang 2012/07/19 16:10:26 Done.
62 read_buffer_ = *buf;
63 buffer_size_ = *buf_size;
64 return true;
65 }
66
67 bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read,
68 bool* defer) {
69
70 PMurHash32_Process(&ph1_,&pcarry_,read_buffer_->data(), bytes_read);
gavinp 2012/07/18 12:13:32 spaces after commas.
frankwang 2012/07/19 16:10:26 Done.
71 bytes_read_ += bytes_read;
72
73 return next_handler_->OnReadCompleted(request_id, bytes_read, defer);
74 }
75
76 bool DuplicateResourceHandler::OnResponseCompleted(
77 int request_id,
78 const net::URLRequestStatus& status,
79 const std::string& security_info) {
gavinp 2012/07/18 12:13:32 What should you do for status != net::URLRequestSt
frankwang 2012/07/19 16:10:26 I put in a check so that I pass through when it is
80
81 uint32 resource_hash = PMurHash32_Result(ph1_, pcarry_, bytes_read_);
82
83 // Hash url into the resource to see whether it is
84 // from the same or different origin
gavinp 2012/07/18 12:13:32 Comments should be sentences (have a period) and w
frankwang 2012/07/19 16:10:26 Done.
85 uint32 hashed_with_url;
86 const char* url = request_->url().spec().c_str();
87 int url_length = strlen(url);
gavinp 2012/07/18 12:13:32 This scares me a bit. Is it safe to trust c_str()
frankwang 2012/07/19 16:10:26 Done.
88 PMurHash32_Process(&ph1_, &pcarry_, url, url_length);
89 hashed_with_url = PMurHash32_Result(ph1_, pcarry_, url_length + bytes_read_);
90
91 DVLOG(4) << "url: " << url;
92 DVLOG(4) << "resource hash: " << resource_hash;
93 DVLOG(4) << "hash with url: " << hashed_with_url;
94
95 // This boolean answers whether we found resource
96 // based just on hash
97 const bool did_we_find_resource =
gavinp 2012/07/18 12:13:32 did_match_contents_ maybe?
frankwang 2012/07/19 16:10:26 Done.
98 GetSetOfHashes()->find(resource_hash) !=
gavinp 2012/07/18 12:13:32 Use count(resource_hash) instead. You don't need t
99 GetSetOfHashes()->end();
100
101 // This boolean checks whether we found a resource from the original url
102 // as one previously seen
103 const bool did_we_find_resource_original_url =
gavinp 2012/07/18 12:13:32 I've thought about it more, and now I don't like t
frankwang 2012/07/19 16:10:26 Done.
104 GetSetOfHashesWithURL()->find(hashed_with_url) !=
105 GetSetOfHashesWithURL()->end();
106
107 // If we found the resource, classify whether it is
108 // from the same url or different
gavinp 2012/07/18 12:13:32 Best practice is to have a single instance of each
frankwang 2012/07/19 16:10:26 Done.
109 if (did_we_find_resource) {
110 // If it is from the original url, it will hit on both caches
111 if (did_we_find_resource_original_url) {
112 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true);
gavinp 2012/07/18 12:13:32 I've thought about this a bit more: I think you ju
frankwang 2012/07/19 16:10:26 Done.
113 UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", true);
114 } else {
115 // If it is a different url (interesting case), it hits on the
116 // proposed cache not the current cache
117 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true);
118 UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false);
119 // Record bytes missed because we are caching
120 // based on origin instead of resource
121 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.HashMiss.Size", bytes_read_,
gavinp 2012/07/18 12:13:32 I think you want: "Duplicate.Size.HashHitUrlMiss"
frankwang 2012/07/19 16:10:26 Done.
122 1, 0x7FFFFFFF, 50);
123 // Record resource type for missed resource
124 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType", resource_type_,
gavinp 2012/07/18 12:13:32 The name should reflect that it's for missed resou
frankwang 2012/07/19 16:10:26 Done.
125 ResourceType::LAST_TYPE);
126 GetSetOfHashesWithURL()->insert(hashed_with_url);
127 }
128 } else {
129 // We did not see the resource so it is a miss on both caches
gavinp 2012/07/18 12:13:32 I don't like comments like this. The logic should
frankwang 2012/07/19 16:10:26 Done.
130 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", false);
131 UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false);
132 GetSetOfHashes()->insert(resource_hash);
133 GetSetOfHashesWithURL()->insert(hashed_with_url);
134 }
135
136 bytes_read_ = 0;
137 read_buffer_ = NULL;
138 return next_handler_->OnResponseCompleted(request_id, status, security_info);
139 }
140
141 } //namespace content
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698