OLD | NEW |
---|---|
(Empty) | |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "content/browser/renderer_host/duplicate_resource_handler.h" | |
6 | |
7 #include <cmath> | |
8 #include <cstring> | |
9 #include <set> | |
10 | |
11 #include "base/logging.h" | |
12 #include "base/metrics/histogram.h" | |
13 #include "content/browser/renderer_host/resource_request_info_impl.h" | |
14 #include "net/base/io_buffer.h" | |
15 #include "net/url_request/url_request.h" | |
16 #include "third_party/smhasher/src/PMurHash.h" | |
17 | |
18 | |
19 namespace content { | |
20 | |
21 namespace{ | |
22 | |
23 // This set keeps track of a hash of resources | |
24 // that we have seen | |
25 std::set<uint32>* GetSetOfHashes() { | |
gavinp
2012/07/18 12:13:32
Naming by data type isn't ideal. Better by use: Co
frankwang
2012/07/19 16:10:26
Done.
| |
26 static std::set<uint32> seen_resources; | |
gavinp
2012/07/18 12:13:32
Probably we should bite the bullet, and use base/m
frankwang
2012/07/19 16:10:26
Done.
| |
27 return &seen_resources; | |
28 } | |
29 | |
30 // This set keeps track of hash of resources based on origin | |
31 // that we have seen previously | |
32 std::set<uint32>* GetSetOfHashesWithURL(){ | |
33 static std::set<uint32> seen_resources_with_url; | |
34 return &seen_resources_with_url; | |
35 } | |
36 | |
37 } // namespace | |
38 | |
39 DuplicateResourceHandler::DuplicateResourceHandler( | |
40 scoped_ptr<ResourceHandler> next_handler, | |
41 ResourceType::Type resource_type, | |
42 net::URLRequest* request) | |
43 : LayeredResourceHandler(next_handler.Pass()), | |
44 resource_type_(resource_type), | |
45 ph1_(0), | |
46 pcarry_(0), | |
47 buffer_size_(0), | |
48 bytes_read_(0), | |
49 request_(request) { | |
50 } | |
51 | |
52 DuplicateResourceHandler::~DuplicateResourceHandler() { | |
53 } | |
54 | |
55 bool DuplicateResourceHandler::OnWillRead(int request_id, net::IOBuffer** buf, | |
56 int* buf_size, int min_size) { | |
57 DCHECK_EQ(-1, min_size); | |
58 | |
59 if (!next_handler_->OnWillRead(request_id, buf, buf_size, min_size)) | |
60 return false; | |
61 | |
gavinp
2012/07/18 12:13:32
Lose this blank line.
frankwang
2012/07/19 16:10:26
Done.
| |
62 read_buffer_ = *buf; | |
63 buffer_size_ = *buf_size; | |
64 return true; | |
65 } | |
66 | |
67 bool DuplicateResourceHandler::OnReadCompleted(int request_id, int bytes_read, | |
68 bool* defer) { | |
69 | |
70 PMurHash32_Process(&ph1_,&pcarry_,read_buffer_->data(), bytes_read); | |
gavinp
2012/07/18 12:13:32
spaces after commas.
frankwang
2012/07/19 16:10:26
Done.
| |
71 bytes_read_ += bytes_read; | |
72 | |
73 return next_handler_->OnReadCompleted(request_id, bytes_read, defer); | |
74 } | |
75 | |
76 bool DuplicateResourceHandler::OnResponseCompleted( | |
77 int request_id, | |
78 const net::URLRequestStatus& status, | |
79 const std::string& security_info) { | |
gavinp
2012/07/18 12:13:32
What should you do for status != net::URLRequestSt
frankwang
2012/07/19 16:10:26
I put in a check so that I pass through when it is
| |
80 | |
81 uint32 resource_hash = PMurHash32_Result(ph1_, pcarry_, bytes_read_); | |
82 | |
83 // Hash url into the resource to see whether it is | |
84 // from the same or different origin | |
gavinp
2012/07/18 12:13:32
Comments should be sentences (have a period) and w
frankwang
2012/07/19 16:10:26
Done.
| |
85 uint32 hashed_with_url; | |
86 const char* url = request_->url().spec().c_str(); | |
87 int url_length = strlen(url); | |
gavinp
2012/07/18 12:13:32
This scares me a bit. Is it safe to trust c_str()
frankwang
2012/07/19 16:10:26
Done.
| |
88 PMurHash32_Process(&ph1_, &pcarry_, url, url_length); | |
89 hashed_with_url = PMurHash32_Result(ph1_, pcarry_, url_length + bytes_read_); | |
90 | |
91 DVLOG(4) << "url: " << url; | |
92 DVLOG(4) << "resource hash: " << resource_hash; | |
93 DVLOG(4) << "hash with url: " << hashed_with_url; | |
94 | |
95 // This boolean answers whether we found resource | |
96 // based just on hash | |
97 const bool did_we_find_resource = | |
gavinp
2012/07/18 12:13:32
did_match_contents_ maybe?
frankwang
2012/07/19 16:10:26
Done.
| |
98 GetSetOfHashes()->find(resource_hash) != | |
gavinp
2012/07/18 12:13:32
Use count(resource_hash) instead. You don't need t
| |
99 GetSetOfHashes()->end(); | |
100 | |
101 // This boolean checks whether we found a resource from the original url | |
102 // as one previously seen | |
103 const bool did_we_find_resource_original_url = | |
gavinp
2012/07/18 12:13:32
I've thought about it more, and now I don't like t
frankwang
2012/07/19 16:10:26
Done.
| |
104 GetSetOfHashesWithURL()->find(hashed_with_url) != | |
105 GetSetOfHashesWithURL()->end(); | |
106 | |
107 // If we found the resource, classify whether it is | |
108 // from the same url or different | |
gavinp
2012/07/18 12:13:32
Best practice is to have a single instance of each
frankwang
2012/07/19 16:10:26
Done.
| |
109 if (did_we_find_resource) { | |
110 // If it is from the original url, it will hit on both caches | |
111 if (did_we_find_resource_original_url) { | |
112 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true); | |
gavinp
2012/07/18 12:13:32
I've thought about this a bit more: I think you ju
frankwang
2012/07/19 16:10:26
Done.
| |
113 UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", true); | |
114 } else { | |
115 // If it is a different url (interesting case), it hits on the | |
116 // proposed cache not the current cache | |
117 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", true); | |
118 UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false); | |
119 // Record bytes missed because we are caching | |
120 // based on origin instead of resource | |
121 UMA_HISTOGRAM_CUSTOM_COUNTS("Duplicate.HashMiss.Size", bytes_read_, | |
gavinp
2012/07/18 12:13:32
I think you want: "Duplicate.Size.HashHitUrlMiss"
frankwang
2012/07/19 16:10:26
Done.
| |
122 1, 0x7FFFFFFF, 50); | |
123 // Record resource type for missed resource | |
124 UMA_HISTOGRAM_ENUMERATION("Duplicate.ResourceType", resource_type_, | |
gavinp
2012/07/18 12:13:32
The name should reflect that it's for missed resou
frankwang
2012/07/19 16:10:26
Done.
| |
125 ResourceType::LAST_TYPE); | |
126 GetSetOfHashesWithURL()->insert(hashed_with_url); | |
127 } | |
128 } else { | |
129 // We did not see the resource so it is a miss on both caches | |
gavinp
2012/07/18 12:13:32
I don't like comments like this. The logic should
frankwang
2012/07/19 16:10:26
Done.
| |
130 UMA_HISTOGRAM_BOOLEAN("Duplicate.Hash.Hits", false); | |
131 UMA_HISTOGRAM_BOOLEAN("Duplicate.HashSameUrl.Hits", false); | |
132 GetSetOfHashes()->insert(resource_hash); | |
133 GetSetOfHashesWithURL()->insert(hashed_with_url); | |
134 } | |
135 | |
136 bytes_read_ = 0; | |
137 read_buffer_ = NULL; | |
138 return next_handler_->OnResponseCompleted(request_id, status, security_info); | |
139 } | |
140 | |
141 } //namespace content | |
OLD | NEW |