OLD | NEW |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | 1 // Copyright 2017 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/browser/page_load_metrics/observers/ads_page_load_metrics_obser
ver.h" | 5 #include "chrome/browser/page_load_metrics/observers/ads_page_load_metrics_obser
ver.h" |
6 | 6 |
7 #include <string> | 7 #include <string> |
8 #include <utility> | 8 #include <utility> |
9 | 9 |
10 #include "base/feature_list.h" | 10 #include "base/feature_list.h" |
11 #include "base/logging.h" | 11 #include "base/logging.h" |
12 #include "base/memory/ptr_util.h" | 12 #include "base/memory/ptr_util.h" |
13 #include "base/strings/string_util.h" | 13 #include "base/strings/string_util.h" |
14 #include "chrome/browser/page_load_metrics/ads_detection.h" | |
15 #include "chrome/browser/page_load_metrics/page_load_metrics_util.h" | 14 #include "chrome/browser/page_load_metrics/page_load_metrics_util.h" |
16 #include "content/public/browser/navigation_handle.h" | 15 #include "content/public/browser/navigation_handle.h" |
17 #include "content/public/browser/render_frame_host.h" | 16 #include "content/public/browser/render_frame_host.h" |
18 #include "content/public/browser/web_contents.h" | 17 #include "content/public/browser/web_contents.h" |
19 #include "url/gurl.h" | 18 #include "url/gurl.h" |
20 | 19 |
21 namespace { | 20 namespace { |
22 | 21 |
23 const base::Feature kAdsFeature{"AdsMetrics", base::FEATURE_ENABLED_BY_DEFAULT}; | 22 const base::Feature kAdsFeature{"AdsMetrics", base::FEATURE_ENABLED_BY_DEFAULT}; |
24 | 23 |
// ADS_HISTOGRAM(suffix, hist_macro, ad_type, value): invokes |hist_macro| with
// |value| under a histogram name selected by |ad_type|:
// "PageLoad.Clients.Ads.Google.", "PageLoad.Clients.Ads.SubresourceFilter." or
// "PageLoad.Clients.Ads.All.", followed by |suffix|. |suffix| is joined to the
// prefix by adjacent string-literal concatenation, so it must be a string
// literal. The switch has no default case. OLD and NEW differ only in how the
// AD_TYPE_* enumerators are qualified.
25 #define ADS_HISTOGRAM(suffix, hist_macro, ad_type, value) \ | 24 #define ADS_HISTOGRAM(suffix, hist_macro, ad_type, value) \ |
26 switch (ad_type) { \ | 25 switch (ad_type) { \ |
27 case page_load_metrics::AD_TYPE_GOOGLE: \ | 26 case AdsPageLoadMetricsObserver::AD_TYPE_GOOGLE: \ |
28 hist_macro("PageLoad.Clients.Ads.Google." suffix, value); \ | 27 hist_macro("PageLoad.Clients.Ads.Google." suffix, value); \ |
29 break; \ | 28 break; \ |
30 case page_load_metrics::AD_TYPE_SUBRESOURCE_FILTER: \ | 29 case AdsPageLoadMetricsObserver::AD_TYPE_SUBRESOURCE_FILTER: \ |
31 hist_macro("PageLoad.Clients.Ads.SubresourceFilter." suffix, value); \ | 30 hist_macro("PageLoad.Clients.Ads.SubresourceFilter." suffix, value); \ |
32 break; \ | 31 break; \ |
33 case page_load_metrics::AD_TYPE_ALL: \ | 32 case AdsPageLoadMetricsObserver::AD_TYPE_ALL: \ |
34 hist_macro("PageLoad.Clients.Ads.All." suffix, value); \ | 33 hist_macro("PageLoad.Clients.Ads.All." suffix, value); \ |
35 break; \ | 34 break; \ |
36 } | 35 } |
37 | 36 |
// NEW column: DetectGoogleAd() returns true when the navigating frame looks
// like a Google ad. That is the case when the name of the frame found for the
// navigation's frame tree node id starts with "google_ads_iframe" or
// "google_ads_frame" (case-sensitive), or when the navigation URL has host
// "tpc.googlesyndication.com" and a path starting with "/safeframe".
// The OLD column on the first two rows still shows the previous signature of
// RecordParentExistsForSubFrame().
38 void RecordParentExistsForSubFrame(bool parent_exists, | 37 bool DetectGoogleAd(content::NavigationHandle* navigation_handle) { |
39 const page_load_metrics::AdTypes& ad_types) { | 38 // Because sub-resource filtering isn't always enabled, and doesn't work |
| 39 // well in monitoring mode (no CSS enforcement), it's difficult to identify |
| 40 // ads. Google ads are prevalent and easy to track, so we'll start by |
| 41 // tracking those. Note that the frame name can be very large, so be careful |
| 42 // to avoid full string searches if possible. |
| 43 // TODO(jkarlin): Track other ad networks that are easy to identify. |
| 44 |
| 45 // In case the navigation aborted, look up the RFH by the Frame Tree Node |
| 46 // ID. It returns the committed frame host or the initial frame host for the |
| 47 // frame if no committed host exists. Using a previous host is fine because |
| 48 // once a frame has an ad we always consider it to have an ad. |
| 49 // We use the unsafe method of FindFrameByFrameTreeNodeId because we're not |
| 50 // concerned with which process the frame lives on (we're just measuring |
| 51 // bytes and not granting security privileges). |
| 52 content::RenderFrameHost* current_frame_host = |
| 53 navigation_handle->GetWebContents()->UnsafeFindFrameByFrameTreeNodeId( |
| 54 navigation_handle->GetFrameTreeNodeId()); |
| 55 if (current_frame_host) { |
| 56 const std::string& frame_name = current_frame_host->GetFrameName(); |
| 57 if (base::StartsWith(frame_name, "google_ads_iframe", |
| 58 base::CompareCase::SENSITIVE) || |
| 59 base::StartsWith(frame_name, "google_ads_frame", |
| 60 base::CompareCase::SENSITIVE)) { |
| 61 return true; |
| 62 } |
| 63 } |
| 64 |
| 65 const GURL& url = navigation_handle->GetURL(); |
| 66 return url.host_piece() == "tpc.googlesyndication.com" && |
| 67 base::StartsWith(url.path_piece(), "/safeframe", |
| 68 base::CompareCase::SENSITIVE); |
| 69 } |
| 70 |
// Records |parent_exists| as a boolean sample in the histogram
// "PageLoad.Clients.Ads.All.ParentExistsForSubFrame".
// NOTE(review): |ad_types| is accepted but never read in this body; the sample
// always goes to the AD_TYPE_ALL histogram. Confirm whether per-type recording
// was intended.
| 71 void RecordParentExistsForSubFrame( |
| 72 bool parent_exists, |
| 73 const AdsPageLoadMetricsObserver::AdTypes& ad_types) { |
40 ADS_HISTOGRAM("ParentExistsForSubFrame", UMA_HISTOGRAM_BOOLEAN, | 74 ADS_HISTOGRAM("ParentExistsForSubFrame", UMA_HISTOGRAM_BOOLEAN, |
41 page_load_metrics::AD_TYPE_ALL, parent_exists); | 75 AdsPageLoadMetricsObserver::AD_TYPE_ALL, parent_exists); |
42 } | 76 } |
43 | 77 |
44 } // namespace | 78 } // namespace |
45 | 79 |
// Constructs the bookkeeping record for one ad frame: both byte counters
// (frame_bytes, frame_bytes_uncached) start at zero, while the frame tree node
// id and the set of ad types are stored exactly as passed in.
46 AdsPageLoadMetricsObserver::AdFrameData::AdFrameData( | 80 AdsPageLoadMetricsObserver::AdFrameData::AdFrameData( |
47 FrameTreeNodeId frame_tree_node_id, | 81 FrameTreeNodeId frame_tree_node_id, |
48 page_load_metrics::AdTypes ad_types) | 82 AdTypes ad_types) |
49 : frame_bytes(0u), | 83 : frame_bytes(0u), |
50 frame_bytes_uncached(0u), | 84 frame_bytes_uncached(0u), |
51 frame_tree_node_id(frame_tree_node_id), | 85 frame_tree_node_id(frame_tree_node_id), |
52 ad_types(ad_types) {} | 86 ad_types(ad_types) {} |
53 | 87 |
54 // static | 88 // static |
55 std::unique_ptr<AdsPageLoadMetricsObserver> | 89 std::unique_ptr<AdsPageLoadMetricsObserver> |
56 AdsPageLoadMetricsObserver::CreateIfNeeded() { | 90 AdsPageLoadMetricsObserver::CreateIfNeeded() { |
57 if (!base::FeatureList::IsEnabled(kAdsFeature)) | 91 if (!base::FeatureList::IsEnabled(kAdsFeature)) |
58 return nullptr; | 92 return nullptr; |
(...skipping 37 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
96 void AdsPageLoadMetricsObserver::OnDidFinishSubFrameNavigation( | 130 void AdsPageLoadMetricsObserver::OnDidFinishSubFrameNavigation( |
97 content::NavigationHandle* navigation_handle) { | 131 content::NavigationHandle* navigation_handle) { |
98 // Determine if the frame is part of an existing ad, the root of a new ad, | 132 // Determine if the frame is part of an existing ad, the root of a new ad, |
99 // or a non-ad frame. Once a frame is labeled as an ad, it is always | 133 // or a non-ad frame. Once a frame is labeled as an ad, it is always |
100 // considered an ad, even if it navigates to a non-ad page. This function | 134 // considered an ad, even if it navigates to a non-ad page. This function |
101 // labels all of a page's frames, even those that fail to commit. | 135 // labels all of a page's frames, even those that fail to commit. |
102 FrameTreeNodeId frame_tree_node_id = navigation_handle->GetFrameTreeNodeId(); | 136 FrameTreeNodeId frame_tree_node_id = navigation_handle->GetFrameTreeNodeId(); |
103 content::RenderFrameHost* parent_frame_host = | 137 content::RenderFrameHost* parent_frame_host = |
104 navigation_handle->GetParentFrame(); | 138 navigation_handle->GetParentFrame(); |
105 | 139 |
106 page_load_metrics::AdTypes ad_types = | 140 AdTypes ad_types = DetectAds(navigation_handle); |
107 page_load_metrics::GetDetectedAdTypes(navigation_handle); | |
108 | 141 |
109 const auto& id_and_data = ad_frames_data_.find(frame_tree_node_id); | 142 const auto& id_and_data = ad_frames_data_.find(frame_tree_node_id); |
110 if (id_and_data != ad_frames_data_.end()) { | 143 if (id_and_data != ad_frames_data_.end()) { |
111 // An existing subframe is navigating again. | 144 // An existing subframe is navigating again. |
112 if (id_and_data->second) { | 145 if (id_and_data->second) { |
113 // The subframe was an ad to begin with, keep tracking it as an ad. | 146 // The subframe was an ad to begin with, keep tracking it as an ad. |
114 ProcessOngoingNavigationResource(frame_tree_node_id); | 147 ProcessOngoingNavigationResource(frame_tree_node_id); |
115 | 148 |
116 if (frame_tree_node_id == id_and_data->second->frame_tree_node_id) { | 149 if (frame_tree_node_id == id_and_data->second->frame_tree_node_id) { |
117 // This is the top-most frame in the ad. | 150 // This is the top-most frame in the ad. |
118 ADS_HISTOGRAM("Navigations.AdFrameRenavigatedToAd", | 151 ADS_HISTOGRAM("Navigations.AdFrameRenavigatedToAd", |
119 UMA_HISTOGRAM_BOOLEAN, page_load_metrics::AD_TYPE_ALL, | 152 UMA_HISTOGRAM_BOOLEAN, AD_TYPE_ALL, ad_types.any()); |
120 ad_types.any()); | |
121 } | 153 } |
122 return; | 154 return; |
123 } | 155 } |
124 // This frame was previously not an ad, process it as usual. If it had | 156 // This frame was previously not an ad, process it as usual. If it had |
125 // any child frames that were ads, those will still be recorded. | 157 // any child frames that were ads, those will still be recorded. |
126 ADS_HISTOGRAM("Navigations.NonAdFrameRenavigatedToAd", | 158 ADS_HISTOGRAM("Navigations.NonAdFrameRenavigatedToAd", |
127 UMA_HISTOGRAM_BOOLEAN, page_load_metrics::AD_TYPE_ALL, | 159 UMA_HISTOGRAM_BOOLEAN, AD_TYPE_ALL, ad_types.any()); |
128 ad_types.any()); | |
129 } | 160 } |
130 | 161 |
131 // Determine who the parent frame's ad ancestor is. | 162 // Determine who the parent frame's ad ancestor is. |
132 const auto& parent_id_and_data = | 163 const auto& parent_id_and_data = |
133 ad_frames_data_.find(parent_frame_host->GetFrameTreeNodeId()); | 164 ad_frames_data_.find(parent_frame_host->GetFrameTreeNodeId()); |
134 if (parent_id_and_data == ad_frames_data_.end()) { | 165 if (parent_id_and_data == ad_frames_data_.end()) { |
135 // We don't know who the parent for this frame is. One possibility is that | 166 // We don't know who the parent for this frame is. One possibility is that |
136 // it's a frame from a previous navigation. | 167 // it's a frame from a previous navigation. |
137 RecordParentExistsForSubFrame(false /* parent_exists */, ad_types); | 168 RecordParentExistsForSubFrame(false /* parent_exists */, ad_types); |
138 return; | 169 return; |
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
175 RecordHistograms(); | 206 RecordHistograms(); |
176 } | 207 } |
177 | 208 |
// Receives the subresource-filter load policy evaluated for a subframe
// navigation. Only WOULD_DISALLOW is acted on; every other policy is ignored.
// OLD: tags the navigation with AD_TYPE_SUBRESOURCE_FILTER through
// SetDetectedAdType(). NEW: stores the navigation's frame tree node id in
// unfinished_subresource_ad_frames_, which DetectSubresourceFilterAd() later
// checks and erases.
178 void AdsPageLoadMetricsObserver::OnSubframeNavigationEvaluated( | 209 void AdsPageLoadMetricsObserver::OnSubframeNavigationEvaluated( |
179 content::NavigationHandle* navigation_handle, | 210 content::NavigationHandle* navigation_handle, |
180 subresource_filter::LoadPolicy load_policy) { | 211 subresource_filter::LoadPolicy load_policy) { |
181 // We don't track DISALLOW frames because their resources won't be loaded | 212 // We don't track DISALLOW frames because their resources won't be loaded |
182 // and therefore would provide bad histogram data. Note that WOULD_DISALLOW | 213 // and therefore would provide bad histogram data. Note that WOULD_DISALLOW |
183 // is only seen in dry runs. | 214 // is only seen in dry runs. |
184 if (load_policy == subresource_filter::LoadPolicy::WOULD_DISALLOW) { | 215 if (load_policy == subresource_filter::LoadPolicy::WOULD_DISALLOW) { |
185 SetDetectedAdType(navigation_handle, | 216 unfinished_subresource_ad_frames_.insert( |
186 page_load_metrics::AD_TYPE_SUBRESOURCE_FILTER); | 217 navigation_handle->GetFrameTreeNodeId()); |
187 } | 218 } |
188 } | 219 } |
189 | 220 |
// Calls RemoveAll() on subresource_observer_ when the subresource filter is
// going away. NOTE(review): presumably this unregisters the observer so no
// registration outlives the filter; confirm against the member's declaration.
190 void AdsPageLoadMetricsObserver::OnSubresourceFilterGoingAway() { | 221 void AdsPageLoadMetricsObserver::OnSubresourceFilterGoingAway() { |
191 subresource_observer_.RemoveAll(); | 222 subresource_observer_.RemoveAll(); |
192 } | 223 } |
193 | 224 |
// NEW column only. Erases |frame_tree_node_id| from
// unfinished_subresource_ad_frames_ and returns the result of erase()
// converted to bool. The check therefore consumes the entry.
// NOTE(review): assumes a set-like container whose erase(key) returns the
// number of elements removed, so the implicit conversion yields true exactly
// when the id was present; confirm against the member's declaration. An
// explicit comparison (e.g. "> 0") would make that intent clearer.
| 225 bool AdsPageLoadMetricsObserver::DetectSubresourceFilterAd( |
| 226 FrameTreeNodeId frame_tree_node_id) { |
| 227 return unfinished_subresource_ad_frames_.erase(frame_tree_node_id); |
| 228 } |
| 229 |
// NEW column only. Builds the set of ad types detected for |navigation_handle|:
// AD_TYPE_GOOGLE is set when DetectGoogleAd() returns true, and
// AD_TYPE_SUBRESOURCE_FILTER is set when DetectSubresourceFilterAd() returns
// true for the navigation's frame tree node id. Returns an empty set when
// neither matches. Because DetectSubresourceFilterAd() erases the entry it
// checks, this call is not idempotent.
| 230 AdsPageLoadMetricsObserver::AdTypes AdsPageLoadMetricsObserver::DetectAds( |
| 231 content::NavigationHandle* navigation_handle) { |
| 232 AdTypes ad_types; |
| 233 |
| 234 if (DetectGoogleAd(navigation_handle)) |
| 235 ad_types.set(AD_TYPE_GOOGLE); |
| 236 |
| 237 if (DetectSubresourceFilterAd(navigation_handle->GetFrameTreeNodeId())) |
| 238 ad_types.set(AD_TYPE_SUBRESOURCE_FILTER); |
| 239 |
| 240 return ad_types; |
| 241 } |
| 242 |
194 void AdsPageLoadMetricsObserver::ProcessLoadedResource( | 243 void AdsPageLoadMetricsObserver::ProcessLoadedResource( |
195 const page_load_metrics::ExtraRequestCompleteInfo& extra_request_info) { | 244 const page_load_metrics::ExtraRequestCompleteInfo& extra_request_info) { |
196 const auto& id_and_data = | 245 const auto& id_and_data = |
197 ad_frames_data_.find(extra_request_info.frame_tree_node_id); | 246 ad_frames_data_.find(extra_request_info.frame_tree_node_id); |
198 if (id_and_data == ad_frames_data_.end()) { | 247 if (id_and_data == ad_frames_data_.end()) { |
199 if (extra_request_info.resource_type == content::RESOURCE_TYPE_MAIN_FRAME || | 248 if (extra_request_info.resource_type == content::RESOURCE_TYPE_MAIN_FRAME || |
200 extra_request_info.resource_type == content::RESOURCE_TYPE_SUB_FRAME) { | 249 extra_request_info.resource_type == content::RESOURCE_TYPE_SUB_FRAME) { |
201 // This resource request is the primary resource load for a frame that | 250 // This resource request is the primary resource load for a frame that |
202 // hasn't yet finished navigating. Hang onto the request info and replay | 251 // hasn't yet finished navigating. Hang onto the request info and replay |
203 // it once the frame finishes navigating. | 252 // it once the frame finishes navigating. |
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
236 | 285 |
237 if (ancestor_data) { | 286 if (ancestor_data) { |
238 ancestor_data->frame_bytes += extra_request_info.raw_body_bytes; | 287 ancestor_data->frame_bytes += extra_request_info.raw_body_bytes; |
239 if (!extra_request_info.was_cached) { | 288 if (!extra_request_info.was_cached) { |
240 ancestor_data->frame_bytes_uncached += extra_request_info.raw_body_bytes; | 289 ancestor_data->frame_bytes_uncached += extra_request_info.raw_body_bytes; |
241 } | 290 } |
242 } | 291 } |
243 } | 292 } |
244 | 293 |
// Records histograms once per ad type by calling RecordHistogramsForType() for
// AD_TYPE_GOOGLE, AD_TYPE_SUBRESOURCE_FILTER and AD_TYPE_ALL, in that order.
// OLD and NEW differ only in how the enumerators are qualified.
245 void AdsPageLoadMetricsObserver::RecordHistograms() { | 294 void AdsPageLoadMetricsObserver::RecordHistograms() { |
246 RecordHistogramsForType(page_load_metrics::AD_TYPE_GOOGLE); | 295 RecordHistogramsForType(AD_TYPE_GOOGLE); |
247 RecordHistogramsForType(page_load_metrics::AD_TYPE_SUBRESOURCE_FILTER); | 296 RecordHistogramsForType(AD_TYPE_SUBRESOURCE_FILTER); |
248 RecordHistogramsForType(page_load_metrics::AD_TYPE_ALL); | 297 RecordHistogramsForType(AD_TYPE_ALL); |
249 } | 298 } |
250 | 299 |
251 void AdsPageLoadMetricsObserver::RecordHistogramsForType(int ad_type) { | 300 void AdsPageLoadMetricsObserver::RecordHistogramsForType(int ad_type) { |
252 if (page_bytes_ == 0) | 301 if (page_bytes_ == 0) |
253 return; | 302 return; |
254 | 303 |
255 int non_zero_ad_frames = 0; | 304 int non_zero_ad_frames = 0; |
256 size_t total_ad_frame_bytes = 0; | 305 size_t total_ad_frame_bytes = 0; |
257 size_t uncached_ad_frame_bytes = 0; | 306 size_t uncached_ad_frame_bytes = 0; |
258 | 307 |
259 for (const AdFrameData& ad_frame_data : ad_frames_data_storage_) { | 308 for (const AdFrameData& ad_frame_data : ad_frames_data_storage_) { |
260 if (ad_frame_data.frame_bytes == 0) | 309 if (ad_frame_data.frame_bytes == 0) |
261 continue; | 310 continue; |
262 | 311 |
263 // If this isn't the type of ad we're looking for, move on to the next. | 312 // If this isn't the type of ad we're looking for, move on to the next. |
264 if (ad_type != page_load_metrics::AD_TYPE_ALL && | 313 if (ad_type != AD_TYPE_ALL && !ad_frame_data.ad_types.test(ad_type)) |
265 !ad_frame_data.ad_types.test(ad_type)) | |
266 continue; | 314 continue; |
267 | 315 |
268 non_zero_ad_frames += 1; | 316 non_zero_ad_frames += 1; |
269 total_ad_frame_bytes += ad_frame_data.frame_bytes; | 317 total_ad_frame_bytes += ad_frame_data.frame_bytes; |
270 | 318 |
271 uncached_ad_frame_bytes += ad_frame_data.frame_bytes_uncached; | 319 uncached_ad_frame_bytes += ad_frame_data.frame_bytes_uncached; |
272 ADS_HISTOGRAM("Bytes.AdFrames.PerFrame.Total", PAGE_BYTES_HISTOGRAM, | 320 ADS_HISTOGRAM("Bytes.AdFrames.PerFrame.Total", PAGE_BYTES_HISTOGRAM, |
273 ad_type, ad_frame_data.frame_bytes); | 321 ad_type, ad_frame_data.frame_bytes); |
274 ADS_HISTOGRAM("Bytes.AdFrames.PerFrame.Network", PAGE_BYTES_HISTOGRAM, | 322 ADS_HISTOGRAM("Bytes.AdFrames.PerFrame.Network", PAGE_BYTES_HISTOGRAM, |
275 ad_type, ad_frame_data.frame_bytes_uncached); | 323 ad_type, ad_frame_data.frame_bytes_uncached); |
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
// Replays the resource request stored for |frame_tree_node_id| in
// ongoing_navigation_resources_: if an entry exists, it is passed to
// ProcessLoadedResource() and then erased from the container. Does nothing
// when no entry is stored for that frame.
319 void AdsPageLoadMetricsObserver::ProcessOngoingNavigationResource( | 367 void AdsPageLoadMetricsObserver::ProcessOngoingNavigationResource( |
320 FrameTreeNodeId frame_tree_node_id) { | 368 FrameTreeNodeId frame_tree_node_id) { |
321 const auto& frame_id_and_request = | 369 const auto& frame_id_and_request = |
322 ongoing_navigation_resources_.find(frame_tree_node_id); | 370 ongoing_navigation_resources_.find(frame_tree_node_id); |
323 if (frame_id_and_request == ongoing_navigation_resources_.end()) | 371 if (frame_id_and_request == ongoing_navigation_resources_.end()) |
324 return; | 372 return; |
325 | 373 |
326 ProcessLoadedResource(frame_id_and_request->second); | 374 ProcessLoadedResource(frame_id_and_request->second); |
327 ongoing_navigation_resources_.erase(frame_id_and_request); | 375 ongoing_navigation_resources_.erase(frame_id_and_request); |
328 } | 376 } |
OLD | NEW |