OLD | NEW |
| (Empty) |
1 // Copyright 2017 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
#include "chrome/browser/page_load_metrics/observers/ads_page_load_metrics_observer.h"

#include <stdint.h>

#include <string>
#include <utility>

#include "base/feature_list.h"
#include "base/logging.h"
#include "base/memory/ptr_util.h"
#include "base/strings/string_util.h"
#include "chrome/browser/page_load_metrics/page_load_metrics_util.h"
#include "content/public/browser/navigation_handle.h"
#include "content/public/browser/render_frame_host.h"
#include "content/public/browser/web_contents.h"
#include "url/gurl.h"
19 | |
20 namespace { | |
21 | |
22 const base::Feature kAdsFeature{"AdsMetrics", base::FEATURE_ENABLED_BY_DEFAULT}; | |
23 | |
24 bool FrameIsAd(content::NavigationHandle* navigation_handle) { | |
25 content::RenderFrameHost* current_frame_host = | |
26 navigation_handle->GetRenderFrameHost(); | |
27 DCHECK(current_frame_host); | |
28 const std::string& name = current_frame_host->GetFrameName(); | |
29 const GURL& url = navigation_handle->GetURL(); | |
30 | |
31 // Because sub-resource filtering isn't always enabled, and doesn't work | |
32 // well in monitoring mode (no CSS enforcement), it's difficult to identify | |
33 // ads. Google ads are prevalent and easy to track, so we'll start by | |
34 // tracking those. Note that the frame name can be very large, so be careful | |
35 // to avoid full string searches if possible. | |
36 // TODO(jkarlin): Track other ad networks that are easy to identify. | |
37 return base::StartsWith(name, "google_ads_iframe", | |
38 base::CompareCase::SENSITIVE) || | |
39 base::StartsWith(name, "google_ads_frame", | |
40 base::CompareCase::SENSITIVE) || | |
41 (url.host_piece() == "tpc.googlesyndication.com" && | |
42 base::StartsWith(url.path_piece(), "/safeframe", | |
43 base::CompareCase::SENSITIVE)); | |
44 } | |
45 | |
46 } // namespace | |
47 | |
48 AdsPageLoadMetricsObserver::AdFrameData::AdFrameData( | |
49 FrameTreeNodeId frame_tree_node_id) | |
50 : frame_bytes(0u), | |
51 frame_bytes_uncached(0u), | |
52 frame_tree_node_id(frame_tree_node_id) {} | |
53 | |
54 // static | |
55 std::unique_ptr<AdsPageLoadMetricsObserver> | |
56 AdsPageLoadMetricsObserver::CreateIfNeeded() { | |
57 if (!base::FeatureList::IsEnabled(kAdsFeature)) | |
58 return nullptr; | |
59 return base::MakeUnique<AdsPageLoadMetricsObserver>(); | |
60 } | |
61 | |
62 AdsPageLoadMetricsObserver::AdsPageLoadMetricsObserver() = default; | |
63 AdsPageLoadMetricsObserver::~AdsPageLoadMetricsObserver() = default; | |
64 | |
65 page_load_metrics::PageLoadMetricsObserver::ObservePolicy | |
66 AdsPageLoadMetricsObserver::OnCommit( | |
67 content::NavigationHandle* navigation_handle) { | |
68 DCHECK(ad_frames_data_.empty()); | |
69 | |
70 // The main frame is never considered an ad. | |
71 ad_frames_data_[navigation_handle->GetFrameTreeNodeId()] = nullptr; | |
72 ProcessOngoingNavigationResource(navigation_handle->GetFrameTreeNodeId()); | |
73 return CONTINUE_OBSERVING; | |
74 } | |
75 | |
76 page_load_metrics::PageLoadMetricsObserver::ObservePolicy | |
77 AdsPageLoadMetricsObserver::OnDidFinishSubFrameNavigation( | |
78 content::NavigationHandle* navigation_handle) { | |
79 FrameTreeNodeId frame_tree_node_id = navigation_handle->GetFrameTreeNodeId(); | |
80 | |
81 if (!navigation_handle->HasCommitted() || | |
82 navigation_handle->IsSameDocument() || navigation_handle->IsErrorPage()) { | |
83 // We're not interested in tracking this navigation. In case we've seen a | |
84 // resource for the navigation before this message, clear it from | |
85 // ongoing_navigation_resources_. | |
86 ongoing_navigation_resources_.erase(frame_tree_node_id); | |
87 return CONTINUE_OBSERVING; | |
88 } | |
89 | |
90 content::RenderFrameHost* parent_frame_host = | |
91 navigation_handle->GetRenderFrameHost()->GetParent(); | |
92 DCHECK(parent_frame_host); | |
93 | |
94 bool top_level_subframe = !parent_frame_host->GetParent(); | |
95 | |
96 const auto& id_and_data = ad_frames_data_.find(frame_tree_node_id); | |
97 if (id_and_data != ad_frames_data_.end()) { | |
98 // An existing subframe is navigating again. | |
99 if (id_and_data->second) { | |
100 // The subframe was an ad to begin with, keep tracking it as an ad. | |
101 ProcessOngoingNavigationResource(frame_tree_node_id); | |
102 | |
103 if (frame_tree_node_id == id_and_data->second->frame_tree_node_id) { | |
104 // This is the top-most frame in the ad. | |
105 UMA_HISTOGRAM_BOOLEAN( | |
106 "PageLoad.Clients.Ads.Google.Navigations.AdFrameRenavigatedToAd", | |
107 FrameIsAd(navigation_handle)); | |
108 } | |
109 | |
110 return CONTINUE_OBSERVING; | |
111 } | |
112 // This frame was previously not an ad, process it as usual. If it had | |
113 // any child frames that were ads, those will still be recorded. | |
114 UMA_HISTOGRAM_BOOLEAN( | |
115 "PageLoad.Clients.Ads.Google.Navigations.NonAdFrameRenavigatedToAd", | |
116 FrameIsAd(navigation_handle)); | |
117 } else if (top_level_subframe) { | |
118 top_level_subframe_count_ += 1; | |
119 } | |
120 | |
121 // Determine who the parent frame's ad ancestor is. | |
122 const auto& parent_id_and_data = | |
123 ad_frames_data_.find(parent_frame_host->GetFrameTreeNodeId()); | |
124 DCHECK(parent_id_and_data != ad_frames_data_.end()); | |
125 AdFrameData* ad_data = parent_id_and_data->second; | |
126 | |
127 if (!ad_data && FrameIsAd(navigation_handle)) { | |
128 // This frame is not nested within an ad frame but is itself an ad. | |
129 ad_frames_data_storage_.emplace_back(frame_tree_node_id); | |
130 ad_data = &ad_frames_data_storage_.back(); | |
131 } | |
132 | |
133 ad_frames_data_[frame_tree_node_id] = ad_data; | |
134 | |
135 if (top_level_subframe && ad_data) | |
136 top_level_ad_frame_count_ += 1; | |
137 | |
138 ProcessOngoingNavigationResource(frame_tree_node_id); | |
139 return CONTINUE_OBSERVING; | |
140 } | |
141 | |
142 page_load_metrics::PageLoadMetricsObserver::ObservePolicy | |
143 AdsPageLoadMetricsObserver::FlushMetricsOnAppEnterBackground( | |
144 const page_load_metrics::PageLoadTiming& timing, | |
145 const page_load_metrics::PageLoadExtraInfo& extra_info) { | |
146 // The browser may come back, but there is no guarantee. To be safe, record | |
147 // what we have now and ignore future changes to this navigation. | |
148 if (extra_info.did_commit) | |
149 RecordHistograms(); | |
150 | |
151 return STOP_OBSERVING; | |
152 } | |
153 | |
154 void AdsPageLoadMetricsObserver::OnLoadedResource( | |
155 const page_load_metrics::ExtraRequestCompleteInfo& extra_request_info) { | |
156 ProcessLoadedResource(extra_request_info); | |
157 } | |
158 | |
159 void AdsPageLoadMetricsObserver::OnComplete( | |
160 const page_load_metrics::PageLoadTiming& timing, | |
161 const page_load_metrics::PageLoadExtraInfo& info) { | |
162 RecordHistograms(); | |
163 } | |
164 | |
165 void AdsPageLoadMetricsObserver::ProcessLoadedResource( | |
166 const page_load_metrics::ExtraRequestCompleteInfo& extra_request_info) { | |
167 if (!extra_request_info.url.SchemeIsHTTPOrHTTPS()) { | |
168 // Data uris should be accounted for in the generating resource, not | |
169 // here. Blobs for PlzNavigate shouldn't be counted as the http resource | |
170 // was already counted. Blobs for other things like CacheStorage or | |
171 // IndexedDB are also ignored for now, as they're not normal HTTP loads. | |
172 return; | |
173 } | |
174 | |
175 const auto& id_and_data = | |
176 ad_frames_data_.find(extra_request_info.frame_tree_node_id); | |
177 if (id_and_data == ad_frames_data_.end()) { | |
178 // This resouce is for a frame that hasn't yet committed. It must be the | |
179 // main document for the frame. Hold onto it and once it commits we'll run | |
180 // it in ProcessOngoingNavigationResource. | |
181 // TODO(jkarlin): Plumb the resource type through and DCHECK that the type | |
182 // is document. | |
183 auto it_and_success = ongoing_navigation_resources_.emplace( | |
184 std::piecewise_construct, | |
185 std::forward_as_tuple(extra_request_info.frame_tree_node_id), | |
186 std::forward_as_tuple( | |
187 extra_request_info.url, extra_request_info.frame_tree_node_id, | |
188 extra_request_info.was_cached, extra_request_info.raw_body_bytes, | |
189 extra_request_info.original_network_content_length, nullptr, | |
190 extra_request_info.resource_type)); | |
191 DCHECK(it_and_success.second); | |
192 return; | |
193 } | |
194 | |
195 page_bytes_ += extra_request_info.raw_body_bytes; | |
196 if (!extra_request_info.was_cached) | |
197 uncached_page_bytes_ += extra_request_info.raw_body_bytes; | |
198 | |
199 // Determine if the frame (or its ancestor) is an ad, if so attribute the | |
200 // bytes to the highest ad ancestor. | |
201 AdFrameData* ancestor_data = id_and_data->second; | |
202 | |
203 if (ancestor_data) { | |
204 ancestor_data->frame_bytes += extra_request_info.raw_body_bytes; | |
205 if (!extra_request_info.was_cached) { | |
206 ancestor_data->frame_bytes_uncached += extra_request_info.raw_body_bytes; | |
207 } | |
208 } | |
209 } | |
210 | |
211 void AdsPageLoadMetricsObserver::RecordHistograms() { | |
212 if (page_bytes_ == 0) | |
213 return; | |
214 | |
215 size_t total_ad_frame_bytes = 0; | |
216 size_t uncached_ad_frame_bytes = 0; | |
217 | |
218 UMA_HISTOGRAM_COUNTS_1000( | |
219 "PageLoad.Clients.Ads.Google.FrameCounts.AnyParentFrame.AdFrames", | |
220 ad_frames_data_storage_.size()); | |
221 | |
222 // Don't post UMA for pages that don't have ads. | |
223 if (ad_frames_data_storage_.empty()) | |
224 return; | |
225 | |
226 for (const AdFrameData& ad_frame_data : ad_frames_data_storage_) { | |
227 total_ad_frame_bytes += ad_frame_data.frame_bytes; | |
228 uncached_ad_frame_bytes += ad_frame_data.frame_bytes_uncached; | |
229 | |
230 PAGE_BYTES_HISTOGRAM( | |
231 "PageLoad.Clients.Ads.Google.Bytes.AdFrames.PerFrame.Total", | |
232 ad_frame_data.frame_bytes); | |
233 PAGE_BYTES_HISTOGRAM( | |
234 "PageLoad.Clients.Ads.Google.Bytes.AdFrames.PerFrame.Network", | |
235 ad_frame_data.frame_bytes_uncached); | |
236 if (ad_frame_data.frame_bytes > 0) { | |
237 UMA_HISTOGRAM_PERCENTAGE( | |
238 "PageLoad.Clients.Ads.Google.Bytes.AdFrames.PerFrame.PercentNetwork", | |
239 ad_frame_data.frame_bytes_uncached * 100 / ad_frame_data.frame_bytes); | |
240 } | |
241 } | |
242 | |
243 UMA_HISTOGRAM_COUNTS_1000( | |
244 "PageLoad.Clients.Ads.Google.FrameCounts.MainFrameParent.TotalFrames", | |
245 top_level_subframe_count_); | |
246 UMA_HISTOGRAM_COUNTS_1000( | |
247 "PageLoad.Clients.Ads.Google.FrameCounts.MainFrameParent.AdFrames", | |
248 top_level_ad_frame_count_); | |
249 | |
250 DCHECK_LT(0, top_level_subframe_count_); // Because ad frames isn't empty. | |
251 UMA_HISTOGRAM_PERCENTAGE( | |
252 "PageLoad.Clients.Ads.Google.FrameCounts.MainFrameParent.PercentAdFrames", | |
253 top_level_ad_frame_count_ * 100 / top_level_subframe_count_); | |
254 | |
255 PAGE_BYTES_HISTOGRAM( | |
256 "PageLoad.Clients.Ads.Google.Bytes.NonAdFrames.Aggregate.Total", | |
257 page_bytes_ - total_ad_frame_bytes); | |
258 | |
259 PAGE_BYTES_HISTOGRAM("PageLoad.Clients.Ads.Google.Bytes.FullPage.Total", | |
260 page_bytes_); | |
261 PAGE_BYTES_HISTOGRAM("PageLoad.Clients.Ads.Google.Bytes.FullPage.Network", | |
262 uncached_page_bytes_); | |
263 if (page_bytes_) { | |
264 UMA_HISTOGRAM_PERCENTAGE( | |
265 "PageLoad.Clients.Ads.Google.Bytes.FullPage.Total.PercentAds", | |
266 total_ad_frame_bytes * 100 / page_bytes_); | |
267 } | |
268 if (uncached_page_bytes_ > 0) { | |
269 UMA_HISTOGRAM_PERCENTAGE( | |
270 "PageLoad.Clients.Ads.Google.Bytes.FullPage.Network.PercentAds", | |
271 uncached_ad_frame_bytes * 100 / uncached_page_bytes_); | |
272 } | |
273 | |
274 PAGE_BYTES_HISTOGRAM( | |
275 "PageLoad.Clients.Ads.Google.Bytes.AdFrames.Aggregate.Total", | |
276 total_ad_frame_bytes); | |
277 PAGE_BYTES_HISTOGRAM( | |
278 "PageLoad.Clients.Ads.Google.Bytes.AdFrames.Aggregate.Network", | |
279 uncached_ad_frame_bytes); | |
280 | |
281 if (total_ad_frame_bytes) { | |
282 UMA_HISTOGRAM_PERCENTAGE( | |
283 "PageLoad.Clients.Ads.Google.Bytes.AdFrames.Aggregate.PercentNetwork", | |
284 uncached_ad_frame_bytes * 100 / total_ad_frame_bytes); | |
285 } | |
286 } | |
287 | |
288 void AdsPageLoadMetricsObserver::ProcessOngoingNavigationResource( | |
289 FrameTreeNodeId frame_tree_node_id) { | |
290 const auto& frame_id_and_request = | |
291 ongoing_navigation_resources_.find(frame_tree_node_id); | |
292 if (frame_id_and_request == ongoing_navigation_resources_.end()) | |
293 return; | |
294 | |
295 ProcessLoadedResource(frame_id_and_request->second); | |
296 ongoing_navigation_resources_.erase(frame_id_and_request); | |
297 } | |
OLD | NEW |