Index: chrome/browser/page_load_metrics/ads_detection.cc |
diff --git a/chrome/browser/page_load_metrics/ads_detection.cc b/chrome/browser/page_load_metrics/ads_detection.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..5ea9db4d92d8bf930676bd1b1ed74caf13f33aa3 |
--- /dev/null |
+++ b/chrome/browser/page_load_metrics/ads_detection.cc |
@@ -0,0 +1,114 @@ |
+// Copyright 2017 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "chrome/browser/page_load_metrics/ads_detection.h" |
+ |
+#include <memory> |
+#include <string> |
+#include <utility> |
+ |
+#include "base/logging.h" |
+#include "base/macros.h" |
+#include "base/memory/ptr_util.h" |
+#include "base/strings/string_util.h" |
+#include "base/supports_user_data.h" |
+#include "content/public/browser/navigation_handle.h" |
+#include "content/public/browser/render_frame_host.h" |
+#include "content/public/browser/web_contents.h" |
+#include "url/gurl.h" |
+ |
jkarlin
2017/07/19 13:32:36
page_load_metrics namespace
Łukasz Anforowicz
2017/07/19 18:11:29
Done.
|
+namespace { |
+ |
+bool IsGoogleAd(content::NavigationHandle* navigation_handle) { |
+  // Returns true when the navigation looks like a Google ad frame, using two |
+  // independent signals: the frame's name and the navigation's URL. |
+  // |
+  // Identifying ads in general is hard: sub-resource filtering isn't always |
+  // enabled and doesn't work well in monitoring mode (no CSS enforcement). |
+  // Google ads are prevalent and easy to track, so they are tracked first. |
+  // Frame names can be very large, so only prefix comparisons are used here |
+  // instead of full string searches. |
+  // TODO(jkarlin): Track other ad networks that are easy to identify. |
+ |
+  // Signal 1: frame name prefix. |
+  // |
+  // The navigation may have been aborted, so the RFH is looked up by Frame |
+  // Tree Node ID. That lookup returns the committed frame host, or the |
+  // initial frame host for the frame if no committed host exists. Using a |
+  // previous host is fine because once a frame has an ad it is always |
+  // considered to have an ad. The "unsafe" lookup is acceptable because we |
+  // are not concerned with which process the frame lives on: we are only |
+  // measuring bytes, not granting security privileges. |
+  content::WebContents* web_contents = navigation_handle->GetWebContents(); |
+  content::RenderFrameHost* frame_host = |
+      web_contents->UnsafeFindFrameByFrameTreeNodeId( |
+          navigation_handle->GetFrameTreeNodeId()); |
+  if (frame_host) { |
+    const std::string& name = frame_host->GetFrameName(); |
+    if (base::StartsWith(name, "google_ads_iframe", |
+                         base::CompareCase::SENSITIVE)) { |
+      return true; |
+    } |
+    if (base::StartsWith(name, "google_ads_frame", |
+                         base::CompareCase::SENSITIVE)) { |
+      return true; |
+    } |
+  } |
+ |
+  // Signal 2: the navigation URL's host is tpc.googlesyndication.com and its |
+  // path starts with /safeframe. |
+  const GURL& url = navigation_handle->GetURL(); |
+  if (url.host_piece() != "tpc.googlesyndication.com") |
+    return false; |
+ |
+  return base::StartsWith(url.path_piece(), "/safeframe", |
+                          base::CompareCase::SENSITIVE); |
+} |
+ |
+// Per-navigation storage for the AdTypes recorded on a |
+// content::NavigationHandle. Instances are stored on the handle as |
+// base::SupportsUserData::Data under kUserDataKey. |
+class NavigationHandleAdsData : public base::SupportsUserData::Data { |
+ public: |
+  // Returns the NavigationHandleAdsData stored on |navigation_handle|, |
+  // creating and attaching one if none exists yet. IsGoogleAd() is evaluated |
+  // only when the instance is created; if it returns true, AD_TYPE_GOOGLE is |
+  // set on the new instance. |navigation_handle| must be non-null (DCHECKed). |
+  // The returned pointer is never null. |
+  static NavigationHandleAdsData* GetOrCreate( |
+      content::NavigationHandle* navigation_handle) { |
+    DCHECK(navigation_handle); |
+    // Within this file, data is stored under kUserDataKey only by the |
+    // SetUserData() call below, which is what the downcast relies on. |
+    NavigationHandleAdsData* ads_data = static_cast<NavigationHandleAdsData*>( |
+        navigation_handle->GetUserData(kUserDataKey)); |
+    if (!ads_data) { |
+      std::unique_ptr<NavigationHandleAdsData> new_ads_data = |
+          base::MakeUnique<NavigationHandleAdsData>(); |
+      if (IsGoogleAd(navigation_handle)) |
jkarlin
2017/07/19 13:32:36
The NavigationHandleAdsData might be created earli
Łukasz Anforowicz
2017/07/19 18:11:29
Good point (and sort of "done"), but:
1. Repeated
jkarlin
2017/07/20 16:51:51
The calculation cost isn't terrible, but I'd prefe
Łukasz Anforowicz
2017/07/20 17:33:51
Done.
|
+        new_ads_data->ad_types().set(AD_TYPE_GOOGLE); |
+ |
+      // It is safe to retain the raw |ads_data| pointer, despite passing |
+      // ownership of |new_ads_data| to SetUserData, because |
+      // |navigation_handle| will keep the NavigationHandleAdsData instance |
+      // alive until the |navigation_handle| is destroyed. |
+      ads_data = new_ads_data.get(); |
+ |
+      navigation_handle->SetUserData(kUserDataKey, std::move(new_ads_data)); |
+    } |
+ |
+    return ads_data; |
+  } |
+ |
+  NavigationHandleAdsData() = default; |
+  ~NavigationHandleAdsData() override {} |
+ |
+  // Mutable and read-only access to the recorded ad types. |
+  AdTypes& ad_types() { return ad_types_; } |
+  const AdTypes& ad_types() const { return ad_types_; } |
+ |
+ private: |
+  // Ad types recorded so far for the navigation this data is attached to. |
+  AdTypes ad_types_; |
+ |
+  // Key passed to GetUserData()/SetUserData() on the NavigationHandle. |
+  static const char kUserDataKey[]; |
+ |
+  DISALLOW_COPY_AND_ASSIGN(NavigationHandleAdsData); |
+}; |
+ |
+// Out-of-line definition of the user-data key declared in the class. |
+const char NavigationHandleAdsData::kUserDataKey[] = "AdsData"; |
+ |
+} // namespace |
+ |
+// Returns the AdTypes recorded for |navigation_handle|, which must be |
+// non-null (DCHECKed). The backing NavigationHandleAdsData is obtained via |
+// NavigationHandleAdsData::GetOrCreate(), so it is created and attached to |
+// the handle on first access; the returned reference points into that data. |
+const AdTypes& GetDetectedAdTypes( |
+    content::NavigationHandle* navigation_handle) { |
+  DCHECK(navigation_handle); |
jkarlin
2017/07/19 13:32:35
An indirect (but better than nothing) way to verif
Łukasz Anforowicz
2017/07/19 18:11:29
Done, although this is somewhat tricky in case the
Łukasz Anforowicz
2017/07/19 22:44:26
Actually this didn't quite work with PlzNavigate:
jkarlin
2017/07/20 16:51:51
Sorry, I should have realized that it wouldn't wor
Łukasz Anforowicz
2017/07/20 17:33:51
I've added a TODO here.
|
+  return NavigationHandleAdsData::GetOrCreate(navigation_handle)->ad_types(); |
+} |
+ |
+// Records |type| on the AdTypes stored for |navigation_handle|, which must |
+// be non-null (DCHECKed). The backing NavigationHandleAdsData is created and |
+// attached to the handle first if it does not exist yet. |
+void SetDetectedAdTypes(content::NavigationHandle* navigation_handle, |
+                        AdType type) { |
+  DCHECK(navigation_handle); |
+  NavigationHandleAdsData* ads_data = |
+      NavigationHandleAdsData::GetOrCreate(navigation_handle); |
+  ads_data->ad_types().set(type); |
+} |