Index: chrome/browser/page_load_metrics/ads_detection.cc |
diff --git a/chrome/browser/page_load_metrics/ads_detection.cc b/chrome/browser/page_load_metrics/ads_detection.cc |
new file mode 100644 |
index 0000000000000000000000000000000000000000..0c9d66d988f782d9261d1475e85cd15eabc31392 |
--- /dev/null |
+++ b/chrome/browser/page_load_metrics/ads_detection.cc |
@@ -0,0 +1,128 @@ |
+// Copyright 2017 The Chromium Authors. All rights reserved. |
+// Use of this source code is governed by a BSD-style license that can be |
+// found in the LICENSE file. |
+ |
+#include "chrome/browser/page_load_metrics/ads_detection.h" |
+ |
+#include <memory> |
+#include <string> |
+#include <utility> |
+ |
+#include "base/logging.h" |
+#include "base/macros.h" |
+#include "base/memory/ptr_util.h" |
+#include "base/strings/string_util.h" |
+#include "base/supports_user_data.h" |
+#include "content/public/browser/navigation_handle.h" |
+#include "content/public/browser/render_frame_host.h" |
+#include "content/public/browser/web_contents.h" |
+#include "url/gurl.h" |
+ |
+namespace page_load_metrics { |
+ |
+namespace { |
+ |
+// Heuristically decides whether |navigation_handle| is loading a Google ad,
+// based on the name of the navigating frame or on the navigation URL.
+bool IsGoogleAd(content::NavigationHandle* navigation_handle) {
+  // Identifying ads in general is hard: sub-resource filtering isn't always
+  // enabled, and it doesn't work well in monitoring mode (no CSS
+  // enforcement). Google ads are prevalent and easy to track, so those are
+  // tracked first. Note that the frame name can be very large, so only prefix
+  // comparisons are used below rather than full string searches.
+  // TODO(jkarlin): Track other ad networks that are easy to identify.
+
+  // In case the navigation aborted, look up the RFH by the Frame Tree Node
+  // ID. It returns the committed frame host or the initial frame host for the
+  // frame if no committed host exists. Using a previous host is fine because
+  // once a frame has an ad we always consider it to have an ad.
Charlie Reis
2017/07/20 17:06:16
Are we ok with this for the process model decision
jkarlin
2017/07/20 17:19:43
Actually, that comment (the once an ad always an a
Łukasz Anforowicz
2017/07/20 17:33:51
I think this is okay for now.
|
+  // We use the unsafe method of FindFrameByFrameTreeNodeId because we're not
+  // concerned with which process the frame lives on (we're just measuring
+  // bytes and not granting security privileges).
Charlie Reis
2017/07/20 17:06:16
nit: privileges
Łukasz Anforowicz
2017/07/20 17:33:51
Done.
|
+  content::WebContents* web_contents = navigation_handle->GetWebContents();
+  content::RenderFrameHost* frame_host =
+      web_contents->UnsafeFindFrameByFrameTreeNodeId(
+          navigation_handle->GetFrameTreeNodeId());
+  if (frame_host) {
+    // Either of the two known Google ad frame-name prefixes is enough.
+    const std::string& name = frame_host->GetFrameName();
+    const bool has_iframe_prefix = base::StartsWith(
+        name, "google_ads_iframe", base::CompareCase::SENSITIVE);
+    if (has_iframe_prefix ||
+        base::StartsWith(name, "google_ads_frame",
+                         base::CompareCase::SENSITIVE)) {
+      return true;
+    }
+  }
+
+  // The frame name did not match; fall back to the navigation URL, which must
+  // be a /safeframe path on tpc.googlesyndication.com.
+  const GURL& url = navigation_handle->GetURL();
+  if (url.host_piece() != "tpc.googlesyndication.com")
+    return false;
+  return base::StartsWith(url.path_piece(), "/safeframe",
+                          base::CompareCase::SENSITIVE);
+}
+ |
+// Associates the set of detected ad types with a NavigationHandle. An instance
+// is stored as user data on the handle under |kUserDataKey| and is created
+// lazily by GetOrCreate().
+class NavigationHandleAdsData : public base::SupportsUserData::Data {
Charlie Reis
2017/07/20 17:06:16
nit: Please add comment, along the lines of "Assoc
Łukasz Anforowicz
2017/07/20 17:33:51
Done.
|
+ public:
+  NavigationHandleAdsData() = default;
+  ~NavigationHandleAdsData() override = default;
+
+  // Returns the NavigationHandleAdsData attached to |navigation_handle|,
+  // attaching a freshly constructed one first if none is present yet.
+  static NavigationHandleAdsData* GetOrCreate(
+      content::NavigationHandle* navigation_handle) {
+    DCHECK(navigation_handle);
+
+    auto* existing = static_cast<NavigationHandleAdsData*>(
+        navigation_handle->GetUserData(kUserDataKey));
+    if (existing)
+      return existing;
+
+    // Grab the raw pointer before ownership moves into SetUserData. Returning
+    // it is safe because |navigation_handle| keeps the instance alive until
+    // the |navigation_handle| itself is destroyed.
+    auto owned = base::MakeUnique<NavigationHandleAdsData>();
+    NavigationHandleAdsData* created = owned.get();
+    navigation_handle->SetUserData(kUserDataKey, std::move(owned));
+    return created;
+  }
+
+  // Mutable and read-only access to the detected ad types.
+  AdTypes& ad_types() { return ad_types_; }
+  const AdTypes& ad_types() const { return ad_types_; }
+
+ private:
+  // Key under which instances are registered as NavigationHandle user data.
+  static const char kUserDataKey[];
+
+  AdTypes ad_types_;
+
+  DISALLOW_COPY_AND_ASSIGN(NavigationHandleAdsData);
+};
+
+const char NavigationHandleAdsData::kUserDataKey[] = "AdsData";
Charlie Reis
2017/07/20 17:06:16
nit: Move above the NavigationHandleAdsData class.
Łukasz Anforowicz
2017/07/20 17:33:51
This wouldn't compile (error: use of undeclared id
Charlie Reis
2017/07/20 20:10:53
Ah, sorry, I didn't see it was a member, just that
|
+ |
+} // namespace |
+ |
+// Returns the ad types recorded for |navigation_handle|, first adding
+// AD_TYPE_GOOGLE when IsGoogleAd() matches the navigation.
+const AdTypes& GetDetectedAdTypes(
+    content::NavigationHandle* navigation_handle) {
+  DCHECK(navigation_handle);
+
+  // This must only be called once the navigation is ready to commit (or has
+  // failed with a network error). Being called that late ensures that
+  // 1) subresource filter got a chance to run its heuristics
+  // 2) the URL looked at by IsGoogleAd is the final, redirected URL.
+  DCHECK((navigation_handle->GetNetErrorCode() != net::Error::OK) ||
+         navigation_handle->GetRenderFrameHost());
+
+  AdTypes& detected =
+      NavigationHandleAdsData::GetOrCreate(navigation_handle)->ad_types();
+  if (IsGoogleAd(navigation_handle))
+    detected.set(AD_TYPE_GOOGLE);
+  return detected;
+}
+ |
+// Records |type| in the ad types associated with |navigation_handle|,
+// creating the per-navigation ads data first if it does not exist yet.
+void SetDetectedAdType(content::NavigationHandle* navigation_handle,
+                       AdType type) {
+  DCHECK(navigation_handle);
+  NavigationHandleAdsData* ads_data =
+      NavigationHandleAdsData::GetOrCreate(navigation_handle);
+  ads_data->ad_types().set(type);
+}
+ |
+} // namespace page_load_metrics |