Index: chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
diff --git a/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc b/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
index 14fdb02acfce05e181fb1fba65d93e5912671441..d4a1fe14e6a7a95d05ef7586e5eb9a07856f045b 100644 |
--- a/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
+++ b/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
@@ -5,7 +5,10 @@ |
#include "chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.h" |
#include "base/memory/ptr_util.h" |
+#include "base/metrics/histogram_macros.h" |
+#include "base/strings/stringprintf.h" |
#include "base/time/time.h" |
+#include "base/timer/timer.h" |
#include "chrome/browser/chrome_notification_types.h" |
#include "chrome/browser/safe_browsing/safe_browsing_navigation_observer.h" |
#include "chrome/browser/sessions/session_tab_helper.h" |
@@ -21,20 +24,58 @@ using content::WebContents; |
namespace safe_browsing { |
+namespace { |
+ |
+// Building blocks for UMA histogram names. The recording helpers below join a |
+// prefix and an attribution-type suffix as "<prefix>.<suffix>". |
+const char kAttributionResultUMAPrefix[] = "SB2.ReferrerAttributionResult"; |
+const char kAttributionURLChainSizeUMAPrefix[] = "SB2.ReferrerURLChainSize"; |
+const char kReferrerHasInvalidTabIDUMAPrefix[] = "SB2.ReferrerHasInvalidTabID"; |
+// Suffix used when attributing a download. |
+const char kDownloadAttributionUMASuffix[] = "DownloadAttribution"; |
+ |
+// Tells whether an event stamped |event_time| has outlived |ttl_in_second|. |
+// An event whose timestamp is not strictly in the past carries an invalid |
+// timestamp, so it is reported as expired too. |
+bool IsEventExpired(const base::Time& event_time, double ttl_in_second) { |
+  const double now = base::Time::Now().ToDoubleT(); |
+  const double happened_at = event_time.ToDoubleT(); |
+  const bool timestamp_is_invalid = now <= happened_at; |
+  return timestamp_is_invalid || (now - happened_at) > ttl_in_second; |
+} |
+ |
+void RecordURLChainSize(const std::string& attribution_type, std::size_t size) { |
+ std::string metric_name = base::StringPrintf( |
+ "%s.%s", kAttributionURLChainSizeUMAPrefix, attribution_type.c_str()); |
+ UMA_HISTOGRAM_COUNTS_100(metric_name, size); |
Nathan Parker
2016/12/05 22:21:23
Does this work? I thought most of the UMA histogr
Jialiu Lin
2016/12/06 23:10:06
You're right... this would not work. Change to lit
|
+} |
+ |
+// Records |type| in the enumeration histogram named |
+// "SB2.ReferrerAttributionResult.<attribution_type>". |
+// |
+// The histogram is looked up through the factory on every call instead of |
+// going through UMA_HISTOGRAM_ENUMERATION: that macro caches the histogram at |
+// its call site on first use, so it only supports a name that never changes |
+// between calls. The factory arguments mirror what the macro passes. |
+void RecordAttributionResult( |
+    const std::string& attribution_type, |
+    SafeBrowsingNavigationObserverManager::AttributionResult type) { |
+  const std::string metric_name = base::StringPrintf( |
+      "%s.%s", kAttributionResultUMAPrefix, attribution_type.c_str()); |
+  const int boundary = |
+      SafeBrowsingNavigationObserverManager::ATTRIBUTION_FAILURE_TYPE_MAX; |
+  base::LinearHistogram::FactoryGet( |
+      metric_name, 1, boundary, boundary + 1, |
+      base::HistogramBase::kUmaTargetedHistogramFlag) |
+      ->Add(type); |
+} |
+ |
+// Adds one "true" sample to the boolean histogram named |
+// "SB2.ReferrerHasInvalidTabID.<attribution_type>". |
+// |
+// The histogram is looked up through the factory on every call instead of |
+// going through UMA_HISTOGRAM_BOOLEAN: that macro caches the histogram at its |
+// call site on first use, so it only supports a name that never changes |
+// between calls. |
+void RecordInvalidTabIDEvent(const std::string& attribution_type) { |
+  const std::string metric_name = base::StringPrintf( |
+      "%s.%s", kReferrerHasInvalidTabIDUMAPrefix, attribution_type.c_str()); |
+  base::BooleanHistogram::FactoryGet( |
+      metric_name, base::HistogramBase::kUmaTargetedHistogramFlag) |
+      ->AddBoolean(true); |
+} |
+ |
+} // namespace |
+ |
// The expiration period of a user gesture. Any user gesture that happened 1.0 |
// second ago will be considered as expired and not relevant to upcoming |
// navigation events. |
static const double kUserGestureTTLInSecond = 1.0; |
+// The expiration period, in seconds, of recorded navigation events and |
+// resolved IP addresses. It is also the interval after which the periodic |
+// clean up reschedules itself. |
+static const double kNavigationFootPrintTTLInSecond = 120.0; |
Nathan Parker
2016/12/05 22:21:24
Add a comment describing how this is intended to b
Jialiu Lin
2016/12/06 23:10:06
Done.
|
// static |
bool SafeBrowsingNavigationObserverManager::IsUserGestureExpired( |
const base::Time& timestamp) { |
- double now = base::Time::Now().ToDoubleT(); |
- double timestamp_in_double = timestamp.ToDoubleT(); |
- |
- if (now <= timestamp_in_double) |
- return true; |
- return (now - timestamp_in_double) > kUserGestureTTLInSecond; |
+ return IsEventExpired(timestamp, kUserGestureTTLInSecond); |
} |
// static |
@@ -50,6 +91,9 @@ GURL SafeBrowsingNavigationObserverManager::ClearEmptyRef(const GURL& url) { |
SafeBrowsingNavigationObserverManager::SafeBrowsingNavigationObserverManager() { |
registrar_.Add(this, chrome::NOTIFICATION_RETARGETING, |
content::NotificationService::AllSources()); |
+ |
+ // TODO(jialiul): call ScheduleNextCleanUpAfterInterval() when this class is |
+ // ready to be hooked into SafeBrowsingService. |
} |
void SafeBrowsingNavigationObserverManager::RecordNavigationEvent( |
@@ -104,7 +148,33 @@ void SafeBrowsingNavigationObserverManager::RecordHostToIpMapping( |
void SafeBrowsingNavigationObserverManager::OnWebContentDestroyed( |
content::WebContents* web_contents) { |
user_gesture_map_.erase(web_contents); |
- // TODO (jialiul): Will add other clean up tasks shortly. |
+} |
+ |
+// Drops expired navigation events, user gestures and resolved IP addresses, |
+// then re-arms the clean up timer so that this method runs again after |
+// kNavigationFootPrintTTLInSecond seconds. |
+void SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootPrints() { |
+ CleanUpNavigationEvents(); |
+ CleanUpUserGestures(); |
+ CleanUpIpAddresses(); |
+ ScheduleNextCleanUpAfterInterval( |
+ base::TimeDelta::FromSecondsD(kNavigationFootPrintTTLInSecond)); |
+} |
+ |
+void SafeBrowsingNavigationObserverManager:: |
+ AddReferrerChainToClientDownloadRequest( |
+ const GURL& download_url, |
+ content::WebContents* source_contents, |
+ ClientDownloadRequest* request) { |
+ std::string attribution_type(kDownloadAttributionUMASuffix); |
Nathan Parker
2016/12/05 22:21:23
const.
Or even just
const char* attribution_type
Jialiu Lin
2016/12/06 23:10:06
no longer using this variable.
|
+ int download_tab_id = SessionTabHelper::IdForTab(source_contents); |
+ if (download_tab_id == -1) { |
+ RecordInvalidTabIDEvent(attribution_type); |
+ } |
+ std::vector<ClientDownloadRequest::ReferrerChainEntry> attribution_chain; |
+ AttributionResult result = IdentifyReferrerChain( |
+ download_url, download_tab_id, 2, &attribution_chain); |
Nathan Parker
2016/12/05 22:21:23
What is the 2? Add a comment, or make it a consta
Jialiu Lin
2016/12/06 23:10:06
Added a const
|
+ RecordURLChainSize(attribution_type, attribution_chain.size()); |
+ RecordAttributionResult(attribution_type, result); |
+ for (auto entry : attribution_chain) |
+ (*request->add_referrer_chain_entry()) = entry; |
Nathan Parker
2016/12/05 22:21:23
You shouldn't need the parens.
Jialiu Lin
2016/12/06 23:10:06
Done.
|
} |
SafeBrowsingNavigationObserverManager:: |
@@ -166,4 +236,172 @@ void SafeBrowsingNavigationObserverManager::RecordRetargeting( |
insertion_result.first->second.push_back(std::move(nav_event)); |
} |
+void SafeBrowsingNavigationObserverManager::CleanUpNavigationEvents() { |
+ // Remove any stale NavigationEvent, if it is older than |
Nathan Parker
2016/12/05 22:21:24
nit: NavigationEvent.
And do you mean "lasts long
Jialiu Lin
2016/12/06 23:10:06
is older than...
|
+ // kNavigationFootPrintTTLInSecond. |
+ for (auto it = navigation_map_.begin(); it != navigation_map_.end();) { |
+ it->second.erase(std::remove_if(it->second.begin(), it->second.end(), |
+ [](const NavigationEvent& nav_event) { |
+ return IsEventExpired( |
+ nav_event.last_updated, |
+ kNavigationFootPrintTTLInSecond); |
+ }), |
+ it->second.end()); |
+ if (it->second.size() == 0) |
+ it = navigation_map_.erase(it); |
Nathan Parker
2016/12/05 22:21:24
This is clever -- avoids needing a second pass.
Jialiu Lin
2016/12/06 23:10:06
Acknowledged.
|
+ else |
+ ++it; |
+ } |
+} |
+ |
+// Forgets every recorded user gesture that IsEventExpired() reports as older |
+// than kUserGestureTTLInSecond. |
+void SafeBrowsingNavigationObserverManager::CleanUpUserGestures() { |
+  auto gesture = user_gesture_map_.begin(); |
+  while (gesture != user_gesture_map_.end()) { |
+    const bool expired = |
+        IsEventExpired(gesture->second, kUserGestureTTLInSecond); |
+    if (!expired) { |
+      ++gesture; |
+      continue; |
+    } |
+    // Continue from the iterator that erase() returns. |
+    gesture = user_gesture_map_.erase(gesture); |
+  } |
+} |
+ |
+// Prunes resolved IP addresses older than kNavigationFootPrintTTLInSecond and |
+// drops every host that is left without any address. |
+void SafeBrowsingNavigationObserverManager::CleanUpIpAddresses() { |
+  const auto is_stale = [](const ResolvedIPAddress& resolved_ip) { |
+    return IsEventExpired(resolved_ip.timestamp, |
+                          kNavigationFootPrintTTLInSecond); |
+  }; |
+  auto host = host_to_ip_map_.begin(); |
+  while (host != host_to_ip_map_.end()) { |
+    auto& addresses = host->second; |
+    addresses.erase( |
+        std::remove_if(addresses.begin(), addresses.end(), is_stale), |
+        addresses.end()); |
+    if (addresses.size() == 0) |
+      host = host_to_ip_map_.erase(host); |
+    else |
+      ++host; |
+  } |
+} |
+ |
+// Reports whether |cleanup_timer_| is currently running. |
+bool SafeBrowsingNavigationObserverManager::IsCleanUpScheduled() const { |
+ return cleanup_timer_.IsRunning(); |
+} |
+ |
+// Stops |cleanup_timer_| and starts it again so that |
+// CleanUpStaleNavigationFootPrints() is invoked on this object after |
+// |interval|. |interval| must not be negative. |
+void SafeBrowsingNavigationObserverManager::ScheduleNextCleanUpAfterInterval( |
+ base::TimeDelta interval) { |
+ DCHECK(interval >= base::TimeDelta()); |
+ cleanup_timer_.Stop(); |
+ cleanup_timer_.Start( |
+ FROM_HERE, interval, this, |
+ &SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootPrints); |
+} |
+ |
+NavigationEvent* SafeBrowsingNavigationObserverManager::FindNavigationEvent( |
+ const GURL& target_url, |
+ int target_tab_id) { |
+ auto it = navigation_map_.find(target_url); |
+ if (it == navigation_map_.end()) { |
+ return nullptr; |
+ } |
+ // Since navigation events are recorded in chronological order, we traverse |
Nathan Parker
2016/12/05 22:21:23
Is there any way someone could confuse this by add
Jialiu Lin
2016/12/06 23:10:06
creis@ and I talked about this before.
a -> b->c->
|
+ // the vector in reverse order to get the latest match. |
+ for (auto rit = it->second.rbegin(); rit != it->second.rend(); ++rit) { |
+ // If tab id is not valid, we only compare url, otherwise we compare both. |
Nathan Parker
2016/12/05 22:21:23
I'm curious: When/why would tab_id be invalid?
Jialiu Lin
2016/12/06 23:10:06
I never see this happens in real settings, but it
|
+ if (rit->destination_url == target_url && |
+ (target_tab_id == -1 || rit->target_tab_id == target_tab_id)) { |
+ // If both source_url and source_main_frame_url are empty, and this |
+ // navigation is not triggered by user, a retargeting navigation probably |
+ // causes this navigation. In this case, we skip this navigation event and |
+ // look for the retargeting navigation event. |
+ if (rit->source_url.is_empty() && rit->source_main_frame_url.is_empty() && |
+ !rit->is_user_initiated) |
+ continue; |
+ else |
+ return &*rit; |
+ } |
+ } |
+ return nullptr; |
+} |
+ |
+// Appends one ReferrerChainEntry describing |nav_event| to |referrer_chain|. |
+// |
+// The entry carries the event's destination URL, |type|, every IP address |
+// recorded in |host_to_ip_map_| for the destination host, whether the source |
+// and target tab ids differ, and the event's last update time. Both |
+// |referrer_chain| and |nav_event| are dereferenced unconditionally and must |
+// not be null. |
+void SafeBrowsingNavigationObserverManager::AddToReferrerChain( |
+    std::vector<safe_browsing::ClientDownloadRequest::ReferrerChainEntry>* |
+        referrer_chain, |
+    NavigationEvent* nav_event, |
+    ClientDownloadRequest::ReferrerChainEntry::URLType type) { |
+  ClientDownloadRequest::ReferrerChainEntry referrer_chain_entry; |
+  referrer_chain_entry.set_url(nav_event->destination_url.spec()); |
+  referrer_chain_entry.set_type(type); |
+  auto ip_it = host_to_ip_map_.find(nav_event->destination_url.host()); |
+  if (ip_it != host_to_ip_map_.end()) { |
+    // Iterate by const reference: taking each element by value would copy a |
+    // ResolvedIPAddress per iteration for no benefit. |
+    for (const ResolvedIPAddress& entry : ip_it->second) { |
+      referrer_chain_entry.add_ip_address(entry.ip); |
+    } |
+  } |
+  // Since we only track navigation to landing referrer, we will not log the |
+  // referrer of the landing referrer page. |
+  if (type != ClientDownloadRequest::ReferrerChainEntry::LANDING_REFERRER) { |
+    referrer_chain_entry.set_referrer_url(nav_event->source_url.spec()); |
+    referrer_chain_entry.set_referrer_main_frame_url( |
+        nav_event->source_main_frame_url.spec()); |
+  } |
+  referrer_chain_entry.set_is_retargeting(nav_event->source_tab_id != |
+                                          nav_event->target_tab_id); |
+  referrer_chain_entry.set_navigation_time_msec( |
+      nav_event->last_updated.ToJavaTime()); |
+  referrer_chain->push_back(referrer_chain_entry); |
+} |
+ |
+// Walks the recorded navigation events backwards from |target_url| and |
+// appends the entries found to |referrer_chain|, starting with the |
+// DOWNLOAD_URL entry. The walk stops once |user_gesture_count_max| |
+// user-initiated navigations have been traversed, or earlier when no |
+// preceding event is found or a user-initiated event without any source URL |
+// is reached. |
+// |
+// Returns INVALID_URL if |target_url| is invalid and |
+// NAVIGATION_EVENT_NOT_FOUND if no event leads to it. Otherwise returns |
+// SUCCESS, SUCCESS_LANDING_PAGE or SUCCESS_LANDING_REFERRER, depending on the |
+// last landing entry that was appended. |
+// |
+// NOTE(review): nothing bounds the inner redirect loop. If the recorded |
+// non-user-initiated events form a cycle (a -> b -> a), it looks like the loop |
+// would never terminate - TODO confirm and consider capping the chain length. |
+SafeBrowsingNavigationObserverManager::AttributionResult |
+SafeBrowsingNavigationObserverManager::IdentifyReferrerChain( |
+ const GURL& target_url, |
+ int target_tab_id, |
+ int user_gesture_count_max, |
+ std::vector<ClientDownloadRequest::ReferrerChainEntry>* referrer_chain) { |
+ if (!target_url.is_valid()) |
+ return INVALID_URL; |
+ |
+ NavigationEvent* nav_event = FindNavigationEvent(target_url, target_tab_id); |
+ if (!nav_event) { |
+ // We cannot find a single navigation event related to this download. |
+ return NAVIGATION_EVENT_NOT_FOUND; |
+ } |
+ |
+ AddToReferrerChain(referrer_chain, nav_event, |
+ ClientDownloadRequest::ReferrerChainEntry::DOWNLOAD_URL); |
+ AttributionResult result = SUCCESS; |
+ int user_gesture_count = 0; |
+ while (user_gesture_count < user_gesture_count_max) { |
+ // Back trace to the next nav_event that was initiated by the user. Every |
+ // event passed on the way is logged as a server or client redirect. |
+ while (!nav_event->is_user_initiated) { |
+ nav_event = |
+ FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id); |
+ if (!nav_event) |
+ return result; |
+ AddToReferrerChain( |
+ referrer_chain, nav_event, |
+ nav_event->has_server_redirect |
+ ? ClientDownloadRequest::ReferrerChainEntry::SERVER_REDIRECT |
+ : ClientDownloadRequest::ReferrerChainEntry::CLIENT_REDIRECT); |
+ } |
+ user_gesture_count++; |
+ // If the source_url and source_main_frame_url of the current navigation |
+ // event are empty, and is_user_initiated is true, this is a browser |
+ // initiated navigation (e.g. triggered by typing in address bar, clicking |
+ // on bookmark, etc). We reached the end of the referrer chain. |
+ if (nav_event->source_url.is_empty() && |
+ nav_event->source_main_frame_url.is_empty()) { |
+ DCHECK(nav_event->is_user_initiated); |
+ return result; |
+ } |
+ nav_event = |
+ FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id); |
+ if (!nav_event) |
+ return result; |
+ // The event behind the first user gesture is the landing page; events |
+ // behind later gestures are logged as landing referrers. |
+ AddToReferrerChain( |
+ referrer_chain, nav_event, |
+ user_gesture_count == 1 |
+ ? ClientDownloadRequest::ReferrerChainEntry::LANDING_PAGE |
+ : ClientDownloadRequest::ReferrerChainEntry::LANDING_REFERRER); |
+ result = user_gesture_count == 1 ? SUCCESS_LANDING_PAGE |
+ : SUCCESS_LANDING_REFERRER; |
+ } |
+ return result; |
+} |
+ |
} // namespace safe_browsing |