Chromium Code Reviews| Index: chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
| diff --git a/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc b/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
| index 14fdb02acfce05e181fb1fba65d93e5912671441..d4a1fe14e6a7a95d05ef7586e5eb9a07856f045b 100644 |
| --- a/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
| +++ b/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
| @@ -5,7 +5,10 @@ |
| #include "chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.h" |
| +#include <algorithm>
| #include "base/memory/ptr_util.h"
| +#include "base/metrics/histogram.h"
| +#include "base/metrics/histogram_macros.h"
| +#include "base/strings/stringprintf.h"
| #include "base/time/time.h"
| +#include "base/timer/timer.h"
| #include "chrome/browser/chrome_notification_types.h"
| #include "chrome/browser/safe_browsing/safe_browsing_navigation_observer.h"
| #include "chrome/browser/sessions/session_tab_helper.h"
| @@ -21,20 +24,58 @@ using content::WebContents; |
| namespace safe_browsing { |
| +namespace { |
| + |
| +const char kAttributionResultUMAPrefix[] = "SB2.ReferrerAttributionResult";  // Name prefix of the attribution-result histogram.
| +const char kAttributionURLChainSizeUMAPrefix[] = "SB2.ReferrerURLChainSize";  // Name prefix of the referrer-chain-size histogram.
| +const char kReferrerHasInvalidTabIDUMAPrefix[] = "SB2.ReferrerHasInvalidTabID";  // Name prefix of the invalid-tab-id histogram.
| +const char kDownloadAttributionUMASuffix[] = "DownloadAttribution";  // Attribution type appended after "<prefix>." for download attribution.
| + |
| +// Returns true when an event stamped |event_time| should no longer be used,
| +// given a time-to-live of |ttl_in_second| seconds. An event whose timestamp is
| +// not strictly in the past (i.e. "now" or later) is considered invalid and is
| +// therefore reported as expired as well.
| +bool IsEventExpired(const base::Time& event_time, double ttl_in_second) {
| +  const double age_in_second =
| +      base::Time::Now().ToDoubleT() - event_time.ToDoubleT();
| +  // A non-positive age means the timestamp does not lie in the past.
| +  const bool has_valid_age = age_in_second > 0.0;
| +  return !has_valid_age || age_in_second > ttl_in_second;
| +}
| + |
| +// Records |size|, the number of entries in an identified referrer chain, to the
| +// histogram named "<kAttributionURLChainSizeUMAPrefix>.<attribution_type>".
| +//
| +// The histogram name is assembled at run time, so the UMA_HISTOGRAM_* macros
| +// must not be used here: they cache the histogram object per call site and
| +// expect the same name on every call. The histogram is instead looked up
| +// through the factory on each call, using the bucket layout that
| +// UMA_HISTOGRAM_COUNTS_100 uses (min 1, max 100, 50 buckets).
| +void RecordURLChainSize(const std::string& attribution_type, std::size_t size) {
| +  const std::string metric_name = base::StringPrintf(
| +      "%s.%s", kAttributionURLChainSizeUMAPrefix, attribution_type.c_str());
| +  base::HistogramBase* histogram = base::Histogram::FactoryGet(
| +      metric_name, 1, 100, 50, base::HistogramBase::kUmaTargetedHistogramFlag);
| +  histogram->Add(static_cast<base::HistogramBase::Sample>(size));

Nathan Parker
2016/12/05 22:21:23
Does this work? I thought most of the UMA histogr
Jialiu Lin
2016/12/06 23:10:06
You're right... this would not work. Change to lit
|
| +}
| + |
| +// Records the attribution outcome |type| to the enumeration histogram named
| +// "<kAttributionResultUMAPrefix>.<attribution_type>".
| +//
| +// The name is built at run time, so the histogram is fetched through the
| +// factory instead of UMA_HISTOGRAM_ENUMERATION, which caches one histogram per
| +// call site. The bucket layout mirrors the macro: linear buckets from 1 to the
| +// enum boundary, with boundary + 1 buckets.
| +void RecordAttributionResult(
| +    const std::string& attribution_type,
| +    SafeBrowsingNavigationObserverManager::AttributionResult type) {
| +  const std::string metric_name = base::StringPrintf(
| +      "%s.%s", kAttributionResultUMAPrefix, attribution_type.c_str());
| +  const int boundary = static_cast<int>(
| +      SafeBrowsingNavigationObserverManager::ATTRIBUTION_FAILURE_TYPE_MAX);
| +  base::HistogramBase* histogram = base::LinearHistogram::FactoryGet(
| +      metric_name, 1, boundary, boundary + 1,
| +      base::HistogramBase::kUmaTargetedHistogramFlag);
| +  histogram->Add(static_cast<int>(type));
| +}
| + |
| +// Records one "true" sample to the boolean histogram named
| +// "<kReferrerHasInvalidTabIDUMAPrefix>.<attribution_type>".
| +//
| +// The name is built at run time, so the histogram is fetched through the
| +// factory instead of UMA_HISTOGRAM_BOOLEAN, which caches one histogram per
| +// call site.
| +void RecordInvalidTabIDEvent(const std::string& attribution_type) {
| +  const std::string metric_name = base::StringPrintf(
| +      "%s.%s", kReferrerHasInvalidTabIDUMAPrefix, attribution_type.c_str());
| +  base::HistogramBase* histogram = base::BooleanHistogram::FactoryGet(
| +      metric_name, base::HistogramBase::kUmaTargetedHistogramFlag);
| +  histogram->AddBoolean(true);
| +}
| + |
| +} // namespace |
| + |
| // The expiration period of a user gesture, in seconds. A user gesture that
| // happened more than 1.0 second ago is considered expired and not relevant to
| // upcoming navigation events.
| static const double kUserGestureTTLInSecond = 1.0;
| +static const double kNavigationFootPrintTTLInSecond = 120.0;  // TTL, in seconds, of navigation events and resolved IPs; also the delay between clean-up passes.
|
Nathan Parker
2016/12/05 22:21:24
Add a comment describing how this is intended to b
Jialiu Lin
2016/12/06 23:10:06
Done.
|
| // static. Returns true if |timestamp| is older than kUserGestureTTLInSecond or is not in the past.
| bool SafeBrowsingNavigationObserverManager::IsUserGestureExpired(
| const base::Time& timestamp) {
| - double now = base::Time::Now().ToDoubleT();
| - double timestamp_in_double = timestamp.ToDoubleT();
| -
| - if (now <= timestamp_in_double)
| - return true;
| - return (now - timestamp_in_double) > kUserGestureTTLInSecond;
| + return IsEventExpired(timestamp, kUserGestureTTLInSecond);  // Same rule as the removed inline code, now shared through IsEventExpired().
| }
| // static |
| @@ -50,6 +91,9 @@ GURL SafeBrowsingNavigationObserverManager::ClearEmptyRef(const GURL& url) { |
| SafeBrowsingNavigationObserverManager::SafeBrowsingNavigationObserverManager() {
| registrar_.Add(this, chrome::NOTIFICATION_RETARGETING,
| content::NotificationService::AllSources());  // Register for NOTIFICATION_RETARGETING from all sources.
| +
| + // TODO(jialiul): call ScheduleNextCleanUpAfterInterval() here once this class
| + // is hooked into SafeBrowsingService; the constructor does not do so yet.
| }
| void SafeBrowsingNavigationObserverManager::RecordNavigationEvent( |
| @@ -104,7 +148,33 @@ void SafeBrowsingNavigationObserverManager::RecordHostToIpMapping( |
| void SafeBrowsingNavigationObserverManager::OnWebContentDestroyed(
| content::WebContents* web_contents) {
| user_gesture_map_.erase(web_contents);  // Drop the user-gesture entry keyed by the destroyed WebContents.
| - // TODO (jialiul): Will add other clean up tasks shortly.
| +}
| + |
| +// Removes every stale navigation footprint (navigation events, user gestures
| +// and resolved IP addresses), then schedules the next clean-up pass to run
| +// after kNavigationFootPrintTTLInSecond seconds.
| +void SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootPrints() {
| +  CleanUpNavigationEvents();
| +  CleanUpUserGestures();
| +  CleanUpIpAddresses();
| +
| +  const base::TimeDelta next_pass_delay =
| +      base::TimeDelta::FromSecondsD(kNavigationFootPrintTTLInSecond);
| +  ScheduleNextCleanUpAfterInterval(next_pass_delay);
| +}
| + |
| +// Identifies the referrer chain that led to the download of |download_url| in
| +// |source_contents| and appends one referrer_chain_entry per chain link to
| +// |request|. Also records the chain size and the attribution result and, when
| +// |source_contents| has no valid tab id, an invalid-tab-id event.
| +void SafeBrowsingNavigationObserverManager::
| +    AddReferrerChainToClientDownloadRequest(
| +        const GURL& download_url,
| +        content::WebContents* source_contents,
| +        ClientDownloadRequest* request) {
| +  const std::string attribution_type(kDownloadAttributionUMASuffix);

Nathan Parker
2016/12/05 22:21:23
const.
Or even just
const char* attribution_type
Jialiu Lin
2016/12/06 23:10:06
no longer using this variable.
|
| +  // Maximum number of user gestures IdentifyReferrerChain() traces back
| +  // through when attributing a download.
| +  const int kDownloadAttributionUserGestureLimit = 2;
| +  const int download_tab_id = SessionTabHelper::IdForTab(source_contents);
| +  // A tab id of -1 is reported as invalid; attribution is still attempted.
| +  if (download_tab_id == -1)
| +    RecordInvalidTabIDEvent(attribution_type);
| +  std::vector<ClientDownloadRequest::ReferrerChainEntry> attribution_chain;
| +  const AttributionResult result = IdentifyReferrerChain(
| +      download_url, download_tab_id, kDownloadAttributionUserGestureLimit,
| +      &attribution_chain);

Nathan Parker
2016/12/05 22:21:23
What is the 2? Add a comment, or make it a consta
Jialiu Lin
2016/12/06 23:10:06
Added a const
|
| +  RecordURLChainSize(attribution_type, attribution_chain.size());
| +  RecordAttributionResult(attribution_type, result);
| +  // Iterate by const reference so each entry is copied only once, into the
| +  // slot added to |request|.
| +  for (const auto& entry : attribution_chain)
| +    *request->add_referrer_chain_entry() = entry;

Nathan Parker
2016/12/05 22:21:23
You shouldn't need the parens.
Jialiu Lin
2016/12/06 23:10:06
Done.
|
| }
| SafeBrowsingNavigationObserverManager:: |
| @@ -166,4 +236,172 @@ void SafeBrowsingNavigationObserverManager::RecordRetargeting( |
| insertion_result.first->second.push_back(std::move(nav_event)); |
| } |
| +void SafeBrowsingNavigationObserverManager::CleanUpNavigationEvents() {
| +  // Remove any stale NavigationEvent, i.e. one that is older than

Nathan Parker
2016/12/05 22:21:24
nit: NavigationEvent.
And do you mean "lasts long
Jialiu Lin
2016/12/06 23:10:06
is older than...
|
| +  // kNavigationFootPrintTTLInSecond, then drop URLs left without any event.
| +  const auto is_stale = [](const NavigationEvent& nav_event) {
| +    return IsEventExpired(nav_event.last_updated,
| +                          kNavigationFootPrintTTLInSecond);
| +  };
| +  auto it = navigation_map_.begin();
| +  while (it != navigation_map_.end()) {
| +    auto& events = it->second;
| +    events.erase(std::remove_if(events.begin(), events.end(), is_stale),
| +                 events.end());
| +    if (!events.empty()) {
| +      ++it;
| +      continue;
| +    }
| +    // erase() hands back the iterator to the following entry, so empty
| +    // entries are pruned in the same pass.
| +    it = navigation_map_.erase(it);

Nathan Parker
2016/12/05 22:21:24
This is clever -- avoids needing a second pass.
Jialiu Lin
2016/12/06 23:10:06
Acknowledged.
|
| +  }
| +}
| + |
| +// Erases every entry of |user_gesture_map_| whose timestamp has expired
| +// according to kUserGestureTTLInSecond.
| +void SafeBrowsingNavigationObserverManager::CleanUpUserGestures() {
| +  auto it = user_gesture_map_.begin();
| +  while (it != user_gesture_map_.end()) {
| +    const bool is_fresh = !IsEventExpired(it->second, kUserGestureTTLInSecond);
| +    if (is_fresh) {
| +      ++it;
| +      continue;
| +    }
| +    // erase() returns the iterator following the removed entry.
| +    it = user_gesture_map_.erase(it);
| +  }
| +}
| + |
| +// Removes every resolved IP address older than
| +// kNavigationFootPrintTTLInSecond from |host_to_ip_map_|, then drops hosts
| +// that are left without any address.
| +void SafeBrowsingNavigationObserverManager::CleanUpIpAddresses() {
| +  const auto is_stale = [](const ResolvedIPAddress& resolved_ip) {
| +    return IsEventExpired(resolved_ip.timestamp,
| +                          kNavigationFootPrintTTLInSecond);
| +  };
| +  auto it = host_to_ip_map_.begin();
| +  while (it != host_to_ip_map_.end()) {
| +    auto& addresses = it->second;
| +    addresses.erase(
| +        std::remove_if(addresses.begin(), addresses.end(), is_stale),
| +        addresses.end());
| +    if (!addresses.empty()) {
| +      ++it;
| +      continue;
| +    }
| +    // erase() returns the iterator following the removed entry.
| +    it = host_to_ip_map_.erase(it);
| +  }
| +}
| + |
| +bool SafeBrowsingNavigationObserverManager::IsCleanUpScheduled() const {  // Reports whether |cleanup_timer_| is currently running.
| + return cleanup_timer_.IsRunning();
| +}
| + |
| +void SafeBrowsingNavigationObserverManager::ScheduleNextCleanUpAfterInterval(  // Arms |cleanup_timer_| to call CleanUpStaleNavigationFootPrints() after |interval|.
| + base::TimeDelta interval) {
| + DCHECK(interval >= base::TimeDelta());  // A negative interval is a caller error.
| + cleanup_timer_.Stop();  // Stop the timer first so it is restarted with the new interval.
| + cleanup_timer_.Start(
| + FROM_HERE, interval, this,
| + &SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootPrints);
| +}
| + |
| +// Returns the most recently recorded NavigationEvent whose destination is
| +// |target_url| (and whose target tab is |target_tab_id|, unless that id is
| +// -1), or nullptr when no such event is recorded.
| +NavigationEvent* SafeBrowsingNavigationObserverManager::FindNavigationEvent(
| +    const GURL& target_url,
| +    int target_tab_id) {
| +  const auto found = navigation_map_.find(target_url);
| +  if (found == navigation_map_.end())
| +    return nullptr;
| +  auto& candidates = found->second;
| +  // Navigation events are recorded in chronological order, so walking

Nathan Parker
2016/12/05 22:21:23
Is there any way someone could confuse this by add
Jialiu Lin
2016/12/06 23:10:06
creis@ and I talked about this before.
a -> b->c->
|
| +  // the vector backwards yields the latest match first.
| +  for (auto rit = candidates.rbegin(); rit != candidates.rend(); ++rit) {
| +    // An invalid tab id (-1) matches any tab; otherwise the tab must match.

Nathan Parker
2016/12/05 22:21:23
I'm curious: When/why would tab_id be invalid?
Jialiu Lin
2016/12/06 23:10:06
I never see this happens in real settings, but it
|
| +    const bool same_url = rit->destination_url == target_url;
| +    const bool same_tab =
| +        target_tab_id == -1 || rit->target_tab_id == target_tab_id;
| +    if (!same_url || !same_tab)
| +      continue;
| +    // An event with neither a source URL nor a source main-frame URL that was
| +    // not triggered by the user was probably caused by a retargeting
| +    // navigation. Skip it and keep looking for the retargeting navigation
| +    // event instead.
| +    const bool has_no_source =
| +        rit->source_url.is_empty() && rit->source_main_frame_url.is_empty();
| +    if (has_no_source && !rit->is_user_initiated)
| +      continue;
| +    return &*rit;
| +  }
| +  return nullptr;
| +}
| + |
| +// Builds a ReferrerChainEntry of kind |type| describing |nav_event| and
| +// appends it to |referrer_chain|. The entry carries the destination URL, the
| +// IP addresses recorded for the destination host (if any), the referrer URLs
| +// (except for LANDING_REFERRER entries), whether the navigation changed tabs,
| +// and the time the event was last updated.
| +void SafeBrowsingNavigationObserverManager::AddToReferrerChain(
| +    std::vector<safe_browsing::ClientDownloadRequest::ReferrerChainEntry>*
| +        referrer_chain,
| +    NavigationEvent* nav_event,
| +    ClientDownloadRequest::ReferrerChainEntry::URLType type) {
| +  ClientDownloadRequest::ReferrerChainEntry referrer_chain_entry;
| +  referrer_chain_entry.set_url(nav_event->destination_url.spec());
| +  referrer_chain_entry.set_type(type);
| +  const auto ip_it =
| +      host_to_ip_map_.find(nav_event->destination_url.host());
| +  if (ip_it != host_to_ip_map_.end()) {
| +    // Iterate by const reference: only |ip| is read, so copying every
| +    // ResolvedIPAddress would be wasted work.
| +    for (const ResolvedIPAddress& entry : ip_it->second)
| +      referrer_chain_entry.add_ip_address(entry.ip);
| +  }
| +  // Since we only track navigation to landing referrer, we will not log the
| +  // referrer of the landing referrer page.
| +  if (type != ClientDownloadRequest::ReferrerChainEntry::LANDING_REFERRER) {
| +    referrer_chain_entry.set_referrer_url(nav_event->source_url.spec());
| +    referrer_chain_entry.set_referrer_main_frame_url(
| +        nav_event->source_main_frame_url.spec());
| +  }
| +  // A navigation whose source and target tabs differ is a retargeting one.
| +  referrer_chain_entry.set_is_retargeting(nav_event->source_tab_id !=
| +                                          nav_event->target_tab_id);
| +  referrer_chain_entry.set_navigation_time_msec(
| +      nav_event->last_updated.ToJavaTime());
| +  referrer_chain->push_back(referrer_chain_entry);
| +}
| + |
| +// Walks the recorded navigation events backwards from |target_url| (as seen in
| +// tab |target_tab_id|) and appends one entry per visited event to
| +// |referrer_chain|, starting with the target itself.
| +//
| +// The walk follows each event's source_url/source_tab_id. Events that were not
| +// user initiated are logged as server or client redirects; every
| +// user-initiated event counts as one user gesture. The walk ends after
| +// |user_gesture_count_max| gestures, when a browser-initiated navigation is
| +// reached, when no earlier event can be found, or when a run of redirects
| +// loops back onto itself.
| +//
| +// Returns INVALID_URL or NAVIGATION_EVENT_NOT_FOUND when nothing could be
| +// attributed; otherwise SUCCESS, SUCCESS_LANDING_PAGE or
| +// SUCCESS_LANDING_REFERRER, depending on how far back the chain reaches.
| +SafeBrowsingNavigationObserverManager::AttributionResult
| +SafeBrowsingNavigationObserverManager::IdentifyReferrerChain(
| +    const GURL& target_url,
| +    int target_tab_id,
| +    int user_gesture_count_max,
| +    std::vector<ClientDownloadRequest::ReferrerChainEntry>* referrer_chain) {
| +  if (!target_url.is_valid())
| +    return INVALID_URL;
| +
| +  NavigationEvent* nav_event = FindNavigationEvent(target_url, target_tab_id);
| +  if (!nav_event) {
| +    // We cannot find a single navigation event related to this download.
| +    return NAVIGATION_EVENT_NOT_FOUND;
| +  }
| +
| +  AddToReferrerChain(referrer_chain, nav_event,
| +                     ClientDownloadRequest::ReferrerChainEntry::DOWNLOAD_URL);
| +  AttributionResult result = SUCCESS;
| +  int user_gesture_count = 0;
| +  while (user_gesture_count < user_gesture_count_max) {
| +    // Events visited during the current run of non-user-initiated
| +    // navigations. FindNavigationEvent() always returns the latest match, so
| +    // redirects that form a cycle (a -> b -> a, or an event whose source is
| +    // its own destination) would otherwise keep this loop running forever.
| +    std::vector<const NavigationEvent*> redirect_run;
| +    redirect_run.push_back(nav_event);
| +    // Back trace to the next nav_event that was initiated by the user.
| +    while (!nav_event->is_user_initiated) {
| +      nav_event =
| +          FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id);
| +      if (!nav_event)
| +        return result;
| +      // Seeing an event twice within one run means the redirects loop; stop
| +      // with what has been attributed so far.
| +      if (std::find(redirect_run.begin(), redirect_run.end(), nav_event) !=
| +          redirect_run.end()) {
| +        return result;
| +      }
| +      redirect_run.push_back(nav_event);
| +      AddToReferrerChain(
| +          referrer_chain, nav_event,
| +          nav_event->has_server_redirect
| +              ? ClientDownloadRequest::ReferrerChainEntry::SERVER_REDIRECT
| +              : ClientDownloadRequest::ReferrerChainEntry::CLIENT_REDIRECT);
| +    }
| +    user_gesture_count++;
| +    // If the source_url and source_main_frame_url of current navigation event
| +    // is empty, and is_user_initiated is true, this is a browser initiated
| +    // navigation (e.g. triggered by typing in address bar, clicking on
| +    // bookmark, etc). We reached the end of the referrer chain.
| +    if (nav_event->source_url.is_empty() &&
| +        nav_event->source_main_frame_url.is_empty()) {
| +      DCHECK(nav_event->is_user_initiated);
| +      return result;
| +    }
| +    nav_event =
| +        FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id);
| +    if (!nav_event)
| +      return result;
| +    // The page behind the first gesture is the landing page; the page behind
| +    // any further gesture is logged as the landing referrer.
| +    AddToReferrerChain(
| +        referrer_chain, nav_event,
| +        user_gesture_count == 1
| +            ? ClientDownloadRequest::ReferrerChainEntry::LANDING_PAGE
| +            : ClientDownloadRequest::ReferrerChainEntry::LANDING_REFERRER);
| +    result = user_gesture_count == 1 ? SUCCESS_LANDING_PAGE
| +                                     : SUCCESS_LANDING_REFERRER;
| +  }
| +  return result;
| +}
| + |
| } // namespace safe_browsing |