Chromium Code Reviews| Index: chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
| diff --git a/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc b/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
| index 14fdb02acfce05e181fb1fba65d93e5912671441..b3c0c264f6502c6b98571410fa6a02526a4a98ce 100644 |
| --- a/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
| +++ b/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
| @@ -5,7 +5,10 @@ |
| #include "chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.h" |
| #include "base/memory/ptr_util.h" |
| +#include "base/metrics/histogram_macros.h" |
| +#include "base/strings/stringprintf.h" |
| #include "base/time/time.h" |
| +#include "base/timer/timer.h" |
| #include "chrome/browser/chrome_notification_types.h" |
| #include "chrome/browser/safe_browsing/safe_browsing_navigation_observer.h" |
| #include "chrome/browser/sessions/session_tab_helper.h" |
| @@ -21,20 +24,43 @@ using content::WebContents; |
| namespace safe_browsing { |
| +namespace { |
| + |
| +const char kDownloadAttributionResultUMA[] = |
|
Charlie Reis
2016/12/09 22:00:25
These shouldn't be constants. Just use the string
Jialiu Lin
2016/12/12 23:43:38
Done.
|
| + "SafeBrowsing.ReferrerAttributionResult.DownloadAttribution"; |
| +const char kDownloadAttributionURLChainSizeUMA[] = |
| + "SafeBrowsing.ReferrerURLChainSize.DownloadAttribution"; |
| +const char kDownloadHasInvalidTabIDUMA[] = |
| + "SafeBrowsing.ReferrerHasInvalidTabID.DownloadAttribution"; |
| + |
| +// Returns true when an event stamped at event_time has outlived a TTL of |
| +// ttl_in_second seconds. A timestamp that is not strictly in the past is |
| +// considered invalid, so such an event is reported as expired as well. |
| +bool IsEventExpired(const base::Time& event_time, double ttl_in_second) { |
| +  const double now_s = base::Time::Now().ToDoubleT(); |
| +  const double event_s = event_time.ToDoubleT(); |
| +  const bool not_in_past = now_s <= event_s; |
| +  return not_in_past || (now_s - event_s) > ttl_in_second; |
| +} |
| + |
| +} // namespace |
| + |
| // The expiration period of a user gesture. Any user gesture that happened 1.0 |
| -// second ago will be considered as expired and not relevant to upcoming |
| -// navigation events. |
| +// second ago is considered as expired and not relevant to upcoming navigation |
| +// events. |
| static const double kUserGestureTTLInSecond = 1.0; |
| +// The expiration period of navigation events and resolved IP addresses. Any |
| +// navigation related records that happened 2 minutes ago are considered as |
|
Charlie Reis
2016/12/09 22:00:25
nit: s/is/are/
Jialiu Lin
2016/12/12 23:43:38
Done.
|
| +// expired. So we clean up these navigation footprints every 2 minutes. |
| +static const double kNavigationFootprintTTLInSecond = 120.0; |
| +// The number of user gestures we trace back for download attribution. |
| +static const int kDownloadAttributionUserGestureLimit = 2; |
| // static |
| bool SafeBrowsingNavigationObserverManager::IsUserGestureExpired( |
| const base::Time& timestamp) { |
| - double now = base::Time::Now().ToDoubleT(); |
| - double timestamp_in_double = timestamp.ToDoubleT(); |
| - |
| - if (now <= timestamp_in_double) |
| - return true; |
| - return (now - timestamp_in_double) > kUserGestureTTLInSecond; |
| + return IsEventExpired(timestamp, kUserGestureTTLInSecond); |
| } |
| // static |
| @@ -50,6 +76,9 @@ GURL SafeBrowsingNavigationObserverManager::ClearEmptyRef(const GURL& url) { |
| SafeBrowsingNavigationObserverManager::SafeBrowsingNavigationObserverManager() { |
| registrar_.Add(this, chrome::NOTIFICATION_RETARGETING, |
| content::NotificationService::AllSources()); |
| + |
| + // TODO(jialiul): call ScheduleNextCleanUpAfterInterval() when this class is |
| + // ready to be hooked into SafeBrowsingService. |
| } |
| void SafeBrowsingNavigationObserverManager::RecordNavigationEvent( |
| @@ -104,7 +133,99 @@ void SafeBrowsingNavigationObserverManager::RecordHostToIpMapping( |
| void SafeBrowsingNavigationObserverManager::OnWebContentDestroyed( |
| content::WebContents* web_contents) { |
| user_gesture_map_.erase(web_contents); |
| - // TODO (jialiul): Will add other clean up tasks shortly. |
| +} |
| + |
| +void SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootprints() { |
|
Charlie Reis
2016/12/09 22:00:25
This will be running every 2 minutes, right? Plea
Jialiu Lin
2016/12/12 23:43:37
I sent an email to rschoen@ (cc'ed you and nparker
Charlie Reis
2016/12/14 07:43:57
Thanks! Sounds like a circular buffer may be a go
Jialiu Lin
2016/12/14 19:06:29
Let me think about this. I feel circular buffer is
Charlie Reis
2016/12/15 00:38:33
True, though the flip side is that they can alloca
|
| + CleanUpNavigationEvents(); |
| + CleanUpUserGestures(); |
| + CleanUpIpAddresses(); |
| + ScheduleNextCleanUpAfterInterval( |
| + base::TimeDelta::FromSecondsD(kNavigationFootprintTTLInSecond)); |
| +} |
| + |
| +SafeBrowsingNavigationObserverManager::AttributionResult |
| +SafeBrowsingNavigationObserverManager::IdentifyReferrerChain( |
| + const GURL& target_url, |
| + int target_tab_id, |
| + int user_gesture_count_limit, |
| + std::vector<ReferrerChainEntry>* referrer_chain) { |
| + if (!target_url.is_valid()) |
| + return INVALID_URL; |
| + |
| + NavigationEvent* nav_event = FindNavigationEvent(target_url, target_tab_id); |
| + if (!nav_event) { |
| + // We cannot find a single navigation event related to this download. |
| + return NAVIGATION_EVENT_NOT_FOUND; |
| + } |
| + |
| + AddToReferrerChain(referrer_chain, nav_event, |
| + ReferrerChainEntry::DOWNLOAD_URL); |
| + AttributionResult result = SUCCESS; |
| + int user_gesture_count = 0; |
| + while (user_gesture_count < user_gesture_count_limit) { |
| + // Back trace to the next nav_event that was initiated by the user. |
|
Charlie Reis
2016/12/09 22:00:25
nit: that was initiated by the user
Jialiu Lin
2016/12/12 23:43:38
Done.
|
| + while (!nav_event->is_user_initiated) { |
| + nav_event = |
| + FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id); |
| + if (!nav_event) |
| + return result; |
| + AddToReferrerChain( |
| + referrer_chain, nav_event, |
| + nav_event->has_server_redirect |
| + ? ReferrerChainEntry::SERVER_REDIRECT |
| + : ReferrerChainEntry::CLIENT_REDIRECT); |
| + } |
| + user_gesture_count++; |
|
Charlie Reis
2016/12/09 22:00:25
nit: Blank line before and after.
Jialiu Lin
2016/12/12 23:43:38
Done.
|
| + // If the source_url and source_main_frame_url of current navigation event |
| + // are empty, and is_user_initiated is true, this is a browser initiated |
|
Charlie Reis
2016/12/09 22:00:25
nit: are empty
Jialiu Lin
2016/12/12 23:43:37
Done.
|
| + // navigation (e.g. triggered by typing in address bar, clicking on bookmark, |
| + // etc). We reached the end of the referrer chain. |
| + if (nav_event->source_url.is_empty() && |
| + nav_event->source_main_frame_url.is_empty()) { |
| + DCHECK(nav_event->is_user_initiated); |
| + return result; |
| + } |
| + nav_event = |
|
Charlie Reis
2016/12/09 22:00:25
nit: Blank line before.
Jialiu Lin
2016/12/12 23:43:37
Done.
|
| + FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id); |
| + if (!nav_event) |
| + return result; |
| + AddToReferrerChain( |
| + referrer_chain, nav_event, |
| + user_gesture_count == 1 |
| + ? ReferrerChainEntry::LANDING_PAGE |
| + : ReferrerChainEntry::LANDING_REFERRER); |
|
Charlie Reis
2016/12/09 22:00:25
This is a bit unclear from reading the code. I'd
Jialiu Lin
2016/12/12 23:43:37
Done.
|
| + result = user_gesture_count == 1 ? SUCCESS_LANDING_PAGE |
| + : SUCCESS_LANDING_REFERRER; |
| + } |
| + return result; |
| +} |
| + |
| +void SafeBrowsingNavigationObserverManager:: |
| + AddReferrerChainToClientDownloadRequest( |
| + const GURL& download_url, |
| + content::WebContents* source_contents, |
| + ClientDownloadRequest* request) { |
| + int download_tab_id = SessionTabHelper::IdForTab(source_contents); |
| + if (download_tab_id == -1) { |
| + UMA_HISTOGRAM_BOOLEAN(kDownloadHasInvalidTabIDUMA, true); |
|
Charlie Reis
2016/12/09 22:00:25
nit: Replace this whole block with:
UMA_HISTOGRAM_
Jialiu Lin
2016/12/12 23:43:38
Done.
|
| + } else { |
| + UMA_HISTOGRAM_BOOLEAN(kDownloadHasInvalidTabIDUMA, false); |
| + } |
| + std::vector<ReferrerChainEntry> attribution_chain; |
| + AttributionResult result = IdentifyReferrerChain( |
| + download_url, |
| + download_tab_id, |
| + kDownloadAttributionUserGestureLimit, |
| + &attribution_chain); |
| + UMA_HISTOGRAM_COUNTS_100( |
| + kDownloadAttributionURLChainSizeUMA, |
| + attribution_chain.size()); |
| + UMA_HISTOGRAM_ENUMERATION( |
| + kDownloadAttributionResultUMA, |
| + result, |
| + SafeBrowsingNavigationObserverManager::ATTRIBUTION_FAILURE_TYPE_MAX); |
| + for (auto entry : attribution_chain) |
| + *request->add_referrer_chain_entry() = entry; |
| } |
| SafeBrowsingNavigationObserverManager:: |
| @@ -166,4 +287,115 @@ void SafeBrowsingNavigationObserverManager::RecordRetargeting( |
| insertion_result.first->second.push_back(std::move(nav_event)); |
| } |
| +// Drops every NavigationEvent whose last_updated timestamp is older than |
| +// kNavigationFootprintTTLInSecond, then removes any navigation_map_ entry |
| +// whose event list has become empty. |
| +void SafeBrowsingNavigationObserverManager::CleanUpNavigationEvents() { |
| +  for (auto it = navigation_map_.begin(); it != navigation_map_.end();) { |
| +    auto& events = it->second; |
| +    events.erase(std::remove_if(events.begin(), events.end(), |
| +                                [](const NavigationEvent& nav_event) { |
| +                                  return IsEventExpired( |
| +                                      nav_event.last_updated, |
| +                                      kNavigationFootprintTTLInSecond); |
| +                                }), |
| +                 events.end()); |
| +    // Only advance manually when nothing was erased; erase() supplies the |
| +    // iterator to continue from otherwise. |
| +    if (events.empty()) |
| +      it = navigation_map_.erase(it); |
| +    else |
| +      ++it; |
| +  } |
| +} |
| + |
| +// Forgets every recorded user gesture whose timestamp is expired with |
| +// respect to kUserGestureTTLInSecond. |
| +void SafeBrowsingNavigationObserverManager::CleanUpUserGestures() { |
| +  auto gesture_it = user_gesture_map_.begin(); |
| +  while (gesture_it != user_gesture_map_.end()) { |
| +    const bool expired = |
| +        IsEventExpired(gesture_it->second, kUserGestureTTLInSecond); |
| +    if (!expired) { |
| +      ++gesture_it; |
| +      continue; |
| +    } |
| +    gesture_it = user_gesture_map_.erase(gesture_it); |
| +  } |
| +} |
| + |
| +// Drops every ResolvedIPAddress whose timestamp is older than |
| +// kNavigationFootprintTTLInSecond, then removes any host_to_ip_map_ entry |
| +// that no longer has an address left. |
| +void SafeBrowsingNavigationObserverManager::CleanUpIpAddresses() { |
| +  for (auto it = host_to_ip_map_.begin(); it != host_to_ip_map_.end();) { |
| +    auto& resolved_ips = it->second; |
| +    resolved_ips.erase( |
| +        std::remove_if(resolved_ips.begin(), resolved_ips.end(), |
| +                       [](const ResolvedIPAddress& resolved_ip) { |
| +                         return IsEventExpired( |
| +                             resolved_ip.timestamp, |
| +                             kNavigationFootprintTTLInSecond); |
| +                       }), |
| +        resolved_ips.end()); |
| +    // Only advance manually when nothing was erased; erase() supplies the |
| +    // iterator to continue from otherwise. |
| +    if (resolved_ips.empty()) |
| +      it = host_to_ip_map_.erase(it); |
| +    else |
| +      ++it; |
| +  } |
| +} |
| + |
| +// Returns whether cleanup_timer_ is currently running. |
| +bool SafeBrowsingNavigationObserverManager::IsCleanUpScheduled() const { |
| +  const bool timer_is_running = cleanup_timer_.IsRunning(); |
| +  return timer_is_running; |
| +} |
| + |
| +void SafeBrowsingNavigationObserverManager::ScheduleNextCleanUpAfterInterval( |
| + base::TimeDelta interval) { |
| + DCHECK(interval >= base::TimeDelta()); |
|
Charlie Reis
2016/12/09 22:00:25
Would DCHECK_GE work? Also, presumably we don't w
Jialiu Lin
2016/12/12 23:43:37
Done.
|
| + cleanup_timer_.Stop(); |
| + cleanup_timer_.Start( |
| + FROM_HERE, interval, this, |
| + &SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootprints); |
| +} |
| + |
| +NavigationEvent* SafeBrowsingNavigationObserverManager::FindNavigationEvent( |
| + const GURL& target_url, |
| + int target_tab_id) { |
| + auto it = navigation_map_.find(target_url); |
| + if (it == navigation_map_.end()) { |
| + return nullptr; |
| + } |
| + // Since navigation events are recorded in chronological order, we traverse |
| + // the vector in reverse order to get the latest match. |
|
Charlie Reis
2016/12/09 22:00:25
I think we discussed earlier that this is imperfec
Jialiu Lin
2016/12/12 23:43:37
Added comment and example to safe_browsing_navigat
Charlie Reis
2016/12/14 07:43:57
Acknowledged.
|
| + for (auto rit = it->second.rbegin(); rit != it->second.rend(); ++rit) { |
| + // If tab id is not valid, we only compare url, otherwise we compare both. |
| + if (rit->destination_url == target_url && |
| + (target_tab_id == -1 || rit->target_tab_id == target_tab_id)) { |
| + // If both source_url and source_main_frame_url are empty, and this |
| + // navigation is not triggered by user, a retargeting navigation probably |
| + // causes this navigation. In this case, we skip this navigation event and |
| + // look for the retargeting navigation event. |
| + if (rit->source_url.is_empty() && rit->source_main_frame_url.is_empty() && |
| + !rit->is_user_initiated) |
|
Charlie Reis
2016/12/09 22:00:25
nit: This if/else should probably have braces.
Jialiu Lin
2016/12/12 23:43:38
Done.
|
| + continue; |
| + else |
| + return &*rit; |
| + } |
| + } |
| + return nullptr; |
| +} |
| + |
| +// Builds a ReferrerChainEntry of the given type from nav_event and appends |
| +// it to referrer_chain. |
| +// |
| +// The entry carries nav_event's destination URL, every IP address recorded in |
| +// host_to_ip_map_ for that URL's host, the retargeting flag and the event's |
| +// last_updated time. Referrer URLs are filled in for every type except |
| +// LANDING_REFERRER. |
| +void SafeBrowsingNavigationObserverManager::AddToReferrerChain( |
| +    std::vector<ReferrerChainEntry>* referrer_chain, |
| +    NavigationEvent* nav_event, |
| +    ReferrerChainEntry::URLType type) { |
| +  ReferrerChainEntry referrer_chain_entry; |
| +  referrer_chain_entry.set_url(nav_event->destination_url.spec()); |
| +  referrer_chain_entry.set_type(type); |
| +  auto ip_it = host_to_ip_map_.find(nav_event->destination_url.host()); |
| +  if (ip_it != host_to_ip_map_.end()) { |
| +    // Bind by const reference so each ResolvedIPAddress is not copied just |
| +    // to read its ip field. |
| +    for (const ResolvedIPAddress& entry : ip_it->second) { |
| +      referrer_chain_entry.add_ip_address(entry.ip); |
| +    } |
| +  } |
| +  // Since we only track navigation to landing referrer, we will not log the |
| +  // referrer of the landing referrer page. |
| +  if (type != ReferrerChainEntry::LANDING_REFERRER) { |
| +    referrer_chain_entry.set_referrer_url(nav_event->source_url.spec()); |
| +    referrer_chain_entry.set_referrer_main_frame_url( |
| +        nav_event->source_main_frame_url.spec()); |
| +  } |
| +  // A navigation whose source and target tabs differ is flagged as |
| +  // retargeting. |
| +  referrer_chain_entry.set_is_retargeting(nav_event->source_tab_id != |
| +                                          nav_event->target_tab_id); |
| +  referrer_chain_entry.set_navigation_time_msec( |
| +      nav_event->last_updated.ToJavaTime()); |
| +  referrer_chain->push_back(referrer_chain_entry); |
| +} |
| + |
| } // namespace safe_browsing |