Index: chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
diff --git a/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc b/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
index 14fdb02acfce05e181fb1fba65d93e5912671441..b3c0c264f6502c6b98571410fa6a02526a4a98ce 100644 |
--- a/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
+++ b/chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.cc |
@@ -5,7 +5,10 @@ |
#include "chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.h" |
#include "base/memory/ptr_util.h" |
+#include "base/metrics/histogram_macros.h" |
+#include "base/strings/stringprintf.h" |
#include "base/time/time.h" |
+#include "base/timer/timer.h" |
#include "chrome/browser/chrome_notification_types.h" |
#include "chrome/browser/safe_browsing/safe_browsing_navigation_observer.h" |
#include "chrome/browser/sessions/session_tab_helper.h" |
@@ -21,20 +24,43 @@ using content::WebContents; |
namespace safe_browsing { |
+namespace { |
+ |
+const char kDownloadAttributionResultUMA[] = |
Charlie Reis
2016/12/09 22:00:25
These shouldn't be constants. Just use the string
Jialiu Lin
2016/12/12 23:43:38
Done.
|
+ "SafeBrowsing.ReferrerAttributionResult.DownloadAttribution"; |
+const char kDownloadAttributionURLChainSizeUMA[] = |
+ "SafeBrowsing.ReferrerURLChainSize.DownloadAttribution"; |
+const char kDownloadHasInvalidTabIDUMA[] = |
+ "SafeBrowsing.ReferrerHasInvalidTabID.DownloadAttribution"; |
+ |
+// Decides whether an event stamped |event_time| has outlived a time-to-live of |
+// |ttl_in_second| seconds. A timestamp that is not strictly earlier than the |
+// current time is regarded as invalid, so such an event is reported as expired |
+// as well. |
+bool IsEventExpired(const base::Time& event_time, double ttl_in_second) { |
+  const double now_sec = base::Time::Now().ToDoubleT(); |
+  const double event_sec = event_time.ToDoubleT(); |
+  const bool timestamp_not_in_past = now_sec <= event_sec; |
+  return timestamp_not_in_past || (now_sec - event_sec > ttl_in_second); |
+} |
+ |
+} // namespace |
+ |
// The expiration period of a user gesture. Any user gesture that happened 1.0 |
-// second ago will be considered as expired and not relevant to upcoming |
-// navigation events. |
+// second ago is considered as expired and not relevant to upcoming navigation |
+// events. |
static const double kUserGestureTTLInSecond = 1.0; |
+// The expiration period of navigation events and resolved IP addresses. Any |
+// navigation related records that happened 2 minutes ago are considered as |
Charlie Reis
2016/12/09 22:00:25
nit: s/is/are/
Jialiu Lin
2016/12/12 23:43:38
Done.
|
+// expired. So we clean up these navigation footprints every 2 minutes. |
+static const double kNavigationFootprintTTLInSecond = 120.0; |
+// The number of user gestures we trace back for download attribution. |
+static const int kDownloadAttributionUserGestureLimit = 2; |
// Reports whether the user gesture recorded at |timestamp| counts as expired. |
// The decision is delegated to IsEventExpired() with kUserGestureTTLInSecond |
// as the time-to-live. |
// static |
bool SafeBrowsingNavigationObserverManager::IsUserGestureExpired( |
const base::Time& timestamp) { |
-  double now = base::Time::Now().ToDoubleT(); |
-  double timestamp_in_double = timestamp.ToDoubleT(); |
- |
-  if (now <= timestamp_in_double) |
-    return true; |
-  return (now - timestamp_in_double) > kUserGestureTTLInSecond; |
+  return IsEventExpired(timestamp, kUserGestureTTLInSecond); |
} |
// static |
@@ -50,6 +76,9 @@ GURL SafeBrowsingNavigationObserverManager::ClearEmptyRef(const GURL& url) { |
// Registers this object with registrar_ for chrome::NOTIFICATION_RETARGETING |
// notifications coming from all sources. |
SafeBrowsingNavigationObserverManager::SafeBrowsingNavigationObserverManager() { |
registrar_.Add(this, chrome::NOTIFICATION_RETARGETING, |
content::NotificationService::AllSources()); |
+ |
+  // TODO(jialiul): call ScheduleNextCleanUpAfterInterval() when this class is |
+  // ready to be hooked into SafeBrowsingService. |
} |
void SafeBrowsingNavigationObserverManager::RecordNavigationEvent( |
@@ -104,7 +133,99 @@ void SafeBrowsingNavigationObserverManager::RecordHostToIpMapping( |
// Removes the entry keyed by |web_contents| from user_gesture_map_. |
void SafeBrowsingNavigationObserverManager::OnWebContentDestroyed( |
content::WebContents* web_contents) { |
user_gesture_map_.erase(web_contents); |
-  // TODO (jialiul): Will add other clean up tasks shortly. |
+} |
+ |
+void SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootprints() { |
Charlie Reis
2016/12/09 22:00:25
This will be running every 2 minutes, right? Plea
Jialiu Lin
2016/12/12 23:43:37
I sent an email to rschoen@ (cc'ed you and nparker
Charlie Reis
2016/12/14 07:43:57
Thanks! Sounds like a circular buffer may be a go
Jialiu Lin
2016/12/14 19:06:29
Let me think about this. I feel circular buffer is
Charlie Reis
2016/12/15 00:38:33
True, though the flip side is that they can alloca
|
+  // Sweep each of the three footprint stores in turn. |
+  CleanUpNavigationEvents(); |
+  CleanUpUserGestures(); |
+  CleanUpIpAddresses(); |
+ |
+  // Queue the next sweep one navigation-footprint TTL from now. |
+  const base::TimeDelta next_sweep_delay = |
+      base::TimeDelta::FromSecondsD(kNavigationFootprintTTLInSecond); |
+  ScheduleNextCleanUpAfterInterval(next_sweep_delay); |
+} |
+ |
+// Builds the referrer chain that led to |target_url| in tab |target_tab_id| by |
+// walking the recorded navigation events backwards and appending one |
+// ReferrerChainEntry per event to |referrer_chain|. The walk ends once |
+// |user_gesture_count_limit| user-initiated navigations have been crossed, when |
+// no earlier event can be found, or when a browser-initiated navigation is |
+// reached. |
+// Returns INVALID_URL or NAVIGATION_EVENT_NOT_FOUND if the chain cannot be |
+// started. Otherwise returns SUCCESS, SUCCESS_LANDING_PAGE or |
+// SUCCESS_LANDING_REFERRER, reflecting the furthest entry type that was added. |
+SafeBrowsingNavigationObserverManager::AttributionResult |
+SafeBrowsingNavigationObserverManager::IdentifyReferrerChain( |
+    const GURL& target_url, |
+    int target_tab_id, |
+    int user_gesture_count_limit, |
+    std::vector<ReferrerChainEntry>* referrer_chain) { |
+  if (!target_url.is_valid()) |
+    return INVALID_URL; |
+ |
+  NavigationEvent* nav_event = FindNavigationEvent(target_url, target_tab_id); |
+  if (!nav_event) { |
+    // We cannot find a single navigation event related to this download. |
+    return NAVIGATION_EVENT_NOT_FOUND; |
+  } |
+ |
+  AddToReferrerChain(referrer_chain, nav_event, |
+                     ReferrerChainEntry::DOWNLOAD_URL); |
+  AttributionResult result = SUCCESS; |
+  int user_gesture_count = 0; |
+  while (user_gesture_count < user_gesture_count_limit) { |
+    // Back trace to the next nav_event that was initiated by the user. |
Charlie Reis
2016/12/09 22:00:25
nit: that was initiated by the user
Jialiu Lin
2016/12/12 23:43:38
Done.
|
+    while (!nav_event->is_user_initiated) { |
+      NavigationEvent* previous_event = |
+          FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id); |
+      // Stop when nothing further back is recorded. Also stop when the lookup |
+      // hands back the very event we are standing on (its source matches its |
+      // own destination): following it again would never terminate and would |
+      // grow |referrer_chain| without bound. Longer redirect cycles are not |
+      // detected here. |
+      if (!previous_event || previous_event == nav_event) |
+        return result; |
+      nav_event = previous_event; |
+      AddToReferrerChain( |
+          referrer_chain, nav_event, |
+          nav_event->has_server_redirect |
+              ? ReferrerChainEntry::SERVER_REDIRECT |
+              : ReferrerChainEntry::CLIENT_REDIRECT); |
+    } |
+ |
+    user_gesture_count++; |
Charlie Reis
2016/12/09 22:00:25
nit: Blank line before and after.
Jialiu Lin
2016/12/12 23:43:38
Done.
|
+ |
+    // If the source_url and source_main_frame_url of current navigation event |
+    // are empty, and is_user_initiated is true, this is a browser initiated |
Charlie Reis
2016/12/09 22:00:25
nit: are empty
Jialiu Lin
2016/12/12 23:43:37
Done.
|
+    // navigation (e.g. triggered by typing in address bar, clicking on |
+    // bookmark, etc). We reached the end of the referrer chain. |
+    if (nav_event->source_url.is_empty() && |
+        nav_event->source_main_frame_url.is_empty()) { |
+      DCHECK(nav_event->is_user_initiated); |
+      return result; |
+    } |
+ |
+    nav_event = |
Charlie Reis
2016/12/09 22:00:25
nit: Blank line before.
Jialiu Lin
2016/12/12 23:43:37
Done.
|
+        FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id); |
+    if (!nav_event) |
+      return result; |
+ |
+    // The event reached after crossing the first user gesture is labelled the |
+    // landing page; an event reached after any later gesture is labelled the |
+    // landing referrer. |result| tracks the furthest label added so far. |
+    AddToReferrerChain( |
+        referrer_chain, nav_event, |
+        user_gesture_count == 1 |
+            ? ReferrerChainEntry::LANDING_PAGE |
+            : ReferrerChainEntry::LANDING_REFERRER); |
Charlie Reis
2016/12/09 22:00:25
This is a bit unclear from reading the code. I'd
Jialiu Lin
2016/12/12 23:43:37
Done.
|
+    result = user_gesture_count == 1 ? SUCCESS_LANDING_PAGE |
+                                     : SUCCESS_LANDING_REFERRER; |
+  } |
+  return result; |
+} |
+ |
+// Identifies the referrer chain of |download_url|, starting from the tab that |
+// hosts |source_contents|, records UMA about the attribution (tab id validity, |
+// chain length, attribution result), and copies every chain entry into |
+// |request|. |
+void SafeBrowsingNavigationObserverManager:: |
+    AddReferrerChainToClientDownloadRequest( |
+        const GURL& download_url, |
+        content::WebContents* source_contents, |
+        ClientDownloadRequest* request) { |
+  int download_tab_id = SessionTabHelper::IdForTab(source_contents); |
+  // This code treats -1 as an invalid tab id; log how often that happens. |
+  UMA_HISTOGRAM_BOOLEAN(kDownloadHasInvalidTabIDUMA, download_tab_id == -1); |
Charlie Reis
2016/12/09 22:00:25
nit: Replace this whole block with:
UMA_HISTOGRAM_
Jialiu Lin
2016/12/12 23:43:38
Done.
|
+  std::vector<ReferrerChainEntry> attribution_chain; |
+  AttributionResult result = IdentifyReferrerChain( |
+      download_url, |
+      download_tab_id, |
+      kDownloadAttributionUserGestureLimit, |
+      &attribution_chain); |
+  UMA_HISTOGRAM_COUNTS_100( |
+      kDownloadAttributionURLChainSizeUMA, |
+      attribution_chain.size()); |
+  UMA_HISTOGRAM_ENUMERATION( |
+      kDownloadAttributionResultUMA, |
+      result, |
+      SafeBrowsingNavigationObserverManager::ATTRIBUTION_FAILURE_TYPE_MAX); |
+  // Iterate by const reference so each entry is copied only once, into the |
+  // request, rather than once into the loop variable and once more on assign. |
+  for (const auto& entry : attribution_chain) |
+    *request->add_referrer_chain_entry() = entry; |
} |
SafeBrowsingNavigationObserverManager:: |
@@ -166,4 +287,115 @@ void SafeBrowsingNavigationObserverManager::RecordRetargeting( |
insertion_result.first->second.push_back(std::move(nav_event)); |
} |
+void SafeBrowsingNavigationObserverManager::CleanUpNavigationEvents() { |
+  // Drop every NavigationEvent that IsEventExpired() reports as expired under |
+  // kNavigationFootprintTTLInSecond, then drop map entries left with no events. |
+  const auto is_stale = [](const NavigationEvent& nav_event) { |
+    return IsEventExpired(nav_event.last_updated, |
+                          kNavigationFootprintTTLInSecond); |
+  }; |
+  auto entry = navigation_map_.begin(); |
+  while (entry != navigation_map_.end()) { |
+    auto& events = entry->second; |
+    events.erase(std::remove_if(events.begin(), events.end(), is_stale), |
+                 events.end()); |
+    if (events.empty()) { |
+      entry = navigation_map_.erase(entry); |
+    } else { |
+      ++entry; |
+    } |
+  } |
+} |
+ |
+void SafeBrowsingNavigationObserverManager::CleanUpUserGestures() { |
+  // Erase each gesture record that IsEventExpired() reports as expired under |
+  // kUserGestureTTLInSecond. erase() returns the iterator to continue from. |
+  auto gesture = user_gesture_map_.begin(); |
+  while (gesture != user_gesture_map_.end()) { |
+    const bool expired = |
+        IsEventExpired(gesture->second, kUserGestureTTLInSecond); |
+    if (expired) { |
+      gesture = user_gesture_map_.erase(gesture); |
+    } else { |
+      ++gesture; |
+    } |
+  } |
+} |
+ |
+void SafeBrowsingNavigationObserverManager::CleanUpIpAddresses() { |
+  // Drop every resolved IP that IsEventExpired() reports as expired under |
+  // kNavigationFootprintTTLInSecond, then drop hosts left with no IPs. |
+  const auto is_stale = [](const ResolvedIPAddress& resolved_ip) { |
+    return IsEventExpired(resolved_ip.timestamp, |
+                          kNavigationFootprintTTLInSecond); |
+  }; |
+  auto host_entry = host_to_ip_map_.begin(); |
+  while (host_entry != host_to_ip_map_.end()) { |
+    auto& resolved_ips = host_entry->second; |
+    resolved_ips.erase( |
+        std::remove_if(resolved_ips.begin(), resolved_ips.end(), is_stale), |
+        resolved_ips.end()); |
+    if (resolved_ips.empty()) { |
+      host_entry = host_to_ip_map_.erase(host_entry); |
+    } else { |
+      ++host_entry; |
+    } |
+  } |
+} |
+ |
+// Tells whether cleanup_timer_ is currently running. |
+bool SafeBrowsingNavigationObserverManager::IsCleanUpScheduled() const { |
+  const bool timer_running = cleanup_timer_.IsRunning(); |
+  return timer_running; |
+} |
+ |
+// Stops cleanup_timer_ and starts it again with |interval| as the delay and |
+// CleanUpStaleNavigationFootprints() as the task to run on this object. |
+// |interval| must not be negative. |
+void SafeBrowsingNavigationObserverManager::ScheduleNextCleanUpAfterInterval( |
+    base::TimeDelta interval) { |
+  // DCHECK_GE reports both operand values on failure, unlike DCHECK(a >= b). |
+  DCHECK_GE(interval, base::TimeDelta()); |
Charlie Reis
2016/12/09 22:00:25
Would DCHECK_GE work? Also, presumably we don't w
Jialiu Lin
2016/12/12 23:43:37
Done.
|
+  cleanup_timer_.Stop(); |
+  cleanup_timer_.Start( |
+      FROM_HERE, interval, this, |
+      &SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootprints); |
+} |
+ |
+// Looks up the most recently recorded navigation event whose destination is |
+// |target_url| and, unless |target_tab_id| is -1, whose target tab matches. |
+// Returns nullptr when no suitable event is recorded. |
+NavigationEvent* SafeBrowsingNavigationObserverManager::FindNavigationEvent( |
+    const GURL& target_url, |
+    int target_tab_id) { |
+  auto events_for_url = navigation_map_.find(target_url); |
+  if (events_for_url == navigation_map_.end()) |
+    return nullptr; |
+ |
+  // Navigation events are recorded in chronological order, so walking the |
+  // vector from the back yields the latest match first. |
Charlie Reis
2016/12/09 22:00:25
I think we discussed earlier that this is imperfec
Jialiu Lin
2016/12/12 23:43:37
Added comment and example to safe_browsing_navigat
Charlie Reis
2016/12/14 07:43:57
Acknowledged.
|
+  auto& events = events_for_url->second; |
+  for (auto rit = events.rbegin(); rit != events.rend(); ++rit) { |
+    // With an invalid tab id only the url is compared, otherwise both are. |
+    const bool tab_matches = |
+        target_tab_id == -1 || rit->target_tab_id == target_tab_id; |
+    if (!(rit->destination_url == target_url) || !tab_matches) |
+      continue; |
+ |
+    // An event with neither source_url nor source_main_frame_url that was not |
+    // triggered by the user was probably caused by a retargeting navigation. |
+    // Skip it and keep looking for the retargeting navigation event itself. |
+    const bool has_no_source = |
+        rit->source_url.is_empty() && rit->source_main_frame_url.is_empty(); |
+    if (has_no_source && !rit->is_user_initiated) { |
Charlie Reis
2016/12/09 22:00:25
nit: This if/else should probably have braces.
Jialiu Lin
2016/12/12 23:43:38
Done.
|
+      continue; |
+    } |
+    return &*rit; |
+  } |
+  return nullptr; |
+} |
+ |
+// Converts |nav_event| into a ReferrerChainEntry of the given |type| and |
+// appends it to |referrer_chain|. The entry carries the destination URL, every |
+// IP recorded in host_to_ip_map_ for the destination host, the referrer URLs |
+// (omitted for LANDING_REFERRER entries), whether source and target tabs |
+// differ, and the event's last-updated time. |
+void SafeBrowsingNavigationObserverManager::AddToReferrerChain( |
+    std::vector<ReferrerChainEntry>* referrer_chain, |
+    NavigationEvent* nav_event, |
+    ReferrerChainEntry::URLType type) { |
+  ReferrerChainEntry referrer_chain_entry; |
+  referrer_chain_entry.set_url(nav_event->destination_url.spec()); |
+  referrer_chain_entry.set_type(type); |
+  auto ip_it = host_to_ip_map_.find(nav_event->destination_url.host()); |
+  if (ip_it != host_to_ip_map_.end()) { |
+    // Iterate by const reference; the records are only read here, so copying |
+    // each one into the loop variable is unnecessary. |
+    for (const ResolvedIPAddress& entry : ip_it->second) { |
+      referrer_chain_entry.add_ip_address(entry.ip); |
+    } |
+  } |
+  // Since we only track navigation to landing referrer, we will not log the |
+  // referrer of the landing referrer page. |
+  if (type != ReferrerChainEntry::LANDING_REFERRER) { |
+    referrer_chain_entry.set_referrer_url(nav_event->source_url.spec()); |
+    referrer_chain_entry.set_referrer_main_frame_url( |
+        nav_event->source_main_frame_url.spec()); |
+  } |
+  // A navigation whose source and target tabs differ is a retargeting one. |
+  referrer_chain_entry.set_is_retargeting(nav_event->source_tab_id != |
+                                          nav_event->target_tab_id); |
+  referrer_chain_entry.set_navigation_time_msec( |
+      nav_event->last_updated.ToJavaTime()); |
+  referrer_chain->push_back(referrer_chain_entry); |
+} |
+ |
} // namespace safe_browsing |