Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.h" | 5 #include "chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.h" |
| 6 | 6 |
| 7 #include "base/memory/ptr_util.h" | 7 #include "base/memory/ptr_util.h" |
| 8 #include "base/metrics/histogram_macros.h" | |
| 9 #include "base/strings/stringprintf.h" | |
| 8 #include "base/time/time.h" | 10 #include "base/time/time.h" |
| 11 #include "base/timer/timer.h" | |
| 9 #include "chrome/browser/chrome_notification_types.h" | 12 #include "chrome/browser/chrome_notification_types.h" |
| 10 #include "chrome/browser/safe_browsing/safe_browsing_navigation_observer.h" | 13 #include "chrome/browser/safe_browsing/safe_browsing_navigation_observer.h" |
| 11 #include "chrome/browser/sessions/session_tab_helper.h" | 14 #include "chrome/browser/sessions/session_tab_helper.h" |
| 12 #include "chrome/browser/tab_contents/retargeting_details.h" | 15 #include "chrome/browser/tab_contents/retargeting_details.h" |
| 13 #include "content/public/browser/navigation_details.h" | 16 #include "content/public/browser/navigation_details.h" |
| 14 #include "content/public/browser/notification_service.h" | 17 #include "content/public/browser/notification_service.h" |
| 15 #include "content/public/browser/notification_types.h" | 18 #include "content/public/browser/notification_types.h" |
| 16 #include "content/public/browser/render_frame_host.h" | 19 #include "content/public/browser/render_frame_host.h" |
| 17 #include "content/public/browser/render_process_host.h" | 20 #include "content/public/browser/render_process_host.h" |
| 18 #include "content/public/browser/web_contents.h" | 21 #include "content/public/browser/web_contents.h" |
| 19 | 22 |
| 20 using content::WebContents; | 23 using content::WebContents; |
| 21 | 24 |
| 22 namespace safe_browsing { | 25 namespace safe_browsing { |
| 23 | 26 |
| 27 namespace { | |
| 28 | |
| 29 const char kDownloadAttributionResultUMA[] = | |
|
Charlie Reis
2016/12/09 22:00:25
These shouldn't be constants. Just use the string
Jialiu Lin
2016/12/12 23:43:38
Done.
| |
| 30 "SafeBrowsing.ReferrerAttributionResult.DownloadAttribution"; | |
| 31 const char kDownloadAttributionURLChainSizeUMA[] = | |
| 32 "SafeBrowsing.ReferrerURLChainSize.DownloadAttribution"; | |
| 33 const char kDownloadHasInvalidTabIDUMA[] = | |
| 34 "SafeBrowsing.ReferrerHasInvalidTabID.DownloadAttribution"; | |
| 35 | |
| 36 // Given when an event happened and its TTL, determine if it is already expired. | |
| 37 // Note, if for some reason this event's timestamp is in the future, this | |
| 38 // event's timestamp is invalid, hence we treat it as expired. | |
| 39 bool IsEventExpired(const base::Time& event_time, double ttl_in_second) { | |
| 40 double current_time_in_second = base::Time::Now().ToDoubleT(); | |
| 41 double event_time_in_second = event_time.ToDoubleT(); | |
| 42 if (current_time_in_second <= event_time_in_second) | |
| 43 return true; | |
| 44 return current_time_in_second - event_time_in_second > ttl_in_second; | |
| 45 } | |
| 46 | |
| 47 } // namespace | |
| 48 | |
| 24 // The expiration period of a user gesture. Any user gesture that happened 1.0 | 49 // The expiration period of a user gesture. Any user gesture that happened 1.0 |
| 25 // second ago will be considered as expired and not relevant to upcoming | 50 // second ago is considered as expired and not relevant to upcoming navigation |
| 26 // navigation events. | 51 // events. |
| 27 static const double kUserGestureTTLInSecond = 1.0; | 52 static const double kUserGestureTTLInSecond = 1.0; |
| 53 // The expiration period of navigation events and resolved IP addresses. Any | |
| 54 // navigation related records that happened 2 minutes ago is considered as | |
|
Charlie Reis
2016/12/09 22:00:25
nit: s/is/are/
Jialiu Lin
2016/12/12 23:43:38
Done.
| |
| 55 // expired. So we clean up these navigation footprints every 2 minutes. | |
| 56 static const double kNavigationFootprintTTLInSecond = 120.0; | |
| 57 // The number of user gestures we trace back for download attribution. | |
| 58 static const int kDownloadAttributionUserGestureLimit = 2; | |
| 28 | 59 |
| 29 // static | 60 // static |
| 30 bool SafeBrowsingNavigationObserverManager::IsUserGestureExpired( | 61 bool SafeBrowsingNavigationObserverManager::IsUserGestureExpired( |
| 31 const base::Time& timestamp) { | 62 const base::Time& timestamp) { |
| 32 double now = base::Time::Now().ToDoubleT(); | 63 return IsEventExpired(timestamp, kUserGestureTTLInSecond); |
| 33 double timestamp_in_double = timestamp.ToDoubleT(); | |
| 34 | |
| 35 if (now <= timestamp_in_double) | |
| 36 return true; | |
| 37 return (now - timestamp_in_double) > kUserGestureTTLInSecond; | |
| 38 } | 64 } |
| 39 | 65 |
| 40 // static | 66 // static |
| 41 GURL SafeBrowsingNavigationObserverManager::ClearEmptyRef(const GURL& url) { | 67 GURL SafeBrowsingNavigationObserverManager::ClearEmptyRef(const GURL& url) { |
| 42 if (url.has_ref() && url.ref().empty()) { | 68 if (url.has_ref() && url.ref().empty()) { |
| 43 url::Replacements<char> replacements; | 69 url::Replacements<char> replacements; |
| 44 replacements.ClearRef(); | 70 replacements.ClearRef(); |
| 45 return url.ReplaceComponents(replacements); | 71 return url.ReplaceComponents(replacements); |
| 46 } | 72 } |
| 47 return url; | 73 return url; |
| 48 } | 74 } |
| 49 | 75 |
| 50 SafeBrowsingNavigationObserverManager::SafeBrowsingNavigationObserverManager() { | 76 SafeBrowsingNavigationObserverManager::SafeBrowsingNavigationObserverManager() { |
| 51 registrar_.Add(this, chrome::NOTIFICATION_RETARGETING, | 77 registrar_.Add(this, chrome::NOTIFICATION_RETARGETING, |
| 52 content::NotificationService::AllSources()); | 78 content::NotificationService::AllSources()); |
| 79 | |
| 80 // TODO(jialiul): call ScheduleNextCleanUpAfterInterval() when this class is | |
| 81 // ready to be hooked into SafeBrowsingService. | |
| 53 } | 82 } |
| 54 | 83 |
| 55 void SafeBrowsingNavigationObserverManager::RecordNavigationEvent( | 84 void SafeBrowsingNavigationObserverManager::RecordNavigationEvent( |
| 56 const GURL& nav_event_key, | 85 const GURL& nav_event_key, |
| 57 NavigationEvent* nav_event) { | 86 NavigationEvent* nav_event) { |
| 58 auto insertion_result = navigation_map_.insert( | 87 auto insertion_result = navigation_map_.insert( |
| 59 std::make_pair(nav_event_key, std::vector<NavigationEvent>())); | 88 std::make_pair(nav_event_key, std::vector<NavigationEvent>())); |
| 60 | 89 |
| 61 insertion_result.first->second.push_back(std::move(*nav_event)); | 90 insertion_result.first->second.push_back(std::move(*nav_event)); |
| 62 } | 91 } |
| (...skipping 34 matching lines...) | |
| 97 } | 126 } |
| 98 } | 127 } |
| 99 // If this is a new IP of this host, and we added to the end of the vector. | 128 // If this is a new IP of this host, and we added to the end of the vector. |
| 100 insert_result.first->second.push_back( | 129 insert_result.first->second.push_back( |
| 101 ResolvedIPAddress(base::Time::Now(), ip)); | 130 ResolvedIPAddress(base::Time::Now(), ip)); |
| 102 } | 131 } |
| 103 | 132 |
| 104 void SafeBrowsingNavigationObserverManager::OnWebContentDestroyed( | 133 void SafeBrowsingNavigationObserverManager::OnWebContentDestroyed( |
| 105 content::WebContents* web_contents) { | 134 content::WebContents* web_contents) { |
| 106 user_gesture_map_.erase(web_contents); | 135 user_gesture_map_.erase(web_contents); |
| 107 // TODO (jialiul): Will add other clean up tasks shortly. | 136 } |
| 137 | |
| 138 void SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootprints() { | |
|
Charlie Reis
2016/12/09 22:00:25
This will be running every 2 minutes, right? Plea
Jialiu Lin
2016/12/12 23:43:37
I sent an email to rschoen@ (cc'ed you and nparker
Charlie Reis
2016/12/14 07:43:57
Thanks! Sounds like a circular buffer may be a go
Jialiu Lin
2016/12/14 19:06:29
Let me think about this. I feel circular buffer is
Charlie Reis
2016/12/15 00:38:33
True, though the flip side is that they can alloca
| |
| 139 CleanUpNavigationEvents(); | |
| 140 CleanUpUserGestures(); | |
| 141 CleanUpIpAddresses(); | |
| 142 ScheduleNextCleanUpAfterInterval( | |
| 143 base::TimeDelta::FromSecondsD(kNavigationFootprintTTLInSecond)); | |
| 144 } | |
| 145 | |
| 146 SafeBrowsingNavigationObserverManager::AttributionResult | |
| 147 SafeBrowsingNavigationObserverManager::IdentifyReferrerChain( | |
| 148 const GURL& target_url, | |
| 149 int target_tab_id, | |
| 150 int user_gesture_count_limit, | |
| 151 std::vector<ReferrerChainEntry>* referrer_chain) { | |
| 152 if (!target_url.is_valid()) | |
| 153 return INVALID_URL; | |
| 154 | |
| 155 NavigationEvent* nav_event = FindNavigationEvent(target_url, target_tab_id); | |
| 156 if (!nav_event) { | |
| 157 // We cannot find a single navigation event related to this download. | |
| 158 return NAVIGATION_EVENT_NOT_FOUND; | |
| 159 } | |
| 160 | |
| 161 AddToReferrerChain(referrer_chain, nav_event, | |
| 162 ReferrerChainEntry::DOWNLOAD_URL); | |
| 163 AttributionResult result = SUCCESS; | |
| 164 int user_gesture_count = 0; | |
| 165 while (user_gesture_count < user_gesture_count_limit) { | |
| 166 // Back trace to the next nav_event that initiated by user. | |
|
Charlie Reis
2016/12/09 22:00:25
nit: that was initiated by the user
Jialiu Lin
2016/12/12 23:43:38
Done.
| |
| 167 while (!nav_event->is_user_initiated) { | |
| 168 nav_event = | |
| 169 FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id); | |
| 170 if (!nav_event) | |
| 171 return result; | |
| 172 AddToReferrerChain( | |
| 173 referrer_chain, nav_event, | |
| 174 nav_event->has_server_redirect | |
| 175 ? ReferrerChainEntry::SERVER_REDIRECT | |
| 176 : ReferrerChainEntry::CLIENT_REDIRECT); | |
| 177 } | |
| 178 user_gesture_count++; | |
|
Charlie Reis
2016/12/09 22:00:25
nit: Blank line before and after.
Jialiu Lin
2016/12/12 23:43:38
Done.
| |
| 179 // If the source_url and source_main_frame_url of current navigation event | |
| 180 // is empty, and is_user_initiated is true, this is a browser initiated | |
|
Charlie Reis
2016/12/09 22:00:25
nit: are empty
Jialiu Lin
2016/12/12 23:43:37
Done.
| |
| 181 // navigation (e.g. triggered by typing in address bar, clicking on bookmark, | |
| 182 // etc). We reached the end of the referrer chain. | |
| 183 if (nav_event->source_url.is_empty() && | |
| 184 nav_event->source_main_frame_url.is_empty()) { | |
| 185 DCHECK(nav_event->is_user_initiated); | |
| 186 return result; | |
| 187 } | |
| 188 nav_event = | |
|
Charlie Reis
2016/12/09 22:00:25
nit: Blank line before.
Jialiu Lin
2016/12/12 23:43:37
Done.
| |
| 189 FindNavigationEvent(nav_event->source_url, nav_event->source_tab_id); | |
| 190 if (!nav_event) | |
| 191 return result; | |
| 192 AddToReferrerChain( | |
| 193 referrer_chain, nav_event, | |
| 194 user_gesture_count == 1 | |
| 195 ? ReferrerChainEntry::LANDING_PAGE | |
| 196 : ReferrerChainEntry::LANDING_REFERRER); | |
|
Charlie Reis
2016/12/09 22:00:25
This is a bit unclear from reading the code. I'd
Jialiu Lin
2016/12/12 23:43:37
Done.
| |
| 197 result = user_gesture_count == 1 ? SUCCESS_LANDING_PAGE | |
| 198 : SUCCESS_LANDING_REFERRER; | |
| 199 } | |
| 200 return result; | |
| 201 } | |
| 202 | |
| 203 void SafeBrowsingNavigationObserverManager:: | |
| 204 AddReferrerChainToClientDownloadRequest( | |
| 205 const GURL& download_url, | |
| 206 content::WebContents* source_contents, | |
| 207 ClientDownloadRequest* request) { | |
| 208 int download_tab_id = SessionTabHelper::IdForTab(source_contents); | |
| 209 if (download_tab_id == -1) { | |
| 210 UMA_HISTOGRAM_BOOLEAN(kDownloadHasInvalidTabIDUMA, true); | |
|
Charlie Reis
2016/12/09 22:00:25
nit: Replace this whole block with:
UMA_HISTOGRAM_
Jialiu Lin
2016/12/12 23:43:38
Done.
| |
| 211 } else { | |
| 212 UMA_HISTOGRAM_BOOLEAN(kDownloadHasInvalidTabIDUMA, false); | |
| 213 } | |
| 214 std::vector<ReferrerChainEntry> attribution_chain; | |
| 215 AttributionResult result = IdentifyReferrerChain( | |
| 216 download_url, | |
| 217 download_tab_id, | |
| 218 kDownloadAttributionUserGestureLimit, | |
| 219 &attribution_chain); | |
| 220 UMA_HISTOGRAM_COUNTS_100( | |
| 221 kDownloadAttributionURLChainSizeUMA, | |
| 222 attribution_chain.size()); | |
| 223 UMA_HISTOGRAM_ENUMERATION( | |
| 224 kDownloadAttributionResultUMA, | |
| 225 result, | |
| 226 SafeBrowsingNavigationObserverManager::ATTRIBUTION_FAILURE_TYPE_MAX); | |
| 227 for (auto entry : attribution_chain) | |
| 228 *request->add_referrer_chain_entry() = entry; | |
| 108 } | 229 } |
| 109 | 230 |
| 110 SafeBrowsingNavigationObserverManager:: | 231 SafeBrowsingNavigationObserverManager:: |
| 111 ~SafeBrowsingNavigationObserverManager() {} | 232 ~SafeBrowsingNavigationObserverManager() {} |
| 112 | 233 |
| 113 void SafeBrowsingNavigationObserverManager::Observe( | 234 void SafeBrowsingNavigationObserverManager::Observe( |
| 114 int type, | 235 int type, |
| 115 const content::NotificationSource& source, | 236 const content::NotificationSource& source, |
| 116 const content::NotificationDetails& details) { | 237 const content::NotificationDetails& details) { |
| 117 if (type == chrome::NOTIFICATION_RETARGETING) | 238 if (type == chrome::NOTIFICATION_RETARGETING) |
| (...skipping 41 matching lines...) | |
| 159 OnUserGestureConsumed(it->first, it->second); | 280 OnUserGestureConsumed(it->first, it->second); |
| 160 } else { | 281 } else { |
| 161 nav_event.is_user_initiated = false; | 282 nav_event.is_user_initiated = false; |
| 162 } | 283 } |
| 163 | 284 |
| 164 auto insertion_result = navigation_map_.insert( | 285 auto insertion_result = navigation_map_.insert( |
| 165 std::make_pair(target_url, std::vector<NavigationEvent>())); | 286 std::make_pair(target_url, std::vector<NavigationEvent>())); |
| 166 insertion_result.first->second.push_back(std::move(nav_event)); | 287 insertion_result.first->second.push_back(std::move(nav_event)); |
| 167 } | 288 } |
| 168 | 289 |
| 290 void SafeBrowsingNavigationObserverManager::CleanUpNavigationEvents() { | |
| 291 // Remove any stale NavigationEvent, if it is older than | |
| 292 // kNavigationFootprintTTLInSecond. | |
| 293 for (auto it = navigation_map_.begin(); it != navigation_map_.end();) { | |
| 294 it->second.erase(std::remove_if(it->second.begin(), it->second.end(), | |
| 295 [](const NavigationEvent& nav_event) { | |
| 296 return IsEventExpired( | |
| 297 nav_event.last_updated, | |
| 298 kNavigationFootprintTTLInSecond); | |
| 299 }), | |
| 300 it->second.end()); | |
| 301 if (it->second.size() == 0) | |
| 302 it = navigation_map_.erase(it); | |
| 303 else | |
| 304 ++it; | |
| 305 } | |
| 306 } | |
| 307 | |
| 308 void SafeBrowsingNavigationObserverManager::CleanUpUserGestures() { | |
| 309 for (auto it = user_gesture_map_.begin(); it != user_gesture_map_.end();) { | |
| 310 if (IsEventExpired(it->second, kUserGestureTTLInSecond)) | |
| 311 it = user_gesture_map_.erase(it); | |
| 312 else | |
| 313 ++it; | |
| 314 } | |
| 315 } | |
| 316 | |
| 317 void SafeBrowsingNavigationObserverManager::CleanUpIpAddresses() { | |
| 318 for (auto it = host_to_ip_map_.begin(); it != host_to_ip_map_.end();) { | |
| 319 it->second.erase(std::remove_if(it->second.begin(), it->second.end(), | |
| 320 [](const ResolvedIPAddress& resolved_ip) { | |
| 321 return IsEventExpired( | |
| 322 resolved_ip.timestamp, | |
| 323 kNavigationFootprintTTLInSecond); | |
| 324 }), | |
| 325 it->second.end()); | |
| 326 if (it->second.size() == 0) | |
| 327 it = host_to_ip_map_.erase(it); | |
| 328 else | |
| 329 ++it; | |
| 330 } | |
| 331 } | |
| 332 | |
| 333 bool SafeBrowsingNavigationObserverManager::IsCleanUpScheduled() const { | |
| 334 return cleanup_timer_.IsRunning(); | |
| 335 } | |
| 336 | |
| 337 void SafeBrowsingNavigationObserverManager::ScheduleNextCleanUpAfterInterval( | |
| 338 base::TimeDelta interval) { | |
| 339 DCHECK(interval >= base::TimeDelta()); | |
|
Charlie Reis
2016/12/09 22:00:25
Would DCHECK_GE work? Also, presumably we don't w
Jialiu Lin
2016/12/12 23:43:37
Done.
| |
| 340 cleanup_timer_.Stop(); | |
| 341 cleanup_timer_.Start( | |
| 342 FROM_HERE, interval, this, | |
| 343 &SafeBrowsingNavigationObserverManager::CleanUpStaleNavigationFootprints); | |
| 344 } | |
| 345 | |
| 346 NavigationEvent* SafeBrowsingNavigationObserverManager::FindNavigationEvent( | |
| 347 const GURL& target_url, | |
| 348 int target_tab_id) { | |
| 349 auto it = navigation_map_.find(target_url); | |
| 350 if (it == navigation_map_.end()) { | |
| 351 return nullptr; | |
| 352 } | |
| 353 // Since navigation events are recorded in chronological order, we traverse | |
| 354 // the vector in reverse order to get the latest match. | |
|
Charlie Reis
2016/12/09 22:00:25
I think we discussed earlier that this is imperfec
Jialiu Lin
2016/12/12 23:43:37
Added comment and example to safe_browsing_navigat
Charlie Reis
2016/12/14 07:43:57
Acknowledged.
| |
| 355 for (auto rit = it->second.rbegin(); rit != it->second.rend(); ++rit) { | |
| 356 // If tab id is not valid, we only compare url, otherwise we compare both. | |
| 357 if (rit->destination_url == target_url && | |
| 358 (target_tab_id == -1 || rit->target_tab_id == target_tab_id)) { | |
| 359 // If both source_url and source_main_frame_url are empty, and this | |
| 360 // navigation is not triggered by user, a retargeting navigation probably | |
| 361 // causes this navigation. In this case, we skip this navigation event and | |
| 362 // look for the retargeting navigation event. | |
| 363 if (rit->source_url.is_empty() && rit->source_main_frame_url.is_empty() && | |
| 364 !rit->is_user_initiated) | |
|
Charlie Reis
2016/12/09 22:00:25
nit: This if/else should probably have braces.
Jialiu Lin
2016/12/12 23:43:38
Done.
| |
| 365 continue; | |
| 366 else | |
| 367 return &*rit; | |
| 368 } | |
| 369 } | |
| 370 return nullptr; | |
| 371 } | |
| 372 | |
| 373 void SafeBrowsingNavigationObserverManager::AddToReferrerChain( | |
| 374 std::vector<ReferrerChainEntry>* | |
| 375 referrer_chain, | |
| 376 NavigationEvent* nav_event, | |
| 377 ReferrerChainEntry::URLType type) { | |
| 378 ReferrerChainEntry referrer_chain_entry; | |
| 379 referrer_chain_entry.set_url(nav_event->destination_url.spec()); | |
| 380 referrer_chain_entry.set_type(type); | |
| 381 auto ip_it = host_to_ip_map_.find(nav_event->destination_url.host()); | |
| 382 if (ip_it != host_to_ip_map_.end()) { | |
| 383 for (ResolvedIPAddress entry : ip_it->second) { | |
| 384 referrer_chain_entry.add_ip_address(entry.ip); | |
| 385 } | |
| 386 } | |
| 387 // Since we only track navigation to landing referrer, we will not log the | |
| 388 // referrer of the landing referrer page. | |
| 389 if (type != ReferrerChainEntry::LANDING_REFERRER) { | |
| 390 referrer_chain_entry.set_referrer_url(nav_event->source_url.spec()); | |
| 391 referrer_chain_entry.set_referrer_main_frame_url( | |
| 392 nav_event->source_main_frame_url.spec()); | |
| 393 } | |
| 394 referrer_chain_entry.set_is_retargeting(nav_event->source_tab_id != | |
| 395 nav_event->target_tab_id); | |
| 396 referrer_chain_entry.set_navigation_time_msec( | |
| 397 nav_event->last_updated.ToJavaTime()); | |
| 398 referrer_chain->push_back(referrer_chain_entry); | |
| 399 } | |
| 400 | |
| 169 } // namespace safe_browsing | 401 } // namespace safe_browsing |
| OLD | NEW |