OLD | NEW |
1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H
_ | 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H
_ |
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H
_ | 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H
_ |
7 | 7 |
| 8 #include "chrome/common/safe_browsing/csd.pb.h" |
8 #include "content/public/browser/notification_observer.h" | 9 #include "content/public/browser/notification_observer.h" |
9 #include "content/public/browser/notification_registrar.h" | 10 #include "content/public/browser/notification_registrar.h" |
10 #include "content/public/browser/web_contents_observer.h" | 11 #include "content/public/browser/web_contents_observer.h" |
11 #include "url/gurl.h" | 12 #include "url/gurl.h" |
12 | 13 |
13 namespace safe_browsing { | 14 namespace safe_browsing { |
14 | 15 |
15 class SafeBrowsingNavigationObserver; | 16 class SafeBrowsingNavigationObserver; |
16 struct NavigationEvent; | 17 struct NavigationEvent; |
17 struct ResolvedIPAddress; | 18 struct ResolvedIPAddress; |
18 | 19 |
19 // Manager class for SafeBrowsingNavigationObserver, which is in charge of | 20 // Manager class for SafeBrowsingNavigationObserver, which is in charge of |
20 // cleaning up stale navigation events, and identifing landing page/landing | 21 // cleaning up stale navigation events, and identifying landing page/landing |
21 // referrer for a specific download. | 22 // referrer for a specific download. |
22 // TODO(jialiul): For now, SafeBrowsingNavigationObserverManager also listens to | 23 // TODO(jialiul): For now, SafeBrowsingNavigationObserverManager also listens to |
23 // NOTIFICATION_RETARGETING as a way to detect cross frame/tab navigation. | 24 // NOTIFICATION_RETARGETING as a way to detect cross frame/tab navigation. |
24 // Remove base class content::NotificationObserver when | 25 // Remove base class content::NotificationObserver when |
25 // WebContentsObserver::DidOpenRequestedURL() covers all retargeting cases. | 26 // WebContentsObserver::DidOpenRequestedURL() covers all retargeting cases. |
26 class SafeBrowsingNavigationObserverManager | 27 class SafeBrowsingNavigationObserverManager |
27 : public content::NotificationObserver, | 28 : public content::NotificationObserver, |
28 public base::RefCountedThreadSafe<SafeBrowsingNavigationObserverManager> { | 29 public base::RefCountedThreadSafe<SafeBrowsingNavigationObserverManager> { |
29 public: | 30 public: |
| 31 // For UMA histogram counting. Do NOT change order. |
| 32 enum AttributionResult { |
| 33 SUCCESS = 1, // Identified referrer chain is not empty. |
| 34 SUCCESS_LANDING_PAGE = 2, // Successfully identified landing page. |
| 35 SUCCESS_LANDING_REFERRER = 3, // Successfully identified landing referrer. |
| 36 INVALID_URL = 4, |
| 37 NAVIGATION_EVENT_NOT_FOUND = 5, |
| 38 |
| 39 // Always at the end. |
| 40 ATTRIBUTION_FAILURE_TYPE_MAX |
| 41 }; |
| 42 |
30 // Helper function to check if user gesture is older than | 43 // Helper function to check if user gesture is older than |
31 // kUserGestureTTLInSecond. | 44 // kUserGestureTTLInSecond. |
32 static bool IsUserGestureExpired(const base::Time& timestamp); | 45 static bool IsUserGestureExpired(const base::Time& timestamp); |
33 // Helper function to strip empty ref fragment from a URL. Many pages | 46 // Helper function to strip empty ref fragment from a URL. Many pages |
34 // end up with a "#" at the end of their URLs due to navigation triggered by | 47 // end up with a "#" at the end of their URLs due to navigation triggered by |
35 // href="#" and javascript onclick function. We don't want to have separate | 48 // href="#" and javascript onclick function. We don't want to have separate |
36 // entries for these cases in the maps. | 49 // entries for these cases in the maps. |
37 static GURL ClearEmptyRef(const GURL& url); | 50 static GURL ClearEmptyRef(const GURL& url); |
38 | 51 |
39 SafeBrowsingNavigationObserverManager(); | 52 SafeBrowsingNavigationObserverManager(); |
40 | 53 |
41 // Add |nav_event| to |navigation_map_| based on |nav_event_key|. Object | 54 // Add |nav_event| to |navigation_map_| based on |nav_event_key|. Object |
42 // pointed to by |nav_event| will be no longer accessible after this function. | 55 // pointed to by |nav_event| will be no longer accessible after this function. |
43 void RecordNavigationEvent(const GURL& nav_event_key, | 56 void RecordNavigationEvent(const GURL& nav_event_key, |
44 NavigationEvent* nav_event); | 57 NavigationEvent* nav_event); |
45 void RecordUserGestureForWebContents(content::WebContents* web_contents, | 58 void RecordUserGestureForWebContents(content::WebContents* web_contents, |
46 const base::Time& timestamp); | 59 const base::Time& timestamp); |
47 void OnUserGestureConsumed(content::WebContents* web_contents, | 60 void OnUserGestureConsumed(content::WebContents* web_contents, |
48 const base::Time& timestamp); | 61 const base::Time& timestamp); |
49 void RecordHostToIpMapping(const std::string& host, const std::string& ip); | 62 void RecordHostToIpMapping(const std::string& host, const std::string& ip); |
| 63 |
50 // Clean-ups need to be done when a WebContents gets destroyed. | 64 // Clean-ups need to be done when a WebContents gets destroyed. |
51 void OnWebContentDestroyed(content::WebContents* web_contents); | 65 void OnWebContentDestroyed(content::WebContents* web_contents); |
52 | 66 |
53 // TODO(jialiul): more functions are coming for managing navigation_map_. | 67 // Remove all the observed NavigationEvents, user gestures, and resolved IP |
| 68 // addresses that are older than kNavigationFootprintTTLInSecond. |
| 69 void CleanUpStaleNavigationFootprints(); |
| 70 |
| 71 // Based on the |target_url| and |target_tab_id|, trace back the observed |
| 72 // NavigationEvents in navigation_map_ to identify the sequence of navigations |
| 73 // leading to the target, with the coverage limited to |
| 74 // |user_gesture_count_limit| number of user gestures. Then convert these |
| 75 // identified NavigationEvents into ReferrerChainEntrys and append them to |
| 76 // |out_referrer_chain|. |
| 77 AttributionResult IdentifyReferrerChain( |
| 78 const GURL& target_url, |
| 79 int target_tab_id, // -1 if tab id is not valid |
| 80 int user_gesture_count_limit, |
| 81 std::vector<ReferrerChainEntry>* out_referrer_chain); |
| 82 |
| 83 // Identify and add referrer chain info of a download to ClientDownloadRequest |
| 84 // proto. This function also records UMA stats of download attribution result. |
| 85 // TODO(jialiul): This function will be moved to DownloadProtectionService |
| 86 // class shortly. |
| 87 void AddReferrerChainToClientDownloadRequest( |
| 88 const GURL& download_url, |
| 89 content::WebContents* source_contents, |
| 90 ClientDownloadRequest* out_request); |
54 | 91 |
55 private: | 92 private: |
56 friend class base::RefCountedThreadSafe< | 93 friend class base::RefCountedThreadSafe< |
57 SafeBrowsingNavigationObserverManager>; | 94 SafeBrowsingNavigationObserverManager>; |
58 friend class TestNavigationObserverManager; | 95 friend class TestNavigationObserverManager; |
59 friend class SBNavigationObserverBrowserTest; | 96 friend class SBNavigationObserverBrowserTest; |
60 friend class SBNavigationObserverTest; | 97 friend class SBNavigationObserverTest; |
61 | 98 |
62 struct GurlHash { | 99 struct GurlHash { |
63 std::size_t operator()(const GURL& url) const { | 100 std::size_t operator()(const GURL& url) const { |
(...skipping 13 matching lines...) Expand all Loading... |
77 void Observe(int type, | 114 void Observe(int type, |
78 const content::NotificationSource& source, | 115 const content::NotificationSource& source, |
79 const content::NotificationDetails& details) override; | 116 const content::NotificationDetails& details) override; |
80 | 117 |
81 void RecordRetargeting(const content::NotificationDetails& details); | 118 void RecordRetargeting(const content::NotificationDetails& details); |
82 | 119 |
83 NavigationMap* navigation_map() { return &navigation_map_; } | 120 NavigationMap* navigation_map() { return &navigation_map_; } |
84 | 121 |
85 HostToIpMap* host_to_ip_map() { return &host_to_ip_map_; } | 122 HostToIpMap* host_to_ip_map() { return &host_to_ip_map_; } |
86 | 123 |
| 124 // Remove stale entries from navigation_map_ if they are older than |
| 125 // kNavigationFootprintTTLInSecond (2 minutes). |
| 126 void CleanUpNavigationEvents(); |
| 127 |
| 128 // Remove stale entries from user_gesture_map_ if they are older than |
| 129 // kUserGestureTTLInSecond (1 sec). |
| 130 void CleanUpUserGestures(); |
| 131 |
| 132 // Remove stale entries from host_to_ip_map_ if they are older than |
| 133 // kNavigationFootprintTTLInSecond (2 minutes). |
| 134 void CleanUpIpAddresses(); |
| 135 |
| 136 bool IsCleanUpScheduled() const; |
| 137 |
| 138 void ScheduleNextCleanUpAfterInterval(base::TimeDelta interval); |
| 139 |
| 140 // Find the most recent navigation event that navigated to |target_url| in the |
| 141 // tab with ID |target_tab_id|. If |target_tab_id| is not available (-1), we |
| 142 // search all tabs for the most recent navigation to |target_url|. |
| 143 // For some cases, the most recent navigation to |target_url| may not be |
| 144 // relevant. |
| 145 // For example, url1 in window A opens url2 in window B, url1 then opens an |
| 146 // about:blank page window C and injects script code in it to trigger a |
| 147 // delayed download in Window D. Before the download occurs, url2 in window B |
| 148 // opens a different about:blank page in window C. |
| 149 // A ---- C - D |
| 150 // \ / |
| 151 // B |
| 152 // In this case, FindNavigationEvent() will think url2 in Window B is the |
| 153 // referrer of about:blank in Window C since this navigation is more recent. |
| 154 // However, it does not prevent us from attributing url1 in Window A as the |
| 155 // cause of all these navigations. |
| 156 NavigationEvent* FindNavigationEvent(const GURL& target_url, |
| 157 int target_tab_id); |
| 158 |
| 159 void AddToReferrerChain(std::vector<ReferrerChainEntry>* referrer_chain, |
| 160 NavigationEvent* nav_event, |
| 161 ReferrerChainEntry::URLType type); |
| 162 |
87 // navigation_map_ keeps track of all the observed navigations. This map is | 163 // navigation_map_ keeps track of all the observed navigations. This map is |
88 // keyed on the resolved request url. In other words, in case of server | 164 // keyed on the resolved request url. In other words, in case of server |
89 // redirects, its key is the last server redirect url, otherwise, it is the | 165 // redirects, its key is the last server redirect url, otherwise, it is the |
90 // original target url. Since the same url can be requested multiple times | 166 // original target url. Since the same url can be requested multiple times |
91 // across different tabs and frames, the value of this map is a vector of | 167 // across different tabs and frames, the value of this map is a vector of |
92 // NavigationEvent ordered by navigation finish time. | 168 // NavigationEvent ordered by navigation finish time. |
93 // TODO(jialiul): Entries in navigation_map_ will be removed if they are older | 169 // Stale entries in navigation_map_ are removed by CleanUpNavigationEvents() |
94 // than 2 minutes since their corresponding navigations finish. | 170 // once they are older than kNavigationFootprintTTLInSecond (2 minutes). |
95 NavigationMap navigation_map_; | 171 NavigationMap navigation_map_; |
96 | 172 |
97 // user_gesture_map_ keeps track of the timestamp of last user gesture in | 173 // user_gesture_map_ keeps track of the timestamp of the last user gesture in |
98 // in each WebContents. We assume for majority of cases, a navigation | 174 // each WebContents. We assume for the majority of cases, a navigation |
99 // shortly after a user gesture indicate this navigation is user initiated. | 175 // shortly after a user gesture indicates this navigation is user initiated. |
100 UserGestureMap user_gesture_map_; | 176 UserGestureMap user_gesture_map_; |
101 | 177 |
102 // Host to timestamped IP addresses map that covers all the main frame and | 178 // Host to timestamped IP addresses map that covers all the main frame and |
103 // subframe URLs' hosts. Since it is possible for a host to resolve to more | 179 // subframe URLs' hosts. Since it is possible for a host to resolve to more |
104 // than one IP in even a short period of time, we map a single host to a | 180 // than one IP in even a short period of time, we map a single host to a |
105 // vector of ResolvedIPAddresss. This map is used to fill in ip_address field | 181 // vector of ResolvedIPAddresss. This map is used to fill in ip_address field |
106 // in URLChainEntry in ClientDownloadRequest. | 182 // in URLChainEntry in ClientDownloadRequest. |
107 HostToIpMap host_to_ip_map_; | 183 HostToIpMap host_to_ip_map_; |
108 | 184 |
109 content::NotificationRegistrar registrar_; | 185 content::NotificationRegistrar registrar_; |
110 | 186 |
| 187 base::OneShotTimer cleanup_timer_; |
| 188 |
111 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingNavigationObserverManager); | 189 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingNavigationObserverManager); |
112 }; | 190 }; |
113 } // namespace safe_browsing | 191 } // namespace safe_browsing |
114 | 192 |
115 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGE
R_H_ | 193 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGE
R_H_ |
OLD | NEW |