OLD | NEW |
---|---|
1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_ | 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_ |
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_ | 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_ |
7 | 7 |
8 #include <deque> | 8 #include <deque> |
9 #include "base/feature_list.h" | 9 #include "base/feature_list.h" |
10 #include "base/supports_user_data.h" | 10 #include "base/supports_user_data.h" |
(...skipping 25 matching lines...) Expand all Loading... | |
36 }; | 36 }; |
37 | 37 |
38 // Struct that manages insertion, cleanup, and lookup of NavigationEvent | 38 // Struct that manages insertion, cleanup, and lookup of NavigationEvent |
39 // objects. Its maximum size is kNavigationRecordMaxSize. | 39 // objects. Its maximum size is kNavigationRecordMaxSize. |
40 struct NavigationEventList { | 40 struct NavigationEventList { |
41 public: | 41 public: |
42 explicit NavigationEventList(std::size_t size_limit); | 42 explicit NavigationEventList(std::size_t size_limit); |
43 | 43 |
44 ~NavigationEventList(); | 44 ~NavigationEventList(); |
45 | 45 |
46 // Find the most recent navigation event that navigated to |target_url| and | 46 // Finds the most recent navigation event that navigated to |target_url| and |
47 // its associated |target_main_frame_url| in the tab with ID |target_tab_id|. | 47 // its associated |target_main_frame_url| in the tab with ID |target_tab_id|. |
48 // If navigation happened in the main frame, |target_url| and |target_main_ | 48 // If navigation happened in the main frame, |target_url| and |target_main_ |
lpz
2017/03/29 13:46:27
nit: |target_main_frame_url| to newline so it's no
Jialiu Lin
2017/03/29 20:40:00
Done.
| |
49 // frame_url| are the same. | 49 // frame_url| are the same. |
50 // If |target_url| is empty, we use its main frame url (a.k.a. | 50 // If |target_url| is empty, we use its main frame url (a.k.a. |
51 // |target_main_frame_url|) to search for navigation events. | 51 // |target_main_frame_url|) to search for navigation events. |
52 // If |target_tab_id| is not available (-1), we look for all tabs for the most | 52 // If |target_tab_id| is not available (-1), we look for all tabs for the most |
53 // recent navigation to |target_url| or |target_main_frame_url|. | 53 // recent navigation to |target_url| or |target_main_frame_url|. |
54 // For some cases, the most recent navigation to |target_url| may not be | 54 // For some cases, the most recent navigation to |target_url| may not be |
55 // relevant. | 55 // relevant. |
56 // For example, url1 in window A opens url2 in window B, url1 then opens an | 56 // For example, url1 in window A opens url2 in window B, url1 then opens an |
57 // about:blank page window C and injects script code in it to trigger a | 57 // about:blank page window C and injects script code in it to trigger a |
58 // delayed download in Window D. Before the download occurs, url2 in window B | 58 // delayed event (e.g. a download) in Window D. Before the event occurs, url2 |
59 // opens a different about:blank page in window C. | 59 // in window B opens a different about:blank page in window C. |
60 // A ---- C - D | 60 // A ---- C - D |
61 // \ / | 61 // \ / |
62 // B | 62 // B |
63 // In this case, FindNavigationEvent() will think url2 in Window B is the | 63 // In this case, FindNavigationEvent() will think url2 in Window B is the |
64 // referrer of about:blank in Window C since this navigation is more recent. | 64 // referrer of about:blank in Window C since this navigation is more recent. |
65 // However, it does not prevent us to attribute url1 in Window A as the cause | 65 // However, it does not prevent us to attribute url1 in Window A as the cause |
66 // of all these navigations. | 66 // of all these navigations. |
67 NavigationEvent* FindNavigationEvent(const GURL& target_url, | 67 NavigationEvent* FindNavigationEvent(const GURL& target_url, |
68 const GURL& target_main_frame_url, | 68 const GURL& target_main_frame_url, |
69 int target_tab_id); | 69 int target_tab_id); |
70 | 70 |
71 // Find the most recent retargeting NavigationEvent that satisfies | 71 // Finds the most recent retargeting NavigationEvent that satisfies |
72 // |target_url|, and |target_tab_id|. | 72 // |target_url|, and |target_tab_id|. |
73 NavigationEvent* FindRetargetingNavigationEvent(const GURL& target_url, | 73 NavigationEvent* FindRetargetingNavigationEvent(const GURL& target_url, |
74 int target_tab_id); | 74 int target_tab_id); |
75 | 75 |
76 void RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event); | 76 void RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event); |
77 | 77 |
78 // Remove stale NavigationEvents and return the number of items removed. | 78 // Removes stale NavigationEvents and return the number of items removed. |
79 std::size_t CleanUpNavigationEvents(); | 79 std::size_t CleanUpNavigationEvents(); |
80 | 80 |
81 std::size_t Size() { return navigation_events_.size(); } | 81 std::size_t Size() { return navigation_events_.size(); } |
82 | 82 |
83 NavigationEvent* Get(std::size_t index) { | 83 NavigationEvent* Get(std::size_t index) { |
84 return navigation_events_[index].get(); | 84 return navigation_events_[index].get(); |
85 } | 85 } |
86 | 86 |
87 private: | 87 private: |
88 std::deque<std::unique_ptr<NavigationEvent>> navigation_events_; | 88 std::deque<std::unique_ptr<NavigationEvent>> navigation_events_; |
89 const std::size_t size_limit_; | 89 const std::size_t size_limit_; |
90 }; | 90 }; |
91 | 91 |
92 // Manager class for SafeBrowsingNavigationObserver, which is in charge of | 92 // Manager class for SafeBrowsingNavigationObserver, which is in charge of |
93 // cleaning up stale navigation events, and identifying landing page/landing | 93 // cleaning up stale navigation events, and identifying landing page/landing |
94 // referrer for a specific download. | 94 // referrer for a specific Safe Browsing event. |
95 class SafeBrowsingNavigationObserverManager | 95 class SafeBrowsingNavigationObserverManager |
96 : public base::RefCountedThreadSafe<SafeBrowsingNavigationObserverManager> { | 96 : public base::RefCountedThreadSafe<SafeBrowsingNavigationObserverManager> { |
97 public: | 97 public: |
98 static const base::Feature kDownloadAttribution; | 98 static const base::Feature kDownloadAttribution; |
99 | 99 |
100 // For UMA histogram counting. Do NOT change order. | 100 // For UMA histogram counting. Do NOT change order. |
101 enum AttributionResult { | 101 enum AttributionResult { |
102 SUCCESS = 1, // Identified referrer chain is not empty. | 102 SUCCESS = 1, // Identified referrer chain is not empty. |
103 SUCCESS_LANDING_PAGE = 2, // Successfully identified landing page. | 103 SUCCESS_LANDING_PAGE = 2, // Successfully identified landing page. |
104 SUCCESS_LANDING_REFERRER = 3, // Successfully identified landing referrer. | 104 SUCCESS_LANDING_REFERRER = 3, // Successfully identified landing referrer. |
105 INVALID_URL = 4, | 105 INVALID_URL = 4, |
106 NAVIGATION_EVENT_NOT_FOUND = 5, | 106 NAVIGATION_EVENT_NOT_FOUND = 5, |
107 | 107 |
108 // Always at the end. | 108 // Always at the end. |
109 ATTRIBUTION_FAILURE_TYPE_MAX | 109 ATTRIBUTION_FAILURE_TYPE_MAX |
110 }; | 110 }; |
111 | 111 |
112 // Helper function to check if user gesture is older than | 112 // Helper function to check if user gesture is older than |
113 // kUserGestureTTLInSecond. | 113 // kUserGestureTTLInSecond. |
114 static bool IsUserGestureExpired(const base::Time& timestamp); | 114 static bool IsUserGestureExpired(const base::Time& timestamp); |
115 | 115 |
116 // Helper function to strip empty ref fragment from a URL. Many pages | 116 // Helper function to strip empty ref fragment from a URL. Many pages |
117 // end up with a "#" at the end of their URLs due to navigation triggered by | 117 // end up with a "#" at the end of their URLs due to navigation triggered by |
118 // href="#" and javascript onclick function. We don't want to have separate | 118 // href="#" and javascript onclick function. We don't want to have separate |
119 // entries for these cases in the maps. | 119 // entries for these cases in the maps. |
120 static GURL ClearEmptyRef(const GURL& url); | 120 static GURL ClearEmptyRef(const GURL& url); |
121 | 121 |
122 // Checks if we should enable observing navigations for safe browsing purpose. | 122 // Checks if we should enable observing navigations for safe browsing purpose. |
123 // Return true if the safe browsing service and the download attribution | 123 // Return true if the safe browsing service and the |kDownloadAttribution| |
124 // feature are both enabled, and safe browsing service is initialized. | 124 // feature are both enabled, and safe browsing service is initialized. |
125 static bool IsEnabledAndReady(Profile* profile); | 125 static bool IsEnabledAndReady(Profile* profile); |
126 | 126 |
127 SafeBrowsingNavigationObserverManager(); | 127 SafeBrowsingNavigationObserverManager(); |
128 | 128 |
129 // Add |nav_event| to |navigation_event_list_|. Object pointed to by | 129 // Adds |nav_event| to |navigation_event_list_|. Object pointed to by |
130 // |nav_event| will be no longer accessible after this function. | 130 // |nav_event| will be no longer accessible after this function. |
131 void RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event); | 131 void RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event); |
132 void RecordUserGestureForWebContents(content::WebContents* web_contents, | 132 void RecordUserGestureForWebContents(content::WebContents* web_contents, |
133 const base::Time& timestamp); | 133 const base::Time& timestamp); |
134 void OnUserGestureConsumed(content::WebContents* web_contents, | 134 void OnUserGestureConsumed(content::WebContents* web_contents, |
135 const base::Time& timestamp); | 135 const base::Time& timestamp); |
136 bool HasUserGesture(content::WebContents* web_contents); | 136 bool HasUserGesture(content::WebContents* web_contents); |
137 void RecordHostToIpMapping(const std::string& host, const std::string& ip); | 137 void RecordHostToIpMapping(const std::string& host, const std::string& ip); |
138 | 138 |
139 // Clean-ups need to be done when a WebContents gets destroyed. | 139 // Clean-ups need to be done when a WebContents gets destroyed. |
140 void OnWebContentDestroyed(content::WebContents* web_contents); | 140 void OnWebContentDestroyed(content::WebContents* web_contents); |
141 | 141 |
142 // Remove all the observed NavigationEvents, user gestures, and resolved IP | 142 // Removes all the observed NavigationEvents, user gestures, and resolved IP |
143 // addresses that are older than kNavigationFootprintTTLInSecond. | 143 // addresses that are older than kNavigationFootprintTTLInSecond. |
144 void CleanUpStaleNavigationFootprints(); | 144 void CleanUpStaleNavigationFootprints(); |
145 | 145 |
146 // Based on the |target_url| and |target_tab_id|, trace back the observed | 146 // Based on the |target_url| and |target_tab_id|, traces back the observed |
147 // NavigationEvents in navigation_event_list_ to identify the sequence of | 147 // NavigationEvents in navigation_event_list_ to identify the sequence of |
148 // navigations leading to the target, with the coverage limited to | 148 // navigations leading to the target, with the coverage limited to |
149 // |user_gesture_count_limit| number of user gestures. Then convert these | 149 // |user_gesture_count_limit| number of user gestures. Then converts these |
150 // identified NavigationEvents into ReferrerChainEntrys and append them to | 150 // identified NavigationEvents into ReferrerChainEntrys and append them to |
151 // |out_referrer_chain|. | 151 // |out_referrer_chain|. |
152 AttributionResult IdentifyReferrerChainForDownload( | 152 AttributionResult IdentifyReferrerChainByEventURL( |
153 const GURL& target_url, | 153 const GURL& event_url, |
154 int target_tab_id, // -1 if tab id is not valid | 154 int event_tab_id, // -1 if tab id is unknown or not available |
155 int user_gesture_count_limit, | 155 int user_gesture_count_limit, |
156 ReferrerChain* out_referrer_chain); | 156 ReferrerChain* out_referrer_chain); |
157 | 157 |
158 // Based on the |web_contents| associated with a download, trace back the | 158 // Based on the |web_contents| associated with an event, traces back the |
159 // observed NavigationEvents in navigation_event_list_ to identify the | 159 // observed NavigationEvents in |navigation_event_list_| to identify the |
160 // sequence of navigations leading to the download hosting page, with the | 160 // sequence of navigations leading to the event hosting page, with the |
161 // coverage limited to |user_gesture_count_limit| number of user gestures. | 161 // coverage limited to |user_gesture_count_limit| number of user gestures. |
162 // Then convert these identified NavigationEvents into ReferrerChainEntrys | 162 // Then converts these identified NavigationEvents into ReferrerChainEntrys |
163 // and append them to |out_referrer_chain|. | 163 // and append them to |out_referrer_chain|. |
164 AttributionResult IdentifyReferrerChainByDownloadWebContent( | 164 AttributionResult IdentifyReferrerChainByWebContent( |
lpz
2017/03/29 13:46:26
nit: WebContents (with an s) to match content::Web
Jialiu Lin
2017/03/29 20:40:00
Ah, thanks for catching this. Done.
| |
165 content::WebContents* web_contents, | 165 content::WebContents* web_contents, |
166 int user_gesture_count_limit, | 166 int user_gesture_count_limit, |
167 ReferrerChain* out_referrer_chain); | 167 ReferrerChain* out_referrer_chain); |
168 | 168 |
169 // Based on the |initiating_frame_url| and its associated |tab_id|, trace back | 169 // Based on the |initiating_frame_url| and its associated |tab_id|, traces |
170 // the observed NavigationEvents in navigation_event_list_ to identify those | 170 // back the observed NavigationEvents in navigation_event_list_ to identify |
171 // navigations leading to this |initiating_frame_url|. If this initiating | 171 // those navigations leading to this |initiating_frame_url|. If this |
172 // frame has a user gesture, we trace back with the coverage limited to | 172 // initiating frame has a user gesture, we trace back with the coverage |
173 // |user_gesture_count_limit|-1 number of user gestures, otherwise we trace | 173 // limited to |user_gesture_count_limit|-1 number of user gestures, otherwise |
174 // back |user_gesture_count_limit| number of user gestures. We then convert | 174 // we trace back |user_gesture_count_limit| number of user gestures. We then |
175 // these identified NavigationEvents into ReferrerChainEntrys and append them | 175 // converts these identified NavigationEvents into ReferrerChainEntrys and |
176 // to |out_referrer_chain|. | 176 // appends them to |out_referrer_chain|. |
177 AttributionResult IdentifyReferrerChainForDownloadHostingPage( | 177 AttributionResult IdentifyReferrerChainByHostingPage( |
178 const GURL& initiating_frame_url, | 178 const GURL& initiating_frame_url, |
179 const GURL& initiating_main_frame_url, | 179 const GURL& initiating_main_frame_url, |
180 int tab_id, | 180 int tab_id, |
181 bool has_user_gesture, | 181 bool has_user_gesture, |
182 int user_gesture_count_limit, | 182 int user_gesture_count_limit, |
183 ReferrerChain* out_referrer_chain); | 183 ReferrerChain* out_referrer_chain); |
184 | 184 |
185 // Record the creation of a new WebContents by |source_web_contents|. This is | 185 // Record the creation of a new WebContents by |source_web_contents|. This is |
186 // used to detect cross-frame and cross-tab navigations. | 186 // used to detect cross-frame and cross-tab navigations. |
187 void RecordNewWebContents(content::WebContents* source_web_contents, | 187 void RecordNewWebContents(content::WebContents* source_web_contents, |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
255 NavigationEventList navigation_event_list_; | 255 NavigationEventList navigation_event_list_; |
256 | 256 |
257 // user_gesture_map_ keeps track of the timestamp of last user gesture in | 257 // user_gesture_map_ keeps track of the timestamp of last user gesture in |
258 // each WebContents. We assume for majority of cases, a navigation | 258 // each WebContents. We assume for majority of cases, a navigation |
259 // shortly after a user gesture indicates this navigation is user initiated. | 259 // shortly after a user gesture indicates this navigation is user initiated. |
260 UserGestureMap user_gesture_map_; | 260 UserGestureMap user_gesture_map_; |
261 | 261 |
262 // Host to timestamped IP addresses map that covers all the main frame and | 262 // Host to timestamped IP addresses map that covers all the main frame and |
263 // subframe URLs' hosts. Since it is possible for a host to resolve to more | 263 // subframe URLs' hosts. Since it is possible for a host to resolve to more |
264 // than one IP in even a short period of time, we map a single host to a | 264 // than one IP in even a short period of time, we map a single host to a |
265 // vector of ResolvedIPAddresss. This map is used to fill in ip_address field | 265 // vector of ResolvedIPAddresss. |
266 // in URLChainEntry in ClientDownloadRequest. | |
267 HostToIpMap host_to_ip_map_; | 266 HostToIpMap host_to_ip_map_; |
268 | 267 |
269 base::OneShotTimer cleanup_timer_; | 268 base::OneShotTimer cleanup_timer_; |
270 | 269 |
271 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingNavigationObserverManager); | 270 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingNavigationObserverManager); |
272 }; | 271 }; |
273 } // namespace safe_browsing | 272 } // namespace safe_browsing |
274 | 273 |
275 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_ | 274 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_ |
OLD | NEW |