Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(37)

Side by Side Diff: chrome/browser/safe_browsing/safe_browsing_navigation_observer_manager.h

Issue 2777853005: Generalize the usage of referrer chain (Closed)
Patch Set: address lpz's comments Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_ 5 #ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_
6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_ 6 #define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_
7 7
8 #include <deque> 8 #include <deque>
9 #include "base/feature_list.h" 9 #include "base/feature_list.h"
10 #include "base/supports_user_data.h" 10 #include "base/supports_user_data.h"
(...skipping 25 matching lines...) Expand all
36 }; 36 };
37 37
38 // Struct that manages insertion, cleanup, and lookup of NavigationEvent 38 // Struct that manages insertion, cleanup, and lookup of NavigationEvent
39 // objects. Its maximum size is kNavigationRecordMaxSize. 39 // objects. Its maximum size is kNavigationRecordMaxSize.
40 struct NavigationEventList { 40 struct NavigationEventList {
41 public: 41 public:
42 explicit NavigationEventList(std::size_t size_limit); 42 explicit NavigationEventList(std::size_t size_limit);
43 43
44 ~NavigationEventList(); 44 ~NavigationEventList();
45 45
46 // Find the most recent navigation event that navigated to |target_url| and 46 // Finds the most recent navigation event that navigated to |target_url| and
47 // its associated |target_main_frame_url| in the tab with ID |target_tab_id|. 47 // its associated |target_main_frame_url| in the tab with ID |target_tab_id|.
48 // If navigation happened in the main frame, |target_url| and |target_main_ 48 // If navigation happened in the main frame, |target_url| and
49 // frame_url| are the same. 49 // |target_main_frame_url| are the same.
50 // If |target_url| is empty, we use its main frame url (a.k.a. 50 // If |target_url| is empty, we use its main frame url (a.k.a.
51 // |target_main_frame_url|) to search for navigation events. 51 // |target_main_frame_url|) to search for navigation events.
52 // If |target_tab_id| is not available (-1), we look for all tabs for the most 52 // If |target_tab_id| is not available (-1), we look for all tabs for the most
53 // recent navigation to |target_url| or |target_main_frame_url|. 53 // recent navigation to |target_url| or |target_main_frame_url|.
54 // For some cases, the most recent navigation to |target_url| may not be 54 // For some cases, the most recent navigation to |target_url| may not be
55 // relevant. 55 // relevant.
56 // For example, url1 in window A opens url2 in window B, url1 then opens an 56 // For example, url1 in window A opens url2 in window B, url1 then opens an
57 // about:blank page window C and injects script code in it to trigger a 57 // about:blank page window C and injects script code in it to trigger a
58 // delayed download in Window D. Before the download occurs, url2 in window B 58 // delayed event (e.g. a download) in Window D. Before the event occurs, url2
59 // opens a different about:blank page in window C. 59 // in window B opens a different about:blank page in window C.
60 // A ---- C - D 60 // A ---- C - D
61 // \ / 61 // \ /
62 // B 62 // B
63 // In this case, FindNavigationEvent() will think url2 in Window B is the 63 // In this case, FindNavigationEvent() will think url2 in Window B is the
64 // referrer of about:blank in Window C since this navigation is more recent. 64 // referrer of about:blank in Window C since this navigation is more recent.
65 // However, it does not prevent us from attributing url1 in Window A as the 65 // However, it does not prevent us from attributing url1 in Window A as the
66 // cause of all these navigations. 66 // cause of all these navigations.
67 NavigationEvent* FindNavigationEvent(const GURL& target_url, 67 NavigationEvent* FindNavigationEvent(const GURL& target_url,
68 const GURL& target_main_frame_url, 68 const GURL& target_main_frame_url,
69 int target_tab_id); 69 int target_tab_id);
70 70
71 // Find the most recent retargeting NavigationEvent that satisfies 71 // Finds the most recent retargeting NavigationEvent that satisfies
72 // |target_url|, and |target_tab_id|. 72 // |target_url|, and |target_tab_id|.
73 NavigationEvent* FindRetargetingNavigationEvent(const GURL& target_url, 73 NavigationEvent* FindRetargetingNavigationEvent(const GURL& target_url,
74 int target_tab_id); 74 int target_tab_id);
75 75
76 void RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event); 76 void RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event);
77 77
78 // Remove stale NavigationEvents and return the number of items removed. 78 // Removes stale NavigationEvents and returns the number of items removed.
79 std::size_t CleanUpNavigationEvents(); 79 std::size_t CleanUpNavigationEvents();
80 80
81 std::size_t Size() { return navigation_events_.size(); } 81 std::size_t Size() { return navigation_events_.size(); }
82 82
83 NavigationEvent* Get(std::size_t index) { 83 NavigationEvent* Get(std::size_t index) {
84 return navigation_events_[index].get(); 84 return navigation_events_[index].get();
85 } 85 }
86 86
87 private: 87 private:
88 std::deque<std::unique_ptr<NavigationEvent>> navigation_events_; 88 std::deque<std::unique_ptr<NavigationEvent>> navigation_events_;
89 const std::size_t size_limit_; 89 const std::size_t size_limit_;
90 }; 90 };
91 91
92 // Manager class for SafeBrowsingNavigationObserver, which is in charge of 92 // Manager class for SafeBrowsingNavigationObserver, which is in charge of
93 // cleaning up stale navigation events, and identifying landing page/landing 93 // cleaning up stale navigation events, and identifying landing page/landing
94 // referrer for a specific download. 94 // referrer for a specific Safe Browsing event.
95 class SafeBrowsingNavigationObserverManager 95 class SafeBrowsingNavigationObserverManager
96 : public base::RefCountedThreadSafe<SafeBrowsingNavigationObserverManager> { 96 : public base::RefCountedThreadSafe<SafeBrowsingNavigationObserverManager> {
97 public: 97 public:
98 static const base::Feature kDownloadAttribution; 98 static const base::Feature kDownloadAttribution;
99 99
100 // For UMA histogram counting. Do NOT change order. 100 // For UMA histogram counting. Do NOT change order.
101 enum AttributionResult { 101 enum AttributionResult {
102 SUCCESS = 1, // Identified referrer chain is not empty. 102 SUCCESS = 1, // Identified referrer chain is not empty.
103 SUCCESS_LANDING_PAGE = 2, // Successfully identified landing page. 103 SUCCESS_LANDING_PAGE = 2, // Successfully identified landing page.
104 SUCCESS_LANDING_REFERRER = 3, // Successfully identified landing referrer. 104 SUCCESS_LANDING_REFERRER = 3, // Successfully identified landing referrer.
105 INVALID_URL = 4, 105 INVALID_URL = 4,
106 NAVIGATION_EVENT_NOT_FOUND = 5, 106 NAVIGATION_EVENT_NOT_FOUND = 5,
107 107
108 // Always at the end. 108 // Always at the end.
109 ATTRIBUTION_FAILURE_TYPE_MAX 109 ATTRIBUTION_FAILURE_TYPE_MAX
110 }; 110 };
111 111
112 // Helper function to check if user gesture is older than 112 // Helper function to check if user gesture is older than
113 // kUserGestureTTLInSecond. 113 // kUserGestureTTLInSecond.
114 static bool IsUserGestureExpired(const base::Time& timestamp); 114 static bool IsUserGestureExpired(const base::Time& timestamp);
115 115
116 // Helper function to strip empty ref fragment from a URL. Many pages 116 // Helper function to strip empty ref fragment from a URL. Many pages
117 // end up with a "#" at the end of their URLs due to navigation triggered by 117 // end up with a "#" at the end of their URLs due to navigation triggered by
118 // href="#" and javascript onclick function. We don't want to have separate 118 // href="#" and javascript onclick function. We don't want to have separate
119 // entries for these cases in the maps. 119 // entries for these cases in the maps.
120 static GURL ClearEmptyRef(const GURL& url); 120 static GURL ClearEmptyRef(const GURL& url);
121 121
122 // Checks if we should enable observing navigations for safe browsing purpose. 122 // Checks if we should enable observing navigations for safe browsing purpose.
123 // Return true if the safe browsing service and the download attribution 123 // Return true if the safe browsing service and the |kDownloadAttribution|
124 // feature are both enabled, and safe browsing service is initialized. 124 // feature are both enabled, and safe browsing service is initialized.
125 static bool IsEnabledAndReady(Profile* profile); 125 static bool IsEnabledAndReady(Profile* profile);
126 126
127 SafeBrowsingNavigationObserverManager(); 127 SafeBrowsingNavigationObserverManager();
128 128
129 // Add |nav_event| to |navigation_event_list_|. Object pointed to by 129 // Adds |nav_event| to |navigation_event_list_|. Object pointed to by
130 // |nav_event| will be no longer accessible after this function. 130 // |nav_event| will be no longer accessible after this function.
131 void RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event); 131 void RecordNavigationEvent(std::unique_ptr<NavigationEvent> nav_event);
132 void RecordUserGestureForWebContents(content::WebContents* web_contents, 132 void RecordUserGestureForWebContents(content::WebContents* web_contents,
133 const base::Time& timestamp); 133 const base::Time& timestamp);
134 void OnUserGestureConsumed(content::WebContents* web_contents, 134 void OnUserGestureConsumed(content::WebContents* web_contents,
135 const base::Time& timestamp); 135 const base::Time& timestamp);
136 bool HasUserGesture(content::WebContents* web_contents); 136 bool HasUserGesture(content::WebContents* web_contents);
137 void RecordHostToIpMapping(const std::string& host, const std::string& ip); 137 void RecordHostToIpMapping(const std::string& host, const std::string& ip);
138 138
139 // Clean-ups need to be done when a WebContents gets destroyed. 139 // Clean-ups need to be done when a WebContents gets destroyed.
140 void OnWebContentDestroyed(content::WebContents* web_contents); 140 void OnWebContentDestroyed(content::WebContents* web_contents);
141 141
142 // Remove all the observed NavigationEvents, user gestures, and resolved IP 142 // Removes all the observed NavigationEvents, user gestures, and resolved IP
143 // addresses that are older than kNavigationFootprintTTLInSecond. 143 // addresses that are older than kNavigationFootprintTTLInSecond.
144 void CleanUpStaleNavigationFootprints(); 144 void CleanUpStaleNavigationFootprints();
145 145
146 // Based on the |target_url| and |target_tab_id|, trace back the observed 146 // Based on the |target_url| and |target_tab_id|, traces back the observed
147 // NavigationEvents in navigation_event_list_ to identify the sequence of 147 // NavigationEvents in navigation_event_list_ to identify the sequence of
148 // navigations leading to the target, with the coverage limited to 148 // navigations leading to the target, with the coverage limited to
149 // |user_gesture_count_limit| number of user gestures. Then convert these 149 // |user_gesture_count_limit| number of user gestures. Then converts these
150 // identified NavigationEvents into ReferrerChainEntrys and append them to 150 // identified NavigationEvents into ReferrerChainEntrys and appends them to
151 // |out_referrer_chain|. 151 // |out_referrer_chain|.
152 AttributionResult IdentifyReferrerChainForDownload( 152 AttributionResult IdentifyReferrerChainByEventURL(
153 const GURL& target_url, 153 const GURL& event_url,
154 int target_tab_id, // -1 if tab id is not valid 154 int event_tab_id, // -1 if tab id is unknown or not available
155 int user_gesture_count_limit, 155 int user_gesture_count_limit,
156 ReferrerChain* out_referrer_chain); 156 ReferrerChain* out_referrer_chain);
157 157
158 // Based on the |web_contents| associated with a download, trace back the 158 // Based on the |web_contents| associated with an event, traces back the
159 // observed NavigationEvents in navigation_event_list_ to identify the 159 // observed NavigationEvents in |navigation_event_list_| to identify the
160 // sequence of navigations leading to the download hosting page, with the 160 // sequence of navigations leading to the event hosting page, with the
161 // coverage limited to |user_gesture_count_limit| number of user gestures. 161 // coverage limited to |user_gesture_count_limit| number of user gestures.
162 // Then convert these identified NavigationEvents into ReferrerChainEntrys 162 // Then converts these identified NavigationEvents into ReferrerChainEntrys
163 // and append them to |out_referrer_chain|. 163 // and appends them to |out_referrer_chain|.
164 AttributionResult IdentifyReferrerChainByDownloadWebContent( 164 AttributionResult IdentifyReferrerChainByWebContents(
165 content::WebContents* web_contents, 165 content::WebContents* web_contents,
166 int user_gesture_count_limit, 166 int user_gesture_count_limit,
167 ReferrerChain* out_referrer_chain); 167 ReferrerChain* out_referrer_chain);
168 168
169 // Based on the |initiating_frame_url| and its associated |tab_id|, trace back 169 // Based on the |initiating_frame_url| and its associated |tab_id|, traces
170 // the observed NavigationEvents in navigation_event_list_ to identify those 170 // back the observed NavigationEvents in navigation_event_list_ to identify
171 // navigations leading to this |initiating_frame_url|. If this initiating 171 // those navigations leading to this |initiating_frame_url|. If this
172 // frame has a user gesture, we trace back with the coverage limited to 172 // initiating frame has a user gesture, we trace back with the coverage
173 // |user_gesture_count_limit|-1 number of user gestures, otherwise we trace 173 // limited to |user_gesture_count_limit|-1 number of user gestures, otherwise
174 // back |user_gesture_count_limit| number of user gestures. We then convert 174 // we trace back |user_gesture_count_limit| number of user gestures. We then
175 // these identified NavigationEvents into ReferrerChainEntrys and append them 175 // convert these identified NavigationEvents into ReferrerChainEntrys and
176 // to |out_referrer_chain|. 176 // append them to |out_referrer_chain|.
177 AttributionResult IdentifyReferrerChainForDownloadHostingPage( 177 AttributionResult IdentifyReferrerChainByHostingPage(
178 const GURL& initiating_frame_url, 178 const GURL& initiating_frame_url,
179 const GURL& initiating_main_frame_url, 179 const GURL& initiating_main_frame_url,
180 int tab_id, 180 int tab_id,
181 bool has_user_gesture, 181 bool has_user_gesture,
182 int user_gesture_count_limit, 182 int user_gesture_count_limit,
183 ReferrerChain* out_referrer_chain); 183 ReferrerChain* out_referrer_chain);
184 184
185 // Record the creation of a new WebContents by |source_web_contents|. This is 185 // Record the creation of a new WebContents by |source_web_contents|. This is
186 // used to detect cross-frame and cross-tab navigations. 186 // used to detect cross-frame and cross-tab navigations.
187 void RecordNewWebContents(content::WebContents* source_web_contents, 187 void RecordNewWebContents(content::WebContents* source_web_contents,
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after
255 NavigationEventList navigation_event_list_; 255 NavigationEventList navigation_event_list_;
256 256
257 // user_gesture_map_ keeps track of the timestamp of last user gesture in 257 // user_gesture_map_ keeps track of the timestamp of last user gesture in
258 // each WebContents. We assume that in the majority of cases, a navigation 258 // each WebContents. We assume that in the majority of cases, a navigation
259 // shortly after a user gesture indicates this navigation is user initiated. 259 // shortly after a user gesture indicates this navigation is user initiated.
260 UserGestureMap user_gesture_map_; 260 UserGestureMap user_gesture_map_;
261 261
262 // Host to timestamped IP addresses map that covers all the main frame and 262 // Host to timestamped IP addresses map that covers all the main frame and
263 // subframe URLs' hosts. Since it is possible for a host to resolve to more 263 // subframe URLs' hosts. Since it is possible for a host to resolve to more
264 // than one IP in even a short period of time, we map a single host to a 264 // than one IP in even a short period of time, we map a single host to a
265 // vector of ResolvedIPAddresss. This map is used to fill in ip_address field 265 // vector of ResolvedIPAddresss.
266 // in URLChainEntry in ClientDownloadRequest.
267 HostToIpMap host_to_ip_map_; 266 HostToIpMap host_to_ip_map_;
268 267
269 base::OneShotTimer cleanup_timer_; 268 base::OneShotTimer cleanup_timer_;
270 269
271 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingNavigationObserverManager); 270 DISALLOW_COPY_AND_ASSIGN(SafeBrowsingNavigationObserverManager);
272 }; 271 };
273 } // namespace safe_browsing 272 } // namespace safe_browsing
274 273
275 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_ 274 #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_NAVIGATION_OBSERVER_MANAGER_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698