Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1310)

Side by Side Diff: components/safe_browsing/browser/threat_details.h

Issue 2837603002: Content API changes to improve DOM stitching in ThreatDetails code. (Closed)
Patch Set: Address feedback Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_ 5 #ifndef COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_
6 #define COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_ 6 #define COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_
7 7
8 // A class that encapsulates the detailed threat reports sent when 8 // A class that encapsulates the detailed threat reports sent when
9 // users opt-in to do so from the safe browsing warning page. 9 // users opt-in to do so from the safe browsing warning page.
10 10
(...skipping 38 matching lines...) Expand 10 before | Expand all | Expand 10 after
49 using ResourceMap = 49 using ResourceMap =
50 base::hash_map<std::string, 50 base::hash_map<std::string,
51 std::unique_ptr<ClientSafeBrowsingReportRequest::Resource>>; 51 std::unique_ptr<ClientSafeBrowsingReportRequest::Resource>>;
52 52
53 // Maps a key of an HTML element to its corresponding HTMLElement proto message. 53 // Maps a key of an HTML element to its corresponding HTMLElement proto message.
54 // HTML Element keys have the form "<frame_id>-<node_id>", where |frame_id| is 54 // HTML Element keys have the form "<frame_id>-<node_id>", where |frame_id| is
55 // the FrameTree NodeID of the render frame containing the element, and 55 // the FrameTree NodeID of the render frame containing the element, and
56 // |node_id| is a sequential ID for the element generated by the renderer. 56 // |node_id| is a sequential ID for the element generated by the renderer.
57 using ElementMap = base::hash_map<std::string, std::unique_ptr<HTMLElement>>; 57 using ElementMap = base::hash_map<std::string, std::unique_ptr<HTMLElement>>;
58 58
59 // Maps a URL to some HTML Elements. Used to maintain parent/child relationship 59 // Maps the key of an iframe element to the FrameTreeNode ID of the frame that
60 // for HTML Elements across IFrame boundaries. 60 // rendered the contents of the iframe.
61 // The key is the string URL set as the src attribute of an iframe. The value is 61 using KeyToFrameTreeIdMap = base::hash_map<std::string, int>;
62 // the HTMLElement proto that represents the iframe element with that URL.
63 // The HTMLElement protos are not owned by this map.
64 using UrlToDomElementMap = base::hash_map<std::string, HTMLElement*>;
65 62
66 // Maps a URL to some Element IDs. Used to maintain parent/child relationship 63 // Maps a FrameTreeNode ID of a frame to a set of child IDs. The child IDs are
67 // for HTML Elements across IFrame boundaries. 64 // the Element IDs of the top-level HTML Elements in this render frame.
Charlie Reis 2017/05/10 22:17:49 nit: s/render frame/frame/
lpz 2017/05/12 13:53:16 Done.
68 // The key is the string URL of a render frame. The value is the set of Element 65 using FrameTreeIdToChildIdsMap = base::hash_map<int, std::unordered_set<int>>;
69 // IDs that are at the top-level of this render frame.
70 using UrlToChildIdsMap = base::hash_map<std::string, std::unordered_set<int>>;
71 66
72 class ThreatDetails : public base::RefCountedThreadSafe< 67 class ThreatDetails : public base::RefCountedThreadSafe<
73 ThreatDetails, 68 ThreatDetails,
74 content::BrowserThread::DeleteOnUIThread>, 69 content::BrowserThread::DeleteOnUIThread>,
75 public content::WebContentsObserver { 70 public content::WebContentsObserver {
76 public: 71 public:
77 typedef security_interstitials::UnsafeResource UnsafeResource; 72 typedef security_interstitials::UnsafeResource UnsafeResource;
78 73
79 // Constructs a new ThreatDetails instance, using the factory. 74 // Constructs a new ThreatDetails instance, using the factory.
80 static ThreatDetails* NewThreatDetails( 75 static ThreatDetails* NewThreatDetails(
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
113 ThreatDetails(BaseUIManager* ui_manager, 108 ThreatDetails(BaseUIManager* ui_manager,
114 content::WebContents* web_contents, 109 content::WebContents* web_contents,
115 const UnsafeResource& resource, 110 const UnsafeResource& resource,
116 net::URLRequestContextGetter* request_context_getter, 111 net::URLRequestContextGetter* request_context_getter,
117 history::HistoryService* history_service); 112 history::HistoryService* history_service);
118 113
119 ~ThreatDetails() override; 114 ~ThreatDetails() override;
120 115
121 // Called on the IO thread with the DOM details. 116 // Called on the IO thread with the DOM details.
122 virtual void AddDOMDetails( 117 virtual void AddDOMDetails(
118 const int process_id,
123 const int frame_tree_node_id, 119 const int frame_tree_node_id,
124 const GURL& frame_last_committed_url, 120 const GURL& frame_last_committed_url,
125 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params); 121 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params,
122 const KeyToFrameTreeIdMap& child_frame_tree_map);
126 123
127 // The report protocol buffer. 124 // The report protocol buffer.
128 std::unique_ptr<ClientSafeBrowsingReportRequest> report_; 125 std::unique_ptr<ClientSafeBrowsingReportRequest> report_;
129 126
130 // Used to get a pointer to the HTTP cache. 127 // Used to get a pointer to the HTTP cache.
131 scoped_refptr<net::URLRequestContextGetter> request_context_getter_; 128 scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
132 129
133 private: 130 private:
134 friend class base::RefCountedThreadSafe<ThreatDetails>; 131 friend class base::RefCountedThreadSafe<ThreatDetails>;
135 friend struct content::BrowserThread::DeleteOnThread< 132 friend struct content::BrowserThread::DeleteOnThread<
(...skipping 24 matching lines...) Expand all
160 const std::string& tagname, 157 const std::string& tagname,
161 const std::vector<GURL>* children); 158 const std::vector<GURL>* children);
162 159
163 // Message handler. 160 // Message handler.
164 void OnReceivedThreatDOMDetails( 161 void OnReceivedThreatDOMDetails(
165 content::RenderFrameHost* sender, 162 content::RenderFrameHost* sender,
166 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params); 163 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params);
167 164
168 void AddRedirectUrlList(const std::vector<GURL>& urls); 165 void AddRedirectUrlList(const std::vector<GURL>& urls);
169 166
170 // Adds an HTML Element to the DOM structure. 167 // Adds an HTML Element to the DOM structure. |frame_tree_node_id| is the
171 // |frame_tree_node_id| is the unique ID of the render frame the element came 168 // unique ID of the frame the element came from, and |process_id| is that
172 // from. |frame_url| is the URL that the render frame was handling. 169 // frame's current renderer process ID. |child_frame_routing_id| is only set
173 // |element_node_id| is a unique ID of the element within the render frame. 170 // for iframe elements, and indicates the routing ID of the RenderFrame (or
174 // |tag_name| is the tag of the element. |parent_element_node_id| is the 171 // placeholder within |process_id|, if it is an out-of-process iframe)
175 // unique ID of the parent element with the render frame. |attributes| 172 // containing the body of the iframe. |element_node_id| is a unique ID of the
176 // contains the names and values of the element's attributes.|resource| is 173 // element within the render frame. |tag_name| is the tag of the element.
177 // set if this element is a resource. 174 // |parent_element_node_id| is the unique ID of the parent element with the
175 // render frame. |attributes| contains the names and values of the element's
176 // attributes. |resource| is set if this element is a resource.
178 void AddDomElement(const int frame_tree_node_id, 177 void AddDomElement(const int frame_tree_node_id,
179 const std::string& frame_url, 178 const int process_id,
179 const int child_frame_routing_id,
180 const int element_node_id, 180 const int element_node_id,
181 const std::string& tag_name, 181 const std::string& tag_name,
182 const int parent_element_node_id, 182 const int parent_element_node_id,
183 const std::vector<AttributeNameValue>& attributes, 183 const std::vector<AttributeNameValue>& attributes,
184 const ClientSafeBrowsingReportRequest::Resource* resource); 184 const ClientSafeBrowsingReportRequest::Resource* resource);
185 185
186 scoped_refptr<BaseUIManager> ui_manager_; 186 scoped_refptr<BaseUIManager> ui_manager_;
187 187
188 const UnsafeResource resource_; 188 const UnsafeResource resource_;
189 189
190 // For every Url we collect we create a Resource message. We keep 190 // For every Url we collect we create a Resource message. We keep
191 // them in a map so we can avoid duplicates. 191 // them in a map so we can avoid duplicates.
192 ResourceMap resources_; 192 ResourceMap resources_;
193 193
194 // Store all HTML elements collected, keep them in a map for easy lookup. 194 // Store all HTML elements collected, keep them in a map for easy lookup.
195 ElementMap elements_; 195 ElementMap elements_;
196 196
197 // For each iframe element encountered we map the src of the iframe to the 197 // For each iframe element encountered we map the key of the iframe to the
198 // iframe element. This is used when we receive elements from a different 198 // FrameTreeNode ID of the frame containing the contents of that iframe.
199 // frame whose document URL matches the src of an iframe in this map. We can 199 // We populate this map when receiving results from ThreatDomDetails, and use
200 // then add all elements from the subframe as children of the iframe element 200 // it in a second pass (after FinishCollection) to attach children to iframe
201 // stored here. 201 // elements.
202 UrlToDomElementMap iframe_src_to_element_map_; 202 KeyToFrameTreeIdMap iframe_key_to_frame_tree_id_map_;
Charlie Reis 2017/05/10 22:17:49 Please add a note that this can only be accessed o
lpz 2017/05/12 13:53:16 Done.
203 203
204 // When getting a set of elements from a render frame, we store the frame's 204 // When getting a set of elements from a frame, we store the frame's
205 // URL and a collection of all the top-level elements in that frame. When we 205 // FrameTreeNode ID and a collection of all top-level elements in that frame.
206 // later encounter the parent iframe with the same src URL, we can add all of 206 // It is populated as we receive sets of nodes from different render frames.
Charlie Reis 2017/05/10 22:17:49 nit: s/render frames/frames/ (Or RenderFrames if y
lpz 2017/05/12 13:53:16 Done, and also replaced a few more usages of "rend
207 // these elements as children of that iframe. 207 // It is used together with |iframe_key_to_frame_tree_id_map_| in a second
208 UrlToChildIdsMap document_url_to_children_map_; 208 // pass to insert child elements under their parent iframe elements.
209 FrameTreeIdToChildIdsMap frame_tree_id_to_children_map_;
209 210
210 // Result from the cache extractor. 211 // Result from the cache extractor.
211 bool cache_result_; 212 bool cache_result_;
212 213
213 // Whether user did proceed with the safe browsing blocking page or 214 // Whether user did proceed with the safe browsing blocking page or
214 // not. 215 // not.
215 bool did_proceed_; 216 bool did_proceed_;
216 217
217 // How many times this user has visited this page before. 218 // How many times this user has visited this page before.
218 int num_visits_; 219 int num_visits_;
219 220
220 // Keeps track of whether we have an ambiguous DOM in this report. This can 221 // Keeps track of whether we have an ambiguous DOM in this report. This can
221 // happen when the HTML Elements returned by a render frame can't be 222 // happen when the HTML Elements returned by a render frame can't be
222 // associated with a parent Element in the parent frame. 223 // associated with a parent Element in the parent frame.
223 bool ambiguous_dom_; 224 bool ambiguous_dom_;
224 225
225 // The factory used to instantiate SafeBrowsingBlockingPage objects. 226 // The factory used to instanciate SafeBrowsingBlockingPage objects.
Charlie Reis 2017/05/10 22:17:49 nit: This was correct before as "instantiate" I'm
lpz 2017/05/12 13:53:16 Yeah weird. Done.
226 // Useful for tests, so they can provide their own implementation of 227 // Usefull for tests, so they can provide their own implementation of
Charlie Reis 2017/05/10 22:17:49 nit: This was correct before as "Useful"
lpz 2017/05/12 13:53:16 Done.
227 // SafeBrowsingBlockingPage. 228 // SafeBrowsingBlockingPage.
228 static ThreatDetailsFactory* factory_; 229 static ThreatDetailsFactory* factory_;
229 230
230 // Used to collect details from the HTTP Cache. 231 // Used to collect details from the HTTP Cache.
231 scoped_refptr<ThreatDetailsCacheCollector> cache_collector_; 232 scoped_refptr<ThreatDetailsCacheCollector> cache_collector_;
232 233
233 // Used to collect redirect urls from the history service 234 // Used to collect redirect urls from the history service
234 scoped_refptr<ThreatDetailsRedirectsCollector> redirects_collector_; 235 scoped_refptr<ThreatDetailsRedirectsCollector> redirects_collector_;
235 236
236 FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest, HistoryServiceUrls); 237 FRIEND_TEST_ALL_PREFIXES(ThreatDetailsTest, HistoryServiceUrls);
(...skipping 16 matching lines...) Expand all
253 BaseUIManager* ui_manager, 254 BaseUIManager* ui_manager,
254 content::WebContents* web_contents, 255 content::WebContents* web_contents,
255 const security_interstitials::UnsafeResource& unsafe_resource, 256 const security_interstitials::UnsafeResource& unsafe_resource,
256 net::URLRequestContextGetter* request_context_getter, 257 net::URLRequestContextGetter* request_context_getter,
257 history::HistoryService* history_service) = 0; 258 history::HistoryService* history_service) = 0;
258 }; 259 };
259 260
260 } // namespace safe_browsing 261 } // namespace safe_browsing
261 262
262 #endif // COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_ 263 #endif // COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698