Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(475)

Side by Side Diff: components/safe_browsing/browser/threat_details.h

Issue 2837603002: Content API changes to improve DOM stitching in ThreatDetails code. (Closed)
Patch Set: Sync and small unittest tweak Created 3 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #ifndef COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_ 5 #ifndef COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_
6 #define COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_ 6 #define COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_
7 7
8 // A class that encapsulates the detailed threat reports sent when 8 // A class that encapsulates the detailed threat reports sent when
9 // users opt-in to do so from the safe browsing warning page. 9 // users opt-in to do so from the safe browsing warning page.
10 10
(...skipping 34 matching lines...) Expand 10 before | Expand all | Expand 10 after
45 class ThreatDetailsCacheCollector; 45 class ThreatDetailsCacheCollector;
46 class ThreatDetailsRedirectsCollector; 46 class ThreatDetailsRedirectsCollector;
47 class ThreatDetailsFactory; 47 class ThreatDetailsFactory;
48 48
49 using ResourceMap = 49 using ResourceMap =
50 base::hash_map<std::string, 50 base::hash_map<std::string,
51 std::unique_ptr<ClientSafeBrowsingReportRequest::Resource>>; 51 std::unique_ptr<ClientSafeBrowsingReportRequest::Resource>>;
52 52
53 // Maps a key of an HTML element to its corresponding HTMLElement proto message. 53 // Maps a key of an HTML element to its corresponding HTMLElement proto message.
54 // HTML Element keys have the form "<frame_id>-<node_id>", where |frame_id| is 54 // HTML Element keys have the form "<frame_id>-<node_id>", where |frame_id| is
55 // the FrameTree NodeID of the render frame containing the element, and 55 // the FrameTree NodeID of the frame containing the element, and
Charlie Reis 2017/05/12 21:40:50 Heh, I keep catching more of these. :) nit: the
lpz 2017/05/15 18:49:07 Yeah, this one was sneaky, sorry for the churn fro
56 // |node_id| is a sequential ID for the element generated by the renderer. 56 // |node_id| is a sequential ID for the element generated by the renderer.
57 using ElementMap = base::hash_map<std::string, std::unique_ptr<HTMLElement>>; 57 using ElementMap = base::hash_map<std::string, std::unique_ptr<HTMLElement>>;
58 58
59 // Maps a URL to some HTML Elements. Used to maintain parent/child relationship 59 // Maps the key of an iframe element to the FrameTreeNode ID of the frame that
60 // for HTML Elements across IFrame boundaries. 60 // rendered the contents of the iframe.
61 // The key is the string URL set as the src attribute of an iframe. The value is 61 using KeyToFrameTreeIdMap = base::hash_map<std::string, int>;
62 // the HTMLElement proto that represents the iframe element with that URL.
63 // The HTMLElement protos are not owned by this map.
64 using UrlToDomElementMap = base::hash_map<std::string, HTMLElement*>;
65 62
66 // Maps a URL to some Element IDs. Used to maintain parent/child relationship 63 // Maps a FrameTreeNode ID of a frame to a set of child IDs. The child IDs are
67 // for HTML Elements across IFrame boundaries. 64 // the Element IDs of the top-level HTML Elements in this frame.
68 // The key is the string URL of a render frame. The value is the set of Element 65 using FrameTreeIdToChildIdsMap = base::hash_map<int, std::unordered_set<int>>;
69 // IDs that are at the top-level of this render frame.
70 using UrlToChildIdsMap = base::hash_map<std::string, std::unordered_set<int>>;
71 66
72 class ThreatDetails : public base::RefCountedThreadSafe< 67 class ThreatDetails : public base::RefCountedThreadSafe<
73 ThreatDetails, 68 ThreatDetails,
74 content::BrowserThread::DeleteOnUIThread>, 69 content::BrowserThread::DeleteOnUIThread>,
75 public content::WebContentsObserver { 70 public content::WebContentsObserver {
76 public: 71 public:
77 typedef security_interstitials::UnsafeResource UnsafeResource; 72 typedef security_interstitials::UnsafeResource UnsafeResource;
78 73
79 // Constructs a new ThreatDetails instance, using the factory. 74 // Constructs a new ThreatDetails instance, using the factory.
80 static ThreatDetails* NewThreatDetails( 75 static ThreatDetails* NewThreatDetails(
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
114 content::WebContents* web_contents, 109 content::WebContents* web_contents,
115 const UnsafeResource& resource, 110 const UnsafeResource& resource,
116 net::URLRequestContextGetter* request_context_getter, 111 net::URLRequestContextGetter* request_context_getter,
117 history::HistoryService* history_service); 112 history::HistoryService* history_service);
118 113
119 ~ThreatDetails() override; 114 ~ThreatDetails() override;
120 115
121 // Called on the IO thread with the DOM details. 116 // Called on the IO thread with the DOM details.
122 virtual void AddDOMDetails( 117 virtual void AddDOMDetails(
123 const int frame_tree_node_id, 118 const int frame_tree_node_id,
124 const GURL& frame_last_committed_url, 119 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params,
125 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params); 120 const KeyToFrameTreeIdMap& child_frame_tree_map);
126 121
127 // The report protocol buffer. 122 // The report protocol buffer.
128 std::unique_ptr<ClientSafeBrowsingReportRequest> report_; 123 std::unique_ptr<ClientSafeBrowsingReportRequest> report_;
129 124
130 // Used to get a pointer to the HTTP cache. 125 // Used to get a pointer to the HTTP cache.
131 scoped_refptr<net::URLRequestContextGetter> request_context_getter_; 126 scoped_refptr<net::URLRequestContextGetter> request_context_getter_;
132 127
133 private: 128 private:
134 friend class base::RefCountedThreadSafe<ThreatDetails>; 129 friend class base::RefCountedThreadSafe<ThreatDetails>;
135 friend struct content::BrowserThread::DeleteOnThread< 130 friend struct content::BrowserThread::DeleteOnThread<
(...skipping 24 matching lines...) Expand all
160 const std::string& tagname, 155 const std::string& tagname,
161 const std::vector<GURL>* children); 156 const std::vector<GURL>* children);
162 157
163 // Message handler. 158 // Message handler.
164 void OnReceivedThreatDOMDetails( 159 void OnReceivedThreatDOMDetails(
165 content::RenderFrameHost* sender, 160 content::RenderFrameHost* sender,
166 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params); 161 const std::vector<SafeBrowsingHostMsg_ThreatDOMDetails_Node>& params);
167 162
168 void AddRedirectUrlList(const std::vector<GURL>& urls); 163 void AddRedirectUrlList(const std::vector<GURL>& urls);
169 164
170 // Adds an HTML Element to the DOM structure. 165 // Adds an HTML Element to the DOM structure. |frame_tree_node_id| is the
171 // |frame_tree_node_id| is the unique ID of the render frame the element came 166 // unique ID of the frame the element came from, and |process_id| is that
172 // from. |frame_url| is the URL that the render frame was handling. 167 // frame's current renderer process ID. |child_frame_routing_id| is only set
Charlie Reis 2017/05/12 21:40:50 Don't forget to remove the stale params from the c
lpz 2017/05/15 18:49:07 Doh, done.
173 // |element_node_id| is a unique ID of the element within the render frame. 168 // for iframe elements, and indicates the routing ID of the RenderFrame (or
174 // |tag_name| is the tag of the element. |parent_element_node_id| is the 169 // placeholder within |process_id|, if it is an out-of-process iframe)
175 // unique ID of the parent element with the render frame. |attributes| 170 // containing the body of the iframe. |element_node_id| is a unique ID of the
176 // contains the names and values of the element's attributes.|resource| is 171 // element within the frame. |tag_name| is the tag of the element.
177 // set if this element is a resource. 172 // |parent_element_node_id| is the unique ID of the parent element within the
173 // frame. |attributes| contains the names and values of the element's
174 // attributes. |resource| is set if this element is a resource.
178 void AddDomElement(const int frame_tree_node_id, 175 void AddDomElement(const int frame_tree_node_id,
179 const std::string& frame_url,
180 const int element_node_id, 176 const int element_node_id,
181 const std::string& tag_name, 177 const std::string& tag_name,
182 const int parent_element_node_id, 178 const int parent_element_node_id,
183 const std::vector<AttributeNameValue>& attributes, 179 const std::vector<AttributeNameValue>& attributes,
184 const ClientSafeBrowsingReportRequest::Resource* resource); 180 const ClientSafeBrowsingReportRequest::Resource* resource);
185 181
186 scoped_refptr<BaseUIManager> ui_manager_; 182 scoped_refptr<BaseUIManager> ui_manager_;
187 183
188 const UnsafeResource resource_; 184 const UnsafeResource resource_;
189 185
190 // For every Url we collect we create a Resource message. We keep 186 // For every Url we collect we create a Resource message. We keep
191 // them in a map so we can avoid duplicates. 187 // them in a map so we can avoid duplicates.
192 ResourceMap resources_; 188 ResourceMap resources_;
193 189
194 // Store all HTML elements collected, keep them in a map for easy lookup. 190 // Store all HTML elements collected, keep them in a map for easy lookup.
195 ElementMap elements_; 191 ElementMap elements_;
196 192
197 // For each iframe element encountered we map the src of the iframe to the 193 // For each iframe element encountered we map the key of the iframe to the
198 // iframe element. This is used when we receive elements from a different 194 // FrameTreeNode ID of the frame containing the contents of that iframe.
199 // frame whose document URL matches the src of an iframe in this map. We can 195 // We populate this map when receiving results from ThreatDomDetails, and use
200 // then add all elements from the subframe as children of the iframe element 196 // it in a second pass (after FinishCollection) to attach children to iframe
201 // stored here. 197 // elements.
202 UrlToDomElementMap iframe_src_to_element_map_; 198 // Should only be accessed on the IO thread.
199 KeyToFrameTreeIdMap iframe_key_to_frame_tree_id_map_;
203 200
204 // When getting a set of elements from a render frame, we store the frame's 201 // When getting a set of elements from a frame, we store the frame's
205 // URL and a collection of all the top-level elements in that frame. When we 202 // FrameTreeNode ID and a collection of all top-level elements in that frame.
206 // later encounter the parent iframe with the same src URL, we can add all of 203 // It is populated as we receive sets of nodes from different renderers.
207 // these elements as children of that iframe. 204 // It is used together with |iframe_key_to_frame_tree_id_map_| in a second
208 UrlToChildIdsMap document_url_to_children_map_; 205 // pass to insert child elements under their parent iframe elements.
206 FrameTreeIdToChildIdsMap frame_tree_id_to_children_map_;
209 207
210 // Result from the cache extractor. 208 // Result from the cache extractor.
211 bool cache_result_; 209 bool cache_result_;
212 210
213 // Whether user did proceed with the safe browsing blocking page or 211 // Whether user did proceed with the safe browsing blocking page or
214 // not. 212 // not.
215 bool did_proceed_; 213 bool did_proceed_;
216 214
217 // How many times this user has visited this page before. 215 // How many times this user has visited this page before.
218 int num_visits_; 216 int num_visits_;
219 217
220 // Keeps track of whether we have an ambiguous DOM in this report. This can 218 // Keeps track of whether we have an ambiguous DOM in this report. This can
221 // happen when the HTML Elements returned by a render frame can't be 219 // happen when the HTML Elements returned by a renderer can't be
222 // associated with a parent Element in the parent frame. 220 // associated with a parent Element in the parent frame.
223 bool ambiguous_dom_; 221 bool ambiguous_dom_;
224 222
225 // The factory used to instantiate ThreatDetails objects. 223 // The factory used to instantiate ThreatDetails objects.
226 // Useful for tests, so they can provide their own implementation of 224 // Useful for tests, so they can provide their own implementation of
227 // ThreatDetails. 225 // ThreatDetails.
228 static ThreatDetailsFactory* factory_; 226 static ThreatDetailsFactory* factory_;
229 227
230 // Used to collect details from the HTTP Cache. 228 // Used to collect details from the HTTP Cache.
231 scoped_refptr<ThreatDetailsCacheCollector> cache_collector_; 229 scoped_refptr<ThreatDetailsCacheCollector> cache_collector_;
(...skipping 21 matching lines...) Expand all
253 BaseUIManager* ui_manager, 251 BaseUIManager* ui_manager,
254 content::WebContents* web_contents, 252 content::WebContents* web_contents,
255 const security_interstitials::UnsafeResource& unsafe_resource, 253 const security_interstitials::UnsafeResource& unsafe_resource,
256 net::URLRequestContextGetter* request_context_getter, 254 net::URLRequestContextGetter* request_context_getter,
257 history::HistoryService* history_service) = 0; 255 history::HistoryService* history_service) = 0;
258 }; 256 };
259 257
260 } // namespace safe_browsing 258 } // namespace safe_browsing
261 259
262 #endif // COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_ 260 #endif // COMPONENTS_SAFE_BROWSING_BROWSER_THREAT_DETAILS_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698