Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(324)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_dom_feature_extractor_unittest.cc

Issue 2878046: Add an extractor for DOM features to be used for client side phishing detection. (Closed)
Patch Set: address marria's comments Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
(Empty)
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
6
7 #include <string.h> // for memcpy()
8 #include <map>
9 #include <string>
10
11 #include "base/callback.h"
12 #include "base/command_line.h"
13 #include "base/message_loop.h"
14 #include "base/process.h"
15 #include "base/string_util.h"
16 #include "chrome/common/main_function_params.h"
17 #include "chrome/common/render_messages.h"
18 #include "chrome/common/sandbox_init_wrapper.h"
19 #include "chrome/renderer/mock_render_process.h"
20 #include "chrome/renderer/render_thread.h"
21 #include "chrome/renderer/render_view.h"
22 #include "chrome/renderer/render_view_visitor.h"
23 #include "chrome/renderer/renderer_main_platform_delegate.h"
24 #include "chrome/renderer/safe_browsing/features.h"
25 #include "googleurl/src/gurl.h"
26 #include "ipc/ipc_channel.h"
27 #include "testing/gmock/include/gmock/gmock.h"
28 #include "testing/gtest/include/gtest/gtest.h"
29 #include "third_party/WebKit/WebKit/chromium/public/WebFrame.h"
30 #include "third_party/WebKit/WebKit/chromium/public/WebURLRequest.h"
31 #include "third_party/WebKit/WebKit/chromium/public/WebView.h"
32 #include "webkit/glue/webkit_glue.h"
33
34 using ::testing::ContainerEq;
35
36 namespace safe_browsing {
37
38 class PhishingDOMFeatureExtractorTest : public ::testing::Test,
39 public IPC::Channel::Listener,
40 public RenderViewVisitor {
41 public:
42 // IPC::Channel::Listener implementation.
43 virtual void OnMessageReceived(const IPC::Message& message) {
44 IPC_BEGIN_MESSAGE_MAP(PhishingDOMFeatureExtractorTest, message)
45 IPC_MESSAGE_HANDLER(ViewHostMsg_RenderViewReady, OnRenderViewReady)
46 IPC_MESSAGE_HANDLER(ViewHostMsg_DidStopLoading, OnDidStopLoading)
47 IPC_MESSAGE_HANDLER(ViewHostMsg_RequestResource, OnRequestResource)
48 IPC_END_MESSAGE_MAP()
49 }
50
51 // RenderViewVisitor implementation.
52 virtual bool Visit(RenderView* render_view) {
53 view_ = render_view;
54 return false;
55 }
56
57 protected:
58 virtual void SetUp() {
59 // Set up the renderer. This code is largely adapted from
60 // render_view_test.cc and renderer_main.cc. Note that we use a
61 // MockRenderProcess (because we don't need to use IPC for painting),
62 // but we use a real RenderThread so that we can use the ResourceDispatcher
63 // to fetch network resources. These are then served canned content
64 // in OnRequestResource().
65 sandbox_init_wrapper_.reset(new SandboxInitWrapper);
66 command_line_.reset(new CommandLine(CommandLine::ARGUMENTS_ONLY));
67 params_.reset(new MainFunctionParams(*command_line_,
68 *sandbox_init_wrapper_, NULL));
69 platform_.reset(new RendererMainPlatformDelegate(*params_));
70 platform_->PlatformInitialize();
71
72 // We use a new IPC channel name for each test that runs.
73 // This is necessary because the renderer-side IPC channel is not
74 // shut down when the RenderThread goes away, so attempting to reuse
75 // the channel name gives an error (see ChildThread::~ChildThread()).
76 std::string thread_name = StringPrintf(
77 "phishing_dom_feature_Extractor_unittest.%d",
78 next_thread_id_++);
79 channel_.reset(new IPC::Channel(thread_name,
80 IPC::Channel::MODE_SERVER, this));
81 ASSERT_TRUE(channel_->Connect());
82
83 webkit_glue::SetJavaScriptFlags(L"--expose-gc");
84 mock_process_.reset(new MockRenderProcess);
85 render_thread_ = new RenderThread(thread_name);
86 mock_process_->set_main_thread(render_thread_);
87
88 // Tell the renderer to create a view, then wait until it's ready.
89 // We can't call View::Create() directly here or else we won't get
90 // RenderProcess's lazy initialization of WebKit.
91 view_ = NULL;
92 ViewMsg_New_Params params;
93 params.parent_window = 0;
94 params.view_id = kViewId;
95 params.session_storage_namespace_id = kInvalidSessionStorageNamespaceId;
96 ASSERT_TRUE(channel_->Send(new ViewMsg_New(params)));
97 msg_loop_.Run();
98
99 extractor_.reset(new PhishingDOMFeatureExtractor(view_));
100 }
101
102 virtual void TearDown() {
103 // Try very hard to collect garbage before shutting down.
104 GetMainFrame()->collectGarbage();
105 GetMainFrame()->collectGarbage();
106
107 ASSERT_TRUE(channel_->Send(new ViewMsg_Close(kViewId)));
108 do {
109 msg_loop_.RunAllPending();
110 view_ = NULL;
111 RenderView::ForEach(this);
112 } while (view_);
113
114 mock_process_.reset();
115 msg_loop_.RunAllPending();
116 platform_->PlatformUninitialize();
117 platform_.reset();
118 command_line_.reset();
119 sandbox_init_wrapper_.reset();
120 }
121
122 // Returns the main WebFrame for our RenderView.
123 WebKit::WebFrame* GetMainFrame() {
124 return view_->webview()->mainFrame();
125 }
126
127 // Loads |url| into the RenderView, waiting for the load to finish.
128 void LoadURL(const std::string& url) {
129 GetMainFrame()->loadRequest(WebKit::WebURLRequest(GURL(url)));
130 msg_loop_.Run();
131 }
132
133 // Runs the DOMFeatureExtractor on the RenderView, waiting for the
134 // completion callback. Returns the success boolean from the callback.
135 bool ExtractFeatures(FeatureMap* features) {
136 success_ = false;
137 extractor_->ExtractFeatures(
138 features,
139 NewCallback(this, &PhishingDOMFeatureExtractorTest::ExtractionDone));
140 msg_loop_.Run();
141 return success_;
142 }
143
144 // Completion callback for feature extraction.
145 void ExtractionDone(bool success) {
146 success_ = success;
147 msg_loop_.Quit();
148 }
149
150 // IPC message handlers below
151
152 // Notification that page load has finished. Exit the message loop
153 // so that the test can continue.
154 void OnDidStopLoading() {
155 msg_loop_.Quit();
156 }
157
158 // Notification that the renderer wants to load a resource.
159 // If the requested url is in responses_, we send the renderer a 200
160 // and the supplied content, otherwise we send it a 404 error.
161 void OnRequestResource(const IPC::Message& message,
162 int request_id,
163 const ViewHostMsg_Resource_Request& request_data) {
164 std::string headers, body;
165 std::map<std::string, std::string>::const_iterator it =
166 responses_.find(request_data.url.spec());
167 if (it == responses_.end()) {
168 headers = "HTTP/1.1 404 Not Found\0Content-Type:text/html\0\0";
169 body = "content not found";
170 } else {
171 headers = "HTTP/1.1 200 OK\0Content-Type:text/html\0\0";
172 body = it->second;
173 }
174
175 ResourceResponseHead response_head;
176 response_head.headers = new net::HttpResponseHeaders(headers);
177 response_head.mime_type = "text/html";
178 ASSERT_TRUE(channel_->Send(new ViewMsg_Resource_ReceivedResponse(
179 message.routing_id(), request_id, response_head)));
180
181 base::SharedMemory shared_memory;
182 ASSERT_TRUE(shared_memory.Create(std::wstring(), false,
183 false, body.size()));
184 ASSERT_TRUE(shared_memory.Map(body.size()));
185 memcpy(shared_memory.memory(), body.data(), body.size());
186
187 base::SharedMemoryHandle handle;
188 ASSERT_TRUE(shared_memory.GiveToProcess(base::Process::Current().handle(),
189 &handle));
190 ASSERT_TRUE(channel_->Send(new ViewMsg_Resource_DataReceived(
191 message.routing_id(), request_id, handle, body.size())));
192
193 ASSERT_TRUE(channel_->Send(new ViewMsg_Resource_RequestComplete(
194 message.routing_id(),
195 request_id,
196 URLRequestStatus(),
197 std::string())));
198 }
199
200 // Notification that the render view we've created is ready to use.
201 void OnRenderViewReady() {
202 // Grab a pointer to the new view using RenderViewVisitor.
203 ASSERT_TRUE(!view_);
204 RenderView::ForEach(this);
205 ASSERT_TRUE(view_);
206 msg_loop_.Quit();
207 }
208
209 static int next_thread_id_; // incrementing counter for thread ids
210 static const int32 kViewId = 5; // arbitrary id for our testing view
211
212 MessageLoopForIO msg_loop_;
213 // channel that the renderer uses to talk to the browser.
214 // For this test, we will handle the browser end of the channel.
215 scoped_ptr<IPC::Channel> channel_;
216 RenderThread* render_thread_; // owned by mock_process_
217 scoped_ptr<MockRenderProcess> mock_process_;
218 RenderView* view_; // not owned, deletes itself on close
219 scoped_ptr<RendererMainPlatformDelegate> platform_;
220 scoped_ptr<MainFunctionParams> params_;
221 scoped_ptr<CommandLine> command_line_;
222 scoped_ptr<SandboxInitWrapper> sandbox_init_wrapper_;
223
224 scoped_ptr<PhishingDOMFeatureExtractor> extractor_;
225 // Map of URL -> response body for network requests from the renderer.
226 // Any URLs not in this map are served a 404 error.
227 std::map<std::string, std::string> responses_;
228 bool success_; // holds the success value from ExtractFeatures
229 };
230
231 int PhishingDOMFeatureExtractorTest::next_thread_id_ = 0;
232
233 TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
234 responses_["http://host.com/"] =
235 "<html><head><body>"
236 "<form action=\"query\"><input type=text><input type=checkbox></form>"
237 "<form action=\"http://cgi.host.com/submit\"></form>"
238 "<form action=\"http://other.com/\"></form>"
239 "<form action=\"query\"></form>"
240 "<form></form></body></html>";
241
242 FeatureMap expected_features;
243 expected_features.AddBooleanFeature(features::kPageHasForms);
244 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25);
245 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
246 expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
247
248 FeatureMap features;
249 LoadURL("http://host.com/");
250 ASSERT_TRUE(ExtractFeatures(&features));
251 EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
252
253 responses_["http://host.com/"] =
254 "<html><head><body>"
255 "<input type=\"radio\"><input type=password></body></html>";
256
257 expected_features.Clear();
258 expected_features.AddBooleanFeature(features::kPageHasRadioInputs);
259 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
260
261 features.Clear();
262 LoadURL("http://host.com/");
263 ASSERT_TRUE(ExtractFeatures(&features));
264 EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
265
266 responses_["http://host.com/"] =
267 "<html><head><body><input></body></html>";
268
269 expected_features.Clear();
270 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
271
272 features.Clear();
273 LoadURL("http://host.com/");
274 ASSERT_TRUE(ExtractFeatures(&features));
275 EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
276
277 responses_["http://host.com/"] =
278 "<html><head><body><input type=\"invalid\"></body></html>";
279
280 expected_features.Clear();
281 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
282
283 features.Clear();
284 LoadURL("http://host.com/");
285 ASSERT_TRUE(ExtractFeatures(&features));
286 EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
287 }
288
289 TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
290 responses_["http://www.host.com/"] =
291 "<html><head><body>"
292 "<a href=\"http://www2.host.com/abc\">link</a>"
293 "<a name=page_anchor></a>"
294 "<a href=\"http://www.chromium.org/\">chromium</a>"
295 "</body></html";
296
297 FeatureMap expected_features;
298 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5);
299 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0);
300 expected_features.AddBooleanFeature(features::kPageLinkDomain +
301 std::string("chromium.org"));
302
303 FeatureMap features;
304 LoadURL("http://www.host.com/");
305 ASSERT_TRUE(ExtractFeatures(&features));
306 EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
307
308 responses_.clear();
309 responses_["https://www.host.com/"] =
310 "<html><head><body>"
311 "<a href=\"login\">this is secure</a>"
312 "<a href=\"http://host.com\">not secure</a>"
313 "<a href=\"https://www2.host.com/login\">also secure</a>"
314 "<a href=\"http://chromium.org/\">also not secure</a>"
315 "</body></html>";
316
317 expected_features.Clear();
318 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
319 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5);
320 expected_features.AddBooleanFeature(features::kPageLinkDomain +
321 std::string("chromium.org"));
322
323 features.Clear();
324 LoadURL("https://www.host.com/");
325 ASSERT_TRUE(ExtractFeatures(&features));
326 EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
327 }
328
329 TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) {
330 responses_["http://host.com/"] =
331 "<html><head><script></script><script></script></head></html>";
332
333 FeatureMap expected_features;
334 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
335
336 FeatureMap features;
337 LoadURL("http://host.com/");
338 ASSERT_TRUE(ExtractFeatures(&features));
339 EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
340
341 responses_["http://host.com/"] =
342 "<html><head><script></script><script></script><script></script>"
343 "<script></script><script></script><script></script><script></script>"
344 "</head><body><img src=\"blah.gif\">"
345 "<img src=\"http://host2.com/blah.gif\"></body></html>";
346
347 expected_features.Clear();
348 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
349 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTSix);
350 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5);
351
352 features.Clear();
353 LoadURL("http://host.com/");
354 ASSERT_TRUE(ExtractFeatures(&features));
355 EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
356 }
357
358 TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
359 // Test that features are aggregated across all frames.
360 responses_["http://host.com/"] =
361 "<html><body><input type=text><a href=\"info.html\">link</a>"
362 "<iframe src=\"http://host2.com/\"></iframe>"
363 "<iframe src=\"http://host3.com/\"></iframe>"
364 "</body></html>";
365
366 responses_["http://host2.com/"] =
367 "<html><head><script></script><body>"
368 "<form action=\"http://host4.com/\"><input type=checkbox></form>"
369 "<form action=\"http://host2.com/submit\"></form>"
370 "<a href=\"http://www.host2.com/home\">link</a>"
371 "<iframe src=\"nested.html\"></iframe>"
372 "<body></html>";
373
374 responses_["http://host2.com/nested.html"] =
375 "<html><body><input type=password>"
376 "<a href=\"https://host4.com/\">link</a>"
377 "<a href=\"relative\">another</a>"
378 "</body></html>";
379
380 responses_["http://host3.com/"] =
381 "<html><head><script></script><body>"
382 "<img src=\"http://host.com/123.png\">"
383 "</body></html>";
384
385 FeatureMap expected_features;
386 expected_features.AddBooleanFeature(features::kPageHasForms);
387 // Form action domains are compared to the URL of the document they're in,
388 // not the URL of the toplevel page. So http://host2.com/ has two form
389 // actions, one of which is external.
390 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
391 expected_features.AddBooleanFeature(features::kPageHasTextInputs);
392 expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
393 expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
394 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
395 expected_features.AddBooleanFeature(features::kPageLinkDomain +
396 std::string("host4.com"));
397 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25);
398 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
399 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0);
400
401 FeatureMap features;
402 LoadURL("http://host.com/");
403 ASSERT_TRUE(ExtractFeatures(&features));
404 EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
405 }
406
407 // TODO(bryner): Test extraction with multiple passes, including the case where
408 // the node we stopped on is removed from the document.
409
410 } // namespace safe_browsing
OLDNEW
« no previous file with comments | « chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698