OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. |
| 4 |
| 5 #include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h" |
| 6 |
| 7 #include <string.h> // for memcpy() |
| 8 #include <map> |
| 9 #include <string> |
| 10 |
| 11 #include "base/callback.h" |
| 12 #include "base/command_line.h" |
| 13 #include "base/message_loop.h" |
| 14 #include "base/process.h" |
| 15 #include "base/string_util.h" |
| 16 #include "chrome/common/main_function_params.h" |
| 17 #include "chrome/common/render_messages.h" |
| 18 #include "chrome/common/sandbox_init_wrapper.h" |
| 19 #include "chrome/renderer/mock_render_process.h" |
| 20 #include "chrome/renderer/render_thread.h" |
| 21 #include "chrome/renderer/render_view.h" |
| 22 #include "chrome/renderer/render_view_visitor.h" |
| 23 #include "chrome/renderer/renderer_main_platform_delegate.h" |
| 24 #include "chrome/renderer/safe_browsing/features.h" |
| 25 #include "googleurl/src/gurl.h" |
| 26 #include "ipc/ipc_channel.h" |
| 27 #include "testing/gmock/include/gmock/gmock.h" |
| 28 #include "testing/gtest/include/gtest/gtest.h" |
| 29 #include "third_party/WebKit/WebKit/chromium/public/WebFrame.h" |
| 30 #include "third_party/WebKit/WebKit/chromium/public/WebURLRequest.h" |
| 31 #include "third_party/WebKit/WebKit/chromium/public/WebView.h" |
| 32 #include "webkit/glue/webkit_glue.h" |
| 33 |
| 34 using ::testing::ContainerEq; |
| 35 |
| 36 namespace safe_browsing { |
| 37 |
| 38 class PhishingDOMFeatureExtractorTest : public ::testing::Test, |
| 39 public IPC::Channel::Listener, |
| 40 public RenderViewVisitor { |
| 41 public: |
| 42 // IPC::Channel::Listener implementation. |
| 43 virtual void OnMessageReceived(const IPC::Message& message) { |
| 44 IPC_BEGIN_MESSAGE_MAP(PhishingDOMFeatureExtractorTest, message) |
| 45 IPC_MESSAGE_HANDLER(ViewHostMsg_RenderViewReady, OnRenderViewReady) |
| 46 IPC_MESSAGE_HANDLER(ViewHostMsg_DidStopLoading, OnDidStopLoading) |
| 47 IPC_MESSAGE_HANDLER(ViewHostMsg_RequestResource, OnRequestResource) |
| 48 IPC_END_MESSAGE_MAP() |
| 49 } |
| 50 |
| 51 // RenderViewVisitor implementation. |
| 52 virtual bool Visit(RenderView* render_view) { |
| 53 view_ = render_view; |
| 54 return false; |
| 55 } |
| 56 |
| 57 protected: |
| 58 virtual void SetUp() { |
| 59 // Set up the renderer. This code is largely adapted from |
| 60 // render_view_test.cc and renderer_main.cc. Note that we use a |
| 61 // MockRenderProcess (because we don't need to use IPC for painting), |
| 62 // but we use a real RenderThread so that we can use the ResourceDispatcher |
| 63 // to fetch network resources. These are then served canned content |
| 64 // in OnRequestResource(). |
| 65 sandbox_init_wrapper_.reset(new SandboxInitWrapper); |
| 66 command_line_.reset(new CommandLine(CommandLine::ARGUMENTS_ONLY)); |
| 67 params_.reset(new MainFunctionParams(*command_line_, |
| 68 *sandbox_init_wrapper_, NULL)); |
| 69 platform_.reset(new RendererMainPlatformDelegate(*params_)); |
| 70 platform_->PlatformInitialize(); |
| 71 |
| 72 // We use a new IPC channel name for each test that runs. |
| 73 // This is necessary because the renderer-side IPC channel is not |
| 74 // shut down when the RenderThread goes away, so attempting to reuse |
| 75 // the channel name gives an error (see ChildThread::~ChildThread()). |
| 76 std::string thread_name = StringPrintf( |
| 77 "phishing_dom_feature_Extractor_unittest.%d", |
| 78 next_thread_id_++); |
| 79 channel_.reset(new IPC::Channel(thread_name, |
| 80 IPC::Channel::MODE_SERVER, this)); |
| 81 ASSERT_TRUE(channel_->Connect()); |
| 82 |
| 83 webkit_glue::SetJavaScriptFlags(L"--expose-gc"); |
| 84 mock_process_.reset(new MockRenderProcess); |
| 85 render_thread_ = new RenderThread(thread_name); |
| 86 mock_process_->set_main_thread(render_thread_); |
| 87 |
| 88 // Tell the renderer to create a view, then wait until it's ready. |
| 89 // We can't call View::Create() directly here or else we won't get |
| 90 // RenderProcess's lazy initialization of WebKit. |
| 91 view_ = NULL; |
| 92 ViewMsg_New_Params params; |
| 93 params.parent_window = 0; |
| 94 params.view_id = kViewId; |
| 95 params.session_storage_namespace_id = kInvalidSessionStorageNamespaceId; |
| 96 ASSERT_TRUE(channel_->Send(new ViewMsg_New(params))); |
| 97 msg_loop_.Run(); |
| 98 |
| 99 extractor_.reset(new PhishingDOMFeatureExtractor(view_)); |
| 100 } |
| 101 |
| 102 virtual void TearDown() { |
| 103 // Try very hard to collect garbage before shutting down. |
| 104 GetMainFrame()->collectGarbage(); |
| 105 GetMainFrame()->collectGarbage(); |
| 106 |
| 107 ASSERT_TRUE(channel_->Send(new ViewMsg_Close(kViewId))); |
| 108 do { |
| 109 msg_loop_.RunAllPending(); |
| 110 view_ = NULL; |
| 111 RenderView::ForEach(this); |
| 112 } while (view_); |
| 113 |
| 114 mock_process_.reset(); |
| 115 msg_loop_.RunAllPending(); |
| 116 platform_->PlatformUninitialize(); |
| 117 platform_.reset(); |
| 118 command_line_.reset(); |
| 119 sandbox_init_wrapper_.reset(); |
| 120 } |
| 121 |
| 122 // Returns the main WebFrame for our RenderView. |
| 123 WebKit::WebFrame* GetMainFrame() { |
| 124 return view_->webview()->mainFrame(); |
| 125 } |
| 126 |
| 127 // Loads |url| into the RenderView, waiting for the load to finish. |
| 128 void LoadURL(const std::string& url) { |
| 129 GetMainFrame()->loadRequest(WebKit::WebURLRequest(GURL(url))); |
| 130 msg_loop_.Run(); |
| 131 } |
| 132 |
| 133 // Runs the DOMFeatureExtractor on the RenderView, waiting for the |
| 134 // completion callback. Returns the success boolean from the callback. |
| 135 bool ExtractFeatures(FeatureMap* features) { |
| 136 success_ = false; |
| 137 extractor_->ExtractFeatures( |
| 138 features, |
| 139 NewCallback(this, &PhishingDOMFeatureExtractorTest::ExtractionDone)); |
| 140 msg_loop_.Run(); |
| 141 return success_; |
| 142 } |
| 143 |
| 144 // Completion callback for feature extraction. |
| 145 void ExtractionDone(bool success) { |
| 146 success_ = success; |
| 147 msg_loop_.Quit(); |
| 148 } |
| 149 |
| 150 // IPC message handlers below |
| 151 |
| 152 // Notification that page load has finished. Exit the message loop |
| 153 // so that the test can continue. |
| 154 void OnDidStopLoading() { |
| 155 msg_loop_.Quit(); |
| 156 } |
| 157 |
| 158 // Notification that the renderer wants to load a resource. |
| 159 // If the requested url is in responses_, we send the renderer a 200 |
| 160 // and the supplied content, otherwise we send it a 404 error. |
| 161 void OnRequestResource(const IPC::Message& message, |
| 162 int request_id, |
| 163 const ViewHostMsg_Resource_Request& request_data) { |
| 164 std::string headers, body; |
| 165 std::map<std::string, std::string>::const_iterator it = |
| 166 responses_.find(request_data.url.spec()); |
| 167 if (it == responses_.end()) { |
| 168 headers = "HTTP/1.1 404 Not Found\0Content-Type:text/html\0\0"; |
| 169 body = "content not found"; |
| 170 } else { |
| 171 headers = "HTTP/1.1 200 OK\0Content-Type:text/html\0\0"; |
| 172 body = it->second; |
| 173 } |
| 174 |
| 175 ResourceResponseHead response_head; |
| 176 response_head.headers = new net::HttpResponseHeaders(headers); |
| 177 response_head.mime_type = "text/html"; |
| 178 ASSERT_TRUE(channel_->Send(new ViewMsg_Resource_ReceivedResponse( |
| 179 message.routing_id(), request_id, response_head))); |
| 180 |
| 181 base::SharedMemory shared_memory; |
| 182 ASSERT_TRUE(shared_memory.Create(std::wstring(), false, |
| 183 false, body.size())); |
| 184 ASSERT_TRUE(shared_memory.Map(body.size())); |
| 185 memcpy(shared_memory.memory(), body.data(), body.size()); |
| 186 |
| 187 base::SharedMemoryHandle handle; |
| 188 ASSERT_TRUE(shared_memory.GiveToProcess(base::Process::Current().handle(), |
| 189 &handle)); |
| 190 ASSERT_TRUE(channel_->Send(new ViewMsg_Resource_DataReceived( |
| 191 message.routing_id(), request_id, handle, body.size()))); |
| 192 |
| 193 ASSERT_TRUE(channel_->Send(new ViewMsg_Resource_RequestComplete( |
| 194 message.routing_id(), |
| 195 request_id, |
| 196 URLRequestStatus(), |
| 197 std::string()))); |
| 198 } |
| 199 |
| 200 // Notification that the render view we've created is ready to use. |
| 201 void OnRenderViewReady() { |
| 202 // Grab a pointer to the new view using RenderViewVisitor. |
| 203 ASSERT_TRUE(!view_); |
| 204 RenderView::ForEach(this); |
| 205 ASSERT_TRUE(view_); |
| 206 msg_loop_.Quit(); |
| 207 } |
| 208 |
| 209 static int next_thread_id_; // incrementing counter for thread ids |
| 210 static const int32 kViewId = 5; // arbitrary id for our testing view |
| 211 |
| 212 MessageLoopForIO msg_loop_; |
| 213 // channel that the renderer uses to talk to the browser. |
| 214 // For this test, we will handle the browser end of the channel. |
| 215 scoped_ptr<IPC::Channel> channel_; |
| 216 RenderThread* render_thread_; // owned by mock_process_ |
| 217 scoped_ptr<MockRenderProcess> mock_process_; |
| 218 RenderView* view_; // not owned, deletes itself on close |
| 219 scoped_ptr<RendererMainPlatformDelegate> platform_; |
| 220 scoped_ptr<MainFunctionParams> params_; |
| 221 scoped_ptr<CommandLine> command_line_; |
| 222 scoped_ptr<SandboxInitWrapper> sandbox_init_wrapper_; |
| 223 |
| 224 scoped_ptr<PhishingDOMFeatureExtractor> extractor_; |
| 225 // Map of URL -> response body for network requests from the renderer. |
| 226 // Any URLs not in this map are served a 404 error. |
| 227 std::map<std::string, std::string> responses_; |
| 228 bool success_; // holds the success value from ExtractFeatures |
| 229 }; |
| 230 |
| 231 int PhishingDOMFeatureExtractorTest::next_thread_id_ = 0; |
| 232 |
| 233 TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) { |
| 234 responses_["http://host.com/"] = |
| 235 "<html><head><body>" |
| 236 "<form action=\"query\"><input type=text><input type=checkbox></form>" |
| 237 "<form action=\"http://cgi.host.com/submit\"></form>" |
| 238 "<form action=\"http://other.com/\"></form>" |
| 239 "<form action=\"query\"></form>" |
| 240 "<form></form></body></html>"; |
| 241 |
| 242 FeatureMap expected_features; |
| 243 expected_features.AddBooleanFeature(features::kPageHasForms); |
| 244 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25); |
| 245 expected_features.AddBooleanFeature(features::kPageHasTextInputs); |
| 246 expected_features.AddBooleanFeature(features::kPageHasCheckInputs); |
| 247 |
| 248 FeatureMap features; |
| 249 LoadURL("http://host.com/"); |
| 250 ASSERT_TRUE(ExtractFeatures(&features)); |
| 251 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
| 252 |
| 253 responses_["http://host.com/"] = |
| 254 "<html><head><body>" |
| 255 "<input type=\"radio\"><input type=password></body></html>"; |
| 256 |
| 257 expected_features.Clear(); |
| 258 expected_features.AddBooleanFeature(features::kPageHasRadioInputs); |
| 259 expected_features.AddBooleanFeature(features::kPageHasPswdInputs); |
| 260 |
| 261 features.Clear(); |
| 262 LoadURL("http://host.com/"); |
| 263 ASSERT_TRUE(ExtractFeatures(&features)); |
| 264 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
| 265 |
| 266 responses_["http://host.com/"] = |
| 267 "<html><head><body><input></body></html>"; |
| 268 |
| 269 expected_features.Clear(); |
| 270 expected_features.AddBooleanFeature(features::kPageHasTextInputs); |
| 271 |
| 272 features.Clear(); |
| 273 LoadURL("http://host.com/"); |
| 274 ASSERT_TRUE(ExtractFeatures(&features)); |
| 275 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
| 276 |
| 277 responses_["http://host.com/"] = |
| 278 "<html><head><body><input type=\"invalid\"></body></html>"; |
| 279 |
| 280 expected_features.Clear(); |
| 281 expected_features.AddBooleanFeature(features::kPageHasTextInputs); |
| 282 |
| 283 features.Clear(); |
| 284 LoadURL("http://host.com/"); |
| 285 ASSERT_TRUE(ExtractFeatures(&features)); |
| 286 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
| 287 } |
| 288 |
| 289 TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) { |
| 290 responses_["http://www.host.com/"] = |
| 291 "<html><head><body>" |
| 292 "<a href=\"http://www2.host.com/abc\">link</a>" |
| 293 "<a name=page_anchor></a>" |
| 294 "<a href=\"http://www.chromium.org/\">chromium</a>" |
| 295 "</body></html"; |
| 296 |
| 297 FeatureMap expected_features; |
| 298 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5); |
| 299 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0); |
| 300 expected_features.AddBooleanFeature(features::kPageLinkDomain + |
| 301 std::string("chromium.org")); |
| 302 |
| 303 FeatureMap features; |
| 304 LoadURL("http://www.host.com/"); |
| 305 ASSERT_TRUE(ExtractFeatures(&features)); |
| 306 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
| 307 |
| 308 responses_.clear(); |
| 309 responses_["https://www.host.com/"] = |
| 310 "<html><head><body>" |
| 311 "<a href=\"login\">this is secure</a>" |
| 312 "<a href=\"http://host.com\">not secure</a>" |
| 313 "<a href=\"https://www2.host.com/login\">also secure</a>" |
| 314 "<a href=\"http://chromium.org/\">also not secure</a>" |
| 315 "</body></html>"; |
| 316 |
| 317 expected_features.Clear(); |
| 318 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25); |
| 319 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5); |
| 320 expected_features.AddBooleanFeature(features::kPageLinkDomain + |
| 321 std::string("chromium.org")); |
| 322 |
| 323 features.Clear(); |
| 324 LoadURL("https://www.host.com/"); |
| 325 ASSERT_TRUE(ExtractFeatures(&features)); |
| 326 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
| 327 } |
| 328 |
| 329 TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) { |
| 330 responses_["http://host.com/"] = |
| 331 "<html><head><script></script><script></script></head></html>"; |
| 332 |
| 333 FeatureMap expected_features; |
| 334 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne); |
| 335 |
| 336 FeatureMap features; |
| 337 LoadURL("http://host.com/"); |
| 338 ASSERT_TRUE(ExtractFeatures(&features)); |
| 339 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
| 340 |
| 341 responses_["http://host.com/"] = |
| 342 "<html><head><script></script><script></script><script></script>" |
| 343 "<script></script><script></script><script></script><script></script>" |
| 344 "</head><body><img src=\"blah.gif\">" |
| 345 "<img src=\"http://host2.com/blah.gif\"></body></html>"; |
| 346 |
| 347 expected_features.Clear(); |
| 348 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne); |
| 349 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTSix); |
| 350 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5); |
| 351 |
| 352 features.Clear(); |
| 353 LoadURL("http://host.com/"); |
| 354 ASSERT_TRUE(ExtractFeatures(&features)); |
| 355 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
| 356 } |
| 357 |
| 358 TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) { |
| 359 // Test that features are aggregated across all frames. |
| 360 responses_["http://host.com/"] = |
| 361 "<html><body><input type=text><a href=\"info.html\">link</a>" |
| 362 "<iframe src=\"http://host2.com/\"></iframe>" |
| 363 "<iframe src=\"http://host3.com/\"></iframe>" |
| 364 "</body></html>"; |
| 365 |
| 366 responses_["http://host2.com/"] = |
| 367 "<html><head><script></script><body>" |
| 368 "<form action=\"http://host4.com/\"><input type=checkbox></form>" |
| 369 "<form action=\"http://host2.com/submit\"></form>" |
| 370 "<a href=\"http://www.host2.com/home\">link</a>" |
| 371 "<iframe src=\"nested.html\"></iframe>" |
| 372 "<body></html>"; |
| 373 |
| 374 responses_["http://host2.com/nested.html"] = |
| 375 "<html><body><input type=password>" |
| 376 "<a href=\"https://host4.com/\">link</a>" |
| 377 "<a href=\"relative\">another</a>" |
| 378 "</body></html>"; |
| 379 |
| 380 responses_["http://host3.com/"] = |
| 381 "<html><head><script></script><body>" |
| 382 "<img src=\"http://host.com/123.png\">" |
| 383 "</body></html>"; |
| 384 |
| 385 FeatureMap expected_features; |
| 386 expected_features.AddBooleanFeature(features::kPageHasForms); |
| 387 // Form action domains are compared to the URL of the document they're in, |
| 388 // not the URL of the toplevel page. So http://host2.com/ has two form |
| 389 // actions, one of which is external. |
| 390 expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5); |
| 391 expected_features.AddBooleanFeature(features::kPageHasTextInputs); |
| 392 expected_features.AddBooleanFeature(features::kPageHasPswdInputs); |
| 393 expected_features.AddBooleanFeature(features::kPageHasCheckInputs); |
| 394 expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25); |
| 395 expected_features.AddBooleanFeature(features::kPageLinkDomain + |
| 396 std::string("host4.com")); |
| 397 expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25); |
| 398 expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne); |
| 399 expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0); |
| 400 |
| 401 FeatureMap features; |
| 402 LoadURL("http://host.com/"); |
| 403 ASSERT_TRUE(ExtractFeatures(&features)); |
| 404 EXPECT_THAT(features.features(), ContainerEq(expected_features.features())); |
| 405 } |
| 406 |
| 407 // TODO(bryner): Test extraction with multiple passes, including the case where |
| 408 // the node we stopped on is removed from the document. |
| 409 |
| 410 } // namespace safe_browsing |
OLD | NEW |