| Index: chrome/renderer/safe_browsing/phishing_dom_feature_extractor_unittest.cc
|
| diff --git a/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_unittest.cc b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_unittest.cc
|
| new file mode 100644
|
| index 0000000000000000000000000000000000000000..637b2bde2c2b64692bddddff72f333b2ad405161
|
| --- /dev/null
|
| +++ b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor_unittest.cc
|
| @@ -0,0 +1,410 @@
|
| +// Copyright (c) 2010 The Chromium Authors. All rights reserved.
|
| +// Use of this source code is governed by a BSD-style license that can be
|
| +// found in the LICENSE file.
|
| +
|
| +#include "chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h"
|
| +
|
| +#include <string.h> // for memcpy()
|
| +#include <map>
|
| +#include <string>
|
| +
|
| +#include "base/callback.h"
|
| +#include "base/command_line.h"
|
| +#include "base/message_loop.h"
|
| +#include "base/process.h"
|
| +#include "base/string_util.h"
|
| +#include "chrome/common/main_function_params.h"
|
| +#include "chrome/common/render_messages.h"
|
| +#include "chrome/common/sandbox_init_wrapper.h"
|
| +#include "chrome/renderer/mock_render_process.h"
|
| +#include "chrome/renderer/render_thread.h"
|
| +#include "chrome/renderer/render_view.h"
|
| +#include "chrome/renderer/render_view_visitor.h"
|
| +#include "chrome/renderer/renderer_main_platform_delegate.h"
|
| +#include "chrome/renderer/safe_browsing/features.h"
|
| +#include "googleurl/src/gurl.h"
|
| +#include "ipc/ipc_channel.h"
|
| +#include "testing/gmock/include/gmock/gmock.h"
|
| +#include "testing/gtest/include/gtest/gtest.h"
|
| +#include "third_party/WebKit/WebKit/chromium/public/WebFrame.h"
|
| +#include "third_party/WebKit/WebKit/chromium/public/WebURLRequest.h"
|
| +#include "third_party/WebKit/WebKit/chromium/public/WebView.h"
|
| +#include "webkit/glue/webkit_glue.h"
|
| +
|
| +using ::testing::ContainerEq;
|
| +
|
| +namespace safe_browsing {
|
| +
|
| +// Test fixture that drives a real RenderThread/RenderView over an IPC channel.
|
| +// The fixture plays the browser end of that channel: it answers the
|
| +// renderer's resource requests with canned content from |responses_| and
|
| +// pumps |msg_loop_| until the renderer reports the event being waited for.
|
| +class PhishingDOMFeatureExtractorTest : public ::testing::Test,
|
| +                                        public IPC::Channel::Listener,
|
| +                                        public RenderViewVisitor {
|
| + public:
|
| +  // Puts the raw pointers and the result flag into a known state so that
|
| +  // TearDown() stays safe even if SetUp() returns early on a failed ASSERT.
|
| +  PhishingDOMFeatureExtractorTest()
|
| +      : render_thread_(NULL),
|
| +        view_(NULL),
|
| +        success_(false) {
|
| +  }
|
| +
|
| +  // IPC::Channel::Listener implementation.
|
| +  // Dispatches the renderer -> browser messages this test cares about.
|
| +  virtual void OnMessageReceived(const IPC::Message& message) {
|
| +    IPC_BEGIN_MESSAGE_MAP(PhishingDOMFeatureExtractorTest, message)
|
| +      IPC_MESSAGE_HANDLER(ViewHostMsg_RenderViewReady, OnRenderViewReady)
|
| +      IPC_MESSAGE_HANDLER(ViewHostMsg_DidStopLoading, OnDidStopLoading)
|
| +      IPC_MESSAGE_HANDLER(ViewHostMsg_RequestResource, OnRequestResource)
|
| +    IPC_END_MESSAGE_MAP()
|
| +  }
|
| +
|
| +  // RenderViewVisitor implementation.
|
| +  // Records the first view that is visited in |view_| and returns false.
|
| +  virtual bool Visit(RenderView* render_view) {
|
| +    view_ = render_view;
|
| +    return false;
|
| +  }
|
| +
|
| + protected:
|
| +  virtual void SetUp() {
|
| +    // Set up the renderer.  This code is largely adapted from
|
| +    // render_view_test.cc and renderer_main.cc.  Note that we use a
|
| +    // MockRenderProcess (because we don't need to use IPC for painting),
|
| +    // but we use a real RenderThread so that we can use the ResourceDispatcher
|
| +    // to fetch network resources.  These are then served canned content
|
| +    // in OnRequestResource().
|
| +    sandbox_init_wrapper_.reset(new SandboxInitWrapper);
|
| +    command_line_.reset(new CommandLine(CommandLine::ARGUMENTS_ONLY));
|
| +    params_.reset(new MainFunctionParams(*command_line_,
|
| +                                         *sandbox_init_wrapper_, NULL));
|
| +    platform_.reset(new RendererMainPlatformDelegate(*params_));
|
| +    platform_->PlatformInitialize();
|
| +
|
| +    // We use a new IPC channel name for each test that runs.
|
| +    // This is necessary because the renderer-side IPC channel is not
|
| +    // shut down when the RenderThread goes away, so attempting to reuse
|
| +    // the channel name gives an error (see ChildThread::~ChildThread()).
|
| +    std::string thread_name = StringPrintf(
|
| +        "phishing_dom_feature_Extractor_unittest.%d",
|
| +        next_thread_id_++);
|
| +    channel_.reset(new IPC::Channel(thread_name,
|
| +                                    IPC::Channel::MODE_SERVER, this));
|
| +    ASSERT_TRUE(channel_->Connect());
|
| +
|
| +    webkit_glue::SetJavaScriptFlags(L"--expose-gc");
|
| +    mock_process_.reset(new MockRenderProcess);
|
| +    render_thread_ = new RenderThread(thread_name);
|
| +    mock_process_->set_main_thread(render_thread_);
|
| +
|
| +    // Tell the renderer to create a view, then wait until it's ready.
|
| +    // We can't create the view directly here or else we won't get
|
| +    // RenderProcess's lazy initialization of WebKit.
|
| +    view_ = NULL;
|
| +    ViewMsg_New_Params params;
|
| +    params.parent_window = 0;
|
| +    params.view_id = kViewId;
|
| +    params.session_storage_namespace_id = kInvalidSessionStorageNamespaceId;
|
| +    ASSERT_TRUE(channel_->Send(new ViewMsg_New(params)));
|
| +    // OnRenderViewReady() fills in |view_| and quits this loop.
|
| +    msg_loop_.Run();
|
| +
|
| +    extractor_.reset(new PhishingDOMFeatureExtractor(view_));
|
| +  }
|
| +
|
| +  virtual void TearDown() {
|
| +    // The extractor was constructed from |view_|, which is destroyed below;
|
| +    // release it first so that it never outlives the view.
|
| +    extractor_.reset();
|
| +
|
| +    // |view_| is still NULL if SetUp() failed before the view was created.
|
| +    if (view_) {
|
| +      // Try very hard to collect garbage before shutting down.
|
| +      GetMainFrame()->collectGarbage();
|
| +      GetMainFrame()->collectGarbage();
|
| +
|
| +      // Ask the view to close, then keep pumping the loop until
|
| +      // RenderView::ForEach() no longer reports any view.
|
| +      ASSERT_TRUE(channel_->Send(new ViewMsg_Close(kViewId)));
|
| +      do {
|
| +        msg_loop_.RunAllPending();
|
| +        view_ = NULL;
|
| +        RenderView::ForEach(this);
|
| +      } while (view_);
|
| +    }
|
| +
|
| +    mock_process_.reset();
|
| +    msg_loop_.RunAllPending();
|
| +    platform_->PlatformUninitialize();
|
| +    platform_.reset();
|
| +    // |params_| holds references to the two objects released after it.
|
| +    params_.reset();
|
| +    command_line_.reset();
|
| +    sandbox_init_wrapper_.reset();
|
| +  }
|
| +
|
| +  // Returns the main WebFrame for our RenderView.
|
| +  WebKit::WebFrame* GetMainFrame() {
|
| +    return view_->webview()->mainFrame();
|
| +  }
|
| +
|
| +  // Loads |url| into the RenderView, waiting for the load to finish.
|
| +  // The loop is exited by OnDidStopLoading().
|
| +  void LoadURL(const std::string& url) {
|
| +    GetMainFrame()->loadRequest(WebKit::WebURLRequest(GURL(url)));
|
| +    msg_loop_.Run();
|
| +  }
|
| +
|
| +  // Runs the DOMFeatureExtractor on the RenderView, waiting for the
|
| +  // completion callback.  Returns the success boolean from the callback.
|
| +  bool ExtractFeatures(FeatureMap* features) {
|
| +    success_ = false;
|
| +    extractor_->ExtractFeatures(
|
| +        features,
|
| +        NewCallback(this, &PhishingDOMFeatureExtractorTest::ExtractionDone));
|
| +    msg_loop_.Run();
|
| +    return success_;
|
| +  }
|
| +
|
| +  // Completion callback for feature extraction.  Stores the result and
|
| +  // exits the message loop started by ExtractFeatures().
|
| +  void ExtractionDone(bool success) {
|
| +    success_ = success;
|
| +    msg_loop_.Quit();
|
| +  }
|
| +
|
| +  // IPC message handlers below
|
| +
|
| +  // Notification that page load has finished.  Exit the message loop
|
| +  // so that the test can continue.
|
| +  void OnDidStopLoading() {
|
| +    msg_loop_.Quit();
|
| +  }
|
| +
|
| +  // Notification that the renderer wants to load a resource.
|
| +  // If the requested url is in responses_, we send the renderer a 200
|
| +  // and the supplied content, otherwise we send it a 404 error.
|
| +  void OnRequestResource(const IPC::Message& message,
|
| +                         int request_id,
|
| +                         const ViewHostMsg_Resource_Request& request_data) {
|
| +    // The raw header blocks use '\0' as the line separator and end with a
|
| +    // double '\0'.  They must be copied with an explicit length: assigning
|
| +    // such a literal to a std::string directly stops at the first '\0' and
|
| +    // silently drops everything after the status line.
|
| +    static const char kNotFoundHeaders[] =
|
| +        "HTTP/1.1 404 Not Found\0Content-Type:text/html\0\0";
|
| +    static const char kOkHeaders[] =
|
| +        "HTTP/1.1 200 OK\0Content-Type:text/html\0\0";
|
| +
|
| +    std::string headers, body;
|
| +    std::map<std::string, std::string>::const_iterator it =
|
| +        responses_.find(request_data.url.spec());
|
| +    if (it == responses_.end()) {
|
| +      // sizeof() - 1 drops only the implicit terminator of the literal.
|
| +      headers.assign(kNotFoundHeaders, sizeof(kNotFoundHeaders) - 1);
|
| +      body = "content not found";
|
| +    } else {
|
| +      headers.assign(kOkHeaders, sizeof(kOkHeaders) - 1);
|
| +      body = it->second;
|
| +    }
|
| +
|
| +    // Step 1: response headers.
|
| +    ResourceResponseHead response_head;
|
| +    response_head.headers = new net::HttpResponseHeaders(headers);
|
| +    response_head.mime_type = "text/html";
|
| +    ASSERT_TRUE(channel_->Send(new ViewMsg_Resource_ReceivedResponse(
|
| +        message.routing_id(), request_id, response_head)));
|
| +
|
| +    // Step 2: the body, copied into a shared memory segment whose handle is
|
| +    // passed along in the DataReceived message.
|
| +    base::SharedMemory shared_memory;
|
| +    ASSERT_TRUE(shared_memory.Create(std::wstring(), false,
|
| +                                     false, body.size()));
|
| +    ASSERT_TRUE(shared_memory.Map(body.size()));
|
| +    memcpy(shared_memory.memory(), body.data(), body.size());
|
| +
|
| +    base::SharedMemoryHandle handle;
|
| +    ASSERT_TRUE(shared_memory.GiveToProcess(base::Process::Current().handle(),
|
| +                                            &handle));
|
| +    ASSERT_TRUE(channel_->Send(new ViewMsg_Resource_DataReceived(
|
| +        message.routing_id(), request_id, handle, body.size())));
|
| +
|
| +    // Step 3: tell the renderer that the request is complete.
|
| +    ASSERT_TRUE(channel_->Send(new ViewMsg_Resource_RequestComplete(
|
| +        message.routing_id(),
|
| +        request_id,
|
| +        URLRequestStatus(),
|
| +        std::string())));
|
| +  }
|
| +
|
| +  // Notification that the render view we've created is ready to use.
|
| +  void OnRenderViewReady() {
|
| +    // Grab a pointer to the new view using RenderViewVisitor.
|
| +    ASSERT_TRUE(!view_);
|
| +    RenderView::ForEach(this);
|
| +    ASSERT_TRUE(view_);
|
| +    msg_loop_.Quit();
|
| +  }
|
| +
|
| +  static int next_thread_id_;  // incrementing counter for thread ids
|
| +  static const int32 kViewId = 5;  // arbitrary id for our testing view
|
| +
|
| +  MessageLoopForIO msg_loop_;
|
| +  // channel that the renderer uses to talk to the browser.
|
| +  // For this test, we will handle the browser end of the channel.
|
| +  scoped_ptr<IPC::Channel> channel_;
|
| +  RenderThread* render_thread_;  // owned by mock_process_
|
| +  scoped_ptr<MockRenderProcess> mock_process_;
|
| +  RenderView* view_;  // not owned, deletes itself on close
|
| +  scoped_ptr<RendererMainPlatformDelegate> platform_;
|
| +  scoped_ptr<MainFunctionParams> params_;
|
| +  scoped_ptr<CommandLine> command_line_;
|
| +  scoped_ptr<SandboxInitWrapper> sandbox_init_wrapper_;
|
| +
|
| +  scoped_ptr<PhishingDOMFeatureExtractor> extractor_;
|
| +  // Map of URL -> response body for network requests from the renderer.
|
| +  // Any URLs not in this map are served a 404 error.
|
| +  std::map<std::string, std::string> responses_;
|
| +  bool success_;  // holds the success value from ExtractFeatures
|
| +};
|
| +
|
| +// Starts at zero; SetUp() increments it for every test so that each test
|
| +// gets its own IPC channel name.
|
| +int PhishingDOMFeatureExtractorTest::next_thread_id_ = 0;
|
| +
|
| +// Checks the form- and input-related features on a single-frame page.
|
| +TEST_F(PhishingDOMFeatureExtractorTest, FormFeatures) {
|
| +  FeatureMap expected;
|
| +  FeatureMap extracted;
|
| +
|
| +  // Case 1: five forms (one posting to other.com) with a text input and a
|
| +  // checkbox.
|
| +  responses_["http://host.com/"] =
|
| +      "<html><head><body>"
|
| +      "<form action=\"query\"><input type=text><input type=checkbox></form>"
|
| +      "<form action=\"http://cgi.host.com/submit\"></form>"
|
| +      "<form action=\"http://other.com/\"></form>"
|
| +      "<form action=\"query\"></form>"
|
| +      "<form></form></body></html>";
|
| +
|
| +  expected.AddBooleanFeature(features::kPageHasForms);
|
| +  expected.AddRealFeature(features::kPageActionOtherDomainFreq, 0.25);
|
| +  expected.AddBooleanFeature(features::kPageHasTextInputs);
|
| +  expected.AddBooleanFeature(features::kPageHasCheckInputs);
|
| +
|
| +  LoadURL("http://host.com/");
|
| +  ASSERT_TRUE(ExtractFeatures(&extracted));
|
| +  EXPECT_THAT(extracted.features(), ContainerEq(expected.features()));
|
| +
|
| +  // Case 2: radio and password inputs that are not inside any form.
|
| +  responses_["http://host.com/"] =
|
| +      "<html><head><body>"
|
| +      "<input type=\"radio\"><input type=password></body></html>";
|
| +
|
| +  expected.Clear();
|
| +  expected.AddBooleanFeature(features::kPageHasRadioInputs);
|
| +  expected.AddBooleanFeature(features::kPageHasPswdInputs);
|
| +
|
| +  extracted.Clear();
|
| +  LoadURL("http://host.com/");
|
| +  ASSERT_TRUE(ExtractFeatures(&extracted));
|
| +  EXPECT_THAT(extracted.features(), ContainerEq(expected.features()));
|
| +
|
| +  // Case 3: an <input> without a type attribute is expected to be reported
|
| +  // as a text input.
|
| +  responses_["http://host.com/"] =
|
| +      "<html><head><body><input></body></html>";
|
| +
|
| +  expected.Clear();
|
| +  expected.AddBooleanFeature(features::kPageHasTextInputs);
|
| +
|
| +  extracted.Clear();
|
| +  LoadURL("http://host.com/");
|
| +  ASSERT_TRUE(ExtractFeatures(&extracted));
|
| +  EXPECT_THAT(extracted.features(), ContainerEq(expected.features()));
|
| +
|
| +  // Case 4: an <input> with an unrecognized type is expected to be reported
|
| +  // as a text input as well.
|
| +  responses_["http://host.com/"] =
|
| +      "<html><head><body><input type=\"invalid\"></body></html>";
|
| +
|
| +  expected.Clear();
|
| +  expected.AddBooleanFeature(features::kPageHasTextInputs);
|
| +
|
| +  extracted.Clear();
|
| +  LoadURL("http://host.com/");
|
| +  ASSERT_TRUE(ExtractFeatures(&extracted));
|
| +  EXPECT_THAT(extracted.features(), ContainerEq(expected.features()));
|
| +}
|
| +
|
| +// Checks the link-related features: the fraction of external links, the
|
| +// fraction of secure (https) links, and the per-domain link features.
|
| +TEST_F(PhishingDOMFeatureExtractorTest, LinkFeatures) {
|
| +  // Case 1: an http page with one same-domain link, one named anchor
|
| +  // without an href, and one link to chromium.org.
|
| +  responses_["http://www.host.com/"] =
|
| +      "<html><head><body>"
|
| +      "<a href=\"http://www2.host.com/abc\">link</a>"
|
| +      "<a name=page_anchor></a>"
|
| +      "<a href=\"http://www.chromium.org/\">chromium</a>"
|
| +      "</body></html>";
|
| +
|
| +  FeatureMap expected_features;
|
| +  expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.5);
|
| +  expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.0);
|
| +  expected_features.AddBooleanFeature(features::kPageLinkDomain +
|
| +                                      std::string("chromium.org"));
|
| +
|
| +  FeatureMap features;
|
| +  LoadURL("http://www.host.com/");
|
| +  ASSERT_TRUE(ExtractFeatures(&features));
|
| +  EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
|
| +
|
| +  // Case 2: the same host served over https, mixing relative, http and
|
| +  // https links.  The old http response is dropped first.
|
| +  responses_.clear();
|
| +  responses_["https://www.host.com/"] =
|
| +      "<html><head><body>"
|
| +      "<a href=\"login\">this is secure</a>"
|
| +      "<a href=\"http://host.com\">not secure</a>"
|
| +      "<a href=\"https://www2.host.com/login\">also secure</a>"
|
| +      "<a href=\"http://chromium.org/\">also not secure</a>"
|
| +      "</body></html>";
|
| +
|
| +  expected_features.Clear();
|
| +  expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
|
| +  expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.5);
|
| +  expected_features.AddBooleanFeature(features::kPageLinkDomain +
|
| +                                      std::string("chromium.org"));
|
| +
|
| +  features.Clear();
|
| +  LoadURL("https://www.host.com/");
|
| +  ASSERT_TRUE(ExtractFeatures(&features));
|
| +  EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
|
| +}
|
| +
|
| +// Checks the script-count thresholds and the other-domain image frequency.
|
| +TEST_F(PhishingDOMFeatureExtractorTest, ScriptAndImageFeatures) {
|
| +  FeatureMap expected;
|
| +  FeatureMap extracted;
|
| +
|
| +  // Case 1: two script tags and no images.
|
| +  responses_["http://host.com/"] =
|
| +      "<html><head><script></script><script></script></head></html>";
|
| +
|
| +  expected.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
|
| +
|
| +  LoadURL("http://host.com/");
|
| +  ASSERT_TRUE(ExtractFeatures(&extracted));
|
| +  EXPECT_THAT(extracted.features(), ContainerEq(expected.features()));
|
| +
|
| +  // Case 2: seven script tags, plus one relative image and one image that
|
| +  // is hosted on host2.com.
|
| +  responses_["http://host.com/"] =
|
| +      "<html><head><script></script><script></script><script></script>"
|
| +      "<script></script><script></script><script></script><script></script>"
|
| +      "</head><body><img src=\"blah.gif\">"
|
| +      "<img src=\"http://host2.com/blah.gif\"></body></html>";
|
| +
|
| +  expected.Clear();
|
| +  expected.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
|
| +  expected.AddBooleanFeature(features::kPageNumScriptTagsGTSix);
|
| +  expected.AddRealFeature(features::kPageImgOtherDomainFreq, 0.5);
|
| +
|
| +  extracted.Clear();
|
| +  LoadURL("http://host.com/");
|
| +  ASSERT_TRUE(ExtractFeatures(&extracted));
|
| +  EXPECT_THAT(extracted.features(), ContainerEq(expected.features()));
|
| +}
|
| +
|
| +TEST_F(PhishingDOMFeatureExtractorTest, SubFrames) {
|
| +  // Test that features are aggregated across all frames.
|
| +
|
| +  // Top-level page: embeds host2.com and host3.com as iframes.
|
| +  responses_["http://host.com/"] =
|
| +      "<html><body><input type=text><a href=\"info.html\">link</a>"
|
| +      "<iframe src=\"http://host2.com/\"></iframe>"
|
| +      "<iframe src=\"http://host3.com/\"></iframe>"
|
| +      "</body></html>";
|
| +
|
| +  // First subframe: two forms, a link, and a nested iframe.
|
| +  responses_["http://host2.com/"] =
|
| +      "<html><head><script></script><body>"
|
| +      "<form action=\"http://host4.com/\"><input type=checkbox></form>"
|
| +      "<form action=\"http://host2.com/submit\"></form>"
|
| +      "<a href=\"http://www.host2.com/home\">link</a>"
|
| +      "<iframe src=\"nested.html\"></iframe>"
|
| +      "</body></html>";
|
| +
|
| +  // Frame nested inside host2.com.
|
| +  responses_["http://host2.com/nested.html"] =
|
| +      "<html><body><input type=password>"
|
| +      "<a href=\"https://host4.com/\">link</a>"
|
| +      "<a href=\"relative\">another</a>"
|
| +      "</body></html>";
|
| +
|
| +  // Second subframe: a script and an image served from host.com.
|
| +  responses_["http://host3.com/"] =
|
| +      "<html><head><script></script><body>"
|
| +      "<img src=\"http://host.com/123.png\">"
|
| +      "</body></html>";
|
| +
|
| +  FeatureMap expected_features;
|
| +  expected_features.AddBooleanFeature(features::kPageHasForms);
|
| +  // Form action domains are compared to the URL of the document they're in,
|
| +  // not the URL of the toplevel page.  So http://host2.com/ has two form
|
| +  // actions, one of which is external.
|
| +  expected_features.AddRealFeature(features::kPageActionOtherDomainFreq, 0.5);
|
| +  expected_features.AddBooleanFeature(features::kPageHasTextInputs);
|
| +  expected_features.AddBooleanFeature(features::kPageHasPswdInputs);
|
| +  expected_features.AddBooleanFeature(features::kPageHasCheckInputs);
|
| +  expected_features.AddRealFeature(features::kPageExternalLinksFreq, 0.25);
|
| +  expected_features.AddBooleanFeature(features::kPageLinkDomain +
|
| +                                      std::string("host4.com"));
|
| +  expected_features.AddRealFeature(features::kPageSecureLinksFreq, 0.25);
|
| +  expected_features.AddBooleanFeature(features::kPageNumScriptTagsGTOne);
|
| +  expected_features.AddRealFeature(features::kPageImgOtherDomainFreq, 1.0);
|
| +
|
| +  FeatureMap features;
|
| +  LoadURL("http://host.com/");
|
| +  ASSERT_TRUE(ExtractFeatures(&features));
|
| +  EXPECT_THAT(features.features(), ContainerEq(expected_features.features()));
|
| +}
|
| +
|
| +// TODO(bryner): Test extraction with multiple passes, including the case where
|
| +// the node we stopped on is removed from the document.
|
| +
|
| +} // namespace safe_browsing
|
|
|