Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(178)

Unified Diff: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h

Issue 2878046: Add an extractor for DOM features to be used for client side phishing detection. (Closed)
Patch Set: address marria's comments Created 10 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h
diff --git a/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h
new file mode 100644
index 0000000000000000000000000000000000000000..bc9d5996aa2ea7d8632510af4536aa3f846c4cb8
--- /dev/null
+++ b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h
@@ -0,0 +1,128 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// PhishingDOMFeatureExtractor handles computing DOM-based features for the
+// client-side phishing detection model. These include the presence of various
+// types of elements, ratios of external and secure links, and tokens for
+// external domains linked to.
+
+#ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
+#define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/callback.h"
+#include "base/scoped_ptr.h"
+#include "base/task.h"
+
+class GURL;
+class RenderView;
+
+namespace WebKit {
+class WebElement;
+class WebFrame;
+}
+
+namespace safe_browsing {
+class FeatureMap;
+
+class PhishingDOMFeatureExtractor {
+ public:
+ // Callback to be run when feature extraction finishes. The callback
+ // argument is true if extraction was successful, false otherwise.
+ typedef Callback1<bool>::Type DoneCallback;
+
+ // Creates a PhishingDOMFeatureExtractor for the specified RenderView.
+ // The PhishingDOMFeatureExtractor should be destroyed prior to destroying
+ // the RenderView.
+ explicit PhishingDOMFeatureExtractor(RenderView* render_view);
+ ~PhishingDOMFeatureExtractor();
+
+ // Begins extracting features into |features| for the page currently loaded
+ // in this object's RenderView. To avoid blocking the render thread for too
+ // long, the extractor may run in several chunks of work, posting a task to
+ // the current MessageLoop to continue processing. |done_callback| is run
+ // once extraction is complete. The caller keeps ownership of |features|;
+ // PhishingDOMFeatureExtractor takes ownership of |done_callback|.
+ void ExtractFeatures(FeatureMap* features, DoneCallback* done_callback);
+
+ // Cancels any pending feature extraction. The DoneCallback will not be run.
+ // Must be called if there is a feature extraction in progress when the page
+ // is unloaded or the PhishingDOMFeatureExtractor is destroyed.
+ void CancelPendingExtraction();
+
+ private:
+ struct FrameData;
+ struct PageFeatureState;
+
+ // Does the actual work of ExtractFeatures. ExtractFeaturesWithTimeout runs
+ // until a predefined maximum amount of time has elapsed, then posts a task
+ // to the current MessageLoop to continue extraction. When extraction
+ // finishes, calls RunCallback().
+ void ExtractFeaturesWithTimeout();
+
+ // Handlers for the various HTML elements that we compute features for.
+ // Since some of the features (such as ratios) cannot be computed until
+ // feature extraction is finished, these handlers do not add to the feature
+ // map directly. Instead, they update the values in the PageFeatureState.
+ void HandleLink(const WebKit::WebElement& element);
+ void HandleForm(const WebKit::WebElement& element);
+ void HandleImage(const WebKit::WebElement& element);
+ void HandleInput(const WebKit::WebElement& element);
+ void HandleScript(const WebKit::WebElement& element);
+
+ // Helper to verify that there is no pending feature extraction. Dies in
+ // debug builds if the state is not as expected. This is a no-op in release
+ // builds.
+ void CheckNoPendingExtraction();
+
+ // Runs |done_callback_| and then clears all internal state.
+ void RunCallback(bool success);
+
+ // Clears all internal feature extraction state.
+ void Clear();
+
+ // Called after advancing |cur_frame_| to update the state in
+ // |cur_frame_data_|. Returns true if the state was updated successfully.
+ bool ResetFrameData();
+
+ // Given a URL, checks whether the domain is different from the domain of
+ // the current frame's URL. If so, stores the domain in |domain| and returns
+ // true, otherwise returns false.
+ bool IsExternalDomain(const GURL& url, std::string* domain) const;
+
+ // Called once all frames have been processed to compute features from the
+ // PageFeatureState and add them to |features_|. See features.h for a
+ // description of which features are computed.
+ void InsertFeatures();
+
+ // Non-owned pointer to the view that we will extract features from.
+ RenderView* render_view_;
+
+ // The arguments passed to the most recent call to ExtractFeatures().
+ FeatureMap* features_; // The caller keeps ownership of this.
+ scoped_ptr<DoneCallback> done_callback_;
+
+ // Non-owned pointer to the current frame that we are processing.
+ WebKit::WebFrame* cur_frame_;
+
+ // Stores extra state for |cur_frame_| that will be persisted until we
+ // advance to the next frame.
+ scoped_ptr<FrameData> cur_frame_data_;
+
+ // Stores the intermediate data used to create features. This data is
+ // accumulated across all frames in the RenderView.
+ scoped_ptr<PageFeatureState> page_feature_state_;
+
+ // Used to create ExtractFeaturesWithTimeout tasks.
+ // These tasks are revoked if extraction is cancelled.
+ ScopedRunnableMethodFactory<PhishingDOMFeatureExtractor> method_factory_;
+
+ DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor);
+};
+
+} // namespace safe_browsing
+
+#endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
« no previous file with comments | « chrome/renderer/safe_browsing/features_unittest.cc ('k') | chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698