Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1339)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h

Issue 2878046: Add an extractor for DOM features to be used for client side phishing detection. (Closed)
Patch Set: address marria's comments Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the
6 // client-side phishing detection model. These include the presence of various
7 // types of elements, ratios of external and secure links, and tokens for
8 // external domains linked to.
9
10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
12
13 #include <string>
14
15 #include "base/basictypes.h"
16 #include "base/callback.h"
17 #include "base/scoped_ptr.h"
18 #include "base/task.h"
19
20 class GURL;
21 class RenderView;
22
23 namespace WebKit {
24 class WebElement;
25 class WebFrame;
26 }
27
28 namespace safe_browsing {
29 class FeatureMap;
30
31 class PhishingDOMFeatureExtractor {
32 public:
33 // Callback to be run when feature extraction finishes. The callback
34 // argument is true if extraction was successful, false otherwise.
35 typedef Callback1<bool>::Type DoneCallback;
36
37 // Creates a PhishingDOMFeatureExtractor for the specified RenderView, which
38 // is not owned. The PhishingDOMFeatureExtractor should be destroyed prior to
39 // destroying the RenderView.
40 explicit PhishingDOMFeatureExtractor(RenderView* render_view);
41 ~PhishingDOMFeatureExtractor();
42
43 // Begins extracting features into the given FeatureMap for the page
44 // currently loaded in this object's RenderView. To avoid blocking the
45 // render thread for too long, the feature extractor may run in several
46 // chunks of work, posting a task to the current MessageLoop to continue
47 // processing. Once feature extraction is complete, |done_callback| is run.
48 // Takes ownership of |done_callback|; the caller keeps ownership of |features|.
49 void ExtractFeatures(FeatureMap* features, DoneCallback* done_callback);
50
51 // Cancels any pending feature extraction. The DoneCallback will not be run.
52 // Must be called if there is a feature extraction in progress when the page
53 // is unloaded or the PhishingDOMFeatureExtractor is destroyed.
54 void CancelPendingExtraction();
55
56 private:
57 struct FrameData;
58 struct PageFeatureState;
59
60 // Does the actual work of ExtractFeatures. ExtractFeaturesWithTimeout runs
61 // until a predefined maximum amount of time has elapsed, then posts a task
62 // to the current MessageLoop to continue extraction. When extraction
63 // finishes, calls RunCallback().
64 void ExtractFeaturesWithTimeout();
65
66 // Handlers for the various HTML elements that we compute features for.
67 // Since some of the features (such as ratios) cannot be computed until
68 // feature extraction is finished, these handlers do not add to the feature
69 // map directly. Instead, they update the values in the PageFeatureState.
70 void HandleLink(const WebKit::WebElement& element);
71 void HandleForm(const WebKit::WebElement& element);
72 void HandleImage(const WebKit::WebElement& element);
73 void HandleInput(const WebKit::WebElement& element);
74 void HandleScript(const WebKit::WebElement& element);
75
76 // Helper to verify that there is no pending feature extraction. Dies in
77 // debug builds if the state is not as expected. This is a no-op in release
78 // builds.
79 void CheckNoPendingExtraction();
80
81 // Runs |done_callback_| and then clears all internal state.
82 void RunCallback(bool success);
83
84 // Clears all internal feature extraction state.
85 void Clear();
86
87 // Called after advancing |cur_frame_| to update the state in
88 // |cur_frame_data_|. Returns true if the state was updated successfully.
89 bool ResetFrameData();
90
91 // Given a URL, checks whether the domain is different from the domain of
92 // the current frame's URL. If so, stores the domain in |domain| and returns
93 // true, otherwise returns false.
94 bool IsExternalDomain(const GURL& url, std::string* domain) const;
95
96 // Called once all frames have been processed to compute features from the
97 // PageFeatureState and add them to |features_|. See features.h for a
98 // description of which features are computed.
99 void InsertFeatures();
100
101 // Non-owned pointer to the view that we will extract features from.
102 RenderView* render_view_;
103
104 // The output parameters from the most recent call to ExtractFeatures().
105 FeatureMap* features_; // The caller keeps ownership of this.
106 scoped_ptr<DoneCallback> done_callback_;
107
108 // Non-owned pointer to the current frame that we are processing.
109 WebKit::WebFrame* cur_frame_;
110
111 // Stores extra state for |cur_frame_| that will be persisted until we
112 // advance to the next frame.
113 scoped_ptr<FrameData> cur_frame_data_;
114
115 // Stores the intermediate data used to create features. This data is
116 // accumulated across all frames in the RenderView.
117 scoped_ptr<PageFeatureState> page_feature_state_;
118
119 // Used to create ExtractFeaturesWithTimeout tasks.
120 // These tasks are revoked if extraction is cancelled.
121 ScopedRunnableMethodFactory<PhishingDOMFeatureExtractor> method_factory_;
122
123 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor);
124 };
125
126 } // namespace safe_browsing
127
128 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
OLDNEW
« no previous file with comments | « chrome/renderer/safe_browsing/features_unittest.cc ('k') | chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698