Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(217)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h

Issue 3130039: Limit the time spent on a single iteration of PhishingDOMFeatureExtractor. (Closed) Base URL: http://src.chromium.org/git/chromium.git
Patch Set: tiny comment fix Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the 5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the
6 // client-side phishing detection model. These include the presence of various 6 // client-side phishing detection model. These include the presence of various
7 // types of elements, ratios of external and secure links, and tokens for 7 // types of elements, ratios of external and secure links, and tokens for
8 // external domains linked to. 8 // external domains linked to.
9 9
10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ 10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ 11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
12 12
13 #include <string> 13 #include <string>
14 14
15 #include "base/basictypes.h" 15 #include "base/basictypes.h"
16 #include "base/callback.h" 16 #include "base/callback.h"
17 #include "base/scoped_ptr.h" 17 #include "base/scoped_ptr.h"
18 #include "base/task.h" 18 #include "base/task.h"
19 19
20 class GURL; 20 class GURL;
21 class RenderView; 21 class RenderView;
22 22
23 namespace WebKit { 23 namespace WebKit {
24 class WebElement; 24 class WebElement;
25 class WebFrame; 25 class WebFrame;
26 } 26 }
27 27
28 namespace safe_browsing { 28 namespace safe_browsing {
29 class FeatureExtractorClock;
29 class FeatureMap; 30 class FeatureMap;
30 31
31 class PhishingDOMFeatureExtractor { 32 class PhishingDOMFeatureExtractor {
32 public: 33 public:
33 // Callback to be run when feature extraction finishes. The callback 34 // Callback to be run when feature extraction finishes. The callback
34 // argument is true if extraction was successful, false otherwise. 35 // argument is true if extraction was successful, false otherwise.
35 typedef Callback1<bool>::Type DoneCallback; 36 typedef Callback1<bool>::Type DoneCallback;
36 37
37 // Creates a PhishingDOMFeatureExtractor for the specified RenderView. 38 // Creates a PhishingDOMFeatureExtractor for the specified RenderView.
38 // The PhishingDOMFeatureExtractor should be destroyed prior to destroying 39 // The PhishingDOMFeatureExtractor should be destroyed prior to destroying
39 // the RenderView. 40 // the RenderView. |clock| is used for timing feature extractor operations,
40 explicit PhishingDOMFeatureExtractor(RenderView* render_view); 41 // and may be mocked for testing. PhishingDOMFeatureExtractor takes
42 // ownership of the clock.
43 PhishingDOMFeatureExtractor(RenderView* render_view,
44 FeatureExtractorClock* clock);
41 ~PhishingDOMFeatureExtractor(); 45 ~PhishingDOMFeatureExtractor();
42 46
43 // Begins extracting features into the given FeatureMap for the page 47 // Begins extracting features into the given FeatureMap for the page
44 // currently loaded in this object's RenderView. To avoid blocking the 48 // currently loaded in this object's RenderView. To avoid blocking the
45 // render thread for too long, the feature extractor may run in several 49 // render thread for too long, the feature extractor may run in several
46 // chunks of work, posting a task to the current MessageLoop to continue 50 // chunks of work, posting a task to the current MessageLoop to continue
47 // processing. Once feature extraction is complete, |done_callback| 51 // processing. Once feature extraction is complete, |done_callback|
48 // is run. PhishingDOMFeatureExtractor takes ownership of the callback. 52 // is run. PhishingDOMFeatureExtractor takes ownership of the callback.
49 void ExtractFeatures(FeatureMap* features, DoneCallback* done_callback); 53 void ExtractFeatures(FeatureMap* features, DoneCallback* done_callback);
50 54
51 // Cancels any pending feature extraction. The DoneCallback will not be run. 55 // Cancels any pending feature extraction. The DoneCallback will not be run.
52 // Must be called if there is a feature extraction in progress when the page 56 // Must be called if there is a feature extraction in progress when the page
53 // is unloaded or the PhishingDOMFeatureExtractor is destroyed. 57 // is unloaded or the PhishingDOMFeatureExtractor is destroyed.
54 void CancelPendingExtraction(); 58 void CancelPendingExtraction();
55 59
56 private: 60 private:
57 struct FrameData; 61 struct FrameData;
58 struct PageFeatureState; 62 struct PageFeatureState;
59 63
64 // The maximum amount of time that we will spend on a single extraction
65 // iteration before pausing to let other MessageLoop tasks run.
66 static const int kMaxTimePerChunkMs;
67
68 // The number of elements that we will process before checking to see whether
69 // kMaxTimePerChunkMs has elapsed. Since checking the current time can be
70 // slow, we don't do this on every element processed.
71 static const int kClockCheckGranularity;
72
73 // The maximum total amount of time that the feature extractor will run
74 // before giving up on the current page.
75 static const int kMaxTotalTimeMs;
76
60 // Does the actual work of ExtractFeatures. ExtractFeaturesWithTimeout runs 77 // Does the actual work of ExtractFeatures. ExtractFeaturesWithTimeout runs
61 // until a predefined maximum amount of time has elapsed, then posts a task 78 // until a predefined maximum amount of time has elapsed, then posts a task
62 // to the current MessageLoop to continue extraction. When extraction 79 // to the current MessageLoop to continue extraction. When extraction
63 // finishes, calls RunCallback(). 80 // finishes, calls RunCallback().
64 void ExtractFeaturesWithTimeout(); 81 void ExtractFeaturesWithTimeout();
65 82
66 // Handlers for the various HTML elements that we compute features for. 83 // Handlers for the various HTML elements that we compute features for.
67 // Since some of the features (such as ratios) cannot be computed until 84 // Since some of the features (such as ratios) cannot be computed until
68 // feature extraction is finished, these handlers do not add to the feature 85 // feature extraction is finished, these handlers do not add to the feature
69 // map directly. Instead, they update the values in the PageFeatureState. 86 // map directly. Instead, they update the values in the PageFeatureState.
(...skipping 24 matching lines...) Expand all
94 bool IsExternalDomain(const GURL& url, std::string* domain) const; 111 bool IsExternalDomain(const GURL& url, std::string* domain) const;
95 112
96 // Called once all frames have been processed to compute features from the 113 // Called once all frames have been processed to compute features from the
97 // PageFeatureState and add them to |features_|. See features.h for a 114 // PageFeatureState and add them to |features_|. See features.h for a
98 // description of which features are computed. 115 // description of which features are computed.
99 void InsertFeatures(); 116 void InsertFeatures();
100 117
101 // Non-owned pointer to the view that we will extract features from. 118 // Non-owned pointer to the view that we will extract features from.
102 RenderView* render_view_; 119 RenderView* render_view_;
103 120
121 // Owned pointer to our clock.
122 scoped_ptr<FeatureExtractorClock> clock_;
123
104 // The output parameters from the most recent call to ExtractFeatures(). 124 // The output parameters from the most recent call to ExtractFeatures().
105 FeatureMap* features_; // The caller keeps ownership of this. 125 FeatureMap* features_; // The caller keeps ownership of this.
106 scoped_ptr<DoneCallback> done_callback_; 126 scoped_ptr<DoneCallback> done_callback_;
107 127
108 // Non-owned pointer to the current frame that we are processing. 128 // Non-owned pointer to the current frame that we are processing.
109 WebKit::WebFrame* cur_frame_; 129 WebKit::WebFrame* cur_frame_;
110 130
111 // Stores extra state for |cur_frame_| that will be persisted until we 131 // Stores extra state for |cur_frame_| that will be persisted until we
112 // advance to the next frame. 132 // advance to the next frame.
113 scoped_ptr<FrameData> cur_frame_data_; 133 scoped_ptr<FrameData> cur_frame_data_;
114 134
115 // Stores the intermediate data used to create features. This data is 135 // Stores the intermediate data used to create features. This data is
116 // accumulated across all frames in the RenderView. 136 // accumulated across all frames in the RenderView.
117 scoped_ptr<PageFeatureState> page_feature_state_; 137 scoped_ptr<PageFeatureState> page_feature_state_;
118 138
119 // Used to create ExtractFeaturesWithTimeout tasks. 139 // Used to create ExtractFeaturesWithTimeout tasks.
120 // These tasks are revoked if extraction is cancelled. 140 // These tasks are revoked if extraction is cancelled.
121 ScopedRunnableMethodFactory<PhishingDOMFeatureExtractor> method_factory_; 141 ScopedRunnableMethodFactory<PhishingDOMFeatureExtractor> method_factory_;
122 142
123 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor); 143 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor);
124 }; 144 };
125 145
126 } // namespace safe_browsing 146 } // namespace safe_browsing
127 147
128 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ 148 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698