OLD | NEW |
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the | 5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the |
6 // client-side phishing detection model. These include the presence of various | 6 // client-side phishing detection model. These include the presence of various |
7 // types of elements, ratios of external and secure links, and tokens for | 7 // types of elements, ratios of external and secure links, and tokens for |
8 // external domains linked to. | 8 // external domains linked to. |
9 | 9 |
10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
12 | 12 |
13 #include <string> | 13 #include <string> |
14 | 14 |
15 #include "base/basictypes.h" | 15 #include "base/basictypes.h" |
16 #include "base/callback.h" | 16 #include "base/callback.h" |
17 #include "base/scoped_ptr.h" | 17 #include "base/scoped_ptr.h" |
18 #include "base/task.h" | 18 #include "base/task.h" |
19 | 19 |
20 class GURL; | 20 class GURL; |
21 class RenderView; | 21 class RenderView; |
22 | 22 |
23 namespace WebKit { | 23 namespace WebKit { |
24 class WebElement; | 24 class WebElement; |
25 class WebFrame; | 25 class WebFrame; |
26 } | 26 } |
27 | 27 |
28 namespace safe_browsing { | 28 namespace safe_browsing { |
| 29 class FeatureExtractorClock; |
29 class FeatureMap; | 30 class FeatureMap; |
30 | 31 |
31 class PhishingDOMFeatureExtractor { | 32 class PhishingDOMFeatureExtractor { |
32 public: | 33 public: |
33 // Callback to be run when feature extraction finishes. The callback | 34 // Callback to be run when feature extraction finishes. The callback |
34 // argument is true if extraction was successful, false otherwise. | 35 // argument is true if extraction was successful, false otherwise. |
35 typedef Callback1<bool>::Type DoneCallback; | 36 typedef Callback1<bool>::Type DoneCallback; |
36 | 37 |
37 // Creates a PhishingDOMFeatureExtractor for the specified RenderView. | 38 // Creates a PhishingDOMFeatureExtractor for the specified RenderView. |
38 // The PhishingDOMFeatureExtractor should be destroyed prior to destroying | 39 // The PhishingDOMFeatureExtractor should be destroyed prior to destroying |
39 // the RenderView. | 40 // the RenderView. |clock| is used for timing feature extractor operations, |
40 explicit PhishingDOMFeatureExtractor(RenderView* render_view); | 41 // and may be mocked for testing. PhishingDOMFeatureExtractor takes |
| 42 // ownership of the clock. |
| 43 PhishingDOMFeatureExtractor(RenderView* render_view, |
| 44 FeatureExtractorClock* clock); |
41 ~PhishingDOMFeatureExtractor(); | 45 ~PhishingDOMFeatureExtractor(); |
42 | 46 |
43 // Begins extracting features into the given FeatureMap for the page | 47 // Begins extracting features into the given FeatureMap for the page |
44 // currently loaded in this object's RenderView. To avoid blocking the | 48 // currently loaded in this object's RenderView. To avoid blocking the |
45 // render thread for too long, the feature extractor may run in several | 49 // render thread for too long, the feature extractor may run in several |
46 // chunks of work, posting a task to the current MessageLoop to continue | 50 // chunks of work, posting a task to the current MessageLoop to continue |
47 // processing. Once feature extraction is complete, |done_callback| | 51 // processing. Once feature extraction is complete, |done_callback| |
48 // is run. PhishingDOMFeatureExtractor takes ownership of the callback. | 52 // is run. PhishingDOMFeatureExtractor takes ownership of the callback. |
49 void ExtractFeatures(FeatureMap* features, DoneCallback* done_callback); | 53 void ExtractFeatures(FeatureMap* features, DoneCallback* done_callback); |
50 | 54 |
51 // Cancels any pending feature extraction. The DoneCallback will not be run. | 55 // Cancels any pending feature extraction. The DoneCallback will not be run. |
52 // Must be called if there is a feature extraction in progress when the page | 56 // Must be called if there is a feature extraction in progress when the page |
53 // is unloaded or the PhishingDOMFeatureExtractor is destroyed. | 57 // is unloaded or the PhishingDOMFeatureExtractor is destroyed. |
54 void CancelPendingExtraction(); | 58 void CancelPendingExtraction(); |
55 | 59 |
56 private: | 60 private: |
57 struct FrameData; | 61 struct FrameData; |
58 struct PageFeatureState; | 62 struct PageFeatureState; |
59 | 63 |
| 64 // The maximum amount of time that we will spend on a single extraction |
| 65 // iteration before pausing to let other MessageLoop tasks run. |
| 66 static const int kMaxTimePerChunkMs; |
| 67 |
| 68 // The number of elements that we will process before checking to see whether |
| 69 // kMaxTimePerChunkMs has elapsed. Since checking the current time can be |
| 70 // slow, we don't do this on every element processed. |
| 71 static const int kClockCheckGranularity; |
| 72 |
| 73 // The maximum total amount of time that the feature extractor will run |
| 74 // before giving up on the current page. |
| 75 static const int kMaxTotalTimeMs; |
| 76 |
60 // Does the actual work of ExtractFeatures. ExtractFeaturesWithTimeout runs | 77 // Does the actual work of ExtractFeatures. ExtractFeaturesWithTimeout runs |
61 // until a predefined maximum amount of time has elapsed, then posts a task | 78 // until a predefined maximum amount of time has elapsed, then posts a task |
62 // to the current MessageLoop to continue extraction. When extraction | 79 // to the current MessageLoop to continue extraction. When extraction |
63 // finishes, calls RunCallback(). | 80 // finishes, calls RunCallback(). |
64 void ExtractFeaturesWithTimeout(); | 81 void ExtractFeaturesWithTimeout(); |
65 | 82 |
66 // Handlers for the various HTML elements that we compute features for. | 83 // Handlers for the various HTML elements that we compute features for. |
67 // Since some of the features (such as ratios) cannot be computed until | 84 // Since some of the features (such as ratios) cannot be computed until |
68 // feature extraction is finished, these handlers do not add to the feature | 85 // feature extraction is finished, these handlers do not add to the feature |
69 // map directly. Instead, they update the values in the PageFeatureState. | 86 // map directly. Instead, they update the values in the PageFeatureState. |
(...skipping 24 matching lines...) Expand all Loading... |
94 bool IsExternalDomain(const GURL& url, std::string* domain) const; | 111 bool IsExternalDomain(const GURL& url, std::string* domain) const; |
95 | 112 |
96 // Called once all frames have been processed to compute features from the | 113 // Called once all frames have been processed to compute features from the |
97 // PageFeatureState and add them to |features_|. See features.h for a | 114 // PageFeatureState and add them to |features_|. See features.h for a |
98 // description of which features are computed. | 115 // description of which features are computed. |
99 void InsertFeatures(); | 116 void InsertFeatures(); |
100 | 117 |
101 // Non-owned pointer to the view that we will extract features from. | 118 // Non-owned pointer to the view that we will extract features from. |
102 RenderView* render_view_; | 119 RenderView* render_view_; |
103 | 120 |
| 121 // Owned pointer to our clock. |
| 122 scoped_ptr<FeatureExtractorClock> clock_; |
| 123 |
104 // The output parameters from the most recent call to ExtractFeatures(). | 124 // The output parameters from the most recent call to ExtractFeatures(). |
105 FeatureMap* features_; // The caller keeps ownership of this. | 125 FeatureMap* features_; // The caller keeps ownership of this. |
106 scoped_ptr<DoneCallback> done_callback_; | 126 scoped_ptr<DoneCallback> done_callback_; |
107 | 127 |
108 // Non-owned pointer to the current frame that we are processing. | 128 // Non-owned pointer to the current frame that we are processing. |
109 WebKit::WebFrame* cur_frame_; | 129 WebKit::WebFrame* cur_frame_; |
110 | 130 |
111 // Stores extra state for |cur_frame_| that will be persisted until we | 131 // Stores extra state for |cur_frame_| that will be persisted until we |
112 // advance to the next frame. | 132 // advance to the next frame. |
113 scoped_ptr<FrameData> cur_frame_data_; | 133 scoped_ptr<FrameData> cur_frame_data_; |
114 | 134 |
115 // Stores the intermediate data used to create features. This data is | 135 // Stores the intermediate data used to create features. This data is |
116 // accumulated across all frames in the RenderView. | 136 // accumulated across all frames in the RenderView. |
117 scoped_ptr<PageFeatureState> page_feature_state_; | 137 scoped_ptr<PageFeatureState> page_feature_state_; |
118 | 138 |
119 // Used to create ExtractFeaturesWithTimeout tasks. | 139 // Used to create ExtractFeaturesWithTimeout tasks. |
120 // These tasks are revoked if extraction is cancelled. | 140 // These tasks are revoked if extraction is cancelled. |
121 ScopedRunnableMethodFactory<PhishingDOMFeatureExtractor> method_factory_; | 141 ScopedRunnableMethodFactory<PhishingDOMFeatureExtractor> method_factory_; |
122 | 142 |
123 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor); | 143 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor); |
124 }; | 144 }; |
125 | 145 |
126 } // namespace safe_browsing | 146 } // namespace safe_browsing |
127 | 147 |
128 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 148 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
OLD | NEW |