| OLD | NEW |
| 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 // | 4 // |
| 5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the | 5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the |
| 6 // client-side phishing detection model. These include the presence of various | 6 // client-side phishing detection model. These include the presence of various |
| 7 // types of elements, ratios of external and secure links, and tokens for | 7 // types of elements, ratios of external and secure links, and tokens for |
| 8 // external domains linked to. | 8 // external domains linked to. |
| 9 | 9 |
| 10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
| 11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
| 12 | 12 |
| 13 #include <string> | 13 #include <string> |
| 14 | 14 |
| 15 #include "base/basictypes.h" | 15 #include "base/basictypes.h" |
| 16 #include "base/callback_old.h" | 16 #include "base/callback.h" |
| 17 #include "base/memory/scoped_ptr.h" | 17 #include "base/memory/scoped_ptr.h" |
| 18 #include "base/task.h" | 18 #include "base/task.h" |
| 19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" | 19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" |
| 20 | 20 |
| 21 class GURL; | 21 class GURL; |
| 22 | 22 |
| 23 namespace WebKit { | 23 namespace WebKit { |
| 24 class WebElement; | 24 class WebElement; |
| 25 } | 25 } |
| 26 | 26 |
| 27 namespace content { | 27 namespace content { |
| 28 class RenderView; | 28 class RenderView; |
| 29 } | 29 } |
| 30 | 30 |
| 31 namespace safe_browsing { | 31 namespace safe_browsing { |
| 32 class FeatureExtractorClock; | 32 class FeatureExtractorClock; |
| 33 class FeatureMap; | 33 class FeatureMap; |
| 34 | 34 |
| 35 class PhishingDOMFeatureExtractor { | 35 class PhishingDOMFeatureExtractor { |
| 36 public: | 36 public: |
| 37 // Callback to be run when feature extraction finishes. The callback | 37 // Callback to be run when feature extraction finishes. The callback |
| 38 // argument is true if extraction was successful, false otherwise. | 38 // argument is true if extraction was successful, false otherwise. |
| 39 typedef Callback1<bool>::Type DoneCallback; | 39 typedef base::Callback<void(bool)> DoneCallback; |
| 40 | 40 |
| 41 // Creates a PhishingDOMFeatureExtractor for the specified RenderView. | 41 // Creates a PhishingDOMFeatureExtractor for the specified RenderView. |
| 42 // The PhishingDOMFeatureExtrator should be destroyed prior to destroying | 42 // The PhishingDOMFeatureExtractor should be destroyed prior to destroying |
| 43 // the RenderView. |clock| is used for timing feature extractor operations, | 43 // the RenderView. |clock| is used for timing feature extractor operations, |
| 44 // and may be mocked for testing. The caller maintains ownership of the | 44 // and may be mocked for testing. The caller maintains ownership of the |
| 45 // clock. | 45 // clock. |
| 46 PhishingDOMFeatureExtractor(content::RenderView* render_view, | 46 PhishingDOMFeatureExtractor(content::RenderView* render_view, |
| 47 FeatureExtractorClock* clock); | 47 FeatureExtractorClock* clock); |
| 48 ~PhishingDOMFeatureExtractor(); | 48 ~PhishingDOMFeatureExtractor(); |
| 49 | 49 |
| 50 // Begins extracting features into the given FeatureMap for the page | 50 // Begins extracting features into the given FeatureMap for the page |
| 51 // currently loaded in this object's RenderView. To avoid blocking the | 51 // currently loaded in this object's RenderView. To avoid blocking the |
| 52 // render thread for too long, the feature extractor may run in several | 52 // render thread for too long, the feature extractor may run in several |
| 53 // chunks of work, posting a task to the current MessageLoop to continue | 53 // chunks of work, posting a task to the current MessageLoop to continue |
| 54 // processing. Once feature extraction is complete, |done_callback| | 54 // processing. Once feature extraction is complete, |done_callback| |
| 55 // is run on the current thread. PhishingDOMFeatureExtractor takes | 55 // is run on the current thread. PhishingDOMFeatureExtractor keeps |
| 56 // ownership of the callback. | 56 // its own copy of the callback. |
| 57 void ExtractFeatures(FeatureMap* features, DoneCallback* done_callback); | 57 void ExtractFeatures(FeatureMap* features, const DoneCallback& done_callback); |
| 58 | 58 |
| 59 // Cancels any pending feature extraction. The DoneCallback will not be run. | 59 // Cancels any pending feature extraction. The DoneCallback will not be run. |
| 60 // Must be called if there is a feature extraction in progress when the page | 60 // Must be called if there is a feature extraction in progress when the page |
| 61 // is unloaded or the PhishingDOMFeatureExtractor is destroyed. | 61 // is unloaded or the PhishingDOMFeatureExtractor is destroyed. |
| 62 void CancelPendingExtraction(); | 62 void CancelPendingExtraction(); |
| 63 | 63 |
| 64 private: | 64 private: |
| 65 struct FrameData; | 65 struct FrameData; |
| 66 struct PageFeatureState; | 66 struct PageFeatureState; |
| 67 | 67 |
| (...skipping 56 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 124 void InsertFeatures(); | 124 void InsertFeatures(); |
| 125 | 125 |
| 126 // Non-owned pointer to the view that we will extract features from. | 126 // Non-owned pointer to the view that we will extract features from. |
| 127 content::RenderView* render_view_; | 127 content::RenderView* render_view_; |
| 128 | 128 |
| 129 // Non-owned pointer to our clock. | 129 // Non-owned pointer to our clock. |
| 130 FeatureExtractorClock* clock_; | 130 FeatureExtractorClock* clock_; |
| 131 | 131 |
| 132 // The output parameters from the most recent call to ExtractFeatures(). | 132 // The output parameters from the most recent call to ExtractFeatures(). |
| 133 FeatureMap* features_; // The caller keeps ownership of this. | 133 FeatureMap* features_; // The caller keeps ownership of this. |
| 134 scoped_ptr<DoneCallback> done_callback_; | 134 DoneCallback done_callback_; |
| 135 | 135 |
| 136 // The current (sub-)document that we are processing. May be a null document | 136 // The current (sub-)document that we are processing. May be a null document |
| 137 // (isNull()) if we are not currently extracting features. | 137 // (isNull()) if we are not currently extracting features. |
| 138 WebKit::WebDocument cur_document_; | 138 WebKit::WebDocument cur_document_; |
| 139 | 139 |
| 140 // Stores extra state for |cur_document_| that will be persisted until we | 140 // Stores extra state for |cur_document_| that will be persisted until we |
| 141 // advance to the next frame. | 141 // advance to the next frame. |
| 142 scoped_ptr<FrameData> cur_frame_data_; | 142 scoped_ptr<FrameData> cur_frame_data_; |
| 143 | 143 |
| 144 // Stores the intermediate data used to create features. This data is | 144 // Stores the intermediate data used to create features. This data is |
| 145 // accumulated across all frames in the RenderView. | 145 // accumulated across all frames in the RenderView. |
| 146 scoped_ptr<PageFeatureState> page_feature_state_; | 146 scoped_ptr<PageFeatureState> page_feature_state_; |
| 147 | 147 |
| 148 // Used in scheduling ExtractFeaturesWithTimeout tasks. | 148 // Used in scheduling ExtractFeaturesWithTimeout tasks. |
| 149 // These pointers are invalidated if extraction is cancelled. | 149 // These pointers are invalidated if extraction is cancelled. |
| 150 base::WeakPtrFactory<PhishingDOMFeatureExtractor> weak_factory_; | 150 base::WeakPtrFactory<PhishingDOMFeatureExtractor> weak_factory_; |
| 151 | 151 |
| 152 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor); | 152 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor); |
| 153 }; | 153 }; |
| 154 | 154 |
| 155 } // namespace safe_browsing | 155 } // namespace safe_browsing |
| 156 | 156 |
| 157 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 157 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
| OLD | NEW |