OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the | 5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the |
6 // client-side phishing detection model. These include the presence of various | 6 // client-side phishing detection model. These include the presence of various |
7 // types of elements, ratios of external and secure links, and tokens for | 7 // types of elements, ratios of external and secure links, and tokens for |
8 // external domains linked to. | 8 // external domains linked to. |
9 | 9 |
10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
12 | 12 |
13 #include <string> | 13 #include <string> |
14 | 14 |
15 #include "base/basictypes.h" | 15 #include "base/basictypes.h" |
16 #include "base/callback_old.h" | 16 #include "base/callback_old.h" |
17 #include "base/memory/scoped_ptr.h" | 17 #include "base/memory/scoped_ptr.h" |
18 #include "base/task.h" | 18 #include "base/task.h" |
| 19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" |
19 | 20 |
20 class GURL; | 21 class GURL; |
21 class RenderView; | 22 class RenderView; |
22 | 23 |
23 namespace WebKit { | 24 namespace WebKit { |
24 class WebElement; | 25 class WebElement; |
25 class WebFrame; | |
26 } | 26 } |
27 | 27 |
28 namespace safe_browsing { | 28 namespace safe_browsing { |
29 class FeatureExtractorClock; | 29 class FeatureExtractorClock; |
30 class FeatureMap; | 30 class FeatureMap; |
31 | 31 |
32 class PhishingDOMFeatureExtractor { | 32 class PhishingDOMFeatureExtractor { |
33 public: | 33 public: |
34 // Callback to be run when feature extraction finishes. The callback | 34 // Callback to be run when feature extraction finishes. The callback |
35 // argument is true if extraction was successful, false otherwise. | 35 // argument is true if extraction was successful, false otherwise. |
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
95 // debug builds if the state is not as expected. This is a no-op in release | 95 // debug builds if the state is not as expected. This is a no-op in release |
96 // builds. | 96 // builds. |
97 void CheckNoPendingExtraction(); | 97 void CheckNoPendingExtraction(); |
98 | 98 |
99 // Runs |done_callback_| and then clears all internal state. | 99 // Runs |done_callback_| and then clears all internal state. |
100 void RunCallback(bool success); | 100 void RunCallback(bool success); |
101 | 101 |
102 // Clears all internal feature extraction state. | 102 // Clears all internal feature extraction state. |
103 void Clear(); | 103 void Clear(); |
104 | 104 |
105 // Called after advancing |cur_frame_| to update the state in | 105 // Called after advancing |cur_document_| to update the state in |
106 // |cur_frame_data_|. Returns true if the state was updated successfully. | 106 // |cur_frame_data_|. Returns true if the state was updated successfully. |
107 bool ResetFrameData(); | 107 bool ResetFrameData(); |
108 | 108 |
 | 109 // Returns the next document in frame-traversal order from |cur_document_|. |
| 110 // If there are no more documents, returns a null WebDocument. |
| 111 WebKit::WebDocument GetNextDocument(); |
| 112 |
109 // Given a URL, checks whether the domain is different from the domain of | 113 // Given a URL, checks whether the domain is different from the domain of |
110 // the current frame's URL. If so, stores the domain in |domain| and returns | 114 // the current frame's URL. If so, stores the domain in |domain| and returns |
111 // true, otherwise returns false. | 115 // true, otherwise returns false. |
112 bool IsExternalDomain(const GURL& url, std::string* domain) const; | 116 bool IsExternalDomain(const GURL& url, std::string* domain) const; |
113 | 117 |
114 // Called once all frames have been processed to compute features from the | 118 // Called once all frames have been processed to compute features from the |
115 // PageFeatureState and add them to |features_|. See features.h for a | 119 // PageFeatureState and add them to |features_|. See features.h for a |
116 // description of which features are computed. | 120 // description of which features are computed. |
117 void InsertFeatures(); | 121 void InsertFeatures(); |
118 | 122 |
119 // Non-owned pointer to the view that we will extract features from. | 123 // Non-owned pointer to the view that we will extract features from. |
120 RenderView* render_view_; | 124 RenderView* render_view_; |
121 | 125 |
122 // Non-owned pointer to our clock. | 126 // Non-owned pointer to our clock. |
123 FeatureExtractorClock* clock_; | 127 FeatureExtractorClock* clock_; |
124 | 128 |
125 // The output parameters from the most recent call to ExtractFeatures(). | 129 // The output parameters from the most recent call to ExtractFeatures(). |
126 FeatureMap* features_; // The caller keeps ownership of this. | 130 FeatureMap* features_; // The caller keeps ownership of this. |
127 scoped_ptr<DoneCallback> done_callback_; | 131 scoped_ptr<DoneCallback> done_callback_; |
128 | 132 |
129 // Non-owned pointer to the current frame that we are processing. | 133 // The current (sub-)document that we are processing. May be a null document |
130 WebKit::WebFrame* cur_frame_; | 134 // (isNull()) if we are not currently extracting features. |
| 135 WebKit::WebDocument cur_document_; |
131 | 136 |
132 // Stores extra state for |cur_frame_| that will be persisted until we | 137 // Stores extra state for |cur_document_| that will be persisted until we |
133 // advance to the next frame. | 138 // advance to the next document. |
134 scoped_ptr<FrameData> cur_frame_data_; | 139 scoped_ptr<FrameData> cur_frame_data_; |
135 | 140 |
136 // Stores the intermediate data used to create features. This data is | 141 // Stores the intermediate data used to create features. This data is |
137 // accumulated across all frames in the RenderView. | 142 // accumulated across all frames in the RenderView. |
138 scoped_ptr<PageFeatureState> page_feature_state_; | 143 scoped_ptr<PageFeatureState> page_feature_state_; |
139 | 144 |
140 // Used to create ExtractFeaturesWithTimeout tasks. | 145 // Used to create ExtractFeaturesWithTimeout tasks. |
141 // These tasks are revoked if extraction is cancelled. | 146 // These tasks are revoked if extraction is cancelled. |
142 ScopedRunnableMethodFactory<PhishingDOMFeatureExtractor> method_factory_; | 147 ScopedRunnableMethodFactory<PhishingDOMFeatureExtractor> method_factory_; |
143 | 148 |
144 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor); | 149 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor); |
145 }; | 150 }; |
146 | 151 |
147 } // namespace safe_browsing | 152 } // namespace safe_browsing |
148 | 153 |
149 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 154 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
OLD | NEW |