OLD | NEW |
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 // | 4 // |
5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the | 5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the |
6 // client-side phishing detection model. These include the presence of various | 6 // client-side phishing detection model. These include the presence of various |
7 // types of elements, ratios of external and secure links, and tokens for | 7 // types of elements, ratios of external and secure links, and tokens for |
8 // external domains linked to. | 8 // external domains linked to. |
9 | 9 |
10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
12 | 12 |
13 #include <string> | 13 #include <string> |
14 | 14 |
15 #include "base/basictypes.h" | 15 #include "base/basictypes.h" |
16 #include "base/callback.h" | 16 #include "base/callback.h" |
17 #include "base/memory/scoped_ptr.h" | 17 #include "base/memory/scoped_ptr.h" |
18 #include "base/memory/weak_ptr.h" | 18 #include "base/memory/weak_ptr.h" |
19 #include "third_party/WebKit/public/web/WebDocument.h" | 19 #include "third_party/WebKit/public/web/WebDocument.h" |
20 | 20 |
21 class GURL; | 21 class GURL; |
22 | 22 |
23 namespace WebKit { | 23 namespace blink { |
24 class WebElement; | 24 class WebElement; |
25 } | 25 } |
26 | 26 |
27 namespace content { | 27 namespace content { |
28 class RenderView; | 28 class RenderView; |
29 } | 29 } |
30 | 30 |
31 namespace safe_browsing { | 31 namespace safe_browsing { |
32 class FeatureExtractorClock; | 32 class FeatureExtractorClock; |
33 class FeatureMap; | 33 class FeatureMap; |
(...skipping 47 matching lines...)
81 // Does the actual work of ExtractFeatures. ExtractFeaturesWithTimeout runs | 81 // Does the actual work of ExtractFeatures. ExtractFeaturesWithTimeout runs |
82 // until a predefined maximum amount of time has elapsed, then posts a task | 82 // until a predefined maximum amount of time has elapsed, then posts a task |
83 // to the current MessageLoop to continue extraction. When extraction | 83 // to the current MessageLoop to continue extraction. When extraction |
84 // finishes, calls RunCallback(). | 84 // finishes, calls RunCallback(). |
85 void ExtractFeaturesWithTimeout(); | 85 void ExtractFeaturesWithTimeout(); |
86 | 86 |
87 // Handlers for the various HTML elements that we compute features for. | 87 // Handlers for the various HTML elements that we compute features for. |
88 // Since some of the features (such as ratios) cannot be computed until | 88 // Since some of the features (such as ratios) cannot be computed until |
89 // feature extraction is finished, these handlers do not add to the feature | 89 // feature extraction is finished, these handlers do not add to the feature |
90 // map directly. Instead, they update the values in the PageFeatureState. | 90 // map directly. Instead, they update the values in the PageFeatureState. |
91 void HandleLink(const WebKit::WebElement& element); | 91 void HandleLink(const blink::WebElement& element); |
92 void HandleForm(const WebKit::WebElement& element); | 92 void HandleForm(const blink::WebElement& element); |
93 void HandleImage(const WebKit::WebElement& element); | 93 void HandleImage(const blink::WebElement& element); |
94 void HandleInput(const WebKit::WebElement& element); | 94 void HandleInput(const blink::WebElement& element); |
95 void HandleScript(const WebKit::WebElement& element); | 95 void HandleScript(const blink::WebElement& element); |
96 | 96 |
97 // Helper to verify that there is no pending feature extraction. Dies in | 97 // Helper to verify that there is no pending feature extraction. Dies in |
98 // debug builds if the state is not as expected. This is a no-op in release | 98 // debug builds if the state is not as expected. This is a no-op in release |
99 // builds. | 99 // builds. |
100 void CheckNoPendingExtraction(); | 100 void CheckNoPendingExtraction(); |
101 | 101 |
102 // Runs |done_callback_| and then clears all internal state. | 102 // Runs |done_callback_| and then clears all internal state. |
103 void RunCallback(bool success); | 103 void RunCallback(bool success); |
104 | 104 |
105 // Clears all internal feature extraction state. | 105 // Clears all internal feature extraction state. |
106 void Clear(); | 106 void Clear(); |
107 | 107 |
108 // Called after advancing |cur_document_| to update the state in | 108 // Called after advancing |cur_document_| to update the state in |
109 // |cur_frame_data_|. | 109 // |cur_frame_data_|. |
110 void ResetFrameData(); | 110 void ResetFrameData(); |
111 | 111 |
112 // Returns the next document in frame-traversal order from |cur_document_|. | 112 // Returns the next document in frame-traversal order from |cur_document_|. |
113 // If there are no more documents, returns a null WebDocument. | 113 // If there are no more documents, returns a null WebDocument. |
114 WebKit::WebDocument GetNextDocument(); | 114 blink::WebDocument GetNextDocument(); |
115 | 115 |
116 // Given a URL, checks whether the domain is different from the domain of | 116 // Given a URL, checks whether the domain is different from the domain of |
117 // the current frame's URL. If so, stores the domain in |domain| and returns | 117 // the current frame's URL. If so, stores the domain in |domain| and returns |
118 // true, otherwise returns false. | 118 // true, otherwise returns false. |
119 bool IsExternalDomain(const GURL& url, std::string* domain) const; | 119 bool IsExternalDomain(const GURL& url, std::string* domain) const; |
120 | 120 |
121 // Called once all frames have been processed to compute features from the | 121 // Called once all frames have been processed to compute features from the |
122 // PageFeatureState and add them to |features_|. See features.h for a | 122 // PageFeatureState and add them to |features_|. See features.h for a |
123 // description of which features are computed. | 123 // description of which features are computed. |
124 void InsertFeatures(); | 124 void InsertFeatures(); |
125 | 125 |
126 // Non-owned pointer to the view that we will extract features from. | 126 // Non-owned pointer to the view that we will extract features from. |
127 content::RenderView* render_view_; | 127 content::RenderView* render_view_; |
128 | 128 |
129 // Non-owned pointer to our clock. | 129 // Non-owned pointer to our clock. |
130 FeatureExtractorClock* clock_; | 130 FeatureExtractorClock* clock_; |
131 | 131 |
132 // The output parameters from the most recent call to ExtractFeatures(). | 132 // The output parameters from the most recent call to ExtractFeatures(). |
133 FeatureMap* features_; // The caller keeps ownership of this. | 133 FeatureMap* features_; // The caller keeps ownership of this. |
134 DoneCallback done_callback_; | 134 DoneCallback done_callback_; |
135 | 135 |
136 // The current (sub-)document that we are processing. May be a null document | 136 // The current (sub-)document that we are processing. May be a null document |
137 // (isNull()) if we are not currently extracting features. | 137 // (isNull()) if we are not currently extracting features. |
138 WebKit::WebDocument cur_document_; | 138 blink::WebDocument cur_document_; |
139 | 139 |
140 // Stores extra state for |cur_document_| that will be persisted until we | 140 // Stores extra state for |cur_document_| that will be persisted until we |
141 // advance to the next frame. | 141 // advance to the next frame. |
142 scoped_ptr<FrameData> cur_frame_data_; | 142 scoped_ptr<FrameData> cur_frame_data_; |
143 | 143 |
144 // Stores the intermediate data used to create features. This data is | 144 // Stores the intermediate data used to create features. This data is |
145 // accumulated across all frames in the RenderView. | 145 // accumulated across all frames in the RenderView. |
146 scoped_ptr<PageFeatureState> page_feature_state_; | 146 scoped_ptr<PageFeatureState> page_feature_state_; |
147 | 147 |
148 // Used in scheduling ExtractFeaturesWithTimeout tasks. | 148 // Used in scheduling ExtractFeaturesWithTimeout tasks. |
149 // The weak pointers it vends are invalidated if extraction is cancelled. | 149 // The weak pointers it vends are invalidated if extraction is cancelled. |
150 base::WeakPtrFactory<PhishingDOMFeatureExtractor> weak_factory_; | 150 base::WeakPtrFactory<PhishingDOMFeatureExtractor> weak_factory_; |
151 | 151 |
152 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor); | 152 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor); |
153 }; | 153 }; |
154 | 154 |
155 } // namespace safe_browsing | 155 } // namespace safe_browsing |
156 | 156 |
157 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ | 157 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ |
OLD | NEW |