Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(31)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h

Issue 8055018: Merge 102541 - Change PhishingDOMFeatureExtractor to cache the WebDocument rather than a WebFrame... (Closed) Base URL: svn://svn.chromium.org/chrome/branches/874/src/
Patch Set: Created 9 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 // 4 //
5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the 5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the
6 // client-side phishing detection model. These include the presence of various 6 // client-side phishing detection model. These include the presence of various
7 // types of elements, ratios of external and secure links, and tokens for 7 // types of elements, ratios of external and secure links, and tokens for
8 // external domains linked to. 8 // external domains linked to.
9 9
10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ 10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ 11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
12 12
13 #include <string> 13 #include <string>
14 14
15 #include "base/basictypes.h" 15 #include "base/basictypes.h"
16 #include "base/callback_old.h" 16 #include "base/callback_old.h"
17 #include "base/memory/scoped_ptr.h" 17 #include "base/memory/scoped_ptr.h"
18 #include "base/task.h" 18 #include "base/task.h"
19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h"
19 20
20 class GURL; 21 class GURL;
21 class RenderView; 22 class RenderView;
22 23
23 namespace WebKit { 24 namespace WebKit {
24 class WebElement; 25 class WebElement;
25 class WebFrame;
26 } 26 }
27 27
28 namespace safe_browsing { 28 namespace safe_browsing {
29 class FeatureExtractorClock; 29 class FeatureExtractorClock;
30 class FeatureMap; 30 class FeatureMap;
31 31
32 class PhishingDOMFeatureExtractor { 32 class PhishingDOMFeatureExtractor {
33 public: 33 public:
34 // Callback to be run when feature extraction finishes. The callback 34 // Callback to be run when feature extraction finishes. The callback
35 // argument is true if extraction was successful, false otherwise. 35 // argument is true if extraction was successful, false otherwise.
(...skipping 59 matching lines...) Expand 10 before | Expand all | Expand 10 after
95 // debug builds if the state is not as expected. This is a no-op in release 95 // debug builds if the state is not as expected. This is a no-op in release
96 // builds. 96 // builds.
97 void CheckNoPendingExtraction(); 97 void CheckNoPendingExtraction();
98 98
99 // Runs |done_callback_| and then clears all internal state. 99 // Runs |done_callback_| and then clears all internal state.
100 void RunCallback(bool success); 100 void RunCallback(bool success);
101 101
102 // Clears all internal feature extraction state. 102 // Clears all internal feature extraction state.
103 void Clear(); 103 void Clear();
104 104
105 // Called after advancing |cur_frame_| to update the state in 105 // Called after advancing |cur_document_| to update the state in
106 // |cur_frame_data_|. Returns true if the state was updated successfully. 106 // |cur_frame_data_|. Returns true if the state was updated successfully.
107 bool ResetFrameData(); 107 bool ResetFrameData();
108 108
109 // Returns the next document in frame-traversal order from |cur_document_|.
110 // If there are no more documents, returns a null WebDocument.
111 WebKit::WebDocument GetNextDocument();
112
109 // Given a URL, checks whether the domain is different from the domain of 113 // Given a URL, checks whether the domain is different from the domain of
110 // the current frame's URL. If so, stores the domain in |domain| and returns 114 // the current frame's URL. If so, stores the domain in |domain| and returns
111 // true, otherwise returns false. 115 // true, otherwise returns false.
112 bool IsExternalDomain(const GURL& url, std::string* domain) const; 116 bool IsExternalDomain(const GURL& url, std::string* domain) const;
113 117
114 // Called once all frames have been processed to compute features from the 118 // Called once all frames have been processed to compute features from the
115 // PageFeatureState and add them to |features_|. See features.h for a 119 // PageFeatureState and add them to |features_|. See features.h for a
116 // description of which features are computed. 120 // description of which features are computed.
117 void InsertFeatures(); 121 void InsertFeatures();
118 122
119 // Non-owned pointer to the view that we will extract features from. 123 // Non-owned pointer to the view that we will extract features from.
120 RenderView* render_view_; 124 RenderView* render_view_;
121 125
122 // Non-owned pointer to our clock. 126 // Non-owned pointer to our clock.
123 FeatureExtractorClock* clock_; 127 FeatureExtractorClock* clock_;
124 128
125 // The output parameters from the most recent call to ExtractFeatures(). 129 // The output parameters from the most recent call to ExtractFeatures().
126 FeatureMap* features_; // The caller keeps ownership of this. 130 FeatureMap* features_; // The caller keeps ownership of this.
127 scoped_ptr<DoneCallback> done_callback_; 131 scoped_ptr<DoneCallback> done_callback_;
128 132
129 // Non-owned pointer to the current frame that we are processing. 133 // The current (sub-)document that we are processing. May be a null document
130 WebKit::WebFrame* cur_frame_; 134 // (isNull()) if we are not currently extracting features.
135 WebKit::WebDocument cur_document_;
131 136
132 // Stores extra state for |cur_frame_| that will be persisted until we 137 // Stores extra state for |cur_document_| that will be persisted until we
133 // advance to the next frame. 138 // advance to the next frame.
134 scoped_ptr<FrameData> cur_frame_data_; 139 scoped_ptr<FrameData> cur_frame_data_;
135 140
136 // Stores the intermediate data used to create features. This data is 141 // Stores the intermediate data used to create features. This data is
137 // accumulated across all frames in the RenderView. 142 // accumulated across all frames in the RenderView.
138 scoped_ptr<PageFeatureState> page_feature_state_; 143 scoped_ptr<PageFeatureState> page_feature_state_;
139 144
140 // Used to create ExtractFeaturesWithTimeout tasks. 145 // Used to create ExtractFeaturesWithTimeout tasks.
141 // These tasks are revoked if extraction is cancelled. 146 // These tasks are revoked if extraction is cancelled.
142 ScopedRunnableMethodFactory<PhishingDOMFeatureExtractor> method_factory_; 147 ScopedRunnableMethodFactory<PhishingDOMFeatureExtractor> method_factory_;
143 148
144 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor); 149 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor);
145 }; 150 };
146 151
147 } // namespace safe_browsing 152 } // namespace safe_browsing
148 153
149 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_ 154 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
OLDNEW
« no previous file with comments | « no previous file | chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698