Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(43)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.h

Issue 2667343006: Componentize safe_browsing [X+1] : move the renderer part to component.
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 //
5 // PhishingDOMFeatureExtractor handles computing DOM-based features for the
6 // client-side phishing detection model. These include the presence of various
7 // types of elements, ratios of external and secure links, and tokens for
8 // external domains linked to.
9
10 #ifndef CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
11 #define CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
12
13 #include <memory>
14 #include <string>
15
16 #include "base/callback.h"
17 #include "base/macros.h"
18 #include "base/memory/weak_ptr.h"
19 #include "third_party/WebKit/public/web/WebDocument.h"
20
21 class GURL;
22
23 namespace blink {
24 class WebElement;
25 }
26
27 namespace safe_browsing {
28 class FeatureExtractorClock;
29 class FeatureMap;
30
31 class PhishingDOMFeatureExtractor {
32 public:
33 // Callback to be run when feature extraction finishes. The callback
34 // argument is true if extraction was successful, false otherwise.
35 typedef base::Callback<void(bool)> DoneCallback;
36
37 // Creates a PhishingDOMFeatureExtractor instance.
38 // |clock| is used for timing feature extractor operations, and may be
39 // mocked for testing. The caller maintains ownership of the clock.
40 explicit PhishingDOMFeatureExtractor(FeatureExtractorClock* clock);
41 ~PhishingDOMFeatureExtractor();
42
43 // Begins extracting features into the given FeatureMap for the page.
44 // To avoid blocking the render thread for too long, the feature extractor
45 // may run in several chunks of work, posting a task to the current
46 // MessageLoop to continue processing. Once feature extraction is complete,
47 // |done_callback| is run on the current thread. PhishingDOMFeatureExtractor
48 // takes ownership of the callback.
49 void ExtractFeatures(blink::WebDocument document,
50 FeatureMap* features,
51 const DoneCallback& done_callback);
52
53 // Cancels any pending feature extraction. The DoneCallback will not be run.
54 // Must be called if there is a feature extraction in progress when the page
55 // is unloaded or the PhishingDOMFeatureExtractor is destroyed.
56 void CancelPendingExtraction();
57
58 private:
59 struct FrameData;
60 struct PageFeatureState;
61
62 // The maximum amount of wall time that we will spend on a single extraction
63 // iteration before pausing to let other MessageLoop tasks run.
64 static const int kMaxTimePerChunkMs;
65
66 // The number of elements that we will process before checking to see whether
67 // kMaxTimePerChunkMs has elapsed. Since checking the current time can be
68 // slow, we don't do this on every element processed.
69 static const int kClockCheckGranularity;
70
71 // The maximum total amount of time that the feature extractor will run
72 // before giving up on the current page.
73 static const int kMaxTotalTimeMs;
74
75 // Does the actual work of ExtractFeatures. ExtractFeaturesWithTimeout runs
76 // until a predefined maximum amount of time has elapsed, then posts a task
77 // to the current MessageLoop to continue extraction. When extraction
78 // finishes, calls RunCallback().
79 void ExtractFeaturesWithTimeout();
80
81 // Handlers for the various HTML elements that we compute features for.
82 // Since some of the features (such as ratios) cannot be computed until
83 // feature extraction is finished, these handlers do not add to the feature
84 // map directly. Instead, they update the values in the PageFeatureState.
85 void HandleLink(const blink::WebElement& element);
86 void HandleForm(const blink::WebElement& element);
87 void HandleImage(const blink::WebElement& element);
88 void HandleInput(const blink::WebElement& element);
89 void HandleScript(const blink::WebElement& element);
90
91 // Helper to verify that there is no pending feature extraction. Dies in
92 // debug builds if the state is not as expected. This is a no-op in release
93 // builds.
94 void CheckNoPendingExtraction();
95
96 // Runs |done_callback_| and then clears all internal state.
97 void RunCallback(bool success);
98
99 // Clears all internal feature extraction state.
100 void Clear();
101
102 // Called after advancing |cur_document_| to update the state in
103 // |cur_frame_data_|.
104 void ResetFrameData();
105
106 // Returns the next document in frame-traversal order from cur_document_.
107 // If there are no more documents, returns a null WebDocument.
108 blink::WebDocument GetNextDocument();
109
110 // Given a URL, checks whether the domain is different from the domain of
111 // the current frame's URL. If so, stores the domain in |domain| and returns
112 // true, otherwise returns false.
113 virtual bool IsExternalDomain(const GURL& url, std::string* domain) const;
114
115 // Given a partial URL, extend it to a full url based on the current frame's
116 // URL.
117 virtual blink::WebURL CompleteURL(const blink::WebElement& element,
118 const blink::WebString& partial_url);
119
120 // Called once all frames have been processed to compute features from the
121 // PageFeatureState and add them to |features_|. See features.h for a
122 // description of which features are computed.
123 void InsertFeatures();
124
125
126 // Non-owned pointer to our clock.
127 FeatureExtractorClock* clock_;
128
129 // The output parameters from the most recent call to ExtractFeatures().
130 FeatureMap* features_; // The caller keeps ownership of this.
131 DoneCallback done_callback_;
132
133 // The current (sub-)document that we are processing. May be a null document
134 // (isNull()) if we are not currently extracting features.
135 blink::WebDocument cur_document_;
136
137 // Stores extra state for |cur_document_| that will be persisted until we
138 // advance to the next frame.
139 std::unique_ptr<FrameData> cur_frame_data_;
140
141 // Stores the intermediate data used to create features. This data is
142 // accumulated across all frames in the RenderView.
143 std::unique_ptr<PageFeatureState> page_feature_state_;
144
145 // Used in scheduling ExtractFeaturesWithTimeout tasks.
146 // These pointers are invalidated if extraction is cancelled.
147 base::WeakPtrFactory<PhishingDOMFeatureExtractor> weak_factory_;
148
149 DISALLOW_COPY_AND_ASSIGN(PhishingDOMFeatureExtractor);
150 };
151
152 } // namespace safe_browsing
153
154 #endif // CHROME_RENDERER_SAFE_BROWSING_PHISHING_DOM_FEATURE_EXTRACTOR_H_
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698