Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1697)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 2667343006: Componentize safe_browsing [X+1] : move the renderer part to component.
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
6
7 #include <memory>
8 #include <set>
9
10 #include "base/bind.h"
11 #include "base/callback.h"
12 #include "base/lazy_instance.h"
13 #include "base/logging.h"
14 #include "base/metrics/histogram_macros.h"
15 #include "chrome/common/safe_browsing/csd.pb.h"
16 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
17 #include "chrome/renderer/safe_browsing/phishing_classifier.h"
18 #include "chrome/renderer/safe_browsing/scorer.h"
19 #include "components/safe_browsing/common/safebrowsing_messages.h"
20 #include "content/public/renderer/document_state.h"
21 #include "content/public/renderer/navigation_state.h"
22 #include "content/public/renderer/render_frame.h"
23 #include "content/public/renderer/render_thread.h"
24 #include "third_party/WebKit/public/platform/WebURL.h"
25 #include "third_party/WebKit/public/web/WebDocument.h"
26 #include "third_party/WebKit/public/web/WebLocalFrame.h"
27 #include "third_party/WebKit/public/web/WebView.h"
28
29 using content::DocumentState;
30 using content::NavigationState;
31 using content::RenderThread;
32
33 namespace safe_browsing {
34
35 static GURL StripRef(const GURL& url) {
36 GURL::Replacements replacements;
37 replacements.ClearRef();
38 return url.ReplaceComponents(replacements);
39 }
40
41 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates;
42 static base::LazyInstance<PhishingClassifierDelegates>
43 g_delegates = LAZY_INSTANCE_INITIALIZER;
44
45 static base::LazyInstance<std::unique_ptr<const safe_browsing::Scorer>>
46 g_phishing_scorer = LAZY_INSTANCE_INITIALIZER;
47
48 // static
49 PhishingClassifierFilter* PhishingClassifierFilter::Create() {
50 // Private constructor and public static Create() method to facilitate
51 // stubbing out this class for binary-size reduction purposes.
52 return new PhishingClassifierFilter();
53 }
54
55 PhishingClassifierFilter::PhishingClassifierFilter()
56 : RenderThreadObserver() {}
57
58 PhishingClassifierFilter::~PhishingClassifierFilter() {}
59
60 bool PhishingClassifierFilter::OnControlMessageReceived(
61 const IPC::Message& message) {
62 bool handled = true;
63 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierFilter, message)
64 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_SetPhishingModel, OnSetPhishingModel)
65 IPC_MESSAGE_UNHANDLED(handled = false)
66 IPC_END_MESSAGE_MAP()
67 return handled;
68 }
69
70 void PhishingClassifierFilter::OnSetPhishingModel(const std::string& model) {
71 safe_browsing::Scorer* scorer = NULL;
72 // An empty model string means we should disable client-side phishing
73 // detection.
74 if (!model.empty()) {
75 scorer = safe_browsing::Scorer::Create(model);
76 if (!scorer) {
77 DLOG(ERROR) << "Unable to create a PhishingScorer - corrupt model?";
78 return;
79 }
80 }
81 PhishingClassifierDelegates::iterator i;
82 for (i = g_delegates.Get().begin(); i != g_delegates.Get().end(); ++i) {
83 (*i)->SetPhishingScorer(scorer);
84 }
85 g_phishing_scorer.Get().reset(scorer);
86 }
87
88 // static
89 PhishingClassifierDelegate* PhishingClassifierDelegate::Create(
90 content::RenderFrame* render_frame,
91 PhishingClassifier* classifier) {
92 // Private constructor and public static Create() method to facilitate
93 // stubbing out this class for binary-size reduction purposes.
94 return new PhishingClassifierDelegate(render_frame, classifier);
95 }
96
97 PhishingClassifierDelegate::PhishingClassifierDelegate(
98 content::RenderFrame* render_frame,
99 PhishingClassifier* classifier)
100 : content::RenderFrameObserver(render_frame),
101 last_main_frame_transition_(ui::PAGE_TRANSITION_LINK),
102 have_page_text_(false),
103 is_classifying_(false) {
104 g_delegates.Get().insert(this);
105 if (!classifier) {
106 classifier =
107 new PhishingClassifier(render_frame, new FeatureExtractorClock());
108 }
109
110 classifier_.reset(classifier);
111
112 if (g_phishing_scorer.Get().get())
113 SetPhishingScorer(g_phishing_scorer.Get().get());
114 }
115
116 PhishingClassifierDelegate::~PhishingClassifierDelegate() {
117 CancelPendingClassification(SHUTDOWN);
118 g_delegates.Get().erase(this);
119 }
120
121 void PhishingClassifierDelegate::SetPhishingScorer(
122 const safe_browsing::Scorer* scorer) {
123 if (is_classifying_) {
124 // If there is a classification going on right now it means we're
125 // actually replacing an existing scorer with a new model. In
126 // this case we simply cancel the current classification.
127 // TODO(noelutz): if this happens too frequently we could also
128 // replace the old scorer with the new one once classification is done
129 // but this would complicate the code somewhat.
130 CancelPendingClassification(NEW_PHISHING_SCORER);
131 }
132 classifier_->set_phishing_scorer(scorer);
133 // Start classifying the current page if all conditions are met.
134 // See MaybeStartClassification() for details.
135 MaybeStartClassification();
136 }
137
138 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {
139 last_url_received_from_browser_ = StripRef(url);
140 // Start classifying the current page if all conditions are met.
141 // See MaybeStartClassification() for details.
142 MaybeStartClassification();
143 }
144
145 void PhishingClassifierDelegate::DidCommitProvisionalLoad(
146 bool is_new_navigation,
147 bool is_same_page_navigation) {
148 blink::WebLocalFrame* frame = render_frame()->GetWebFrame();
149 // A new page is starting to load, so cancel classificaiton.
150 //
151 // TODO(bryner): We shouldn't need to cancel classification if the navigation
152 // is within the same page. However, if we let classification continue in
153 // this case, we need to properly deal with the fact that PageCaptured will
154 // be called again for the in-page navigation. We need to be sure not to
155 // swap out the page text while the term feature extractor is still running.
156 DocumentState* document_state = DocumentState::FromDataSource(
157 frame->dataSource());
158 NavigationState* navigation_state = document_state->navigation_state();
159 CancelPendingClassification(navigation_state->WasWithinSamePage()
160 ? NAVIGATE_WITHIN_PAGE
161 : NAVIGATE_AWAY);
162 if (frame->parent())
163 return;
164
165 last_main_frame_transition_ = navigation_state->GetTransitionType();
166 }
167
168 void PhishingClassifierDelegate::PageCaptured(base::string16* page_text,
169 bool preliminary_capture) {
170 if (preliminary_capture) {
171 return;
172 }
173 // Make sure there's no classification in progress. We don't want to swap
174 // out the page text string from underneath the term feature extractor.
175 //
176 // Note: Currently, if the url hasn't changed, we won't restart
177 // classification in this case. We may want to adjust this.
178 CancelPendingClassification(PAGE_RECAPTURED);
179 last_finished_load_url_ = render_frame()->GetWebFrame()->document().url();
180 classifier_page_text_.swap(*page_text);
181 have_page_text_ = true;
182 MaybeStartClassification();
183 }
184
185 void PhishingClassifierDelegate::CancelPendingClassification(
186 CancelClassificationReason reason) {
187 if (is_classifying_) {
188 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.CancelClassificationReason",
189 reason,
190 CANCEL_CLASSIFICATION_MAX);
191 is_classifying_ = false;
192 }
193 if (classifier_->is_ready()) {
194 classifier_->CancelPendingClassification();
195 }
196 classifier_page_text_.clear();
197 have_page_text_ = false;
198 }
199
200 bool PhishingClassifierDelegate::OnMessageReceived(
201 const IPC::Message& message) {
202 bool handled = true;
203 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message)
204 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_StartPhishingDetection,
205 OnStartPhishingDetection)
206 IPC_MESSAGE_UNHANDLED(handled = false)
207 IPC_END_MESSAGE_MAP()
208 return handled;
209 }
210
211 void PhishingClassifierDelegate::ClassificationDone(
212 const ClientPhishingRequest& verdict) {
213 // We no longer need the page text.
214 classifier_page_text_.clear();
215 DVLOG(2) << "Phishy verdict = " << verdict.is_phishing()
216 << " score = " << verdict.client_score();
217 if (verdict.client_score() != PhishingClassifier::kInvalidScore) {
218 DCHECK_EQ(last_url_sent_to_classifier_.spec(), verdict.url());
219 RenderThread::Get()->Send(new SafeBrowsingHostMsg_PhishingDetectionDone(
220 routing_id(), verdict.SerializeAsString()));
221 }
222 }
223
224 void PhishingClassifierDelegate::MaybeStartClassification() {
225 // We can begin phishing classification when the following conditions are
226 // met:
227 // 1. A Scorer has been created
228 // 2. The browser has sent a StartPhishingDetection message for the current
229 // toplevel URL.
230 // 3. The page has finished loading and the page text has been extracted.
231 // 4. The load is a new navigation (not a session history navigation).
232 // 5. The toplevel URL has not already been classified.
233 //
234 // Note that if we determine that this particular navigation should not be
235 // classified at all (as opposed to deferring it until we get an IPC or the
236 // load completes), we discard the page text since it won't be needed.
237 if (!classifier_->is_ready()) {
238 DVLOG(2) << "Not starting classification, no Scorer created.";
239 // Keep classifier_page_text_, in case a Scorer is set later.
240 return;
241 }
242
243 if (last_main_frame_transition_ & ui::PAGE_TRANSITION_FORWARD_BACK) {
244 // Skip loads from session history navigation. However, update the
245 // last URL sent to the classifier, so that we'll properly detect
246 // in-page navigations.
247 DVLOG(2) << "Not starting classification for back/forward navigation";
248 last_url_sent_to_classifier_ = last_finished_load_url_;
249 classifier_page_text_.clear(); // we won't need this.
250 have_page_text_ = false;
251 return;
252 }
253
254 GURL stripped_last_load_url(StripRef(last_finished_load_url_));
255 if (stripped_last_load_url == StripRef(last_url_sent_to_classifier_)) {
256 // We've already classified this toplevel URL, so this was likely an
257 // in-page navigation or a subframe navigation. The browser should not
258 // send a StartPhishingDetection IPC in this case.
259 DVLOG(2) << "Toplevel URL is unchanged, not starting classification.";
260 classifier_page_text_.clear(); // we won't need this.
261 have_page_text_ = false;
262 return;
263 }
264
265 if (!have_page_text_) {
266 DVLOG(2) << "Not starting classification, there is no page text ready.";
267 return;
268 }
269
270 if (last_url_received_from_browser_ != stripped_last_load_url) {
271 // The browser has not yet confirmed that this URL should be classified,
272 // so defer classification for now. Note: the ref does not affect
273 // any of the browser's preclassification checks, so we don't require it
274 // to match.
275 DVLOG(2) << "Not starting classification, last url from browser is "
276 << last_url_received_from_browser_ << ", last finished load is "
277 << last_finished_load_url_;
278 // Keep classifier_page_text_, in case the browser notifies us later that
279 // we should classify the URL.
280 return;
281 }
282
283 DVLOG(2) << "Starting classification for " << last_finished_load_url_;
284 last_url_sent_to_classifier_ = last_finished_load_url_;
285 is_classifying_ = true;
286 classifier_->BeginClassification(
287 &classifier_page_text_,
288 base::Bind(&PhishingClassifierDelegate::ClassificationDone,
289 base::Unretained(this)));
290 }
291
292 void PhishingClassifierDelegate::OnDestruct() {
293 delete this;
294 }
295
296 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698