chrome/renderer/safe_browsing/phishing_classifier_delegate.cc - Issue 2667343006: Componentize safe_browsing [X+1] : move the renderer part to component.

Side by Side Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 2667343006: Componentize safe_browsing [X+1] : move the renderer part to component.

Patch Set: Created 3 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

« no previous file with comments | « chrome/renderer/safe_browsing/phishing_classifier_delegate.h ('k') | chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
	(Empty)
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.

4

5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"

6

7 #include <memory>

8 #include <set>

9

10 #include "base/bind.h"

11 #include "base/callback.h"

12 #include "base/lazy_instance.h"

13 #include "base/logging.h"

14 #include "base/metrics/histogram_macros.h"

15 #include "chrome/common/safe_browsing/csd.pb.h"

16 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"

17 #include "chrome/renderer/safe_browsing/phishing_classifier.h"

18 #include "chrome/renderer/safe_browsing/scorer.h"

19 #include "components/safe_browsing/common/safebrowsing_messages.h"

20 #include "content/public/renderer/document_state.h"

21 #include "content/public/renderer/navigation_state.h"

22 #include "content/public/renderer/render_frame.h"

23 #include "content/public/renderer/render_thread.h"

24 #include "third_party/WebKit/public/platform/WebURL.h"

25 #include "third_party/WebKit/public/web/WebDocument.h"

26 #include "third_party/WebKit/public/web/WebLocalFrame.h"

27 #include "third_party/WebKit/public/web/WebView.h"

28

29 using content::DocumentState;

30 using content::NavigationState;

31 using content::RenderThread;

32

33 namespace safe_browsing {

34

35 static GURL StripRef(const GURL& url) {

36 GURL::Replacements replacements;

37 replacements.ClearRef();

38 return url.ReplaceComponents(replacements);

39 }

40

41 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates;

42 static base::LazyInstance<PhishingClassifierDelegates>

43 g_delegates = LAZY_INSTANCE_INITIALIZER;

44

45 static base::LazyInstance<std::unique_ptr<const safe_browsing::Scorer>>

46 g_phishing_scorer = LAZY_INSTANCE_INITIALIZER;

47

48 // static

49 PhishingClassifierFilter* PhishingClassifierFilter::Create() {

50 // Private constructor and public static Create() method to facilitate

51 // stubbing out this class for binary-size reduction purposes.

52 return new PhishingClassifierFilter();

53 }

54

55 PhishingClassifierFilter::PhishingClassifierFilter()

56 : RenderThreadObserver() {}

57

58 PhishingClassifierFilter::~PhishingClassifierFilter() {}

59

60 bool PhishingClassifierFilter::OnControlMessageReceived(

61 const IPC::Message& message) {

62 bool handled = true;

63 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierFilter, message)

64 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_SetPhishingModel, OnSetPhishingModel)

65 IPC_MESSAGE_UNHANDLED(handled = false)

66 IPC_END_MESSAGE_MAP()

67 return handled;

68 }

69

70 void PhishingClassifierFilter::OnSetPhishingModel(const std::string& model) {

71 safe_browsing::Scorer* scorer = NULL;

72 // An empty model string means we should disable client-side phishing

73 // detection.

74 if (!model.empty()) {

75 scorer = safe_browsing::Scorer::Create(model);

76 if (!scorer) {

77 DLOG(ERROR) << "Unable to create a PhishingScorer - corrupt model?";

78 return;

79 }

80 }

81 PhishingClassifierDelegates::iterator i;

82 for (i = g_delegates.Get().begin(); i != g_delegates.Get().end(); ++i) {

83 (*i)->SetPhishingScorer(scorer);

84 }

85 g_phishing_scorer.Get().reset(scorer);

86 }

87

88 // static

89 PhishingClassifierDelegate* PhishingClassifierDelegate::Create(

90 content::RenderFrame* render_frame,

91 PhishingClassifier* classifier) {

92 // Private constructor and public static Create() method to facilitate

93 // stubbing out this class for binary-size reduction purposes.

94 return new PhishingClassifierDelegate(render_frame, classifier);

95 }

96

97 PhishingClassifierDelegate::PhishingClassifierDelegate(

98 content::RenderFrame* render_frame,

99 PhishingClassifier* classifier)

100 : content::RenderFrameObserver(render_frame),

101 last_main_frame_transition_(ui::PAGE_TRANSITION_LINK),

102 have_page_text_(false),

103 is_classifying_(false) {

104 g_delegates.Get().insert(this);

105 if (!classifier) {

106 classifier =

107 new PhishingClassifier(render_frame, new FeatureExtractorClock());

108 }

109

110 classifier_.reset(classifier);

111

112 if (g_phishing_scorer.Get().get())

113 SetPhishingScorer(g_phishing_scorer.Get().get());

114 }

115

116 PhishingClassifierDelegate::~PhishingClassifierDelegate() {

117 CancelPendingClassification(SHUTDOWN);

118 g_delegates.Get().erase(this);

119 }

120

121 void PhishingClassifierDelegate::SetPhishingScorer(

122 const safe_browsing::Scorer* scorer) {

123 if (is_classifying_) {

124 // If there is a classification going on right now it means we're

125 // actually replacing an existing scorer with a new model. In

126 // this case we simply cancel the current classification.

127 // TODO(noelutz): if this happens too frequently we could also

128 // replace the old scorer with the new one once classification is done

129 // but this would complicate the code somewhat.

130 CancelPendingClassification(NEW_PHISHING_SCORER);

131 }

132 classifier_->set_phishing_scorer(scorer);

133 // Start classifying the current page if all conditions are met.

134 // See MaybeStartClassification() for details.

135 MaybeStartClassification();

136 }

137

138 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {

139 last_url_received_from_browser_ = StripRef(url);

140 // Start classifying the current page if all conditions are met.

141 // See MaybeStartClassification() for details.

142 MaybeStartClassification();

143 }

144

145 void PhishingClassifierDelegate::DidCommitProvisionalLoad(

146 bool is_new_navigation,

147 bool is_same_page_navigation) {

148 blink::WebLocalFrame* frame = render_frame()->GetWebFrame();

149 // A new page is starting to load, so cancel classificaiton.

150 //

151 // TODO(bryner): We shouldn't need to cancel classification if the navigation

152 // is within the same page. However, if we let classification continue in

153 // this case, we need to properly deal with the fact that PageCaptured will

154 // be called again for the in-page navigation. We need to be sure not to

155 // swap out the page text while the term feature extractor is still running.

156 DocumentState* document_state = DocumentState::FromDataSource(

157 frame->dataSource());

158 NavigationState* navigation_state = document_state->navigation_state();

159 CancelPendingClassification(navigation_state->WasWithinSamePage()

160 ? NAVIGATE_WITHIN_PAGE

161 : NAVIGATE_AWAY);

162 if (frame->parent())

163 return;

164

165 last_main_frame_transition_ = navigation_state->GetTransitionType();

166 }

167

168 void PhishingClassifierDelegate::PageCaptured(base::string16* page_text,

169 bool preliminary_capture) {

170 if (preliminary_capture) {

171 return;

172 }

173 // Make sure there's no classification in progress. We don't want to swap

174 // out the page text string from underneath the term feature extractor.

175 //

176 // Note: Currently, if the url hasn't changed, we won't restart

177 // classification in this case. We may want to adjust this.

178 CancelPendingClassification(PAGE_RECAPTURED);

179 last_finished_load_url_ = render_frame()->GetWebFrame()->document().url();

180 classifier_page_text_.swap(*page_text);

181 have_page_text_ = true;

182 MaybeStartClassification();

183 }

184

185 void PhishingClassifierDelegate::CancelPendingClassification(

186 CancelClassificationReason reason) {

187 if (is_classifying_) {

188 UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.CancelClassificationReason",

189 reason,

190 CANCEL_CLASSIFICATION_MAX);

191 is_classifying_ = false;

192 }

193 if (classifier_->is_ready()) {

194 classifier_->CancelPendingClassification();

195 }

196 classifier_page_text_.clear();

197 have_page_text_ = false;

198 }

199

200 bool PhishingClassifierDelegate::OnMessageReceived(

201 const IPC::Message& message) {

202 bool handled = true;

203 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message)

204 IPC_MESSAGE_HANDLER(SafeBrowsingMsg_StartPhishingDetection,

205 OnStartPhishingDetection)

206 IPC_MESSAGE_UNHANDLED(handled = false)

207 IPC_END_MESSAGE_MAP()

208 return handled;

209 }

210

211 void PhishingClassifierDelegate::ClassificationDone(

212 const ClientPhishingRequest& verdict) {

213 // We no longer need the page text.

214 classifier_page_text_.clear();

215 DVLOG(2) << "Phishy verdict = " << verdict.is_phishing()

216 << " score = " << verdict.client_score();

217 if (verdict.client_score() != PhishingClassifier::kInvalidScore) {

218 DCHECK_EQ(last_url_sent_to_classifier_.spec(), verdict.url());

219 RenderThread::Get()->Send(new SafeBrowsingHostMsg_PhishingDetectionDone(

220 routing_id(), verdict.SerializeAsString()));

221 }

222 }

223

224 void PhishingClassifierDelegate::MaybeStartClassification() {

225 // We can begin phishing classification when the following conditions are

226 // met:

227 // 1. A Scorer has been created

228 // 2. The browser has sent a StartPhishingDetection message for the current

229 // toplevel URL.

230 // 3. The page has finished loading and the page text has been extracted.

231 // 4. The load is a new navigation (not a session history navigation).

232 // 5. The toplevel URL has not already been classified.

233 //

234 // Note that if we determine that this particular navigation should not be

235 // classified at all (as opposed to deferring it until we get an IPC or the

236 // load completes), we discard the page text since it won't be needed.

237 if (!classifier_->is_ready()) {

238 DVLOG(2) << "Not starting classification, no Scorer created.";

239 // Keep classifier_page_text_, in case a Scorer is set later.

240 return;

241 }

242

243 if (last_main_frame_transition_ & ui::PAGE_TRANSITION_FORWARD_BACK) {

244 // Skip loads from session history navigation. However, update the

245 // last URL sent to the classifier, so that we'll properly detect

246 // in-page navigations.

247 DVLOG(2) << "Not starting classification for back/forward navigation";

248 last_url_sent_to_classifier_ = last_finished_load_url_;

249 classifier_page_text_.clear(); // we won't need this.

250 have_page_text_ = false;

251 return;

252 }

253

254 GURL stripped_last_load_url(StripRef(last_finished_load_url_));

255 if (stripped_last_load_url == StripRef(last_url_sent_to_classifier_)) {

256 // We've already classified this toplevel URL, so this was likely an

257 // in-page navigation or a subframe navigation. The browser should not

258 // send a StartPhishingDetection IPC in this case.

259 DVLOG(2) << "Toplevel URL is unchanged, not starting classification.";

260 classifier_page_text_.clear(); // we won't need this.

261 have_page_text_ = false;

262 return;

263 }

264

265 if (!have_page_text_) {

266 DVLOG(2) << "Not starting classification, there is no page text ready.";

267 return;

268 }

269

270 if (last_url_received_from_browser_ != stripped_last_load_url) {

271 // The browser has not yet confirmed that this URL should be classified,

272 // so defer classification for now. Note: the ref does not affect

273 // any of the browser's preclassification checks, so we don't require it

274 // to match.

275 DVLOG(2) << "Not starting classification, last url from browser is "

276 << last_url_received_from_browser_ << ", last finished load is "

277 << last_finished_load_url_;

278 // Keep classifier_page_text_, in case the browser notifies us later that

279 // we should classify the URL.

280 return;

281 }

282

283 DVLOG(2) << "Starting classification for " << last_finished_load_url_;

284 last_url_sent_to_classifier_ = last_finished_load_url_;

285 is_classifying_ = true;

286 classifier_->BeginClassification(

287 &classifier_page_text_,

288 base::Bind(&PhishingClassifierDelegate::ClassificationDone,

289 base::Unretained(this)));

290 }

291

292 void PhishingClassifierDelegate::OnDestruct() {

293 delete this;

294 }

295

296 } // namespace safe_browsing

OLD	NEW