chrome/renderer/safe_browsing/phishing_classifier_delegate.cc - Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection.

Side by Side Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Address Noe's review comments Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « chrome/renderer/safe_browsing/phishing_classifier_delegate.h ('k') | chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"	5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"

6	6

7 #include <set>	7 #include <set>

8	8

9 #include "base/callback.h"	9 #include "base/callback.h"

10 #include "base/lazy_instance.h"	10 #include "base/lazy_instance.h"

11 #include "base/logging.h"	11 #include "base/logging.h"

12 #include "base/scoped_callback_factory.h"	12 #include "base/scoped_callback_factory.h"

13 #include "chrome/common/render_messages.h"	13 #include "chrome/common/render_messages.h"

14 #include "chrome/renderer/navigation_state.h"	14 #include "chrome/renderer/navigation_state.h"

15 #include "chrome/renderer/render_thread.h"	15 #include "chrome/renderer/render_thread.h"

16 #include "chrome/renderer/render_view.h"	16 #include "chrome/renderer/render_view.h"

17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"	17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"

18 #include "chrome/renderer/safe_browsing/phishing_classifier.h"	18 #include "chrome/renderer/safe_browsing/phishing_classifier.h"

19 #include "chrome/renderer/safe_browsing/scorer.h"	19 #include "chrome/renderer/safe_browsing/scorer.h"

20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"	20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"

21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"	21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"

22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"	22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"

23	23

24 namespace safe_browsing {	24 namespace safe_browsing {

25	25

	26

	27 static GURL StripRef(const GURL& url) {

	28 GURL::Replacements replacements;

	29 replacements.ClearRef();

	30 return url.ReplaceComponents(replacements);

	31 }

	32

26 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates;	33 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates;

27 static base::LazyInstance<PhishingClassifierDelegates>	34 static base::LazyInstance<PhishingClassifierDelegates>

28 g_delegates(base::LINKER_INITIALIZED);	35 g_delegates(base::LINKER_INITIALIZED);

29	36

30 static base::LazyInstance<scoped_ptr<const safe_browsing::Scorer> >	37 static base::LazyInstance<scoped_ptr<const safe_browsing::Scorer> >

31 g_phishing_scorer(base::LINKER_INITIALIZED);	38 g_phishing_scorer(base::LINKER_INITIALIZED);

32	39

33 class ScorerCallback {	40 class ScorerCallback {

34 public:	41 public:

35 static Scorer::CreationCallback* CreateCallback() {	42 static Scorer::CreationCallback* CreateCallback() {

(...skipping 32 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
68 safe_browsing::Scorer::CreateFromFile(	75 safe_browsing::Scorer::CreateFromFile(

69 IPC::PlatformFileForTransitToPlatformFile(model_file),	76 IPC::PlatformFileForTransitToPlatformFile(model_file),

70 RenderThread::current()->GetFileThreadMessageLoopProxy(),	77 RenderThread::current()->GetFileThreadMessageLoopProxy(),

71 ScorerCallback::CreateCallback());	78 ScorerCallback::CreateCallback());

72 }	79 }

73	80

74 PhishingClassifierDelegate::PhishingClassifierDelegate(	81 PhishingClassifierDelegate::PhishingClassifierDelegate(

75 RenderView* render_view,	82 RenderView* render_view,

76 PhishingClassifier* classifier)	83 PhishingClassifier* classifier)

77 : RenderViewObserver(render_view),	84 : RenderViewObserver(render_view),

78 last_page_id_sent_to_classifier_(-1),	85 last_finished_load_id_(-1),

79 pending_classification_(false) {	86 last_page_id_sent_to_classifier_(-1) {

80 g_delegates.Get().insert(this);	87 g_delegates.Get().insert(this);

81 if (!classifier) {	88 if (!classifier) {

82 classifier = new PhishingClassifier(render_view,	89 classifier = new PhishingClassifier(render_view,

83 new FeatureExtractorClock());	90 new FeatureExtractorClock());

84 }	91 }

85	92

86 classifier_.reset(classifier);	93 classifier_.reset(classifier);

87	94

88 if (g_phishing_scorer.Get().get())	95 if (g_phishing_scorer.Get().get())

89 SetPhishingScorer(g_phishing_scorer.Get().get());	96 SetPhishingScorer(g_phishing_scorer.Get().get());

90 }	97 }

91	98

92 PhishingClassifierDelegate::~PhishingClassifierDelegate() {	99 PhishingClassifierDelegate::~PhishingClassifierDelegate() {

93 CancelPendingClassification();	100 CancelPendingClassification();

94 g_delegates.Get().erase(this);	101 g_delegates.Get().erase(this);

95 }	102 }

96	103

97 void PhishingClassifierDelegate::SetPhishingScorer(	104 void PhishingClassifierDelegate::SetPhishingScorer(

98 const safe_browsing::Scorer* scorer) {	105 const safe_browsing::Scorer* scorer) {

99 if (!render_view()->webview())	106 if (!render_view()->webview())

100 return; // RenderView is tearing down.	107 return; // RenderView is tearing down.

101	108

102 classifier_->set_phishing_scorer(scorer);	109 classifier_->set_phishing_scorer(scorer);

	110 // Start classifying the current page if all conditions are met.

	111 // See MaybeStartClassification() for details.

	112 MaybeStartClassification();

	113 }

103	114

104 if (pending_classification_) {	115

105 pending_classification_ = false;	116 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {

106 // If we have a pending classification, it should always be true that the	117 last_url_received_from_browser_ = StripRef(url);

107 // main frame URL and page id have not changed since we queued the	118 // Start classifying the current page if all conditions are met.

108 // classification. This is because we stop any pending classification on	119 // See MaybeStartClassification() for details.

109 // main frame loads in RenderView::didCommitProvisionalLoad().	120 MaybeStartClassification();

110 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);

111 DCHECK_EQ(render_view()->page_id(), last_page_id_sent_to_classifier_);

112 classifier_->BeginClassification(

113 &classifier_page_text_,

114 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

115 }

116 }	121 }

117	122

118 void PhishingClassifierDelegate::DidCommitProvisionalLoad(	123 void PhishingClassifierDelegate::DidCommitProvisionalLoad(

119 WebKit::WebFrame* frame, bool is_new_navigation) {	124 WebKit::WebFrame* frame, bool is_new_navigation) {

120 // A new page is starting to load. Unless the load is a navigation within	125 // A new page is starting to load. Unless the load is a navigation within

121 // the same page, we need to cancel classification since the content will	126 // the same page, we need to cancel classification since we may get an

122 // now be inconsistent with the phishing model.	127 // inconsistent result.

123 NavigationState* state = NavigationState::FromDataSource(	128 NavigationState* state = NavigationState::FromDataSource(

124 frame->dataSource());	129 frame->dataSource());

125 if (!state->was_within_same_page()) {	130 if (!state->was_within_same_page()) {

126 CancelPendingClassification();	131 CancelPendingClassification();

127 }	132 }

128 }	133 }

129	134

130 void PhishingClassifierDelegate::PageCaptured(const string16& page_text) {	135 void PhishingClassifierDelegate::PageCaptured(const string16& page_text) {

131 // We check that the page id has incremented so that we don't reclassify	136 last_finished_load_id_ = render_view()->page_id();

132 // pages as the user moves back and forward in session history. Note: we	137 last_finished_load_url_ = StripToplevelUrl();

133 // don't send every page id to the classifier, only those where the toplevel

134 // URL changed.

135 int load_id = render_view()->page_id();

136 if (load_id <= last_page_id_sent_to_classifier_) {

137 return;

138 }

139

140 GURL url_without_ref = StripToplevelUrl();

141 if (url_without_ref == last_url_sent_to_classifier_) {

142 // The toplevel URL is the same, except for the ref.

143 // Update the last page id we sent, but don't trigger a new classification.

144 last_page_id_sent_to_classifier_ = load_id;

145 return;

146 }

147

148 last_url_sent_to_classifier_ = url_without_ref;

149 last_page_id_sent_to_classifier_ = load_id;

150 classifier_page_text_ = page_text;	138 classifier_page_text_ = page_text;

151	139 MaybeStartClassification();

152 if (classifier_->is_ready()) {

153 classifier_->BeginClassification(

154 &classifier_page_text_,

155 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

156 } else {

157 // If there is no phishing classifier yet, we'll begin classification once

158 // SetPhishingScorer() is called by the RenderView.

159 pending_classification_ = true;

160 }

161 }	140 }

162	141

163 void PhishingClassifierDelegate::CancelPendingClassification() {	142 void PhishingClassifierDelegate::CancelPendingClassification() {

164 if (classifier_->is_ready()) {	143 if (classifier_->is_ready()) {

165 classifier_->CancelPendingClassification();	144 classifier_->CancelPendingClassification();

166 }	145 }

167 classifier_page_text_.clear();	146 classifier_page_text_.clear();

168 pending_classification_ = false;

169 }	147 }

170	148

171 bool PhishingClassifierDelegate::OnMessageReceived(	149 bool PhishingClassifierDelegate::OnMessageReceived(

172 const IPC::Message& message) {	150 const IPC::Message& message) {

173 /*

174 bool handled = true;	151 bool handled = true;

175 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message)	152 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message)

	153 IPC_MESSAGE_HANDLER(ViewMsg_StartPhishingDetection,

	154 OnStartPhishingDetection)

176 IPC_MESSAGE_UNHANDLED(handled = false)	155 IPC_MESSAGE_UNHANDLED(handled = false)

177 IPC_END_MESSAGE_MAP()	156 IPC_END_MESSAGE_MAP()

178 return handled;	157 return handled;

179 */

180 return false;

181 }	158 }

182	159

183 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,	160 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,

184 double phishy_score) {	161 double phishy_score) {

185 // We no longer need the page text.	162 // We no longer need the page text.

186 classifier_page_text_.clear();	163 classifier_page_text_.clear();

187 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;	164 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;

188 if (!is_phishy) {	165 if (!is_phishy) {

189 return;	166 return;

190 }	167 }

191	168

192 render_view()->Send(new ViewHostMsg_DetectedPhishingSite(	169 render_view()->Send(new ViewHostMsg_DetectedPhishingSite(

193 render_view()->routing_id(),	170 render_view()->routing_id(),

194 last_url_sent_to_classifier_,	171 last_url_sent_to_classifier_,

195 phishy_score));	172 phishy_score));

196 }	173 }

197	174

198 GURL PhishingClassifierDelegate::StripToplevelUrl() {	175 GURL PhishingClassifierDelegate::StripToplevelUrl() {

199 GURL toplevel_url = render_view()->webview()->mainFrame()->url();	176 return StripRef(render_view()->webview()->mainFrame()->url());

200 GURL::Replacements replacements;	177 }

201 replacements.ClearRef();	178

202 return toplevel_url.ReplaceComponents(replacements);	179 void PhishingClassifierDelegate::MaybeStartClassification() {

	180 // We can begin phishing classification when the following conditions are

	181 // met:

	182 // 1. A Scorer has been created

	183 // 2. The browser has sent a StartPhishingDetection message for the current

	184 // toplevel URL.

	185 // 3. The page has finished loading and the page text has been extracted.

	186 // 4. The load is a new navigation (not a session history navigation).

	187 // 5. The toplevel URL has not already been classified.

	188 //

	189 // Note that if we determine that this particular navigation should not be

	190 // classified at all (as opposed to deferring it until we get an IPC or the

	191 // load completes), we discard the page text since it won't be needed.

	192 if (!classifier_->is_ready()) {

	193 VLOG(2) << "Not starting classification, no Scorer created.";

	194 // Keep classifier_page_text_, in case a Scorer is set later.

	195 return;

	196 }

	197

	198 if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) {

	199 // Skip loads from session history navigation.

	200 VLOG(2) << "Not starting classification, last finished load id is "

	201 << last_finished_load_id_ << " but we have classified up to "

	202 << "load id " << last_page_id_sent_to_classifier_;

	203 classifier_page_text_.clear(); // we won't need this.

	204 return;

	205 }

	206

	207 if (last_finished_load_id_ != render_view()->page_id()) {

	208 VLOG(2) << "Render view page has changed, not starting classification";

	209 classifier_page_text_.clear(); // we won't need this.

	210 return;

	211 }

	212 // If the page id is unchanged, the toplevel URL should also be unchanged.

	213 DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_);

	214

	215 if (last_finished_load_url_ == last_url_sent_to_classifier_) {

	216 // We've already classified this toplevel URL, so this was likely an

	217 // in-page navigation or a subframe navigation. The browser should not

	218 // send a StartPhishingDetection IPC in this case.

	219 VLOG(2) << "Toplevel URL is unchanged, not starting classification.";

	220 classifier_page_text_.clear(); // we won't need this.

	221 return;

	222 }

	223

	224 if (last_url_received_from_browser_ != last_finished_load_url_) {

	225 // The browser has not yet confirmed that this URL should be classified,

	226 // so defer classification for now.

	227 VLOG(2) << "Not starting classification, last url from browser is "

	228 << last_url_received_from_browser_ << ", last finished load is "

	229 << last_finished_load_url_;

	230 // Keep classifier_page_text_, in case the browser notifies us later that

	231 // we should classify the URL.

	232 return;

	233 }

	234

	235 VLOG(2) << "Starting classification for " << last_finished_load_url_;

	236 last_url_sent_to_classifier_ = last_finished_load_url_;

	237 last_page_id_sent_to_classifier_ = last_finished_load_id_;

	238 classifier_->BeginClassification(

	239 &classifier_page_text_,

	240 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

203 }	241 }

204	242

205 } // namespace safe_browsing	243 } // namespace safe_browsing

OLD	NEW