chrome/renderer/safe_browsing/phishing_classifier_delegate.cc - Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection.

Side by Side Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fix switch/case formatting Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« chrome/renderer/safe_browsing/phishing_classifier_delegate.h ('K') | « chrome/renderer/safe_browsing/phishing_classifier_delegate.h ('k') | chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"	5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"

6	6

7 #include "base/callback.h"	7 #include "base/callback.h"

8 #include "base/logging.h"	8 #include "base/logging.h"

9 #include "chrome/common/render_messages.h"	9 #include "chrome/common/render_messages.h"

10 #include "chrome/renderer/navigation_state.h"	10 #include "chrome/renderer/navigation_state.h"

11 #include "chrome/renderer/render_view.h"	11 #include "chrome/renderer/render_view.h"

12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"	12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"

13 #include "chrome/renderer/safe_browsing/phishing_classifier.h"	13 #include "chrome/renderer/safe_browsing/phishing_classifier.h"

14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"	14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"

15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"	15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"

16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"	16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"

17	17

18 namespace safe_browsing {	18 namespace safe_browsing {

19	19

	20 static GURL StripRef(const GURL& url) {

	21 GURL::Replacements replacements;

	22 replacements.ClearRef();

	23 return url.ReplaceComponents(replacements);

	24 }

	25

20 PhishingClassifierDelegate::PhishingClassifierDelegate(	26 PhishingClassifierDelegate::PhishingClassifierDelegate(

21 RenderView* render_view,	27 RenderView* render_view,

22 PhishingClassifier* classifier)	28 PhishingClassifier* classifier)

23 : render_view_(render_view),	29 : render_view_(render_view),

24 last_page_id_sent_to_classifier_(-1),	30 last_finished_load_id_(-1),

25 pending_classification_(false) {	31 last_page_id_sent_to_classifier_(-1) {

26 if (!classifier) {	32 if (!classifier) {

27 classifier = new PhishingClassifier(render_view_,	33 classifier = new PhishingClassifier(render_view_,

28 new FeatureExtractorClock());	34 new FeatureExtractorClock());

29 }	35 }

30 classifier_.reset(classifier);	36 classifier_.reset(classifier);

31 }	37 }

32	38

33 PhishingClassifierDelegate::~PhishingClassifierDelegate() {	39 PhishingClassifierDelegate::~PhishingClassifierDelegate() {

34 CancelPendingClassification();	40 CancelPendingClassification();

35 }	41 }

36	42

37 void PhishingClassifierDelegate::SetPhishingScorer(	43 void PhishingClassifierDelegate::SetPhishingScorer(

38 const safe_browsing::Scorer* scorer) {	44 const safe_browsing::Scorer* scorer) {

39 classifier_->set_phishing_scorer(scorer);	45 classifier_->set_phishing_scorer(scorer);

	46 // Start classifying the current page if all conditions are met.

	47 // See MaybeStartClassification() for details.

	48 MaybeStartClassification();

	49 }

40	50

41 if (pending_classification_) {	51 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {

42 pending_classification_ = false;	52 last_url_received_from_browser_ = StripRef(url);

43 // If we have a pending classificaton, it should always be true that the	53 // Start classifying the current page if all conditions are met.

44 // main frame URL and page id have not changed since we queued the	54 // See MaybeStartClassification() for details.

45 // classification. This is because we stop any pending classification on	55 MaybeStartClassification();

46 // main frame loads in RenderView::didCommitProvisionalLoad().

47 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);

48 DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_);

49 classifier_->BeginClassification(

50 &classifier_page_text_,

51 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

52 }

53 }	56 }

54	57

55 void PhishingClassifierDelegate::CommittedLoadInFrame(	58 void PhishingClassifierDelegate::CommittedLoadInFrame(

56 WebKit::WebFrame* frame) {	59 WebKit::WebFrame* frame) {

57 // A new page is starting to load. Unless the load is a navigation within	60 // A new page is starting to load. Unless the load is a navigation within

58 // the same page, we need to cancel classification since the content will	61 // the same page, we need to cancel classification since we may get an

59 // now be inconsistent with the phishing model.	62 // inconsistent result.

60 NavigationState* state = NavigationState::FromDataSource(	63 NavigationState* state = NavigationState::FromDataSource(

61 frame->dataSource());	64 frame->dataSource());

62 if (!state->was_within_same_page()) {	65 if (!state->was_within_same_page()) {

63 CancelPendingClassification();	66 CancelPendingClassification();

64 }	67 }

65 }	68 }

66	69

67 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) {	70 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) {

68 // We check that the page id has incremented so that we don't reclassify	71 last_finished_load_id_ = render_view_->page_id();

69 // pages as the user moves back and forward in session history. Note: we	72 last_finished_load_url_ = StripToplevelUrl();

70 // don't send every page id to the classifier, only those where the toplevel

71 // URL changed.

72 int load_id = render_view_->page_id();

73 if (load_id <= last_page_id_sent_to_classifier_) {

74 return;

75 }

76

77 GURL url_without_ref = StripToplevelUrl();

78 if (url_without_ref == last_url_sent_to_classifier_) {

79 // The toplevle URL is the same, except for the ref.

80 // Update the last page id we sent, but don't trigger a new classification.

81 last_page_id_sent_to_classifier_ = load_id;

82 return;

83 }

84

85 last_url_sent_to_classifier_ = url_without_ref;

86 last_page_id_sent_to_classifier_ = load_id;

87 classifier_page_text_.swap(*page_text);	73 classifier_page_text_.swap(*page_text);

88	74 MaybeStartClassification();

89 if (classifier_->is_ready()) {

90 classifier_->BeginClassification(

91 &classifier_page_text_,

92 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

93 } else {

94 // If there is no phishing classifier yet, we'll begin classification once

95 // SetPhishingScorer() is called by the RenderView.

96 pending_classification_ = true;

97 }

98 }	75 }

99	76

100 void PhishingClassifierDelegate::CancelPendingClassification() {	77 void PhishingClassifierDelegate::CancelPendingClassification() {

101 if (classifier_->is_ready()) {	78 if (classifier_->is_ready()) {

102 classifier_->CancelPendingClassification();	79 classifier_->CancelPendingClassification();

103 }	80 }

104 classifier_page_text_.clear();	81 classifier_page_text_.clear();

105 pending_classification_ = false;

106 }	82 }

107	83

108 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,	84 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,

109 double phishy_score) {	85 double phishy_score) {

110 // We no longer need the page text.	86 // We no longer need the page text.

111 classifier_page_text_.clear();	87 classifier_page_text_.clear();

112 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;	88 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;

113 if (!is_phishy) {	89 if (!is_phishy) {

114 return;	90 return;

115 }	91 }

116	92

117 render_view_->Send(new ViewHostMsg_DetectedPhishingSite(	93 render_view_->Send(new ViewHostMsg_DetectedPhishingSite(

118 render_view_->routing_id(),	94 render_view_->routing_id(),

119 last_url_sent_to_classifier_,	95 last_url_sent_to_classifier_,

120 phishy_score));	96 phishy_score));

121 }	97 }

122	98

123 GURL PhishingClassifierDelegate::StripToplevelUrl() {	99 GURL PhishingClassifierDelegate::StripToplevelUrl() {

124 GURL toplevel_url = render_view_->webview()->mainFrame()->url();	100 return StripRef(render_view_->webview()->mainFrame()->url());

125 GURL::Replacements replacements;	101 }

126 replacements.ClearRef();	102

127 return toplevel_url.ReplaceComponents(replacements);	103 void PhishingClassifierDelegate::MaybeStartClassification() {

	104 // We can begin phishing classification when the following conditions are

	105 // met:

	106 // 1. A Scorer has been created

	107 // 2. The browser has sent a StartPhishingDetection message for the current

	108 // toplevel URL.

	109 // 3. The page has finished loading and the page text has been extracted.

	110 // 4. The load is a new navigation (not a session history navigation).

	111 // 5. The toplevel URL has not already been classified.

	112 //

	113 // Note that if we determine that this particular navigation should not be

	114 // classified at all (as opposed to deferring it until we get an IPC or the

	115 // load completes), we discard the page text since it won't be needed.

	116 if (!classifier_->is_ready()) {

	117 VLOG(2) << "Not starting classification, no Scorer created.";

	118 // Keep classifier_page_text_, in case a Scorer is set later.

	119 return;

	120 }

	121

	122 if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) {

	123 // Skip loads from session history navigation.

	124 VLOG(2) << "Not starting classification, last finished load id is "

	125 << last_finished_load_id_ << " but we have classified up to "

	126 << "load id " << last_page_id_sent_to_classifier_;

	127 classifier_page_text_.clear(); // we won't need this.

	128 return;

	129 }

	130

	131 if (last_finished_load_id_ != render_view_->page_id()) {

	132 VLOG(2) << "Render view page has changed, not starting classification";

	133 classifier_page_text_.clear(); // we won't need this.

	134 return;

	135 }

	136 // If the page id is unchanged, the toplevel URL should also be unchanged.

	137 DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_);

	138

	139 if (last_finished_load_url_ == last_url_sent_to_classifier_) {

	140 // We've already classified this toplevel URL, so this was likely an

	141 // in-page navigation or a subframe navigation. The browser should not

	142 // send a StartPhishingDetection IPC in this case.

	143 VLOG(2) << "Toplevel URL is unchanged, not starting classification.";

	144 classifier_page_text_.clear(); // we won't need this.

	145 return;

	146 }

	147
	lzheng 2011/02/04 20:05:22: Can you add a comment here to explain that this means browser has not confirmed this url should be classified yet? Brian Ryner 2011/02/10 01:12:52 (in reply to lzheng, 2011/02/04 20:05:22): Done.
	148 if (last_url_received_from_browser_ != last_finished_load_url_) {

	149 VLOG(2) << "Not starting classification, last url from browser is "

	150 << last_url_received_from_browser_ << ", last finished load is "

	151 << last_finished_load_url_;

	152 // Keep classifier_page_text_, in case the browser notifies us later that

	153 // we should classify the URL.

	154 return;

	155 }

	156

	157 VLOG(2) << "Starting classification for " << last_finished_load_url_;

	158 last_url_sent_to_classifier_ = last_finished_load_url_;

	159 last_page_id_sent_to_classifier_ = last_finished_load_id_;

	160 classifier_->BeginClassification(

	161 &classifier_page_text_,

	162 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

128 }	163 }

129	164

130 } // namespace safe_browsing	165 } // namespace safe_browsing

OLD	NEW