chrome/renderer/safe_browsing/phishing_classifier_delegate.cc - Issue 6250176: Make RenderView not have to know about how PhishingClassifierDelegate.

Side by Side Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 6250176: Make RenderView not have to know about how PhishingClassifierDelegate. (Closed) Base URL: svn://chrome-svn/chrome/trunk/src/

Patch Set: sync Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

« no previous file with comments | « chrome/renderer/safe_browsing/phishing_classifier_delegate.h ('k') | chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc » ('j') | chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

OLD	NEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.	1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style license that can be	2 // Use of this source code is governed by a BSD-style license that can be

3 // found in the LICENSE file.	3 // found in the LICENSE file.

4	4

5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"	5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"

6	6

	7 #include <set>

	8

7 #include "base/callback.h"	9 #include "base/callback.h"

	10 #include "base/lazy_instance.h"

8 #include "base/logging.h"	11 #include "base/logging.h"

	12 #include "base/scoped_callback_factory.h"

9 #include "chrome/common/render_messages.h"	13 #include "chrome/common/render_messages.h"

10 #include "chrome/renderer/navigation_state.h"	14 #include "chrome/renderer/navigation_state.h"

	15 #include "chrome/renderer/render_thread.h"

11 #include "chrome/renderer/render_view.h"	16 #include "chrome/renderer/render_view.h"

12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"	17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"

13 #include "chrome/renderer/safe_browsing/phishing_classifier.h"	18 #include "chrome/renderer/safe_browsing/phishing_classifier.h"

	19 #include "chrome/renderer/safe_browsing/scorer.h"

14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"	20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"

15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"	21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"

16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"	22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"

17	23

18 namespace safe_browsing {	24 namespace safe_browsing {

19	25

	26 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates;

	27 static base::LazyInstance<PhishingClassifierDelegates>

	28 g_delegates(base::LINKER_INITIALIZED);

	29

	30 static base::LazyInstance<scoped_ptr<const safe_browsing::Scorer> >

	31 g_phishing_scorer(base::LINKER_INITIALIZED);

	32

	33 class ScorerCallback {

	34 public:

	35 static Scorer::CreationCallback* CreateCallback() {

	36 ScorerCallback* scorer_callback = new ScorerCallback();

	37 return scorer_callback->callback_factory_->NewCallback(

	38 &ScorerCallback::PhishingScorerCreated);

	39 }

	40

	41 private:

	42 ScorerCallback() {

	43 callback_factory_.reset(

	44 new base::ScopedCallbackFactory<ScorerCallback>(this));

	45 }

	46

	47 // Callback to be run once the phishing Scorer has been created.

	48 void PhishingScorerCreated(safe_browsing::Scorer* scorer) {

	49 if (!scorer) {

	50 DLOG(ERROR) << "Unable to create a PhishingScorer - corrupt model?";

	51 return;

	52 }

	53

	54 g_phishing_scorer.Get().reset(scorer);

	55

	56 PhishingClassifierDelegates::iterator i;

	57 for (i = g_delegates.Get().begin(); i != g_delegates.Get().end(); ++i)

	58 (*i)->SetPhishingScorer(scorer);

	59

	60 delete this;

	61 }

	62

	63 scoped_ptr<base::ScopedCallbackFactory<ScorerCallback> > callback_factory_;

	64 };

	65

	66 void PhishingClassifierDelegate::SetPhishingModel(

	67 IPC::PlatformFileForTransit model_file) {

	68 safe_browsing::Scorer::CreateFromFile(

	69 IPC::PlatformFileForTransitToPlatformFile(model_file),

	70 RenderThread::current()->GetFileThreadMessageLoopProxy(),

	71 ScorerCallback::CreateCallback());

	72 }

	73

20 PhishingClassifierDelegate::PhishingClassifierDelegate(	74 PhishingClassifierDelegate::PhishingClassifierDelegate(

21 RenderView* render_view,	75 RenderView* render_view,

22 PhishingClassifier* classifier)	76 PhishingClassifier* classifier)

23 : render_view_(render_view),	77 : RenderViewObserver(render_view),

24 last_page_id_sent_to_classifier_(-1),	78 last_page_id_sent_to_classifier_(-1),

25 pending_classification_(false) {	79 pending_classification_(false) {

	80 g_delegates.Get().insert(this);

26 if (!classifier) {	81 if (!classifier) {

27 classifier = new PhishingClassifier(render_view_,	82 classifier = new PhishingClassifier(render_view,

28 new FeatureExtractorClock());	83 new FeatureExtractorClock());

29 }	84 }

	85

	86 if (g_phishing_scorer.Get().get())

	87 SetPhishingScorer(g_phishing_scorer.Get().get());
	Brian Ryner 2011/02/07 20:08:19: I think this needs to be moved down to below where classifier_ is set on line 89. Otherwise, classifier_ will still be NULL when SetPhishingScorer tries to access it on line 102. I'm surprised none of the test cases caught this, I'll make sure to add one as a follow-up (or feel free to add it yourself if you'd like).
	jam 2011/02/07 20:24:49 (in reply to Brian Ryner, 2011/02/07 20:08:19): Done.
	88

30 classifier_.reset(classifier);	89 classifier_.reset(classifier);

31 }	90 }

32	91

33 PhishingClassifierDelegate::~PhishingClassifierDelegate() {	92 PhishingClassifierDelegate::~PhishingClassifierDelegate() {

34 CancelPendingClassification();	93 CancelPendingClassification();

	94 g_delegates.Get().erase(this);

35 }	95 }

36	96

37 void PhishingClassifierDelegate::SetPhishingScorer(	97 void PhishingClassifierDelegate::SetPhishingScorer(

38 const safe_browsing::Scorer* scorer) {	98 const safe_browsing::Scorer* scorer) {

	99 if (!render_view()->webview())

	100 return; // RenderView is tearing down.

	101

39 classifier_->set_phishing_scorer(scorer);	102 classifier_->set_phishing_scorer(scorer);

40	103

41 if (pending_classification_) {	104 if (pending_classification_) {

42 pending_classification_ = false;	105 pending_classification_ = false;

43 // If we have a pending classificaton, it should always be true that the	106 // If we have a pending classificaton, it should always be true that the

44 // main frame URL and page id have not changed since we queued the	107 // main frame URL and page id have not changed since we queued the

45 // classification. This is because we stop any pending classification on	108 // classification. This is because we stop any pending classification on

46 // main frame loads in RenderView::didCommitProvisionalLoad().	109 // main frame loads in RenderView::didCommitProvisionalLoad().

47 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);	110 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);

48 DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_);	111 DCHECK_EQ(render_view()->page_id(), last_page_id_sent_to_classifier_);

49 classifier_->BeginClassification(	112 classifier_->BeginClassification(

50 &classifier_page_text_,	113 &classifier_page_text_,

51 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));	114 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

52 }	115 }

53 }	116 }

54	117

55 void PhishingClassifierDelegate::CommittedLoadInFrame(	118 void PhishingClassifierDelegate::DidCommitProvisionalLoad(

56 WebKit::WebFrame* frame) {	119 WebKit::WebFrame* frame, bool is_new_navigation) {

	120 if (!is_new_navigation)

	121 return;

	122

57 // A new page is starting to load. Unless the load is a navigation within	123 // A new page is starting to load. Unless the load is a navigation within

58 // the same page, we need to cancel classification since the content will	124 // the same page, we need to cancel classification since the content will

59 // now be inconsistent with the phishing model.	125 // now be inconsistent with the phishing model.

60 NavigationState* state = NavigationState::FromDataSource(	126 NavigationState* state = NavigationState::FromDataSource(

61 frame->dataSource());	127 frame->dataSource());

62 if (!state->was_within_same_page()) {	128 if (!state->was_within_same_page()) {

63 CancelPendingClassification();	129 CancelPendingClassification();

64 }	130 }

65 }	131 }

66	132

67 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) {	133 void PhishingClassifierDelegate::PageCaptured(const string16& page_text) {

68 // We check that the page id has incremented so that we don't reclassify	134 // We check that the page id has incremented so that we don't reclassify

69 // pages as the user moves back and forward in session history. Note: we	135 // pages as the user moves back and forward in session history. Note: we

70 // don't send every page id to the classifier, only those where the toplevel	136 // don't send every page id to the classifier, only those where the toplevel

71 // URL changed.	137 // URL changed.

72 int load_id = render_view_->page_id();	138 int load_id = render_view()->page_id();

73 if (load_id <= last_page_id_sent_to_classifier_) {	139 if (load_id <= last_page_id_sent_to_classifier_) {

74 return;	140 return;

75 }	141 }

76	142

77 GURL url_without_ref = StripToplevelUrl();	143 GURL url_without_ref = StripToplevelUrl();
	Brian Ryner 2011/02/07 20:08:19: Would it ever be possible for this notification to happen after the WebView has been destroyed? If so, we would crash trying to access the WebView in StripToplevelUrl().
	jam 2011/02/07 20:24:49 (in reply to Brian Ryner, 2011/02/07 20:08:19): I don't think this would happen. But if I'm wrong, then it'll be easy to see from the crash dumps. I don't think I changed the logic here from before, so it should be the same.
78 if (url_without_ref == last_url_sent_to_classifier_) {	144 if (url_without_ref == last_url_sent_to_classifier_) {

79 // The toplevle URL is the same, except for the ref.	145 // The toplevle URL is the same, except for the ref.

80 // Update the last page id we sent, but don't trigger a new classification.	146 // Update the last page id we sent, but don't trigger a new classification.

81 last_page_id_sent_to_classifier_ = load_id;	147 last_page_id_sent_to_classifier_ = load_id;

82 return;	148 return;

83 }	149 }

84	150

85 last_url_sent_to_classifier_ = url_without_ref;	151 last_url_sent_to_classifier_ = url_without_ref;

86 last_page_id_sent_to_classifier_ = load_id;	152 last_page_id_sent_to_classifier_ = load_id;

87 classifier_page_text_.swap(*page_text);	153 classifier_page_text_ = page_text;
	Brian Ryner 2011/02/07 20:08:19: As I mentioned to you offline, I'd set this up as a swap() to avoid copying a potentially large string (up to 64KB). It still seems preferable to avoid this... what do you think?
	jam 2011/02/07 20:24:49 (in reply to Brian Ryner, 2011/02/07 20:08:19): Yeah, I thought about it. But I decided to skip this optimization because it breaks the abstraction between RenderView and its observers. It's preferable to not add constraints on the ordering of observers. I think if profiling shows that this is needed, we can reconsider, but until then best to keep the API simple.
	Brian Ryner 2011/02/07 21:39:41 (in reply to John Abd-El-Malek, 2011/02/07 20:24:49): Ok, sounds reasonable.
88	154

89 if (classifier_->is_ready()) {	155 if (classifier_->is_ready()) {

90 classifier_->BeginClassification(	156 classifier_->BeginClassification(

91 &classifier_page_text_,	157 &classifier_page_text_,

92 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));	158 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

93 } else {	159 } else {

94 // If there is no phishing classifier yet, we'll begin classification once	160 // If there is no phishing classifier yet, we'll begin classification once

95 // SetPhishingScorer() is called by the RenderView.	161 // SetPhishingScorer() is called by the RenderView.

96 pending_classification_ = true;	162 pending_classification_ = true;

97 }	163 }

98 }	164 }

99	165

100 void PhishingClassifierDelegate::CancelPendingClassification() {	166 void PhishingClassifierDelegate::CancelPendingClassification() {

101 if (classifier_->is_ready()) {	167 if (classifier_->is_ready()) {

102 classifier_->CancelPendingClassification();	168 classifier_->CancelPendingClassification();

103 }	169 }

104 classifier_page_text_.clear();	170 classifier_page_text_.clear();

105 pending_classification_ = false;	171 pending_classification_ = false;

106 }	172 }

107	173

	174 bool PhishingClassifierDelegate::OnMessageReceived(

	175 const IPC::Message& message) {

	176 /*
	Brian Ryner 2011/02/07 20:08:19: Did you mean to leave this commented-out code here?
	jam 2011/02/07 20:24:49 (in reply to Brian Ryner, 2011/02/07 20:08:19): yep. I can't uncomment it until a message is added, which is awaiting http://codereview.chromium.org/6398001/
	Brian Ryner 2011/02/07 21:39:41 (in reply to John Abd-El-Malek, 2011/02/07 20:24:49): Ok. If you end up submitting this first, I'll merge it into my change.
	177 bool handled = true;

	178 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message)

	179 IPC_MESSAGE_UNHANDLED(handled = false)

	180 IPC_END_MESSAGE_MAP()

	181 return handled;

	182 */

	183 return false;

	184 }

	185

108 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,	186 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,

109 double phishy_score) {	187 double phishy_score) {

110 // We no longer need the page text.	188 // We no longer need the page text.

111 classifier_page_text_.clear();	189 classifier_page_text_.clear();

112 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;	190 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;

113 if (!is_phishy) {	191 if (!is_phishy) {

114 return;	192 return;

115 }	193 }

116	194

117 render_view_->Send(new ViewHostMsg_DetectedPhishingSite(	195 render_view()->Send(new ViewHostMsg_DetectedPhishingSite(

118 render_view_->routing_id(),	196 render_view()->routing_id(),

119 last_url_sent_to_classifier_,	197 last_url_sent_to_classifier_,

120 phishy_score));	198 phishy_score));

121 }	199 }

122	200

123 GURL PhishingClassifierDelegate::StripToplevelUrl() {	201 GURL PhishingClassifierDelegate::StripToplevelUrl() {

124 GURL toplevel_url = render_view_->webview()->mainFrame()->url();	202 GURL toplevel_url = render_view()->webview()->mainFrame()->url();

125 GURL::Replacements replacements;	203 GURL::Replacements replacements;

126 replacements.ClearRef();	204 replacements.ClearRef();

127 return toplevel_url.ReplaceComponents(replacements);	205 return toplevel_url.ReplaceComponents(replacements);

128 }	206 }

129	207

130 } // namespace safe_browsing	208 } // namespace safe_browsing

OLD	NEW