Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Fix switch/case formatting Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" 5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
6 6
7 #include "base/callback.h" 7 #include "base/callback.h"
8 #include "base/logging.h" 8 #include "base/logging.h"
9 #include "chrome/common/render_messages.h" 9 #include "chrome/common/render_messages.h"
10 #include "chrome/renderer/navigation_state.h" 10 #include "chrome/renderer/navigation_state.h"
11 #include "chrome/renderer/render_view.h" 11 #include "chrome/renderer/render_view.h"
12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" 12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
13 #include "chrome/renderer/safe_browsing/phishing_classifier.h" 13 #include "chrome/renderer/safe_browsing/phishing_classifier.h"
14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" 14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" 15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" 16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"
17 17
18 namespace safe_browsing { 18 namespace safe_browsing {
19 19
20 static GURL StripRef(const GURL& url) {
21 GURL::Replacements replacements;
22 replacements.ClearRef();
23 return url.ReplaceComponents(replacements);
24 }
25
20 PhishingClassifierDelegate::PhishingClassifierDelegate( 26 PhishingClassifierDelegate::PhishingClassifierDelegate(
21 RenderView* render_view, 27 RenderView* render_view,
22 PhishingClassifier* classifier) 28 PhishingClassifier* classifier)
23 : render_view_(render_view), 29 : render_view_(render_view),
24 last_page_id_sent_to_classifier_(-1), 30 last_finished_load_id_(-1),
25 pending_classification_(false) { 31 last_page_id_sent_to_classifier_(-1) {
26 if (!classifier) { 32 if (!classifier) {
27 classifier = new PhishingClassifier(render_view_, 33 classifier = new PhishingClassifier(render_view_,
28 new FeatureExtractorClock()); 34 new FeatureExtractorClock());
29 } 35 }
30 classifier_.reset(classifier); 36 classifier_.reset(classifier);
31 } 37 }
32 38
33 PhishingClassifierDelegate::~PhishingClassifierDelegate() { 39 PhishingClassifierDelegate::~PhishingClassifierDelegate() {
34 CancelPendingClassification(); 40 CancelPendingClassification();
35 } 41 }
36 42
37 void PhishingClassifierDelegate::SetPhishingScorer( 43 void PhishingClassifierDelegate::SetPhishingScorer(
38 const safe_browsing::Scorer* scorer) { 44 const safe_browsing::Scorer* scorer) {
39 classifier_->set_phishing_scorer(scorer); 45 classifier_->set_phishing_scorer(scorer);
46 // Start classifying the current page if all conditions are met.
47 // See MaybeStartClassification() for details.
48 MaybeStartClassification();
49 }
40 50
41 if (pending_classification_) { 51 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {
42 pending_classification_ = false; 52 last_url_received_from_browser_ = StripRef(url);
43 // If we have a pending classification, it should always be true that the 53 // Start classifying the current page if all conditions are met.
44 // main frame URL and page id have not changed since we queued the 54 // See MaybeStartClassification() for details.
45 // classification. This is because we stop any pending classification on 55 MaybeStartClassification();
46 // main frame loads in RenderView::didCommitProvisionalLoad().
47 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);
48 DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_);
49 classifier_->BeginClassification(
50 &classifier_page_text_,
51 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
52 }
53 } 56 }
54 57
55 void PhishingClassifierDelegate::CommittedLoadInFrame( 58 void PhishingClassifierDelegate::CommittedLoadInFrame(
56 WebKit::WebFrame* frame) { 59 WebKit::WebFrame* frame) {
57 // A new page is starting to load. Unless the load is a navigation within 60 // A new page is starting to load. Unless the load is a navigation within
58 // the same page, we need to cancel classification since the content will 61 // the same page, we need to cancel classification since we may get an
59 // now be inconsistent with the phishing model. 62 // inconsistent result.
60 NavigationState* state = NavigationState::FromDataSource( 63 NavigationState* state = NavigationState::FromDataSource(
61 frame->dataSource()); 64 frame->dataSource());
62 if (!state->was_within_same_page()) { 65 if (!state->was_within_same_page()) {
63 CancelPendingClassification(); 66 CancelPendingClassification();
64 } 67 }
65 } 68 }
66 69
67 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) { 70 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) {
68 // We check that the page id has incremented so that we don't reclassify 71 last_finished_load_id_ = render_view_->page_id();
69 // pages as the user moves back and forward in session history. Note: we 72 last_finished_load_url_ = StripToplevelUrl();
70 // don't send every page id to the classifier, only those where the toplevel
71 // URL changed.
72 int load_id = render_view_->page_id();
73 if (load_id <= last_page_id_sent_to_classifier_) {
74 return;
75 }
76
77 GURL url_without_ref = StripToplevelUrl();
78 if (url_without_ref == last_url_sent_to_classifier_) {
79 // The toplevel URL is the same, except for the ref.
80 // Update the last page id we sent, but don't trigger a new classification.
81 last_page_id_sent_to_classifier_ = load_id;
82 return;
83 }
84
85 last_url_sent_to_classifier_ = url_without_ref;
86 last_page_id_sent_to_classifier_ = load_id;
87 classifier_page_text_.swap(*page_text); 73 classifier_page_text_.swap(*page_text);
88 74 MaybeStartClassification();
89 if (classifier_->is_ready()) {
90 classifier_->BeginClassification(
91 &classifier_page_text_,
92 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
93 } else {
94 // If there is no phishing classifier yet, we'll begin classification once
95 // SetPhishingScorer() is called by the RenderView.
96 pending_classification_ = true;
97 }
98 } 75 }
99 76
100 void PhishingClassifierDelegate::CancelPendingClassification() { 77 void PhishingClassifierDelegate::CancelPendingClassification() {
101 if (classifier_->is_ready()) { 78 if (classifier_->is_ready()) {
102 classifier_->CancelPendingClassification(); 79 classifier_->CancelPendingClassification();
103 } 80 }
104 classifier_page_text_.clear(); 81 classifier_page_text_.clear();
105 pending_classification_ = false;
106 } 82 }
107 83
108 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, 84 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,
109 double phishy_score) { 85 double phishy_score) {
110 // We no longer need the page text. 86 // We no longer need the page text.
111 classifier_page_text_.clear(); 87 classifier_page_text_.clear();
112 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; 88 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;
113 if (!is_phishy) { 89 if (!is_phishy) {
114 return; 90 return;
115 } 91 }
116 92
117 render_view_->Send(new ViewHostMsg_DetectedPhishingSite( 93 render_view_->Send(new ViewHostMsg_DetectedPhishingSite(
118 render_view_->routing_id(), 94 render_view_->routing_id(),
119 last_url_sent_to_classifier_, 95 last_url_sent_to_classifier_,
120 phishy_score)); 96 phishy_score));
121 } 97 }
122 98
123 GURL PhishingClassifierDelegate::StripToplevelUrl() { 99 GURL PhishingClassifierDelegate::StripToplevelUrl() {
124 GURL toplevel_url = render_view_->webview()->mainFrame()->url(); 100 return StripRef(render_view_->webview()->mainFrame()->url());
125 GURL::Replacements replacements; 101 }
126 replacements.ClearRef(); 102
127 return toplevel_url.ReplaceComponents(replacements); 103 void PhishingClassifierDelegate::MaybeStartClassification() {
104 // We can begin phishing classification when the following conditions are
105 // met:
106 // 1. A Scorer has been created
107 // 2. The browser has sent a StartPhishingDetection message for the current
108 // toplevel URL.
109 // 3. The page has finished loading and the page text has been extracted.
110 // 4. The load is a new navigation (not a session history navigation).
111 // 5. The toplevel URL has not already been classified.
112 //
113 // Note that if we determine that this particular navigation should not be
114 // classified at all (as opposed to deferring it until we get an IPC or the
115 // load completes), we discard the page text since it won't be needed.
116 if (!classifier_->is_ready()) {
117 VLOG(2) << "Not starting classification, no Scorer created.";
118 // Keep classifier_page_text_, in case a Scorer is set later.
119 return;
120 }
121
122 if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) {
123 // Skip loads from session history navigation.
124 VLOG(2) << "Not starting classification, last finished load id is "
125 << last_finished_load_id_ << " but we have classified up to "
126 << "load id " << last_page_id_sent_to_classifier_;
127 classifier_page_text_.clear(); // we won't need this.
128 return;
129 }
130
131 if (last_finished_load_id_ != render_view_->page_id()) {
132 VLOG(2) << "Render view page has changed, not starting classification";
133 classifier_page_text_.clear(); // we won't need this.
134 return;
135 }
136 // If the page id is unchanged, the toplevel URL should also be unchanged.
137 DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_);
138
139 if (last_finished_load_url_ == last_url_sent_to_classifier_) {
140 // We've already classified this toplevel URL, so this was likely an
141 // in-page navigation or a subframe navigation. The browser should not
142 // send a StartPhishingDetection IPC in this case.
143 VLOG(2) << "Toplevel URL is unchanged, not starting classification.";
144 classifier_page_text_.clear(); // we won't need this.
145 return;
146 }
147
lzheng 2011/02/04 20:05:22 Can you add a comment here to explain that this me
Brian Ryner 2011/02/10 01:12:52 Done.
148 if (last_url_received_from_browser_ != last_finished_load_url_) {
149 VLOG(2) << "Not starting classification, last url from browser is "
150 << last_url_received_from_browser_ << ", last finished load is "
151 << last_finished_load_url_;
152 // Keep classifier_page_text_, in case the browser notifies us later that
153 // we should classify the URL.
154 return;
155 }
156
157 VLOG(2) << "Starting classification for " << last_finished_load_url_;
158 last_url_sent_to_classifier_ = last_finished_load_url_;
159 last_page_id_sent_to_classifier_ = last_finished_load_id_;
160 classifier_->BeginClassification(
161 &classifier_page_text_,
162 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
128 } 163 }
129 164
130 } // namespace safe_browsing 165 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698