Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" 5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
6 6
7 #include "base/callback.h" 7 #include "base/callback.h"
8 #include "base/logging.h" 8 #include "base/logging.h"
9 #include "chrome/common/render_messages.h" 9 #include "chrome/common/render_messages.h"
10 #include "chrome/renderer/navigation_state.h" 10 #include "chrome/renderer/navigation_state.h"
11 #include "chrome/renderer/render_view.h" 11 #include "chrome/renderer/render_view.h"
12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" 12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
13 #include "chrome/renderer/safe_browsing/phishing_classifier.h" 13 #include "chrome/renderer/safe_browsing/phishing_classifier.h"
14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" 14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" 15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" 16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"
17 17
18 namespace safe_browsing { 18 namespace safe_browsing {
19 19
20 namespace {
21 GURL StripRef(const GURL& url) {
22 GURL::Replacements replacements;
23 replacements.ClearRef();
24 return url.ReplaceComponents(replacements);
25 }
26 }
27
20 PhishingClassifierDelegate::PhishingClassifierDelegate( 28 PhishingClassifierDelegate::PhishingClassifierDelegate(
21 RenderView* render_view, 29 RenderView* render_view,
22 PhishingClassifier* classifier) 30 PhishingClassifier* classifier)
23 : render_view_(render_view), 31 : render_view_(render_view),
24 last_page_id_sent_to_classifier_(-1), 32 last_finished_load_id_(-1),
25 pending_classification_(false) { 33 last_page_id_sent_to_classifier_(-1) {
26 if (!classifier) { 34 if (!classifier) {
27 classifier = new PhishingClassifier(render_view_, 35 classifier = new PhishingClassifier(render_view_,
28 new FeatureExtractorClock()); 36 new FeatureExtractorClock());
29 } 37 }
30 classifier_.reset(classifier); 38 classifier_.reset(classifier);
31 } 39 }
32 40
33 PhishingClassifierDelegate::~PhishingClassifierDelegate() { 41 PhishingClassifierDelegate::~PhishingClassifierDelegate() {
34 CancelPendingClassification(); 42 CancelPendingClassification();
35 } 43 }
36 44
37 void PhishingClassifierDelegate::SetPhishingScorer( 45 void PhishingClassifierDelegate::SetPhishingScorer(
38 const safe_browsing::Scorer* scorer) { 46 const safe_browsing::Scorer* scorer) {
39 classifier_->set_phishing_scorer(scorer); 47 classifier_->set_phishing_scorer(scorer);
48 // Start classifying the current page if all conditions are met.
49 // See MaybeStartClassification() for details.
50 MaybeStartClassification();
51 }
40 52
41 if (pending_classification_) { 53 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {
42 pending_classification_ = false; 54 last_url_received_from_browser_ = StripRef(url);
43 // If we have a pending classification, it should always be true that the 55 // Start classifying the current page if all conditions are met.
44 // main frame URL and page id have not changed since we queued the 56 // See MaybeStartClassification() for details.
45 // classification. This is because we stop any pending classification on 57 MaybeStartClassification();
46 // main frame loads in RenderView::didCommitProvisionalLoad().
47 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);
48 DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_);
49 classifier_->BeginClassification(
50 &classifier_page_text_,
51 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
52 }
53 } 58 }
54 59
55 void PhishingClassifierDelegate::CommittedLoadInFrame( 60 void PhishingClassifierDelegate::CommittedLoadInFrame(
56 WebKit::WebFrame* frame) { 61 WebKit::WebFrame* frame) {
57 // A new page is starting to load. Unless the load is a navigation within 62 // A new page is starting to load. Unless the load is a navigation within
58 // the same page, we need to cancel classification since the content will 63 // the same page, we need to cancel classification since we may get an
59 // now be inconsistent with the phishing model. 64 // inconsistent result.
60 NavigationState* state = NavigationState::FromDataSource( 65 NavigationState* state = NavigationState::FromDataSource(
61 frame->dataSource()); 66 frame->dataSource());
62 if (!state->was_within_same_page()) { 67 if (!state->was_within_same_page()) {
63 CancelPendingClassification(); 68 CancelPendingClassification();
64 } 69 }
65 } 70 }
66 71
67 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) { 72 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) {
68 // We check that the page id has incremented so that we don't reclassify 73 last_finished_load_id_ = render_view_->page_id();
69 // pages as the user moves back and forward in session history. Note: we 74 last_finished_load_url_ = StripToplevelUrl();
70 // don't send every page id to the classifier, only those where the toplevel
71 // URL changed.
72 int load_id = render_view_->page_id();
73 if (load_id <= last_page_id_sent_to_classifier_) {
74 return;
75 }
76
77 GURL url_without_ref = StripToplevelUrl();
78 if (url_without_ref == last_url_sent_to_classifier_) {
79 // The toplevel URL is the same, except for the ref.
80 // Update the last page id we sent, but don't trigger a new classification.
81 last_page_id_sent_to_classifier_ = load_id;
82 return;
83 }
84
85 last_url_sent_to_classifier_ = url_without_ref;
86 last_page_id_sent_to_classifier_ = load_id;
87 classifier_page_text_.swap(*page_text); 75 classifier_page_text_.swap(*page_text);
88 76 MaybeStartClassification();
89 if (classifier_->is_ready()) {
90 classifier_->BeginClassification(
91 &classifier_page_text_,
92 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
93 } else {
94 // If there is no phishing classifier yet, we'll begin classification once
95 // SetPhishingScorer() is called by the RenderView.
96 pending_classification_ = true;
97 }
98 } 77 }
99 78
100 void PhishingClassifierDelegate::CancelPendingClassification() { 79 void PhishingClassifierDelegate::CancelPendingClassification() {
101 if (classifier_->is_ready()) { 80 if (classifier_->is_ready()) {
102 classifier_->CancelPendingClassification(); 81 classifier_->CancelPendingClassification();
103 } 82 }
104 classifier_page_text_.clear(); 83 classifier_page_text_.clear();
105 pending_classification_ = false;
106 } 84 }
107 85
108 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, 86 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,
109 double phishy_score) { 87 double phishy_score) {
110 // We no longer need the page text. 88 // We no longer need the page text.
111 classifier_page_text_.clear(); 89 classifier_page_text_.clear();
112 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; 90 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;
113 if (!is_phishy) { 91 if (!is_phishy) {
114 return; 92 return;
115 } 93 }
116 94
117 render_view_->Send(new ViewHostMsg_DetectedPhishingSite( 95 render_view_->Send(new ViewHostMsg_DetectedPhishingSite(
118 render_view_->routing_id(), 96 render_view_->routing_id(),
119 last_url_sent_to_classifier_, 97 last_url_sent_to_classifier_,
120 phishy_score)); 98 phishy_score));
121 } 99 }
122 100
123 GURL PhishingClassifierDelegate::StripToplevelUrl() { 101 GURL PhishingClassifierDelegate::StripToplevelUrl() {
124 GURL toplevel_url = render_view_->webview()->mainFrame()->url(); 102 return StripRef(render_view_->webview()->mainFrame()->url());
125 GURL::Replacements replacements; 103 }
126 replacements.ClearRef(); 104
127 return toplevel_url.ReplaceComponents(replacements); 105 void PhishingClassifierDelegate::MaybeStartClassification() {
106 // We can begin phishing classification when the following conditions are
107 // met:
108 // 1. A Scorer has been created
109 // 2. The browser has sent a StartPhishingDetection message for the current
110 // toplevel URL.
111 // 3. The page has finished loading and the page text has been extracted.
112 // 4. The load is a new navigation (not a session history navigation).
113 // 5. The toplevel URL has not already been classified.
114 if (!classifier_->is_ready()) {
115 VLOG(2) << "Not starting classification, no Scorer created.";
116 // Keep classifier_page_text_, in case a Scorer is set later.
117 return;
118 }
119
120 if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) {
121 // Skip loads from session history navigation.
122 VLOG(2) << "Not starting classification, last finished load id is "
123 << last_finished_load_id_ << " but we have classified up to "
124 << "load id " << last_page_id_sent_to_classifier_;
125 classifier_page_text_.clear(); // we won't need this.
126 return;
127 }
128
129 if (last_finished_load_id_ != render_view_->page_id()) {
130 VLOG(2) << "Render view page has changed, not starting classification";
131 classifier_page_text_.clear(); // we won't need this.
132 return;
133 }
134 // If the page id is unchanged, the toplevel URL should also be unchanged.
135 DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_);
136
137 if (last_url_received_from_browser_ != last_finished_load_url_) {
138 VLOG(2) << "Not starting classification, last url from browser is "
139 << last_url_received_from_browser_ << ", last finished load is "
140 << last_finished_load_url_;
141 // Keep classifier_page_text_, in case the browser notifies us later that
142 // we should classify the URL.
143 return;
144 }
145
146 if (last_finished_load_url_ == last_url_sent_to_classifier_) {
147 // We've already classified this toplevel URL, so this was likely an
148 // in-page navigation or a subframe navigation. Don't classify the page a
149 // second time, but update the last classified page id for the session
150 // history check above.
151 VLOG(2) << "Toplevel URL is unchanged, not starting classification "
152 << "but updating last classified page id to "
153 << last_finished_load_id_;
154 last_page_id_sent_to_classifier_ = last_finished_load_id_;
155 classifier_page_text_.clear(); // we won't need this.
156 return;
157 }
158
159 VLOG(2) << "Starting classification for " << last_finished_load_url_;
160 last_url_sent_to_classifier_ = last_finished_load_url_;
161 last_page_id_sent_to_classifier_ = last_finished_load_id_;
162 classifier_->BeginClassification(
163 &classifier_page_text_,
164 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
128 } 165 }
129 166
130 } // namespace safe_browsing 167 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698