Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(545)

Side by Side Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Merge to trunk and address review comments Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" 5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h"
6 6
7 #include <set> 7 #include <set>
8 8
9 #include "base/callback.h" 9 #include "base/callback.h"
10 #include "base/lazy_instance.h" 10 #include "base/lazy_instance.h"
11 #include "base/logging.h" 11 #include "base/logging.h"
12 #include "base/scoped_callback_factory.h" 12 #include "base/scoped_callback_factory.h"
13 #include "chrome/common/render_messages.h" 13 #include "chrome/common/render_messages.h"
14 #include "chrome/renderer/navigation_state.h" 14 #include "chrome/renderer/navigation_state.h"
15 #include "chrome/renderer/render_thread.h" 15 #include "chrome/renderer/render_thread.h"
16 #include "chrome/renderer/render_view.h" 16 #include "chrome/renderer/render_view.h"
17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" 17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
18 #include "chrome/renderer/safe_browsing/phishing_classifier.h" 18 #include "chrome/renderer/safe_browsing/phishing_classifier.h"
19 #include "chrome/renderer/safe_browsing/scorer.h" 19 #include "chrome/renderer/safe_browsing/scorer.h"
20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" 20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" 21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h"
22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" 22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"
23 23
24 namespace safe_browsing { 24 namespace safe_browsing {
25 25
26
27 static GURL StripRef(const GURL& url) {
28 GURL::Replacements replacements;
29 replacements.ClearRef();
30 return url.ReplaceComponents(replacements);
31 }
32
26 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates; 33 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates;
27 static base::LazyInstance<PhishingClassifierDelegates> 34 static base::LazyInstance<PhishingClassifierDelegates>
28 g_delegates(base::LINKER_INITIALIZED); 35 g_delegates(base::LINKER_INITIALIZED);
29 36
30 static base::LazyInstance<scoped_ptr<const safe_browsing::Scorer> > 37 static base::LazyInstance<scoped_ptr<const safe_browsing::Scorer> >
31 g_phishing_scorer(base::LINKER_INITIALIZED); 38 g_phishing_scorer(base::LINKER_INITIALIZED);
32 39
33 class ScorerCallback { 40 class ScorerCallback {
34 public: 41 public:
35 static Scorer::CreationCallback* CreateCallback() { 42 static Scorer::CreationCallback* CreateCallback() {
(...skipping 32 matching lines...) Expand 10 before | Expand all | Expand 10 after
68 safe_browsing::Scorer::CreateFromFile( 75 safe_browsing::Scorer::CreateFromFile(
69 IPC::PlatformFileForTransitToPlatformFile(model_file), 76 IPC::PlatformFileForTransitToPlatformFile(model_file),
70 RenderThread::current()->GetFileThreadMessageLoopProxy(), 77 RenderThread::current()->GetFileThreadMessageLoopProxy(),
71 ScorerCallback::CreateCallback()); 78 ScorerCallback::CreateCallback());
72 } 79 }
73 80
74 PhishingClassifierDelegate::PhishingClassifierDelegate( 81 PhishingClassifierDelegate::PhishingClassifierDelegate(
75 RenderView* render_view, 82 RenderView* render_view,
76 PhishingClassifier* classifier) 83 PhishingClassifier* classifier)
77 : RenderViewObserver(render_view), 84 : RenderViewObserver(render_view),
78 last_page_id_sent_to_classifier_(-1), 85 last_finished_load_id_(-1),
79 pending_classification_(false) { 86 last_page_id_sent_to_classifier_(-1) {
80 g_delegates.Get().insert(this); 87 g_delegates.Get().insert(this);
81 if (!classifier) { 88 if (!classifier) {
82 classifier = new PhishingClassifier(render_view, 89 classifier = new PhishingClassifier(render_view,
83 new FeatureExtractorClock()); 90 new FeatureExtractorClock());
84 } 91 }
85 92
86 classifier_.reset(classifier); 93 classifier_.reset(classifier);
87 94
88 if (g_phishing_scorer.Get().get()) 95 if (g_phishing_scorer.Get().get())
89 SetPhishingScorer(g_phishing_scorer.Get().get()); 96 SetPhishingScorer(g_phishing_scorer.Get().get());
90 } 97 }
91 98
92 PhishingClassifierDelegate::~PhishingClassifierDelegate() { 99 PhishingClassifierDelegate::~PhishingClassifierDelegate() {
93 CancelPendingClassification(); 100 CancelPendingClassification();
94 g_delegates.Get().erase(this); 101 g_delegates.Get().erase(this);
95 } 102 }
96 103
97 void PhishingClassifierDelegate::SetPhishingScorer( 104 void PhishingClassifierDelegate::SetPhishingScorer(
98 const safe_browsing::Scorer* scorer) { 105 const safe_browsing::Scorer* scorer) {
99 if (!render_view()->webview()) 106 if (!render_view()->webview())
100 return; // RenderView is tearing down. 107 return; // RenderView is tearing down.
101 108
102 classifier_->set_phishing_scorer(scorer); 109 classifier_->set_phishing_scorer(scorer);
110 // Start classifying the current page if all conditions are met.
111 // See MaybeStartClassification() for details.
112 MaybeStartClassification();
113 }
103 114
104 if (pending_classification_) { 115
105 pending_classification_ = false; 116 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {
106 // If we have a pending classification, it should always be true that the 117 last_url_received_from_browser_ = StripRef(url);
107 // main frame URL and page id have not changed since we queued the 118 // Start classifying the current page if all conditions are met.
108 // classification. This is because we stop any pending classification on 119 // See MaybeStartClassification() for details.
109 // main frame loads in RenderView::didCommitProvisionalLoad(). 120 MaybeStartClassification();
110 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);
111 DCHECK_EQ(render_view()->page_id(), last_page_id_sent_to_classifier_);
112 classifier_->BeginClassification(
113 &classifier_page_text_,
114 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
115 }
116 } 121 }
117 122
118 void PhishingClassifierDelegate::DidCommitProvisionalLoad( 123 void PhishingClassifierDelegate::DidCommitProvisionalLoad(
119 WebKit::WebFrame* frame, bool is_new_navigation) { 124 WebKit::WebFrame* frame, bool is_new_navigation) {
120 // A new page is starting to load. Unless the load is a navigation within 125 // A new page is starting to load. Unless the load is a navigation within
121 // the same page, we need to cancel classification since the content will 126 // the same page, we need to cancel classification since we may get an
122 // now be inconsistent with the phishing model. 127 // inconsistent result.
123 NavigationState* state = NavigationState::FromDataSource( 128 NavigationState* state = NavigationState::FromDataSource(
124 frame->dataSource()); 129 frame->dataSource());
125 if (!state->was_within_same_page()) { 130 if (!state->was_within_same_page()) {
126 CancelPendingClassification(); 131 CancelPendingClassification();
127 } 132 }
128 } 133 }
129 134
130 void PhishingClassifierDelegate::PageCaptured(const string16& page_text) { 135 void PhishingClassifierDelegate::PageCaptured(const string16& page_text) {
131 // We check that the page id has incremented so that we don't reclassify 136 last_finished_load_id_ = render_view()->page_id();
132 // pages as the user moves back and forward in session history. Note: we 137 last_finished_load_url_ = StripToplevelUrl();
133 // don't send every page id to the classifier, only those where the toplevel
134 // URL changed.
135 int load_id = render_view()->page_id();
136 if (load_id <= last_page_id_sent_to_classifier_) {
137 return;
138 }
139
140 GURL url_without_ref = StripToplevelUrl();
141 if (url_without_ref == last_url_sent_to_classifier_) {
142 // The toplevel URL is the same, except for the ref.
143 // Update the last page id we sent, but don't trigger a new classification.
144 last_page_id_sent_to_classifier_ = load_id;
145 return;
146 }
147
148 last_url_sent_to_classifier_ = url_without_ref;
149 last_page_id_sent_to_classifier_ = load_id;
150 classifier_page_text_ = page_text; 138 classifier_page_text_ = page_text;
151 139 MaybeStartClassification();
152 if (classifier_->is_ready()) {
153 classifier_->BeginClassification(
154 &classifier_page_text_,
155 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
156 } else {
157 // If there is no phishing classifier yet, we'll begin classification once
158 // SetPhishingScorer() is called by the RenderView.
159 pending_classification_ = true;
160 }
161 } 140 }
162 141
163 void PhishingClassifierDelegate::CancelPendingClassification() { 142 void PhishingClassifierDelegate::CancelPendingClassification() {
164 if (classifier_->is_ready()) { 143 if (classifier_->is_ready()) {
165 classifier_->CancelPendingClassification(); 144 classifier_->CancelPendingClassification();
166 } 145 }
167 classifier_page_text_.clear(); 146 classifier_page_text_.clear();
168 pending_classification_ = false;
169 } 147 }
170 148
171 bool PhishingClassifierDelegate::OnMessageReceived( 149 bool PhishingClassifierDelegate::OnMessageReceived(
172 const IPC::Message& message) { 150 const IPC::Message& message) {
173 /*
174 bool handled = true; 151 bool handled = true;
175 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message) 152 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message)
153 IPC_MESSAGE_HANDLER(ViewMsg_StartPhishingDetection,
154 OnStartPhishingDetection)
176 IPC_MESSAGE_UNHANDLED(handled = false) 155 IPC_MESSAGE_UNHANDLED(handled = false)
177 IPC_END_MESSAGE_MAP() 156 IPC_END_MESSAGE_MAP()
178 return handled; 157 return handled;
179 */
180 return false;
181 } 158 }
182 159
183 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, 160 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,
184 double phishy_score) { 161 double phishy_score) {
185 // We no longer need the page text. 162 // We no longer need the page text.
186 classifier_page_text_.clear(); 163 classifier_page_text_.clear();
187 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; 164 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score;
188 if (!is_phishy) { 165 if (!is_phishy) {
189 return; 166 return;
190 } 167 }
191 168
192 render_view()->Send(new ViewHostMsg_DetectedPhishingSite( 169 render_view()->Send(new ViewHostMsg_DetectedPhishingSite(
193 render_view()->routing_id(), 170 render_view()->routing_id(),
194 last_url_sent_to_classifier_, 171 last_url_sent_to_classifier_,
195 phishy_score)); 172 phishy_score));
196 } 173 }
197 174
198 GURL PhishingClassifierDelegate::StripToplevelUrl() { 175 GURL PhishingClassifierDelegate::StripToplevelUrl() {
199 GURL toplevel_url = render_view()->webview()->mainFrame()->url(); 176 return StripRef(render_view()->webview()->mainFrame()->url());
200 GURL::Replacements replacements; 177 }
201 replacements.ClearRef(); 178
202 return toplevel_url.ReplaceComponents(replacements); 179 void PhishingClassifierDelegate::MaybeStartClassification() {
180 // We can begin phishing classification when the following conditions are
181 // met:
182 // 1. A Scorer has been created
183 // 2. The browser has sent a StartPhishingDetection message for the current
184 // toplevel URL.
185 // 3. The page has finished loading and the page text has been extracted.
186 // 4. The load is a new navigation (not a session history navigation).
187 // 5. The toplevel URL has not already been classified.
188 //
189 // Note that if we determine that this particular navigation should not be
190 // classified at all (as opposed to deferring it until we get an IPC or the
191 // load completes), we discard the page text since it won't be needed.
192 if (!classifier_->is_ready()) {
193 VLOG(2) << "Not starting classification, no Scorer created.";
194 // Keep classifier_page_text_, in case a Scorer is set later.
195 return;
196 }
197
198 if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) {
199 // Skip loads from session history navigation.
200 VLOG(2) << "Not starting classification, last finished load id is "
201 << last_finished_load_id_ << " but we have classified up to "
202 << "load id " << last_page_id_sent_to_classifier_;
203 classifier_page_text_.clear(); // we won't need this.
204 return;
205 }
206
207 if (last_finished_load_id_ != render_view()->page_id()) {
208 VLOG(2) << "Render view page has changed, not starting classification";
209 classifier_page_text_.clear(); // we won't need this.
210 return;
211 }
212 // If the page id is unchanged, the toplevel URL should also be unchanged.
213 DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_);
214
215 if (last_finished_load_url_ == last_url_sent_to_classifier_) {
216 // We've already classified this toplevel URL, so this was likely an
217 // in-page navigation or a subframe navigation. The browser should not
218 // send a StartPhishingDetection IPC in this case.
219 VLOG(2) << "Toplevel URL is unchanged, not starting classification.";
220 classifier_page_text_.clear(); // we won't need this.
221 return;
222 }
223
224 if (last_url_received_from_browser_ != last_finished_load_url_) {
225 // The browser has not yet confirmed that this URL should be classified,
226 // so defer classification for now.
227 VLOG(2) << "Not starting classification, last url from browser is "
228 << last_url_received_from_browser_ << ", last finished load is "
229 << last_finished_load_url_;
230 // Keep classifier_page_text_, in case the browser notifies us later that
231 // we should classify the URL.
232 return;
233 }
234
235 VLOG(2) << "Starting classification for " << last_finished_load_url_;
236 last_url_sent_to_classifier_ = last_finished_load_url_;
237 last_page_id_sent_to_classifier_ = last_finished_load_id_;
238 classifier_->BeginClassification(
239 &classifier_page_text_,
240 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
203 } 241 }
204 242
205 } // namespace safe_browsing 243 } // namespace safe_browsing
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698