| OLD | NEW |
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" | 5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" |
| 6 | 6 |
| 7 #include <set> | 7 #include <set> |
| 8 | 8 |
| 9 #include "base/callback.h" | 9 #include "base/callback.h" |
| 10 #include "base/lazy_instance.h" | 10 #include "base/lazy_instance.h" |
| 11 #include "base/logging.h" | 11 #include "base/logging.h" |
| 12 #include "base/scoped_callback_factory.h" | 12 #include "base/scoped_callback_factory.h" |
| 13 #include "chrome/common/render_messages.h" | 13 #include "chrome/common/render_messages.h" |
| 14 #include "chrome/renderer/navigation_state.h" | 14 #include "chrome/renderer/navigation_state.h" |
| 15 #include "chrome/renderer/render_thread.h" | 15 #include "chrome/renderer/render_thread.h" |
| 16 #include "chrome/renderer/render_view.h" | 16 #include "chrome/renderer/render_view.h" |
| 17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" | 17 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" |
| 18 #include "chrome/renderer/safe_browsing/phishing_classifier.h" | 18 #include "chrome/renderer/safe_browsing/phishing_classifier.h" |
| 19 #include "chrome/renderer/safe_browsing/scorer.h" | 19 #include "chrome/renderer/safe_browsing/scorer.h" |
| 20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" | 20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" |
| 21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" | 21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" |
| 22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" | 22 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" |
| 23 | 23 |
| 24 namespace safe_browsing { | 24 namespace safe_browsing { |
| 25 | 25 |
| 26 |
| 27 static GURL StripRef(const GURL& url) { |
| 28 GURL::Replacements replacements; |
| 29 replacements.ClearRef(); |
| 30 return url.ReplaceComponents(replacements); |
| 31 } |
| 32 |
| 26 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates; | 33 typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates; |
| 27 static base::LazyInstance<PhishingClassifierDelegates> | 34 static base::LazyInstance<PhishingClassifierDelegates> |
| 28 g_delegates(base::LINKER_INITIALIZED); | 35 g_delegates(base::LINKER_INITIALIZED); |
| 29 | 36 |
| 30 static base::LazyInstance<scoped_ptr<const safe_browsing::Scorer> > | 37 static base::LazyInstance<scoped_ptr<const safe_browsing::Scorer> > |
| 31 g_phishing_scorer(base::LINKER_INITIALIZED); | 38 g_phishing_scorer(base::LINKER_INITIALIZED); |
| 32 | 39 |
| 33 class ScorerCallback { | 40 class ScorerCallback { |
| 34 public: | 41 public: |
| 35 static Scorer::CreationCallback* CreateCallback() { | 42 static Scorer::CreationCallback* CreateCallback() { |
| (...skipping 32 matching lines...) | (...skipping 32 matching lines...) |
| 68 safe_browsing::Scorer::CreateFromFile( | 75 safe_browsing::Scorer::CreateFromFile( |
| 69 IPC::PlatformFileForTransitToPlatformFile(model_file), | 76 IPC::PlatformFileForTransitToPlatformFile(model_file), |
| 70 RenderThread::current()->GetFileThreadMessageLoopProxy(), | 77 RenderThread::current()->GetFileThreadMessageLoopProxy(), |
| 71 ScorerCallback::CreateCallback()); | 78 ScorerCallback::CreateCallback()); |
| 72 } | 79 } |
| 73 | 80 |
| 74 PhishingClassifierDelegate::PhishingClassifierDelegate( | 81 PhishingClassifierDelegate::PhishingClassifierDelegate( |
| 75 RenderView* render_view, | 82 RenderView* render_view, |
| 76 PhishingClassifier* classifier) | 83 PhishingClassifier* classifier) |
| 77 : RenderViewObserver(render_view), | 84 : RenderViewObserver(render_view), |
| 78 last_page_id_sent_to_classifier_(-1), | 85 last_finished_load_id_(-1), |
| 79 pending_classification_(false) { | 86 last_page_id_sent_to_classifier_(-1) { |
| 80 g_delegates.Get().insert(this); | 87 g_delegates.Get().insert(this); |
| 81 if (!classifier) { | 88 if (!classifier) { |
| 82 classifier = new PhishingClassifier(render_view, | 89 classifier = new PhishingClassifier(render_view, |
| 83 new FeatureExtractorClock()); | 90 new FeatureExtractorClock()); |
| 84 } | 91 } |
| 85 | 92 |
| 86 classifier_.reset(classifier); | 93 classifier_.reset(classifier); |
| 87 | 94 |
| 88 if (g_phishing_scorer.Get().get()) | 95 if (g_phishing_scorer.Get().get()) |
| 89 SetPhishingScorer(g_phishing_scorer.Get().get()); | 96 SetPhishingScorer(g_phishing_scorer.Get().get()); |
| 90 } | 97 } |
| 91 | 98 |
| 92 PhishingClassifierDelegate::~PhishingClassifierDelegate() { | 99 PhishingClassifierDelegate::~PhishingClassifierDelegate() { |
| 93 CancelPendingClassification(); | 100 CancelPendingClassification(); |
| 94 g_delegates.Get().erase(this); | 101 g_delegates.Get().erase(this); |
| 95 } | 102 } |
| 96 | 103 |
| 97 void PhishingClassifierDelegate::SetPhishingScorer( | 104 void PhishingClassifierDelegate::SetPhishingScorer( |
| 98 const safe_browsing::Scorer* scorer) { | 105 const safe_browsing::Scorer* scorer) { |
| 99 if (!render_view()->webview()) | 106 if (!render_view()->webview()) |
| 100 return; // RenderView is tearing down. | 107 return; // RenderView is tearing down. |
| 101 | 108 |
| 102 classifier_->set_phishing_scorer(scorer); | 109 classifier_->set_phishing_scorer(scorer); |
| 110 // Start classifying the current page if all conditions are met. |
| 111 // See MaybeStartClassification() for details. |
| 112 MaybeStartClassification(); |
| 113 } |
| 103 | 114 |
| 104 if (pending_classification_) { | 115 |
| 105 pending_classification_ = false; | 116 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) { |
| 106 // If we have a pending classification, it should always be true that the | 117 last_url_received_from_browser_ = StripRef(url); |
| 107 // main frame URL and page id have not changed since we queued the | 118 // Start classifying the current page if all conditions are met. |
| 108 // classification. This is because we stop any pending classification on | 119 // See MaybeStartClassification() for details. |
| 109 // main frame loads in RenderView::didCommitProvisionalLoad(). | 120 MaybeStartClassification(); |
| 110 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_); | |
| 111 DCHECK_EQ(render_view()->page_id(), last_page_id_sent_to_classifier_); | |
| 112 classifier_->BeginClassification( | |
| 113 &classifier_page_text_, | |
| 114 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); | |
| 115 } | |
| 116 } | 121 } |
| 117 | 122 |
| 118 void PhishingClassifierDelegate::DidCommitProvisionalLoad( | 123 void PhishingClassifierDelegate::DidCommitProvisionalLoad( |
| 119 WebKit::WebFrame* frame, bool is_new_navigation) { | 124 WebKit::WebFrame* frame, bool is_new_navigation) { |
| 120 // A new page is starting to load. Unless the load is a navigation within | 125 // A new page is starting to load. Unless the load is a navigation within |
| 121 // the same page, we need to cancel classification since the content will | 126 // the same page, we need to cancel classification since we may get an |
| 122 // now be inconsistent with the phishing model. | 127 // inconsistent result. |
| 123 NavigationState* state = NavigationState::FromDataSource( | 128 NavigationState* state = NavigationState::FromDataSource( |
| 124 frame->dataSource()); | 129 frame->dataSource()); |
| 125 if (!state->was_within_same_page()) { | 130 if (!state->was_within_same_page()) { |
| 126 CancelPendingClassification(); | 131 CancelPendingClassification(); |
| 127 } | 132 } |
| 128 } | 133 } |
| 129 | 134 |
| 130 void PhishingClassifierDelegate::PageCaptured(const string16& page_text) { | 135 void PhishingClassifierDelegate::PageCaptured(const string16& page_text) { |
| 131 // We check that the page id has incremented so that we don't reclassify | 136 last_finished_load_id_ = render_view()->page_id(); |
| 132 // pages as the user moves back and forward in session history. Note: we | 137 last_finished_load_url_ = StripToplevelUrl(); |
| 133 // don't send every page id to the classifier, only those where the toplevel | |
| 134 // URL changed. | |
| 135 int load_id = render_view()->page_id(); | |
| 136 if (load_id <= last_page_id_sent_to_classifier_) { | |
| 137 return; | |
| 138 } | |
| 139 | |
| 140 GURL url_without_ref = StripToplevelUrl(); | |
| 141 if (url_without_ref == last_url_sent_to_classifier_) { | |
| 142 // The toplevel URL is the same, except for the ref. | |
| 143 // Update the last page id we sent, but don't trigger a new classification. | |
| 144 last_page_id_sent_to_classifier_ = load_id; | |
| 145 return; | |
| 146 } | |
| 147 | |
| 148 last_url_sent_to_classifier_ = url_without_ref; | |
| 149 last_page_id_sent_to_classifier_ = load_id; | |
| 150 classifier_page_text_ = page_text; | 138 classifier_page_text_ = page_text; |
| 151 | 139 MaybeStartClassification(); |
| 152 if (classifier_->is_ready()) { | |
| 153 classifier_->BeginClassification( | |
| 154 &classifier_page_text_, | |
| 155 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); | |
| 156 } else { | |
| 157 // If there is no phishing classifier yet, we'll begin classification once | |
| 158 // SetPhishingScorer() is called by the RenderView. | |
| 159 pending_classification_ = true; | |
| 160 } | |
| 161 } | 140 } |
| 162 | 141 |
| 163 void PhishingClassifierDelegate::CancelPendingClassification() { | 142 void PhishingClassifierDelegate::CancelPendingClassification() { |
| 164 if (classifier_->is_ready()) { | 143 if (classifier_->is_ready()) { |
| 165 classifier_->CancelPendingClassification(); | 144 classifier_->CancelPendingClassification(); |
| 166 } | 145 } |
| 167 classifier_page_text_.clear(); | 146 classifier_page_text_.clear(); |
| 168 pending_classification_ = false; | |
| 169 } | 147 } |
| 170 | 148 |
| 171 bool PhishingClassifierDelegate::OnMessageReceived( | 149 bool PhishingClassifierDelegate::OnMessageReceived( |
| 172 const IPC::Message& message) { | 150 const IPC::Message& message) { |
| 173 /* | |
| 174 bool handled = true; | 151 bool handled = true; |
| 175 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message) | 152 IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message) |
| 153 IPC_MESSAGE_HANDLER(ViewMsg_StartPhishingDetection, |
| 154 OnStartPhishingDetection) |
| 176 IPC_MESSAGE_UNHANDLED(handled = false) | 155 IPC_MESSAGE_UNHANDLED(handled = false) |
| 177 IPC_END_MESSAGE_MAP() | 156 IPC_END_MESSAGE_MAP() |
| 178 return handled; | 157 return handled; |
| 179 */ | |
| 180 return false; | |
| 181 } | 158 } |
| 182 | 159 |
| 183 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, | 160 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, |
| 184 double phishy_score) { | 161 double phishy_score) { |
| 185 // We no longer need the page text. | 162 // We no longer need the page text. |
| 186 classifier_page_text_.clear(); | 163 classifier_page_text_.clear(); |
| 187 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; | 164 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; |
| 188 if (!is_phishy) { | 165 if (!is_phishy) { |
| 189 return; | 166 return; |
| 190 } | 167 } |
| 191 | 168 |
| 192 render_view()->Send(new ViewHostMsg_DetectedPhishingSite( | 169 render_view()->Send(new ViewHostMsg_DetectedPhishingSite( |
| 193 render_view()->routing_id(), | 170 render_view()->routing_id(), |
| 194 last_url_sent_to_classifier_, | 171 last_url_sent_to_classifier_, |
| 195 phishy_score)); | 172 phishy_score)); |
| 196 } | 173 } |
| 197 | 174 |
| 198 GURL PhishingClassifierDelegate::StripToplevelUrl() { | 175 GURL PhishingClassifierDelegate::StripToplevelUrl() { |
| 199 GURL toplevel_url = render_view()->webview()->mainFrame()->url(); | 176 return StripRef(render_view()->webview()->mainFrame()->url()); |
| 200 GURL::Replacements replacements; | 177 } |
| 201 replacements.ClearRef(); | 178 |
| 202 return toplevel_url.ReplaceComponents(replacements); | 179 void PhishingClassifierDelegate::MaybeStartClassification() { |
| 180 // We can begin phishing classification when the following conditions are |
| 181 // met: |
| 182 // 1. A Scorer has been created |
| 183 // 2. The browser has sent a StartPhishingDetection message for the current |
| 184 // toplevel URL. |
| 185 // 3. The page has finished loading and the page text has been extracted. |
| 186 // 4. The load is a new navigation (not a session history navigation). |
| 187 // 5. The toplevel URL has not already been classified. |
| 188 // |
| 189 // Note that if we determine that this particular navigation should not be |
| 190 // classified at all (as opposed to deferring it until we get an IPC or the |
| 191 // load completes), we discard the page text since it won't be needed. |
| 192 if (!classifier_->is_ready()) { |
| 193 VLOG(2) << "Not starting classification, no Scorer created."; |
| 194 // Keep classifier_page_text_, in case a Scorer is set later. |
| 195 return; |
| 196 } |
| 197 |
| 198 if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) { |
| 199 // Skip loads from session history navigation. |
| 200 VLOG(2) << "Not starting classification, last finished load id is " |
| 201 << last_finished_load_id_ << " but we have classified up to " |
| 202 << "load id " << last_page_id_sent_to_classifier_; |
| 203 classifier_page_text_.clear(); // we won't need this. |
| 204 return; |
| 205 } |
| 206 |
| 207 if (last_finished_load_id_ != render_view()->page_id()) { |
| 208 VLOG(2) << "Render view page has changed, not starting classification"; |
| 209 classifier_page_text_.clear(); // we won't need this. |
| 210 return; |
| 211 } |
| 212 // If the page id is unchanged, the toplevel URL should also be unchanged. |
| 213 DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_); |
| 214 |
| 215 if (last_finished_load_url_ == last_url_sent_to_classifier_) { |
| 216 // We've already classified this toplevel URL, so this was likely an |
| 217 // in-page navigation or a subframe navigation. The browser should not |
| 218 // send a StartPhishingDetection IPC in this case. |
| 219 VLOG(2) << "Toplevel URL is unchanged, not starting classification."; |
| 220 classifier_page_text_.clear(); // we won't need this. |
| 221 return; |
| 222 } |
| 223 |
| 224 if (last_url_received_from_browser_ != last_finished_load_url_) { |
| 225 // The browser has not yet confirmed that this URL should be classified, |
| 226 // so defer classification for now. |
| 227 VLOG(2) << "Not starting classification, last url from browser is " |
| 228 << last_url_received_from_browser_ << ", last finished load is " |
| 229 << last_finished_load_url_; |
| 230 // Keep classifier_page_text_, in case the browser notifies us later that |
| 231 // we should classify the URL. |
| 232 return; |
| 233 } |
| 234 |
| 235 VLOG(2) << "Starting classification for " << last_finished_load_url_; |
| 236 last_url_sent_to_classifier_ = last_finished_load_url_; |
| 237 last_page_id_sent_to_classifier_ = last_finished_load_id_; |
| 238 classifier_->BeginClassification( |
| 239 &classifier_page_text_, |
| 240 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
| 203 } | 241 } |
| 204 | 242 |
| 205 } // namespace safe_browsing | 243 } // namespace safe_browsing |
| OLD | NEW |