Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" | 5 #include "chrome/renderer/safe_browsing/phishing_classifier_delegate.h" |
| 6 | 6 |
| 7 #include "base/callback.h" | 7 #include "base/callback.h" |
| 8 #include "base/logging.h" | 8 #include "base/logging.h" |
| 9 #include "chrome/common/render_messages.h" | 9 #include "chrome/common/render_messages.h" |
| 10 #include "chrome/renderer/navigation_state.h" | 10 #include "chrome/renderer/navigation_state.h" |
| 11 #include "chrome/renderer/render_view.h" | 11 #include "chrome/renderer/render_view.h" |
| 12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" | 12 #include "chrome/renderer/safe_browsing/feature_extractor_clock.h" |
| 13 #include "chrome/renderer/safe_browsing/phishing_classifier.h" | 13 #include "chrome/renderer/safe_browsing/phishing_classifier.h" |
| 14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" | 14 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" |
| 15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" | 15 #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" |
| 16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" | 16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" |
| 17 | 17 |
| 18 namespace safe_browsing { | 18 namespace safe_browsing { |
| 19 | 19 |
| 20 static GURL StripRef(const GURL& url) { | |
| 21 GURL::Replacements replacements; | |
| 22 replacements.ClearRef(); | |
| 23 return url.ReplaceComponents(replacements); | |
| 24 } | |
| 25 | |
| 20 PhishingClassifierDelegate::PhishingClassifierDelegate( | 26 PhishingClassifierDelegate::PhishingClassifierDelegate( |
| 21 RenderView* render_view, | 27 RenderView* render_view, |
| 22 PhishingClassifier* classifier) | 28 PhishingClassifier* classifier) |
| 23 : render_view_(render_view), | 29 : render_view_(render_view), |
| 24 last_page_id_sent_to_classifier_(-1), | 30 last_finished_load_id_(-1), |
| 25 pending_classification_(false) { | 31 last_page_id_sent_to_classifier_(-1) { |
| 26 if (!classifier) { | 32 if (!classifier) { |
| 27 classifier = new PhishingClassifier(render_view_, | 33 classifier = new PhishingClassifier(render_view_, |
| 28 new FeatureExtractorClock()); | 34 new FeatureExtractorClock()); |
| 29 } | 35 } |
| 30 classifier_.reset(classifier); | 36 classifier_.reset(classifier); |
| 31 } | 37 } |
| 32 | 38 |
| 33 PhishingClassifierDelegate::~PhishingClassifierDelegate() { | 39 PhishingClassifierDelegate::~PhishingClassifierDelegate() { |
| 34 CancelPendingClassification(); | 40 CancelPendingClassification(); |
| 35 } | 41 } |
| 36 | 42 |
| 37 void PhishingClassifierDelegate::SetPhishingScorer( | 43 void PhishingClassifierDelegate::SetPhishingScorer( |
| 38 const safe_browsing::Scorer* scorer) { | 44 const safe_browsing::Scorer* scorer) { |
| 39 classifier_->set_phishing_scorer(scorer); | 45 classifier_->set_phishing_scorer(scorer); |
| 46 // Start classifying the current page if all conditions are met. | |
| 47 // See MaybeStartClassification() for details. | |
| 48 MaybeStartClassification(); | |
| 49 } | |
| 40 | 50 |
| 41 if (pending_classification_) { | 51 void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) { |
| 42 pending_classification_ = false; | 52 last_url_received_from_browser_ = StripRef(url); |
| 43 // If we have a pending classificaton, it should always be true that the | 53 // Start classifying the current page if all conditions are met. |
| 44 // main frame URL and page id have not changed since we queued the | 54 // See MaybeStartClassification() for details. |
| 45 // classification. This is because we stop any pending classification on | 55 MaybeStartClassification(); |
| 46 // main frame loads in RenderView::didCommitProvisionalLoad(). | |
| 47 DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_); | |
| 48 DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_); | |
| 49 classifier_->BeginClassification( | |
| 50 &classifier_page_text_, | |
| 51 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); | |
| 52 } | |
| 53 } | 56 } |
| 54 | 57 |
| 55 void PhishingClassifierDelegate::CommittedLoadInFrame( | 58 void PhishingClassifierDelegate::CommittedLoadInFrame( |
| 56 WebKit::WebFrame* frame) { | 59 WebKit::WebFrame* frame) { |
| 57 // A new page is starting to load. Unless the load is a navigation within | 60 // A new page is starting to load. Unless the load is a navigation within |
| 58 // the same page, we need to cancel classification since the content will | 61 // the same page, we need to cancel classification since we may get an |
| 59 // now be inconsistent with the phishing model. | 62 // inconsistent result. |
| 60 NavigationState* state = NavigationState::FromDataSource( | 63 NavigationState* state = NavigationState::FromDataSource( |
| 61 frame->dataSource()); | 64 frame->dataSource()); |
| 62 if (!state->was_within_same_page()) { | 65 if (!state->was_within_same_page()) { |
| 63 CancelPendingClassification(); | 66 CancelPendingClassification(); |
| 64 } | 67 } |
| 65 } | 68 } |
| 66 | 69 |
| 67 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) { | 70 void PhishingClassifierDelegate::FinishedLoad(string16* page_text) { |
| 68 // We check that the page id has incremented so that we don't reclassify | 71 last_finished_load_id_ = render_view_->page_id(); |
| 69 // pages as the user moves back and forward in session history. Note: we | 72 last_finished_load_url_ = StripToplevelUrl(); |
| 70 // don't send every page id to the classifier, only those where the toplevel | |
| 71 // URL changed. | |
| 72 int load_id = render_view_->page_id(); | |
| 73 if (load_id <= last_page_id_sent_to_classifier_) { | |
| 74 return; | |
| 75 } | |
| 76 | |
| 77 GURL url_without_ref = StripToplevelUrl(); | |
| 78 if (url_without_ref == last_url_sent_to_classifier_) { | |
| 79 // The toplevle URL is the same, except for the ref. | |
| 80 // Update the last page id we sent, but don't trigger a new classification. | |
| 81 last_page_id_sent_to_classifier_ = load_id; | |
| 82 return; | |
| 83 } | |
| 84 | |
| 85 last_url_sent_to_classifier_ = url_without_ref; | |
| 86 last_page_id_sent_to_classifier_ = load_id; | |
| 87 classifier_page_text_.swap(*page_text); | 73 classifier_page_text_.swap(*page_text); |
| 88 | 74 MaybeStartClassification(); |
| 89 if (classifier_->is_ready()) { | |
| 90 classifier_->BeginClassification( | |
| 91 &classifier_page_text_, | |
| 92 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); | |
| 93 } else { | |
| 94 // If there is no phishing classifier yet, we'll begin classification once | |
| 95 // SetPhishingScorer() is called by the RenderView. | |
| 96 pending_classification_ = true; | |
| 97 } | |
| 98 } | 75 } |
| 99 | 76 |
| 100 void PhishingClassifierDelegate::CancelPendingClassification() { | 77 void PhishingClassifierDelegate::CancelPendingClassification() { |
| 101 if (classifier_->is_ready()) { | 78 if (classifier_->is_ready()) { |
| 102 classifier_->CancelPendingClassification(); | 79 classifier_->CancelPendingClassification(); |
| 103 } | 80 } |
| 104 classifier_page_text_.clear(); | 81 classifier_page_text_.clear(); |
| 105 pending_classification_ = false; | |
| 106 } | 82 } |
| 107 | 83 |
| 108 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, | 84 void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, |
| 109 double phishy_score) { | 85 double phishy_score) { |
| 110 // We no longer need the page text. | 86 // We no longer need the page text. |
| 111 classifier_page_text_.clear(); | 87 classifier_page_text_.clear(); |
| 112 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; | 88 VLOG(2) << "Phishy verdict = " << is_phishy << " score = " << phishy_score; |
| 113 if (!is_phishy) { | 89 if (!is_phishy) { |
| 114 return; | 90 return; |
| 115 } | 91 } |
| 116 | 92 |
| 117 render_view_->Send(new ViewHostMsg_DetectedPhishingSite( | 93 render_view_->Send(new ViewHostMsg_DetectedPhishingSite( |
| 118 render_view_->routing_id(), | 94 render_view_->routing_id(), |
| 119 last_url_sent_to_classifier_, | 95 last_url_sent_to_classifier_, |
| 120 phishy_score)); | 96 phishy_score)); |
| 121 } | 97 } |
| 122 | 98 |
| 123 GURL PhishingClassifierDelegate::StripToplevelUrl() { | 99 GURL PhishingClassifierDelegate::StripToplevelUrl() { |
| 124 GURL toplevel_url = render_view_->webview()->mainFrame()->url(); | 100 return StripRef(render_view_->webview()->mainFrame()->url()); |
| 125 GURL::Replacements replacements; | 101 } |
| 126 replacements.ClearRef(); | 102 |
| 127 return toplevel_url.ReplaceComponents(replacements); | 103 void PhishingClassifierDelegate::MaybeStartClassification() { |
| 104 // We can begin phishing classification when the following conditions are | |
| 105 // met: | |
| 106 // 1. A Scorer has been created | |
| 107 // 2. The browser has sent a StartPhishingDetection message for the current | |
| 108 // toplevel URL. | |
| 109 // 3. The page has finished loading and the page text has been extracted. | |
| 110 // 4. The load is a new navigation (not a session history navigation). | |
| 111 // 5. The toplevel URL has not already been classified. | |
| 112 // | |
| 113 // Note that if we determine that this particular navigation should not be | |
| 114 // classified at all (as opposed to deferring it until we get an IPC or the | |
| 115 // load completes), we discard the page text since it won't be needed. | |
| 116 if (!classifier_->is_ready()) { | |
| 117 VLOG(2) << "Not starting classification, no Scorer created."; | |
| 118 // Keep classifier_page_text_, in case a Scorer is set later. | |
| 119 return; | |
| 120 } | |
| 121 | |
| 122 if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) { | |
| 123 // Skip loads from session history navigation. | |
| 124 VLOG(2) << "Not starting classification, last finished load id is " | |
| 125 << last_finished_load_id_ << " but we have classified up to " | |
| 126 << "load id " << last_page_id_sent_to_classifier_; | |
| 127 classifier_page_text_.clear(); // we won't need this. | |
| 128 return; | |
| 129 } | |
| 130 | |
| 131 if (last_finished_load_id_ != render_view_->page_id()) { | |
| 132 VLOG(2) << "Render view page has changed, not starting classification"; | |
| 133 classifier_page_text_.clear(); // we won't need this. | |
| 134 return; | |
| 135 } | |
| 136 // If the page id is unchanged, the toplevel URL should also be unchanged. | |
| 137 DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_); | |
| 138 | |
| 139 if (last_finished_load_url_ == last_url_sent_to_classifier_) { | |
| 140 // We've already classified this toplevel URL, so this was likely an | |
| 141 // in-page navigation or a subframe navigation. The browser should not | |
| 142 // send a StartPhishingDetection IPC in this case. | |
| 143 VLOG(2) << "Toplevel URL is unchanged, not starting classification."; | |
| 144 classifier_page_text_.clear(); // we won't need this. | |
| 145 return; | |
| 146 } | |
| 147 | |
|
lzheng
2011/02/04 20:05:22
Can you add a comment here to explain that this me
Brian Ryner
2011/02/10 01:12:52
Done.
| |
| 148 if (last_url_received_from_browser_ != last_finished_load_url_) { | |
| 149 VLOG(2) << "Not starting classification, last url from browser is " | |
| 150 << last_url_received_from_browser_ << ", last finished load is " | |
| 151 << last_finished_load_url_; | |
| 152 // Keep classifier_page_text_, in case the browser notifies us later that | |
| 153 // we should classify the URL. | |
| 154 return; | |
| 155 } | |
| 156 | |
| 157 VLOG(2) << "Starting classification for " << last_finished_load_url_; | |
| 158 last_url_sent_to_classifier_ = last_finished_load_url_; | |
| 159 last_page_id_sent_to_classifier_ = last_finished_load_id_; | |
| 160 classifier_->BeginClassification( | |
| 161 &classifier_page_text_, | |
| 162 NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); | |
| 128 } | 163 } |
| 129 | 164 |
| 130 } // namespace safe_browsing | 165 } // namespace safe_browsing |
| OLD | NEW |