Chromium Code Reviews| Index: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
| diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
| index a1364ed3ab1189201a7173e3af2dfc93d7a8ec06..add398f8ad52c271f89c27aa85fd564246d725cc 100644 |
| --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
| +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
| @@ -17,12 +17,18 @@ |
| namespace safe_browsing { |
| +static GURL StripRef(const GURL& url) { |
| + GURL::Replacements replacements; |
| + replacements.ClearRef(); |
| + return url.ReplaceComponents(replacements); |
| +} |
| + |
| PhishingClassifierDelegate::PhishingClassifierDelegate( |
| RenderView* render_view, |
| PhishingClassifier* classifier) |
| : render_view_(render_view), |
| - last_page_id_sent_to_classifier_(-1), |
| - pending_classification_(false) { |
| + last_finished_load_id_(-1), |
| + last_page_id_sent_to_classifier_(-1) { |
| if (!classifier) { |
| classifier = new PhishingClassifier(render_view_, |
| new FeatureExtractorClock()); |
| @@ -37,26 +43,23 @@ PhishingClassifierDelegate::~PhishingClassifierDelegate() { |
| void PhishingClassifierDelegate::SetPhishingScorer( |
| const safe_browsing::Scorer* scorer) { |
| classifier_->set_phishing_scorer(scorer); |
| + // Start classifying the current page if all conditions are met. |
| + // See MaybeStartClassification() for details. |
| + MaybeStartClassification(); |
| +} |
| - if (pending_classification_) { |
| - pending_classification_ = false; |
| - // If we have a pending classificaton, it should always be true that the |
| - // main frame URL and page id have not changed since we queued the |
| - // classification. This is because we stop any pending classification on |
| - // main frame loads in RenderView::didCommitProvisionalLoad(). |
| - DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_); |
| - DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_); |
| - classifier_->BeginClassification( |
| - &classifier_page_text_, |
| - NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
| - } |
| +void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) { |
| + last_url_received_from_browser_ = StripRef(url); |
| + // Start classifying the current page if all conditions are met. |
| + // See MaybeStartClassification() for details. |
| + MaybeStartClassification(); |
| } |
| void PhishingClassifierDelegate::CommittedLoadInFrame( |
| WebKit::WebFrame* frame) { |
| // A new page is starting to load. Unless the load is a navigation within |
| - // the same page, we need to cancel classification since the content will |
| - // now be inconsistent with the phishing model. |
| + // the same page, we need to cancel classification since we may get an |
| + // inconsistent result. |
| NavigationState* state = NavigationState::FromDataSource( |
| frame->dataSource()); |
| if (!state->was_within_same_page()) { |
| @@ -65,36 +68,10 @@ void PhishingClassifierDelegate::CommittedLoadInFrame( |
| } |
| void PhishingClassifierDelegate::FinishedLoad(string16* page_text) { |
| - // We check that the page id has incremented so that we don't reclassify |
| - // pages as the user moves back and forward in session history. Note: we |
| - // don't send every page id to the classifier, only those where the toplevel |
| - // URL changed. |
| - int load_id = render_view_->page_id(); |
| - if (load_id <= last_page_id_sent_to_classifier_) { |
| - return; |
| - } |
| - |
| - GURL url_without_ref = StripToplevelUrl(); |
| - if (url_without_ref == last_url_sent_to_classifier_) { |
| - // The toplevle URL is the same, except for the ref. |
| - // Update the last page id we sent, but don't trigger a new classification. |
| - last_page_id_sent_to_classifier_ = load_id; |
| - return; |
| - } |
| - |
| - last_url_sent_to_classifier_ = url_without_ref; |
| - last_page_id_sent_to_classifier_ = load_id; |
| + last_finished_load_id_ = render_view_->page_id(); |
| + last_finished_load_url_ = StripToplevelUrl(); |
| classifier_page_text_.swap(*page_text); |
| - |
| - if (classifier_->is_ready()) { |
| - classifier_->BeginClassification( |
| - &classifier_page_text_, |
| - NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
| - } else { |
| - // If there is no phishing classifier yet, we'll begin classification once |
| - // SetPhishingScorer() is called by the RenderView. |
| - pending_classification_ = true; |
| - } |
| + MaybeStartClassification(); |
| } |
| void PhishingClassifierDelegate::CancelPendingClassification() { |
| @@ -102,7 +79,6 @@ void PhishingClassifierDelegate::CancelPendingClassification() { |
| classifier_->CancelPendingClassification(); |
| } |
| classifier_page_text_.clear(); |
| - pending_classification_ = false; |
| } |
| void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, |
| @@ -121,10 +97,69 @@ void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, |
| } |
| GURL PhishingClassifierDelegate::StripToplevelUrl() { |
| - GURL toplevel_url = render_view_->webview()->mainFrame()->url(); |
| - GURL::Replacements replacements; |
| - replacements.ClearRef(); |
| - return toplevel_url.ReplaceComponents(replacements); |
| + return StripRef(render_view_->webview()->mainFrame()->url()); |
| +} |
| + |
| +void PhishingClassifierDelegate::MaybeStartClassification() { |
| + // We can begin phishing classification when the following conditions are |
| + // met: |
| + // 1. A Scorer has been created |
| + // 2. The browser has sent a StartPhishingDetection message for the current |
| + // toplevel URL. |
| + // 3. The page has finished loading and the page text has been extracted. |
| + // 4. The load is a new navigation (not a session history navigation). |
| + // 5. The toplevel URL has not already been classified. |
| + // |
| + // Note that if we determine that this particular navigation should not be |
| + // classified at all (as opposed to deferring it until we get an IPC or the |
| + // load completes), we discard the page text since it won't be needed. |
| + if (!classifier_->is_ready()) { |
| + VLOG(2) << "Not starting classification, no Scorer created."; |
| + // Keep classifier_page_text_, in case a Scorer is set later. |
| + return; |
| + } |
| + |
| + if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) { |
| + // Skip loads from session history navigation. |
| + VLOG(2) << "Not starting classification, last finished load id is " |
| + << last_finished_load_id_ << " but we have classified up to " |
| + << "load id " << last_page_id_sent_to_classifier_; |
| + classifier_page_text_.clear(); // we won't need this. |
| + return; |
| + } |
| + |
| + if (last_finished_load_id_ != render_view_->page_id()) { |
| + VLOG(2) << "Render view page has changed, not starting classification"; |
| + classifier_page_text_.clear(); // we won't need this. |
| + return; |
| + } |
| + // If the page id is unchanged, the toplevel URL should also be unchanged. |
| + DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_); |
| + |
| + if (last_finished_load_url_ == last_url_sent_to_classifier_) { |
| + // We've already classified this toplevel URL, so this was likely an |
| + // in-page navigation or a subframe navigation. The browser should not |
| + // send a StartPhishingDetection IPC in this case. |
| + VLOG(2) << "Toplevel URL is unchanged, not starting classification."; |
| + classifier_page_text_.clear(); // we won't need this. |
| + return; |
| + } |
| + |
|
lzheng
2011/02/04 20:05:22
Can you add a comment here to explain that this me
Brian Ryner
2011/02/10 01:12:52
Done.
|
| + if (last_url_received_from_browser_ != last_finished_load_url_) { |
| + VLOG(2) << "Not starting classification, last url from browser is " |
| + << last_url_received_from_browser_ << ", last finished load is " |
| + << last_finished_load_url_; |
| + // Keep classifier_page_text_, in case the browser notifies us later that |
| + // we should classify the URL. |
| + return; |
| + } |
| + |
| + VLOG(2) << "Starting classification for " << last_finished_load_url_; |
| + last_url_sent_to_classifier_ = last_finished_load_url_; |
| + last_page_id_sent_to_classifier_ = last_finished_load_id_; |
| + classifier_->BeginClassification( |
| + &classifier_page_text_, |
| + NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
| } |
| } // namespace safe_browsing |