chrome/renderer/safe_browsing/phishing_classifier_delegate.cc - Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection.

Unified Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Fix switch/case formatting Created 9 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« chrome/renderer/safe_browsing/phishing_classifier_delegate.h ('K') | « chrome/renderer/safe_browsing/phishing_classifier_delegate.h ('k') | chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

index a1364ed3ab1189201a7173e3af2dfc93d7a8ec06..add398f8ad52c271f89c27aa85fd564246d725cc 100644

--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

@@ -17,12 +17,18 @@

namespace safe_browsing {

+static GURL StripRef(const GURL& url) {

+ GURL::Replacements replacements;

+ replacements.ClearRef();

+ return url.ReplaceComponents(replacements);

PhishingClassifierDelegate::PhishingClassifierDelegate(

RenderView* render_view,

PhishingClassifier* classifier)

: render_view_(render_view),

- last_page_id_sent_to_classifier_(-1),

- pending_classification_(false) {

+ last_finished_load_id_(-1),

+ last_page_id_sent_to_classifier_(-1) {

if (!classifier) {

classifier = new PhishingClassifier(render_view_,

new FeatureExtractorClock());

@@ -37,26 +43,23 @@ PhishingClassifierDelegate::~PhishingClassifierDelegate() {

void PhishingClassifierDelegate::SetPhishingScorer(

const safe_browsing::Scorer* scorer) {

classifier_->set_phishing_scorer(scorer);

+ // Start classifying the current page if all conditions are met.

+ // See MaybeStartClassification() for details.

+ MaybeStartClassification();

- if (pending_classification_) {

- pending_classification_ = false;

- // If we have a pending classificaton, it should always be true that the

- // main frame URL and page id have not changed since we queued the

- // classification. This is because we stop any pending classification on

- // main frame loads in RenderView::didCommitProvisionalLoad().

- DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);

- DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_);

- classifier_->BeginClassification(

- &classifier_page_text_,

- NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

- }

+void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {

+ last_url_received_from_browser_ = StripRef(url);

+ // Start classifying the current page if all conditions are met.

+ // See MaybeStartClassification() for details.

+ MaybeStartClassification();

}

void PhishingClassifierDelegate::CommittedLoadInFrame(

WebKit::WebFrame* frame) {

// A new page is starting to load. Unless the load is a navigation within

- // the same page, we need to cancel classification since the content will

- // now be inconsistent with the phishing model.

+ // the same page, we need to cancel classification since we may get an

+ // inconsistent result.

NavigationState* state = NavigationState::FromDataSource(

frame->dataSource());

if (!state->was_within_same_page()) {

@@ -65,36 +68,10 @@ void PhishingClassifierDelegate::CommittedLoadInFrame(

}

void PhishingClassifierDelegate::FinishedLoad(string16* page_text) {

- // We check that the page id has incremented so that we don't reclassify

- // pages as the user moves back and forward in session history. Note: we

- // don't send every page id to the classifier, only those where the toplevel

- // URL changed.

- int load_id = render_view_->page_id();

- if (load_id <= last_page_id_sent_to_classifier_) {

- return;

- }

- GURL url_without_ref = StripToplevelUrl();

- if (url_without_ref == last_url_sent_to_classifier_) {

- // The toplevle URL is the same, except for the ref.

- // Update the last page id we sent, but don't trigger a new classification.

- last_page_id_sent_to_classifier_ = load_id;

- return;

- }

- last_url_sent_to_classifier_ = url_without_ref;

- last_page_id_sent_to_classifier_ = load_id;

+ last_finished_load_id_ = render_view_->page_id();

+ last_finished_load_url_ = StripToplevelUrl();

classifier_page_text_.swap(*page_text);

- if (classifier_->is_ready()) {

- classifier_->BeginClassification(

- &classifier_page_text_,

- NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

- } else {

- // If there is no phishing classifier yet, we'll begin classification once

- // SetPhishingScorer() is called by the RenderView.

- pending_classification_ = true;

- }

+ MaybeStartClassification();

}

void PhishingClassifierDelegate::CancelPendingClassification() {

@@ -102,7 +79,6 @@ void PhishingClassifierDelegate::CancelPendingClassification() {

classifier_->CancelPendingClassification();

}

classifier_page_text_.clear();

- pending_classification_ = false;

}

void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,

@@ -121,10 +97,69 @@ void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,

}

GURL PhishingClassifierDelegate::StripToplevelUrl() {

- GURL toplevel_url = render_view_->webview()->mainFrame()->url();

- GURL::Replacements replacements;

- replacements.ClearRef();

- return toplevel_url.ReplaceComponents(replacements);

+ return StripRef(render_view_->webview()->mainFrame()->url());

+void PhishingClassifierDelegate::MaybeStartClassification() {

+ // We can begin phishing classification when the following conditions are

+ // met:

+ // 1. A Scorer has been created

+ // 2. The browser has sent a StartPhishingDetection message for the current

+ // toplevel URL.

+ // 3. The page has finished loading and the page text has been extracted.

+ // 4. The load is a new navigation (not a session history navigation).

+ // 5. The toplevel URL has not already been classified.

+ //

+ // Note that if we determine that this particular navigation should not be

+ // classified at all (as opposed to deferring it until we get an IPC or the

+ // load completes), we discard the page text since it won't be needed.

+ if (!classifier_->is_ready()) {

+ VLOG(2) << "Not starting classification, no Scorer created.";

+ // Keep classifier_page_text_, in case a Scorer is set later.

+ return;

+ }

+ if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) {

+ // Skip loads from session history navigation.

+ VLOG(2) << "Not starting classification, last finished load id is "

+ << last_finished_load_id_ << " but we have classified up to "

+ << "load id " << last_page_id_sent_to_classifier_;

+ classifier_page_text_.clear(); // we won't need this.

+ return;

+ }

+ if (last_finished_load_id_ != render_view_->page_id()) {

+ VLOG(2) << "Render view page has changed, not starting classification";

+ classifier_page_text_.clear(); // we won't need this.

+ return;

+ }

+ // If the page id is unchanged, the toplevel URL should also be unchanged.

+ DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_);

+ if (last_finished_load_url_ == last_url_sent_to_classifier_) {

+ // We've already classified this toplevel URL, so this was likely an

+ // in-page navigation or a subframe navigation. The browser should not

+ // send a StartPhishingDetection IPC in this case.

+ VLOG(2) << "Toplevel URL is unchanged, not starting classification.";

+ classifier_page_text_.clear(); // we won't need this.

+ return;

+ }

lzheng 2011/02/04 20:05:22 Can you add a comment here to explain that this me

Brian Ryner 2011/02/10 01:12:52 Done.

+ if (last_url_received_from_browser_ != last_finished_load_url_) {

+ VLOG(2) << "Not starting classification, last url from browser is "

+ << last_url_received_from_browser_ << ", last finished load is "

+ << last_finished_load_url_;

+ // Keep classifier_page_text_, in case the browser notifies us later that

+ // we should classify the URL.

+ return;

+ }

+ VLOG(2) << "Starting classification for " << last_finished_load_url_;

+ last_url_sent_to_classifier_ = last_finished_load_url_;

+ last_page_id_sent_to_classifier_ = last_finished_load_id_;

+ classifier_->BeginClassification(

+ &classifier_page_text_,

+ NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

}

} // namespace safe_browsing