chrome/renderer/safe_browsing/phishing_classifier_delegate.cc - Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection.

Unified Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Address Noe's review comments Created 9 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« no previous file with comments | « chrome/renderer/safe_browsing/phishing_classifier_delegate.h ('k') | chrome/renderer/safe_browsing/phishing_classifier_delegate_browsertest.cc » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

index 7fd059522bb28fb664ca9622a138b06406075d24..985965db9282e425170507092f4acaf0e68fa262 100644

--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

@@ -23,6 +23,13 @@

namespace safe_browsing {

+static GURL StripRef(const GURL& url) {

+ GURL::Replacements replacements;

+ replacements.ClearRef();

+ return url.ReplaceComponents(replacements);

typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates;

static base::LazyInstance<PhishingClassifierDelegates>

g_delegates(base::LINKER_INITIALIZED);

@@ -75,8 +82,8 @@ PhishingClassifierDelegate::PhishingClassifierDelegate(

RenderView* render_view,

PhishingClassifier* classifier)

: RenderViewObserver(render_view),

- last_page_id_sent_to_classifier_(-1),

- pending_classification_(false) {

+ last_finished_load_id_(-1),

+ last_page_id_sent_to_classifier_(-1) {

g_delegates.Get().insert(this);

if (!classifier) {

classifier = new PhishingClassifier(render_view,

@@ -100,26 +107,24 @@ void PhishingClassifierDelegate::SetPhishingScorer(

return; // RenderView is tearing down.

classifier_->set_phishing_scorer(scorer);

+ // Start classifying the current page if all conditions are met.

+ // See MaybeStartClassification() for details.

+ MaybeStartClassification();

- if (pending_classification_) {

- pending_classification_ = false;

- // If we have a pending classificaton, it should always be true that the

- // main frame URL and page id have not changed since we queued the

- // classification. This is because we stop any pending classification on

- // main frame loads in RenderView::didCommitProvisionalLoad().

- DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);

- DCHECK_EQ(render_view()->page_id(), last_page_id_sent_to_classifier_);

- classifier_->BeginClassification(

- &classifier_page_text_,

- NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

- }

+void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {

+ last_url_received_from_browser_ = StripRef(url);

+ // Start classifying the current page if all conditions are met.

+ // See MaybeStartClassification() for details.

+ MaybeStartClassification();

}

void PhishingClassifierDelegate::DidCommitProvisionalLoad(

WebKit::WebFrame* frame, bool is_new_navigation) {

// A new page is starting to load. Unless the load is a navigation within

- // the same page, we need to cancel classification since the content will

- // now be inconsistent with the phishing model.

+ // the same page, we need to cancel classification since we may get an

+ // inconsistent result.

NavigationState* state = NavigationState::FromDataSource(

frame->dataSource());

if (!state->was_within_same_page()) {

@@ -128,36 +133,10 @@ void PhishingClassifierDelegate::DidCommitProvisionalLoad(

}

void PhishingClassifierDelegate::PageCaptured(const string16& page_text) {

- // We check that the page id has incremented so that we don't reclassify

- // pages as the user moves back and forward in session history. Note: we

- // don't send every page id to the classifier, only those where the toplevel

- // URL changed.

- int load_id = render_view()->page_id();

- if (load_id <= last_page_id_sent_to_classifier_) {

- return;

- }

- GURL url_without_ref = StripToplevelUrl();

- if (url_without_ref == last_url_sent_to_classifier_) {

- // The toplevle URL is the same, except for the ref.

- // Update the last page id we sent, but don't trigger a new classification.

- last_page_id_sent_to_classifier_ = load_id;

- return;

- }

- last_url_sent_to_classifier_ = url_without_ref;

- last_page_id_sent_to_classifier_ = load_id;

+ last_finished_load_id_ = render_view()->page_id();

+ last_finished_load_url_ = StripToplevelUrl();

classifier_page_text_ = page_text;

- if (classifier_->is_ready()) {

- classifier_->BeginClassification(

- &classifier_page_text_,

- NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

- } else {

- // If there is no phishing classifier yet, we'll begin classification once

- // SetPhishingScorer() is called by the RenderView.

- pending_classification_ = true;

- }

+ MaybeStartClassification();

}

void PhishingClassifierDelegate::CancelPendingClassification() {

@@ -165,19 +144,17 @@ void PhishingClassifierDelegate::CancelPendingClassification() {

classifier_->CancelPendingClassification();

}

classifier_page_text_.clear();

- pending_classification_ = false;

}

bool PhishingClassifierDelegate::OnMessageReceived(

const IPC::Message& message) {

- /*

bool handled = true;

IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message)

+ IPC_MESSAGE_HANDLER(ViewMsg_StartPhishingDetection,

+ OnStartPhishingDetection)

IPC_MESSAGE_UNHANDLED(handled = false)

IPC_END_MESSAGE_MAP()

return handled;

- */

- return false;

}

void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,

@@ -196,10 +173,71 @@ void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,

}

GURL PhishingClassifierDelegate::StripToplevelUrl() {

- GURL toplevel_url = render_view()->webview()->mainFrame()->url();

- GURL::Replacements replacements;

- replacements.ClearRef();

- return toplevel_url.ReplaceComponents(replacements);

+ return StripRef(render_view()->webview()->mainFrame()->url());

+void PhishingClassifierDelegate::MaybeStartClassification() {

+ // We can begin phishing classification when the following conditions are

+ // met:

+ // 1. A Scorer has been created

+ // 2. The browser has sent a StartPhishingDetection message for the current

+ // toplevel URL.

+ // 3. The page has finished loading and the page text has been extracted.

+ // 4. The load is a new navigation (not a session history navigation).

+ // 5. The toplevel URL has not already been classified.

+ //

+ // Note that if we determine that this particular navigation should not be

+ // classified at all (as opposed to deferring it until we get an IPC or the

+ // load completes), we discard the page text since it won't be needed.

+ if (!classifier_->is_ready()) {

+ VLOG(2) << "Not starting classification, no Scorer created.";

+ // Keep classifier_page_text_, in case a Scorer is set later.

+ return;

+ }

+ if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) {

+ // Skip loads from session history navigation.

+ VLOG(2) << "Not starting classification, last finished load id is "

+ << last_finished_load_id_ << " but we have classified up to "

+ << "load id " << last_page_id_sent_to_classifier_;

+ classifier_page_text_.clear(); // we won't need this.

+ return;

+ }

+ if (last_finished_load_id_ != render_view()->page_id()) {

+ VLOG(2) << "Render view page has changed, not starting classification";

+ classifier_page_text_.clear(); // we won't need this.

+ return;

+ }

+ // If the page id is unchanged, the toplevel URL should also be unchanged.

+ DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_);

+ if (last_finished_load_url_ == last_url_sent_to_classifier_) {

+ // We've already classified this toplevel URL, so this was likely an

+ // in-page navigation or a subframe navigation. The browser should not

+ // send a StartPhishingDetection IPC in this case.

+ VLOG(2) << "Toplevel URL is unchanged, not starting classification.";

+ classifier_page_text_.clear(); // we won't need this.

+ return;

+ }

+ if (last_url_received_from_browser_ != last_finished_load_url_) {

+ // The browser has not yet confirmed that this URL should be classified,

+ // so defer classification for now.

+ VLOG(2) << "Not starting classification, last url from browser is "

+ << last_url_received_from_browser_ << ", last finished load is "

+ << last_finished_load_url_;

+ // Keep classifier_page_text_, in case the browser notifies us later that

+ // we should classify the URL.

+ return;

+ }

+ VLOG(2) << "Starting classification for " << last_finished_load_url_;

+ last_url_sent_to_classifier_ = last_finished_load_url_;

+ last_page_id_sent_to_classifier_ = last_finished_load_id_;

+ classifier_->BeginClassification(

+ &classifier_page_text_,

+ NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));

}

} // namespace safe_browsing