Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(532)

Unified Diff: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc

Issue 6398001: Run pre-classification checks in the browser before starting client-side phishing detection. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Address Noe's review comments Created 9 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
index 7fd059522bb28fb664ca9622a138b06406075d24..985965db9282e425170507092f4acaf0e68fa262 100644
--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
@@ -23,6 +23,13 @@
namespace safe_browsing {
+
+static GURL StripRef(const GURL& url) {
+ GURL::Replacements replacements;
+ replacements.ClearRef();
+ return url.ReplaceComponents(replacements);
+}
+
typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates;
static base::LazyInstance<PhishingClassifierDelegates>
g_delegates(base::LINKER_INITIALIZED);
@@ -75,8 +82,8 @@ PhishingClassifierDelegate::PhishingClassifierDelegate(
RenderView* render_view,
PhishingClassifier* classifier)
: RenderViewObserver(render_view),
- last_page_id_sent_to_classifier_(-1),
- pending_classification_(false) {
+ last_finished_load_id_(-1),
+ last_page_id_sent_to_classifier_(-1) {
g_delegates.Get().insert(this);
if (!classifier) {
classifier = new PhishingClassifier(render_view,
@@ -100,26 +107,24 @@ void PhishingClassifierDelegate::SetPhishingScorer(
return; // RenderView is tearing down.
classifier_->set_phishing_scorer(scorer);
+ // Start classifying the current page if all conditions are met.
+ // See MaybeStartClassification() for details.
+ MaybeStartClassification();
+}
- if (pending_classification_) {
- pending_classification_ = false;
- // If we have a pending classificaton, it should always be true that the
- // main frame URL and page id have not changed since we queued the
- // classification. This is because we stop any pending classification on
- // main frame loads in RenderView::didCommitProvisionalLoad().
- DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);
- DCHECK_EQ(render_view()->page_id(), last_page_id_sent_to_classifier_);
- classifier_->BeginClassification(
- &classifier_page_text_,
- NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
- }
+
+void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {
+ last_url_received_from_browser_ = StripRef(url);
+ // Start classifying the current page if all conditions are met.
+ // See MaybeStartClassification() for details.
+ MaybeStartClassification();
}
void PhishingClassifierDelegate::DidCommitProvisionalLoad(
WebKit::WebFrame* frame, bool is_new_navigation) {
// A new page is starting to load. Unless the load is a navigation within
- // the same page, we need to cancel classification since the content will
- // now be inconsistent with the phishing model.
+ // the same page, we need to cancel classification since we may get an
+ // inconsistent result.
NavigationState* state = NavigationState::FromDataSource(
frame->dataSource());
if (!state->was_within_same_page()) {
@@ -128,36 +133,10 @@ void PhishingClassifierDelegate::DidCommitProvisionalLoad(
}
void PhishingClassifierDelegate::PageCaptured(const string16& page_text) {
- // We check that the page id has incremented so that we don't reclassify
- // pages as the user moves back and forward in session history. Note: we
- // don't send every page id to the classifier, only those where the toplevel
- // URL changed.
- int load_id = render_view()->page_id();
- if (load_id <= last_page_id_sent_to_classifier_) {
- return;
- }
-
- GURL url_without_ref = StripToplevelUrl();
- if (url_without_ref == last_url_sent_to_classifier_) {
- // The toplevle URL is the same, except for the ref.
- // Update the last page id we sent, but don't trigger a new classification.
- last_page_id_sent_to_classifier_ = load_id;
- return;
- }
-
- last_url_sent_to_classifier_ = url_without_ref;
- last_page_id_sent_to_classifier_ = load_id;
+ last_finished_load_id_ = render_view()->page_id();
+ last_finished_load_url_ = StripToplevelUrl();
classifier_page_text_ = page_text;
-
- if (classifier_->is_ready()) {
- classifier_->BeginClassification(
- &classifier_page_text_,
- NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
- } else {
- // If there is no phishing classifier yet, we'll begin classification once
- // SetPhishingScorer() is called by the RenderView.
- pending_classification_ = true;
- }
+ MaybeStartClassification();
}
void PhishingClassifierDelegate::CancelPendingClassification() {
@@ -165,19 +144,17 @@ void PhishingClassifierDelegate::CancelPendingClassification() {
classifier_->CancelPendingClassification();
}
classifier_page_text_.clear();
- pending_classification_ = false;
}
bool PhishingClassifierDelegate::OnMessageReceived(
const IPC::Message& message) {
- /*
bool handled = true;
IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message)
+ IPC_MESSAGE_HANDLER(ViewMsg_StartPhishingDetection,
+ OnStartPhishingDetection)
IPC_MESSAGE_UNHANDLED(handled = false)
IPC_END_MESSAGE_MAP()
return handled;
- */
- return false;
}
void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,
@@ -196,10 +173,71 @@ void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,
}
GURL PhishingClassifierDelegate::StripToplevelUrl() {
- GURL toplevel_url = render_view()->webview()->mainFrame()->url();
- GURL::Replacements replacements;
- replacements.ClearRef();
- return toplevel_url.ReplaceComponents(replacements);
+ return StripRef(render_view()->webview()->mainFrame()->url());
+}
+
+void PhishingClassifierDelegate::MaybeStartClassification() {
+ // We can begin phishing classification when the following conditions are
+ // met:
+ // 1. A Scorer has been created
+ // 2. The browser has sent a StartPhishingDetection message for the current
+ // toplevel URL.
+ // 3. The page has finished loading and the page text has been extracted.
+ // 4. The load is a new navigation (not a session history navigation).
+ // 5. The toplevel URL has not already been classified.
+ //
+ // Note that if we determine that this particular navigation should not be
+ // classified at all (as opposed to deferring it until we get an IPC or the
+ // load completes), we discard the page text since it won't be needed.
+ if (!classifier_->is_ready()) {
+ VLOG(2) << "Not starting classification, no Scorer created.";
+ // Keep classifier_page_text_, in case a Scorer is set later.
+ return;
+ }
+
+ if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) {
+ // Skip loads from session history navigation.
+ VLOG(2) << "Not starting classification, last finished load id is "
+ << last_finished_load_id_ << " but we have classified up to "
+ << "load id " << last_page_id_sent_to_classifier_;
+ classifier_page_text_.clear(); // we won't need this.
+ return;
+ }
+
+ if (last_finished_load_id_ != render_view()->page_id()) {
+ VLOG(2) << "Render view page has changed, not starting classification";
+ classifier_page_text_.clear(); // we won't need this.
+ return;
+ }
+ // If the page id is unchanged, the toplevel URL should also be unchanged.
+ DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_);
+
+ if (last_finished_load_url_ == last_url_sent_to_classifier_) {
+ // We've already classified this toplevel URL, so this was likely an
+ // in-page navigation or a subframe navigation. The browser should not
+ // send a StartPhishingDetection IPC in this case.
+ VLOG(2) << "Toplevel URL is unchanged, not starting classification.";
+ classifier_page_text_.clear(); // we won't need this.
+ return;
+ }
+
+ if (last_url_received_from_browser_ != last_finished_load_url_) {
+ // The browser has not yet confirmed that this URL should be classified,
+ // so defer classification for now.
+ VLOG(2) << "Not starting classification, last url from browser is "
+ << last_url_received_from_browser_ << ", last finished load is "
+ << last_finished_load_url_;
+ // Keep classifier_page_text_, in case the browser notifies us later that
+ // we should classify the URL.
+ return;
+ }
+
+ VLOG(2) << "Starting classification for " << last_finished_load_url_;
+ last_url_sent_to_classifier_ = last_finished_load_url_;
+ last_page_id_sent_to_classifier_ = last_finished_load_id_;
+ classifier_->BeginClassification(
+ &classifier_page_text_,
+ NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
}
} // namespace safe_browsing

Powered by Google App Engine
This is Rietveld 408576698