Index: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
index 7fd059522bb28fb664ca9622a138b06406075d24..985965db9282e425170507092f4acaf0e68fa262 100644 |
--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
@@ -23,6 +23,13 @@ |
namespace safe_browsing { |
+ |
+static GURL StripRef(const GURL& url) { |
+ GURL::Replacements replacements; |
+ replacements.ClearRef(); |
+ return url.ReplaceComponents(replacements); |
+} |
+ |
typedef std::set<PhishingClassifierDelegate*> PhishingClassifierDelegates; |
static base::LazyInstance<PhishingClassifierDelegates> |
g_delegates(base::LINKER_INITIALIZED); |
@@ -75,8 +82,8 @@ PhishingClassifierDelegate::PhishingClassifierDelegate( |
RenderView* render_view, |
PhishingClassifier* classifier) |
: RenderViewObserver(render_view), |
- last_page_id_sent_to_classifier_(-1), |
- pending_classification_(false) { |
+ last_finished_load_id_(-1), |
+ last_page_id_sent_to_classifier_(-1) { |
g_delegates.Get().insert(this); |
if (!classifier) { |
classifier = new PhishingClassifier(render_view, |
@@ -100,26 +107,24 @@ void PhishingClassifierDelegate::SetPhishingScorer( |
return; // RenderView is tearing down. |
classifier_->set_phishing_scorer(scorer); |
+ // Start classifying the current page if all conditions are met. |
+ // See MaybeStartClassification() for details. |
+ MaybeStartClassification(); |
+} |
- if (pending_classification_) { |
- pending_classification_ = false; |
- // If we have a pending classificaton, it should always be true that the |
- // main frame URL and page id have not changed since we queued the |
- // classification. This is because we stop any pending classification on |
- // main frame loads in RenderView::didCommitProvisionalLoad(). |
- DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_); |
- DCHECK_EQ(render_view()->page_id(), last_page_id_sent_to_classifier_); |
- classifier_->BeginClassification( |
- &classifier_page_text_, |
- NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
- } |
+ |
+void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) { |
+ last_url_received_from_browser_ = StripRef(url); |
+ // Start classifying the current page if all conditions are met. |
+ // See MaybeStartClassification() for details. |
+ MaybeStartClassification(); |
} |
void PhishingClassifierDelegate::DidCommitProvisionalLoad( |
WebKit::WebFrame* frame, bool is_new_navigation) { |
// A new page is starting to load. Unless the load is a navigation within |
- // the same page, we need to cancel classification since the content will |
- // now be inconsistent with the phishing model. |
+ // the same page, we need to cancel classification since we may get an |
+ // inconsistent result. |
NavigationState* state = NavigationState::FromDataSource( |
frame->dataSource()); |
if (!state->was_within_same_page()) { |
@@ -128,36 +133,10 @@ void PhishingClassifierDelegate::DidCommitProvisionalLoad( |
} |
void PhishingClassifierDelegate::PageCaptured(const string16& page_text) { |
- // We check that the page id has incremented so that we don't reclassify |
- // pages as the user moves back and forward in session history. Note: we |
- // don't send every page id to the classifier, only those where the toplevel |
- // URL changed. |
- int load_id = render_view()->page_id(); |
- if (load_id <= last_page_id_sent_to_classifier_) { |
- return; |
- } |
- |
- GURL url_without_ref = StripToplevelUrl(); |
- if (url_without_ref == last_url_sent_to_classifier_) { |
- // The toplevle URL is the same, except for the ref. |
- // Update the last page id we sent, but don't trigger a new classification. |
- last_page_id_sent_to_classifier_ = load_id; |
- return; |
- } |
- |
- last_url_sent_to_classifier_ = url_without_ref; |
- last_page_id_sent_to_classifier_ = load_id; |
+ last_finished_load_id_ = render_view()->page_id(); |
+ last_finished_load_url_ = StripToplevelUrl(); |
classifier_page_text_ = page_text; |
- |
- if (classifier_->is_ready()) { |
- classifier_->BeginClassification( |
- &classifier_page_text_, |
- NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
- } else { |
- // If there is no phishing classifier yet, we'll begin classification once |
- // SetPhishingScorer() is called by the RenderView. |
- pending_classification_ = true; |
- } |
+ MaybeStartClassification(); |
} |
void PhishingClassifierDelegate::CancelPendingClassification() { |
@@ -165,19 +144,17 @@ void PhishingClassifierDelegate::CancelPendingClassification() { |
classifier_->CancelPendingClassification(); |
} |
classifier_page_text_.clear(); |
- pending_classification_ = false; |
} |
bool PhishingClassifierDelegate::OnMessageReceived( |
const IPC::Message& message) { |
- /* |
bool handled = true; |
IPC_BEGIN_MESSAGE_MAP(PhishingClassifierDelegate, message) |
+ IPC_MESSAGE_HANDLER(ViewMsg_StartPhishingDetection, |
+ OnStartPhishingDetection) |
IPC_MESSAGE_UNHANDLED(handled = false) |
IPC_END_MESSAGE_MAP() |
return handled; |
- */ |
- return false; |
} |
void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, |
@@ -196,10 +173,71 @@ void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, |
} |
GURL PhishingClassifierDelegate::StripToplevelUrl() { |
- GURL toplevel_url = render_view()->webview()->mainFrame()->url(); |
- GURL::Replacements replacements; |
- replacements.ClearRef(); |
- return toplevel_url.ReplaceComponents(replacements); |
+ return StripRef(render_view()->webview()->mainFrame()->url()); |
+} |
+ |
+void PhishingClassifierDelegate::MaybeStartClassification() { |
+ // We can begin phishing classification when the following conditions are |
+ // met: |
+ // 1. A Scorer has been created |
+ // 2. The browser has sent a StartPhishingDetection message for the current |
+ // toplevel URL. |
+ // 3. The page has finished loading and the page text has been extracted. |
+ // 4. The load is a new navigation (not a session history navigation). |
+ // 5. The toplevel URL has not already been classified. |
+ // |
+ // Note that if we determine that this particular navigation should not be |
+ // classified at all (as opposed to deferring it until we get an IPC or the |
+ // load completes), we discard the page text since it won't be needed. |
+ if (!classifier_->is_ready()) { |
+ VLOG(2) << "Not starting classification, no Scorer created."; |
+ // Keep classifier_page_text_, in case a Scorer is set later. |
+ return; |
+ } |
+ |
+ if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) { |
+ // Skip loads from session history navigation. |
+ VLOG(2) << "Not starting classification, last finished load id is " |
+ << last_finished_load_id_ << " but we have classified up to " |
+ << "load id " << last_page_id_sent_to_classifier_; |
+ classifier_page_text_.clear(); // we won't need this. |
+ return; |
+ } |
+ |
+ if (last_finished_load_id_ != render_view()->page_id()) { |
+ VLOG(2) << "Render view page has changed, not starting classification"; |
+ classifier_page_text_.clear(); // we won't need this. |
+ return; |
+ } |
+ // If the page id is unchanged, the toplevel URL should also be unchanged. |
+ DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_); |
+ |
+ if (last_finished_load_url_ == last_url_sent_to_classifier_) { |
+ // We've already classified this toplevel URL, so this was likely an |
+ // in-page navigation or a subframe navigation. The browser should not |
+ // send a StartPhishingDetection IPC in this case. |
+ VLOG(2) << "Toplevel URL is unchanged, not starting classification."; |
+ classifier_page_text_.clear(); // we won't need this. |
+ return; |
+ } |
+ |
+ if (last_url_received_from_browser_ != last_finished_load_url_) { |
+ // The browser has not yet confirmed that this URL should be classified, |
+ // so defer classification for now. |
+ VLOG(2) << "Not starting classification, last url from browser is " |
+ << last_url_received_from_browser_ << ", last finished load is " |
+ << last_finished_load_url_; |
+ // Keep classifier_page_text_, in case the browser notifies us later that |
+ // we should classify the URL. |
+ return; |
+ } |
+ |
+ VLOG(2) << "Starting classification for " << last_finished_load_url_; |
+ last_url_sent_to_classifier_ = last_finished_load_url_; |
+ last_page_id_sent_to_classifier_ = last_finished_load_id_; |
+ classifier_->BeginClassification( |
+ &classifier_page_text_, |
+ NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
} |
} // namespace safe_browsing |