| Index: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
|
| diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
|
| index a1364ed3ab1189201a7173e3af2dfc93d7a8ec06..37778d16fa1990d96ceaed82374b11e45ba4dffe 100644
|
| --- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
|
| +++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc
|
| @@ -17,12 +17,20 @@
|
|
|
| namespace safe_browsing {
|
|
|
| +namespace {
|
| +GURL StripRef(const GURL& url) {
|
| + GURL::Replacements replacements;
|
| + replacements.ClearRef();
|
| + return url.ReplaceComponents(replacements);
|
| +}
|
| +}
|
| +
|
| PhishingClassifierDelegate::PhishingClassifierDelegate(
|
| RenderView* render_view,
|
| PhishingClassifier* classifier)
|
| : render_view_(render_view),
|
| - last_page_id_sent_to_classifier_(-1),
|
| - pending_classification_(false) {
|
| + last_finished_load_id_(-1),
|
| + last_page_id_sent_to_classifier_(-1) {
|
| if (!classifier) {
|
| classifier = new PhishingClassifier(render_view_,
|
| new FeatureExtractorClock());
|
| @@ -37,26 +45,23 @@ PhishingClassifierDelegate::~PhishingClassifierDelegate() {
|
| void PhishingClassifierDelegate::SetPhishingScorer(
|
| const safe_browsing::Scorer* scorer) {
|
| classifier_->set_phishing_scorer(scorer);
|
| + // Start classifying the current page if all conditions are met.
|
| + // See MaybeStartClassification() for details.
|
| + MaybeStartClassification();
|
| +}
|
|
|
| - if (pending_classification_) {
|
| - pending_classification_ = false;
|
| - // If we have a pending classificaton, it should always be true that the
|
| - // main frame URL and page id have not changed since we queued the
|
| - // classification. This is because we stop any pending classification on
|
| - // main frame loads in RenderView::didCommitProvisionalLoad().
|
| - DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_);
|
| - DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_);
|
| - classifier_->BeginClassification(
|
| - &classifier_page_text_,
|
| - NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
|
| - }
|
| +void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) {
|
| + last_url_received_from_browser_ = StripRef(url);
|
| + // Start classifying the current page if all conditions are met.
|
| + // See MaybeStartClassification() for details.
|
| + MaybeStartClassification();
|
| }
|
|
|
| void PhishingClassifierDelegate::CommittedLoadInFrame(
|
| WebKit::WebFrame* frame) {
|
| // A new page is starting to load. Unless the load is a navigation within
|
| - // the same page, we need to cancel classification since the content will
|
| - // now be inconsistent with the phishing model.
|
| + // the same page, we need to cancel classification since we may get an
|
| + // inconsistent result.
|
| NavigationState* state = NavigationState::FromDataSource(
|
| frame->dataSource());
|
| if (!state->was_within_same_page()) {
|
| @@ -65,36 +70,10 @@ void PhishingClassifierDelegate::CommittedLoadInFrame(
|
| }
|
|
|
| void PhishingClassifierDelegate::FinishedLoad(string16* page_text) {
|
| - // We check that the page id has incremented so that we don't reclassify
|
| - // pages as the user moves back and forward in session history. Note: we
|
| - // don't send every page id to the classifier, only those where the toplevel
|
| - // URL changed.
|
| - int load_id = render_view_->page_id();
|
| - if (load_id <= last_page_id_sent_to_classifier_) {
|
| - return;
|
| - }
|
| -
|
| - GURL url_without_ref = StripToplevelUrl();
|
| - if (url_without_ref == last_url_sent_to_classifier_) {
|
| - // The toplevle URL is the same, except for the ref.
|
| - // Update the last page id we sent, but don't trigger a new classification.
|
| - last_page_id_sent_to_classifier_ = load_id;
|
| - return;
|
| - }
|
| -
|
| - last_url_sent_to_classifier_ = url_without_ref;
|
| - last_page_id_sent_to_classifier_ = load_id;
|
| + last_finished_load_id_ = render_view_->page_id();
|
| + last_finished_load_url_ = StripToplevelUrl();
|
| classifier_page_text_.swap(*page_text);
|
| -
|
| - if (classifier_->is_ready()) {
|
| - classifier_->BeginClassification(
|
| - &classifier_page_text_,
|
| - NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
|
| - } else {
|
| - // If there is no phishing classifier yet, we'll begin classification once
|
| - // SetPhishingScorer() is called by the RenderView.
|
| - pending_classification_ = true;
|
| - }
|
| + MaybeStartClassification();
|
| }
|
|
|
| void PhishingClassifierDelegate::CancelPendingClassification() {
|
| @@ -102,7 +81,6 @@ void PhishingClassifierDelegate::CancelPendingClassification() {
|
| classifier_->CancelPendingClassification();
|
| }
|
| classifier_page_text_.clear();
|
| - pending_classification_ = false;
|
| }
|
|
|
| void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,
|
| @@ -121,10 +99,69 @@ void PhishingClassifierDelegate::ClassificationDone(bool is_phishy,
|
| }
|
|
|
| GURL PhishingClassifierDelegate::StripToplevelUrl() {
|
| - GURL toplevel_url = render_view_->webview()->mainFrame()->url();
|
| - GURL::Replacements replacements;
|
| - replacements.ClearRef();
|
| - return toplevel_url.ReplaceComponents(replacements);
|
| + return StripRef(render_view_->webview()->mainFrame()->url());
|
| +}
|
| +
|
| +void PhishingClassifierDelegate::MaybeStartClassification() {
|
| + // We can begin phishing classification when the following conditions are
|
| + // met:
|
| + // 1. A Scorer has been created
|
| + // 2. The browser has sent a StartPhishingDetection message for the current
|
| + // toplevel URL.
|
| + // 3. The page has finished loading and the page text has been extracted.
|
| + // 4. The load is a new navigation (not a session history navigation).
|
| + // 5. The toplevel URL has not already been classified.
|
| + if (!classifier_->is_ready()) {
|
| + VLOG(2) << "Not starting classification, no Scorer created.";
|
| + // Keep classifier_page_text_, in case a Scorer is set later.
|
| + return;
|
| + }
|
| +
|
| + if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) {
|
| + // Skip loads from session history navigation.
|
| + VLOG(2) << "Not starting classification, last finished load id is "
|
| + << last_finished_load_id_ << " but we have classified up to "
|
| + << "load id " << last_page_id_sent_to_classifier_;
|
| + classifier_page_text_.clear(); // we won't need this.
|
| + return;
|
| + }
|
| +
|
| + if (last_finished_load_id_ != render_view_->page_id()) {
|
| + VLOG(2) << "Render view page has changed, not starting classification";
|
| + classifier_page_text_.clear(); // we won't need this.
|
| + return;
|
| + }
|
| + // If the page id is unchanged, the toplevel URL should also be unchanged.
|
| + DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_);
|
| +
|
| + if (last_url_received_from_browser_ != last_finished_load_url_) {
|
| + VLOG(2) << "Not starting classification, last url from browser is "
|
| + << last_url_received_from_browser_ << ", last finished load is "
|
| + << last_finished_load_url_;
|
| + // Keep classifier_page_text_, in case the browser notifies us later that
|
| + // we should classify the URL.
|
| + return;
|
| + }
|
| +
|
| + if (last_finished_load_url_ == last_url_sent_to_classifier_) {
|
| + // We've already classified this toplevel URL, so this was likely an
|
| + // in-page navigation or a subframe navigation. Don't classify the page a
|
| + // second time, but update the last classified page id for the session
|
| + // history check above.
|
| + VLOG(2) << "Toplevel URL is unchanged, not starting classification "
|
| + << "but updating last classified page id to "
|
| + << last_finished_load_id_;
|
| + last_page_id_sent_to_classifier_ = last_finished_load_id_;
|
| + classifier_page_text_.clear(); // we won't need this.
|
| + return;
|
| + }
|
| +
|
| + VLOG(2) << "Starting classification for " << last_finished_load_url_;
|
| + last_url_sent_to_classifier_ = last_finished_load_url_;
|
| + last_page_id_sent_to_classifier_ = last_finished_load_id_;
|
| + classifier_->BeginClassification(
|
| + &classifier_page_text_,
|
| + NewCallback(this, &PhishingClassifierDelegate::ClassificationDone));
|
| }
|
|
|
| } // namespace safe_browsing
|
|
|