Index: chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
diff --git a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
index a1364ed3ab1189201a7173e3af2dfc93d7a8ec06..add398f8ad52c271f89c27aa85fd564246d725cc 100644 |
--- a/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
+++ b/chrome/renderer/safe_browsing/phishing_classifier_delegate.cc |
@@ -17,12 +17,18 @@ |
namespace safe_browsing { |
+static GURL StripRef(const GURL& url) { |
+ GURL::Replacements replacements; |
+ replacements.ClearRef(); |
+ return url.ReplaceComponents(replacements); |
+} |
+ |
PhishingClassifierDelegate::PhishingClassifierDelegate( |
RenderView* render_view, |
PhishingClassifier* classifier) |
: render_view_(render_view), |
- last_page_id_sent_to_classifier_(-1), |
- pending_classification_(false) { |
+ last_finished_load_id_(-1), |
+ last_page_id_sent_to_classifier_(-1) { |
if (!classifier) { |
classifier = new PhishingClassifier(render_view_, |
new FeatureExtractorClock()); |
@@ -37,26 +43,23 @@ PhishingClassifierDelegate::~PhishingClassifierDelegate() { |
void PhishingClassifierDelegate::SetPhishingScorer( |
const safe_browsing::Scorer* scorer) { |
classifier_->set_phishing_scorer(scorer); |
+ // Start classifying the current page if all conditions are met. |
+ // See MaybeStartClassification() for details. |
+ MaybeStartClassification(); |
+} |
- if (pending_classification_) { |
- pending_classification_ = false; |
- // If we have a pending classificaton, it should always be true that the |
- // main frame URL and page id have not changed since we queued the |
- // classification. This is because we stop any pending classification on |
- // main frame loads in RenderView::didCommitProvisionalLoad(). |
- DCHECK_EQ(StripToplevelUrl(), last_url_sent_to_classifier_); |
- DCHECK_EQ(render_view_->page_id(), last_page_id_sent_to_classifier_); |
- classifier_->BeginClassification( |
- &classifier_page_text_, |
- NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
- } |
+void PhishingClassifierDelegate::OnStartPhishingDetection(const GURL& url) { |
+ last_url_received_from_browser_ = StripRef(url); |
+ // Start classifying the current page if all conditions are met. |
+ // See MaybeStartClassification() for details. |
+ MaybeStartClassification(); |
} |
void PhishingClassifierDelegate::CommittedLoadInFrame( |
WebKit::WebFrame* frame) { |
// A new page is starting to load. Unless the load is a navigation within |
- // the same page, we need to cancel classification since the content will |
- // now be inconsistent with the phishing model. |
+ // the same page, we need to cancel classification since we may get an |
+ // inconsistent result. |
NavigationState* state = NavigationState::FromDataSource( |
frame->dataSource()); |
if (!state->was_within_same_page()) { |
@@ -65,36 +68,10 @@ void PhishingClassifierDelegate::CommittedLoadInFrame( |
} |
void PhishingClassifierDelegate::FinishedLoad(string16* page_text) { |
- // We check that the page id has incremented so that we don't reclassify |
- // pages as the user moves back and forward in session history. Note: we |
- // don't send every page id to the classifier, only those where the toplevel |
- // URL changed. |
- int load_id = render_view_->page_id(); |
- if (load_id <= last_page_id_sent_to_classifier_) { |
- return; |
- } |
- |
- GURL url_without_ref = StripToplevelUrl(); |
- if (url_without_ref == last_url_sent_to_classifier_) { |
- // The toplevle URL is the same, except for the ref. |
- // Update the last page id we sent, but don't trigger a new classification. |
- last_page_id_sent_to_classifier_ = load_id; |
- return; |
- } |
- |
- last_url_sent_to_classifier_ = url_without_ref; |
- last_page_id_sent_to_classifier_ = load_id; |
+ last_finished_load_id_ = render_view_->page_id(); |
+ last_finished_load_url_ = StripToplevelUrl(); |
classifier_page_text_.swap(*page_text); |
- |
- if (classifier_->is_ready()) { |
- classifier_->BeginClassification( |
- &classifier_page_text_, |
- NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
- } else { |
- // If there is no phishing classifier yet, we'll begin classification once |
- // SetPhishingScorer() is called by the RenderView. |
- pending_classification_ = true; |
- } |
+ MaybeStartClassification(); |
} |
void PhishingClassifierDelegate::CancelPendingClassification() { |
@@ -102,7 +79,6 @@ void PhishingClassifierDelegate::CancelPendingClassification() { |
classifier_->CancelPendingClassification(); |
} |
classifier_page_text_.clear(); |
- pending_classification_ = false; |
} |
void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, |
@@ -121,10 +97,69 @@ void PhishingClassifierDelegate::ClassificationDone(bool is_phishy, |
} |
GURL PhishingClassifierDelegate::StripToplevelUrl() { |
- GURL toplevel_url = render_view_->webview()->mainFrame()->url(); |
- GURL::Replacements replacements; |
- replacements.ClearRef(); |
- return toplevel_url.ReplaceComponents(replacements); |
+ return StripRef(render_view_->webview()->mainFrame()->url()); |
+} |
+ |
+void PhishingClassifierDelegate::MaybeStartClassification() { |
+ // We can begin phishing classification when the following conditions are |
+ // met: |
+ // 1. A Scorer has been created |
+ // 2. The browser has sent a StartPhishingDetection message for the current |
+ // toplevel URL. |
+ // 3. The page has finished loading and the page text has been extracted. |
+ // 4. The load is a new navigation (not a session history navigation). |
+ // 5. The toplevel URL has not already been classified. |
+ // |
+ // Note that if we determine that this particular navigation should not be |
+ // classified at all (as opposed to deferring it until we get an IPC or the |
+ // load completes), we discard the page text since it won't be needed. |
+ if (!classifier_->is_ready()) { |
+ VLOG(2) << "Not starting classification, no Scorer created."; |
+ // Keep classifier_page_text_, in case a Scorer is set later. |
+ return; |
+ } |
+ |
+ if (last_finished_load_id_ <= last_page_id_sent_to_classifier_) { |
+ // Skip loads from session history navigation. |
+ VLOG(2) << "Not starting classification, last finished load id is " |
+ << last_finished_load_id_ << " but we have classified up to " |
+ << "load id " << last_page_id_sent_to_classifier_; |
+ classifier_page_text_.clear(); // we won't need this. |
+ return; |
+ } |
+ |
+ if (last_finished_load_id_ != render_view_->page_id()) { |
+ VLOG(2) << "Render view page has changed, not starting classification"; |
+ classifier_page_text_.clear(); // we won't need this. |
+ return; |
+ } |
+ // If the page id is unchanged, the toplevel URL should also be unchanged. |
+ DCHECK_EQ(StripToplevelUrl(), last_finished_load_url_); |
+ |
+ if (last_finished_load_url_ == last_url_sent_to_classifier_) { |
+ // We've already classified this toplevel URL, so this was likely an |
+ // in-page navigation or a subframe navigation. The browser should not |
+ // send a StartPhishingDetection IPC in this case. |
+ VLOG(2) << "Toplevel URL is unchanged, not starting classification."; |
+ classifier_page_text_.clear(); // we won't need this. |
+ return; |
+ } |
+ |
lzheng
2011/02/04 20:05:22
Can you add a comment here to explain that this me
Brian Ryner
2011/02/10 01:12:52
Done.
|
+ if (last_url_received_from_browser_ != last_finished_load_url_) { |
+ VLOG(2) << "Not starting classification, last url from browser is " |
+ << last_url_received_from_browser_ << ", last finished load is " |
+ << last_finished_load_url_; |
+ // Keep classifier_page_text_, in case the browser notifies us later that |
+ // we should classify the URL. |
+ return; |
+ } |
+ |
+ VLOG(2) << "Starting classification for " << last_finished_load_url_; |
+ last_url_sent_to_classifier_ = last_finished_load_url_; |
+ last_page_id_sent_to_classifier_ = last_finished_load_id_; |
+ classifier_->BeginClassification( |
+ &classifier_page_text_, |
+ NewCallback(this, &PhishingClassifierDelegate::ClassificationDone)); |
} |
} // namespace safe_browsing |