Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5217)

Unified Diff: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc

Issue 3130039: Limit the time spent on a single iteration of PhishingDOMFeatureExtractor. (Closed) Base URL: http://src.chromium.org/git/chromium.git
Patch Set: tiny comment fix Created 10 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc
diff --git a/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc
index 7dbf7651825409b288604c11e1f226bf959e7aa9..db17e1d9c5aecbbfcc07eb97807d973ebffff486 100644
--- a/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc
+++ b/chrome/renderer/safe_browsing/phishing_dom_feature_extractor.cc
@@ -8,7 +8,10 @@
#include "base/hash_tables.h"
#include "base/histogram.h"
#include "base/logging.h"
+#include "base/message_loop.h"
+#include "base/time.h"
#include "chrome/renderer/render_view.h"
+#include "chrome/renderer/safe_browsing/feature_extractor_clock.h"
#include "chrome/renderer/safe_browsing/features.h"
#include "net/base/registry_controlled_domain.h"
#include "third_party/WebKit/WebKit/chromium/public/WebDocument.h"
@@ -20,6 +23,19 @@
namespace safe_browsing {
+// This time should be short enough that it doesn't noticeably disrupt the
+// user's interaction with the page.
+const int PhishingDOMFeatureExtractor::kMaxTimePerChunkMs = 50;
+
+// Experimenting shows that we get a reasonable gain in performance by
+// increasing this up to around 10, but there's not much benefit in
+// increasing it past that.
+const int PhishingDOMFeatureExtractor::kClockCheckGranularity = 10;
+
+// This should be longer than we expect feature extraction to take on any
+// actual phishing page.
+const int PhishingDOMFeatureExtractor::kMaxTotalTimeMs = 500;
+
// Intermediate state used for computing features. See features.h for
// descriptions of the DOM features that are computed.
struct PhishingDOMFeatureExtractor::PageFeatureState {
@@ -45,7 +61,13 @@ struct PhishingDOMFeatureExtractor::PageFeatureState {
// How many script tags
int num_script_tags;
- PageFeatureState()
+ // The time at which we started feature extraction for the current page.
+ base::TimeTicks start_time;
+
+ // The number of iterations we've done for the current extraction.
+ int num_iterations;
+
+ explicit PageFeatureState(base::TimeTicks start_time_ticks)
: external_links(0),
secure_links(0),
total_links(0),
@@ -58,7 +80,9 @@ struct PhishingDOMFeatureExtractor::PageFeatureState {
total_actions(0),
img_other_domain(0),
total_imgs(0),
- num_script_tags(0) {}
+ num_script_tags(0),
+ start_time(start_time_ticks),
+ num_iterations(0) {}
~PageFeatureState() {}
};
@@ -74,8 +98,10 @@ struct PhishingDOMFeatureExtractor::FrameData {
};
PhishingDOMFeatureExtractor::PhishingDOMFeatureExtractor(
- RenderView* render_view)
+ RenderView* render_view,
+ FeatureExtractorClock* clock)
: render_view_(render_view),
+ clock_(clock),
ALLOW_THIS_IN_INITIALIZER_LIST(method_factory_(this)) {
Clear();
}
@@ -98,6 +124,8 @@ void PhishingDOMFeatureExtractor::ExtractFeatures(
features_ = features;
done_callback_.reset(done_callback);
+
+ page_feature_state_.reset(new PageFeatureState(clock_->Now()));
MessageLoop::current()->PostTask(
FROM_HERE,
method_factory_.NewRunnableMethod(
@@ -111,6 +139,10 @@ void PhishingDOMFeatureExtractor::CancelPendingExtraction() {
}
void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() {
+ DCHECK(page_feature_state_.get());
+ ++page_feature_state_->num_iterations;
+ base::TimeTicks current_chunk_start_time = clock_->Now();
+
if (!cur_frame_) {
WebKit::WebView* web_view = render_view_->webview();
if (!web_view) {
@@ -122,15 +154,21 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() {
return;
}
cur_frame_ = web_view->mainFrame();
- page_feature_state_.reset(new PageFeatureState);
}
+ int num_elements = 0;
for (; cur_frame_;
cur_frame_ = cur_frame_->traverseNext(false /* don't wrap around */)) {
WebKit::WebNode cur_node;
if (cur_frame_data_.get()) {
// We're resuming traversal of a frame, so just advance to the next node.
cur_node = cur_frame_data_->elements.nextItem();
+ // When we resume the traversal, the first call to nextItem() potentially
+ // has to walk through the document again from the beginning, if it was
+ // modified between our chunks of work. Log how long this takes, so we
+ // can tell if it's too slow.
+ UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureResumeTime",
+ clock_->Now() - current_chunk_start_time);
} else {
// We just moved to a new frame, so update our frame state
// and advance to the first element.
@@ -160,8 +198,36 @@ void PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout() {
HandleScript(element);
}
- // TODO(bryner): stop if too much time has elapsed, and add histograms
- // for the time spent processing.
+ if (++num_elements >= kClockCheckGranularity) {
+ num_elements = 0;
+ base::TimeTicks now = clock_->Now();
+ if (now - page_feature_state_->start_time >=
+ base::TimeDelta::FromMilliseconds(kMaxTotalTimeMs)) {
+ DLOG(ERROR) << "Feature extraction took too long, giving up";
+ // We expect this to happen infrequently, so record when it does.
+ UMA_HISTOGRAM_COUNTS("SBClientPhishing.DOMFeatureTimeout", 1);
+ RunCallback(false);
+ return;
+ }
+ base::TimeDelta chunk_elapsed = now - current_chunk_start_time;
+ if (chunk_elapsed >=
+ base::TimeDelta::FromMilliseconds(kMaxTimePerChunkMs)) {
+ // The time limit for the current chunk is up, so post a task to
+ // continue extraction.
+ //
+ // Record how much time we actually spent on the chunk. If this is
+ // much higher than kMaxTimePerChunkMs, we may need to adjust the
+ // clock granularity.
+ UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureChunkTime",
+ chunk_elapsed);
+ MessageLoop::current()->PostTask(
+ FROM_HERE,
+ method_factory_.NewRunnableMethod(
+ &PhishingDOMFeatureExtractor::ExtractFeaturesWithTimeout));
+ return;
+ }
+ // Otherwise, continue.
+ }
}
// We're done with this frame, recalculate the FrameData when we
@@ -296,6 +362,14 @@ void PhishingDOMFeatureExtractor::CheckNoPendingExtraction() {
}
void PhishingDOMFeatureExtractor::RunCallback(bool success) {
+ // Record some timing stats that we can use to evaluate feature extraction
+ // performance. These include both successful and failed extractions.
+ DCHECK(page_feature_state_.get());
+ UMA_HISTOGRAM_COUNTS("SBClientPhishing.DOMFeatureIterations",
+ page_feature_state_->num_iterations);
+ UMA_HISTOGRAM_TIMES("SBClientPhishing.DOMFeatureTotalTime",
+ clock_->Now() - page_feature_state_->start_time);
+
DCHECK(done_callback_.get());
done_callback_->Run(success);
Clear();

Powered by Google App Engine
This is Rietveld 408576698