net/url_request/url_request_throttler_entry.cc - Issue 6966038: Anti-DDoS enhancements: Log to net log, UMA stats, improved policy.

Unified Diff: net/url_request/url_request_throttler_entry.cc

Issue 6966038: Anti-DDoS enhancements: Log to net log, UMA stats, improved policy. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src

Patch Set: Respond to review comments. Created 9 years, 7 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

« net/url_request/url_request_throttler_entry.h ('K') | « net/url_request/url_request_throttler_entry.h ('k') | net/url_request/url_request_throttler_entry_interface.h » ('j') | net/url_request/url_request_throttler_manager.h » ('J')
Expand Comments ('e') | Collapse Comments ('c') | Hide Comments ('s')

Index: net/url_request/url_request_throttler_entry.cc

diff --git a/net/url_request/url_request_throttler_entry.cc b/net/url_request/url_request_throttler_entry.cc

index b14bcbe5a83b137408b5f464b5f1383e60e64885..f81e88803a77f4e2ccc3bfae0e8cb8c36a0235f4 100644

--- a/net/url_request/url_request_throttler_entry.cc

+++ b/net/url_request/url_request_throttler_entry.cc

@@ -7,8 +7,13 @@

#include <cmath>

#include "base/logging.h"

+#include "base/metrics/field_trial.h"

+#include "base/metrics/histogram.h"

#include "base/rand_util.h"

#include "base/string_number_conversions.h"

+#include "base/time.h"

wtc 2011/05/25 23:12:06 Nit: url_request_throttler_entry.h already include

Jói 2011/05/26 14:53:42 Done.

+#include "base/values.h"

+#include "net/base/net_log.h"

#include "net/url_request/url_request_throttler_header_interface.h"

#include "net/url_request/url_request_throttler_manager.h"

@@ -30,9 +35,10 @@ const int URLRequestThrottlerEntry::kDefaultMaxSendThreshold = 20;

// unavailable at the end of each back-off period for a total of about

// 48 minutes.

-// Ignoring the first 4 errors helps avoid back-off from kicking in on

-// flaky connections.

-const int URLRequestThrottlerEntry::kDefaultNumErrorsToIgnore = 4;

+// Ignoring the first couple of errors is just a conservative measure to

+// avoid false positives. It should help avoid back-off from kicking in e.g.

+// on flaky connections.

+const int URLRequestThrottlerEntry::kDefaultNumErrorsToIgnore = 2;

const int URLRequestThrottlerEntry::kDefaultInitialBackoffMs = 700;

const double URLRequestThrottlerEntry::kDefaultMultiplyFactor = 1.4;

const double URLRequestThrottlerEntry::kDefaultJitterFactor = 0.4;

@@ -44,20 +50,71 @@ const char URLRequestThrottlerEntry::kExponentialThrottlingHeader[] =

const char URLRequestThrottlerEntry::kExponentialThrottlingDisableValue[] =

"disable";

+// NetLog parameters when a request is rejected by throttling.

+class RejectedRequestParameters : public NetLog::EventParameters {

+ public:

+ RejectedRequestParameters(const std::string& url_id,

+ int num_failures,

+ int release_after_ms)

+ : url_id_(url_id),

+ num_failures_(num_failures),

+ release_after_ms_(release_after_ms) {

+ }

+ virtual Value* ToValue() const {

+ DictionaryValue* dict = new DictionaryValue();

+ dict->SetString("url", url_id_);

+ dict->SetInteger("num_failures", num_failures_);

+ dict->SetInteger("release_after_ms", release_after_ms_);

+ return dict;

+ }

+ private:

+ std::string url_id_;

+ int num_failures_;

+ int release_after_ms_;

+};

+// NetLog parameters when a response contains an X-Retry-After header.

+class RetryAfterParameters : public NetLog::EventParameters {

+ public:

+ RetryAfterParameters(const std::string& url_id,

+ int retry_after_ms)

+ : url_id_(url_id),

+ retry_after_ms_(retry_after_ms) {

+ }

+ virtual Value* ToValue() const {

+ DictionaryValue* dict = new DictionaryValue();

+ dict->SetString("url", url_id_);

+ dict->SetInteger("retry_after_ms", retry_after_ms_);

+ return dict;

+ }

+ private:

+ std::string url_id_;

+ int retry_after_ms_;

+};

URLRequestThrottlerEntry::URLRequestThrottlerEntry(

- URLRequestThrottlerManager* manager)

+ URLRequestThrottlerManager* manager,

+ const std::string& url_id)

: sliding_window_period_(

base::TimeDelta::FromMilliseconds(kDefaultSlidingWindowPeriodMs)),

max_send_threshold_(kDefaultMaxSendThreshold),

is_backoff_disabled_(false),

backoff_entry_(&backoff_policy_),

- manager_(manager) {

+ manager_(manager),

+ url_id_(url_id),

+ net_log_(BoundNetLog::Make(

+ manager->net_log(), NetLog::SOURCE_EXPONENTIAL_BACKOFF_THROTTLING)) {

DCHECK(manager_);

Initialize();

}

URLRequestThrottlerEntry::URLRequestThrottlerEntry(

URLRequestThrottlerManager* manager,

+ const std::string& url_id,

int sliding_window_period_ms,

int max_send_threshold,

int initial_backoff_ms,

@@ -69,7 +126,8 @@ URLRequestThrottlerEntry::URLRequestThrottlerEntry(

max_send_threshold_(max_send_threshold),

is_backoff_disabled_(false),

backoff_entry_(&backoff_policy_),

- manager_(manager) {

+ manager_(manager),

+ url_id_(url_id) {

DCHECK_GT(sliding_window_period_ms, 0);

DCHECK_GT(max_send_threshold_, 0);

DCHECK_GE(initial_backoff_ms, 0);

@@ -106,7 +164,7 @@ bool URLRequestThrottlerEntry::IsEntryOutdated() const {

// If there are send events in the sliding window period, we still need this

// entry.

if (!send_log_.empty() &&

- send_log_.back() + sliding_window_period_ > GetTimeNow()) {

+ send_log_.back() + sliding_window_period_ > ImplGetTimeNow()) {

return false;

}

@@ -122,15 +180,37 @@ void URLRequestThrottlerEntry::DetachManager() {

}

bool URLRequestThrottlerEntry::IsDuringExponentialBackoff() const {

- if (is_backoff_disabled_)

- return false;

+ bool reject_request = false;

+ if (!is_backoff_disabled_ && GetBackoffEntry()->ShouldRejectRequest()) {

+ int num_failures = GetBackoffEntry()->failure_count();

+ int release_after_ms =

+ (GetBackoffEntry()->GetReleaseTime() - base::TimeTicks::Now())

+ .InMilliseconds();

+ net_log_.AddEvent(

+ NetLog::TYPE_THROTTLING_REJECTED_REQUEST,

+ make_scoped_refptr(

+ new RejectedRequestParameters(url_id_,

+ num_failures,

+ release_after_ms)));

+ reject_request = true;

+ }

+ int reject_count = reject_request ? 1 : 0;

+ UMA_HISTOGRAM_ENUMERATION(

+ "Throttling.RequestThrottled", reject_count, 2);

+ if (base::FieldTrialList::TrialExists("ThrottlingEnabled")) {

+ UMA_HISTOGRAM_ENUMERATION(base::FieldTrial::MakeName(

+ "Throttling.RequestThrottled", "ThrottlingEnabled"), reject_count, 2);

+ }

- return GetBackoffEntry()->ShouldRejectRequest();

+ return reject_request;

}

int64 URLRequestThrottlerEntry::ReserveSendingTimeForNextRequest(

const base::TimeTicks& earliest_time) {

- base::TimeTicks now = GetTimeNow();

+ base::TimeTicks now = ImplGetTimeNow();

// If a lot of requests were successfully made recently,

// sliding_window_release_time_ may be greater than

@@ -171,7 +251,7 @@ base::TimeTicks

// wrong thing to do (i.e. it would likely be too long). Therefore, we

// return "now" so that retries are not delayed.

if (is_backoff_disabled_)

- return GetTimeNow();

+ return ImplGetTimeNow();

return GetBackoffEntry()->GetReleaseTime();

}

@@ -179,7 +259,10 @@ base::TimeTicks

void URLRequestThrottlerEntry::UpdateWithResponse(

const std::string& host,

const URLRequestThrottlerHeaderInterface* response) {

- if (response->GetResponseCode() >= 500) {

+ int status_code = response->GetResponseCode();

+ HandleMetricsTracking(status_code);

+ if (IsConsideredError(status_code)) {

GetBackoffEntry()->InformOfRequest(false);

} else {

GetBackoffEntry()->InformOfRequest(true);

@@ -195,15 +278,20 @@ void URLRequestThrottlerEntry::UpdateWithResponse(

}

-void URLRequestThrottlerEntry::ReceivedContentWasMalformed() {

+void URLRequestThrottlerEntry::ReceivedContentWasMalformed(int status_code) {

// A malformed body can only occur when the request to fetch a resource

// was successful. Therefore, in such a situation, we will receive one

- // call to ReceivedContentWasMalformed() and one call to UpdateWithResponse()

- // with a response categorized as "good". To end up counting one failure,

- // we need to count two failures here against the one success in

- // UpdateWithResponse().

- GetBackoffEntry()->InformOfRequest(false);

+ // call to ReceivedContentWasMalformed() and one call to

+ // UpdateWithResponse() with a response categorized as "good". To end

+ // up counting one failure, we need to count two failures here against

+ // the one success in UpdateWithResponse().

+ //

+ // We do nothing for a response that is already being considered an error

+ // based on its status code (otherwise we would count 3 errors instead of 1).

+ if (!IsConsideredError(status_code)) {

+ GetBackoffEntry()->InformOfRequest(false);

+ }

}

URLRequestThrottlerEntry::~URLRequestThrottlerEntry() {

@@ -217,9 +305,38 @@ void URLRequestThrottlerEntry::Initialize() {

backoff_policy_.jitter_factor = kDefaultJitterFactor;

backoff_policy_.maximum_backoff_ms = kDefaultMaximumBackoffMs;

backoff_policy_.entry_lifetime_ms = kDefaultEntryLifetimeMs;

+ // We pretend we just had a successful response so that we have a

+ // starting point to our tracking. This is called from the

+ // constructor so we do not use the virtual ImplGetTimeNow().

+ last_successful_response_time_ = base::TimeTicks::Now();

+ last_response_was_success_ = true;

+bool URLRequestThrottlerEntry::IsConsideredError(int status_code) {

+ // We throttle only for the status codes most likely to indicate the server

+ // is failing because it is too busy or otherwise are likely to be

+ // because of DDoS.

+ //

+ // 500 is the generic error when no better message is suitable, and

+ // as such does not necessarily indicate a temporary state, but

+ // other status codes cover most of the permanent error states.

+ // 503 is explicitly documented as a temporary state where the server

+ // is either overloaded or down for maintenance.

+ // 509 is the (non-standard but widely implemented) Bandwidth Limit Exceeded

+ // status code, which might indicate DDoS.

+ //

+ // We do not back off on 502 or 504, which are reported by gateways

+ // (proxies) on timeouts or failures, because in many cases these requests

+ // have not made it to the destination server and so we do not actually

+ // know that it is down or busy. One degenerate case could be a proxy on

+ // localhost, where you are not actually connected to the network.

+ return (status_code == 500 ||

+ status_code == 503 ||

+ status_code == 509);

}

-base::TimeTicks URLRequestThrottlerEntry::GetTimeNow() const {

+base::TimeTicks URLRequestThrottlerEntry::ImplGetTimeNow() const {

return base::TimeTicks::Now();

}

@@ -241,8 +358,22 @@ void URLRequestThrottlerEntry::HandleCustomRetryAfter(

if (value_ms < 0)

return;

- GetBackoffEntry()->SetCustomReleaseTime(

- GetTimeNow() + base::TimeDelta::FromMilliseconds(value_ms));

+ net_log_.AddEvent(

+ NetLog::TYPE_THROTTLING_GOT_CUSTOM_RETRY_AFTER,

+ make_scoped_refptr(new RetryAfterParameters(url_id_, value_ms)));

+ base::TimeDelta value = base::TimeDelta::FromMilliseconds(value_ms);

+ GetBackoffEntry()->SetCustomReleaseTime(ImplGetTimeNow() + value);

+ UMA_HISTOGRAM_CUSTOM_TIMES(

+ "Throttling.CustomRetryAfterMs", value,

+ base::TimeDelta::FromSeconds(1), base::TimeDelta::FromHours(12), 50);

+ if (base::FieldTrialList::TrialExists("ThrottlingEnabled")) {

+ UMA_HISTOGRAM_CUSTOM_TIMES(

+ base::FieldTrial::MakeName("Throttling.CustomRetryAfterMs",

+ "ThrottlingEnabled"), value,

+ base::TimeDelta::FromSeconds(1), base::TimeDelta::FromHours(12), 50);

+ }

}

void URLRequestThrottlerEntry::HandleThrottlingHeader(

@@ -257,6 +388,52 @@ void URLRequestThrottlerEntry::HandleThrottlingHeader(

}

+void URLRequestThrottlerEntry::HandleMetricsTracking(int status_code) {

+ // This is essentially the same as the "Net.HttpResponseCode" UMA stat

+ // but we are tracking it separately here for the throttling experiment

+ // to make sure we count only the responses seen by throttling.

+ // TODO(joi): Remove after experiment.

+ UMA_HISTOGRAM_ENUMERATION("Throttling.HttpResponseCode", status_code, 600);

+ if (base::FieldTrialList::TrialExists("ThrottlingEnabled")) {

+ UMA_HISTOGRAM_ENUMERATION(base::FieldTrial::MakeName(

+ "Throttling.HttpResponseCode", "ThrottlingEnabled"), status_code, 600);

+ }

+ // Note that we are not interested in whether the code is considered

+ // an error for the backoff logic, but whether it is a 5xx error in

+ // general. This is because here, we are tracking the apparent total

+ // downtime of a server.

+ if (status_code >= 500) {

+ last_response_was_success_ = false;

+ } else {

+ base::TimeTicks now = ImplGetTimeNow();

+ if (!last_response_was_success_) {

+ // We are transitioning from failure to success, so generate our stats.

+ base::TimeDelta down_time = now - last_successful_response_time_;

+ int failure_count = GetBackoffEntry()->failure_count();

+ UMA_HISTOGRAM_COUNTS("Throttling.FailureCountAtSuccess", failure_count);

+ UMA_HISTOGRAM_CUSTOM_TIMES(

+ "Throttling.PerceivedDowntime", down_time,

+ base::TimeDelta::FromMilliseconds(10),

+ base::TimeDelta::FromHours(6), 50);

+ if (base::FieldTrialList::TrialExists("ThrottlingEnabled")) {

+ UMA_HISTOGRAM_COUNTS(base::FieldTrial::MakeName(

+ "Throttling.FailureCountAtSuccess", "ThrottlingEnabled"),

+ failure_count);

+ UMA_HISTOGRAM_CUSTOM_TIMES(base::FieldTrial::MakeName(

+ "Throttling.PerceivedDowntime", "ThrottlingEnabled"), down_time,

+ base::TimeDelta::FromMilliseconds(10),

+ base::TimeDelta::FromHours(6), 50);

+ }

+ last_successful_response_time_ = now;

+ last_response_was_success_ = true;

+ }

const BackoffEntry* URLRequestThrottlerEntry::GetBackoffEntry() const {

return &backoff_entry_;

}