| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 #include "net/dns/dns_session.h" | |
| 6 | |
| 7 #include "base/basictypes.h" | |
| 8 #include "base/bind.h" | |
| 9 #include "base/lazy_instance.h" | |
| 10 #include "base/metrics/histogram.h" | |
| 11 #include "base/metrics/sample_vector.h" | |
| 12 #include "base/rand_util.h" | |
| 13 #include "base/stl_util.h" | |
| 14 #include "base/time/time.h" | |
| 15 #include "net/base/ip_endpoint.h" | |
| 16 #include "net/base/net_errors.h" | |
| 17 #include "net/dns/dns_config_service.h" | |
| 18 #include "net/dns/dns_socket_pool.h" | |
| 19 #include "net/socket/stream_socket.h" | |
| 20 #include "net/udp/datagram_client_socket.h" | |
| 21 | |
| 22 namespace net { | |
| 23 | |
namespace {

// Lower bound applied to every computed timeout, in case we are talking to a
// local DNS proxy that answers almost instantly.
const unsigned kMinTimeoutMs = 10;
// Upper bound applied to every computed timeout.
const unsigned kMaxTimeoutMs = 5000;

// Percentile of the observed-RTT histogram that is used as the retransmission
// timeout.
const unsigned kRTOPercentile = 99;
// Number of buckets in the observed-RTT histogram.
const size_t kRTTBucketCount = 100;

}  // namespace
| 35 | |
| 36 // Runtime statistics of DNS server. | |
| 37 struct DnsSession::ServerStats { | |
| 38 ServerStats(base::TimeDelta rtt_estimate_param, RttBuckets* buckets) | |
| 39 : last_failure_count(0), rtt_estimate(rtt_estimate_param) { | |
| 40 rtt_histogram.reset(new base::SampleVector(buckets)); | |
| 41 // Seed histogram with 2 samples at |rtt_estimate| timeout. | |
| 42 rtt_histogram->Accumulate( | |
| 43 static_cast<base::HistogramBase::Sample>(rtt_estimate.InMilliseconds()), | |
| 44 2); | |
| 45 } | |
| 46 | |
| 47 // Count of consecutive failures after last success. | |
| 48 int last_failure_count; | |
| 49 | |
| 50 // Last time when server returned failure or timeout. | |
| 51 base::Time last_failure; | |
| 52 // Last time when server returned success. | |
| 53 base::Time last_success; | |
| 54 | |
| 55 // Estimated RTT using moving average. | |
| 56 base::TimeDelta rtt_estimate; | |
| 57 // Estimated error in the above. | |
| 58 base::TimeDelta rtt_deviation; | |
| 59 | |
| 60 // A histogram of observed RTT . | |
| 61 scoped_ptr<base::SampleVector> rtt_histogram; | |
| 62 | |
| 63 DISALLOW_COPY_AND_ASSIGN(ServerStats); | |
| 64 }; | |
| 65 | |
| 66 // static | |
| 67 base::LazyInstance<DnsSession::RttBuckets>::Leaky DnsSession::rtt_buckets_ = | |
| 68 LAZY_INSTANCE_INITIALIZER; | |
| 69 | |
| 70 DnsSession::RttBuckets::RttBuckets() : base::BucketRanges(kRTTBucketCount + 1) { | |
| 71 base::Histogram::InitializeBucketRanges(1, 5000, this); | |
| 72 } | |
| 73 | |
| 74 DnsSession::SocketLease::SocketLease(scoped_refptr<DnsSession> session, | |
| 75 unsigned server_index, | |
| 76 scoped_ptr<DatagramClientSocket> socket) | |
| 77 : session_(session), server_index_(server_index), socket_(socket.Pass()) {} | |
| 78 | |
| 79 DnsSession::SocketLease::~SocketLease() { | |
| 80 session_->FreeSocket(server_index_, socket_.Pass()); | |
| 81 } | |
| 82 | |
| 83 DnsSession::DnsSession(const DnsConfig& config, | |
| 84 scoped_ptr<DnsSocketPool> socket_pool, | |
| 85 const RandIntCallback& rand_int_callback, | |
| 86 NetLog* net_log) | |
| 87 : config_(config), | |
| 88 socket_pool_(socket_pool.Pass()), | |
| 89 rand_callback_(base::Bind(rand_int_callback, 0, kuint16max)), | |
| 90 net_log_(net_log), | |
| 91 server_index_(0) { | |
| 92 socket_pool_->Initialize(&config_.nameservers, net_log); | |
| 93 UMA_HISTOGRAM_CUSTOM_COUNTS( | |
| 94 "AsyncDNS.ServerCount", config_.nameservers.size(), 0, 10, 11); | |
| 95 for (size_t i = 0; i < config_.nameservers.size(); ++i) { | |
| 96 server_stats_.push_back(new ServerStats(config_.timeout, | |
| 97 rtt_buckets_.Pointer())); | |
| 98 } | |
| 99 } | |
| 100 | |
| 101 DnsSession::~DnsSession() { | |
| 102 RecordServerStats(); | |
| 103 } | |
| 104 | |
| 105 uint16 DnsSession::NextQueryId() const { | |
| 106 return static_cast<uint16>(rand_callback_.Run()); | |
| 107 } | |
| 108 | |
| 109 unsigned DnsSession::NextFirstServerIndex() { | |
| 110 unsigned index = NextGoodServerIndex(server_index_); | |
| 111 if (config_.rotate) | |
| 112 server_index_ = (server_index_ + 1) % config_.nameservers.size(); | |
| 113 return index; | |
| 114 } | |
| 115 | |
| 116 unsigned DnsSession::NextGoodServerIndex(unsigned server_index) { | |
| 117 unsigned index = server_index; | |
| 118 base::Time oldest_server_failure(base::Time::Now()); | |
| 119 unsigned oldest_server_failure_index = 0; | |
| 120 | |
| 121 UMA_HISTOGRAM_BOOLEAN("AsyncDNS.ServerIsGood", | |
| 122 server_stats_[server_index]->last_failure.is_null()); | |
| 123 | |
| 124 do { | |
| 125 base::Time cur_server_failure = server_stats_[index]->last_failure; | |
| 126 // If number of failures on this server doesn't exceed number of allowed | |
| 127 // attempts, return its index. | |
| 128 if (server_stats_[server_index]->last_failure_count < config_.attempts) { | |
| 129 return index; | |
| 130 } | |
| 131 // Track oldest failed server. | |
| 132 if (cur_server_failure < oldest_server_failure) { | |
| 133 oldest_server_failure = cur_server_failure; | |
| 134 oldest_server_failure_index = index; | |
| 135 } | |
| 136 index = (index + 1) % config_.nameservers.size(); | |
| 137 } while (index != server_index); | |
| 138 | |
| 139 // If we are here it means that there are no successful servers, so we have | |
| 140 // to use one that has failed oldest. | |
| 141 return oldest_server_failure_index; | |
| 142 } | |
| 143 | |
| 144 void DnsSession::RecordServerFailure(unsigned server_index) { | |
| 145 UMA_HISTOGRAM_CUSTOM_COUNTS( | |
| 146 "AsyncDNS.ServerFailureIndex", server_index, 0, 10, 11); | |
| 147 ++(server_stats_[server_index]->last_failure_count); | |
| 148 server_stats_[server_index]->last_failure = base::Time::Now(); | |
| 149 } | |
| 150 | |
| 151 void DnsSession::RecordServerSuccess(unsigned server_index) { | |
| 152 if (server_stats_[server_index]->last_success.is_null()) { | |
| 153 UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresAfterNetworkChange", | |
| 154 server_stats_[server_index]->last_failure_count); | |
| 155 } else { | |
| 156 UMA_HISTOGRAM_COUNTS_100("AsyncDNS.ServerFailuresBeforeSuccess", | |
| 157 server_stats_[server_index]->last_failure_count); | |
| 158 } | |
| 159 server_stats_[server_index]->last_failure_count = 0; | |
| 160 server_stats_[server_index]->last_failure = base::Time(); | |
| 161 server_stats_[server_index]->last_success = base::Time::Now(); | |
| 162 } | |
| 163 | |
| 164 void DnsSession::RecordRTT(unsigned server_index, base::TimeDelta rtt) { | |
| 165 DCHECK_LT(server_index, server_stats_.size()); | |
| 166 | |
| 167 // For measurement, assume it is the first attempt (no backoff). | |
| 168 base::TimeDelta timeout_jacobson = NextTimeoutFromJacobson(server_index, 0); | |
| 169 base::TimeDelta timeout_histogram = NextTimeoutFromHistogram(server_index, 0); | |
| 170 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobson", rtt - timeout_jacobson); | |
| 171 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogram", | |
| 172 rtt - timeout_histogram); | |
| 173 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorJacobsonUnder", | |
| 174 timeout_jacobson - rtt); | |
| 175 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutErrorHistogramUnder", | |
| 176 timeout_histogram - rtt); | |
| 177 | |
| 178 // Jacobson/Karels algorithm for TCP. | |
| 179 // Using parameters: alpha = 1/8, delta = 1/4, beta = 4 | |
| 180 base::TimeDelta& estimate = server_stats_[server_index]->rtt_estimate; | |
| 181 base::TimeDelta& deviation = server_stats_[server_index]->rtt_deviation; | |
| 182 base::TimeDelta current_error = rtt - estimate; | |
| 183 estimate += current_error / 8; // * alpha | |
| 184 base::TimeDelta abs_error = base::TimeDelta::FromInternalValue( | |
| 185 std::abs(current_error.ToInternalValue())); | |
| 186 deviation += (abs_error - deviation) / 4; // * delta | |
| 187 | |
| 188 // Histogram-based method. | |
| 189 server_stats_[server_index]->rtt_histogram->Accumulate( | |
| 190 static_cast<base::HistogramBase::Sample>(rtt.InMilliseconds()), 1); | |
| 191 } | |
| 192 | |
| 193 void DnsSession::RecordLostPacket(unsigned server_index, int attempt) { | |
| 194 base::TimeDelta timeout_jacobson = | |
| 195 NextTimeoutFromJacobson(server_index, attempt); | |
| 196 base::TimeDelta timeout_histogram = | |
| 197 NextTimeoutFromHistogram(server_index, attempt); | |
| 198 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentJacobson", timeout_jacobson); | |
| 199 UMA_HISTOGRAM_TIMES("AsyncDNS.TimeoutSpentHistogram", timeout_histogram); | |
| 200 } | |
| 201 | |
| 202 void DnsSession::RecordServerStats() { | |
| 203 for (size_t index = 0; index < server_stats_.size(); ++index) { | |
| 204 if (server_stats_[index]->last_failure_count) { | |
| 205 if (server_stats_[index]->last_success.is_null()) { | |
| 206 UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresWithoutSuccess", | |
| 207 server_stats_[index]->last_failure_count); | |
| 208 } else { | |
| 209 UMA_HISTOGRAM_COUNTS("AsyncDNS.ServerFailuresAfterSuccess", | |
| 210 server_stats_[index]->last_failure_count); | |
| 211 } | |
| 212 } | |
| 213 } | |
| 214 } | |
| 215 | |
| 216 | |
| 217 base::TimeDelta DnsSession::NextTimeout(unsigned server_index, int attempt) { | |
| 218 // Respect config timeout if it exceeds |kMaxTimeoutMs|. | |
| 219 if (config_.timeout.InMilliseconds() >= kMaxTimeoutMs) | |
| 220 return config_.timeout; | |
| 221 return NextTimeoutFromHistogram(server_index, attempt); | |
| 222 } | |
| 223 | |
| 224 // Allocate a socket, already connected to the server address. | |
| 225 scoped_ptr<DnsSession::SocketLease> DnsSession::AllocateSocket( | |
| 226 unsigned server_index, const NetLog::Source& source) { | |
| 227 scoped_ptr<DatagramClientSocket> socket; | |
| 228 | |
| 229 socket = socket_pool_->AllocateSocket(server_index); | |
| 230 if (!socket.get()) | |
| 231 return scoped_ptr<SocketLease>(); | |
| 232 | |
| 233 socket->NetLog().BeginEvent(NetLog::TYPE_SOCKET_IN_USE, | |
| 234 source.ToEventParametersCallback()); | |
| 235 | |
| 236 SocketLease* lease = new SocketLease(this, server_index, socket.Pass()); | |
| 237 return scoped_ptr<SocketLease>(lease); | |
| 238 } | |
| 239 | |
| 240 scoped_ptr<StreamSocket> DnsSession::CreateTCPSocket( | |
| 241 unsigned server_index, const NetLog::Source& source) { | |
| 242 return socket_pool_->CreateTCPSocket(server_index, source); | |
| 243 } | |
| 244 | |
| 245 // Release a socket. | |
| 246 void DnsSession::FreeSocket(unsigned server_index, | |
| 247 scoped_ptr<DatagramClientSocket> socket) { | |
| 248 DCHECK(socket.get()); | |
| 249 | |
| 250 socket->NetLog().EndEvent(NetLog::TYPE_SOCKET_IN_USE); | |
| 251 | |
| 252 socket_pool_->FreeSocket(server_index, socket.Pass()); | |
| 253 } | |
| 254 | |
| 255 base::TimeDelta DnsSession::NextTimeoutFromJacobson(unsigned server_index, | |
| 256 int attempt) { | |
| 257 DCHECK_LT(server_index, server_stats_.size()); | |
| 258 | |
| 259 base::TimeDelta timeout = server_stats_[server_index]->rtt_estimate + | |
| 260 4 * server_stats_[server_index]->rtt_deviation; | |
| 261 | |
| 262 timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs)); | |
| 263 | |
| 264 // The timeout doubles every full round. | |
| 265 unsigned num_backoffs = attempt / config_.nameservers.size(); | |
| 266 | |
| 267 return std::min(timeout * (1 << num_backoffs), | |
| 268 base::TimeDelta::FromMilliseconds(kMaxTimeoutMs)); | |
| 269 } | |
| 270 | |
| 271 base::TimeDelta DnsSession::NextTimeoutFromHistogram(unsigned server_index, | |
| 272 int attempt) { | |
| 273 DCHECK_LT(server_index, server_stats_.size()); | |
| 274 | |
| 275 static_assert(std::numeric_limits<base::HistogramBase::Count>::is_signed, | |
| 276 "histogram base count assumed to be signed"); | |
| 277 | |
| 278 // Use fixed percentile of observed samples. | |
| 279 const base::SampleVector& samples = | |
| 280 *server_stats_[server_index]->rtt_histogram; | |
| 281 | |
| 282 base::HistogramBase::Count total = samples.TotalCount(); | |
| 283 base::HistogramBase::Count remaining_count = kRTOPercentile * total / 100; | |
| 284 size_t index = 0; | |
| 285 while (remaining_count > 0 && index < rtt_buckets_.Get().size()) { | |
| 286 remaining_count -= samples.GetCountAtIndex(index); | |
| 287 ++index; | |
| 288 } | |
| 289 | |
| 290 base::TimeDelta timeout = | |
| 291 base::TimeDelta::FromMilliseconds(rtt_buckets_.Get().range(index)); | |
| 292 | |
| 293 timeout = std::max(timeout, base::TimeDelta::FromMilliseconds(kMinTimeoutMs)); | |
| 294 | |
| 295 // The timeout still doubles every full round. | |
| 296 unsigned num_backoffs = attempt / config_.nameservers.size(); | |
| 297 | |
| 298 return std::min(timeout * (1 << num_backoffs), | |
| 299 base::TimeDelta::FromMilliseconds(kMaxTimeoutMs)); | |
| 300 } | |
| 301 | |
| 302 } // namespace net | |
| OLD | NEW |